diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -56,6 +56,8 @@ locale/*/LC_MESSAGES/hg.mo hgext/__index__.py +rust/target/ + # Generated wheels wheelhouse/ diff --git a/rust/.cargo/config b/rust/.cargo/config new file mode 100644 --- /dev/null +++ b/rust/.cargo/config @@ -0,0 +1,7 @@ +# Rust builds with a modern MSVC and uses a newer CRT. +# Python 2.7 has a shared library dependency on an older CRT (msvcr90.dll). +# We statically link the modern CRT to avoid multiple msvcr*.dll libraries +# being loaded and Python possibly picking up symbols from the newer runtime +# (which would be loaded first). +[target.'cfg(target_os = "windows")'] +rustflags = ["-Ctarget-feature=+crt-static"] diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,127 @@ +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cpython" +version = "0.1.0" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862#ae8b89514b6df35419a4a563e416081a75bca862" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862)", +] + +[[package]] +name = "hgcli" +version = "0.1.0" +dependencies = [ + "cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862)", + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "python27-sys" +version = "0.1.2" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862#ae8b89514b6df35419a4a563e416081a75bca862" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", 
+] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862)" = "" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)" = "36fbc8a8929c632868295d0178dd8f63fc423fd7537ad0738372bd010b3ac9b0" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070" +"checksum python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=ae8b89514b6df35419a4a563e416081a75bca862)" = "" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" +"checksum thread-id 2.0.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,2 @@ +[workspace] +members = ["hgcli"] diff --git a/rust/README.rst b/rust/README.rst new file mode 100644 --- /dev/null +++ b/rust/README.rst @@ -0,0 +1,78 @@ +=================== +Mercurial Rust Code +=================== + +This directory contains various Rust code for the Mercurial project. + +The top-level ``Cargo.toml`` file defines a workspace containing +all primary Mercurial crates. + +Building +======== + +To build the Rust components:: + + $ cargo build + +If you prefer a non-debug / release configuration:: + + $ cargo build --release + +Features +-------- + +The following Cargo features are available: + +localdev (default) + Produce files that work with an in-source-tree build. + + In this mode, the build finds and uses a ``python2.7`` binary from + ``PATH``. The ``hg`` binary assumes it runs from + ``rust/target/<profile>/hg`` (e.g. ``rust/target/debug/hg``) + and it finds Mercurial files at ``dirname($0)/../../../``. + +Build Mechanism +--------------- + +The produced ``hg`` binary is *bound* to a CPython installation. The +binary links against and loads a CPython library that is discovered +at build time (by a ``build.rs`` Cargo build script).
The Python +standard library defined by this CPython installation is also used. + +Finding the appropriate CPython installation to use is done by +the ``python27-sys`` crate's ``build.rs``. Its search order is: + +1. ``PYTHON_SYS_EXECUTABLE`` environment variable. +2. ``python`` executable on ``PATH`` +3. ``python2`` executable on ``PATH`` +4. ``python2.7`` executable on ``PATH`` + +Additional verification of the found Python will be performed by our +``build.rs`` to ensure it meets Mercurial's requirements. + +Details about the build-time configured Python are built into the +produced ``hg`` binary. This means that a built ``hg`` binary is only +suitable for a specific, well-defined role. These roles are controlled +by Cargo features (see above). + +Running +======= + +The ``hgcli`` crate produces an ``hg`` binary. You can run this binary +via ``cargo run``:: + + $ cargo run --manifest-path hgcli/Cargo.toml + +Or directly:: + + $ target/debug/hg + $ target/release/hg + +You can also run the test harness with this binary:: + + $ ./run-tests.py --with-hg ../rust/target/debug/hg + +.. note:: + + Integration with the test harness is still preliminary. Remember to + ``cargo build`` after changes because the test harness doesn't yet + automatically build Rust code. diff --git a/rust/hgcli/Cargo.toml b/rust/hgcli/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/hgcli/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "hgcli" +version = "0.1.0" +authors = ["Gregory Szorc "] +license = "GPL-2.0" + +build = "build.rs" + +[[bin]] +name = "hg" +path = "src/main.rs" + +[features] +# localdev: detect Python in PATH and use files from source checkout. +default = ["localdev"] +localdev = [] + +[dependencies] +libc = "0.2.34" + +# We currently use a custom build of cpython and python27-sys with the +# following changes: +# * Exports Cargo variable defining the full path to Python interpreter. +# * sysmodule exports for PySys_SetArgv. 
// build.rs -- Configure build environment for `hgcli` Rust package.
//
// Copyright 2017 Gregory Szorc
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use std::collections::HashMap;
use std::env;
use std::path::Path;
#[cfg(target_os = "windows")]
use std::path::PathBuf;

use std::process::Command;

/// Delimiter the helper Python script places between `key=value` entries.
///
/// The script that emits the entries and the parser that reads them both use
/// this constant, so the two cannot drift apart.
const CONFIG_SEPARATOR: &'static str = "SEPARATOR STRING";

/// The Python interpreter selected for the build and its `sysconfig` variables.
struct PythonConfig {
    /// Interpreter path taken from `DEP_PYTHON27_PYTHON_INTERPRETER`.
    python: String,
    /// Output of `sysconfig.get_config_vars()`, every value rendered with `%s`.
    config: HashMap<String, String>,
}

/// Parse the helper script's output into a key/value map.
///
/// `output` is a sequence of `key=value` entries joined by
/// `CONFIG_SEPARATOR`. Only the first `=` of an entry splits it, so values
/// may themselves contain `=`. Entries without any `=` (including the single
/// empty entry produced by empty output) are skipped instead of panicking.
fn parse_config_vars(output: &str) -> HashMap<String, String> {
    // `print` ends its output with a newline (`\r\n` on Windows). Left in
    // place it would be glued onto the value of whichever entry comes last
    // and make comparisons such as `value == "1"` fail for that entry.
    let mut text = output;
    while text.ends_with('\n') || text.ends_with('\r') {
        text = &text[..text.len() - 1];
    }

    let mut vars = HashMap::new();
    for entry in text.split(CONFIG_SEPARATOR) {
        let mut parts = entry.splitn(2, '=');
        if let (Some(key), Some(value)) = (parts.next(), parts.next()) {
            vars.insert(String::from(key), String::from(value));
        }
    }
    vars
}

/// Return true when `key` is present in the config and equals `expected`.
fn config_value_is(config: &PythonConfig, key: &str, expected: &str) -> bool {
    config.config.get(key).map_or(false, |value| value == expected)
}

/// Locate the Python interpreter and collect its `sysconfig` variables.
///
/// # Panics
///
/// Panics when `DEP_PYTHON27_PYTHON_INTERPRETER` is unset, when the path it
/// names does not exist, when the interpreter cannot be spawned, or when the
/// helper script exits unsuccessfully.
fn get_python_config() -> PythonConfig {
    // The python27-sys crate exports a Cargo variable defining the full
    // path to the interpreter being used.
    let python = env::var("DEP_PYTHON27_PYTHON_INTERPRETER")
        .expect("Missing environment variable defining Python interpreter path; are you using the correct python27-sys crate?");

    if !Path::new(&python).exists() {
        panic!("Python interpreter {} does not exist; this should never happen", python);
    }

    // This is a bit hacky but it gets the job done: dump every config
    // variable as `key=value`, joined by a separator that is unlikely to
    // occur inside a value.
    let script = format!(
        "import sysconfig; \
         c = sysconfig.get_config_vars(); \
         print('{}'.join('%s=%s' % i for i in c.items()))",
        CONFIG_SEPARATOR
    );

    let out = Command::new(&python)
        .arg("-c")
        .arg(&script)
        .output()
        .expect("failed to execute Python interpreter");

    if !out.status.success() {
        panic!(
            "python script failed: {}",
            String::from_utf8_lossy(&out.stderr)
        );
    }

    let stdout = String::from_utf8_lossy(&out.stdout);
    let vars = parse_config_vars(&stdout);

    PythonConfig {
        python: python,
        config: vars,
    }
}

/// Report whether the detected Python was built with a shared library.
///
/// Outside Windows this is read from the `Py_ENABLE_SHARED` config variable.
#[cfg(not(target_os = "windows"))]
fn have_shared(config: &PythonConfig) -> bool {
    config_value_is(config, "Py_ENABLE_SHARED", "1")
}

/// Report whether the detected Python was built with a shared library.
///
/// On Windows this checks for `python27.dll` in the interpreter's directory.
#[cfg(target_os = "windows")]
fn have_shared(config: &PythonConfig) -> bool {
    // python27.dll should exist next to python2.7.exe.
    let mut dll = PathBuf::from(&config.python);
    dll.pop();
    dll.push("python27.dll");

    dll.exists()
}

/// Config variables that must equal `"1"` for the detected Python.
static REQUIRED_CONFIG_FLAGS: [&'static str; 2] = [
    "Py_USING_UNICODE",
    "WITH_THREAD",
];

/// Build script entry point.
///
/// Exports the interpreter path to the crate as `PYTHON_INTERPRETER` and
/// panics (failing the build) when the detected Python does not meet the
/// requirements checked below.
fn main() {
    let config = get_python_config();

    println!("Using Python: {}", config.python);
    println!("cargo:rustc-env=PYTHON_INTERPRETER={}", config.python);

    let prefix = config
        .config
        .get("prefix")
        .expect("Python sysconfig did not report a `prefix` variable");

    println!("Prefix: {}", prefix);

    // TODO Windows builds don't expose these config flags. Figure out another
    // way.
    if cfg!(not(target_os = "windows")) {
        for key in REQUIRED_CONFIG_FLAGS.iter() {
            if !config_value_is(&config, key, "1") {
                panic!("Detected Python requires feature {}", key);
            }
        }
    }

    // We need a Python shared library.
    if !have_shared(&config) {
        panic!("Detected Python lacks a shared library, which is required");
    }

    // The UCS-4 requirement is not enforced on Windows.
    if cfg!(not(target_os = "windows")) && !config_value_is(&config, "Py_UNICODE_SIZE", "4") {
        panic!("Detected Python doesn't support UCS-4 code points");
    }
}
+ if !have_shared(&config) { + panic!("Detected Python lacks a shared library, which is required"); + } + + let ucs4 = match config.config.get("Py_UNICODE_SIZE") { + Some(value) => value == "4", + None => false, + }; + + if !ucs4 { + #[cfg(not(target_os = "windows"))] + panic!("Detected Python doesn't support UCS-4 code points"); + } +} diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs new file mode 100644 --- /dev/null +++ b/rust/hgcli/src/main.rs @@ -0,0 +1,220 @@ +// main.rs -- Main routines for `hg` program +// +// Copyright 2017 Gregory Szorc +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +extern crate libc; +extern crate cpython; +extern crate python27_sys; + +use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python}; +use libc::{c_char, c_int}; + +use std::env; +use std::path::PathBuf; +use std::ffi::CString; +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStringExt; + +#[derive(Debug)] +struct Environment { + _exe: PathBuf, + python_exe: PathBuf, + python_home: PathBuf, + mercurial_modules: PathBuf, +} + +/// Run Mercurial locally from a source distribution or checkout. +/// +/// hg is /rust/target//hg +/// Python interpreter is detected by build script. +/// Python home is relative to Python interpreter. +/// Mercurial files are relative to hg binary, which is relative to source root. 
+#[cfg(feature = "localdev")] +fn get_environment() -> Environment { + let exe = env::current_exe().unwrap(); + + let mut mercurial_modules = exe.clone(); + mercurial_modules.pop(); // /rust/target/ + mercurial_modules.pop(); // /rust/target + mercurial_modules.pop(); // /rust + mercurial_modules.pop(); // / + + let python_exe: &'static str = env!("PYTHON_INTERPRETER"); + let python_exe = PathBuf::from(python_exe); + + let mut python_home = python_exe.clone(); + python_home.pop(); + + // On Windows, python2.7.exe exists at the root directory of the Python + // install. Everywhere else, the Python install root is one level up. + if !python_exe.ends_with("python2.7.exe") { + python_home.pop(); + } + + Environment { + _exe: exe.clone(), + python_exe: python_exe.clone(), + python_home: python_home.clone(), + mercurial_modules: mercurial_modules.to_path_buf(), + } +} + +// On UNIX, argv starts as an array of char*. So it is easy to convert +// to C strings. +#[cfg(target_family = "unix")] +fn args_to_cstrings() -> Vec { + env::args_os().map(|a| CString::new(a.into_vec()).unwrap()).collect() +} + +// TODO Windows support is incomplete. We should either use env::args_os() +// (or call into GetCommandLineW() + CommandLinetoArgvW()), convert these to +// PyUnicode instances, and pass these into Python/Mercurial outside the +// standard PySys_SetArgvEx() mechanism. This will allow us to preserve the +// raw bytes (since PySys_SetArgvEx() is based on char* and can drop wchar +// data. +// +// For now, we use env::args(). This will choke on invalid UTF-8 arguments. +// But it is better than nothing. 
+#[cfg(target_family = "windows")] +fn args_to_cstrings() -> Vec { + env::args().map(|a| CString::new(a).unwrap()).collect() +} + +fn set_python_home(env: &Environment) { + let raw = CString::new(env.python_home.to_str().unwrap()) + .unwrap() + .into_raw(); + unsafe { + python27_sys::Py_SetPythonHome(raw); + } +} + +fn update_encoding(_py: Python, _sys_mod: &PyModule) { + // Call sys.setdefaultencoding("undefined") if HGUNICODEPEDANTRY is set. + let pedantry = env::var("HGUNICODEPEDANTRY").is_ok(); + + if pedantry { + // site.py removes the sys.setdefaultencoding attribute. So we need + // to reload the module to get a handle on it. This is a lesser + // used feature and we'll support this later. + // TODO support this + panic!("HGUNICODEPEDANTRY is not yet supported"); + } +} + +fn update_modules_path(env: &Environment, py: Python, sys_mod: &PyModule) { + let sys_path = sys_mod.get(py, "path").unwrap(); + sys_path + .call_method(py, "insert", (0, env.mercurial_modules.to_str()), None) + .expect("failed to update sys.path to location of Mercurial modules"); +} + +fn run() -> Result<(), i32> { + let env = get_environment(); + + //println!("{:?}", env); + + // Tell Python where it is installed. + set_python_home(&env); + + // Set program name. The backing memory needs to live for the duration of the + // interpreter. + // + // Yes, we use the path to the Python interpreter not argv[0] here. The + // reason is because Python uses the given path to find the location of + // Python files. Apparently we could define our own ``Py_GetPath()`` + // implementation. But this may require statically linking Python, which is + // not desirable. 
+ let program_name = CString::new(env.python_exe.to_str().unwrap()) + .unwrap() + .as_ptr(); + unsafe { + python27_sys::Py_SetProgramName(program_name as *mut i8); + } + + unsafe { + python27_sys::Py_Initialize(); + } + + // https://docs.python.org/2/c-api/init.html#c.PySys_SetArgvEx has important + // usage information about PySys_SetArgvEx: + // + // * It says the first argument should be the script that is being executed. + // If not a script, it can be empty. We are definitely not a script. + // However, parts of Mercurial do look at sys.argv[0]. So we need to set + // something here. + // + // * When embedding Python, we should use ``PySys_SetArgvEx()`` and set + // ``updatepath=0`` for security reasons. Essentially, Python's default + // logic will treat an empty argv[0] in a manner that could result in + // sys.path picking up directories it shouldn't and this could lead to + // loading untrusted modules. + + // env::args() will panic if it sees a non-UTF-8 byte sequence. And + // Mercurial supports arbitrary encodings of input data. So we need to + // use OS-specific mechanisms to get the raw bytes without UTF-8 + // interference. + let args = args_to_cstrings(); + let argv: Vec<*const c_char> = args.iter().map(|a| a.as_ptr()).collect(); + + unsafe { + python27_sys::PySys_SetArgvEx(args.len() as c_int, argv.as_ptr() as *mut *mut i8, 0); + } + + let result; + { + // These need to be dropped before we call Py_Finalize(). Hence the + // block. + let gil = Python::acquire_gil(); + let py = gil.python(); + + // Mercurial code could call sys.exit(), which will call exit() + // itself. So this may not return. + // TODO this may cause issues on Windows due to the CRT mismatch. + // Investigate if we can intercept sys.exit() or SystemExit() to + // ensure we handle process exit. + result = match run_py(&env, py) { + // Print unhandled exceptions and exit code 255, as this is what + // `python` does. 
+ Err(err) => { + err.print(py); + Err(255) + } + Ok(()) => Ok(()), + }; + } + + unsafe { + python27_sys::Py_Finalize(); + } + + result +} + +fn run_py(env: &Environment, py: Python) -> PyResult<()> { + let sys_mod = py.import("sys").unwrap(); + + update_encoding(py, &sys_mod); + update_modules_path(&env, py, &sys_mod); + + // TODO consider a better error message on failure to import. + let demand_mod = py.import("hgdemandimport")?; + demand_mod.call(py, "enable", NoArgs, None)?; + + let dispatch_mod = py.import("mercurial.dispatch")?; + dispatch_mod.call(py, "run", NoArgs, None)?; + + Ok(()) +} + +fn main() { + let exit_code = match run() { + Err(err) => err, + Ok(()) => 0, + }; + + std::process::exit(exit_code); +} diff --git a/tests/run-tests.py b/tests/run-tests.py --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -2436,12 +2436,27 @@ self._tmpbindir = os.path.join(self._hgtmp, b'install', b'bin') os.makedirs(self._tmpbindir) - # This looks redundant with how Python initializes sys.path from - # the location of the script being executed. Needed because the - # "hg" specified by --with-hg is not the only Python script - # executed in the test suite that needs to import 'mercurial' - # ... which means it's not really redundant at all. - self._pythondir = self._bindir + normbin = os.path.normpath(os.path.abspath(whg)) + normbin = normbin.replace(os.sep.encode('ascii'), b'/') + + # Other Python scripts in the test harness need to + # `import mercurial`. If `hg` is a Python script, we assume + # the Mercurial modules are relative to its path and tell the tests + # to load Python modules from its directory. + with open(whg, 'rb') as fh: + initial = fh.read(1024) + + if re.match(b'#!.*python', initial): + self._pythondir = self._bindir + # If it looks like our in-repo Rust binary, use the source root. + # This is a bit hacky. But rhg is still not supported outside the + # source directory. So until it is, do the simple thing. 
+ elif re.search(b'|/rust/target/[^/]+/hg', normbin): + self._pythondir = os.path.dirname(self._testdir) + # Fall back to the legacy behavior. + else: + self._pythondir = self._bindir + else: self._installdir = os.path.join(self._hgtmp, b"install") self._bindir = os.path.join(self._installdir, b"bin")