diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -66,3 +66,5 @@ # hackable windows distribution additions ^hg-python ^hg.py$ + +subinclude:rust/.hgignore diff --git a/contrib/STANDALONE-MERCURIAL.rst b/contrib/STANDALONE-MERCURIAL.rst new file mode 100644 --- /dev/null +++ b/contrib/STANDALONE-MERCURIAL.rst @@ -0,0 +1,70 @@ +==================== +Standalone Mercurial +==================== + +*Standalone Mercurial* is a generic term given to a distribution +of Mercurial that is standalone and has minimal dependencies on +the host (typically just the C runtime library). Instead, most of +Mercurial's dependencies are included in the distribution. This +includes a Python interpreter. + +Architecture +============ + +A standalone Mercurial distribution essentially consists of the +following elements: + +* An `hg` binary executable +* A Python interpreter shared library +* The Python standard library +* 3rd party Python packages to enhance the Mercurial experience +* Mercurial's Python packages +* Mercurial support files (help content, default config files, etc) +* Any additional support files (e.g. shared library dependencies) + +From a high level, the `hg` binary has a shared library dependency +on `libpython`. The binary is configured to load the `libpython` +that ships with the Mercurial distribution. When started, the +`hg` binary assesses its state, configures an embedded Python +interpreter, and essentially invokes Mercurial's `main()` function.
+ +Build Requirements +================== + +Universal +--------- + +* Python 2.7 (to run the build script) +* A working Rust and Cargo installation + +Linux +----- + +* Dependencies to build Python 2.7 from source (GNU make, autoconf, + various dependencies for extensions) +* The `patchelf` tool + +MacOS +----- + +* Xcode + +Windows +------- + +* Microsoft Visual C++ Compiler for Python 2.7 (https://www.microsoft.com/en-us/download/details.aspx?id=44266) + +Building +======== + +To build standalone Mercurial, run the following:: + + $ python2.7 contrib/build-standalone.py + +This will: + +1. Obtain a Python distribution (either by compiling from source + or downloading a pre-built distribution) +2. Build Mercurial Rust components +3. Build Mercurial Python components +4. Produce an *archive* suitable for distribution diff --git a/contrib/build-standalone.py b/contrib/build-standalone.py new file mode 100755 --- /dev/null +++ b/contrib/build-standalone.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python2.7 +# build-standalone.py - Create a standalone distribution of Mercurial. +# +# Copyright 2017 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +"""Create a standalone Mercurial distribution. + +This script does the bulk of the work for creating a standalone Mercurial +distribution.
+""" + +import errno +import gzip +import hashlib +import io +import multiprocessing +import os +import shutil +import stat +import subprocess +import sys +import tarfile +import tempfile +import urllib2 + +try: + import lzma +except ImportError: + lzma = None + + +PYTHON_ARCHIVES = { + 'version': '2.7.14', + 'url': 'https://www.python.org/ftp/python/{version}/{prefix}-{version}.{suffix}', + 'gz': { + 'sha256': '304c9b202ea6fbd0a4a8e0ad3733715fbd4749f2204a9173a58ec53c32ea73e8', + 'prefix': 'Python', + 'suffix': 'tgz', + 'tar_mode': 'r:gz', + }, + 'xz': { + 'sha256': '71ffb26e09e78650e424929b2b457b9c912ac216576e6bd9e7d204ed03296a66', + 'prefix': 'Python', + 'suffix': 'tar.xz', # python.org publishes Python-<version>.tar.xz + 'tar_mode': 'r:xz', + }, + 'msi32': { + 'sha256': '450bde0540341d4f7a6ad2bb66639fd3fac1c53087e9844dc34ddf88057a17ca', + 'prefix': 'python', + 'suffix': 'msi', + }, + 'msi64': { + 'sha256': 'af293df7728b861648162ba0cd4a067299385cb6a3f172569205ac0b33190693', + 'prefix': 'python', + 'suffix': 'amd64.msi', + } +} + + +def hash_file(fh): + hasher = hashlib.sha256() + while True: + chunk = fh.read(16384) + if not chunk: + break + + hasher.update(chunk) + + return hasher.hexdigest() + + +def makedirs(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def _ensure_python_source(dest_dir): + """Ensure the Python source code is extracted to a path.""" + makedirs(dest_dir) + + if lzma: + archive = PYTHON_ARCHIVES['xz'] + else: + archive = PYTHON_ARCHIVES['gz'] + + archive_path = os.path.join(dest_dir, + 'python-%s.%s' % (PYTHON_ARCHIVES['version'], + archive['suffix'])) + + if os.path.exists(archive_path): + with open(archive_path, 'rb') as fh: + if hash_file(fh) != archive['sha256']: + print('%s has unexpected hash; removing' % archive_path) + os.unlink(archive_path) + + if not os.path.exists(archive_path): + url = PYTHON_ARCHIVES['url'].format( + version=PYTHON_ARCHIVES['version'], + prefix=archive['prefix'], + suffix=archive['suffix']) + +
print('downloading %s' % url) + + req = urllib2.urlopen(url) + if req.getcode() != 200: + raise Exception('non-200 HTTP response downloading Python: %d' % req.getcode()) + + buf = io.BytesIO() + while True: + chunk = req.read(16384) + if not chunk: + break + buf.write(chunk) + + buf.seek(0) + if hash_file(buf) != archive['sha256']: + raise Exception('Python hash mismatch') + + buf.seek(0) + with open(archive_path, 'wb') as fh: + fh.write(buf.getvalue()) + + # Assume if a single file from the archive is present that we don't need + # to re-extract. + if os.path.exists(os.path.join(dest_dir, 'configure')): + print('extracted python source code found; using without modifications') + return + + print('extracting %s to %s' % (archive_path, dest_dir)) + with tarfile.open(archive_path, archive['tar_mode']) as tf: + prefix = 'Python-%s' % PYTHON_ARCHIVES['version'] + for ti in tf: + assert ti.name.startswith(prefix) + ti.name = ti.name[len(prefix):].lstrip('/') + tf.extract(ti, dest_dir) + + +def _build_python(state): + source_dir = state['python_source_dir'] + build_dir = state['python_build_dir'] + _ensure_python_source(source_dir) + + makedirs(build_dir) + + # TODO use a more sensible filesystem layout for Python in cases + # where the files will be installed alongside other system files + # (e.g. when producing deb or rpm archives). + if not os.path.exists(os.path.join(build_dir, 'config.status')): + subprocess.check_call([ + os.path.join(source_dir, 'configure'), + '--prefix', '/hgpython', + '--enable-shared', + '--enable-unicode=ucs4', + # TODO enable optimizations + # '--enable-optimizations', + # '--enable-lto', + ], cwd=build_dir) + + subprocess.check_call([ + 'make', '-j%d' % multiprocessing.cpu_count(), + ], cwd=build_dir) + + +def install_python(state): + """Installs Python in the standalone directory. + + Python is installed to the `hgpython/` sub-directory. The layout of + this directory resembles a typical Python distribution. 
In fact, the + Python installation could be used on its own, just like any other + Python installation. + """ + # TODO on Windows, obtain Python files from official, self-contained + # binary distribution (via an MSI). + _build_python(state) + + build_dir = state['python_build_dir'] + py_dir = state['python_install_dir'] + + if os.path.exists(os.path.join(py_dir, 'bin', 'python')): + print('python already installed in %s; skipping `make install`' % + py_dir) + else: + subprocess.check_call([ + 'make', + '-j%d' % multiprocessing.cpu_count(), + 'install', + 'DESTDIR=%s' % state['install_dir'], + ], cwd=build_dir) + + # Update shared library references to be relative to binary. + # TODO compile Python in such a way that this isn't necessary. + if sys.platform.startswith('linux'): + subprocess.check_call([ + 'patchelf', + '--set-rpath', + '$ORIGIN/../lib', + state['python_bin'], + ]) + elif sys.platform == 'darwin': + subprocess.check_call([ + 'install_name_tool', '-change', + '/hgpython/lib/libpython2.7.dylib', + '@loader_path/../lib/libpython2.7.dylib', + state['python_bin'], + ]) + + +def install_rust_components(state): + rust_dir = os.path.join(state['root_dir'], 'rust', 'hgcli') + + env = dict(os.environ) + + # Tell cpython's build.rs to use our Python binary. + env['PYTHON_SYS_EXECUTABLE'] = os.path.join( + state['python_install_dir'], 'bin', 'python2.7') + + # Tell our build.rs where to find libpython. + env['HG_STANDALONE_LINK_PATH'] = os.path.join( + state['python_install_dir'], 'lib') + + subprocess.check_call(['cargo', 'build', '--release', '-v'], + cwd=rust_dir, env=env) + + subprocess.check_call([ + 'cargo', + 'install', + '--force', + '--root', state['install_dir'], + ], cwd=rust_dir, env=env) + + # TODO figure out how to link properly via Cargo. + # Adjust rpath so libpython is loaded from a relative path. 
+ if sys.platform.startswith('linux'): + subprocess.check_call([ + 'patchelf', + '--set-rpath', + '$ORIGIN/../hgpython/lib', + state['hg_bin'], + ]) + elif sys.platform == 'darwin': + subprocess.check_call([ + 'install_name_tool', '-change', + '/System/Library/Frameworks/Python.framework/Versions/2.7/Python', + '@loader_path/../hgpython/lib/libpython2.7.dylib', # hg is in bin/; libpython is in hgpython/lib/ + state['hg_bin'], + ]) + +def install_mercurial(state): + """Install Mercurial files into the distribution.""" + install_dir = os.path.join(state['install_dir']) + python = os.path.join(state['python_install_dir'], 'bin', 'python') + + temp_dir = tempfile.mkdtemp(dir=state['build_dir']) + try: + subprocess.check_call([ + python, 'setup.py', + 'build', + 'install', + # These are the only files we care about. + '--install-lib', os.path.join(install_dir, 'mercurial'), + + '--install-data', os.path.join(temp_dir, 'data'), + '--install-headers', os.path.join(temp_dir, 'headers'), + '--install-platlib', os.path.join(temp_dir, 'platlib'), + '--install-purelib', os.path.join(temp_dir, 'purelib'), + # `hg` is replaced by our binary version.
+ '--install-scripts', os.path.join(temp_dir, 'bin'), + ], + cwd=state['root_dir']) + finally: + temp_files = set() + for root, dirs, files in os.walk(temp_dir): + for f in files: + full = os.path.join(root, f) + temp_files.add(full[len(temp_dir)+1:]) + + shutil.rmtree(temp_dir) + + expected = { + 'bin/hg', + } + extra = temp_files - expected + if extra: + raise Exception('unknown extra files were installed: %s' % + ', '.join(sorted(extra))) + + +def _run_hg(state, args): # state is passed in explicitly instead of read from a module global + env = dict(os.environ) + env['HGPLAIN'] = '1' + env['HGRCPATH'] = '' + + with open(os.devnull, 'wb') as devnull: + return subprocess.check_output([state['hg_bin']] + args, + env=env, + stderr=devnull) + +def verify_hg(state): + print('running `hg version`') + try: + print(_run_hg(state, ['version'])) + except subprocess.CalledProcessError as e: + print('error invoking `hg version`') + print(e.output) + sys.exit(1) + + +def get_revision_info(state): + res = _run_hg(state, ['-R', state['root_dir'], 'log', '-r', '.', '-T', '{node} {date}']) + node, date = res.split(' ') + return node, int(float(date)) + + +def _get_archive_files(state): + # Ideally we wouldn't have any ignores. + IGNORE = { + '.crates.toml', + } + + for root, dirs, files in os.walk(state['install_dir']): + # sorts are here for determinism. + dirs.sort() + for f in sorted(files): + full = os.path.join(root, f) + rel = full[len(state['install_dir']) + 1:] + + if rel in IGNORE: + continue + + yield full, rel + + +def create_tar(state, ts): + print('writing %s' % state['tar_path']) + with tarfile.TarFile(state['tar_path'], 'w') as tf: + for full, rel in _get_archive_files(state): + with open(full, 'rb') as fh: + ti = tf.gettarinfo(full, rel) + + if ti.mode & (stat.S_ISUID | stat.S_ISGID): + print('setuid or setgid bits set: %s' % full) + + # Normalize mtime to commit time. + ti.mtime = ts + # Normalize uid/gid to root:root.
+ ti.uid = 0 + ti.gid = 0 + ti.uname = '' + ti.gname = '' + + tf.addfile(ti, fh) + + #gz = state['tar_path'] + '.gz' + #print('writing %s' % gz) + #with open(state['tar_path'], 'rb') as ifh, gzip.GzipFile(gz, 'wb') as ofh: + # shutil.copyfileobj(ifh, ofh) + + +if __name__ == '__main__': + root = os.path.normpath(os.path.join(os.path.dirname(__file__), '..')) + root = os.path.abspath(root) + build_dir = os.path.join(root, 'build') + + python_install_dir = os.path.join(build_dir, 'standalone', 'hgpython') + + state = { + 'root_dir': root, + 'build_dir': build_dir, + 'install_dir': os.path.join(build_dir, 'standalone'), + 'python_source_dir': os.path.join(build_dir, 'python-src'), + 'python_build_dir': os.path.join(build_dir, 'python-build'), + 'python_install_dir': python_install_dir, + 'python_bin': os.path.join(python_install_dir, 'bin', 'python2.7'), + 'hg_bin': os.path.join(build_dir, 'standalone', 'bin', 'hg'), + 'tar_path': os.path.join(build_dir, 'standalone.tar'), + } + + makedirs(state['install_dir']) + install_python(state) + install_rust_components(state) + install_mercurial(state) + verify_hg(state) + node, ts = get_revision_info(state) + create_tar(state, ts) diff --git a/rust/.hgignore b/rust/.hgignore new file mode 100644 --- /dev/null +++ b/rust/.hgignore @@ -0,0 +1 @@ +target/ diff --git a/rust/.cargo/config b/rust/.cargo/config new file mode 100644 --- /dev/null +++ b/rust/.cargo/config @@ -0,0 +1,7 @@ +# Rust builds with a modern MSVC and uses a newer CRT. +# Python 2.7 has a shared library dependency on an older CRT (msvcr90.dll). +# We statically link the modern CRT to avoid multiple msvcr*.dll libraries +# being loaded and Python possibly picking up symbols from the newer runtime +# (which would be loaded first). 
+[target.'cfg(target_os = "windows")'] +rustflags = ["-Ctarget-feature=+crt-static"] diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,127 @@ +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cpython" +version = "0.1.0" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee#94b357f2ec56270daa1aa7ab3c776ed8e409ceee" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)", +] + +[[package]] +name = "hgcli" +version = "0.1.0" +dependencies = [ + "cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)", + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.41" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "python27-sys" +version = "0.1.2" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee#94b357f2ec56270daa1aa7ab3c776ed8e409ceee" +dependencies = [ + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 
0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)" = "" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)" = "36fbc8a8929c632868295d0178dd8f63fc423fd7537ad0738372bd010b3ac9b0" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070" +"checksum python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)" = "" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" +"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,2 @@ +[workspace] +members = ["hgcli"] diff --git a/rust/README.rst b/rust/README.rst new file mode 100644 --- /dev/null +++ b/rust/README.rst @@ -0,0 +1,58 @@ +=================== +Mercurial Rust Code +=================== + +This directory contains various Rust code for the Mercurial project. + +The top-level ``Cargo.toml`` file defines a workspace containing +all primary Mercurial crates. + +Building +======== + +To build the Rust components:: + + $ cargo build + +If you prefer a non-debug / release configuration:: + + $ cargo build --release + +Features +-------- + +The following Cargo features are available: + +localdev (default) + Produce files that work with an in-source-tree build. + + In this mode, the build finds and uses a ``python2.7`` binary from + ``PATH``. The ``hg`` binary assumes it runs from ``rust/target/<profile>/hg`` + and it finds Mercurial files at ``dirname($0)/../../../``. + +standalone + Produce files that work in a standalone Mercurial distribution. + + Standalone distributions are self-contained and contain their own + bundled version of Python and all Mercurial support files. Paths + to these dependencies are hard-coded as relative to the ``hg`` + binary. The exact layout is platform/target dependent. + +Running +======= + +The ``hgcli`` crate produces an ``hg`` binary. You can run this binary +via ``cargo run``:: + + $ cargo run --manifest-path hgcli/Cargo.toml + +Or directly:: + + $ target/debug/hg + $ target/release/hg + +You can also run the test harness with this binary:: + + $ ./run-tests.py --with-hg ../rust/target/debug/hg + +Some tests are still failing when run with the Rust binary, however.
diff --git a/rust/hgcli/Cargo.toml b/rust/hgcli/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/hgcli/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "hgcli" +version = "0.1.0" +authors = ["Gregory Szorc "] + +build = "build.rs" + +[[bin]] +name = "hg" +path = "src/main.rs" + +[features] +# localdev: detect Python in PATH and use files from source checkout. +# standalone: use bundled Python and files from bin-relative path. +default = ["localdev"] +standalone = [] +localdev = [] + +[dependencies] +libc = "0.2.34" + +# We currently use a custom build of cpython and python27-sys with the +# following changes: +# * Exports Cargo variable defining the full path to Python interpreter. +# * sysmodule exports for PySys_SetArgv. +# +# TODO switch to official release when our changes are incorporated. +[dependencies.cpython] +version = "0.1" +default-features = false +features = ["python27-sys"] +git = "https://github.com/indygreg/rust-cpython.git" +rev = "94b357f2ec56270daa1aa7ab3c776ed8e409ceee" + +[dependencies.python27-sys] +version = "0.1.2" +git = "https://github.com/indygreg/rust-cpython.git" +rev = "94b357f2ec56270daa1aa7ab3c776ed8e409ceee" diff --git a/rust/hgcli/build.rs b/rust/hgcli/build.rs new file mode 100644 --- /dev/null +++ b/rust/hgcli/build.rs @@ -0,0 +1,146 @@ +// build.rs -- Configure build environment for `hgcli` Rust package. +// +// Copyright 2017 Gregory Szorc +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +use std::collections::HashMap; +use std::env; +use std::path::Path; +use std::process::Command; + +struct PythonConfig { + python: String, + config: HashMap<String, String>, // sysconfig variable name -> value +} + +fn get_python_config() -> PythonConfig { + // The python27-sys crate exports a Cargo variable defining the full + // path to the interpreter being used.
+ let python = env::var("DEP_PYTHON27_PYTHON_INTERPRETER") + .expect("Missing environment variable defining Python interpreter path; are you using the correct python27-sys crate?"); + + if !Path::new(&python).exists() { + panic!("Python interpreter {} does not exist; this should never happen", python); + } + + let separator = "SEPARATOR STRING"; + + let script = "import sysconfig; \ +c = sysconfig.get_config_vars(); \ +print('SEPARATOR STRING'.join('%s=%s' % i for i in c.items()))"; + + let mut command = Command::new(&python); + command.arg("-c").arg(script); + + let out = command.output().unwrap(); + + if !out.status.success() { + panic!( + "python script failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + } + + let stdout = String::from_utf8_lossy(&out.stdout); + let mut m = HashMap::new(); + + for entry in stdout.split(separator) { + let mut parts = entry.splitn(2, "="); + let key = parts.next().unwrap(); + let value = parts.next().unwrap(); + m.insert(String::from(key), String::from(value)); + } + + PythonConfig { + python: python, + config: m, + } +} + +#[cfg(not(target_os = "windows"))] +fn have_shared(config: &PythonConfig) -> bool { + match config.config.get("Py_ENABLE_SHARED") { + Some(value) => value == "1", + None => false, + } +} + +#[cfg(target_os = "windows")] +fn have_shared(config: &PythonConfig) -> bool { + // python27.dll should exist next to python2.7.exe. 
+ let mut dll = std::path::PathBuf::from(&config.python); // path manipulation needs a PathBuf, not a String + dll.pop(); + dll.push("python27.dll"); + + return dll.exists(); +} + +static REQUIRED_CONFIG_FLAGS: [&'static str; 2] = [ + "Py_USING_UNICODE", + "WITH_THREAD", +]; + +static REQUIRED_UNSET_FLAGS: [&'static str; 4] = [ + "Py_DEBUG", + "Py_REF_DEBUG", + "Py_TRACE_REFS", + "COUNT_ALLOCS", +]; + +fn main() { + let config = get_python_config(); + + println!("Using Python: {}", config.python); + println!("cargo:rustc-env=PYTHON_INTERPRETER={}", config.python); + + let prefix = config.config.get("prefix").unwrap(); + + println!("Prefix: {}", prefix); + + for key in REQUIRED_CONFIG_FLAGS.iter() { + let result = match config.config.get(*key) { + Some(value) => value == "1", + None => false, + }; + + if !result { + panic!("Detected Python lacks required feature {}", key); + } + } + + for key in REQUIRED_UNSET_FLAGS.iter() { + let result = match config.config.get(*key) { + Some(value) => value != "0", + None => false, + }; + + if result { + panic!("Detected Python feature {} is not supported", key); + } + } + + // We need a Python shared library. + if !have_shared(&config) { + panic!("Detected Python lacks a shared library, which is required"); + } + + let ucs4 = match config.config.get("Py_UNICODE_SIZE") { + Some(value) => value == "4", + None => false, + }; + + if !ucs4 { + panic!("Detected Python doesn't support UCS-4 code points"); + } + + // If building standalone Mercurial, add an extra link path for + // native libraries.
+ if let Some(lib_path) = env::var_os("HG_STANDALONE_LINK_PATH") { + println!( + "cargo:rustc-link-search=native={}", + lib_path.to_str().unwrap() + ); + } +} diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs new file mode 100644 --- /dev/null +++ b/rust/hgcli/src/main.rs @@ -0,0 +1,275 @@ +// main.rs -- Main routines for `hg` program +// +// Copyright 2017 Gregory Szorc +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +extern crate libc; +extern crate cpython; +extern crate python27_sys; + +use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python}; +use libc::{c_char, c_int}; + +use std::env; +use std::path::PathBuf; +use std::ffi::CString; +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStringExt; + +#[derive(Debug)] +struct Environment { + _exe: PathBuf, + python_exe: PathBuf, + python_home: PathBuf, + mercurial_modules: PathBuf, +} + +/// Run Mercurial locally from a source distribution or checkout. +/// +/// hg is <srcdir>/rust/target/<profile>/hg +/// Python interpreter is detected by build script. +/// Python home is relative to Python interpreter. +/// Mercurial files are relative to hg binary, which is relative to source root. +#[cfg(feature = "localdev")] +fn get_environment() -> Environment { + let exe = env::current_exe().unwrap(); + + let mut mercurial_modules = exe.clone(); + mercurial_modules.pop(); // <srcdir>/rust/target/<profile> + mercurial_modules.pop(); // <srcdir>/rust/target + mercurial_modules.pop(); // <srcdir>/rust + mercurial_modules.pop(); // <srcdir>/ + + let python_exe: &'static str = env!("PYTHON_INTERPRETER"); + let python_exe = PathBuf::from(python_exe); + + let mut python_home = python_exe.clone(); + python_home.pop(); + + // On Windows, python2.7.exe exists at the root directory of the Python + // install. Everywhere else, the Python install root is one level up.
+ if !python_exe.ends_with("python2.7.exe") { + python_home.pop(); + } + + Environment { + _exe: exe.clone(), + python_exe: python_exe.clone(), + python_home: python_home.clone(), + mercurial_modules: mercurial_modules.to_path_buf(), + } +} + +/// Run Mercurial from a standalone environment on Windows. +/// +/// The current hg.exe executable is somewhere. Let's say ./hg.exe. +/// There is a ./python27.dll in the same directory as hg.exe. +/// Python standard library and other files are in ./hgpython. +/// Mercurial files are in ./mercurial. +#[cfg(all(feature = "standalone", target_os = "windows"))] +fn get_environment() -> Environment { + let exe = env::current_exe().unwrap(); + + let mercurial_modules = exe.parent().unwrap(); + + let mut python_home = exe.parent().unwrap().to_path_buf(); + python_home.push("hgpython"); + + // TODO this value is wrong. + let mut python_exe = exe.parent().unwrap().to_path_buf(); + python_exe.push("python2.7.exe"); + + Environment { + _exe: exe.clone(), + python_exe: python_exe.clone(), + python_home: python_home.clone(), + mercurial_modules: mercurial_modules.to_path_buf(), + } +} + +/// Run Mercurial from a standalone environment on POSIX systems. +/// +/// The current hg binary is assumed to be ./bin/hg. +/// Python home is ./hgpython. +/// Mercurial files are in ./hgmodules. +/// +/// TODO the layout here is pretty poor. This code is not well-tested.
+#[cfg(all(feature = "standalone", not(target_os = "windows")))] +fn get_environment() -> Environment { + let exe = env::current_exe().unwrap(); + + let root = exe.parent().unwrap().parent().unwrap(); + + let mut mercurial_modules = root.to_path_buf(); + mercurial_modules.push("hgmodules"); + + let mut python_exe = root.to_path_buf(); + python_exe.push("hgpython"); + python_exe.push("bin"); + python_exe.push("python2.7"); + + let mut python_home = root.to_path_buf(); + python_home.push("hgpython"); + + Environment { + _exe: exe.clone(), + python_exe: python_exe.clone(), + python_home: python_home.clone(), + mercurial_modules: mercurial_modules.clone(), + } +} + +// On UNIX, argv starts as an array of char*. So it is easy to convert +// to C strings. +#[cfg(target_family = "unix")] +fn args_to_cstrings() -> Vec<CString> { + env::args_os().map(|a| CString::new(a.into_vec()).unwrap()).collect() +} + +// Windows is more complicated. We will get wchar strings that are thin +// wrappers around GetCommandLineW() + CommandLineToArgvW(). We need to +// use WideCharToMultiByte() to convert these to the system default Windows +// ANSI code page (CP_ACP) to feed into Python. Once our Rust is a bit +// more established, we could do something better here, such as convert to +// a PyUnicode and pass them to Mercurial and let Mercurial deal with the +// decoding. +//#[cfg(target_family = "windows")] +//fn args_to_cstrings() -> Vec<CString> { +//} + +fn set_python_home(env: &Environment) { + let raw = CString::new(env.python_home.to_str().unwrap()) + .unwrap() + .into_raw(); + unsafe { + python27_sys::Py_SetPythonHome(raw); + } +} + +fn update_encoding(py: Python, sys_mod: &PyModule) { + // Call sys.setdefaultencoding("undefined") if HGUNICODEPEDANTRY is set. + let pedantry = env::var("HGUNICODEPEDANTRY").is_ok(); + + // TODO do we need to call reload(sys) here? Should we set Python encoding + // before we start Python interpreter?
+ if pedantry { + sys_mod + .call(py, "setdefaultencoding", ("undefined",), None) + .expect("sys.setdefaultencoding() failed"); + } +} + +fn update_modules_path(env: &Environment, py: Python, sys_mod: &PyModule) { + let sys_path = sys_mod.get(py, "path").unwrap(); + sys_path + .call_method(py, "insert", (0, env.mercurial_modules.to_str()), None) + .expect("failed to update sys.path to location of Mercurial modules"); +} + +fn run() -> Result<(), i32> { + let env = get_environment(); + + //println!("{:?}", env); + + // Tell Python where it is installed. + set_python_home(&env); + + // Set program name. The backing memory needs to live for the duration of the + // interpreter, so the CString is deliberately leaked with into_raw(). + // + // Yes, we use the path to the Python interpreter not argv[0] here. The + // reason is because Python uses the given path to find the location of + // Python files. Apparently we could define our own ``Py_GetPath()`` + // implementation. But this may require statically linking Python, which is + // not desirable. + let program_name = CString::new(env.python_exe.to_str().unwrap()) + .unwrap() + .into_raw(); + unsafe { + python27_sys::Py_SetProgramName(program_name as *mut i8); + } + + unsafe { + python27_sys::Py_Initialize(); + } + + // https://docs.python.org/2/c-api/init.html#c.PySys_SetArgvEx has important + // usage information about PySys_SetArgvEx: + // + // * It says the first argument should be the script that is being executed. + // If not a script, it can be empty. We are definitely not a script. + // However, parts of Mercurial do look at sys.argv[0]. So we need to set + // something here. + // + // * When embedding Python, we should use ``PySys_SetArgvEx()`` and set + // ``updatepath=0`` for security reasons. Essentially, Python's default + // logic will treat an empty argv[0] in a manner that could result in + // sys.path picking up directories it shouldn't and this could lead to + // loading untrusted modules. + + // env::args() will panic if it sees a non-UTF-8 byte sequence.
So we need + // to use env::args_os() and pass the raw bytes down to Python/Mercurial. + + let args = args_to_cstrings(); + let argv: Vec<*const c_char> = args.iter().map(|a| a.as_ptr()).collect(); + + unsafe { + python27_sys::PySys_SetArgvEx(args.len() as c_int, argv.as_ptr() as *mut *mut i8, 0); + } + + // We need to do this to appease the cpython package. + unsafe { + python27_sys::PyEval_InitThreads(); + let _state = python27_sys::PyEval_SaveThread(); + } + + let gil = Python::acquire_gil(); + let py = gil.python(); + + let sys_mod = py.import("sys").unwrap(); + + update_encoding(py, &sys_mod); + update_modules_path(&env, py, &sys_mod); + + // TODO we don't capture exit code from Mercurial. + let result = match run_py(py) { + Err(err) => { + err.print(py); + Err(255) + } + Ok(()) => Ok(()), + }; + + // The GIL needs to be held when we call this. So it needs to be in this + // scope with the active GILGuard. + // TODO this crashes when Python raises an uncaught exception. Unsure + // of why. Is this even needed??? + /* + unsafe { + python27_sys::Py_Finalize(); + } + */ + + result +} + +fn run_py(py: Python) -> PyResult<()> { + let demand_mod = py.import("hgdemandimport")?; + demand_mod.call(py, "enable", NoArgs, None)?; + + let dispatch_mod = py.import("mercurial.dispatch")?; + dispatch_mod.call(py, "run", NoArgs, None)?; + + Ok(()) +} + +fn main() { + let exit_code = match run() { + Err(err) => err, + Ok(()) => 0, + }; + + std::process::exit(exit_code); +} diff --git a/tests/run-tests.py b/tests/run-tests.py --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -2442,6 +2442,14 @@ # executed in the test suite that needs to import 'mercurial' # ... which means it's not really redundant at all. self._pythondir = self._bindir + + # The harness assumes we're running ./hg from the source + # directory or that Mercurial files are available in the + # directory where hg is. This isn't always the case. + # TODO Make this Rust support less hacky. 
+ if re.search(r'target/[^/]+/hg', whg): + self._pythondir = os.path.dirname(self._testdir) + else: self._installdir = os.path.join(self._hgtmp, b"install") self._bindir = os.path.join(self._installdir, b"bin")