diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -49,6 +49,11 @@ ] [[package]] +name = "cc" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -66,7 +71,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", "python27-sys 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "python3-sys 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -141,7 +146,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -150,7 +155,9 @@ version = "0.1.0" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -168,7 +175,7 @@ dependencies = [ "cpython 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "hg-core 0.1.0", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -176,7 +183,7 @@ version = "0.1.0" dependencies = [ "hg-core 0.1.0", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -186,7 +193,7 @@ [[package]] name = "libc" -version = "0.2.64" +version = "0.2.66" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -220,7 +227,7 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -260,7 +267,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -269,7 +276,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -287,7 +294,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "autocfg 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -305,7 +312,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "getrandom 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -379,7 +386,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -391,7 +398,7 @@ dependencies = [ "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -523,7 +530,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", "remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -591,6 +598,7 @@ "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" +"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cpython 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "85532c648315aeb0829ad216a6a29aa3212cf9319bc7f6daf1404aa0bdd1485f" @@ -604,7 +612,7 @@ "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum getrandom 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -"checksum libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)" = "74dfca3d9957906e8d1e6a0b641dc9a59848e793f1da2165889fd4f62d10d79c" +"checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" "checksum memoffset 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ce6075db033bbbb7ee5a0bbd3a3186bbae616f57fb001c485c7ff77955f8177f" "checksum nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -4,6 +4,7 @@ authors = ["Georges Racinet "] description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" edition = "2018" +build = "build.rs" [lib] name = "hg" @@ -11,6 +12,7 @@ [dependencies] byteorder = "1.3.1" lazy_static = "1.3.0" +libc = { version = "0.2.66", optional = true } memchr = "2.2.0" rand = "0.6.5" rand_pcg = "0.1.1" @@ -21,4 +23,11 @@ [dev-dependencies] tempfile = "3.1.0" -pretty_assertions = "0.6.1" \ No newline at end of file +pretty_assertions = "0.6.1" + +[build-dependencies] +cc = { version = "1.0.48", optional = true } + +[features] +default = [] +with-re2 = ["cc", "libc"] diff --git a/rust/hg-core/build.rs b/rust/hg-core/build.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/build.rs @@ -0,0 +1,25 @@ +// build.rs +// +// Copyright 2020 Raphaël Gomès +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +#[cfg(feature = "with-re2")] +use cc; + +#[cfg(feature = "with-re2")] +fn compile_re2() { + cc::Build::new() + .cpp(true) + .flag("-std=c++11") + .file("src/re2/rust_re2.cpp") + .compile("librustre.a"); + + println!("cargo:rustc-link-lib=re2"); +} + +fn main() { + #[cfg(feature = "with-re2")] + compile_re2(); +} diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -21,6 +21,8 @@ pub mod matchers; pub mod revlog; pub use revlog::*; +#[cfg(feature = "with-re2")] +pub mod re2; pub mod utils; use crate::utils::hg_path::{HgPathBuf, HgPathError}; diff --git a/rust/hg-core/src/re2/mod.rs b/rust/hg-core/src/re2/mod.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/re2/mod.rs @@ -0,0 +1,21 @@ +/// re2 module +/// +/// The Python implementation of Mercurial uses the Re2 regex engine when +/// possible and if the bindings are installed, falling back to Python's `re` +/// in case of unsupported syntax (Re2 is a non-backtracking engine). +/// +/// Using it from Rust is not ideal. We need C++ bindings, a C++ compiler, +/// Re2 needs to be installed... why not just use the `regex` crate? +/// +/// Using Re2 from the Rust implementation guarantees backwards compatibility. +/// We know it will work out of the box without needing to figure out the +/// subtle differences in syntax. For example, `regex` currently does not +/// support empty alternations (regex like `a||b`) which happens more often +/// than we might think. Old benchmarks also showed worse performance from +/// regex than with Re2, but the methodology and results were lost, so take +/// this with a grain of salt. +/// +/// The idea is to use Re2 for now as a temporary phase and then investigate +/// how much work would be needed to use `regex`. +mod re2; +pub use re2::Re2; diff --git a/rust/hg-core/src/re2/re2.rs b/rust/hg-core/src/re2/re2.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/re2/re2.rs @@ -0,0 +1,66 @@ +/* +re2.rs + +Rust FFI bindings to Re2. + +Copyright 2020 Valentin Gatien-Baron + +This software may be used and distributed according to the terms of the +GNU General Public License version 2 or any later version. +*/ +use libc::{c_int, c_void}; + +type Re2Ptr = *const c_void; + +pub struct Re2(Re2Ptr); + +/// `re2.h` says: +/// "An "RE2" object is safe for concurrent use by multiple threads." +unsafe impl Sync for Re2 {} + +/// These bind to the C ABI in `rust_re2.cpp`. +extern "C" { + fn rust_re2_create(data: *const u8, len: usize) -> Re2Ptr; + fn rust_re2_destroy(re2: Re2Ptr); + fn rust_re2_ok(re2: Re2Ptr) -> bool; + fn rust_re2_error( + re2: Re2Ptr, + outdata: *mut *const u8, + outlen: *mut usize, + ) -> bool; + fn rust_re2_match( + re2: Re2Ptr, + data: *const u8, + len: usize, + anchor: c_int, + ) -> bool; +} + +impl Re2 { + pub fn new(pattern: &[u8]) -> Result { + unsafe { + let re2 = rust_re2_create(pattern.as_ptr(), pattern.len()); + if rust_re2_ok(re2) { + Ok(Re2(re2)) + } else { + let mut data: *const u8 = std::ptr::null(); + let mut len: usize = 0; + rust_re2_error(re2, &mut data, &mut len); + Err(String::from_utf8_lossy(std::slice::from_raw_parts( + data, len, + )) + .to_string()) + } + } + } + + pub fn is_match(&self, data: &[u8]) -> bool { + unsafe { rust_re2_match(self.0, data.as_ptr(), data.len(), 1) } + } +} + +impl Drop for Re2 { + fn drop(&mut self) { + unsafe { rust_re2_destroy(self.0) } + } +} diff --git a/rust/hg-core/src/re2/rust_re2.cpp b/rust/hg-core/src/re2/rust_re2.cpp new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/re2/rust_re2.cpp @@ -0,0 +1,49 @@ +/* +rust_re2.cpp + +C ABI export of Re2's C++ interface for Rust FFI. + +Copyright 2020 Valentin Gatien-Baron + +This software may be used and distributed according to the terms of the +GNU General Public License version 2 or any later version. +*/ + +#include +using namespace re2; + +extern "C" { + RE2* rust_re2_create(const char* data, size_t len) { + RE2::Options o; + o.set_encoding(RE2::Options::Encoding::EncodingLatin1); + o.set_log_errors(false); + o.set_max_mem(50000000); + + return new RE2(StringPiece(data, len), o); + } + + void rust_re2_destroy(RE2* re) { + delete re; + } + + bool rust_re2_ok(RE2* re) { + return re->ok(); + } + + void rust_re2_error(RE2* re, const char** outdata, size_t* outlen) { + const std::string& e = re->error(); + *outdata = e.data(); + *outlen = e.length(); + } + + bool rust_re2_match(RE2* re, char* data, size_t len, int ianchor) { + const StringPiece sp = StringPiece(data, len); + + RE2::Anchor anchor = + ianchor == 0 ? RE2::Anchor::UNANCHORED : + (ianchor == 1 ? RE2::Anchor::ANCHOR_START : + RE2::Anchor::ANCHOR_BOTH); + + return re->Match(sp, 0, len, anchor, NULL, 0); + } +} diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml --- a/rust/hg-cpython/Cargo.toml +++ b/rust/hg-cpython/Cargo.toml @@ -10,6 +10,7 @@ [features] default = ["python27"] +with-re2 = ["hg-core/with-re2"] # Features to build an extension module: python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] @@ -21,7 +22,7 @@ python3-bin = ["cpython/python3-sys"] [dependencies] -hg-core = { path = "../hg-core" } +hg-core = { path = "../hg-core"} libc = '*' [dependencies.cpython] diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -1351,10 +1351,19 @@ env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir cargocmd = ['cargo', 'rustc', '-vv', '--release'] + + feature_flags = [] + if sys.version_info[0] == 3 and self.py3_features is not None: - cargocmd.extend( - ('--features', self.py3_features, '--no-default-features') - ) + feature_flags.append(self.py3_features) + cargocmd.append('--no-default-features') + + rust_features = env.get("HG_RUST_FEATURES") + if rust_features: + feature_flags.append(rust_features) + + cargocmd.extend(('--features', " ".join(feature_flags))) + cargocmd.append('--') if sys.platform == 'darwin': cargocmd.extend(