diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -17,6 +17,16 @@ ] [[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "autocfg" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -55,6 +65,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "clap" +version = "2.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "cloudabi" version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -167,10 +191,12 @@ dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 
+ "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -214,6 +240,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "memoffset" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -523,6 +558,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "syn" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -546,6 +586,14 @@ ] [[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "thread_local" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -562,11 +610,21 @@ ] [[package]] +name = "unicode-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "unicode-xid" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -601,6 +659,7 @@ 
[metadata] "checksum aho-corasick 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" @@ -608,6 +667,7 @@ "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" "checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" @@ 
-624,6 +684,7 @@ "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" "checksum memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" +"checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" "checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" "checksum num_cpus 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" @@ -660,11 +721,15 @@ "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" +"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" "checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum twox-hash 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3bfd5b7557925ce778ff9b9ef90e3ade34c524b5ff10e239c69a42d546d2af56" +"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -23,8 +23,10 @@ same-file = "1.0.6" [dev-dependencies] +clap = "*" +memmap = "0.7.0" +pretty_assertions = "0.6.1" tempfile = "3.1.0" -pretty_assertions = "0.6.1" [build-dependencies] cc = { version = "1.0.48", optional = true } diff --git a/rust/hg-core/examples/nodemap/index.rs b/rust/hg-core/examples/nodemap/index.rs new file mode 100644 --- /dev/null +++ 
b/rust/hg-core/examples/nodemap/index.rs
@@ -0,0 +1,132 @@
+// Copyright 2019-2020 Georges Racinet
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Minimal `RevlogIndex`, readable from standard Mercurial file format
+use hg::*;
+use memmap::*;
+use std::fs::File;
+use std::ops::Deref;
+use std::path::Path;
+use std::slice;
+
+/// Revlog index over any storage that dereferences to a slice of
+/// `IndexEntry` (in this example, a memory-mapped index file).
+pub struct Index {
+    data: Box<dyn Deref<Target = [IndexEntry]> + Send>,
+}
+
+/// A fixed sized index entry. All numbers are big endian
+#[repr(C)]
+pub struct IndexEntry {
+    not_used_yet: [u8; 24],
+    p1: Revision,
+    p2: Revision,
+    node: Node,
+    unused_node: [u8; 12],
+}
+
+/// Size in bytes of one on-disk index entry
+pub const INDEX_ENTRY_SIZE: usize = 64;
+
+impl IndexEntry {
+    /// Both parents of this entry, converted from big endian.
+    fn parents(&self) -> [Revision; 2] {
+        [Revision::from_be(self.p1), Revision::from_be(self.p2)]
+    }
+}
+
+impl RevlogIndex for Index {
+    /// Number of entries in the index
+    fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Node of `rev`, or `None` for `NULL_REVISION` and for any revision
+    /// that is not in the index.
+    fn node(&self, rev: Revision) -> Option<&Node> {
+        if rev == NULL_REVISION || rev < 0 {
+            return None;
+        }
+        self.data.get(rev as usize).map(|entry| &entry.node)
+    }
+}
+
+impl Graph for &Index {
+    /// Parents of `rev`, checked against the bounds of the index.
+    ///
+    /// `NULL_REVISION` is a valid parent (it stands for "no parent"). A
+    /// `rev` that is not in the index, or a parent that is neither
+    /// `NULL_REVISION` nor in the index, gives
+    /// `GraphError::ParentOutOfRange(rev)`.
+    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+        if rev < 0 {
+            return Err(GraphError::ParentOutOfRange(rev));
+        }
+        let entry = self
+            .data
+            .get(rev as usize)
+            .ok_or(GraphError::ParentOutOfRange(rev))?;
+        let [p1, p2] = entry.parents();
+        let len = self.len();
+        // A plain `p as usize >= len` test would reject NULL_REVISION,
+        // hence every root revision, so it is accepted explicitly.
+        let is_valid = |p: Revision| {
+            p == NULL_REVISION || (p >= 0 && (p as usize) < len)
+        };
+        if is_valid(p1) && is_valid(p2) {
+            Ok([p1, p2])
+        } else {
+            Err(GraphError::ParentOutOfRange(rev))
+        }
+    }
+}
+
+struct IndexMmap(Mmap);
+
+impl Deref for IndexMmap {
+    type Target = [IndexEntry];
+
+    fn deref(&self) -> &[IndexEntry] {
+        let ptr = self.0.as_ptr() as *const IndexEntry;
+        // What matters for `from_raw_parts` is the alignment of the start
+        // address, not whether the length is a multiple of the alignment.
+        debug_assert_eq!(
+            ptr as usize % std::mem::align_of::<IndexEntry>(),
+            0,
+            "Misaligned data in mmap"
+        );
+        debug_assert_eq!(
+            std::mem::size_of::<IndexEntry>(),
+            INDEX_ENTRY_SIZE,
+            "IndexEntry does not have the size of an on-disk entry"
+        );
+        // Trailing bytes that do not make a full entry are ignored.
+        let count = self.0.len() / INDEX_ENTRY_SIZE;
+        // SAFETY: `ptr` is the start of the mapping owned by `self`, which
+        // outlives the returned slice, and `count` entries fit in its
+        // length. This assumes that `Node` is plain bytes (valid for any
+        // bit pattern) and that the file is not truncated while mapped.
+        unsafe { slice::from_raw_parts(ptr, count) }
+    }
+}
+
+impl Index {
+    /// Memory-map the index file at `path`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file cannot be opened or memory-mapped.
+    pub fn load_mmap(path: impl AsRef<Path>) -> Self {
+        let file = File::open(path)
+            .expect("Index file is missing, or missing permission");
+        // SAFETY: the mapping is only read; this relies on the file not
+        // being truncated by another process while it is mapped.
+        let mmap = unsafe { MmapOptions::new().map(&file) }
+            .expect("Cannot memory-map the index file");
+        Self {
+            data: Box::new(IndexMmap(mmap)),
+        }
+    }
+}
diff --git a/rust/hg-core/examples/nodemap/main.rs b/rust/hg-core/examples/nodemap/main.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/examples/nodemap/main.rs
@@ -0,0 +1,175 @@
+// Copyright 2019-2020 Georges Racinet
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use clap::*;
+use hg::revlog::node::*;
+use hg::revlog::nodemap::*;
+use hg::revlog::*;
+use memmap::MmapOptions;
+use rand::Rng;
+use std::fs::File;
+use std::io;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use std::time::Instant;
+
+mod index;
+use index::Index;
+
+/// Memory-map the changelog index of the repository at `repo_path`
+fn mmap_index(repo_path: &Path) -> Index {
+    let mut path = PathBuf::from(repo_path);
+    path.extend([".hg", "store", "00changelog.i"].iter());
+    Index::load_mmap(path)
+}
+
+/// Memory-map the nodemap file at `path` and load it as a `NodeTree`
+///
+/// Panics if the file cannot be opened or memory-mapped.
+fn mmap_nodemap(path: &Path) -> NodeTree {
+    let file = File::open(path).expect("Cannot open the nodemap file");
+    // SAFETY: the mapping is only read; this relies on the file not
+    // being truncated by another process while it is mapped.
+    let mmap = unsafe { MmapOptions::new().map(&file) }
+        .expect("Cannot memory-map the nodemap file");
+    let len = mmap.len();
+    NodeTree::load_bytes(Box::new(mmap), len)
+}
+
+/// Scan the whole index and create the corresponding nodemap file at `path`
+fn create(index: &Index, path: &Path) -> io::Result<()> {
+    let mut file = File::create(path)?;
+    let start = Instant::now();
+    let mut nm = NodeTree::default();
+    for rev in 0..index.len() {
+        let rev = rev as Revision;
+        nm.insert(index, index.node(rev).unwrap(), rev).unwrap();
+    }
+    eprintln!("Nodemap constructed in RAM in {:?}", start.elapsed());
+    // `write` may return after a partial write, which would leave a
+    // truncated nodemap on disk without any error being reported.
+    file.write_all(&nm.into_readonly_and_added_bytes().1)?;
+    eprintln!("Nodemap written to disk");
+    Ok(())
+}
+
+/// Look `prefix` up in the nodemap and print the result with its timing
+fn query(index: &Index, nm: &NodeTree, prefix: &str) {
+    let start = Instant::now();
+    let res = nm.find_hex(index, prefix);
+    println!("Result found in {:?}: {:?}", start.elapsed(), res);
+}
+
+/// Time `queries` lookups of nodes taken at random in the index
+///
+/// The nodes are picked before the clock is started, so that only the
+/// nodemap lookups are measured. Nothing is done if `queries` is zero or
+/// if the index is empty, since there would be nothing to pick or average.
+fn bench(index: &Index, nm: &NodeTree, queries: usize) {
+    let len = index.len() as u32;
+    if queries == 0 || len == 0 {
+        eprintln!("Nothing to do: no query requested, or empty index");
+        return;
+    }
+    let mut rng = rand::thread_rng();
+    let nodes: Vec<Node> = (0..queries)
+        .map(|_| {
+            index
+                .node((rng.gen::<u32>() % len) as Revision)
+                .unwrap()
+                .clone()
+        })
+        .collect();
+    if queries < 10 {
+        let nodes_hex: Vec<String> =
+            nodes.iter().map(|n| n.encode_hex()).collect();
+        println!("Nodes: {:?}", nodes_hex);
+    }
+    let mut last: Option<Revision> = None;
+    let start = Instant::now();
+    for node in nodes.iter() {
+        last = nm.find_bin(index, node.into()).unwrap();
+    }
+    let elapsed = start.elapsed();
+    println!(
+        "Did {} queries in {:?} (mean {:?}), last was {:?} with result {:?}",
+        queries,
+        elapsed,
+        elapsed / (queries as u32),
+        nodes.last().unwrap().encode_hex(),
+        last
+    );
+}
+
+/// Parse the command line and run the requested subcommand
+fn main() {
+    let matches = App::new("Nodemap pure Rust example")
+        .arg(
+            Arg::with_name("REPOSITORY")
+                .help("Path to the repository, always necessary for its index")
+                .required(true),
+        )
+        .arg(
+            Arg::with_name("NODEMAP_FILE")
+                .help("Path to the nodemap file, independent of REPOSITORY")
+                .required(true),
+        )
+        .subcommand(
+            SubCommand::with_name("create")
+                .about("Create NODEMAP_FILE by scanning repository index"),
+        )
+        .subcommand(
+            SubCommand::with_name("query")
+                .about("Query NODEMAP_FILE for PREFIX")
+                .arg(Arg::with_name("PREFIX").required(true)),
+        )
+        .subcommand(
+            SubCommand::with_name("bench")
+                .about(
+                    "Perform #QUERIES random successful queries on NODEMAP_FILE")
+                .arg(Arg::with_name("QUERIES").required(true)),
+        )
+        .get_matches();
+
+    // Both arguments are declared as required: clap exits before this
+    // point if one of them is missing.
+    let repo = matches.value_of("REPOSITORY").unwrap();
+    let nm_path = matches.value_of("NODEMAP_FILE").unwrap();
+
+    let index = mmap_index(Path::new(repo));
+
+    if matches.subcommand_matches("create").is_some() {
+        println!("Creating nodemap file {} for repository {}", nm_path, repo);
+        create(&index, Path::new(nm_path))
+            .expect("Cannot create the nodemap file");
+        return;
+    }
+
+    let nm = mmap_nodemap(Path::new(nm_path));
+    if let Some(matches) = matches.subcommand_matches("query") {
+        let prefix = matches.value_of("PREFIX").unwrap();
+        println!(
+            "Querying {} in nodemap file {} of repository {}",
+            prefix, nm_path, repo
+        );
+        query(&index, &nm, prefix);
+    }
+    if let Some(matches) = matches.subcommand_matches("bench") {
+        let raw = matches.value_of("QUERIES").unwrap();
+        let queries = match usize::from_str(raw) {
+            Ok(queries) => queries,
+            Err(err) => {
+                eprintln!("Invalid QUERIES value {:?}: {}", raw, err);
+                std::process::exit(1);
+            }
+        };
+        println!(
+            "Doing {} random queries in nodemap file {} of repository {}",
+            queries, nm_path, repo
+        );
+        bench(&index, &nm, queries);
+    }
+}