diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -7,6 +7,19 @@ ] [[package]] +name = "bitflags" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "cpython" version = "0.1.0" source = "git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52#c90d65cf84abfffce7ef54476bbfed56017a2f52" @@ -17,8 +30,25 @@ ] [[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "hg-core" version = "0.1.0" +dependencies = [ + "rand 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "hgcli" @@ -74,6 +104,71 @@ ] [[package]] +name = "rand" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_chacha 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_xorshift 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_chacha" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rand_hc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_isaac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_pcg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_xorshift" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "regex" version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -91,6 +186,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "thread-id" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -118,22 +234,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "winapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "winapi-build" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [metadata] "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)" = "" +"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)" = "96264e9b293e95d25bfcbbf8a88ffd1aedc85b754eba8b7d78012f638ba220eb" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070" "checksum python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)" = "" +"checksum rand 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae9d223d52ae411a33cf7e54ec6034ec165df296ccd23533d671a28252b6f66a" +"checksum rand_chacha 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "771b009e3a508cb67e8823dda454aaa5368c7bc1c16829fb77d3e980440dd34a" +"checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" +"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" +"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" +"checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" +"checksum rand_xorshift 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effa3fcaa47e18db002bdde6060944b6d2f9cfd8db471c30e873448ad9187be3" "checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" "checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" +"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -6,3 +6,6 @@ [lib] name = "hg" + +[dev-dependencies] +rand = "*" diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -8,6 +8,7 @@ //! Rust versions of generic DAG ancestors algorithms for Mercurial use super::{Graph, GraphError, Revision, NULL_REVISION}; +use std::cmp::max; use std::collections::{BinaryHeap, HashSet}; /// Iterator over the ancestors of a given list of revisions @@ -24,6 +25,11 @@ stoprev: Revision, } +pub struct MissingAncestors { + graph: G, + bases: HashSet, +} + impl AncestorsIterator { /// Constructor. /// @@ -143,10 +149,211 @@ } } +impl MissingAncestors { + pub fn new(graph: G, bases: I) -> Self + where + I: IntoIterator, + { + let mut bset: HashSet = bases.into_iter().collect(); + if bset.is_empty() { + bset.insert(NULL_REVISION); + } + MissingAncestors { + graph: graph, + bases: bset, + } + } + + pub fn has_bases(&self) -> bool { + match self.bases.len() { + 0 => false, // shouldn't happen + 1 => *(self.bases.iter().next().unwrap()) != -1, + _ => true, + } + } + + /// Return a reference to current bases. + /// + /// This is useful in unit tests, but also setdiscovery.py does + /// read the bases attribute of a ancestor.missingancestors instance. + pub fn get_bases<'a>(&'a self) -> &'a HashSet { + &self.bases + } + + pub fn add_bases(&mut self, new_bases: I) + where + I: IntoIterator, + { + self.bases.extend(new_bases); + } + + /// Remove all ancestors of self.bases from the revs set (in place) + pub fn remove_ancestors_from( + &mut self, + revs: &mut HashSet, + ) -> Result<(), GraphError> { + // using difference() or intersection() would not satisfy borrow rules + revs.retain(|r| !self.bases.contains(r)); + // the null revision is always an ancestor + revs.remove(&NULL_REVISION); + for rev in self.bases.iter() { + revs.remove(&rev); + } + if revs.is_empty() { + return Ok(()); + } + // anything in revs > start is definitely not an ancestor of bases + // revs <= start need to be investigated + // TODO gracinet obvious optim here if a missingancestors is to be used + // several times + let start = match self.bases.iter().map(|&r| r).max() { + Some(m) => m, + None => { + // bases is empty (shouldn't happen, but let's be safe) + return Ok(()); + } + }; + // whatever happens, we'll keep at least keepcount of them + // knowing this gives us a earlier stop condition than + // going all the way to the root + let keepcount = revs.iter().filter(|&r| *r > start).count(); + + let mut curr = start; + while curr != NULL_REVISION && revs.len() > keepcount { + if self.bases.contains(&curr) { + revs.remove(&curr); + self.add_parents(curr)?; + } + curr -= 1; + } + Ok(()) + } + + /// Add rev's parents to self.bases + #[inline] + fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { + let parents = self.graph.parents(rev)?; + // No need to bother the set with inserting NULL_REVISION over and + // over + if parents.0 != NULL_REVISION { + self.bases.insert(parents.0); + } + if parents.1 != NULL_REVISION { + self.bases.insert(parents.1); + } + Ok(()) + } + + /// Return all the ancestors of revs that are not ancestors of self.bases + /// + /// This may include elements from revs. + /// + /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in + /// revision number order, which is a topological order. + pub fn missing_ancestors( + &mut self, + revs: I, + ) -> Result, GraphError> + where + I: IntoIterator, + { + // just for convenience and comparison with Python version + let bases_visit = &mut self.bases; + let mut revs_as_set: HashSet = revs + .into_iter() + .filter(|r| !bases_visit.contains(r)) + .collect(); + let revs_visit = &mut revs_as_set; + let mut both_visit: HashSet = + revs_visit.intersection(&bases_visit).map(|&r| r).collect(); + if revs_visit.is_empty() { + return Ok(Vec::new()); + } + + let max_bases = match bases_visit.iter().map(|&r| r).max() { + Some(m) => m, + None => NULL_REVISION, + }; + + let max_revs = match revs_visit.iter().map(|&r| r).max() { + Some(m) => m, + None => NULL_REVISION, + }; + let start = max(max_bases, max_revs); + + // TODO heuristics for with_capacity()? + let mut missing: Vec = Vec::new(); + for curr in (0..start + 1).map(|i| start - i) { + if revs_visit.is_empty() { + break; + } + if both_visit.contains(&curr) { + // curr's parents might have made it into revs_visit through + // another path + // TODO optim: Rust's HashSet.remove returns a boolean telling + // if it happened. This will spare us one set lookup + both_visit.remove(&curr); + for p in self.graph.parents_iter(curr)? { + revs_visit.remove(&p); + bases_visit.insert(p); + both_visit.insert(p); + } + continue; + } + // in Rust, one can't just use mutable variables assignation + // to be more straightforward. Instead of Python's + // thisvisit and othervisit, we'll differentiate with a boolean + let this_visit_is_revs = { + if revs_visit.contains(&curr) { + missing.push(curr); + revs_visit.remove(&curr); + true + } else if bases_visit.contains(&curr) { + false + } else { + // not an ancestor of revs or bases: ignore + continue; + } + }; + + for p in self.graph.parents_iter(curr)? { + let in_other_visit = if this_visit_is_revs { + bases_visit.contains(&p) + } else { + revs_visit.contains(&p) + }; + if in_other_visit || both_visit.contains(&p) { + // p is implicitely in this_visit. + // This means p is or should be in bothvisit + // GR performance hence if bothvisit, we look up twice + revs_visit.remove(&p); + bases_visit.insert(p); + both_visit.insert(p); + } else { + // visit later + if this_visit_is_revs { + revs_visit.insert(p); + } else { + bases_visit.insert(p); + } + } + } + } + missing.reverse(); + Ok(missing) + } +} + #[cfg(test)] mod tests { use super::*; + extern crate rand; + use self::rand::distributions::{Distribution, LogNormal, Uniform}; + use self::rand::{thread_rng, Rng, RngCore}; + use std::cmp::min; + use std::fmt::Debug; + use std::iter::FromIterator; #[derive(Clone, Debug)] struct Stub; @@ -270,4 +477,458 @@ assert_eq!(iter.next(), Some(0)); assert_eq!(iter.next(), None); } + + #[test] + /// Test constructor, add/get bases + fn test_missing_bases() { + let mut missanc = MissingAncestors::new(Stub, vec![5, 3, 1, 3]); + let mut as_vec: Vec; + as_vec = missanc.get_bases().iter().map(|&r| r).collect(); + as_vec.sort(); + assert_eq!(as_vec, [1, 3, 5]); + + missanc.add_bases(vec![3, 7, 8]); + as_vec = missanc.get_bases().iter().map(|&r| r).collect(); + as_vec.sort(); + assert_eq!(as_vec, [1, 3, 5, 7, 8]); + } + + fn assert_missing_remove( + bases: &[Revision], + revs: &[Revision], + expected: &[Revision], + ) { + let mut missanc = + MissingAncestors::new(Stub, bases.iter().map(|&r| r)); + let mut revset: HashSet = revs.iter().map(|&r| r).collect(); + missanc.remove_ancestors_from(&mut revset).unwrap(); + let mut as_vec: Vec = revset.into_iter().collect(); + as_vec.sort(); + assert_eq!(as_vec.as_slice(), expected); + } + + #[test] + fn test_missing_remove() { + assert_missing_remove( + &[1, 2, 3, 4, 7], + Vec::from_iter(1..10).as_slice(), + &[5, 6, 8, 9], + ); + assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]); + assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]); + } + + fn assert_missing_ancestors( + bases: &[Revision], + revs: I, + expected: &[Revision], + ) where + I: IntoIterator, + { + let mut missanc = + MissingAncestors::new(Stub, bases.iter().map(|&r| r)); + let missing = missanc.missing_ancestors(revs).unwrap(); + assert_eq!(missing.as_slice(), expected); + } + + #[test] + fn test_missing_ancestors() { + // examples taken from test-ancestors.py by having it run + // on the same graph (both naive and fast Python algs) + assert_missing_ancestors(&[10], vec![11], &[3, 7, 11]); + assert_missing_ancestors(&[11], vec![10], &[5, 10]); + assert_missing_ancestors(&[7], vec![9, 11], &[3, 6, 9, 11]); + } + + // A Graph represented by a vector whose indices are revisions + // and values are parents of the revisions + type VecGraph = Vec<[Revision; 2]>; + + impl Graph for VecGraph { + fn parents( + &self, + rev: Revision, + ) -> Result<(Revision, Revision), GraphError> { + let as_array = self[rev as usize]; + Ok((as_array[0], as_array[1])) + } + } + + /// A interesting case found by a random generator similar to + /// the one in test-ancestor.py. An early version of Rust MissingAncestors + /// failed this, yet none of the integration tests of the whole suite + /// catched it. + #[test] + fn test_remove_ancestors_from_case1() { + let graph: VecGraph = vec![ + [-1, -1], + [0, -1], + [1, 0], + [2, 1], + [3, -1], + [4, -1], + [5, 1], + [2, -1], + [7, -1], + [8, -1], + [9, -1], + [10, 1], + [3, -1], + [12, -1], + [13, -1], + [14, -1], + [4, -1], + [16, -1], + [17, -1], + [18, -1], + [19, 11], + [20, -1], + [21, -1], + [22, -1], + [23, -1], + [2, -1], + [3, -1], + [26, 24], + [27, -1], + [28, -1], + [12, -1], + [1, -1], + [1, 9], + [32, -1], + [33, -1], + [34, 31], + [35, -1], + [36, 26], + [37, -1], + [38, -1], + [39, -1], + [40, -1], + [41, -1], + [42, 26], + [0, -1], + [44, -1], + [45, 4], + [40, -1], + [47, -1], + [36, 0], + [49, -1], + [-1, -1], + [51, -1], + [52, -1], + [53, -1], + [14, -1], + [55, -1], + [15, -1], + [23, -1], + [58, -1], + [59, -1], + [2, -1], + [61, 59], + [62, -1], + [63, -1], + [-1, -1], + [65, -1], + [66, -1], + [67, -1], + [68, -1], + [37, 28], + [69, 25], + [71, -1], + [72, -1], + [50, 2], + [74, -1], + [12, -1], + [18, -1], + [77, -1], + [78, -1], + [79, -1], + [43, 33], + [81, -1], + [82, -1], + [83, -1], + [84, 45], + [85, -1], + [86, -1], + [-1, -1], + [88, -1], + [-1, -1], + [76, 83], + [44, -1], + [92, -1], + [93, -1], + [9, -1], + [95, 67], + [96, -1], + [97, -1], + [-1, -1], + ]; + let problem_rev = 28 as Revision; + let problem_base = 70 as Revision; + // making the problem evident: problem_rev is a parent of problem_base + assert_eq!(graph.parents(problem_base).unwrap().1, problem_rev); + + let mut missanc: MissingAncestors = MissingAncestors::new( + graph, + vec![60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6], + ); + assert!(missanc.bases.contains(&problem_base)); + + let mut revs: HashSet = + [4, 12, 41, 28, 68, 38, 1, 30, 56, 44] + .iter() + .map(|&r| r) + .collect(); + missanc.remove_ancestors_from(&mut revs).unwrap(); + assert!(!revs.contains(&problem_rev)); + } + + fn build_random_graph( + nodes_opt: Option, + rootprob_opt: Option, + mergeprob_opt: Option, + prevprob_opt: Option, + ) -> VecGraph { + let nodes = nodes_opt.unwrap_or(100); + let rootprob = rootprob_opt.unwrap_or(0.05); + let mergeprob = mergeprob_opt.unwrap_or(0.2); + let prevprob = prevprob_opt.unwrap_or(0.7); + + let mut rng = thread_rng(); + let mut vg: VecGraph = Vec::with_capacity(nodes); + for i in 0..nodes { + if i == 0 || rng.gen_bool(rootprob) { + vg.push([NULL_REVISION, NULL_REVISION]) + } else if i == 1 { + vg.push([0, NULL_REVISION]) + } else if rng.gen_bool(mergeprob) { + let p1 = { + if i == 2 || rng.gen_bool(prevprob) { + (i - 1) as Revision + } else { + rng.gen_range(0, i - 1) as Revision + } + }; + // p2 is a random revision lower than i and different from p1 + let mut p2 = rng.gen_range(0, i - 1) as Revision; + if p2 >= p1 { + p2 = p2 + 1; + } + vg.push([p1, p2]); + } else if rng.gen_bool(prevprob) { + vg.push([(i - 1) as Revision, NULL_REVISION]) + } else { + vg.push([rng.gen_range(0, i - 1) as Revision, NULL_REVISION]) + } + } + vg + } + + /// Compute the ancestors set of all revisions of a VecGraph + fn ancestors_sets(vg: &VecGraph) -> Vec> { + let mut ancs: Vec> = Vec::new(); + for i in 0..vg.len() { + let mut ancs_i = HashSet::new(); + ancs_i.insert(i as Revision); + for p in vg[i].iter().map(|&r| r) { + if p != NULL_REVISION { + ancs_i.extend(&ancs[p as usize]); + } + } + ancs.push(ancs_i); + } + ancs + } + + #[derive(Clone, Debug)] + enum MissingAncestorsAction { + InitialBases(HashSet), + AddBases(HashSet), + RemoveAncestorsFrom(HashSet), + MissingAncestors(HashSet), + } + + /// An instrumented naive yet obviously correct implementation + /// + /// It also records all its actions for easy reproduction for replay + /// of problematic cases + struct NaiveMissingAncestors<'a> { + ancestors_sets: &'a Vec>, + graph: &'a VecGraph, // used for error reporting only + bases: HashSet, + history: Vec, + } + + impl<'a> NaiveMissingAncestors<'a> { + fn new( + graph: &'a VecGraph, + ancestors_sets: &'a Vec>, + bases: &HashSet, + ) -> Self { + Self { + ancestors_sets: ancestors_sets, + bases: bases.clone(), + graph: graph, + history: vec![MissingAncestorsAction::InitialBases( + bases.clone(), + )], + } + } + + fn add_bases(&mut self, new_bases: HashSet) { + self.bases.extend(&new_bases); + self.history + .push(MissingAncestorsAction::AddBases(new_bases)) + } + + fn remove_ancestors_from(&mut self, revs: &mut HashSet) { + revs.remove(&NULL_REVISION); + self.history + .push(MissingAncestorsAction::RemoveAncestorsFrom( + revs.clone(), + )); + for base in self.bases.iter().map(|&r| r) { + if base != NULL_REVISION { + for rev in &self.ancestors_sets[base as usize] { + revs.remove(&rev); + } + } + } + } + + fn missing_ancestors(&mut self, revs: I) -> Vec + where + I: IntoIterator, + { + let revs_as_set: HashSet = revs.into_iter().collect(); + + let mut missing: HashSet = HashSet::new(); + for rev in revs_as_set.iter().map(|&r| r) { + if rev != NULL_REVISION { + missing.extend(&self.ancestors_sets[rev as usize]) + } + } + self.history + .push(MissingAncestorsAction::MissingAncestors(revs_as_set)); + + for base in self.bases.iter().map(|&r| r) { + if base != NULL_REVISION { + for rev in &self.ancestors_sets[base as usize] { + missing.remove(&rev); + } + } + } + let mut res: Vec = missing.iter().map(|&r| r).collect(); + res.sort(); + res + } + + fn assert_eq(&self, left: T, right: T) + where + T: PartialEq + Debug, + { + if left == right { + return; + } + eprintln!( + "Equality assertion failed:\n left={:?}\n right={:?}", + left, right + ); + self.report(); + panic!("Failed example, see stderr for details"); + } + + fn report(&self) { + eprintln!("Failed example, graph={:?}", self.graph); + eprintln!("Current bases: {:?}", self.bases); + eprintln!("History: {:?}", self.history); + } + } + + /// Choose a set of random revisions + /// + /// The size of the set is taken from a LogNormal distribution + /// with default mu=1.1 and default sigma=0.8. Quoting the Python + /// test this is taken from: + /// the default mu and sigma give us a nice distribution of mostly + /// single-digit counts (including 0) with some higher ones + /// The sample may include NULL_REVISION + fn sample_revs( + rng: &mut R, + maxrev: Revision, + mu_opt: Option, + sigma_opt: Option, + ) -> HashSet { + let mu = mu_opt.unwrap_or(1.1); + let sigma = sigma_opt.unwrap_or(0.8); + + let log_normal = LogNormal::new(mu, sigma); + let nb = min(maxrev as usize, log_normal.sample(rng).floor() as usize); + + let dist = Uniform::from(NULL_REVISION..maxrev); + return rng.sample_iter(&dist).take(nb).collect(); + } + + #[test] + /// This test creates lots of random VecGraphs, + /// and compare a bunch of MissingAncestors for them with + /// NaiveMissingAncestors that rely on precomputed transitive closures of + /// these VecGraphs (ancestors_sets). + /// + /// This is slow: it runs on my workstation in about 5 seconds with the + /// default settings. + /// If you want to run it faster, especially if you're changing the + /// parameters, use `cargo test --release`. + /// For me, that gets it down to 0.15 seconds with the default parameters + fn test_missing_ancestors_compare_naive() { + let graphcount = 100; + let testcount = 10; + let inccount = 10; + let mut rng = thread_rng(); + for _g in 0..graphcount { + let graph = build_random_graph(None, None, None, None); + let graph_len = graph.len() as Revision; + let ancestors_sets = ancestors_sets(&graph); + for _testno in 0..testcount { + let bases: HashSet = + sample_revs(&mut rng, graph_len, None, None); + let mut inc = MissingAncestors::::new( + graph.clone(), + bases.clone(), + ); + let mut naive = NaiveMissingAncestors::new( + &graph, + &ancestors_sets, + &bases, + ); + for _m in 0..inccount { + if rng.gen_bool(0.2) { + let new_bases = + sample_revs(&mut rng, graph_len, None, None); + inc.add_bases(new_bases.iter().map(|&r| r)); + naive.add_bases(new_bases); + } + if rng.gen_bool(0.4) { + // larger set so that there are more revs to remove + // from + let mut hrevs = + sample_revs(&mut rng, graph_len, Some(1.5), None); + let mut rrevs = hrevs.clone(); + inc.remove_ancestors_from(&mut hrevs).unwrap(); + naive.remove_ancestors_from(&mut rrevs); + naive.assert_eq(hrevs, rrevs); + } else { + let revs = + sample_revs(&mut rng, graph_len, None, None); + let hm = inc + .missing_ancestors(revs.iter().map(|&r| r)) + .unwrap(); + let rm = + naive.missing_ancestors(revs.iter().map(|&r| r)); + naive.assert_eq(hm, rm); + } + } + } + } + } + } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::AncestorsIterator; +pub use ancestors::{AncestorsIterator, MissingAncestors}; /// Mercurial revision numbers /// @@ -16,6 +16,50 @@ /// The simplest expression of what we need of Mercurial DAGs. pub trait Graph { fn parents(&self, Revision) -> Result<(Revision, Revision), GraphError>; + + fn parents_iter( + &self, + r: Revision, + ) -> Result { + let parents = self.parents(r)?; + Ok(ParentsIterator::new(parents)) + } +} + +pub struct ParentsIterator { + cur: isize, + parents: (Revision, Revision), +} + +/// Iterator on the at most two parents of a given Revision +/// Instead of NULL_REVISION, None is returned +/// Here a macro would probably be more in order for performance +impl ParentsIterator { + fn new(parents: (Revision, Revision)) -> Self { + Self { + cur: -1, + parents: parents, + } + } +} + +impl Iterator for ParentsIterator { + type Item = Revision; + + #[inline] + fn next(&mut self) -> Option { + self.cur += 1; + match match self.cur { + 0 => self.parents.0, + 1 => self.parents.1, + _ => { + return None; + } + } { + NULL_REVISION => None, + p => Some(p), + } + } } #[derive(Clone, Debug, PartialEq)] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): + """A few explicit cases, easier to check for catching errors in refactors. + + The bigger graph at the end has been produced by the random generator + above, and we have some evidence that the other tests don't cover it. + """ + for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): + print("%% removeancestorsfrom(), example %d" % (i + 1)) + missanc = ancestor.incrementalmissingancestors(graph.get, bases) + missanc.removeancestorsfrom(revs) + print("remaining (sorted): %s" % sorted(list(revs))) + + for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): + print("%% missingancestors(), example %d" % (i + 1)) + missanc = ancestor.incrementalmissingancestors(graph.get, bases) + print("return %s" % missanc.missingancestors(revs)) + + print("% removeancestorsfrom(), bigger graph") + vecgraph = [ + [-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], + [2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], + [13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], + [19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], + [3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], + [32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], + [38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], + [44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], + [-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], + [55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], + [61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], + [66, -1], [67, -1], [68, -1], [37, 28], [69, 25], + [71, -1], [72, -1], [50, 2], [74, -1], [12, -1], + [18, -1], [77, -1], [78, -1], [79, -1], [43, 33], + [81, -1], [82, -1], [83, -1], [84, 45], [85, -1], + [86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], + [92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], + [-1, -1]] + problem_rev = 28 + problem_base = 70 + # problem_rev is a parent of problem_base, but a faulty implementation + # could forget to remove it. + bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} + if problem_rev not in vecgraph[problem_base] or problem_base not in bases: + print("Conditions have changed") + missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) + revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} + missanc.removeancestorsfrom(revs) + if 28 in revs: + print("Failed!") + else: + print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ seed = long(time.time() * 1000) rng = random.Random(seed) + test_missingancestors_explicit() test_missingancestors(seed, rng) test_lazyancestors() test_gca() diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: []