diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -26,6 +26,7 @@ graph: G, // plays the role of self._repo common: MissingAncestors, undecided: Option>, + children_cache: Option>>, missing: HashSet, rng: Rng, } @@ -49,13 +50,16 @@ /// - `sample`: a sample to update /// - `parentfn`: a callable to resolve parents for a revision /// - `quicksamplesize`: optional target size of the sample -fn update_sample( +fn update_sample( revs: Option<&HashSet>, heads: impl IntoIterator, sample: &mut HashSet, - parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, + parentsfn: impl Fn(Revision) -> Result, quicksamplesize: Option, -) -> Result<(), GraphError> { +) -> Result<(), GraphError> +where + I: Iterator, +{ let mut distances: HashMap = HashMap::new(); let mut visit: VecDeque = heads.into_iter().collect(); let mut factor: u32 = 1; @@ -83,10 +87,7 @@ } } } - for &p in &parentsfn(current)? { - if p == NULL_REVISION { - continue; - } + for p in parentsfn(current)? { if let Some(revs) = revs { if !revs.contains(&p) { continue; @@ -99,6 +100,39 @@ Ok(()) } +struct ParentsIterator { + parents: [Revision; 2], + cur: usize, +} + +impl ParentsIterator { + fn graph_parents( + graph: &impl Graph, + r: Revision, + ) -> Result { + Ok(ParentsIterator { + parents: graph.parents(r)?, + cur: 0, + }) + } +} + +impl Iterator for ParentsIterator { + type Item = Revision; + + fn next(&mut self) -> Option { + if self.cur > 1 { + return None; + } + let rev = self.parents[self.cur]; + self.cur += 1; + if rev == NULL_REVISION { + return self.next(); + } + Some(rev) + } +} + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -123,6 +157,7 @@ ) -> Self { PartialDiscovery { undecided: None, + children_cache: None, target_heads: Some(target_heads), graph: graph.clone(), common: MissingAncestors::new(graph, vec![]), @@ -228,6 +263,22 @@ Ok(()) } + fn ensure_children_cache(&mut self) -> Result<(), GraphError> { + if self.children_cache.is_some() { + return Ok(()); + } + self.ensure_undecided()?; + + let mut children: HashMap> = HashMap::new(); + for &rev in self.undecided.as_ref().unwrap() { + for p in ParentsIterator::graph_parents(&self.graph, rev)? { + children.entry(p).or_insert_with(|| Vec::new()).push(rev); + } + } + self.children_cache = Some(children); + Ok(()) + } + /// Provide statistics about the current state of the discovery process pub fn stats(&self) -> DiscoveryStats { DiscoveryStats { @@ -255,11 +306,104 @@ None, headrevs, &mut sample, - |r| self.graph.parents(r), + |r| ParentsIterator::graph_parents(&self.graph, r), Some(size), )?; Ok(sample.into_iter().collect()) } + + /// Extract a sample from `self.undecided`, going from its heads and roots. + /// + /// The `size` parameter is used to avoid useless computations if + /// it turns out to be bigger than the whole set of undecided Revisions. + /// + /// The sample is taken by using `update_sample` from the heads, then + /// from the roots, working on the reverse DAG, + /// expressed by `self.children_cache`. + /// + /// No effort is being made to complete or limit the sample to `size` + fn bidirectional_sample( + &mut self, + size: usize, + ) -> Result, GraphError> { + self.ensure_undecided()?; + { + // we don't want to compute children_cache before this + // but doing it after extracting self.undecided takes a mutable + // ref to self while a shareable one is still active. + let undecided = self.undecided.as_ref().unwrap(); + if undecided.len() <= size { + return Ok(undecided.clone()); + } + } + + self.ensure_children_cache()?; + let revs = self.undecided.as_ref().unwrap(); + let mut sample: HashSet = revs.clone(); + + // it's possible that leveraging the children cache would be more + // efficient here + dagops::retain_heads(&self.graph, &mut sample)?; + let revsheads = sample.clone(); // was again heads(revs) in python + + // update from heads + update_sample( + Some(revs), + revsheads.iter().cloned(), + &mut sample, + |r| ParentsIterator::graph_parents(&self.graph, r), + None, + )?; + + // update from roots + let revroots: HashSet = + dagops::roots(&self.graph, revs)?.into_iter().collect(); + + let children = self.children_cache.as_ref().unwrap(); + let empty_vec: Vec = Vec::new(); + update_sample( + Some(revs), + revroots, + &mut sample, + |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()), + None, + )?; + Ok(sample) + } + + /// Fill up sample up to the wished size with random undecided Revisions. + /// + /// This is intended to be used as a last resort completion if the + /// regular sampling algorithm returns too few elements. + fn random_complete_sample( + &mut self, + sample: &mut Vec, + size: usize, + ) { + let sample_len = sample.len(); + if size <= sample_len { + return; + } + let take_from: Vec = self + .undecided + .as_ref() + .unwrap() + .iter() + .filter(|&r| !sample.contains(r)) + .cloned() + .collect(); + sample.extend(self.limit_sample(take_from, size - sample_len)); + } + + pub fn take_full_sample( + &mut self, + size: usize, + ) -> Result, GraphError> { + let sample = self.bidirectional_sample(size)?.into_iter().collect(); + let mut sample = self.limit_sample(sample, size); + self.random_complete_sample(&mut sample, size); + Ok(sample) + } } #[cfg(test)] @@ -390,4 +534,57 @@ assert_eq!(sample_vec, vec![4, 9, 12]); Ok(()) } + + #[test] + fn test_children_cache() -> Result<(), GraphError> { + let mut disco = full_disco(); + disco.ensure_children_cache()?; + + let cache = disco.children_cache.unwrap(); + assert_eq!(cache.get(&2).cloned(), Some(vec![4])); + assert_eq!(cache.get(&10).cloned(), None); + + let mut children_4 = cache.get(&4).cloned().unwrap(); + children_4.sort(); + assert_eq!(children_4, vec![5, 6, 7]); + + let mut children_7 = cache.get(&7).cloned().unwrap(); + children_7.sort(); + assert_eq!(children_7, vec![9, 11]); + + Ok(()) + } + + #[test] + fn test_complete_sample() { + let mut disco = full_disco(); + let undecided: HashSet = + [4, 7, 9, 2, 3].iter().cloned().collect(); + disco.undecided = Some(undecided); + + let mut sample = vec![0]; + disco.random_complete_sample(&mut sample, 3); + assert_eq!(sample.len(), 3); + + let mut sample = vec![2, 4, 7]; + disco.random_complete_sample(&mut sample, 1); + assert_eq!(sample.len(), 3); + } + + #[test] + fn test_bidirectional_sample() -> Result<(), GraphError> { + let mut disco = full_disco(); + disco.undecided = Some((0..=13).into_iter().collect()); + + let mut sample: Vec = + disco.bidirectional_sample(7)?.into_iter().collect(); + sample.sort(); + // our DAG is a bit too small for the results to be really interesting + // at least it shows that + // - we went both ways + // - we didn't take all Revisions (6 is not in the sample) + assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]); + Ok(()) + } + }