diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -26,6 +26,7 @@ graph: G, // plays the role of self._repo common: MissingAncestors, undecided: Option>, + children_cache: Option>>, missing: HashSet, rng: Rng, } @@ -49,13 +50,16 @@ /// - `sample`: a sample to update /// - `parentfn`: a callable to resolve parents for a revision /// - `quicksamplesize`: optional target size of the sample -fn update_sample( +fn update_sample( revs: Option<&HashSet>, heads: impl IntoIterator, sample: &mut HashSet, - parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, + parentsfn: impl Fn(Revision) -> Result, quicksamplesize: Option, -) -> Result<(), GraphError> { +) -> Result<(), GraphError> +where + I: Iterator, +{ let mut distances: HashMap = HashMap::new(); let mut visit: VecDeque = heads.into_iter().collect(); let mut factor: u32 = 1; @@ -84,10 +88,7 @@ } } seen.insert(current); - for p in parentsfn(current)?.iter().cloned() { - if p == NULL_REVISION { - continue; - } + for p in parentsfn(current)? { if let Some(revs) = revs { if !revs.contains(&p) { continue; @@ -100,6 +101,45 @@ Ok(()) } +// TODO reread this and decide Vec/Box, and where to put it +struct ParentsIterator { + parents: [Revision; 2], + cur: usize, +} + +impl ParentsIterator { + fn graph_parents( + graph: &impl Graph, + r: Revision, + ) -> Result { + Ok(ParentsIterator { + parents: graph.parents(r)?, + cur: 0, + }) + } +} + +impl Iterator for ParentsIterator { + type Item = Revision; + + fn next(&mut self) -> Option { + if self.cur > 1 { + return None; + } + let rev = self.parents[self.cur]; + self.cur += 1; + if rev == NULL_REVISION { + if self.cur == 2 { + return None; + } else { + // exceptional branch + return self.next(); + } + } + Some(rev) + } +} + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -124,6 +164,7 @@ ) -> Self { PartialDiscovery { undecided: None, + children_cache: None, target_heads: Some(target_heads), graph: graph.clone(), common: MissingAncestors::new(graph, vec![]), @@ -229,6 +270,24 @@ Ok(()) } + fn ensure_children_cache(&mut self) -> Result<(), GraphError> { + if self.children_cache.is_some() { + return Ok(()); + } + self.ensure_undecided()?; + + let mut children: HashMap> = HashMap::new(); + for rev in self.undecided.as_ref().unwrap().iter().cloned() { + for p in self.graph.parents(rev)?.iter().cloned() { + if p != NULL_REVISION { + children.entry(p).or_insert_with(|| Vec::new()).push(rev); + } + } + } + self.children_cache = Some(children); + Ok(()) + } + /// Provide statistics about the current state of the discovery process pub fn stats(&self) -> DiscoveryStats { DiscoveryStats { @@ -256,11 +315,82 @@ None, headrevs, &mut sample, - |r| self.graph.parents(r), + |r| ParentsIterator::graph_parents(&self.graph, r), Some(size), )?; Ok(sample.into_iter().collect()) } + + fn bidirectional_sample( + &mut self, + size: usize, + ) -> Result, GraphError> { + self.ensure_undecided()?; + { + // we don't want to compute children_cache before this + // but doing it after extracting self.undecided takes a mutable + // ref to self while a shareable one is still active. + let undecided = self.undecided.as_ref().unwrap(); + if undecided.len() <= size { + return Ok(undecided.clone()); + } + } + + self.ensure_children_cache()?; + let revs = self.undecided.as_ref().unwrap(); + let mut sample: HashSet = revs.clone(); + if revs.len() <= size { + return Ok(sample); + } + // TODO children cache here ? + dagops::retain_heads(&self.graph, &mut sample)?; + let revsheads = sample.clone(); // was again heads(revs) in python + + // update from heads + update_sample( + Some(revs), + revsheads.iter().cloned(), + &mut sample, + |r| ParentsIterator::graph_parents(&self.graph, r), + None, + )?; + + // update from roots + let revroots: HashSet = + dagops::roots(&self.graph, revs)?.iter().cloned().collect(); + + let children = self.children_cache.as_ref().unwrap(); + let empty_vec: Vec = Vec::new(); + update_sample( + Some(revs), + revroots, + &mut sample, + |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()), + None, + )?; + Ok(sample) + } + + pub fn take_full_sample( + &mut self, + size: usize, + ) -> Result, GraphError> { + let sample = self.bidirectional_sample(size)?; + let more = size - sample.len(); + let mut sample = self.limit_sample(sample.into_iter().collect(), size); + if more > 0 { + let take_from: Vec = self + .undecided + .as_ref() + .unwrap() + .iter() + .filter(|&r| !sample.contains(r)) + .cloned() + .collect(); + sample.extend(self.limit_sample(take_from, more)); + } + Ok(sample) + } } #[cfg(test)] @@ -391,4 +521,24 @@ assert_eq!(sample_vec, vec![4, 9, 12]); Ok(()) } + + #[test] + fn test_children_cache() -> Result<(), GraphError> { + let mut disco = full_disco(); + disco.ensure_children_cache()?; + + let cache = disco.children_cache.unwrap(); + assert_eq!(cache.get(&2).cloned(), Some(vec![4])); + assert_eq!(cache.get(&10).cloned(), None); + + let mut children_4 = cache.get(&4).cloned().unwrap(); + children_4.sort(); + assert_eq!(children_4, vec![5, 6, 7]); + + let mut children_7 = cache.get(&7).cloned().unwrap(); + children_7.sort(); + assert_eq!(children_7, vec![9, 11]); + + Ok(()) + } }