diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -16,15 +16,29 @@ use rand::{thread_rng, RngCore, SeedableRng}; use std::cmp::{max, min}; use std::collections::{HashSet, VecDeque}; +use std::mem; type Rng = rand_pcg::Pcg32; type Seed = [u8; 16]; -pub struct PartialDiscovery { - target_heads: Option>, +pub enum PartialDiscovery { + Com(OnlyCommon), + Und(WithUndecided), +} + +pub struct OnlyCommon { + target_heads: Vec, graph: G, // plays the role of self._repo common: MissingAncestors, - undecided: Option>, + respect_size: bool, + randomize: bool, + seed: Seed, +} + +pub struct WithUndecided { + graph: G, // plays the role of self._repo + common: MissingAncestors, + undecided: HashSet, children_cache: Option>>, missing: HashSet, rng: Rng, @@ -128,6 +142,8 @@ } } +use PartialDiscovery::{Com, Und}; + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -173,16 +189,202 @@ respect_size: bool, randomize: bool, ) -> Self { - PartialDiscovery { - undecided: None, - children_cache: None, - target_heads: Some(target_heads), + Com(OnlyCommon::new( + graph, + target_heads, + seed, + respect_size, + randomize, + )) + } + + /// Do we have any information about the peer? + pub fn has_info(&self) -> bool { + match self { + Com(c) => c.common.has_bases(), + Und(u) => u.has_info(), + } + } + + /// Did we acquire full knowledge of our Revisions that the peer has? + pub fn is_complete(&self) -> bool { + match self { + Com(_) => false, + Und(u) => u.is_complete(), + } + } + + /// Return the heads of the currently known common set of revisions. + /// + /// If the discovery process is not complete (see `is_complete()`), the + /// caller must be aware that this is an intermediate state. + /// + /// On the other hand, if it is complete, then this is currently + /// the only way to retrieve the end results of the discovery process. + /// + /// We may introduce in the future an `into_common_heads` call that + /// would be more appropriate for normal Rust callers, dropping `self` + /// if it is complete. + pub fn common_heads(&self) -> Result, GraphError> { + match self { + Com(c) => c.common.bases_heads(), + Und(u) => u.common_heads(), + } + } + + /// Provide statistics about the current state of the discovery process + pub fn stats(&self) -> DiscoveryStats { + match self { + Com(c) => c.stats(), + Und(u) => u.stats(), + } + } + + /// Register revisions known as being common + pub fn add_common_revisions( + &mut self, + common: impl IntoIterator, + ) -> Result<(), GraphError> { + match self { + Com(oc) => Ok(oc.add_common_revisions(common)), + Und(wu) => wu.add_common_revisions(common), + } + } + + /// Register revisions known as being missing in remote + pub fn add_missing_revisions( + &mut self, + missing: impl IntoIterator, + ) -> Result<(), GraphError> { + self.mutate_undecided( + |oc| oc.compute_undecided(), + |wu| wu.add_missing_revisions(missing), + ) + } + + /// Mutate into a `WithUndecided` and apply the `mutator` closure. + /// + /// If `self` is still in the `OnlyCommons` stage, this applies first + /// the `transitor` closure to produce a `WithUndecided`. + /// + /// The `mutator` closure is applied in all cases. + /// + /// The advantage for the caller is not to have to re-consider the + /// `OnlyCommon` variant after performing a mutation to `WithUndecided`. + fn mutate_undecided( + &mut self, + transitor: T, + mutator: M, + ) -> Result + where + T: FnOnce(OnlyCommon) -> Result, GraphError>, + M: FnOnce(&mut WithUndecided) -> Result, + { + match self { + Com(com) => { + // here we could use `mem::take` if we were on Rust 1.40 + // and `OnlyCommon` was truly implementing the `Default` trait. + let com = mem::replace(com, OnlyCommon::default(&com.graph)); + let mut und = transitor(com)?; + let res = mutator(&mut und); + mem::replace(self, Und(und)); + res + } + Und(und) => mutator(und), + } + } + + pub fn take_quick_sample( + &mut self, + headrevs: impl IntoIterator, + size: usize, + ) -> Result, GraphError> { + self.mutate_undecided( + |oc| oc.compute_undecided(), + |wu| wu.take_quick_sample(headrevs, size), + ) + } + + pub fn take_full_sample( + &mut self, + size: usize, + ) -> Result, GraphError> { + self.mutate_undecided( + |oc| oc.compute_undecided(), + |wu| wu.take_full_sample(size), + ) + } +} + +impl OnlyCommon { + /// In this first stage of the Discovery process, we gather information + /// about common revisions only, and we don't need to compute an + /// undecided set. + /// + /// The more common revisions are known, the less the computation of + /// the undecided set is expensive. Therefore, we delay it as much + /// as possible. + fn new( + graph: G, + target_heads: Vec, + seed: Seed, + respect_size: bool, + randomize: bool, + ) -> Self { + OnlyCommon { graph: graph.clone(), - common: MissingAncestors::new(graph, vec![]), - missing: HashSet::new(), - rng: Rng::from_seed(seed), + target_heads: target_heads, respect_size: respect_size, randomize: randomize, + seed: seed, + common: MissingAncestors::new(graph, vec![]), + } + } + + /// Provide the cheapest possible valid object of type `Self` + /// + /// If later on we can insist on `G` implementing `Default`, then + /// we can drop the `graph` parameter and move this into an `impl Default` + /// block. Currently, our concrete type for `G` besides in tests is outside + /// of this crate (wrapper around index Python object). + /// We don't want to implement `Default` for it right away. + fn default(graph: &G) -> Self { + Self::new(graph.clone(), Vec::new(), [0; 16], false, false) + } + + /// Register revisions known as being common + pub fn add_common_revisions( + &mut self, + common: impl IntoIterator, + ) { + self.common.add_bases(common); + } + + pub fn stats(&self) -> DiscoveryStats { + DiscoveryStats { undecided: None } + } + + fn compute_undecided(mut self) -> Result, GraphError> { + self.common + .missing_ancestors(self.target_heads.iter().cloned()) + .map(|undecided| WithUndecided::new(self, undecided)) + } +} + +impl WithUndecided { + fn new( + disco: OnlyCommon, + undecided: impl IntoIterator, + ) -> Self { + WithUndecided { + undecided: undecided.into_iter().collect(), + children_cache: None, + rng: Rng::from_seed(disco.seed), + graph: disco.graph.clone(), + missing: HashSet::new(), + respect_size: disco.respect_size, + randomize: disco.randomize, + common: disco.common, } } @@ -222,10 +424,7 @@ if self.common.get_bases().len() == before_len { return Ok(()); } - if let Some(ref mut undecided) = self.undecided { - self.common.remove_ancestors_from(undecided)?; - } - Ok(()) + self.common.remove_ancestors_from(&mut self.undecided) } /// Register revisions known as being missing @@ -247,10 +446,9 @@ return Ok(()); } self.ensure_children_cache()?; - self.ensure_undecided()?; // for safety of possible future refactors let children = self.children_cache.as_ref().unwrap(); let mut seen: HashSet = HashSet::new(); - let undecided_mut = self.undecided.as_mut().unwrap(); + let undecided_mut = &mut self.undecided; while let Some(rev) = tovisit.pop_front() { if !self.missing.insert(rev) { // either it's known to be missing from a previous @@ -284,7 +482,7 @@ /// Did we acquire full knowledge of our Revisions that the peer has? pub fn is_complete(&self) -> bool { - self.undecided.as_ref().map_or(false, |s| s.is_empty()) + self.undecided.is_empty() } /// Return the heads of the currently known common set of revisions. @@ -302,37 +500,15 @@ self.common.bases_heads() } - /// Force first computation of `self.undecided` - /// - /// After this, `self.undecided.as_ref()` and `.as_mut()` can be - /// unwrapped to get workable immutable or mutable references without - /// any panic. - /// - /// This is an imperative call instead of an access with added lazyness - /// to reduce easily the scope of mutable borrow for the caller, - /// compared to undecided(&'a mut self) -> &'a… that would keep it - /// as long as the resulting immutable one. - fn ensure_undecided(&mut self) -> Result<(), GraphError> { - if self.undecided.is_some() { - return Ok(()); - } - let tgt = self.target_heads.take().unwrap(); - self.undecided = - Some(self.common.missing_ancestors(tgt)?.into_iter().collect()); - Ok(()) - } - fn ensure_children_cache(&mut self) -> Result<(), GraphError> { if self.children_cache.is_some() { return Ok(()); } - self.ensure_undecided()?; - let mut children: FastHashMap> = FastHashMap::default(); - for &rev in self.undecided.as_ref().unwrap() { - for p in ParentsIterator::graph_parents(&self.graph, rev)? { - children.entry(p).or_insert_with(|| Vec::new()).push(rev); + for rev in self.undecided.iter() { + for p in ParentsIterator::graph_parents(&self.graph, *rev)? { + children.entry(p).or_insert_with(|| Vec::new()).push(*rev); } } self.children_cache = Some(children); @@ -342,7 +518,7 @@ /// Provide statistics about the current state of the discovery process pub fn stats(&self) -> DiscoveryStats { DiscoveryStats { - undecided: self.undecided.as_ref().map(|s| s.len()), + undecided: Some(self.undecided.len()), } } @@ -351,9 +527,8 @@ headrevs: impl IntoIterator, size: usize, ) -> Result, GraphError> { - self.ensure_undecided()?; let mut sample = { - let undecided = self.undecided.as_ref().unwrap(); + let undecided = &self.undecided; if undecided.len() <= size { return Ok(undecided.iter().cloned().collect()); } @@ -389,19 +564,17 @@ &mut self, size: usize, ) -> Result<(HashSet, usize), GraphError> { - self.ensure_undecided()?; { // we don't want to compute children_cache before this // but doing it after extracting self.undecided takes a mutable // ref to self while a shareable one is still active. - let undecided = self.undecided.as_ref().unwrap(); - if undecided.len() <= size { - return Ok((undecided.clone(), size)); + if self.undecided.len() <= size { + return Ok((self.undecided.clone(), size)); } } self.ensure_children_cache()?; - let revs = self.undecided.as_ref().unwrap(); + let revs = &self.undecided; let mut sample: HashSet = revs.clone(); // it's possible that leveraging the children cache would be more @@ -450,8 +623,6 @@ } let take_from: Vec = self .undecided - .as_ref() - .unwrap() .iter() .filter(|&r| !sample.contains(r)) .cloned() @@ -495,6 +666,12 @@ ) } + fn full_disco_with_undecided() -> WithUndecided { + OnlyCommon::new(SampleGraph, vec![10, 11, 12, 13], [0; 16], true, true) + .compute_undecided() + .unwrap() + } + /// A PartialDiscovery as for pushing the 12 head of `SampleGraph` /// /// To avoid actual randomness in tests, we give it a fixed random seed. @@ -508,18 +685,51 @@ ) } + fn unwrap_disco_with_undecided( + disco: &PartialDiscovery, + ) -> &WithUndecided { + match disco { + Com(_) => { + panic!("Unexpected variant"); + } + Und(wu) => wu, + } + } + + fn force_undecided( + disco: &mut PartialDiscovery, + undecided: impl IntoIterator, + ) { + let undecided_set: HashSet = undecided.into_iter().collect(); + disco + .mutate_undecided( + |oc| Ok(WithUndecided::new(oc, Vec::new())), + |wu| { + wu.undecided = undecided_set; + Ok(()) + }, + ) + .unwrap() + } + fn sorted_undecided( disco: &PartialDiscovery, ) -> Vec { - let mut as_vec: Vec = - disco.undecided.as_ref().unwrap().iter().cloned().collect(); + let mut as_vec: Vec = unwrap_disco_with_undecided(disco) + .undecided + .iter() + .cloned() + .collect(); as_vec.sort(); as_vec } fn sorted_missing(disco: &PartialDiscovery) -> Vec { - let mut as_vec: Vec = - disco.missing.iter().cloned().collect(); + let mut as_vec: Vec = unwrap_disco_with_undecided(disco) + .missing + .iter() + .cloned() + .collect(); as_vec.sort(); as_vec } @@ -533,22 +743,38 @@ Ok(as_vec) } + fn assert_disco_is_only_commons( + disco: &PartialDiscovery, + ) -> () { + if let Com(_) = disco { + return; + } + panic!("Not a discovery::OnlyCommon"); + } + + fn assert_disco_is_b(disco: &PartialDiscovery) -> () { + if let Und(_) = disco { + return; + } + panic!("Not a discovery::WithUndecided"); + } + #[test] fn test_add_common_get_undecided() -> Result<(), GraphError> { let mut disco = full_disco(); - assert_eq!(disco.undecided, None); assert!(!disco.has_info()); assert_eq!(disco.stats().undecided, None); disco.add_common_revisions(vec![11, 12])?; assert!(disco.has_info()); assert!(!disco.is_complete()); - assert!(disco.missing.is_empty()); // add_common_revisions did not trigger a premature computation - // of `undecided`, let's check that and ask for them - assert_eq!(disco.undecided, None); - disco.ensure_undecided()?; + // of `undecided`, let's check that, force the mutation and + // ask for them + assert_disco_is_only_commons(&disco); + disco.add_missing_revisions(Vec::new())?; + assert_disco_is_b(&disco); assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]); assert_eq!(disco.stats().undecided, Some(4)); Ok(()) @@ -578,7 +804,6 @@ eprintln!("test_add_missing_early_stop"); let mut disco = full_disco(); disco.add_common_revisions(vec![13, 3, 4])?; - disco.ensure_children_cache()?; // 12 is grand-child of 6 through 9 // passing them in this order maximizes the chances of the // early continue to do the wrong thing @@ -592,22 +817,31 @@ #[test] fn test_limit_sample_no_need_to() { let sample = vec![1, 2, 3, 4]; - assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); + assert_eq!( + full_disco_with_undecided().limit_sample(sample, 10), + vec![1, 2, 3, 4] + ); } #[test] fn test_limit_sample_less_than_half() { - assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]); + assert_eq!( + full_disco_with_undecided().limit_sample((1..6).collect(), 2), + vec![4, 2] + ); } #[test] fn test_limit_sample_more_than_half() { - assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]); + assert_eq!( + full_disco_with_undecided().limit_sample((1..4).collect(), 2), + vec![3, 2] + ); } #[test] fn test_limit_sample_no_random() { - let mut disco = full_disco(); + let mut disco = full_disco_with_undecided(); disco.randomize = false; assert_eq!( disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4), @@ -618,7 +852,7 @@ #[test] fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { let mut disco = full_disco(); - disco.undecided = Some((1..=13).collect()); + force_undecided(&mut disco, 1..=13); let mut sample_vec = disco.take_quick_sample(vec![], 4)?; sample_vec.sort(); @@ -629,7 +863,6 @@ #[test] fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { let mut disco = disco12(); - disco.ensure_undecided()?; let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; sample_vec.sort(); @@ -642,7 +875,7 @@ #[test] fn test_children_cache() -> Result<(), GraphError> { - let mut disco = full_disco(); + let mut disco = full_disco_with_undecided(); disco.ensure_children_cache()?; let cache = disco.children_cache.unwrap(); @@ -662,10 +895,8 @@ #[test] fn test_complete_sample() { - let mut disco = full_disco(); - let undecided: HashSet = - [4, 7, 9, 2, 3].iter().cloned().collect(); - disco.undecided = Some(undecided); + let mut disco = full_disco_with_undecided(); + disco.undecided = vec![4, 7, 9, 2, 3].into_iter().collect(); let mut sample = vec![0]; disco.random_complete_sample(&mut sample, 3); @@ -678,8 +909,8 @@ #[test] fn test_bidirectional_sample() -> Result<(), GraphError> { - let mut disco = full_disco(); - disco.undecided = Some((0..=13).into_iter().collect()); + let mut disco = full_disco_with_undecided(); + disco.undecided = (0..=13).collect(); let (sample_set, size) = disco.bidirectional_sample(7)?; assert_eq!(size, 7);