diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py --- a/mercurial/setdiscovery.py +++ b/mercurial/setdiscovery.py @@ -92,11 +92,19 @@ dist.setdefault(p, d + 1) visit.append(p) -def _limitsample(sample, desiredlen): - """return a random subset of sample of at most desiredlen item""" - if len(sample) > desiredlen: - sample = set(random.sample(sample, desiredlen)) - return sample +def _limitsample(sample, desiredlen, randomize=True): + """return a random subset of sample of at most desiredlen item. + + If randomize is False, though, a deterministic subset is returned. + This is meant for integration tests. + """ + if len(sample) <= desiredlen: + return sample + if randomize: + return set(random.sample(sample, desiredlen)) + sample = list(sample) + sample.sort() + return set(sample[:desiredlen]) class partialdiscovery(object): """an object representing ongoing discovery @@ -110,13 +118,14 @@ (all tracked revisions are known locally) """ - def __init__(self, repo, targetheads): + def __init__(self, repo, targetheads, randomize=True): self._repo = repo self._targetheads = targetheads self._common = repo.changelog.incrementalmissingrevs() self._undecided = None self.missing = set() self._childrenmap = None + self.randomize = randomize def addcommons(self, commons): """register nodes known as common""" @@ -221,7 +230,7 @@ sample = set(self._repo.revs('heads(%ld)', revs)) if len(sample) >= size: - return _limitsample(sample, size) + return _limitsample(sample, size, randomize=self.randomize) _updatesample(None, headrevs, sample, self._parentsgetter(), quicksamplesize=size) @@ -246,10 +255,15 @@ _updatesample(revs, revsroots, sample, childrenrevs) assert sample - sample = _limitsample(sample, size) + sample = _limitsample(sample, size, randomize=self.randomize) if len(sample) < size: more = size - len(sample) - sample.update(random.sample(list(revs - sample), more)) + takefrom = list(revs - sample) + if self.randomize: + sample.update(random.sample(takefrom, more)) + else: + takefrom.sort() + sample.update(takefrom[:more]) return sample def findcommonheads(ui, local, remote, diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -29,6 +29,7 @@ children_cache: Option>>, missing: HashSet, rng: Rng, + randomize: bool, } pub struct DiscoveryStats { @@ -144,16 +145,32 @@ /// If we want to make the signature more flexible, /// we'll have to make it a type argument of `PartialDiscovery` or a trait /// object since we'll keep it in the meanwhile - pub fn new(graph: G, target_heads: Vec) -> Self { + /// + /// The `randomize` boolean affects sampling, and specifically how + /// limiting or last-minute expanding is been done: + /// + /// If `true`, both will perform random picking from `self.undecided`. + /// This is currently the best for actual discoveries. + /// + /// If `false`, a reproductible picking strategy is performed. This is + /// useful for integration tests. + pub fn new( + graph: G, + target_heads: Vec, + randomize: bool, + ) -> Self { let mut seed: [u8; 16] = [0; 16]; - thread_rng().fill_bytes(&mut seed); - Self::new_with_seed(graph, target_heads, seed) + if randomize { + thread_rng().fill_bytes(&mut seed); + } + Self::new_with_seed(graph, target_heads, seed, randomize) } pub fn new_with_seed( graph: G, target_heads: Vec, seed: [u8; 16], + randomize: bool, ) -> Self { PartialDiscovery { undecided: None, @@ -163,6 +180,7 @@ common: MissingAncestors::new(graph, vec![]), missing: HashSet::new(), rng: Rng::from_seed(seed), + randomize: randomize, } } @@ -173,6 +191,11 @@ mut sample: Vec, size: usize, ) -> Vec { + if !self.randomize { + sample.sort(); + sample.truncate(size); + return sample + } let sample_len = sample.len(); if sample_len <= size { return sample; @@ -413,12 +436,14 @@ /// A PartialDiscovery as for pushing all the heads of `SampleGraph` /// - /// To avoid actual randomness in tests, we give it a fixed random seed. + /// To avoid actual randomness in these tests, we give it a fixed + /// random seed, but by default we'll test the random version. fn full_disco() -> PartialDiscovery { PartialDiscovery::new_with_seed( SampleGraph, vec![10, 11, 12, 13], [0; 16], + true, ) } @@ -426,7 +451,7 @@ /// /// To avoid actual randomness in tests, we give it a fixed random seed. fn disco12() -> PartialDiscovery { - PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16]) + PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true) } fn sorted_undecided( @@ -511,6 +536,14 @@ } #[test] + fn test_limit_sample_no_random() { + let mut disco = full_disco(); + disco.randomize = false; + assert_eq!(disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4), + vec![1, 3, 5, 7]); + } + + #[test] fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { let mut disco = full_disco(); disco.undecided = Some((1..=13).collect()); diff --git a/rust/hg-cpython/src/discovery.rs b/rust/hg-cpython/src/discovery.rs --- a/rust/hg-cpython/src/discovery.rs +++ b/rust/hg-cpython/src/discovery.rs @@ -30,13 +30,15 @@ def __new__( _cls, index: PyObject, - targetheads: PyObject + targetheads: PyObject, + randomize: bool = true ) -> PyResult { Self::create_instance( py, RefCell::new(Box::new(CorePartialDiscovery::new( Index::new(py, index)?, rev_pyiter_collect(py, &targetheads)?, + randomize, ))) ) } diff --git a/tests/test-rust-discovery.py b/tests/test-rust-discovery.py --- a/tests/test-rust-discovery.py +++ b/tests/test-rust-discovery.py @@ -106,6 +106,10 @@ self.assertTrue(disco.iscomplete()) self.assertEqual(disco.commonheads(), {1}) + def testinitnorandom(self): + idx = self.parseindex() + PartialDiscovery(idx, [3], randomize=False) + if __name__ == '__main__': import silenttestrunner silenttestrunner.main(__name__)