diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py --- a/mercurial/setdiscovery.py +++ b/mercurial/setdiscovery.py @@ -92,11 +92,19 @@ dist.setdefault(p, d + 1) visit.append(p) -def _limitsample(sample, desiredlen): - """return a random subset of sample of at most desiredlen item""" - if len(sample) > desiredlen: - sample = set(random.sample(sample, desiredlen)) - return sample +def _limitsample(sample, desiredlen, randomize=True): + """return a random subset of sample of at most desiredlen item. + + If randomize is False, though, a deterministic subset is returned. + This is meant for integration tests. + """ + if len(sample) <= desiredlen: + return sample + if randomize: + return set(random.sample(sample, desiredlen)) + sample = list(sample) + sample.sort() + return set(sample[:desiredlen]) class partialdiscovery(object): """an object representing ongoing discovery @@ -110,7 +118,7 @@ (all tracked revisions are known locally) """ - def __init__(self, repo, targetheads, respectsize): + def __init__(self, repo, targetheads, respectsize, randomize=True): self._repo = repo self._targetheads = targetheads self._common = repo.changelog.incrementalmissingrevs() @@ -118,6 +126,7 @@ self.missing = set() self._childrenmap = None self._respectsize = respectsize + self.randomize = randomize def addcommons(self, commons): """register nodes known as common""" @@ -222,7 +231,7 @@ sample = set(self._repo.revs('heads(%ld)', revs)) if len(sample) >= size: - return _limitsample(sample, size) + return _limitsample(sample, size, randomize=self.randomize) _updatesample(None, headrevs, sample, self._parentsgetter(), quicksamplesize=size) @@ -249,10 +258,15 @@ if not self._respectsize: size = max(size, min(len(revsroots), len(revsheads))) - sample = _limitsample(sample, size) + sample = _limitsample(sample, size, randomize=self.randomize) if len(sample) < size: more = size - len(sample) - sample.update(random.sample(list(revs - sample), more)) + takefrom = list(revs - sample) + if self.randomize: + sample.update(random.sample(takefrom, more)) + else: + takefrom.sort() + sample.update(takefrom[:more]) return sample def findcommonheads(ui, local, remote, diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -31,6 +31,7 @@ missing: HashSet, rng: Rng, respect_size: bool, + randomize: bool, } pub struct DiscoveryStats { @@ -151,14 +152,26 @@ /// will interpret the size argument requested by the caller. If it's /// `false`, they are allowed to produce a sample whose size is more /// appropriate to the situation (typically bigger). + /// + /// The `randomize` boolean affects sampling, and specifically how + /// limiting or last-minute expanding is been done: + /// + /// If `true`, both will perform random picking from `self.undecided`. + /// This is currently the best for actual discoveries. + /// + /// If `false`, a reproductible picking strategy is performed. This is + /// useful for integration tests. pub fn new( graph: G, target_heads: Vec, respect_size: bool, + randomize: bool, ) -> Self { let mut seed: [u8; 16] = [0; 16]; - thread_rng().fill_bytes(&mut seed); - Self::new_with_seed(graph, target_heads, seed, respect_size) + if randomize { + thread_rng().fill_bytes(&mut seed); + } + Self::new_with_seed(graph, target_heads, seed, respect_size, randomize) } pub fn new_with_seed( @@ -166,6 +179,7 @@ target_heads: Vec, seed: [u8; 16], respect_size: bool, + randomize: bool, ) -> Self { PartialDiscovery { undecided: None, @@ -176,6 +190,7 @@ missing: HashSet::new(), rng: Rng::from_seed(seed), respect_size: respect_size, + randomize: randomize, } } @@ -186,6 +201,11 @@ mut sample: Vec, size: usize, ) -> Vec { + if !self.randomize { + sample.sort(); + sample.truncate(size); + return sample; + } let sample_len = sample.len(); if sample_len <= size { return sample; @@ -436,13 +456,15 @@ /// A PartialDiscovery as for pushing all the heads of `SampleGraph` /// - /// To avoid actual randomness in tests, we give it a fixed random seed. + /// To avoid actual randomness in these tests, we give it a fixed + /// random seed, but by default we'll test the random version. fn full_disco() -> PartialDiscovery { PartialDiscovery::new_with_seed( SampleGraph, vec![10, 11, 12, 13], [0; 16], true, + true, ) } @@ -450,7 +472,13 @@ /// /// To avoid actual randomness in tests, we give it a fixed random seed. fn disco12() -> PartialDiscovery { - PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true) + PartialDiscovery::new_with_seed( + SampleGraph, + vec![12], + [0; 16], + true, + true, + ) } fn sorted_undecided( @@ -535,6 +563,16 @@ } #[test] + fn test_limit_sample_no_random() { + let mut disco = full_disco(); + disco.randomize = false; + assert_eq!( + disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4), + vec![1, 3, 5, 7] + ); + } + + #[test] fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { let mut disco = full_disco(); disco.undecided = Some((1..=13).collect()); diff --git a/rust/hg-cpython/src/discovery.rs b/rust/hg-cpython/src/discovery.rs --- a/rust/hg-cpython/src/discovery.rs +++ b/rust/hg-cpython/src/discovery.rs @@ -36,7 +36,8 @@ _cls, repo: PyObject, targetheads: PyObject, - respectsize: bool + respectsize: bool, + randomize: bool = true ) -> PyResult { let index = repo.getattr(py, "changelog")?.getattr(py, "index")?; Self::create_instance( @@ -44,7 +45,8 @@ RefCell::new(Box::new(CorePartialDiscovery::new( Index::new(py, index)?, rev_pyiter_collect(py, &targetheads)?, - respectsize + respectsize, + randomize, ))) ) } diff --git a/tests/test-rust-discovery.py b/tests/test-rust-discovery.py --- a/tests/test-rust-discovery.py +++ b/tests/test-rust-discovery.py @@ -103,6 +103,9 @@ self.assertTrue(disco.iscomplete()) self.assertEqual(disco.commonheads(), {1}) + def testinitnorandom(self): + PartialDiscovery(self.repo(), [3], True, randomize=False) + if __name__ == '__main__': import silenttestrunner silenttestrunner.main(__name__)