diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -1,3 +1,4 @@
+# coding: utf8
 # copies.py - copy detection for Mercurial
 #
 # Copyright 2008 Matt Mackall <mpm@selenic.com>
@@ -285,40 +286,93 @@
     cl = repo.changelog
     isancestor = cl.isancestorrev
-    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
-    mrset = set(missingrevs)
+
+    # To track renames from "A" to "B", we need to gather all parent → children
+    # edges that are contained in `::B` but not in `::A`.
+    #
+    #
+    # To do so, we need to gather all revisions exclusive¹ to "B" (ie: `::b -
+    # ::a`) and also all the "root points", ie the parents of the exclusive set
+    # that belong to ::a. These are exactly all the revisions needed to express
+    # the parent → children edges we need to combine.
+    #
+    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
+    # excluding paths that lead to roots that are not ancestors of `a`. We
+    # keep this out of the explanation because it is hard enough without this special case.
+
+    parents = cl._uncheckedparentrevs
+    nullrev = node.nullrev
+    graph_roots = (nullrev, nullrev)
+
+    ancestors = cl.ancestors([a.rev()], inclusive=True)
+    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
     roots = set()
-    for r in missingrevs:
-        for p in cl.parentrevs(r):
-            if p == node.nullrev:
-                continue
-            if p not in children:
-                children[p] = [r]
-            else:
-                children[p].append(r)
-            if p not in mrset:
-                roots.add(p)
+    has_graph_roots = False
+
+    # iterate over `only(B, A)`
+    for r in revs:
+        ps = parents(r)
+        if ps == graph_roots:
+            has_graph_roots = True
+        else:
+            p1, p2 = ps
+
+            # find all the "root points" (see larger comment above)
+            if p1 != nullrev and p1 in ancestors:
+                roots.add(p1)
+            if p2 != nullrev and p2 in ancestors:
+                roots.add(p2)
     if not roots:
         # no common revision to track copies from
         return {}
-    min_root = min(roots)
-
-    from_head = set(
-        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
-    )
-
-    iterrevs = set(from_head)
-    iterrevs &= mrset
-    iterrevs.update(roots)
-    iterrevs.remove(b.rev())
-    revs = sorted(iterrevs)
+    if has_graph_roots:
+        # this deals with the special case mentioned in the [1] footnote. We
+        # must filter out revisions that lead to non-common graph roots.
+        roots = list(roots)
+        m = min(roots)
+        h = [b.rev()]
+        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
+        roots_to_head = set(roots_to_head)
+        revs = [r for r in revs if r in roots_to_head]
 
     if repo.filecopiesmode == b'changeset-sidedata':
+        # When using side-data, we will process the edges "from" the children.
+        # We iterate over the children, gathering previously collected data
+        # for the parents. To know when the parent data is no longer
+        # necessary, we keep a counter of how many children each revision has.
+        #
+        # An interesting property of `children_count` is that it only contains
+        # revisions that will be relevant for an edge of the graph. So if a
+        # child has a parent not in `children_count`, that edge should not be
+        # processed.
+        children_count = dict((r, 0) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == node.nullrev:
+                    continue
+                children_count[r] = 0
+                if p in children_count:
+                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
         return _combine_changeset_copies(
-            revs, children, b.rev(), revinfo, match, isancestor
+            revs, children_count, b.rev(), revinfo, match, isancestor
         )
     else:
+        # When not using side-data, we will process the edges "from" the
+        # parent, so we need a full mapping of the parent -> children relation.
+        children = dict((r, []) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == node.nullrev:
+                    continue
+                children[r] = []
+                if p in children:
+                    children[p].append(r)
+        x = revs.pop()
+        assert x == b.rev()
+        revs.extend(roots)
+        revs.sort()
+
         revinfo = _revinfo_getter_extra(repo)
         return _combine_changeset_copies_extra(
             revs, children, b.rev(), revinfo, match, isancestor
@@ -326,12 +380,12 @@
 
 
 def _combine_changeset_copies(
-    revs, children, targetrev, revinfo, match, isancestor
+    revs, children_count, targetrev, revinfo, match, isancestor
 ):
     """combine the copies information for each item of iterrevs
 
     revs: sorted iterable of revision to visit
-    children: a {parent: [children]} mapping.
+    children_count: a {parent: <number-of-relevant-children>} mapping.
     targetrev: the final copies destination revision (not in iterrevs)
     revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
     match: a matcher
@@ -343,57 +397,68 @@
 
     if rustmod is not None and alwaysmatch:
         return rustmod.combine_changeset_copies(
-            list(revs), children, targetrev, revinfo, isancestor
+            list(revs), children_count, targetrev, revinfo, isancestor
         )
 
     isancestor = cached_is_ancestor(isancestor)
 
     all_copies = {}
-    for r in revs:
-        copies = all_copies.pop(r, None)
-        if copies is None:
-            # this is a root
-            copies = {}
-        for i, c in enumerate(children[r]):
-            p1, p2, changes = revinfo(c)
-            childcopies = {}
-            if r == p1:
-                parent = 1
-                if changes is not None:
-                    childcopies = changes.copied_from_p1
+    for current_rev in revs:
+        p1, p2, changes = revinfo(current_rev)
+        current_copies = None
+        for parent, parent_rev in ((1, p1), (2, p2)):
+            if parent_rev == node.nullrev:
+                continue
+            remaining_children = children_count.get(parent_rev)
+            if remaining_children is None:
+                continue
+            remaining_children -= 1
+            children_count[parent_rev] = remaining_children
+            if remaining_children:
+                copies = all_copies.get(parent_rev, None)
             else:
-                assert r == p2
-                parent = 2
-                if changes is not None:
-                    childcopies = changes.copied_from_p2
-            if not alwaysmatch:
-                childcopies = {
-                    dst: src for dst, src in childcopies.items() if match(dst)
-                }
+                copies = all_copies.pop(parent_rev, None)
+
+            if copies is None:
+                # this is a root
+                copies = {}
+            newcopies = copies
-            if childcopies:
-                newcopies = copies.copy()
-                for dest, source in pycompat.iteritems(childcopies):
-                    prev = copies.get(source)
-                    if prev is not None and prev[1] is not None:
-                        source = prev[1]
-                    newcopies[dest] = (c, source)
-                assert newcopies is not copies
-            if changes is not None and changes.removed:
-                if newcopies is copies:
+            if changes is not None:
+                childcopies = {}
+                if parent == 1:
+                    childcopies = changes.copied_from_p1
+                elif parent == 2:
+                    childcopies = changes.copied_from_p2
+
+                if not alwaysmatch:
+                    childcopies = {
+                        dst: src
+                        for dst, src in childcopies.items()
+                        if match(dst)
+                    }
+                if childcopies:
                     newcopies = copies.copy()
-            for f in changes.removed:
-                if f in newcopies:
-                    if newcopies is copies:
-                        # copy on write to avoid affecting potential other
-                        # branches. when there are no other branches, this
-                        # could be avoided.
-                        newcopies = copies.copy()
-                    newcopies[f] = (c, None)
-            othercopies = all_copies.get(c)
-            if othercopies is None:
-                all_copies[c] = newcopies
-            elif newcopies is othercopies:
+                    for dest, source in pycompat.iteritems(childcopies):
+                        prev = copies.get(source)
+                        if prev is not None and prev[1] is not None:
+                            source = prev[1]
+                        newcopies[dest] = (current_rev, source)
+                    assert newcopies is not copies
+                if changes.removed:
+                    if newcopies is copies:
+                        newcopies = copies.copy()
+                    for f in changes.removed:
+                        if f in newcopies:
+                            if newcopies is copies:
+                                # copy on write to avoid affecting potential other
+                                # branches. when there are no other branches, this
+                                # could be avoided.
+                                newcopies = copies.copy()
+                            newcopies[f] = (current_rev, None)
+            if current_copies is None:
+                current_copies = newcopies
+            elif current_copies is newcopies:
                 # nothing to merge:
                 pass
             else:
@@ -404,17 +469,11 @@
             # This is an arbitrary choice made anew when implementing
             # changeset based copies. It was made without regards with
             # potential filelog related behavior.
-            if parent == 1:
-                if newcopies is copies:
-                    newcopies = copies.copy()
-                minor, major = othercopies, newcopies
-            else:
-                # we do not know if the other dict is a copy or not, so we
-                # need to blindly copy it. Future change should make this
-                # unnecessary.
-                minor, major = newcopies, othercopies.copy()
-            copies = _merge_copies_dict(minor, major, isancestor, changes)
-            all_copies[c] = copies
+            assert parent == 2
+            current_copies = _merge_copies_dict(
+                newcopies, current_copies, isancestor, changes
+            )
+        all_copies[current_rev] = current_copies
 
     final_copies = {}
     for dest, (tt, source) in all_copies[targetrev].items():
@@ -517,13 +576,21 @@
     specific "extra" based storage for copy information"""
     all_copies = {}
     alwaysmatch = match.always()
+    # iterate over all the "parent" side of copy tracing "edges"
     for r in revs:
+
+        # fetch previously computed data for that parent, if any
         copies = all_copies.pop(r, None)
         if copies is None:
             # this is a root
             copies = {}
+
+        # iterate over all known children to chain the existing data with the
+        # data from the parent → child edge.
         for i, c in enumerate(children[r]):
             p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
+
+            # select the right parent → child edge
             if r == p1:
                 parent = 1
                 childcopies = p1copies
@@ -535,6 +602,8 @@
             childcopies = {
                 dst: src for dst, src in childcopies.items() if match(dst)
             }
+
+            # chain the data in the edge with the existing data
             newcopies = copies
             if childcopies:
                 newcopies = copies.copy()
@@ -552,6 +621,9 @@
                         # could be avoided.
                         newcopies = copies.copy()
                     newcopies[f] = (c, None)
+
+            # check if we need to combine the data from another parent (for
+            # that child). See comment below for details.
             othercopies = all_copies.get(c)
             if othercopies is None:
                 all_copies[c] = newcopies
@@ -573,6 +645,7 @@
                 )
                 all_copies[c] = newcopies
 
+    # filter out internal details and return a {dest: source} mapping
     final_copies = {}
     for dest, (tt, source) in all_copies[targetrev].items():
         if source is not None:
diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs
+++ b/rust/hg-core/src/copy_tracing.rs
@@ -1,6 +1,7 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::hg_path::HgPathBuf;
 use crate::Revision;
+use crate::NULL_REVISION;
 
 use im_rc::ordmap::DiffItem;
 use im_rc::ordmap::OrdMap;
@@ -326,7 +327,7 @@
 /// ancestor of another
 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
     revs: Vec<Revision>,
-    children: HashMap<Revision, Vec<Revision>>,
+    mut children_count: HashMap<Revision, usize>,
     target_rev: Revision,
     rev_info: RevInfoMaker<D>,
     is_ancestor: &A,
@@ -335,57 +336,63 @@
     let mut oracle = AncestorOracle::new(is_ancestor);
 
     for rev in revs {
-        // Retrieve data computed in a previous iteration
-        let copies = all_copies.remove(&rev);
-        let copies = match copies {
-            Some(c) => c,
-            None => TimeStampedPathCopies::default(), // root of the walked set
-        };
-
-        let current_children = match children.get(&rev) {
-            Some(c) => c,
-            None => panic!("inconsistent `revs` and `children`"),
-        };
-
-        for child in current_children {
-            // We will chain the copies information accumulated for `rev` with
-            // the individual copies information for each of its children.
-            // Creating a new PathCopies for each `rev` → `children` vertex.
-            let mut d: DataHolder<D> = DataHolder { data: None };
-            let (p1, p2, changes) = rev_info(*child, &mut d);
+        let mut d: DataHolder<D> = DataHolder { data: None };
+        let (p1, p2, changes) = rev_info(rev, &mut d);
 
-            let parent = if rev == p1 {
-                Parent::FirstParent
-            } else {
-                assert_eq!(rev, p2);
-                Parent::SecondParent
-            };
-            let new_copies =
-                add_from_changes(&copies, &changes, parent, *child);
+        // We will chain the copies information accumulated for the parent
+        // with the individual copies information of the current revision.
+        // Creating a new TimeStampedPath for each `rev` → `children` vertex.
+        let mut copies: Option<TimeStampedPathCopies> = None;
+        if p1 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies =
+                get_parent_copies(&mut all_copies, &mut children_count, p1);
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::FirstParent,
+                    rev,
+                );
+                // keep that data around for potential later combination
+                copies = Some(vertex_copies);
+            }
+        }
+        if p2 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies =
+                get_parent_copies(&mut all_copies, &mut children_count, p2);
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::SecondParent,
+                    rev,
+                );
 
-            // Merge has two parents needs to combines their copy information.
-            //
-            // If the vertex from the other parent was already processed, we
-            // will have a value for the child ready to be used. We need to
-            // grab it and combine it with the one we already
-            // computed. If not we can simply store the newly
-            // computed data. The processing happening at
-            // the time of the second parent will take care of combining the
-            // two TimeStampedPathCopies instance.
-            match all_copies.remove(child) {
-                None => {
-                    all_copies.insert(child, new_copies);
-                }
-                Some(other_copies) => {
-                    let (minor, major) = match parent {
-                        Parent::FirstParent => (other_copies, new_copies),
-                        Parent::SecondParent => (new_copies, other_copies),
-                    };
-                    let merged_copies =
-                        merge_copies_dict(minor, major, &changes, &mut oracle);
-                    all_copies.insert(child, merged_copies);
-                }
-            };
+                copies = match copies {
+                    None => Some(vertex_copies),
+                    // A merge has two parents and needs to combine their
+                    // copy information.
+                    //
+                    // If we got data from both parents, we need to combine
+                    // them.
+                    Some(copies) => Some(merge_copies_dict(
+                        vertex_copies,
+                        copies,
+                        &changes,
+                        &mut oracle,
+                    )),
+                };
+            }
+        }
+        match copies {
+            Some(copies) => {
+                all_copies.insert(rev, copies);
+            }
+            _ => {}
+        }
     }
 
@@ -403,6 +410,32 @@
     result
 }
 
+/// fetch previously computed information
+///
+/// If no other children are expected to need this information, we drop it from
+/// the cache.
+///
+/// If parent is not part of the set we are expected to walk, return None.
+fn get_parent_copies(
+    all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
+    children_count: &mut HashMap<Revision, usize>,
+    parent_rev: Revision,
+) -> Option<TimeStampedPathCopies> {
+    let count = children_count.get_mut(&parent_rev)?;
+    *count -= 1;
+    if *count == 0 {
+        match all_copies.remove(&parent_rev) {
+            Some(c) => Some(c),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    } else {
+        match all_copies.get(&parent_rev) {
+            Some(c) => Some(c.clone()),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    }
+}
+
 /// Combine ChangedFiles with some existing PathCopies information and return
 /// the result
 fn add_from_changes(
diff --git a/rust/hg-cpython/src/copy_tracing.rs b/rust/hg-cpython/src/copy_tracing.rs
--- a/rust/hg-cpython/src/copy_tracing.rs
+++ b/rust/hg-cpython/src/copy_tracing.rs
@@ -23,7 +23,7 @@
 pub fn combine_changeset_copies_wrapper(
     py: Python,
     revs: PyList,
-    children: PyDict,
+    children_count: PyDict,
     target_rev: Revision,
     rev_info: PyObject,
     is_ancestor: PyObject,
@@ -93,20 +93,15 @@
         (p1, p2, files)
     });
 
-    let children: PyResult<_> = children
+    let children_count: PyResult<_> = children_count
         .items(py)
         .iter()
-        .map(|(k, v)| {
-            let v: &PyList = v.cast_as(py)?;
-            let v: PyResult<_> =
-                v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
-            Ok((k.extract(py)?, v?))
-        })
+        .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
         .collect();
 
     let res = combine_changeset_copies(
         revs?,
-        children?,
+        children_count?,
         target_rev,
         rev_info_maker,
         &is_ancestor_wrap,
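
The "exclusive set plus root points" walk introduced in the copies.py hunks
can be shown on a toy graph. The following is a minimal, self-contained
Python sketch, not the Mercurial API: the revision numbers, the `parents`
table and the hand-rolled `ancestors` helper are all made up for
illustration (the real code uses `cl.findmissingrevs` and `cl.ancestors`).

    # Revisions are ints; `parents` maps rev -> (p1, p2); -1 is nullrev.
    parents = {
        0: (-1, -1),
        1: (0, -1),
        2: (1, -1),  # a == 2
        3: (1, -1),
        4: (3, 2),   # b == 4, a merge
    }

    def ancestors(rev):
        seen = set()
        stack = [rev]
        while stack:
            r = stack.pop()
            if r == -1 or r in seen:
                continue
            seen.add(r)
            stack.extend(parents[r])
        return seen

    a, b = 2, 4
    exclusive = sorted(ancestors(b) - ancestors(a))  # `::b - ::a` == [3, 4]
    roots = {
        p
        for r in exclusive
        for p in parents[r]
        if p != -1 and p in ancestors(a)
    }
    print(exclusive, sorted(roots))  # [3, 4] [1, 2]

Every parent → child edge the combine step needs is expressible from
`roots` plus the exclusive set, which is exactly what the new code walks.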
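
The `children_count` dict built in the side-data branch acts as a per-parent
reference count: each processed child decrements its parents' counters, and
when a counter reaches zero the parent's accumulated copies are popped from
`all_copies` instead of being kept alive for the rest of the walk. A
stripped-down sketch of that life cycle, reusing the toy graph shape from
the previous example (all names and values are illustrative):

    parents = {3: (1, -1), 4: (3, 2)}  # exclusive revisions only
    roots = {1, 2}
    revs = [3, 4]  # topological order, b == 4

    # same counting loop shape as in _changesetforwardcopies above
    children_count = dict((r, 0) for r in roots)
    for r in revs:
        for p in parents[r]:
            if p == -1:
                continue
            children_count[r] = 0
            if p in children_count:
                children_count[p] += 1
    assert children_count == {1: 1, 2: 1, 3: 1, 4: 0}

    all_copies = {1: {b'x': (1, None)}, 2: {}}  # pretend root data
    for r in revs:
        for p in parents[r]:
            remaining = children_count.get(p)
            if remaining is None:
                continue  # edge leaving the walked set: ignore it
            children_count[p] = remaining - 1
            if children_count[p]:
                copies = all_copies.get(p)  # other children still need it
            else:
                copies = all_copies.pop(p, None)  # last user: release it
            # (the real code would now chain `copies` with r's own renames
            # and store the result under all_copies[r])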
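
The `newcopies is copies` checks kept by the new `_combine_changeset_copies`
body implement a copy-on-write scheme: `newcopies` starts as an alias of the
parent's mapping and is only forked on the first actual write, so a revision
that adds no copy information shares its parent's dict instead of
duplicating it. A minimal sketch of the pattern, with toy file names and
revision numbers (not Mercurial data):

    copies = {b'dst.c': (10, b'src.c')}  # parent data: dst.c came from src.c
    newcopies = copies                   # alias only; nothing written yet
    childcopies = {b'new.c': b'dst.c'}   # this revision copies dst.c to new.c
    if childcopies:
        newcopies = copies.copy()        # first write: fork the mapping
        for dest, source in childcopies.items():
            prev = copies.get(source)
            if prev is not None and prev[1] is not None:
                source = prev[1]         # chain through the earlier rename
            newcopies[dest] = (11, source)
    assert copies is not newcopies
    assert newcopies[b'new.c'] == (11, b'src.c')

The chaining step is why `new.c` ends up traced back to `src.c` rather than
to the intermediate `dst.c`.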
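
The change is also visible at the Python ⇄ Rust boundary:
`combine_changeset_copies_wrapper` now receives a flat {parent: count} dict
instead of {parent: [children]}, so the cpython glue extracts one integer
per entry instead of casting and walking a PyList. The relationship between
the old and new payloads is simply (illustrative values, not Mercurial API):

    children = {1: [3], 2: [4], 3: [4]}  # old payload: parent -> [children]
    children_count = {p: len(cs) for p, cs in children.items()}
    assert children_count == {1: 1, 2: 1, 3: 1}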