diff --git a/mercurial/copies.py b/mercurial/copies.py --- a/mercurial/copies.py +++ b/mercurial/copies.py @@ -183,10 +183,27 @@ * p1copies: mapping of copies from p1 * p2copies: mapping of copies from p2 * removed: a list of removed files + * ismerged: a callback to know if file was merged in that revision """ cl = repo.changelog parents = cl.parentrevs + def get_ismerged(rev): + ctx = repo[rev] + + def ismerged(path): + if path not in ctx.files(): + return False + fctx = ctx[path] + parents = fctx._filelog.parents(fctx._filenode) + nb_parents = 0 + for n in parents: + if n != node.nullid: + nb_parents += 1 + return nb_parents >= 2 + + return ismerged + if repo.filecopiesmode == b'changeset-sidedata': changelogrevision = cl.changelogrevision flags = cl.flags @@ -218,6 +235,7 @@ def revinfo(rev): p1, p2 = parents(rev) + value = None if flags(rev) & REVIDX_SIDEDATA: e = merge_caches.pop(rev, None) if e is not None: @@ -228,12 +246,22 @@ removed = c.filesremoved if p1 != node.nullrev and p2 != node.nullrev: # XXX some case we over cache, IGNORE - merge_caches[rev] = (p1, p2, p1copies, p2copies, removed) + value = merge_caches[rev] = ( + p1, + p2, + p1copies, + p2copies, + removed, + get_ismerged(rev), + ) else: p1copies = {} p2copies = {} removed = [] - return p1, p2, p1copies, p2copies, removed + + if value is None: + value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev)) + return value else: @@ -242,7 +270,7 @@ ctx = repo[rev] p1copies, p2copies = ctx._copies removed = ctx.filesremoved() - return p1, p2, p1copies, p2copies, removed + return p1, p2, p1copies, p2copies, removed, get_ismerged(rev) return revinfo @@ -256,6 +284,7 @@ revinfo = _revinfogetter(repo) cl = repo.changelog + isancestor = cl.isancestorrev # XXX we should had chaching to this. missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()]) mrset = set(missingrevs) roots = set() @@ -283,10 +312,14 @@ iterrevs.update(roots) iterrevs.remove(b.rev()) revs = sorted(iterrevs) - return _combinechangesetcopies(revs, children, b.rev(), revinfo, match) + return _combinechangesetcopies( + revs, children, b.rev(), revinfo, match, isancestor + ) -def _combinechangesetcopies(revs, children, targetrev, revinfo, match): +def _combinechangesetcopies( + revs, children, targetrev, revinfo, match, isancestor +): """combine the copies information for each item of iterrevs revs: sorted iterable of revision to visit @@ -305,7 +338,7 @@ # this is a root copies = {} for i, c in enumerate(children[r]): - p1, p2, p1copies, p2copies, removed = revinfo(c) + p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c) if r == p1: parent = 1 childcopies = p1copies @@ -319,9 +352,12 @@ } newcopies = copies if childcopies: - newcopies = _chain(newcopies, childcopies) - # _chain makes a copies, we can avoid doing so in some - # simple/linear cases. + newcopies = copies.copy() + for dest, source in pycompat.iteritems(childcopies): + prev = copies.get(source) + if prev is not None and prev[1] is not None: + source = prev[1] + newcopies[dest] = (c, source) assert newcopies is not copies for f in removed: if f in newcopies: @@ -330,7 +366,7 @@ # branches. when there are no other branches, this # could be avoided. newcopies = copies.copy() - del newcopies[f] + newcopies[f] = (c, None) othercopies = all_copies.get(c) if othercopies is None: all_copies[c] = newcopies @@ -338,21 +374,55 @@ # we are the second parent to work on c, we need to merge our # work with the other. # - # Unlike when copies are stored in the filelog, we consider - # it a copy even if the destination already existed on the - # other branch. It's simply too expensive to check if the - # file existed in the manifest. - # # In case of conflict, parent 1 take precedence over parent 2. # This is an arbitrary choice made anew when implementing # changeset based copies. It was made without regards with # potential filelog related behavior. if parent == 1: - othercopies.update(newcopies) + _merge_copies_dict( + othercopies, newcopies, isancestor, ismerged + ) else: - newcopies.update(othercopies) + _merge_copies_dict( + newcopies, othercopies, isancestor, ismerged + ) all_copies[c] = newcopies - return all_copies[targetrev] + + final_copies = {} + for dest, (tt, source) in all_copies[targetrev].items(): + if source is not None: + final_copies[dest] = source + return final_copies + + +def _merge_copies_dict(minor, major, isancestor, ismerged): + """merge two copies-mapping together, minor and major + + In case of conflict, value from "major" will be picked. + + - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an + ancestors of `high_rev`, + + - `ismerged(path)`: callable return True if `path` have been merged in the + current revision, + """ + for dest, value in major.items(): + other = minor.get(dest) + if other is None: + minor[dest] = value + else: + new_tt = value[0] + other_tt = other[0] + if value[1] == other[1]: + continue + # content from "major" wins, unless it is older + # than the branch point or there is a merge + if ( + new_tt == other_tt + or not isancestor(new_tt, other_tt) + or ismerged(dest) + ): + minor[dest] = value def _forwardcopies(a, b, base=None, match=None): diff --git a/tests/test-copies-chain-merge.t b/tests/test-copies-chain-merge.t --- a/tests/test-copies-chain-merge.t +++ b/tests/test-copies-chain-merge.t @@ -1,3 +1,5 @@ +#testcases filelog compatibility sidedata + ===================================================== Test Copy tracing for chain of copies involving merge ===================================================== @@ -6,6 +8,7 @@ are involved. It cheks we do not have unwanted update of behavior and that the different options to retrieve copies behave correctly. + Setup ===== @@ -18,6 +21,22 @@ > logtemplate={rev} {desc}\n > EOF +#if compatibility + $ cat >> $HGRCPATH << EOF + > [experimental] + > copies.read-from = compatibility + > EOF +#endif + +#if sidedata + $ cat >> $HGRCPATH << EOF + > [format] + > exp-use-side-data = yes + > exp-use-copies-side-data-changeset = yes + > EOF +#endif + + $ hg init repo-chain $ cd repo-chain @@ -453,17 +472,26 @@ 0 4 0dd616bc7ab1 000000000000 000000000000 1 10 6da5a2eecb9c 000000000000 000000000000 2 19 eb806e34ef6b 0dd616bc7ab1 6da5a2eecb9c + +# Here the filelog based implementation is not looking at the rename +# information (because the file exist on both side). However the changelog +# based on works fine. We have different output. + $ hg status --copies --rev 'desc("a-2")' --rev 'desc("mAEm-0")' M f + b (no-filelog !) R b $ hg status --copies --rev 'desc("a-2")' --rev 'desc("mEAm-0")' M f + b (no-filelog !) R b $ hg status --copies --rev 'desc("e-2")' --rev 'desc("mAEm-0")' M f + d (no-filelog !) R d $ hg status --copies --rev 'desc("e-2")' --rev 'desc("mEAm-0")' M f + d (no-filelog !) R d $ hg status --copies --rev 'desc("i-2")' --rev 'desc("a-2")' A f @@ -473,6 +501,18 @@ A f b R b + +# From here, we run status against revision where both source file exists. +# +# The filelog based implementation picks an arbitrary side based on revision +# numbers. So the same side "wins" whatever the parents order is. This is +# sub-optimal because depending on revision numbers means the result can be +# different from one repository to the next. +# +# The changeset based algorithm use the parent order to break tie on conflicting +# information and will have a different order depending on who is p1 and p2. +# That order is stable accross repositories. (data from p1 prevails) + $ hg status --copies --rev 'desc("i-2")' --rev 'desc("mAEm-0")' A f d @@ -480,7 +520,8 @@ R d $ hg status --copies --rev 'desc("i-2")' --rev 'desc("mEAm-0")' A f - d + d (filelog !) + b (no-filelog !) R b R d $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mAEm-0")' @@ -490,7 +531,8 @@ R b $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mEAm-0")' A f - a + a (filelog !) + b (no-filelog !) R a R b @@ -563,21 +605,25 @@ R h $ hg status --copies --rev 'desc("b-1")' --rev 'desc("mBFm-0")' M d + h (no-filelog !) R h $ hg status --copies --rev 'desc("f-2")' --rev 'desc("mBFm-0")' M b $ hg status --copies --rev 'desc("f-1")' --rev 'desc("mBFm-0")' M b M d + i (no-filelog !) R i $ hg status --copies --rev 'desc("b-1")' --rev 'desc("mFBm-0")' M d + h (no-filelog !) R h $ hg status --copies --rev 'desc("f-2")' --rev 'desc("mFBm-0")' M b $ hg status --copies --rev 'desc("f-1")' --rev 'desc("mFBm-0")' M b M d + i (no-filelog !) R i The following graphlog is wrong, the "a -> c -> d" chain was overwritten and should not appear. @@ -645,9 +691,15 @@ | o 0 i-0 initial commit: a b h +One side of the merge have a long history with rename. The other side of the +merge point to a new file with a smaller history. Each side is "valid". + +(and again the filelog based algorithm only explore one, with a pick based on +revision numbers) + $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mDGm-0")' A d - a + a (filelog !) R a $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mGDm-0")' A d @@ -740,7 +792,8 @@ $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mFGm-0")' A d - a + h (no-filelog !) + a (filelog !) R a R h $ hg status --copies --rev 'desc("i-0")' --rev 'desc("mGFm-0")' @@ -754,15 +807,19 @@ M d $ hg status --copies --rev 'desc("f-1")' --rev 'desc("mFGm-0")' M d + i (no-filelog !) R i $ hg status --copies --rev 'desc("f-1")' --rev 'desc("mGFm-0")' M d + i (no-filelog !) R i $ hg status --copies --rev 'desc("g-1")' --rev 'desc("mFGm-0")' M d + h (no-filelog !) R h $ hg status --copies --rev 'desc("g-1")' --rev 'desc("mGFm-0")' M d + h (no-filelog !) R h $ hg log -Gfr 'desc("mFGm-0")' d