diff --git a/hgext/fix.py b/hgext/fix.py
--- a/hgext/fix.py
+++ b/hgext/fix.py
@@ -283,20 +283,29 @@
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
-            for rev, path in items:
-                ctx = repo[rev]
+            for srcrev, path, dstrevs in items:
+                ctx = repo[srcrev]
                 olddata = ctx[path].data()
                 metadata, newdata = fixfile(
-                    ui, repo, opts, fixers, ctx, path, basepaths, basectxs[rev]
+                    ui,
+                    repo,
+                    opts,
+                    fixers,
+                    ctx,
+                    path,
+                    basepaths,
+                    basectxs[srcrev],
                 )
-                # Don't waste memory/time passing unchanged content back, but
-                # produce one result per item either way.
-                yield (
-                    rev,
-                    path,
-                    metadata,
-                    newdata if newdata != olddata else None,
-                )
+                # We ungroup the work items now, because the code that consumes
+                # these results has to handle each dstrev separately, and in
+                # topological order. Because these are handled in topological
+                # order, it's important that we pass around references to
+                # "newdata" instead of copying it. Otherwise, we would be
+                # keeping more copies of file content in memory at a time than
+                # if we hadn't bothered to group/deduplicate the work items.
+                data = newdata if newdata != olddata else None
+                for dstrev in dstrevs:
+                    yield (dstrev, path, metadata, data)
 
         results = worker.worker(
             ui, 1.0, getfixes, tuple(), workqueue, threadsafe=False
@@ -392,7 +401,7 @@
     items by ascending revision number to match the order in which we commit
     the fixes later.
     """
-    workqueue = []
+    dstrevmap = collections.defaultdict(list)
     numitems = collections.defaultdict(int)
     maxfilesize = ui.configbytes(b'fix', b'maxfilesize')
     for rev in sorted(revstofix):
@@ -410,8 +419,13 @@
                     % (util.bytecount(maxfilesize), path)
                 )
                 continue
-            workqueue.append((rev, path))
+            baserevs = tuple(ctx.rev() for ctx in basectxs[rev])
+            dstrevmap[(fctx.filerev(), baserevs, path)].append(rev)
             numitems[rev] += 1
+    workqueue = [
+        (dstrevs[0], path, dstrevs)
+        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
+    ]
     return workqueue, numitems
 
 
@@ -516,9 +530,9 @@
         return {}
 
     basepaths = {}
-    for rev, path in workqueue:
-        fixctx = repo[rev]
-        for basectx in basectxs[rev]:
+    for srcrev, path, dstrevs in workqueue:
+        fixctx = repo[srcrev]
+        for basectx in basectxs[srcrev]:
             basepath = copies.pathcopies(basectx, fixctx).get(path, path)
             if basepath in basectx:
                 basepaths[(basectx.rev(), fixctx.rev(), path)] = basepath
@@ -641,10 +655,10 @@
     toprefetch = set()
 
     # Prefetch the files that will be fixed.
-    for rev, path in workqueue:
-        if rev == wdirrev:
+    for srcrev, path, dstrevs in workqueue:
+        if srcrev == wdirrev:
             continue
-        toprefetch.add((rev, path))
+        toprefetch.add((srcrev, path))
 
     # Prefetch the base contents for lineranges().
     for (baserev, fixrev, path), basepath in basepaths.items():
diff --git a/tests/test-fix.t b/tests/test-fix.t
--- a/tests/test-fix.t
+++ b/tests/test-fix.t
@@ -1758,8 +1758,8 @@
   $ cat $LOGFILE | sort | uniq -c
         4 bar.log
         4 baz.log
-        4 foo.log
-        4 qux.log
+        3 foo.log
+        2 qux.log
 
   $ cd ..
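
The heart of the change is the new work-item key: items that share the same (file revision, base revisions, path) triple would run the fixer tools on identical input, so they are grouped into one queue entry, executed once, and the result is fanned out to every destination revision. The following standalone sketch illustrates that grouping outside of Mercurial, under simplified assumptions; build_workqueue, run_fixes, and fake_fixer are invented names for the illustration and are not part of fix.py.

import collections


def build_workqueue(items):
    """Group work items that would produce identical fixer runs.

    Illustrative sketch only; this helper does not exist in hgext/fix.py.
    ``items`` is an iterable of (rev, path, filerev, baserevs) tuples, a
    simplified shape for this example. Items sharing the same
    (filerev, baserevs, path) key need the fixer tools run only once.
    """
    dstrevmap = collections.defaultdict(list)
    for rev, path, filerev, baserevs in items:
        dstrevmap[(filerev, tuple(baserevs), path)].append(rev)
    # One queue entry per unique key: run the tools against the first
    # revision, then fan the result out to every destination revision.
    return [
        (dstrevs[0], path, dstrevs)
        for (filerev, baserevs, path), dstrevs in dstrevmap.items()
    ]


def run_fixes(workqueue, fix_one):
    """Call ``fix_one(srcrev, path)`` once per group and yield one result
    per destination revision, sharing (not copying) the fixed data."""
    for srcrev, path, dstrevs in workqueue:
        newdata = fix_one(srcrev, path)
        for dstrev in dstrevs:
            yield dstrev, path, newdata


if __name__ == '__main__':
    # Revisions 10, 11 and 12 contain the same file revision of foo.txt with
    # the same base revisions, so the (pretend) fixer runs once for all three.
    items = [
        (10, 'foo.txt', 7, (9,)),
        (11, 'foo.txt', 7, (9,)),
        (12, 'foo.txt', 7, (9,)),
        (12, 'bar.txt', 3, (11,)),
    ]
    calls = []

    def fake_fixer(rev, path):
        calls.append((rev, path))
        return b'fixed content'

    results = list(run_fixes(build_workqueue(items), fake_fixer))
    print(len(calls), 'fixer runs produced', len(results), 'results')  # 2 ... 4

Sharing the fixed data by reference across all destination revisions, rather than copying it per revision, is what keeps the memory footprint comparable to the ungrouped version, which is the point made by the new comment inside getfixes.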