def _sanitycheck(ui, nodes, bases):
    """
    Does some basic sanity checking on a packfile whose entries are ``nodes``
    and whose delta relationships are ``bases`` (a mapping of node->base):

    - Each deltabase must itself be a node elsewhere in the pack
    - There must be no cycles

    Every entry with an unknown deltabase, and every cycle, is reported
    through ``ui.warn`` exactly once, so the returned count does not depend on
    the order in which ``nodes`` is iterated. ``bases`` is only read, never
    modified.

    Returns the number of problems reported.
    """
    failures = 0
    # Entries whose chain has already been walked to its end. Any problem on
    # such a chain has been reported, so a later walk stops on reaching one.
    checked = set()
    for node in nodes:
        # Entries visited during the walk that starts at ``node``; meeting one
        # of them again means the chain loops back on itself.
        walked = set()
        current = node
        while current not in checked:
            walked.add(current)
            deltabase = bases[current]
            if deltabase == nullid:
                # A nullid base ends the chain.
                break

            if deltabase not in nodes:
                # Name ``current`` rather than ``node``: it is the entry that
                # actually references the missing base.
                ui.warn("Bad entry: %s has an unknown deltabase (%s)\n" %
                        (short(current), short(deltabase)))
                failures += 1
                break

            if deltabase in walked:
                ui.warn("Bad entry: %s has a cycle (at %s)\n" %
                        (short(node), short(deltabase)))
                failures += 1
                break

            current = deltabase
        # Mark the whole walked chain, not just its first entry, so that each
        # entry is traversed at most once across all walks.
        checked.update(walked)
    return failures