diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -93,7 +93,10 @@ stringutil, ) -from .revlogutils import deltas as deltautil +from .revlogutils import ( + deltas as deltautil, + nodemap, +) release = lockmod.release @@ -2075,6 +2078,20 @@ @command( + b'debugnodemap', + [('', b'dump', False, _(b'write a (binary) serialised nodemap on stdin'))], +) +def debugnodemap(ui, repo, **args): + """write and inspect on disk nodemap + """ + if args['dump']: + unfi = repo.unfiltered() + cl = unfi.changelog + data = nodemap.persistent_data(cl.index) + ui.write(data) + + +@command( b'debugobsolete', [ (b'', b'flags', 0, _(b'markers flag')), diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -7,9 +7,156 @@ # GNU General Public License version 2 or any later version. from __future__ import absolute_import -from .. import error + +import struct + +from .. import ( + error, + node as nodemod, +) class NodeMap(dict): def __missing__(self, x): raise error.RevlogError(b'unknown node: %s' % x) + + +### Nodemap Trie +# +# This is a simple reference implementation to compute and serialise a nodemap +# trie. This reference implementation is write only. The python version of this +# is not expected to be actually used, since it wont provide performance +# improvement over existing non-persistent C implementation. +# +# The nodemap is serialized as Trie using 4bits-address/16-entries block. each +# revision can be adressed using its node shortest prefix. +# +# The trie is stored as a sequence of block. Each block contains 16 entries +# (signed 64bit integer, big endian). Each entry can be one of the following: +# +# * value >= 0 -> index of sub-block +# * value == -1 -> no value +# * value < -1 -> a revision value: rev = -(value+10) +# +# The implementation focus on simplicity, not on performance. A Rust +# implementation should provide a efficient version of the same serialization. +# This reference python implementation is never meant to be extensively use in +# production. + + +def persistent_data(index): + """return the serialised data of a nodemap for a given index + """ + trie = _build_trie(index) + return _dump_trie(trie) + + +S_BLOCK = struct.Struct(">" + ("q" * 16)) + +NO_ENTRY = -1 +# rev-0 need to be -2 because 0 is used by block, -1 is a special value. +REV_OFFSET = -2 + + +def _transform_rev(rev): + """Return the number used to represent the rev in the tree. + + (or retrieve a rev number from such representation) + + Note that this is an involution. + """ + return -(rev + REV_OFFSET) + + +def _to_int(hex_digit): + """turn an hexadecimal digit into a proper integer""" + return int(hex_digit, 16) + + +def _build_trie(index): + """build a nodemap trie + + The nodemap store revision number for each unique prefix. + + Each block is a dictionnary with key in `[0, 15]`. Value are either + another block or a revision number. + """ + root = {} + for rev in range(len(index)): + hex = nodemod.hex(index[rev][7]) + _insert_into_block(index, 0, root, rev, hex) + return root + + +def _insert_into_block(index, level, block, current_rev, current_hex): + """insert a new revision in a block + + index: the index we are adding revision for + level: the depth of the current block in the trie + block: the block currently being considered + current_rev: the revision number we are adding + current_hex: the hexadecimal representation of the of that revision + """ + entry = block.get(_to_int(current_hex[level])) + if entry is None: + # no entry, simply store the revision number + block[_to_int(current_hex[level])] = current_rev + elif isinstance(entry, dict): + # need to recurse to an underlying block + _insert_into_block(index, level + 1, entry, current_rev, current_hex) + else: + # collision with a previously unique prefix, inserting new + # vertices to fit both entry. + other_hex = nodemod.hex(index[entry][7]) + other_rev = entry + while current_hex[level] == other_hex[level]: + new = {} + block[_to_int(current_hex[level])] = new + block = new + level += 1 + block[_to_int(current_hex[level])] = current_rev + block[_to_int(other_hex[level])] = other_rev + + +def _dump_trie(root): + """serialise a nodemap trie + + See `_build_trie` for nodemap trie structure""" + block_map = {} + chunks = [] + for tn in _walk_trie(root): + block_map[id(tn)] = len(chunks) + chunks.append(_dump_block(tn, block_map)) + return ''.join(chunks) + + +def _walk_trie(block): + """yield all the block in a trie + + Children blocks are always yield before their parent block. + """ + for (_, item) in sorted(block.items()): + if isinstance(item, dict): + for sub_block in _walk_trie(item): + yield sub_block + yield block + + +def _dump_block(block_node, block_map): + """serialise a single block + + Children block are assumed to be already serialized and present in + block_map. + """ + data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) + return S_BLOCK.pack(*data) + + +def _to_value(item, block_map): + """serialize any value as an integer""" + if item is None: + return NO_ENTRY + elif isinstance(item, dict): + return block_map[id(item)] + else: + return _transform_rev(item) diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -107,6 +107,7 @@ debugmanifestfulltextcache debugmergestate debugnamecomplete + debugnodemap debugobsolete debugp1copies debugp2copies @@ -289,6 +290,7 @@ debugmanifestfulltextcache: clear, add debugmergestate: debugnamecomplete: + debugnodemap: dump debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template debugp1copies: rev debugp2copies: rev diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1017,6 +1017,7 @@ print merge state debugnamecomplete complete "names" - tags, open branch names, bookmark names + debugnodemap write and inspect on disk nodemap debugobsolete create arbitrary obsolete marker debugoptADV (no help text available) diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t new file mode 100644 --- /dev/null +++ b/tests/test-persistent-nodemap.t @@ -0,0 +1,26 @@ +=================================== +Test the persistent on-disk nodemap +=================================== + + + $ hg init test-repo + $ cd test-repo + $ hg debugbuilddag .+5000 + $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size + size=245760, sha256=5dbe62ab98a26668b544063d4d674ac4452ba903ee8895c52fd21d9bbd771e09 + 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0030: ff ff ff ff ff ff fa c6 ff ff ff ff ff ff ff ff |................| + 0040: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ed b7 |................| + 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ee 38 |...............8| + 0090: 00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff |................| + 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00b0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00c0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|