diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -95,7 +95,10 @@ stringutil, ) -from .revlogutils import deltas as deltautil +from .revlogutils import ( + deltas as deltautil, + nodemap, +) release = lockmod.release @@ -2082,6 +2085,20 @@ @command( + b'debugnodemap', + [(b'', b'dump', False, _(b'write persistent binary nodemap on stdin'))], +) +def debugnodemap(ui, repo, **opts): + """write and inspect on disk nodemap + """ + if opts['dump']: + unfi = repo.unfiltered() + cl = unfi.changelog + data = nodemap.persistent_data(cl.index) + ui.write(data) + + +@command( b'debugobsolete', [ (b'', b'flags', 0, _(b'markers flag')), diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -7,9 +7,156 @@ # GNU General Public License version 2 or any later version. from __future__ import absolute_import -from .. import error + +import struct + +from .. import ( + error, + node as nodemod, + pycompat, +) class NodeMap(dict): def __missing__(self, x): raise error.RevlogError(b'unknown node: %s' % x) + + +### Nodemap Trie +# +# This is a simple reference implementation to compute and persist a nodemap +# trie. This reference implementation is write only. The python version of this +# is not expected to be actually used, since it wont provide performance +# improvement over existing non-persistent C implementation. +# +# The nodemap is persisted as Trie using 4bits-address/16-entries block. each +# revision can be adressed using its node shortest prefix. +# +# The trie is stored as a sequence of block. Each block contains 16 entries +# (signed 64bit integer, big endian). Each entry can be one of the following: +# +# * value >= 0 -> index of sub-block +# * value == -1 -> no value +# * value < -1 -> a revision value: rev = -(value+10) +# +# The implementation focus on simplicity, not on performance. A Rust +# implementation should provide a efficient version of the same binary +# persistence. This reference python implementation is never meant to be +# extensively use in production. + + +def persistent_data(index): + """return the persistent binary form for a nodemap for a given index + """ + trie = _build_trie(index) + return _persist_trie(trie) + + +S_BLOCK = struct.Struct(">" + ("l" * 16)) + +NO_ENTRY = -1 +# rev 0 need to be -2 because 0 is used by block, -1 is a special value. +REV_OFFSET = 2 + + +def _transform_rev(rev): + """Return the number used to represent the rev in the tree. + + (or retrieve a rev number from such representation) + + Note that this is an involution, a function equal to its inverse (i.e. + which gives the identity when applied to itself). + """ + return -(rev + REV_OFFSET) + + +def _to_int(hex_digit): + """turn an hexadecimal digit into a proper integer""" + return int(hex_digit, 16) + + +def _build_trie(index): + """build a nodemap trie + + The nodemap stores revision number for each unique prefix. + + Each block is a dictionary with keys in `[0, 15]`. Values are either + another block or a revision number. + """ + root = {} + for rev in range(len(index)): + hex = nodemod.hex(index[rev][7]) + _insert_into_block(index, 0, root, rev, hex) + return root + + +def _insert_into_block(index, level, block, current_rev, current_hex): + """insert a new revision in a block + + index: the index we are adding revision for + level: the depth of the current block in the trie + block: the block currently being considered + current_rev: the revision number we are adding + current_hex: the hexadecimal representation of the of that revision + """ + hex_digit = _to_int(current_hex[level : level + 1]) + entry = block.get(hex_digit) + if entry is None: + # no entry, simply store the revision number + block[hex_digit] = current_rev + elif isinstance(entry, dict): + # need to recurse to an underlying block + _insert_into_block(index, level + 1, entry, current_rev, current_hex) + else: + # collision with a previously unique prefix, inserting new + # vertices to fit both entry. + other_hex = nodemod.hex(index[entry][7]) + other_rev = entry + new = {} + block[hex_digit] = new + _insert_into_block(index, level + 1, new, other_rev, other_hex) + _insert_into_block(index, level + 1, new, current_rev, current_hex) + + +def _persist_trie(root): + """turn a nodemap trie into persistent binary data + + See `_build_trie` for nodemap trie structure""" + block_map = {} + chunks = [] + for tn in _walk_trie(root): + block_map[id(tn)] = len(chunks) + chunks.append(_persist_block(tn, block_map)) + return b''.join(chunks) + + +def _walk_trie(block): + """yield all the block in a trie + + Children blocks are always yield before their parent block. + """ + for (_, item) in sorted(block.items()): + if isinstance(item, dict): + for sub_block in _walk_trie(item): + yield sub_block + yield block + + +def _persist_block(block_node, block_map): + """produce persistent binary data for a single block + + Children block are assumed to be already persisted and present in + block_map. + """ + data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) + return S_BLOCK.pack(*data) + + +def _to_value(item, block_map): + """persist any value as an integer""" + if item is None: + return NO_ENTRY + elif isinstance(item, dict): + return block_map[id(item)] + else: + return _transform_rev(item) diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -107,6 +107,7 @@ debugmanifestfulltextcache debugmergestate debugnamecomplete + debugnodemap debugobsolete debugp1copies debugp2copies @@ -290,6 +291,7 @@ debugmanifestfulltextcache: clear, add debugmergestate: debugnamecomplete: + debugnodemap: dump debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template debugp1copies: rev debugp2copies: rev diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1024,6 +1024,7 @@ print merge state debugnamecomplete complete "names" - tags, open branch names, bookmark names + debugnodemap write and inspect on disk nodemap debugobsolete create arbitrary obsolete marker debugoptADV (no help text available) diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t new file mode 100644 --- /dev/null +++ b/tests/test-persistent-nodemap.t @@ -0,0 +1,26 @@ +=================================== +Test the persistent on-disk nodemap +=================================== + + + $ hg init test-repo + $ cd test-repo + $ hg debugbuilddag .+5000 + $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size + size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 + 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0010: ff ff ff ff ff ff ff ff ff ff fa c2 ff ff ff ff |................| + 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0030: ff ff ff ff ff ff ed b3 ff ff ff ff ff ff ff ff |................| + 0040: ff ff ff ff ff ff ee 34 00 00 00 00 ff ff ff ff |.......4........| + 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0080: ff ff ff ff ff ff f8 50 ff ff ff ff ff ff ff ff |.......P........| + 0090: ff ff ff ff ff ff ff ff ff ff ec c7 ff ff ff ff |................| + 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00b0: ff ff ff ff ff ff fa be ff ff f2 fc ff ff ff ff |................| + 00c0: ff ff ff ff ff ff ef ea ff ff ff ff ff ff f9 17 |................| + 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|