We need to keep that information around:
- the total data length will allow a transaction to start appending new information without confusing other readers.
- the amount of unused data will allow us to detect when we should regenerate a new rawdata file.
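
As a concrete illustration of the on-disk format this change produces, here is a minimal sketch of packing and unpacking the extended docket header. The struct formats and field names come from the patch below; the standalone helpers are hypothetical, not part of the patch:

    import struct

    # version byte, then header: uid size, tip revision,
    # total rawdata length, unused (dead) rawdata bytes
    S_VERSION = struct.Struct(">B")
    S_HEADER = struct.Struct(">BQQQ")

    def pack_docket(uid, tip_rev, data_length, data_unused):
        # hypothetical helper mirroring NodeMapDocket.serialize()
        header = S_HEADER.pack(len(uid), tip_rev, data_length, data_unused)
        return S_VERSION.pack(0) + header + uid

    def unpack_docket(pdata):
        # hypothetical helper mirroring persisted_data()
        (version,) = S_VERSION.unpack(pdata[:S_VERSION.size])
        assert version == 0, 'experimental on-disk version only'
        offset = S_VERSION.size
        uid_size, tip_rev, length, unused = S_HEADER.unpack(
            pdata[offset : offset + S_HEADER.size]
        )
        offset += S_HEADER.size
        return pdata[offset : offset + uid_size], tip_rev, length, unused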
Reviewers: hg-reviewers
Lint: Skipped
Unit Tests: Skipped
Status | Path
---|---
M | mercurial/debugcommands.py (2 lines)
M | mercurial/pure/parsers.py (4 lines)
M | mercurial/revlogutils/nodemap.py (41 lines)
M | tests/test-persistent-nodemap.t (17 lines)
Commit | Parents | Author | Summary | Date
---|---|---|---|---
867a5b315c08 | 72194e78731e | Pierre-Yves David | | Jan 15 2020, 9:50 AM
mercurial/debugcommands.py:

     elif opts['metadata']:
         unfi = repo.unfiltered()
         cl = unfi.changelog
         nm_data = nodemap.persisted_data(cl)
         if nm_data is not None:
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)

 @command(
     b'debugobsolete',
     [
         (b'', b'flags', 0, _(b'markers flag')),
         (
             b'',
mercurial/pure/parsers.py:

     def nodemap_data_incremental(self):
         """Return bytes containing a incremental update to persistent nodemap

         This containst the data for an append-only update of the data provided
         in the last call to `update_nodemap_data`.
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data

     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap

         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
             self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
mercurial/revlogutils/nodemap.py:

     if not pdata:
         return None
     offset = 0
     (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
     if version != ONDISK_VERSION:
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
 def setup_persistent_nodemap(tr, revlog):
     """Install whatever is needed transaction side to persist a nodemap on disk

         raise error.ProgrammingError(msg)
     can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
     ondisk_docket = revlog._nodemap_docket

     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
         datafile = _rawdata_filepath(revlog, target_docket)
         if util.safehasattr(revlog.index, "nodemap_data_all"):
             data = revlog.index.nodemap_data_all()
         else:
             data = persistent_data(revlog.index)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
     with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
         fp.write(target_docket.serialize())
     revlog._nodemap_docket = target_docket
     # EXP-TODO: if the transaction abort, we should remove the new data and
     # reinstall the old one.
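
The incremental branch above appends to the rawdata file with mode b'a' and only afterwards publishes a new docket. This is what the first bullet of the description refers to: a reader holding the previous docket keeps using the first data_length bytes it knows about and cannot be confused by bytes a concurrent transaction has already appended. A minimal sketch of that reader-side discipline (the helper name is hypothetical):

    def read_valid_rawdata(opener, filename, docket):
        # Only the first data_length bytes are guaranteed to match this
        # docket; anything beyond may be a concurrent, unpublished append.
        return opener.tryread(filename)[: docket.data_length]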
 # specified inside the "docket" file.
 #
 # The docket file contains information to find, qualify and validate the raw
 # data. Its content is currently very light, but it will expand as the on disk
 # nodemap gains the necessary features to be used in production.

 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0

 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")

 ID_SIZE = 8


 def _make_uid():
     """return a new unique identifier.

     The identifier is random and composed of ascii characters."""
     return nodemod.hex(os.urandom(ID_SIZE))
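
A quick sanity check on the header change: the docket file grows by exactly the two extra Q fields, from 26 to 42 bytes, which is what the test expectations below change to (size=26 becoming size=42):

    import struct

    # old: 1 version byte + 9 header bytes (">BQ") + 16 hex-uid bytes = 26
    # new: 1 version byte + 25 header bytes (">BQQQ") + 16 hex-uid bytes = 42
    assert 1 + struct.Struct(">BQ").size + 16 == 26
    assert 1 + struct.Struct(">BQQQ").size + 16 == 42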
 class NodeMapDocket(object):
     """metadata associated with persistent nodemap data

     The persistent data may come from disk or be on their way to disk.
     """

     def __init__(self, uid=None):
         if uid is None:
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0

     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new

     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)


 def _rawdata_filepath(revlog, docket):
     """The (vfs relative) nodemap's rawdata file for a given uid"""
     prefix = revlog.nodemap_file[:-2]
""" | """ | ||||
trie = _build_trie(index) | trie = _build_trie(index) | ||||
return _persist_trie(trie) | return _persist_trie(trie) | ||||
def update_persistent_data(index, root, max_idx, last_rev): | def update_persistent_data(index, root, max_idx, last_rev): | ||||
"""return the incremental update for persistent nodemap from a given index | """return the incremental update for persistent nodemap from a given index | ||||
""" | """ | ||||
trie = _update_trie(index, root, last_rev) | changed_block, trie = _update_trie(index, root, last_rev) | ||||
return _persist_trie(trie, existing_idx=max_idx) | return ( | ||||
changed_block * S_BLOCK.size, | |||||
_persist_trie(trie, existing_idx=max_idx), | |||||
) | |||||
S_BLOCK = struct.Struct(">" + ("q" * 16)) | S_BLOCK = struct.Struct(">" + ("q" * 16)) | ||||
NO_ENTRY = -1 | NO_ENTRY = -1 | ||||
# rev 0 need to be -2 because 0 is used by block, -1 is a special value. | # rev 0 need to be -2 because 0 is used by block, -1 is a special value. | ||||
REV_OFFSET = 2 | REV_OFFSET = 2 | ||||
for rev in range(len(index)): | for rev in range(len(index)): | ||||
hex = nodemod.hex(index[rev][7]) | hex = nodemod.hex(index[rev][7]) | ||||
_insert_into_block(index, 0, root, rev, hex) | _insert_into_block(index, 0, root, rev, hex) | ||||
return root | return root | ||||
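
update_persistent_data now returns how many bytes of previously persisted data the update supersedes (changed blocks times the block size) alongside the data to append. That count feeds docket.data_unused, which is what the second bullet of the description is about: deciding when too much of the file is dead weight and a full regeneration is warranted. The patch only records the number; a policy consuming it might look like this hypothetical sketch (the threshold is invented):

    def should_regenerate(docket, max_unused_ratio=0.5):
        # Invented heuristic: rewrite the rawdata file from scratch once
        # more than half of its bytes are unused.
        if not docket.data_length:
            return True
        return docket.data_unused / docket.data_length > max_unused_ratio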
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root


 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block

     index: the index we are adding revision for
     level: the depth of the current block in the trie
     block: the block currently being considered
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
         # no entry, simply store the revision number
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
         other_hex = nodemod.hex(index[entry][7])
         other_rev = entry
         new = Block()
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed


 def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data

     See `_build_trie` for nodemap trie structure"""
     block_map = {}
     if existing_idx is not None:
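
Note the asymmetry in the counting above: every block on the main insertion path counts itself (changed starts at 1), but the recursive calls that populate a freshly created collision block are deliberately left uncounted, since a brand-new block has no earlier on-disk copy to invalidate. Toy arithmetic for a single insertion that walks three pre-existing blocks (assumed numbers, no collision):

    levels_traversed = 3               # root plus two inner blocks touched
    changed_blocks = levels_traversed  # one count per call on the main path
    dead_bytes = changed_blocks * 128  # S_BLOCK.size: 16 slots * 8 bytes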
tests/test-persistent-nodemap.t:

   > exp-persistent-nodemap=yes
   > [devel]
   > persistent-nodemap=yes
   > EOF

   $ hg debugbuilddag .+5000
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
   size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c
   0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|

 add a new commit

   $ hg up
   0 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42

 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)

 #if pure
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
 #else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
 #endif
   $ hg debugnodemap --check
   revision in index: 5002
   revision in nodemap: 5002
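
The pure-python numbers in this test are internally consistent: the incremental update for the one new revision rewrote three trie blocks, so 384 bytes were appended (data-length 245760 -> 246144) and the 384 bytes holding the old copies of those blocks became unused:

    import struct

    S_BLOCK = struct.Struct(">" + ("q" * 16))  # 16 int64 slots per block
    assert S_BLOCK.size == 128
    assert 246144 - 245760 == 3 * S_BLOCK.size == 384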