We need to keep that information around:
- total data will allow the transaction to start appending new information without confusing other readers.
- unused data will allow us to detect when we should regenerate a new rawdata file.
( )
| hg-reviewers |
We need to keep that information around:
| Lint Skipped |
| Unit Tests Skipped |
| Path | Packages | |||
|---|---|---|---|---|
| M | mercurial/debugcommands.py (2 lines) | |||
| M | mercurial/pure/parsers.py (4 lines) | |||
| M | mercurial/revlogutils/nodemap.py (41 lines) | |||
| M | tests/test-persistent-nodemap.t (17 lines) |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| 867a5b315c08 | 72194e78731e | Pierre-Yves David | Jan 15 2020, 9:50 AM |
| Status | Author | Revision | |
|---|---|---|---|
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute | ||
| Closed | marmoute |
| elif opts['metadata']: | elif opts['metadata']: | ||||
| unfi = repo.unfiltered() | unfi = repo.unfiltered() | ||||
| cl = unfi.changelog | cl = unfi.changelog | ||||
| nm_data = nodemap.persisted_data(cl) | nm_data = nodemap.persisted_data(cl) | ||||
| if nm_data is not None: | if nm_data is not None: | ||||
| docket, data = nm_data | docket, data = nm_data | ||||
| ui.write((b"uid: %s\n") % docket.uid) | ui.write((b"uid: %s\n") % docket.uid) | ||||
| ui.write((b"tip-rev: %d\n") % docket.tip_rev) | ui.write((b"tip-rev: %d\n") % docket.tip_rev) | ||||
| ui.write((b"data-length: %d\n") % docket.data_length) | |||||
| ui.write((b"data-unused: %d\n") % docket.data_unused) | |||||
| @command( | @command( | ||||
| b'debugobsolete', | b'debugobsolete', | ||||
| [ | [ | ||||
| (b'', b'flags', 0, _(b'markers flag')), | (b'', b'flags', 0, _(b'markers flag')), | ||||
| ( | ( | ||||
| b'', | b'', | ||||
| def nodemap_data_incremental(self): | def nodemap_data_incremental(self): | ||||
| """Return bytes containing a incremental update to persistent nodemap | """Return bytes containing a incremental update to persistent nodemap | ||||
| This containst the data for an append-only update of the data provided | This containst the data for an append-only update of the data provided | ||||
| in the last call to `update_nodemap_data`. | in the last call to `update_nodemap_data`. | ||||
| """ | """ | ||||
| if self._nm_root is None: | if self._nm_root is None: | ||||
| return None | return None | ||||
| data = nodemaputil.update_persistent_data( | changed, data = nodemaputil.update_persistent_data( | ||||
| self, self._nm_root, self._nm_max_idx, self._nm_rev | self, self._nm_root, self._nm_max_idx, self._nm_rev | ||||
| ) | ) | ||||
| self._nm_root = self._nm_max_idx = self._nm_rev = None | self._nm_root = self._nm_max_idx = self._nm_rev = None | ||||
| return data | return changed, data | ||||
| def update_nodemap_data(self, docket, nm_data): | def update_nodemap_data(self, docket, nm_data): | ||||
| """provide full block of persisted binary data for a nodemap | """provide full block of persisted binary data for a nodemap | ||||
| The data are expected to come from disk. See `nodemap_data_all` for a | The data are expected to come from disk. See `nodemap_data_all` for a | ||||
| produceur of such data.""" | produceur of such data.""" | ||||
| if nm_data is not None: | if nm_data is not None: | ||||
| self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data) | self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data) | ||||
| if not pdata: | if not pdata: | ||||
| return None | return None | ||||
| offset = 0 | offset = 0 | ||||
| (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size]) | (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size]) | ||||
| if version != ONDISK_VERSION: | if version != ONDISK_VERSION: | ||||
| return None | return None | ||||
| offset += S_VERSION.size | offset += S_VERSION.size | ||||
| headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size]) | headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size]) | ||||
| uid_size, tip_rev = headers | uid_size, tip_rev, data_length, data_unused = headers | ||||
| offset += S_HEADER.size | offset += S_HEADER.size | ||||
| docket = NodeMapDocket(pdata[offset : offset + uid_size]) | docket = NodeMapDocket(pdata[offset : offset + uid_size]) | ||||
| docket.tip_rev = tip_rev | docket.tip_rev = tip_rev | ||||
| docket.data_length = data_length | |||||
| docket.data_unused = data_unused | |||||
| filename = _rawdata_filepath(revlog, docket) | filename = _rawdata_filepath(revlog, docket) | ||||
| return docket, revlog.opener.tryread(filename) | return docket, revlog.opener.tryread(filename) | ||||
| def setup_persistent_nodemap(tr, revlog): | def setup_persistent_nodemap(tr, revlog): | ||||
| """Install whatever is needed transaction side to persist a nodemap on disk | """Install whatever is needed transaction side to persist a nodemap on disk | ||||
| raise error.ProgrammingError(msg) | raise error.ProgrammingError(msg) | ||||
| can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") | can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") | ||||
| ondisk_docket = revlog._nodemap_docket | ondisk_docket = revlog._nodemap_docket | ||||
| # first attemp an incremental update of the data | # first attemp an incremental update of the data | ||||
| if can_incremental and ondisk_docket is not None: | if can_incremental and ondisk_docket is not None: | ||||
| target_docket = revlog._nodemap_docket.copy() | target_docket = revlog._nodemap_docket.copy() | ||||
| data = revlog.index.nodemap_data_incremental() | data_changed_count, data = revlog.index.nodemap_data_incremental() | ||||
| datafile = _rawdata_filepath(revlog, target_docket) | datafile = _rawdata_filepath(revlog, target_docket) | ||||
| # EXP-TODO: if this is a cache, this should use a cache vfs, not a | # EXP-TODO: if this is a cache, this should use a cache vfs, not a | ||||
| # store vfs | # store vfs | ||||
| with revlog.opener(datafile, b'a') as fd: | with revlog.opener(datafile, b'a') as fd: | ||||
| fd.write(data) | fd.write(data) | ||||
| target_docket.data_length += len(data) | |||||
| target_docket.data_unused += data_changed_count | |||||
| else: | else: | ||||
| # otherwise fallback to a full new export | # otherwise fallback to a full new export | ||||
| target_docket = NodeMapDocket() | target_docket = NodeMapDocket() | ||||
| datafile = _rawdata_filepath(revlog, target_docket) | datafile = _rawdata_filepath(revlog, target_docket) | ||||
| if util.safehasattr(revlog.index, "nodemap_data_all"): | if util.safehasattr(revlog.index, "nodemap_data_all"): | ||||
| data = revlog.index.nodemap_data_all() | data = revlog.index.nodemap_data_all() | ||||
| else: | else: | ||||
| data = persistent_data(revlog.index) | data = persistent_data(revlog.index) | ||||
| # EXP-TODO: if this is a cache, this should use a cache vfs, not a | # EXP-TODO: if this is a cache, this should use a cache vfs, not a | ||||
| # store vfs | # store vfs | ||||
| with revlog.opener(datafile, b'w') as fd: | with revlog.opener(datafile, b'w') as fd: | ||||
| fd.write(data) | fd.write(data) | ||||
| target_docket.data_length = len(data) | |||||
| target_docket.tip_rev = revlog.tiprev() | target_docket.tip_rev = revlog.tiprev() | ||||
| # EXP-TODO: if this is a cache, this should use a cache vfs, not a | # EXP-TODO: if this is a cache, this should use a cache vfs, not a | ||||
| # store vfs | # store vfs | ||||
| with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: | with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: | ||||
| fp.write(target_docket.serialize()) | fp.write(target_docket.serialize()) | ||||
| revlog._nodemap_docket = target_docket | revlog._nodemap_docket = target_docket | ||||
| # EXP-TODO: if the transaction abort, we should remove the new data and | # EXP-TODO: if the transaction abort, we should remove the new data and | ||||
| # reinstall the old one. | # reinstall the old one. | ||||
| # specified inside the "docket" file. | # specified inside the "docket" file. | ||||
| # | # | ||||
| # The docket file contains information to find, qualify and validate the raw | # The docket file contains information to find, qualify and validate the raw | ||||
| # data. Its content is currently very light, but it will expand as the on disk | # data. Its content is currently very light, but it will expand as the on disk | ||||
| # nodemap gains the necessary features to be used in production. | # nodemap gains the necessary features to be used in production. | ||||
| # version 0 is experimental, no BC garantee, do no use outside of tests. | # version 0 is experimental, no BC garantee, do no use outside of tests. | ||||
| ONDISK_VERSION = 0 | ONDISK_VERSION = 0 | ||||
| S_VERSION = struct.Struct(">B") | S_VERSION = struct.Struct(">B") | ||||
| S_HEADER = struct.Struct(">BQ") | S_HEADER = struct.Struct(">BQQQ") | ||||
| ID_SIZE = 8 | ID_SIZE = 8 | ||||
| def _make_uid(): | def _make_uid(): | ||||
| """return a new unique identifier. | """return a new unique identifier. | ||||
| The identifier is random and composed of ascii characters.""" | The identifier is random and composed of ascii characters.""" | ||||
| return nodemod.hex(os.urandom(ID_SIZE)) | return nodemod.hex(os.urandom(ID_SIZE)) | ||||
| class NodeMapDocket(object): | class NodeMapDocket(object): | ||||
| """metadata associated with persistent nodemap data | """metadata associated with persistent nodemap data | ||||
| The persistent data may come from disk or be on their way to disk. | The persistent data may come from disk or be on their way to disk. | ||||
| """ | """ | ||||
| def __init__(self, uid=None): | def __init__(self, uid=None): | ||||
| if uid is None: | if uid is None: | ||||
| uid = _make_uid() | uid = _make_uid() | ||||
| self.uid = uid | self.uid = uid | ||||
| self.tip_rev = None | self.tip_rev = None | ||||
| self.data_length = None | |||||
| self.data_unused = 0 | |||||
| def copy(self): | def copy(self): | ||||
| new = NodeMapDocket(uid=self.uid) | new = NodeMapDocket(uid=self.uid) | ||||
| new.tip_rev = self.tip_rev | new.tip_rev = self.tip_rev | ||||
| new.data_length = self.data_length | |||||
| new.data_unused = self.data_unused | |||||
| return new | return new | ||||
| def serialize(self): | def serialize(self): | ||||
| """return serialized bytes for a docket using the passed uid""" | """return serialized bytes for a docket using the passed uid""" | ||||
| data = [] | data = [] | ||||
| data.append(S_VERSION.pack(ONDISK_VERSION)) | data.append(S_VERSION.pack(ONDISK_VERSION)) | ||||
| headers = (len(self.uid), self.tip_rev) | headers = ( | ||||
| len(self.uid), | |||||
| self.tip_rev, | |||||
| self.data_length, | |||||
| self.data_unused, | |||||
| ) | |||||
| data.append(S_HEADER.pack(*headers)) | data.append(S_HEADER.pack(*headers)) | ||||
| data.append(self.uid) | data.append(self.uid) | ||||
| return b''.join(data) | return b''.join(data) | ||||
| def _rawdata_filepath(revlog, docket): | def _rawdata_filepath(revlog, docket): | ||||
| """The (vfs relative) nodemap's rawdata file for a given uid""" | """The (vfs relative) nodemap's rawdata file for a given uid""" | ||||
| prefix = revlog.nodemap_file[:-2] | prefix = revlog.nodemap_file[:-2] | ||||
| """ | """ | ||||
| trie = _build_trie(index) | trie = _build_trie(index) | ||||
| return _persist_trie(trie) | return _persist_trie(trie) | ||||
| def update_persistent_data(index, root, max_idx, last_rev): | def update_persistent_data(index, root, max_idx, last_rev): | ||||
| """return the incremental update for persistent nodemap from a given index | """return the incremental update for persistent nodemap from a given index | ||||
| """ | """ | ||||
| trie = _update_trie(index, root, last_rev) | changed_block, trie = _update_trie(index, root, last_rev) | ||||
| return _persist_trie(trie, existing_idx=max_idx) | return ( | ||||
| changed_block * S_BLOCK.size, | |||||
| _persist_trie(trie, existing_idx=max_idx), | |||||
| ) | |||||
| S_BLOCK = struct.Struct(">" + ("q" * 16)) | S_BLOCK = struct.Struct(">" + ("q" * 16)) | ||||
| NO_ENTRY = -1 | NO_ENTRY = -1 | ||||
| # rev 0 need to be -2 because 0 is used by block, -1 is a special value. | # rev 0 need to be -2 because 0 is used by block, -1 is a special value. | ||||
| REV_OFFSET = 2 | REV_OFFSET = 2 | ||||
| for rev in range(len(index)): | for rev in range(len(index)): | ||||
| hex = nodemod.hex(index[rev][7]) | hex = nodemod.hex(index[rev][7]) | ||||
| _insert_into_block(index, 0, root, rev, hex) | _insert_into_block(index, 0, root, rev, hex) | ||||
| return root | return root | ||||
| def _update_trie(index, root, last_rev): | def _update_trie(index, root, last_rev): | ||||
| """consume""" | """consume""" | ||||
| changed = 0 | |||||
| for rev in range(last_rev + 1, len(index)): | for rev in range(last_rev + 1, len(index)): | ||||
| hex = nodemod.hex(index[rev][7]) | hex = nodemod.hex(index[rev][7]) | ||||
| _insert_into_block(index, 0, root, rev, hex) | changed += _insert_into_block(index, 0, root, rev, hex) | ||||
| return root | return changed, root | ||||
| def _insert_into_block(index, level, block, current_rev, current_hex): | def _insert_into_block(index, level, block, current_rev, current_hex): | ||||
| """insert a new revision in a block | """insert a new revision in a block | ||||
| index: the index we are adding revision for | index: the index we are adding revision for | ||||
| level: the depth of the current block in the trie | level: the depth of the current block in the trie | ||||
| block: the block currently being considered | block: the block currently being considered | ||||
| current_rev: the revision number we are adding | current_rev: the revision number we are adding | ||||
| current_hex: the hexadecimal representation of the of that revision | current_hex: the hexadecimal representation of the of that revision | ||||
| """ | """ | ||||
| changed = 1 | |||||
| if block.ondisk_id is not None: | if block.ondisk_id is not None: | ||||
| block.ondisk_id = None | block.ondisk_id = None | ||||
| hex_digit = _to_int(current_hex[level : level + 1]) | hex_digit = _to_int(current_hex[level : level + 1]) | ||||
| entry = block.get(hex_digit) | entry = block.get(hex_digit) | ||||
| if entry is None: | if entry is None: | ||||
| # no entry, simply store the revision number | # no entry, simply store the revision number | ||||
| block[hex_digit] = current_rev | block[hex_digit] = current_rev | ||||
| elif isinstance(entry, dict): | elif isinstance(entry, dict): | ||||
| # need to recurse to an underlying block | # need to recurse to an underlying block | ||||
| _insert_into_block(index, level + 1, entry, current_rev, current_hex) | changed += _insert_into_block( | ||||
| index, level + 1, entry, current_rev, current_hex | |||||
| ) | |||||
| else: | else: | ||||
| # collision with a previously unique prefix, inserting new | # collision with a previously unique prefix, inserting new | ||||
| # vertices to fit both entry. | # vertices to fit both entry. | ||||
| other_hex = nodemod.hex(index[entry][7]) | other_hex = nodemod.hex(index[entry][7]) | ||||
| other_rev = entry | other_rev = entry | ||||
| new = Block() | new = Block() | ||||
| block[hex_digit] = new | block[hex_digit] = new | ||||
| _insert_into_block(index, level + 1, new, other_rev, other_hex) | _insert_into_block(index, level + 1, new, other_rev, other_hex) | ||||
| _insert_into_block(index, level + 1, new, current_rev, current_hex) | _insert_into_block(index, level + 1, new, current_rev, current_hex) | ||||
| return changed | |||||
| def _persist_trie(root, existing_idx=None): | def _persist_trie(root, existing_idx=None): | ||||
| """turn a nodemap trie into persistent binary data | """turn a nodemap trie into persistent binary data | ||||
| See `_build_trie` for nodemap trie structure""" | See `_build_trie` for nodemap trie structure""" | ||||
| block_map = {} | block_map = {} | ||||
| if existing_idx is not None: | if existing_idx is not None: | ||||
| > exp-persistent-nodemap=yes | > exp-persistent-nodemap=yes | ||||
| > [devel] | > [devel] | ||||
| > persistent-nodemap=yes | > persistent-nodemap=yes | ||||
| > EOF | > EOF | ||||
| $ hg debugbuilddag .+5000 | $ hg debugbuilddag .+5000 | ||||
| $ hg debugnodemap --metadata | $ hg debugnodemap --metadata | ||||
| uid: ???????????????? (glob) | uid: ???????????????? (glob) | ||||
| tip-rev: 5000 | tip-rev: 5000 | ||||
| data-length: 245760 | |||||
| data-unused: 0 | |||||
| $ f --size .hg/store/00changelog.n | $ f --size .hg/store/00changelog.n | ||||
| .hg/store/00changelog.n: size=26 | .hg/store/00changelog.n: size=42 | ||||
| $ f --sha256 .hg/store/00changelog-*.nd | $ f --sha256 .hg/store/00changelog-*.nd | ||||
| .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob) | .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob) | ||||
| $ hg debugnodemap --dump-new | f --sha256 --size | $ hg debugnodemap --dump-new | f --sha256 --size | ||||
| size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c | size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c | ||||
| $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size | $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size | ||||
| size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c | size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c | ||||
| 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| | 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| | ||||
| 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| | 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| | ||||
| add a new commit | add a new commit | ||||
| $ hg up | $ hg up | ||||
| 0 files updated, 0 files merged, 0 files removed, 0 files unresolved | 0 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
| $ echo foo > foo | $ echo foo > foo | ||||
| $ hg add foo | $ hg add foo | ||||
| $ hg ci -m 'foo' | $ hg ci -m 'foo' | ||||
| #if pure | |||||
| $ hg debugnodemap --metadata | $ hg debugnodemap --metadata | ||||
| uid: ???????????????? (glob) | uid: ???????????????? (glob) | ||||
| tip-rev: 5001 | tip-rev: 5001 | ||||
| data-length: 246144 | |||||
| data-unused: 384 | |||||
| #else | |||||
| $ hg debugnodemap --metadata | |||||
| uid: ???????????????? (glob) | |||||
| tip-rev: 5001 | |||||
| data-length: 245760 | |||||
| data-unused: 0 | |||||
| #endif | |||||
| $ f --size .hg/store/00changelog.n | $ f --size .hg/store/00changelog.n | ||||
| .hg/store/00changelog.n: size=26 | .hg/store/00changelog.n: size=42 | ||||
| (The pure code use the debug code that perform incremental update, the C code reencode from scratch) | (The pure code use the debug code that perform incremental update, the C code reencode from scratch) | ||||
| #if pure | #if pure | ||||
| $ f --sha256 .hg/store/00changelog-*.nd --size | $ f --sha256 .hg/store/00changelog-*.nd --size | ||||
| .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) | .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) | ||||
| #else | #else | ||||
| $ f --sha256 .hg/store/00changelog-*.nd --size | $ f --sha256 .hg/store/00changelog-*.nd --size | ||||
| .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) | .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) | ||||
| #endif | #endif | ||||
| $ hg debugnodemap --check | $ hg debugnodemap --check | ||||
| revision in index: 5002 | revision in index: 5002 | ||||
| revision in nodemap: 5002 | revision in nodemap: 5002 | ||||