diff --git a/mercurial/revlogutils/docket.py b/mercurial/revlogutils/docket.py --- a/mercurial/revlogutils/docket.py +++ b/mercurial/revlogutils/docket.py @@ -1,235 +1,249 @@ # docket - code related to revlog "docket" # # Copyright 2021 Pierre-Yves David # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. ### Revlog docket file # # The revlog is stored on disk using multiple files: # # * a small docket file, containing metadata and a pointer, # # * an index file, containing fixed width information about revisions, # # * a data file, containing variable width data for these revisions, from __future__ import absolute_import import errno import os import random import struct from .. import ( encoding, error, node, pycompat, util, ) from . import ( constants, ) def make_uid(id_size=8): """return a new unique identifier. The identifier is random and composed of ascii characters.""" # size we "hex" the result we need half the number of bits to have a final # uuid of size ID_SIZE return node.hex(os.urandom(id_size // 2)) # some special test logic to avoid anoying random output in the test stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE') if stable_docket_file: def make_uid(id_size=8): try: with open(stable_docket_file, mode='rb') as f: seed = f.read().strip() except IOError as inst: if inst.errno != errno.ENOENT: raise seed = b'04' # chosen by a fair dice roll. 
garanteed to be random if pycompat.ispy3: iter_seed = iter(seed) else: iter_seed = (ord(c) for c in seed) # some basic circular sum hashing on 64 bits int_seed = 0 low_mask = int('1' * 35, 2) for i in iter_seed: high_part = int_seed >> 35 low_part = (int_seed & low_mask) << 28 int_seed = high_part + low_part + i r = random.Random() if pycompat.ispy3: r.seed(int_seed, version=1) else: r.seed(int_seed) # once we drop python 3.8 support we can simply use r.randbytes raw = r.getrandbits(id_size * 4) assert id_size == 8 p = struct.pack('>L', raw) new = node.hex(p) with open(stable_docket_file, 'wb') as f: f.write(new) return new # Docket format # # * 4 bytes: revlog version # | This is mandatory as docket must be compatible with the previous # | revlog index header. +# * 1 bytes: size of index uuid # * 8 bytes: size of index-data # * 8 bytes: pending size of index-data # * 8 bytes: size of data # * 8 bytes: pending size of data # * 1 bytes: default compression header -S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc') +S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc') class RevlogDocket(object): """metadata associated with revlog""" def __init__( self, revlog, use_pending=False, version_header=None, + index_uuid=None, index_end=0, pending_index_end=0, data_end=0, pending_data_end=0, default_compression_header=None, ): self._version_header = version_header self._read_only = bool(use_pending) self._dirty = False self._radix = revlog.radix self._path = revlog._docket_file self._opener = revlog.opener + self._index_uuid = index_uuid # thes asserts should be True as long as we have a single index filename assert index_end <= pending_index_end assert data_end <= pending_data_end self._initial_index_end = index_end self._pending_index_end = pending_index_end self._initial_data_end = data_end self._pending_data_end = pending_data_end if use_pending: self._index_end = self._pending_index_end self._data_end = self._pending_data_end else: 
self._index_end = self._initial_index_end self._data_end = self._initial_data_end self.default_compression_header = default_compression_header def index_filepath(self): """file path to the current index file associated to this docket""" # very simplistic version at first - return b"%s.idx" % self._radix + if self._index_uuid is None: + self._index_uuid = make_uid() + return b"%s-%s.idx" % (self._radix, self._index_uuid) @property def index_end(self): return self._index_end @index_end.setter def index_end(self, new_size): if new_size != self._index_end: self._index_end = new_size self._dirty = True @property def data_end(self): return self._data_end @data_end.setter def data_end(self, new_size): if new_size != self._data_end: self._data_end = new_size self._dirty = True def write(self, transaction, pending=False, stripping=False): """write the modification of disk if any This make the new content visible to all process""" if not self._dirty: return False else: if self._read_only: msg = b'writing read-only docket: %s' msg %= self._path raise error.ProgrammingError(msg) if not stripping: # XXX we could, leverage the docket while stripping. 
However it # is not powerfull enough at the time of this comment transaction.addbackup(self._path, location=b'store') with self._opener(self._path, mode=b'w', atomictemp=True) as f: f.write(self._serialize(pending=pending)) # if pending we still need to the write final data eventually self._dirty = pending return True def _serialize(self, pending=False): if pending: official_index_end = self._initial_index_end official_data_end = self._initial_data_end else: official_index_end = self._index_end official_data_end = self._data_end # this assert should be True as long as we have a single index filename assert official_data_end <= self._data_end data = ( self._version_header, + len(self._index_uuid), official_index_end, self._index_end, official_data_end, self._data_end, self.default_compression_header, ) - return S_HEADER.pack(*data) + s = [] + s.append(S_HEADER.pack(*data)) + s.append(self._index_uuid) + return b''.join(s) def default_docket(revlog, version_header): """given a revlog version a new docket object for the given revlog""" rl_version = version_header & 0xFFFF if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2): return None comp = util.compengines[revlog._compengine].revlogheader() docket = RevlogDocket( revlog, version_header=version_header, default_compression_header=comp, ) docket._dirty = True return docket def parse_docket(revlog, data, use_pending=False): """given some docket data return a docket object for the given revlog""" header = S_HEADER.unpack(data[: S_HEADER.size]) + offset = S_HEADER.size version_header = header[0] - index_size = header[1] - pending_index_size = header[2] - data_size = header[3] - pending_data_size = header[4] - default_compression_header = header[5] + index_uuid_size = header[1] + index_uuid = data[offset : offset + index_uuid_size] + offset += index_uuid_size + index_size = header[2] + pending_index_size = header[3] + data_size = header[4] + pending_data_size = header[5] + default_compression_header = 
header[6] docket = RevlogDocket( revlog, use_pending=use_pending, version_header=version_header, + index_uuid=index_uuid, index_end=index_size, pending_index_end=pending_index_size, data_end=data_size, pending_data_end=pending_data_size, default_compression_header=default_compression_header, ) return docket diff --git a/tests/test-revlog-v2.t b/tests/test-revlog-v2.t --- a/tests/test-revlog-v2.t +++ b/tests/test-revlog-v2.t @@ -1,69 +1,85 @@ #require reporevlogstore A repo with unknown revlogv2 requirement string cannot be opened $ hg init invalidreq $ cd invalidreq $ echo exp-revlogv2.unknown >> .hg/requires $ hg log abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown (see https://mercurial-scm.org/wiki/MissingRequirement for more information) [255] $ cd .. Can create and open repo with revlog v2 requirement $ cat >> $HGRCPATH << EOF > [experimental] > revlogv2 = enable-unstable-format-and-corrupt-my-data > EOF $ hg init empty-repo $ cd empty-repo $ cat .hg/requires dotencode exp-dirstate-v2 (dirstate-v2 !) exp-revlogv2.2 fncache generaldelta persistent-nodemap (rust !) revlog-compression-zstd (zstd !) sparserevlog store $ hg log Unknown flags to revlog are rejected >>> with open('.hg/store/00changelog.i', 'wb') as fh: ... fh.write(b'\xff\x00\xde\xad') and None $ hg log abort: unknown flags (0xff00) in version 57005 revlog 00changelog [50] $ cd .. 
Writing a simple revlog v2 works $ hg init simple $ cd simple $ touch foo $ hg -q commit -A -m initial $ hg log changeset: 0:96ee1d7354c4 tag: tip user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: initial Header written as expected $ f --hexdump --bytes 4 .hg/store/00changelog.i .hg/store/00changelog.i: 0000: 00 00 de ad |....| $ f --hexdump --bytes 4 .hg/store/data/foo.i .hg/store/data/foo.i: 0000: 00 00 de ad |....| + +The expected files are generated +-------------------------------- + +We should have: +- a docket +- an index file with a unique name +- a data file + + $ ls .hg/store/00changelog* .hg/store/00manifest* + .hg/store/00changelog-b870a51b.idx + .hg/store/00changelog.d + .hg/store/00changelog.i + .hg/store/00manifest-88698448.idx + .hg/store/00manifest.d + .hg/store/00manifest.i