diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -228,10 +228,72 @@
         return self._offsets[i]
 
 
-def parse_index2(data, inline):
+def parse_index2(data, inline, revlogv2=False):
+    """Build the pure-Python index object for the raw index ``data``.
+
+    ``revlogv2`` selects the revlog v2 index classes instead of the default
+    ones.  Returns ``(index, cache)``: ``cache`` is ``None`` for a non-inline
+    index and ``(0, data)`` for an inline one.
+    """
     if not inline:
-        return IndexObject(data), None
-    return InlinedIndexObject(data, inline), (0, data)
+        cls = IndexObject2 if revlogv2 else IndexObject
+        return cls(data), None
+    cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
+    return cls(data, inline), (0, data)
+
+
+class Index2Mixin(object):
+    """Entry layout shared by the revlog v2 index classes."""
+
+    index_format = b">Qiiiiii20s12xQiQi8x"
+    # '>' keeps these sizes platform independent, like index_format
+    bit_int_size = struct.calcsize(b'>Q')
+    int_size = struct.calcsize(b'>i')
+    index_size = struct.calcsize(index_format)
+    assert index_size == 96, index_size
+    # one value per field unpacked by index_format (12 in total)
+    null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0, 0, 0)
+
+
+class IndexObject2(Index2Mixin, IndexObject):
+    pass
+
+
+class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
+    def _inline_scan(self, lgt):
+        """Count the index entries found in the inline data.
+
+        After each entry the scan skips the number of bytes given by the
+        entry's compressed-length and sidedata-length fields.  When ``lgt``
+        is not None, ``self._offsets`` is filled with the offset of every
+        entry.  Raises ValueError when the lengths do not add up.
+        """
+        # offset, inside one entry, of the sidedata compressed length:
+        # Q + 6 * i + 20s12x + Q + i + Q == 84
+        sidedata_length_pos = 84
+        data = self._data
+        end = len(data)
+        off = 0
+        if lgt is not None:
+            self._offsets = [0] * lgt
+        count = 0
+        while off <= end - self.index_size:
+            (data_size,) = struct.unpack_from(
+                b'>i', data, off + self.bit_int_size
+            )
+            (side_data_size,) = struct.unpack_from(
+                b'>i', data, off + sidedata_length_pos
+            )
+            if data_size < 0 or side_data_size < 0:
+                # a negative length would stall or rewind the cursor
+                raise ValueError(b"corrupted data")
+            if lgt is not None:
+                self._offsets[count] = off
+            count += 1
+            off += self.index_size + data_size + side_data_size
+        if off != end:
+            raise ValueError(b"corrupted data")
+        return count
 
 
 def parse_index_devel_nodemap(data, inline):
diff --git a/mercurial/requirements.py b/mercurial/requirements.py
--- a/mercurial/requirements.py
+++ b/mercurial/requirements.py
@@ -23,7 +23,7 @@
 
 # Increment the sub-version when the revlog v2 format changes to lock out old
 # clients.
-REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
+REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
 
 # A repository with the sparserevlog feature will have delta chains that
 # can spread over a larger span. Sparse reading cuts these large spans into
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -69,6 +69,7 @@
     templatefilters,
     util,
 )
+from .pure import parsers as pureparsers
 from .interfaces import (
     repository,
     util as interfaceutil,
@@ -364,6 +365,49 @@
         return p
 
 
+# index v2:
+#  6 bytes: offset
+#  2 bytes: flags
+#  4 bytes: compressed length
+#  4 bytes: uncompressed length
+#  4 bytes: base rev
+#  4 bytes: link rev
+#  4 bytes: parent 1 rev
+#  4 bytes: parent 2 rev
+# 32 bytes: nodeid
+#  8 bytes: UnifiedRevlog identifier
+#  4 bytes: rank (number of changesets under this one, this one included)
+#  8 bytes: sidedata offset
+#  4 bytes: sidedata compressed length
+#  8 bytes: Padding to align to 96 bytes
+indexformatv2 = struct.Struct(b">Qiiiiii20s12xQiQi8x")
+indexformatv2_pack = indexformatv2.pack
+
+
+class revlogv2io(object):
+    """Parse and serialize index entries in the revlog v2 layout."""
+
+    def __init__(self):
+        # size in bytes of one packed v2 index entry
+        self.size = indexformatv2.size
+
+    def parseindex(self, data, inline):
+        """Return the ``(index, cache)`` pair parsed from ``data``."""
+        idx, chunkcache = parsers.parse_index2(data, inline, revlogv2=True)
+        return idx, chunkcache
+
+    def packentry(self, entry, node, version, rev):
+        """Return ``entry`` packed as v2 index bytes.
+
+        For revision 0 the first four bytes are replaced by the packed
+        ``version``.
+        """
+        packed = indexformatv2_pack(*entry)
+        if rev != 0:
+            return packed
+        return versionformat_pack(version) + packed[4:]
+
+
 NodemapRevlogIO = None
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
@@ -647,6 +691,8 @@
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
+        elif fmt == REVLOGV2:
+            self._io = revlogv2io()
         elif devel_nodemap:
             self._io = NodemapRevlogIO()
         elif use_rust_index:
@@ -2318,7 +2364,17 @@
             p1r,
             p2r,
             node,
+            # v2-only trailing fields, all written as zero here
+            0,
+            0,
+            0,
+            0,
         )
+
+        # every other format keeps only the first eight fields
+        if self.version & 0xFFFF != REVLOGV2:
+            e = e[:8]
+
         self.index.append(e)
 
         entry = self._io.packentry(e, self.node, self.version, curr)
diff --git a/mercurial/revlogutils/constants.py
b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -14,9 +14,7 @@ # revlog header flags REVLOGV0 = 0 REVLOGV1 = 1 -# Dummy value until file format is finalized. -# Reminder: change the bounds check in revlog.__init__ when this is changed. -REVLOGV2 = 0xDEAD +REVLOGV2 = 2 # Shared across v1 and v2. FLAG_INLINE_DATA = 1 << 16 # Only used by v1, implied by v2. diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py --- a/tests/test-parseindex2.py +++ b/tests/test-parseindex2.py @@ -117,8 +117,8 @@ ) -def parse_index2(data, inline): - index, chunkcache = parsers.parse_index2(data, inline) +def parse_index2(data, inline, revlogv2=False): + index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2) return list(index), chunkcache diff --git a/tests/test-revlog-v2.t b/tests/test-revlog-v2.t --- a/tests/test-revlog-v2.t +++ b/tests/test-revlog-v2.t @@ -22,7 +22,7 @@ $ cd empty-repo $ cat .hg/requires dotencode - exp-revlogv2.1 + exp-revlogv2.2 fncache sparserevlog store @@ -32,10 +32,10 @@ Unknown flags to revlog are rejected >>> with open('.hg/store/00changelog.i', 'wb') as fh: - ... fh.write(b'\xff\x00\xde\xad') and None + ... fh.write(b'\xff\x00\x00\x02') and None $ hg log - abort: unknown flags (0xff00) in version 57005 revlog 00changelog.i + abort: unknown flags (0xff00) in version 2 revlog 00changelog.i [50] $ cd .. @@ -58,8 +58,8 @@ $ f --hexdump --bytes 4 .hg/store/00changelog.i .hg/store/00changelog.i: - 0000: 00 01 de ad |....| + 0000: 00 01 00 02 |....| $ f --hexdump --bytes 4 .hg/store/data/foo.i .hg/store/data/foo.i: - 0000: 00 01 de ad |....| + 0000: 00 01 00 02 |....| diff --git a/tests/test-revlog.t b/tests/test-revlog.t --- a/tests/test-revlog.t +++ b/tests/test-revlog.t @@ -22,10 +22,10 @@ Unknown version is rejected >>> with open('.hg/store/00changelog.i', 'wb') as fh: - ... fh.write(b'\x00\x00\x00\x02') and None + ... 
fh.write(b'\x00\x00\x00\x03') and None $ hg log - abort: unknown version (2) in revlog 00changelog.i + abort: unknown version (3) in revlog 00changelog.i [50] $ cd ..