diff --git a/hgext/remotefilelog/remotefilelog.py b/hgext/remotefilelog/remotefilelog.py --- a/hgext/remotefilelog/remotefilelog.py +++ b/hgext/remotefilelog/remotefilelog.py @@ -336,6 +336,8 @@ delta=delta, # Sidedata is not supported yet sidedata=None, + # Protocol flags are not used yet + protocol_flags=0, ) def revdiff(self, node1, node2): diff --git a/hgext/sqlitestore.py b/hgext/sqlitestore.py --- a/hgext/sqlitestore.py +++ b/hgext/sqlitestore.py @@ -290,6 +290,7 @@ revision = attr.ib() delta = attr.ib() sidedata = attr.ib() + protocol_flags = attr.ib() linknode = attr.ib(default=None) diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -34,10 +34,12 @@ from .interfaces import repository from .revlogutils import sidedata as sidedatamod +from .utils import storageutil _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s") _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s") _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH") +_CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH") LFS_REQUIREMENT = b'lfs' @@ -194,7 +196,8 @@ else: deltabase = prevnode flags = 0 - return node, p1, p2, deltabase, cs, flags + protocol_flags = 0 + return node, p1, p2, deltabase, cs, flags, protocol_flags def deltachunk(self, prevnode): l = self._chunklength() @@ -203,10 +206,9 @@ headerdata = readexactly(self._stream, self.deltaheadersize) header = self.deltaheader.unpack(headerdata) delta = readexactly(self._stream, l - self.deltaheadersize) - node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode) - # cg4 forward-compat - sidedata = {} - return (node, p1, p2, cs, deltabase, delta, flags, sidedata) + header = self._deltaheader(header, prevnode) + node, p1, p2, deltabase, cs, flags, protocol_flags = header + return node, p1, p2, cs, deltabase, delta, flags, protocol_flags def getchunks(self): """returns all the chunks contains in the bundle @@ -595,7 +597,8 @@ def _deltaheader(self, headertuple, prevnode): node, p1, p2, deltabase, cs = headertuple flags = 0 - return node, p1, p2, deltabase, cs, flags + protocol_flags = 0 + return node, p1, p2, deltabase, cs, flags, protocol_flags class cg3unpacker(cg2unpacker): @@ -613,7 +616,8 @@ def _deltaheader(self, headertuple, prevnode): node, p1, p2, deltabase, cs, flags = headertuple - return node, p1, p2, deltabase, cs, flags + protocol_flags = 0 + return node, p1, p2, deltabase, cs, flags, protocol_flags def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None): super(cg3unpacker, self)._unpackmanifests( @@ -636,18 +640,24 @@ cg4 streams add support for exchanging sidedata. """ + deltaheader = _CHANGEGROUPV4_DELTA_HEADER + deltaheadersize = deltaheader.size version = b'04' + def _deltaheader(self, headertuple, prevnode): + protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple + return node, p1, p2, deltabase, cs, flags, protocol_flags + def deltachunk(self, prevnode): res = super(cg4unpacker, self).deltachunk(prevnode) if not res: return res - (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res + (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res - sidedata_raw = getchunk(self._stream) sidedata = {} - if len(sidedata_raw) > 0: + if protocol_flags & storageutil.CG_FLAG_SIDEDATA: + sidedata_raw = getchunk(self._stream) sidedata = sidedatamod.deserialize_sidedata(sidedata_raw) return node, p1, p2, cs, deltabase, delta, flags, sidedata @@ -693,10 +703,10 @@ yield prefix yield data - sidedata = delta.sidedata - if sidedata is not None: + if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA: # Need a separate chunk for sidedata to be able to differentiate # "raw delta" length and sidedata length + sidedata = delta.sidedata yield chunkheader(len(sidedata)) yield sidedata @@ -1638,11 +1648,18 @@ fullnodes=None, remote_sidedata=None, ): - # Same header func as cg3. Sidedata is in a separate chunk from the delta to - # differenciate "raw delta" and sidedata. - builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack( - d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags - ) + # Sidedata is in a separate chunk from the delta to differentiate + # "raw delta" and sidedata. + def builddeltaheader(d): + return _CHANGEGROUPV4_DELTA_HEADER.pack( + d.protocol_flags, + d.node, + d.p1node, + d.p2node, + d.basenode, + d.linknode, + d.flags, + ) return cgpacker( repo, @@ -1928,7 +1945,6 @@ sd_computers = collections.defaultdict(list) # Computers for categories to remove from sidedata sd_removers = collections.defaultdict(list) - to_generate = remote_sd_categories - repo._wanted_sidedata to_remove = repo._wanted_sidedata - remote_sd_categories if pull: diff --git a/mercurial/helptext/internals/changegroups.txt b/mercurial/helptext/internals/changegroups.txt --- a/mercurial/helptext/internals/changegroups.txt +++ b/mercurial/helptext/internals/changegroups.txt @@ -2,12 +2,13 @@ the changelog data, root/flat manifest data, treemanifest data, and filelogs. -There are 3 versions of changegroups: ``1``, ``2``, and ``3``. From a +There are 4 versions of changegroups: ``1``, ``2``, ``3`` and ``4``. From a high-level, versions ``1`` and ``2`` are almost exactly the same, with the only difference being an additional item in the *delta header*. Version ``3`` adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the -``changegroup`` part in the bundle2). +``changegroup`` part in the bundle2). Version ``4`` adds support for exchanging +sidedata (additional revision metadata not part of the digest). Changegroups when not exchanging treemanifests consist of 3 logical segments:: @@ -74,8 +75,8 @@ entry (either that the recipient already has, or previously specified in the bundle/changegroup). -The *delta header* is different between versions ``1``, ``2``, and -``3`` of the changegroup format. +The *delta header* is different between versions ``1``, ``2``, ``3`` and ``4`` +of the changegroup format. Version 1 (headerlen=80):: @@ -104,6 +105,15 @@ | | | | | | | +------------------------------------------------------------------------------+ +Version 4 (headerlen=103):: + + +------------------------------------------------------------------------------+----------+ + | | | | | | | | + | node | p1 node | p2 node | base node | link node | flags | pflags | + | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) | + | | | | | | | | + +------------------------------------------------------------------------------+----------+ + The *delta data* consists of ``chunklen - 4 - headerlen`` bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already has, or previously @@ -140,12 +150,24 @@ Externally stored. The revision fulltext contains ``key:value`` ``\n`` delimited metadata defining an object stored elsewhere. Used by the LFS extension. +4096 + Contains copy information. This revision changes files in a way that could + affect copy tracing. This does *not* affect changegroup handling, but is + relevant for other parts of Mercurial. For historical reasons, the integer values are identical to revlog version 1 per-revision storage flags and correspond to bits being set in this 2-byte field. Bits were allocated starting from the most-significant bit, hence the reverse ordering and allocation of these flags. +The *pflags* (protocol flags) field holds bitwise flags affecting the protocol +itself. They are first in the header since they may affect the handling of the +rest of the fields in a future version. They are defined as such: + +1 indicates whether to read a chunk of sidedata (of variable length) right + after the revision flags. + + Changeset Segment ================= @@ -166,9 +188,9 @@ Treemanifests Segment --------------------- -The *treemanifests segment* only exists in changegroup version ``3``, and -only if the 'treemanifest' param is part of the bundle2 changegroup part -(it is not possible to use changegroup version 3 outside of bundle2). +The *treemanifests segment* only exists in changegroup version ``3`` and ``4``, +and only if the 'treemanifest' param is part of the bundle2 changegroup part +(it is not possible to use changegroup version 3 or 4 outside of bundle2). Aside from the filenames in the *treemanifests segment* containing a trailing ``/`` character, it behaves identically to the *filelogs segment* (see below). The final sub-segment is followed by an *empty chunk* (logically, diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py --- a/mercurial/interfaces/repository.py +++ b/mercurial/interfaces/repository.py @@ -27,14 +27,12 @@ REVISION_FLAG_CENSORED = 1 << 15 REVISION_FLAG_ELLIPSIS = 1 << 14 REVISION_FLAG_EXTSTORED = 1 << 13 -REVISION_FLAG_SIDEDATA = 1 << 12 -REVISION_FLAG_HASCOPIESINFO = 1 << 11 +REVISION_FLAG_HASCOPIESINFO = 1 << 12 REVISION_FLAGS_KNOWN = ( REVISION_FLAG_CENSORED | REVISION_FLAG_ELLIPSIS | REVISION_FLAG_EXTSTORED - | REVISION_FLAG_SIDEDATA | REVISION_FLAG_HASCOPIESINFO ) @@ -457,6 +455,13 @@ """Raw sidedata bytes for the given revision.""" ) + protocol_flags = interfaceutil.Attribute( + """Single byte of integer flags that can influence the protocol. + + This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants. + """ + ) + class ifilerevisionssequence(interfaceutil.Interface): """Contains index data for all revisions of a file. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -62,7 +62,6 @@ REVIDX_HASCOPIESINFO, REVIDX_ISCENSORED, REVIDX_RAWTEXT_CHANGING_FLAGS, - REVIDX_SIDEDATA, ) from .thirdparty import attr from . import ( @@ -104,7 +103,6 @@ REVLOGV2_FLAGS REVIDX_ISCENSORED REVIDX_ELLIPSIS -REVIDX_SIDEDATA REVIDX_HASCOPIESINFO REVIDX_EXTSTORED REVIDX_DEFAULT_FLAGS @@ -210,6 +208,7 @@ revision = attr.ib() delta = attr.ib() sidedata = attr.ib() + protocol_flags = attr.ib() linknode = attr.ib(default=None) diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -85,8 +85,6 @@ REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS # revision data is stored externally REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED -# revision data contains extra metadata not part of the official digest -REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA # revision changes files in a way that could affect copy tracing. REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO REVIDX_DEFAULT_FLAGS = 0 @@ -95,13 +93,10 @@ REVIDX_ISCENSORED, REVIDX_ELLIPSIS, REVIDX_EXTSTORED, - REVIDX_SIDEDATA, REVIDX_HASCOPIESINFO, ] # bitmark for flags that could cause rawdata content change -REVIDX_RAWTEXT_CHANGING_FLAGS = ( - REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA -) +REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 diff --git a/mercurial/revlogutils/flagutil.py b/mercurial/revlogutils/flagutil.py --- a/mercurial/revlogutils/flagutil.py +++ b/mercurial/revlogutils/flagutil.py @@ -18,7 +18,6 @@ REVIDX_HASCOPIESINFO, REVIDX_ISCENSORED, REVIDX_RAWTEXT_CHANGING_FLAGS, - REVIDX_SIDEDATA, ) from .. import error, util @@ -28,7 +27,6 @@ REVIDX_ISCENSORED REVIDX_ELLIPSIS REVIDX_EXTSTORED -REVIDX_SIDEDATA REVIDX_HASCOPIESINFO, REVIDX_DEFAULT_FLAGS REVIDX_FLAGS_ORDER diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py --- a/mercurial/utils/storageutil.py +++ b/mercurial/utils/storageutil.py @@ -28,6 +28,10 @@ _nullhash = hashutil.sha1(nullid) +# revision data contains extra metadata not part of the official digest +# Only used in changegroup >= v4. +CG_FLAG_SIDEDATA = 1 + def hashrevisionsha1(text, p1, p2): """Compute the SHA-1 for revision data and its parents. @@ -486,7 +490,7 @@ available.add(rev) - sidedata = None + serialized_sidedata = None if sidedata_helpers: sidedata = store.sidedata(rev) sidedata = run_sidedata_helpers( @@ -495,18 +499,26 @@ sidedata=sidedata, rev=rev, ) - sidedata = sidedatamod.serialize_sidedata(sidedata) + if sidedata: + serialized_sidedata = sidedatamod.serialize_sidedata(sidedata) + + flags = flagsfn(rev) if flagsfn else 0 + protocol_flags = 0 + if serialized_sidedata: + # Advertise that sidedata exists to the other side + protocol_flags |= CG_FLAG_SIDEDATA yield resultcls( node=node, p1node=fnode(p1rev), p2node=fnode(p2rev), basenode=fnode(baserev), - flags=flagsfn(rev) if flagsfn else 0, + flags=flags, baserevisionsize=baserevisionsize, revision=revision, delta=delta, - sidedata=sidedata, + sidedata=serialized_sidedata, + protocol_flags=protocol_flags, ) prevrev = rev diff --git a/tests/test-check-interfaces.py b/tests/test-check-interfaces.py --- a/tests/test-check-interfaces.py +++ b/tests/test-check-interfaces.py @@ -282,6 +282,7 @@ revision=b'', sidedata=b'', delta=None, + protocol_flags=b'', ) checkzobject(rd) diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1134,12 +1134,13 @@ the changelog data, root/flat manifest data, treemanifest data, and filelogs. - There are 3 versions of changegroups: "1", "2", and "3". From a high- + There are 4 versions of changegroups: "1", "2", "3" and "4". From a high- level, versions "1" and "2" are almost exactly the same, with the only difference being an additional item in the *delta header*. Version "3" adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the - "changegroup" part in the bundle2). + "changegroup" part in the bundle2). Version "4" adds support for + exchanging sidedata (additional revision metadata not part of the digest). Changegroups when not exchanging treemanifests consist of 3 logical segments: @@ -1206,8 +1207,8 @@ existing entry (either that the recipient already has, or previously specified in the bundle/changegroup). - The *delta header* is different between versions "1", "2", and "3" of the - changegroup format. + The *delta header* is different between versions "1", "2", "3" and "4" of + the changegroup format. Version 1 (headerlen=80): @@ -1236,6 +1237,15 @@ | | | | | | | +------------------------------------------------------------------------------+ + Version 4 (headerlen=103): + + +------------------------------------------------------------------------------+----------+ + | | | | | | | | + | node | p1 node | p2 node | base node | link node | flags | pflags | + | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) | + | | | | | | | | + +------------------------------------------------------------------------------+----------+ + The *delta data* consists of "chunklen - 4 - headerlen" bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already @@ -1276,11 +1286,24 @@ delimited metadata defining an object stored elsewhere. Used by the LFS extension. + 4096 + Contains copy information. This revision changes files in a way that + could affect copy tracing. This does *not* affect changegroup handling, + but is relevant for other parts of Mercurial. + For historical reasons, the integer values are identical to revlog version 1 per-revision storage flags and correspond to bits being set in this 2-byte field. Bits were allocated starting from the most-significant bit, hence the reverse ordering and allocation of these flags. + The *pflags* (protocol flags) field holds bitwise flags affecting the + protocol itself. They are first in the header since they may affect the + handling of the rest of the fields in a future version. They are defined + as such: + + 1 indicates whether to read a chunk of sidedata (of variable length) right + after the revision flags. + Changeset Segment ================= @@ -1301,14 +1324,14 @@ Treemanifests Segment --------------------- - The *treemanifests segment* only exists in changegroup version "3", and - only if the 'treemanifest' param is part of the bundle2 changegroup part - (it is not possible to use changegroup version 3 outside of bundle2). - Aside from the filenames in the *treemanifests segment* containing a - trailing "/" character, it behaves identically to the *filelogs segment* - (see below). The final sub-segment is followed by an *empty chunk* - (logically, a sub-segment with filename size 0). This denotes the boundary - to the *filelogs segment*. + The *treemanifests segment* only exists in changegroup version "3" and + "4", and only if the 'treemanifest' param is part of the bundle2 + changegroup part (it is not possible to use changegroup version 3 or 4 + outside of bundle2). Aside from the filenames in the *treemanifests + segment* containing a trailing "/" character, it behaves identically to + the *filelogs segment* (see below). The final sub-segment is followed by + an *empty chunk* (logically, a sub-segment with filename size 0). This + denotes the boundary to the *filelogs segment*. Filelogs Segment ================ @@ -3636,12 +3659,13 @@ filelogs.
- There are 3 versions of changegroups: "1", "2", and "3". From a + There are 4 versions of changegroups: "1", "2", "3" and "4". From a high-level, versions "1" and "2" are almost exactly the same, with the only difference being an additional item in the *delta header*. Version "3" adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the - "changegroup" part in the bundle2). + "changegroup" part in the bundle2). Version "4" adds support for exchanging + sidedata (additional revision metadata not part of the digest).
Changegroups when not exchanging treemanifests consist of 3 logical @@ -3721,8 +3745,8 @@ bundle/changegroup).
- The *delta header* is different between versions "1", "2", and - "3" of the changegroup format. + The *delta header* is different between versions "1", "2", "3" and "4" + of the changegroup format.
Version 1 (headerlen=80): @@ -3758,6 +3782,17 @@ +------------------------------------------------------------------------------+
+ Version 4 (headerlen=103): +
++ +------------------------------------------------------------------------------+----------+ + | | | | | | | | + | node | p1 node | p2 node | base node | link node | flags | pflags | + | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) | + | | | | | | | | + +------------------------------------------------------------------------------+----------+ ++
The *delta data* consists of "chunklen - 4 - headerlen" bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already has, or previously @@ -3796,6 +3831,8 @@
For historical reasons, the integer values are identical to revlog version 1 @@ -3803,6 +3840,15 @@ field. Bits were allocated starting from the most-significant bit, hence the reverse ordering and allocation of these flags.
++ The *pflags* (protocol flags) field holds bitwise flags affecting the protocol + itself. They are first in the header since they may affect the handling of the + rest of the fields in a future version. They are defined as such: +
+The *changeset segment* consists of a single *delta group* holding @@ -3820,9 +3866,9 @@
- The *treemanifests segment* only exists in changegroup version "3", and - only if the 'treemanifest' param is part of the bundle2 changegroup part - (it is not possible to use changegroup version 3 outside of bundle2). + The *treemanifests segment* only exists in changegroup version "3" and "4", + and only if the 'treemanifest' param is part of the bundle2 changegroup part + (it is not possible to use changegroup version 3 or 4 outside of bundle2). Aside from the filenames in the *treemanifests segment* containing a trailing "/" character, it behaves identically to the *filelogs segment* (see below). The final sub-segment is followed by an *empty chunk* (logically, diff --git a/tests/test-lfs-serve.t b/tests/test-lfs-serve.t --- a/tests/test-lfs-serve.t +++ b/tests/test-lfs-serve.t @@ -355,11 +355,11 @@ # LFS required- both lfs and non-lfs revlogs have 0x2000 flag *** runcommand debugprocessors lfs.bin -R ../server registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand debugprocessors nonlfs2.txt -R ../server registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions --cwd ../server extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -368,7 +368,7 @@ # LFS not enabled- revlogs don't have 0x2000 flag *** runcommand debugprocessors nonlfs3.txt registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' *** runcommand config extensions extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -411,11 +411,11 @@ # LFS enabled- both lfs and non-lfs revlogs have 0x2000 flag *** runcommand debugprocessors lfs.bin -R ../server registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand debugprocessors nonlfs2.txt -R ../server registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions --cwd ../server extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -424,7 +424,7 @@ # LFS enabled without requirement- revlogs have 0x2000 flag *** runcommand debugprocessors nonlfs3.txt registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -433,7 +433,7 @@ # LFS disabled locally- revlogs don't have 0x2000 flag *** runcommand debugprocessors nonlfs.txt -R ../nonlfs registered processor '0x8000' - registered processor '0x800' + registered processor '0x1000' *** runcommand config extensions --cwd ../nonlfs extensions.debugprocessors=$TESTTMP/debugprocessors.py extensions.lfs=!