diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -33,6 +33,7 @@ ) from .interfaces import repository +from .revlogutils import constants from .revlogutils import sidedata as sidedatamod _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s") @@ -645,10 +646,14 @@ (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res - sidedata_raw = getchunk(self._stream) sidedata = {} - if len(sidedata_raw) > 0: + if flags & constants.REVIDX_SIDEDATA: + sidedata_raw = getchunk(self._stream) sidedata = sidedatamod.deserialize_sidedata(sidedata_raw) + # Remove that flag to prevent it being passed around again later, + # potentially incorrectly. It's only useful for cg4, not at the + # revlog layer. + flags &= ~constants.REVIDX_SIDEDATA return node, p1, p2, cs, deltabase, delta, flags, sidedata @@ -693,10 +698,10 @@ yield prefix yield data - sidedata = delta.sidedata - if sidedata is not None: + if delta.flags & constants.REVIDX_SIDEDATA: # Need a separate chunk for sidedata to be able to differentiate # "raw delta" length and sidedata length + sidedata = delta.sidedata yield chunkheader(len(sidedata)) yield sidedata @@ -1929,7 +1934,6 @@ sd_computers = collections.defaultdict(list) # Computers for categories to remove from sidedata sd_removers = collections.defaultdict(list) - to_generate = remote_sd_categories - repo._wanted_sidedata to_remove = repo._wanted_sidedata - remote_sd_categories if pull: diff --git a/mercurial/helptext/internals/changegroups.txt b/mercurial/helptext/internals/changegroups.txt --- a/mercurial/helptext/internals/changegroups.txt +++ b/mercurial/helptext/internals/changegroups.txt @@ -2,12 +2,13 @@ the changelog data, root/flat manifest data, treemanifest data, and filelogs. -There are 3 versions of changegroups: ``1``, ``2``, and ``3``. From a +There are 4 versions of changegroups: ``1``, ``2``, ``3`` and ``4``. From a high-level, versions ``1`` and ``2`` are almost exactly the same, with the only difference being an additional item in the *delta header*. Version ``3`` adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the -``changegroup`` part in the bundle2). +``changegroup`` part in the bundle2). Version ``4`` adds support for exchanging +sidedata (additional revision metadata not part of the digest). Changegroups when not exchanging treemanifests consist of 3 logical segments:: @@ -74,8 +75,8 @@ entry (either that the recipient already has, or previously specified in the bundle/changegroup). -The *delta header* is different between versions ``1``, ``2``, and -``3`` of the changegroup format. +The *delta header* is different between versions ``1``, ``2``, ``3`` and ``4`` +of the changegroup format. Version 1 (headerlen=80):: @@ -95,7 +96,7 @@ | | | | | | +------------------------------------------------------------------+ -Version 3 (headerlen=102):: +Version 3 and 4 (headerlen=102):: +------------------------------------------------------------------------------+ | | | | | | | @@ -104,6 +105,9 @@ | | | | | | | +------------------------------------------------------------------------------+ +In version 4, the presence of the sidedata flag (REVIDX_SIDEDATA) indicates +whether to read a chunk of sidedata (of variable length) right after the flags. + The *delta data* consists of ``chunklen - 4 - headerlen`` bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already has, or previously @@ -140,6 +144,12 @@ Externally stored. The revision fulltext contains ``key:value`` ``\n`` delimited metadata defining an object stored elsewhere. Used by the LFS extension. +4096 + Contains sidedata information. This revision contains extra metadata not + part of the official digest. +2048 + Contains copy information. This revision changes files in a way that could + affect copy tracing. For historical reasons, the integer values are identical to revlog version 1 per-revision storage flags and correspond to bits being set in this 2-byte @@ -166,9 +176,9 @@ Treemanifests Segment --------------------- -The *treemanifests segment* only exists in changegroup version ``3``, and -only if the 'treemanifest' param is part of the bundle2 changegroup part -(it is not possible to use changegroup version 3 outside of bundle2). +The *treemanifests segment* only exists in changegroup version ``3`` and ``4``, +and only if the 'treemanifest' param is part of the bundle2 changegroup part +(it is not possible to use changegroup version 3 or 4 outside of bundle2). Aside from the filenames in the *treemanifests segment* containing a trailing ``/`` character, it behaves identically to the *filelogs segment* (see below). The final sub-segment is followed by an *empty chunk* (logically, diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -86,6 +86,7 @@ # revision data is stored externally REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED # revision data contains extra metadata not part of the official digest +# Only used in changegroup v4 as an optimization, not in revlog. REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA # revision changes files in a way that could affect copy tracing. REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO diff --git a/mercurial/revlogutils/flagutil.py b/mercurial/revlogutils/flagutil.py --- a/mercurial/revlogutils/flagutil.py +++ b/mercurial/revlogutils/flagutil.py @@ -40,6 +40,7 @@ flagprocessors = { REVIDX_ISCENSORED: None, REVIDX_HASCOPIESINFO: None, + REVIDX_SIDEDATA: None, } diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py --- a/mercurial/utils/storageutil.py +++ b/mercurial/utils/storageutil.py @@ -24,6 +24,7 @@ ) from ..interfaces import repository from ..revlogutils import sidedata as sidedatamod +from ..revlogutils import constants from ..utils import hashutil _nullhash = hashutil.sha1(nullid) @@ -486,7 +487,7 @@ available.add(rev) - sidedata = None + serialized_sidedata = None if sidedata_helpers: sidedata = store.sidedata(rev) sidedata = run_sidedata_helpers( @@ -495,18 +496,23 @@ sidedata=sidedata, rev=rev, ) - sidedata = sidedatamod.serialize_sidedata(sidedata) + if sidedata: + serialized_sidedata = sidedatamod.serialize_sidedata(sidedata) + flags = flagsfn(rev) if flagsfn else 0 + if serialized_sidedata: + # Advertise that sidedata exists to the other side + flags |= constants.REVIDX_SIDEDATA yield resultcls( node=node, p1node=fnode(p1rev), p2node=fnode(p2rev), basenode=fnode(baserev), - flags=flagsfn(rev) if flagsfn else 0, + flags=flags, baserevisionsize=baserevisionsize, revision=revision, delta=delta, - sidedata=sidedata, + sidedata=serialized_sidedata, ) prevrev = rev diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1134,12 +1134,13 @@ the changelog data, root/flat manifest data, treemanifest data, and filelogs. - There are 3 versions of changegroups: "1", "2", and "3". From a high- + There are 4 versions of changegroups: "1", "2", "3" and "4". From a high- level, versions "1" and "2" are almost exactly the same, with the only difference being an additional item in the *delta header*. Version "3" adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the - "changegroup" part in the bundle2). + "changegroup" part in the bundle2). Version "4" adds support for + exchanging sidedata (additional revision metadata not part of the digest). Changegroups when not exchanging treemanifests consist of 3 logical segments: @@ -1206,8 +1207,8 @@ existing entry (either that the recipient already has, or previously specified in the bundle/changegroup). - The *delta header* is different between versions "1", "2", and "3" of the - changegroup format. + The *delta header* is different between versions "1", "2", "3" and "4" of + the changegroup format. Version 1 (headerlen=80): @@ -1227,7 +1228,7 @@ | | | | | | +------------------------------------------------------------------+ - Version 3 (headerlen=102): + Version 3 and 4 (headerlen=102): +------------------------------------------------------------------------------+ | | | | | | | @@ -1236,6 +1237,10 @@ | | | | | | | +------------------------------------------------------------------------------+ + In version 4, the presence of the sidedata flag (REVIDX_SIDEDATA) + indicates whether to read a chunk of sidedata (of variable length) right + after the flags. + The *delta data* consists of "chunklen - 4 - headerlen" bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already @@ -1276,6 +1281,14 @@ delimited metadata defining an object stored elsewhere. Used by the LFS extension. + 4096 + Contains sidedata information. This revision contains extra metadata + not part of the official digest. + + 2048 + Contains copy information. This revision changes files in a way that + could affect copy tracing. + For historical reasons, the integer values are identical to revlog version 1 per-revision storage flags and correspond to bits being set in this 2-byte field. Bits were allocated starting from the most-significant bit, @@ -1301,14 +1314,14 @@ Treemanifests Segment --------------------- - The *treemanifests segment* only exists in changegroup version "3", and - only if the 'treemanifest' param is part of the bundle2 changegroup part - (it is not possible to use changegroup version 3 outside of bundle2). - Aside from the filenames in the *treemanifests segment* containing a - trailing "/" character, it behaves identically to the *filelogs segment* - (see below). The final sub-segment is followed by an *empty chunk* - (logically, a sub-segment with filename size 0). This denotes the boundary - to the *filelogs segment*. + The *treemanifests segment* only exists in changegroup version "3" and + "4", and only if the 'treemanifest' param is part of the bundle2 + changegroup part (it is not possible to use changegroup version 3 or 4 + outside of bundle2). Aside from the filenames in the *treemanifests + segment* containing a trailing "/" character, it behaves identically to + the *filelogs segment* (see below). The final sub-segment is followed by + an *empty chunk* (logically, a sub-segment with filename size 0). This + denotes the boundary to the *filelogs segment*. Filelogs Segment ================ @@ -3636,12 +3649,13 @@ filelogs.

- There are 3 versions of changegroups: "1", "2", and "3". From a + There are 4 versions of changegroups: "1", "2", "3" and "4". From a high-level, versions "1" and "2" are almost exactly the same, with the only difference being an additional item in the *delta header*. Version "3" adds support for storage flags in the *delta header* and optionally exchanging treemanifests (enabled by setting an option on the - "changegroup" part in the bundle2). + "changegroup" part in the bundle2). Version "4" adds support for exchanging + sidedata (additional revision metadata not part of the digest).

Changegroups when not exchanging treemanifests consist of 3 logical @@ -3721,8 +3735,8 @@ bundle/changegroup).

- The *delta header* is different between versions "1", "2", and - "3" of the changegroup format. + The *delta header* is different between versions "1", "2", "3" and "4" + of the changegroup format.

Version 1 (headerlen=80): @@ -3747,7 +3761,7 @@ +------------------------------------------------------------------+

- Version 3 (headerlen=102): + Version 3 and 4 (headerlen=102):

   +------------------------------------------------------------------------------+
@@ -3758,6 +3772,10 @@
   +------------------------------------------------------------------------------+
   

+ In version 4, the presence of the sidedata flag (REVIDX_SIDEDATA) indicates + whether to read a chunk of sidedata (of variable length) right after the flags. +

+

The *delta data* consists of "chunklen - 4 - headerlen" bytes, which contain a series of *delta*s, densely packed (no separators). These deltas describe a diff from an existing entry (either that the recipient already has, or previously @@ -3796,6 +3814,10 @@

Ellipsis revision. Revision hash does not match data (likely due to rewritten parents).
8192
Externally stored. The revision fulltext contains "key:value" "\n" delimited metadata defining an object stored elsewhere. Used by the LFS extension. +
4096 +
Contains sidedata information. This revision contains extra metadata not part of the official digest. +
2048 +
Contains copy information. This revision changes files in a way that could affect copy tracing.

For historical reasons, the integer values are identical to revlog version 1 @@ -3820,9 +3842,9 @@

Treemanifests Segment

- The *treemanifests segment* only exists in changegroup version "3", and - only if the 'treemanifest' param is part of the bundle2 changegroup part - (it is not possible to use changegroup version 3 outside of bundle2). + The *treemanifests segment* only exists in changegroup version "3" and "4", + and only if the 'treemanifest' param is part of the bundle2 changegroup part + (it is not possible to use changegroup version 3 or 4 outside of bundle2). Aside from the filenames in the *treemanifests segment* containing a trailing "/" character, it behaves identically to the *filelogs segment* (see below). The final sub-segment is followed by an *empty chunk* (logically, diff --git a/tests/test-lfs-serve.t b/tests/test-lfs-serve.t --- a/tests/test-lfs-serve.t +++ b/tests/test-lfs-serve.t @@ -356,10 +356,12 @@ *** runcommand debugprocessors lfs.bin -R ../server registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand debugprocessors nonlfs2.txt -R ../server registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions --cwd ../server extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -369,6 +371,7 @@ *** runcommand debugprocessors nonlfs3.txt registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' *** runcommand config extensions extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -412,10 +415,12 @@ *** runcommand debugprocessors lfs.bin -R ../server registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand debugprocessors nonlfs2.txt -R ../server registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions --cwd ../server extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -425,6 +430,7 @@ *** runcommand debugprocessors nonlfs3.txt registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' registered processor '0x2000' *** runcommand config extensions extensions.debugprocessors=$TESTTMP/debugprocessors.py @@ -434,6 +440,7 @@ *** runcommand debugprocessors nonlfs.txt -R ../nonlfs registered processor '0x8000' registered processor '0x800' + registered processor '0x1000' *** runcommand config extensions --cwd ../nonlfs extensions.debugprocessors=$TESTTMP/debugprocessors.py extensions.lfs=!