diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -20,6 +20,7 @@
 
 from .dirstateutils import (
     docket as docketmod,
+    v2,
 )
 
 parsers = policy.importmod('parsers')
diff --git a/mercurial/dirstateutils/docket.py b/mercurial/dirstateutils/docket.py
--- a/mercurial/dirstateutils/docket.py
+++ b/mercurial/dirstateutils/docket.py
@@ -10,14 +10,10 @@
 import struct
 
 from ..revlogutils import docket as docket_mod
-
+from . import v2
 
 V2_FORMAT_MARKER = b"dirstate-v2\n"
 
-# Must match the constant of the same name in
-# `rust/hg-core/src/dirstate_tree/on_disk.rs`
-TREE_METADATA_SIZE = 44
-
 # * 12 bytes: format marker
 # * 32 bytes: node ID of the working directory's first parent
 # * 32 bytes: node ID of the working directory's second parent
@@ -29,7 +25,7 @@
 # Node IDs are null-padded if shorter than 32 bytes.
 # A data file shorter than the specified used size is corrupted (truncated)
 HEADER = struct.Struct(
-    ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
+    ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), v2.TREE_METADATA_SIZE)
 )
 
 
diff --git a/mercurial/dirstateutils/v2.py b/mercurial/dirstateutils/v2.py
new file mode 100644
--- /dev/null
+++ b/mercurial/dirstateutils/v2.py
@@ -0,0 +1,118 @@
+# v2.py - Pure-Python implementation of the dirstate-v2 file format
+#
+# Copyright Mercurial Contributors
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import struct
+
+from .. import policy
+
+parsers = policy.importmod('parsers')
+DirstateItem = parsers.DirstateItem
+
+
+# Must match the constant of the same name in
+# `rust/hg-core/src/dirstate_tree/on_disk.rs`
+TREE_METADATA_SIZE = 44
+NODE_SIZE = 43
+
+
+# Must match the `TreeMetadata` Rust struct in
+# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
+#
+# * 4 bytes: start offset of root nodes
+# * 4 bytes: number of root nodes
+# * 4 bytes: total number of nodes in the tree that have an entry
+# * 4 bytes: total number of nodes in the tree that have a copy source
+# * 4 bytes: number of bytes in the data file that are not used anymore
+# * 4 bytes: unused
+# * 20 bytes: SHA-1 hash of ignore patterns
+TREE_METADATA = struct.Struct('>LLLLL4s20s')
+
+
+# Must match the `Node` Rust struct in
+# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
+#
+# * 4 bytes: start offset of full path
+# * 2 bytes: length of the full path
+# * 2 bytes: length within the full path before its "base name"
+# * 4 bytes: start offset of the copy source if any, or zero for no copy source
+# * 2 bytes: length of the copy source if any, or unused
+# * 4 bytes: start offset of child nodes
+# * 4 bytes: number of child nodes
+# * 4 bytes: number of descendant nodes that have an entry
+# * 4 bytes: number of descendant nodes that have a "tracked" state
+# * 1 byte: state
+# * 4 bytes: entry mode
+# * 4 bytes: entry size
+# * 4 bytes: entry mtime
+NODE = struct.Struct('>LHHLHLLLLclll')
+
+
+assert TREE_METADATA_SIZE == TREE_METADATA.size
+assert NODE_SIZE == NODE.size
+
+
+def parse_dirstate(map, copy_map, data, tree_metadata):
+    """Parse a dirstate-v2 data file into `map` and `copy_map`.
+
+    `tree_metadata` is unpacked with `TREE_METADATA` to locate the root
+    nodes inside `data`; both mappings are filled in place.
+    """
+    (
+        root_nodes_start,
+        root_nodes_len,
+        _nodes_with_entry_count,
+        _nodes_with_copy_source_count,
+        _unreachable_bytes,
+        _unused,
+        _ignore_patterns_hash,
+    ) = TREE_METADATA.unpack(tree_metadata)
+    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
+
+
+def parse_nodes(map, copy_map, data, start, len):
+    """Parse `len` consecutive nodes of `data` starting at offset `start`.
+
+    Child nodes are parsed recursively.  Nodes whose state is one of
+    `n`, `a`, `r` or `m` get an item in `map`, plus a `copy_map` entry
+    when their copy source offset is non-zero.
+    """
+    for i in range(len):
+        node_start = start + NODE_SIZE * i
+        node_bytes = slice_with_len(data, node_start, NODE_SIZE)
+        (
+            path_start,
+            path_len,
+            _basename_start,
+            copy_source_start,
+            copy_source_len,
+            children_start,
+            children_count,
+            _descendants_with_entry_count,
+            _tracked_descendants_count,
+            state,
+            mode,
+            size,
+            mtime,
+        ) = NODE.unpack(node_bytes)
+        # Recurse
+        parse_nodes(map, copy_map, data, children_start, children_count)
+
+        if state not in b'narm':
+            continue
+        path = slice_with_len(data, path_start, path_len)
+        map[path] = DirstateItem.from_v1_data(state, mode, size, mtime)
+        if copy_source_start:
+            copy_map[path] = slice_with_len(
+                data, copy_source_start, copy_source_len
+            )
+
+
+def slice_with_len(data, start, len):
+    """Return the slice of `data` starting at `start`, at most `len` long."""
+    return data[start : start + len]
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -47,16 +47,16 @@
 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
 
-/// Must match the constant of the same name in
-/// `mercurial/dirstateutils/docket.py`
+/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
 const TREE_METADATA_SIZE: usize = 44;
+const NODE_SIZE: usize = 43;
 
 /// Make sure that size-affecting changes are made knowingly
 #[allow(unused)]
 fn static_assert_size_of() {
     let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
     let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
-    let _ = std::mem::transmute::<Node, [u8; 43]>;
+    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
 }
 
 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
@@ -169,8 +169,8 @@
 #[repr(C)]
 struct Entry {
     mode: I32Be,
+    size: I32Be,
     mtime: I32Be,
-    size: I32Be,
 }
 
 /// Duration since the Unix epoch