diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -999,11 +999,7 @@ if repo.dirstate._use_dirstate_v2: docket = repo.dirstate._map.docket hash_len = 20 # 160 bits for SHA-1 - hash_offset = docket.data_size - hash_len # hash is at the end - data_filename = docket.data_filename() - with repo.vfs(data_filename) as f: - f.seek(hash_offset) - hash_bytes = f.read(hash_len) + hash_bytes = docket.tree_metadata[-hash_len:] ui.write(binascii.hexlify(hash_bytes) + b'\n') diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py +++ b/mercurial/dirstatemap.py @@ -640,7 +640,7 @@ else: data = b'' self._rustmap = rustmod.DirstateMap.new_v2( - data, self.docket.data_size + data, self.docket.data_size, self.docket.tree_metadata ) parents = self.docket.parents else: @@ -667,7 +667,7 @@ # We can only append to an existing data file if there is one can_append = self.docket.uuid is not None - packed, append = self._rustmap.write_v2(now, can_append) + packed, meta, append = self._rustmap.write_v2(now, can_append) if append: docket = self.docket data_filename = docket.data_filename() @@ -681,12 +681,13 @@ assert written == len(packed), (written, len(packed)) docket.data_size += len(packed) docket.parents = self.parents() + docket.tree_metadata = meta st.write(docket.serialize()) st.close() else: old_docket = self.docket new_docket = docketmod.DirstateDocket.with_new_uuid( - self.parents(), len(packed) + self.parents(), len(packed), meta ) data_filename = new_docket.data_filename() if tr: diff --git a/mercurial/dirstateutils/docket.py b/mercurial/dirstateutils/docket.py --- a/mercurial/dirstateutils/docket.py +++ b/mercurial/dirstateutils/docket.py @@ -14,47 +14,60 @@ V2_FORMAT_MARKER = b"dirstate-v2\n" +# Must match the constant of the same name in +# `rust/hg-core/src/dirstate_tree/on_disk.rs` +TREE_METADATA_SIZE = 40 + # * 12 bytes: format marker # * 32 bytes: node ID of the working directory's first parent # * 32 bytes: node ID of the working directory's second parent # * 4 bytes: big-endian used size of the data file +# * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately # * 1 byte: length of the data file's UUID # * variable: data file's UUID # # Node IDs are null-padded if shorter than 32 bytes. # A data file shorter than the specified used size is corrupted (truncated) -HEADER = struct.Struct(">{}s32s32sLB".format(len(V2_FORMAT_MARKER))) +HEADER = struct.Struct( + ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE) +) class DirstateDocket(object): data_filename_pattern = b'dirstate.%s.d' - def __init__(self, parents, data_size, uuid): + def __init__(self, parents, data_size, tree_metadata, uuid): self.parents = parents self.data_size = data_size + self.tree_metadata = tree_metadata self.uuid = uuid @classmethod - def with_new_uuid(cls, parents, data): - return cls(parents, data, docket_mod.make_uid()) + def with_new_uuid(cls, parents, data_size, tree_metadata): + return cls(parents, data_size, tree_metadata, docket_mod.make_uid()) @classmethod def parse(cls, data, nodeconstants): if not data: parents = (nodeconstants.nullid, nodeconstants.nullid) - return cls(parents, 0, None) - marker, p1, p2, data_size, uuid_size = HEADER.unpack_from(data) + return cls(parents, 0, b'', None) + marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data) if marker != V2_FORMAT_MARKER: raise ValueError("expected dirstate-v2 marker") uuid = data[HEADER.size : HEADER.size + uuid_size] p1 = p1[: nodeconstants.nodelen] p2 = p2[: nodeconstants.nodelen] - return cls((p1, p2), data_size, uuid) + return cls((p1, p2), data_size, meta, uuid) def serialize(self): p1, p2 = self.parents header = HEADER.pack( - V2_FORMAT_MARKER, p1, p2, self.data_size, len(self.uuid) + V2_FORMAT_MARKER, + p1, + p2, + self.data_size, + self.tree_metadata, + len(self.uuid), ) return header + self.uuid diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -424,9 +424,10 @@ pub fn new_v2( on_disk: &'on_disk [u8], data_size: usize, + metadata: &[u8], ) -> Result { if let Some(data) = on_disk.get(..data_size) { - Ok(on_disk::read(data)?) + Ok(on_disk::read(data, metadata)?) } else { Err(DirstateV2ParseError.into()) } @@ -1094,15 +1095,16 @@ Ok(packed) } - /// Returns new data together with whether that data should be appended to - /// the existing data file whose content is at `self.on_disk` (true), - /// instead of written to a new data file (false). + /// Returns new data and metadata together with whether that data should be + /// appended to the existing data file whose content is at + /// `self.on_disk` (true), instead of written to a new data file + /// (false). #[timed] fn pack_v2( &mut self, now: Timestamp, can_append: bool, - ) -> Result<(Vec, bool), DirstateError> { + ) -> Result<(Vec, Vec, bool), DirstateError> { // TODO: how do we want to handle this in 2038? let now: i32 = now.0.try_into().expect("time overflow"); let mut paths = Vec::new(); diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs --- a/rust/hg-core/src/dirstate_tree/dispatch.rs +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -182,16 +182,17 @@ /// serialize bytes to write a dirstate data file to disk in dirstate-v2 /// format. /// - /// Returns new data together with whether that data should be appended to - /// the existing data file whose content is at `self.on_disk` (true), - /// instead of written to a new data file (false). + /// Returns new data and metadata together with whether that data should be + /// appended to the existing data file whose content is at + /// `self.on_disk` (true), instead of written to a new data file + /// (false). /// /// Note: this is only supported by the tree dirstate map. fn pack_v2( &mut self, now: Timestamp, can_append: bool, - ) -> Result<(Vec, bool), DirstateError>; + ) -> Result<(Vec, Vec, bool), DirstateError>; /// Run the status algorithm. /// @@ -395,7 +396,7 @@ &mut self, _now: Timestamp, _can_append: bool, - ) -> Result<(Vec, bool), DirstateError> { + ) -> Result<(Vec, Vec, bool), DirstateError> { panic!( "should have used dirstate_tree::DirstateMap to use the v2 format" ) diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -47,6 +47,18 @@ pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; +/// Must match the constant of the same name in +/// `mercurial/dirstateutils/docket.py` +const TREE_METADATA_SIZE: usize = 40; + +/// Make sure that size-affecting changes are made knowingly +#[allow(unused)] +fn static_assert_size_of() { + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; +} + // Must match `HEADER` in `mercurial/dirstateutils/docket.py` #[derive(BytesCast)] #[repr(C)] @@ -58,6 +70,8 @@ /// Counted in bytes data_size: Size, + metadata: TreeMetadata, + uuid_size: u8, } @@ -68,7 +82,7 @@ #[derive(BytesCast)] #[repr(C)] -struct Root { +struct TreeMetadata { root_nodes: ChildNodes, nodes_with_entry_count: Size, nodes_with_copy_source_count: Size, @@ -134,7 +148,7 @@ /// - All direct children of this directory (as returned by /// `std::fs::read_dir`) either have a corresponding dirstate node, or /// are ignored by ignore patterns whose hash is in - /// `Root::ignore_patterns_hash`. + /// `TreeMetadata::ignore_patterns_hash`. /// /// This means that if `std::fs::symlink_metadata` later reports the /// same modification time and ignored patterns haven’t changed, a run @@ -205,13 +219,6 @@ /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes type OptPathSlice = PathSlice; -/// Make sure that size-affecting changes are made knowingly -fn _static_assert_size_of() { - let _ = std::mem::transmute::; - let _ = std::mem::transmute::; - let _ = std::mem::transmute::; -} - /// Unexpected file format found in `.hg/dirstate` with the "v2" format. /// /// This should only happen if Mercurial is buggy or a repository is corrupted. @@ -242,6 +249,10 @@ DirstateParents { p1, p2 } } + pub fn tree_metadata(&self) -> &[u8] { + self.header.metadata.as_bytes() + } + pub fn data_size(&self) -> usize { // This `unwrap` could only panic on a 16-bit CPU self.header.data_size.get().try_into().unwrap() @@ -265,40 +276,25 @@ } } -fn read_root<'on_disk>( - on_disk: &'on_disk [u8], -) -> Result<&'on_disk Root, DirstateV2ParseError> { - // Find the `Root` at the end of the given slice - let root_offset = on_disk - .len() - .checked_sub(std::mem::size_of::()) - // A non-empty slice too short is an error - .ok_or(DirstateV2ParseError)?; - let (root, _) = Root::from_bytes(&on_disk[root_offset..]) - .map_err(|_| DirstateV2ParseError)?; - Ok(root) -} - pub(super) fn read<'on_disk>( on_disk: &'on_disk [u8], + metadata: &[u8], ) -> Result, DirstateV2ParseError> { if on_disk.is_empty() { return Ok(DirstateMap::empty(on_disk)); } - let root = read_root(on_disk)?; - let mut unreachable_bytes = root.unreachable_bytes.get(); - // Each append writes a new `Root`, so it’s never reused - unreachable_bytes += std::mem::size_of::() as u32; + let (meta, _) = TreeMetadata::from_bytes(metadata) + .map_err(|_| DirstateV2ParseError)?; let dirstate_map = DirstateMap { on_disk, root: dirstate_map::ChildNodes::OnDisk(read_nodes( on_disk, - root.root_nodes, + meta.root_nodes, )?), - nodes_with_entry_count: root.nodes_with_entry_count.get(), - nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), - ignore_patterns_hash: root.ignore_patterns_hash, - unreachable_bytes, + nodes_with_entry_count: meta.nodes_with_entry_count.get(), + nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), + ignore_patterns_hash: meta.ignore_patterns_hash, + unreachable_bytes: meta.unreachable_bytes.get(), }; Ok(dirstate_map) } @@ -530,9 +526,11 @@ pub(crate) fn for_each_tracked_path<'on_disk>( on_disk: &'on_disk [u8], + metadata: &[u8], mut f: impl FnMut(&'on_disk HgPath), ) -> Result<(), DirstateV2ParseError> { - let root = read_root(on_disk)?; + let (meta, _) = TreeMetadata::from_bytes(metadata) + .map_err(|_| DirstateV2ParseError)?; fn recur<'on_disk>( on_disk: &'on_disk [u8], nodes: ChildNodes, @@ -548,23 +546,23 @@ } Ok(()) } - recur(on_disk, root.root_nodes, &mut f) + recur(on_disk, meta.root_nodes, &mut f) } -/// Returns new data together with whether that data should be appended to the -/// existing data file whose content is at `dirstate_map.on_disk` (true), -/// instead of written to a new data file (false). +/// Returns new data and metadata, together with whether that data should be +/// appended to the existing data file whose content is at +/// `dirstate_map.on_disk` (true), instead of written to a new data file +/// (false). pub(super) fn write( dirstate_map: &mut DirstateMap, can_append: bool, -) -> Result<(Vec, bool), DirstateError> { +) -> Result<(Vec, Vec, bool), DirstateError> { let append = can_append && dirstate_map.write_should_append(); // This ignores the space for paths, and for nodes without an entry. // TODO: better estimate? Skip the `Vec` and write to a file directly? - let size_guess = std::mem::size_of::() - + std::mem::size_of::() - * dirstate_map.nodes_with_entry_count as usize; + let size_guess = std::mem::size_of::() + * dirstate_map.nodes_with_entry_count as usize; let mut writer = Writer { dirstate_map, @@ -574,7 +572,7 @@ let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?; - let root = Root { + let meta = TreeMetadata { root_nodes, nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), nodes_with_copy_source_count: dirstate_map @@ -583,8 +581,7 @@ unreachable_bytes: dirstate_map.unreachable_bytes.into(), ignore_patterns_hash: dirstate_map.ignore_patterns_hash, }; - writer.out.extend(root.as_bytes()); - Ok((writer.out, append)) + Ok((writer.out, meta.as_bytes().to_vec(), append)) } struct Writer<'dmap, 'on_disk> { diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs --- a/rust/hg-core/src/operations/list_tracked_files.rs +++ b/rust/hg-core/src/operations/list_tracked_files.rs @@ -22,27 +22,33 @@ pub struct Dirstate { /// The `dirstate` content. content: Vec, - dirstate_v2: bool, + v2_metadata: Option>, } impl Dirstate { pub fn new(repo: &Repo) -> Result { let mut content = repo.hg_vfs().read("dirstate")?; - if repo.has_dirstate_v2() { + let v2_metadata = if repo.has_dirstate_v2() { let docket = read_docket(&content)?; + let meta = docket.tree_metadata().to_vec(); content = repo.hg_vfs().read(docket.data_filename())?; - } + Some(meta) + } else { + None + }; Ok(Self { content, - dirstate_v2: repo.has_dirstate_v2(), + v2_metadata, }) } pub fn tracked_files(&self) -> Result, DirstateError> { let mut files = Vec::new(); if !self.content.is_empty() { - if self.dirstate_v2 { - for_each_tracked_path(&self.content, |path| files.push(path))? + if let Some(meta) = &self.v2_metadata { + for_each_tracked_path(&self.content, meta, |path| { + files.push(path) + })? } else { let _parents = parse_dirstate_entries( &self.content, diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -84,12 +84,14 @@ def new_v2( on_disk: PyBytes, data_size: usize, + tree_metadata: PyBytes, ) -> PyResult { let dirstate_error = |e: DirstateError| { PyErr::new::(py, format!("Dirstate error: {:?}", e)) }; - let inner = OwningDirstateMap::new_v2(py, on_disk, data_size) - .map_err(dirstate_error)?; + let inner = OwningDirstateMap::new_v2( + py, on_disk, data_size, tree_metadata, + ).map_err(dirstate_error)?; let map = Self::create_instance(py, Box::new(inner))?; Ok(map.into_object()) } @@ -353,9 +355,11 @@ let mut inner = self.inner(py).borrow_mut(); let result = inner.pack_v2(now, can_append); match result { - Ok((packed, append)) => { + Ok((packed, tree_metadata, append)) => { let packed = PyBytes::new(py, &packed); - Ok((packed, append).to_py_object(py).into_object()) + let tree_metadata = PyBytes::new(py, &tree_metadata); + let tuple = (packed, tree_metadata, append); + Ok(tuple.to_py_object(py).into_object()) }, Err(_) => Err(PyErr::new::( py, diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs --- a/rust/hg-cpython/src/dirstate/dispatch.rs +++ b/rust/hg-cpython/src/dirstate/dispatch.rs @@ -128,7 +128,7 @@ &mut self, now: Timestamp, can_append: bool, - ) -> Result<(Vec, bool), DirstateError> { + ) -> Result<(Vec, Vec, bool), DirstateError> { self.get_mut().pack_v2(now, can_append) } diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs --- a/rust/hg-cpython/src/dirstate/owning.rs +++ b/rust/hg-cpython/src/dirstate/owning.rs @@ -49,9 +49,11 @@ py: Python, on_disk: PyBytes, data_size: usize, + tree_metadata: PyBytes, ) -> Result { let bytes: &'_ [u8] = on_disk.data(py); - let map = DirstateMap::new_v2(bytes, data_size)?; + let map = + DirstateMap::new_v2(bytes, data_size, tree_metadata.data(py))?; // Like in `bytes` above, this `'_` lifetime parameter borrows from // the bytes buffer owned by `on_disk`. diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs +++ b/rust/rhg/src/commands/status.rs @@ -168,13 +168,16 @@ let repo = invocation.repo?; let dirstate_data_mmap; let (mut dmap, parents) = if repo.has_dirstate_v2() { + let docket_data = + repo.hg_vfs().read("dirstate").io_not_found_as_none()?; let parents; let dirstate_data; let data_size; - if let Some(docket_data) = - repo.hg_vfs().read("dirstate").io_not_found_as_none()? - { - let docket = on_disk::read_docket(&docket_data)?; + let docket; + let tree_metadata; + if let Some(docket_data) = &docket_data { + docket = on_disk::read_docket(docket_data)?; + tree_metadata = docket.tree_metadata(); parents = Some(docket.parents()); data_size = docket.data_size(); dirstate_data_mmap = repo @@ -184,10 +187,12 @@ dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b""); } else { parents = None; + tree_metadata = b""; data_size = 0; dirstate_data = b""; } - let dmap = DirstateMap::new_v2(dirstate_data, data_size)?; + let dmap = + DirstateMap::new_v2(dirstate_data, data_size, tree_metadata)?; (dmap, parents) } else { dirstate_data_mmap =