diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py +++ b/hgext/largefiles/lfutil.py @@ -25,6 +25,7 @@ httpconnection, match as matchmod, pycompat, + requirements, scmutil, sparse, util, @@ -197,6 +198,7 @@ vfs = repo.vfs lfstoredir = longname opener = vfsmod.vfs(vfs.join(lfstoredir)) + use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements lfdirstate = largefilesdirstate( opener, ui, @@ -204,6 +206,7 @@ repo.dirstate._validate, lambda: sparse.matcher(repo), repo.nodeconstants, + use_dirstate_v2, ) # If the largefiles dirstate does not exist, populate and create diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -75,7 +75,14 @@ @interfaceutil.implementer(intdirstate.idirstate) class dirstate(object): def __init__( - self, opener, ui, root, validate, sparsematchfn, nodeconstants + self, + opener, + ui, + root, + validate, + sparsematchfn, + nodeconstants, + use_dirstate_v2, ): """Create a new dirstate object. @@ -83,6 +90,7 @@ dirstate file; root is the root of the directory tracked by the dirstate. """ + self._use_dirstate_v2 = use_dirstate_v2 self._nodeconstants = nodeconstants self._opener = opener self._validate = validate @@ -141,7 +149,11 @@ def _map(self): """Return the dirstate contents (see documentation for dirstatemap).""" self._map = self._mapcls( - self._ui, self._opener, self._root, self._nodeconstants + self._ui, + self._opener, + self._root, + self._nodeconstants, + self._use_dirstate_v2, ) return self._map @@ -1435,13 +1447,16 @@ denormalized form that they appear as in the dirstate. """ - def __init__(self, ui, opener, root, nodeconstants): + def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): self._ui = ui self._opener = opener self._root = root self._filename = b'dirstate' self._nodelen = 20 self._nodeconstants = nodeconstants + assert ( + not use_dirstate_v2 + ), "should have detected unsupported requirement" self._parents = None self._dirtyparents = False @@ -1746,13 +1761,14 @@ if rustmod is not None: class dirstatemap(object): - def __init__(self, ui, opener, root, nodeconstants): + def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): + self._use_dirstate_v2 = use_dirstate_v2 self._nodeconstants = nodeconstants self._ui = ui self._opener = opener self._root = root self._filename = b'dirstate' - self._nodelen = 20 + self._nodelen = 20 # Also update Rust code when changing this! self._parents = None self._dirtyparents = False @@ -1832,9 +1848,14 @@ def parents(self): if not self._parents: + if self._use_dirstate_v2: + offset = len(rustmod.V2_FORMAT_MARKER) + else: + offset = 0 + read_len = offset + self._nodelen * 2 try: fp = self._opendirstatefile() - st = fp.read(40) + st = fp.read(read_len) fp.close() except IOError as err: if err.errno != errno.ENOENT: @@ -1843,7 +1864,8 @@ st = b'' l = len(st) - if l == self._nodelen * 2: + if l == read_len: + st = st[offset:] self._parents = ( st[: self._nodelen], st[self._nodelen : 2 * self._nodelen], @@ -1887,7 +1909,7 @@ False, ) self._rustmap, parents = rustmod.DirstateMap.new( - use_dirstate_tree, st + use_dirstate_tree, self._use_dirstate_v2, st ) if parents and not self._dirtyparents: @@ -1900,7 +1922,10 @@ def write(self, st, now): parents = self.parents() - st.write(self._rustmap.write(parents[0], parents[1], now)) + packed = self._rustmap.write( + self._use_dirstate_v2, parents[0], parents[1], now + ) + st.write(packed) st.close() self._dirtyparents = False diff --git a/mercurial/interfaces/dirstate.py b/mercurial/interfaces/dirstate.py --- a/mercurial/interfaces/dirstate.py +++ b/mercurial/interfaces/dirstate.py @@ -6,7 +6,15 @@ class idirstate(interfaceutil.Interface): - def __init__(opener, ui, root, validate, sparsematchfn, nodeconstants): + def __init__( + opener, + ui, + root, + validate, + sparsematchfn, + nodeconstants, + use_dirstate_v2, + ): """Create a new dirstate object. opener is an open()-like callable that can be used to open the diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -1690,6 +1690,8 @@ def _makedirstate(self): """Extension point for wrapping the dirstate per-repo.""" sparsematchfn = lambda: sparse.matcher(self) + v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT + use_dirstate_v2 = v2_req in self.requirements return dirstate.dirstate( self.vfs, @@ -1698,6 +1700,7 @@ self._dirstatevalidate, sparsematchfn, self.nodeconstants, + use_dirstate_v2, ) def _dirstatevalidate(self, node): diff --git a/rust/hg-core/src/dirstate_tree.rs b/rust/hg-core/src/dirstate_tree.rs --- a/rust/hg-core/src/dirstate_tree.rs +++ b/rust/hg-core/src/dirstate_tree.rs @@ -1,4 +1,5 @@ pub mod dirstate_map; pub mod dispatch; +pub mod on_disk; pub mod path_with_basename; mod status; diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -4,14 +4,17 @@ use std::convert::TryInto; use std::path::PathBuf; +use super::on_disk::V2_FORMAT_MARKER; use super::path_with_basename::WithBasename; use crate::dirstate::parsers::clear_ambiguous_mtime; use crate::dirstate::parsers::pack_entry; use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; use crate::dirstate::parsers::Timestamp; +use crate::errors::HgError; use crate::matchers::Matcher; use crate::utils::hg_path::{HgPath, HgPathBuf}; +use crate::utils::SliceExt; use crate::CopyMapIter; use crate::DirstateEntry; use crate::DirstateError; @@ -75,7 +78,24 @@ ); impl<'on_disk> DirstateMap<'on_disk> { - pub fn new( + #[timed] + pub fn new_v2( + on_disk: &'on_disk [u8], + ) -> Result<(Self, Option), DirstateError> { + if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) { + Self::new_v1(rest) + } else if on_disk.is_empty() { + Self::new_v1(on_disk) + } else { + return Err(HgError::corrupted( + "missing dirstate-v2 magic number", + ) + .into()); + } + } + + #[timed] + pub fn new_v1( on_disk: &'on_disk [u8], ) -> Result<(Self, Option), DirstateError> { let mut map = Self { @@ -84,23 +104,16 @@ nodes_with_entry_count: 0, nodes_with_copy_source_count: 0, }; - let parents = map.read()?; - Ok((map, parents)) - } - - /// Should only be called in `new` - #[timed] - fn read(&mut self) -> Result, DirstateError> { - if self.on_disk.is_empty() { - return Ok(None); + if map.on_disk.is_empty() { + return Ok((map, None)); } let parents = parse_dirstate_entries( - self.on_disk, + map.on_disk, |path, entry, copy_source| { let tracked = entry.state.is_tracked(); let node = Self::get_or_insert_node( - &mut self.root, + &mut map.root, path, WithBasename::to_cow_borrowed, |ancestor| { @@ -119,14 +132,15 @@ ); node.entry = Some(*entry); node.copy_source = copy_source.map(Cow::Borrowed); - self.nodes_with_entry_count += 1; + map.nodes_with_entry_count += 1; if copy_source.is_some() { - self.nodes_with_copy_source_count += 1 + map.nodes_with_copy_source_count += 1 } }, )?; + let parents = Some(parents.clone()); - Ok(Some(parents.clone())) + Ok((map, parents)) } fn get_node(&self, path: &HgPath) -> Option<&Node> { @@ -498,7 +512,8 @@ } } - fn pack( + #[timed] + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, @@ -533,6 +548,18 @@ Ok(packed) } + #[timed] + fn pack_v2( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result, DirstateError> { + // Inefficient but temporary + let mut v2 = V2_FORMAT_MARKER.to_vec(); + v2.append(&mut self.pack_v1(parents, now)?); + Ok(v2) + } + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { // Do nothing, this `DirstateMap` does not a separate `all_dirs` that // needs to be recomputed diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs --- a/rust/hg-core/src/dirstate_tree/dispatch.rs +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -73,7 +73,13 @@ directory: &HgPath, ) -> Result; - fn pack( + fn pack_v1( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result, DirstateError>; + + fn pack_v2( &mut self, parents: DirstateParents, now: Timestamp, @@ -211,7 +217,7 @@ self.has_dir(directory) } - fn pack( + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, @@ -219,6 +225,16 @@ self.pack(parents, now) } + fn pack_v2( + &mut self, + _parents: DirstateParents, + _now: Timestamp, + ) -> Result, DirstateError> { + panic!( + "should have used dirstate_tree::DirstateMap to use the v2 format" + ) + } + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { self.set_all_dirs() } diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -0,0 +1,4 @@ +/// Added at the start of `.hg/dirstate` when the "v2" format is used. +/// Acts like a "magic number". This is a sanity check, not strictly necessary +/// since `.hg/requires` already governs which format should be used. +pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs --- a/rust/hg-cpython/src/dirstate.rs +++ b/rust/hg-cpython/src/dirstate.rs @@ -26,6 +26,7 @@ exc, PyBytes, PyDict, PyErr, PyList, PyModule, PyObject, PyResult, PySequence, Python, }; +use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER; use hg::{utils::hg_path::HgPathBuf, DirstateEntry, EntryState, StateMap}; use libc::{c_char, c_int}; use std::convert::TryFrom; @@ -117,6 +118,7 @@ )?; m.add_class::(py)?; m.add_class::(py)?; + m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?; m.add( py, "status", diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -55,13 +55,17 @@ /// Returns a `(dirstate_map, parents)` tuple @staticmethod - def new(use_dirstate_tree: bool, on_disk: PyBytes) -> PyResult { - let dirstate_error = |_: DirstateError| { - PyErr::new::(py, "Dirstate error".to_string()) + def new( + use_dirstate_tree: bool, + use_dirstate_v2: bool, + on_disk: PyBytes, + ) -> PyResult { + let dirstate_error = |e: DirstateError| { + PyErr::new::(py, format!("Dirstate error: {:?}", e)) }; - let (inner, parents) = if use_dirstate_tree { + let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 { let (map, parents) = - OwningDirstateMap::new(py, on_disk) + OwningDirstateMap::new(py, on_disk, use_dirstate_v2) .map_err(dirstate_error)?; (Box::new(map) as _, parents) } else { @@ -288,6 +292,7 @@ def write( &self, + use_dirstate_v2: bool, p1: PyObject, p2: PyObject, now: PyObject @@ -298,7 +303,13 @@ p2: extract_node_id(py, &p2)?, }; - match self.inner(py).borrow_mut().pack(parents, now) { + let mut inner = self.inner(py).borrow_mut(); + let result = if use_dirstate_v2 { + inner.pack_v2(parents, now) + } else { + inner.pack_v1(parents, now) + }; + match result { Ok(packed) => Ok(PyBytes::new(py, &packed)), Err(_) => Err(PyErr::new::( py, diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs --- a/rust/hg-cpython/src/dirstate/dispatch.rs +++ b/rust/hg-cpython/src/dirstate/dispatch.rs @@ -101,12 +101,20 @@ self.get_mut().has_dir(directory) } - fn pack( + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, ) -> Result, DirstateError> { - self.get_mut().pack(parents, now) + self.get_mut().pack_v1(parents, now) + } + + fn pack_v2( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result, DirstateError> { + self.get_mut().pack_v2(parents, now) } fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs --- a/rust/hg-cpython/src/dirstate/owning.rs +++ b/rust/hg-cpython/src/dirstate/owning.rs @@ -31,9 +31,14 @@ pub fn new( py: Python, on_disk: PyBytes, + use_dirstate_v2: bool, ) -> Result<(Self, Option), DirstateError> { let bytes: &'_ [u8] = on_disk.data(py); - let (map, parents) = DirstateMap::new(bytes)?; + let (map, parents) = if use_dirstate_v2 { + DirstateMap::new_v2(bytes)? + } else { + DirstateMap::new_v1(bytes)? + }; // Like in `bytes` above, this `'_` lifetime parameter borrows from // the bytes buffer owned by `on_disk`.