diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs +++ b/rust/hg-core/src/dirstate.rs @@ -7,6 +7,7 @@ use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::errors::HgError; +use crate::revlog::node::NULL_NODE; use crate::revlog::Node; use crate::utils::hg_path::{HgPath, HgPathBuf}; use crate::FastHashMap; @@ -25,6 +26,13 @@ pub p2: Node, } +impl DirstateParents { + pub const NULL: Self = Self { + p1: NULL_NODE, + p2: NULL_NODE, + }; +} + /// The C implementation uses all signed types. This will be an issue /// either when 4GB+ source files are commonplace or in 2038, whichever /// comes first. diff --git a/rust/hg-core/src/dirstate_tree.rs b/rust/hg-core/src/dirstate_tree.rs --- a/rust/hg-core/src/dirstate_tree.rs +++ b/rust/hg-core/src/dirstate_tree.rs @@ -2,4 +2,4 @@ pub mod dispatch; pub mod on_disk; pub mod path_with_basename; -mod status; +pub mod status; diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -167,6 +167,16 @@ } } +fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> { + let (header, _) = + Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; + if header.marker == *V2_FORMAT_MARKER { + Ok(header) + } else { + Err(DirstateV2ParseError) + } +} + pub(super) fn read<'on_disk>( on_disk: &'on_disk [u8], ) -> Result< @@ -176,27 +186,19 @@ if on_disk.is_empty() { return Ok((DirstateMap::empty(on_disk), None)); } - let (header, _) = - Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; - let Header { - marker, - parents, - root, - nodes_with_entry_count, - nodes_with_copy_source_count, - } = header; - if marker != V2_FORMAT_MARKER { - return Err(DirstateV2ParseError); - } + let header = read_header(on_disk)?; let dirstate_map = DirstateMap { on_disk, root: dirstate_map::ChildNodes::OnDisk(read_slice::( - on_disk, *root, + on_disk, + header.root, )?), - nodes_with_entry_count: nodes_with_entry_count.get(), - nodes_with_copy_source_count: nodes_with_copy_source_count.get(), + nodes_with_entry_count: header.nodes_with_entry_count.get(), + nodes_with_copy_source_count: header + .nodes_with_copy_source_count + .get(), }; - let parents = Some(parents.clone()); + let parents = Some(header.parents.clone()); Ok((dirstate_map, parents)) } @@ -414,6 +416,35 @@ .ok_or_else(|| DirstateV2ParseError) } +pub(crate) fn parse_dirstate_parents( + on_disk: &[u8], +) -> Result<&DirstateParents, HgError> { + Ok(&read_header(on_disk)?.parents) +} + +pub(crate) fn for_each_tracked_path<'on_disk>( + on_disk: &'on_disk [u8], + mut f: impl FnMut(&'on_disk HgPath), +) -> Result<(), DirstateV2ParseError> { + let header = read_header(on_disk)?; + fn recur<'on_disk>( + on_disk: &'on_disk [u8], + nodes: Slice, + f: &mut impl FnMut(&'on_disk HgPath), + ) -> Result<(), DirstateV2ParseError> { + for node in read_slice::(on_disk, nodes)? { + if let Some(state) = node.state()? { + if state.is_tracked() { + f(node.full_path(on_disk)?) + } + } + recur(on_disk, node.children, f)? + } + Ok(()) + } + recur(on_disk, header.root, &mut f) +} + pub(super) fn write( dirstate_map: &mut DirstateMap, parents: DirstateParents, diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs --- a/rust/hg-core/src/operations/list_tracked_files.rs +++ b/rust/hg-core/src/operations/list_tracked_files.rs @@ -6,6 +6,7 @@ // GNU General Public License version 2 or any later version. use crate::dirstate::parsers::parse_dirstate_entries; +use crate::dirstate_tree::on_disk::for_each_tracked_path; use crate::errors::HgError; use crate::repo::Repo; use crate::revlog::changelog::Changelog; @@ -13,6 +14,7 @@ use crate::revlog::node::Node; use crate::revlog::revlog::RevlogError; use crate::utils::hg_path::HgPath; +use crate::DirstateError; use rayon::prelude::*; /// List files under Mercurial control in the working directory @@ -20,25 +22,34 @@ pub struct Dirstate { /// The `dirstate` content. content: Vec, + dirstate_v2: bool, } impl Dirstate { pub fn new(repo: &Repo) -> Result { - let content = repo.hg_vfs().read("dirstate")?; - Ok(Self { content }) + Ok(Self { + content: repo.hg_vfs().read("dirstate")?, + dirstate_v2: repo.has_dirstate_v2(), + }) } - pub fn tracked_files(&self) -> Result, HgError> { + pub fn tracked_files(&self) -> Result, DirstateError> { let mut files = Vec::new(); - let _parents = parse_dirstate_entries( - &self.content, - |path, entry, _copy_source| { - if entry.state.is_tracked() { - files.push(path) - } - Ok(()) - }, - )?; + if !self.content.is_empty() { + if self.dirstate_v2 { + for_each_tracked_path(&self.content, |path| files.push(path))? + } else { + let _parents = parse_dirstate_entries( + &self.content, + |path, entry, _copy_source| { + if entry.state.is_tracked() { + files.push(path) + } + Ok(()) + }, + )?; + } + } files.par_sort_unstable(); Ok(files) } diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -218,12 +218,23 @@ } } + pub fn has_dirstate_v2(&self) -> bool { + self.requirements + .contains(requirements::DIRSTATE_V2_REQUIREMENT) + } + pub fn dirstate_parents( &self, ) -> Result { let dirstate = self.hg_vfs().mmap_open("dirstate")?; - let parents = - crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?; + if dirstate.is_empty() { + return Ok(crate::dirstate::DirstateParents::NULL); + } + let parents = if self.has_dirstate_v2() { + crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)? + } else { + crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? + }; Ok(parents.clone()) } } diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs --- a/rust/hg-core/src/requirements.rs +++ b/rust/hg-core/src/requirements.rs @@ -82,6 +82,7 @@ SPARSEREVLOG_REQUIREMENT, RELATIVE_SHARED_REQUIREMENT, REVLOG_COMPRESSION_ZSTD, + DIRSTATE_V2_REQUIREMENT, // As of this writing everything rhg does is read-only. // When it starts writing to the repository, it’ll need to either keep the // persistent nodemap up to date or remove this entry: @@ -90,6 +91,8 @@ // Copied from mercurial/requirements.py: +pub(crate) const DIRSTATE_V2_REQUIREMENT: &str = "exp-dirstate-v2"; + /// When narrowing is finalized and no longer subject to format changes, /// we should move this to just "narrow" or similar. #[allow(unused)] diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs +++ b/rust/rhg/src/commands/status.rs @@ -9,6 +9,7 @@ use crate::ui::Ui; use clap::{Arg, SubCommand}; use hg; +use hg::dirstate_tree::dirstate_map::DirstateMap; use hg::errors::HgResultExt; use hg::errors::IoResultExt; use hg::matchers::AlwaysMatcher; @@ -16,7 +17,7 @@ use hg::repo::Repo; use hg::revlog::node::Node; use hg::utils::hg_path::{hg_path_to_os_string, HgPath}; -use hg::{DirstateMap, StatusError}; +use hg::StatusError; use hg::{HgPathCow, StatusOptions}; use log::{info, warn}; use std::convert::TryInto; @@ -164,14 +165,17 @@ }; let repo = invocation.repo?; - let mut dmap = DirstateMap::new(); let dirstate_data = repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?; let dirstate_data = match &dirstate_data { Some(mmap) => &**mmap, None => b"", }; - let parents = dmap.read(dirstate_data)?; + let (mut dmap, parents) = if repo.has_dirstate_v2() { + DirstateMap::new_v2(dirstate_data)? + } else { + DirstateMap::new_v1(dirstate_data)? + }; let options = StatusOptions { // TODO should be provided by the dirstate parsing and // hence be stored on dmap. Using a value that assumes we aren't @@ -187,8 +191,8 @@ collect_traversed_dirs: false, }; let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded - let (mut ds_status, pattern_warnings) = hg::status( - &dmap, + let (mut ds_status, pattern_warnings) = hg::dirstate_tree::status::status( + &mut dmap, &AlwaysMatcher, repo.working_directory_path().to_owned(), vec![ignore_file],