diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -45,8 +45,160 @@ /// names. However `HashMap` would waste time always re-hashing the same /// string prefix. pub(super) type NodeKey<'on_disk> = WithBasename>; -pub(super) type ChildNodes<'on_disk> = - FastHashMap, Node<'on_disk>>; + +pub(super) enum ChildNodes<'on_disk> { + InMemory(FastHashMap, Node<'on_disk>>), +} + +pub(super) enum ChildNodesRef<'tree, 'on_disk> { + InMemory(&'tree FastHashMap, Node<'on_disk>>), +} + +pub(super) enum NodeRef<'tree, 'on_disk> { + InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>), +} + +impl Default for ChildNodes<'_> { + fn default() -> Self { + ChildNodes::InMemory(Default::default()) + } +} + +impl<'on_disk> ChildNodes<'on_disk> { + pub(super) fn as_ref<'tree>( + &'tree self, + ) -> ChildNodesRef<'tree, 'on_disk> { + match self { + ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes), + } + } + + pub(super) fn is_empty(&self) -> bool { + match self { + ChildNodes::InMemory(nodes) => nodes.is_empty(), + } + } + + pub(super) fn make_mut( + &mut self, + ) -> &mut FastHashMap, Node<'on_disk>> { + match self { + ChildNodes::InMemory(nodes) => nodes, + } + } +} + +impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> { + pub(super) fn get( + &self, + base_name: &HgPath, + ) -> Option> { + match self { + ChildNodesRef::InMemory(nodes) => nodes + .get_key_value(base_name) + .map(|(k, v)| NodeRef::InMemory(k, v)), + } + } + + /// Iterate in undefined order + pub(super) fn iter( + &self, + ) -> impl Iterator> { + match self { + ChildNodesRef::InMemory(nodes) => { + nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)) + } + } + } + + /// Iterate in parallel in undefined order + pub(super) fn par_iter( + &self, + ) -> impl rayon::iter::ParallelIterator> + { + use rayon::prelude::*; + match self { + ChildNodesRef::InMemory(nodes) => { + nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)) + } + } + } + + pub(super) fn sorted(&self) -> Vec> { + match self { + ChildNodesRef::InMemory(nodes) => { + let mut vec: Vec<_> = nodes + .iter() + .map(|(k, v)| NodeRef::InMemory(k, v)) + .collect(); + // `sort_unstable_by_key` doesn’t allow keys borrowing from the + // value: https://github.com/rust-lang/rust/issues/34162 + vec.sort_unstable_by(|a, b| a.base_name().cmp(b.base_name())); + vec + } + } + } +} + +impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> { + pub(super) fn full_path(&self) -> &'tree HgPath { + match self { + NodeRef::InMemory(path, _node) => path.full_path(), + } + } + + /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree + pub(super) fn full_path_cow(&self) -> Cow<'on_disk, HgPath> { + match self { + NodeRef::InMemory(path, _node) => path.full_path().clone(), + } + } + + pub(super) fn base_name(&self) -> &'tree HgPath { + match self { + NodeRef::InMemory(path, _node) => path.base_name(), + } + } + + pub(super) fn children(&self) -> ChildNodesRef<'tree, 'on_disk> { + match self { + NodeRef::InMemory(_path, node) => node.children.as_ref(), + } + } + + pub(super) fn copy_source(&self) -> Option<&'tree HgPath> { + match self { + NodeRef::InMemory(_path, node) => { + node.copy_source.as_ref().map(|s| &**s) + } + } + } + + pub(super) fn has_entry(&self) -> bool { + match self { + NodeRef::InMemory(_path, node) => node.entry.is_some(), + } + } + + pub(super) fn entry(&self) -> Option { + match self { + NodeRef::InMemory(_path, node) => node.entry, + } + } + pub(super) fn state(&self) -> Option { + match self { + NodeRef::InMemory(_path, node) => { + node.entry.as_ref().map(|entry| entry.state) + } + } + } + + pub(super) fn tracked_descendants_count(&self) -> u32 { + match self { + NodeRef::InMemory(_path, node) => node.tracked_descendants_count, + } + } +} /// Represents a file or a directory #[derive(Default)] @@ -62,22 +214,6 @@ pub(super) tracked_descendants_count: u32, } -impl<'on_disk> Node<'on_disk> { - pub(super) fn state(&self) -> Option { - self.entry.as_ref().map(|entry| entry.state) - } - - pub(super) fn sorted<'tree>( - nodes: &'tree ChildNodes<'on_disk>, - ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree Self)> { - let mut vec: Vec<_> = nodes.iter().collect(); - // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: - // https://github.com/rust-lang/rust/issues/34162 - vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2)); - vec - } -} - impl<'on_disk> DirstateMap<'on_disk> { pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { Self { @@ -139,8 +275,11 @@ Ok((map, parents)) } - fn get_node(&self, path: &HgPath) -> Option<&Node> { - let mut children = &self.root; + fn get_node<'tree>( + &'tree self, + path: &HgPath, + ) -> Option> { + let mut children = self.root.as_ref(); let mut components = path.components(); let mut component = components.next().expect("expected at least one components"); @@ -148,7 +287,7 @@ let child = children.get(component)?; if let Some(next_component) = components.next() { component = next_component; - children = &child.children; + children = child.children(); } else { return Some(child); } @@ -168,7 +307,7 @@ let mut component = components.next().expect("expected at least one components"); loop { - let child = children.get_mut(component)?; + let child = children.make_mut().get_mut(component)?; if let Some(next_component) = components.next() { component = next_component; children = &mut child.children; @@ -196,8 +335,10 @@ // TODO: can we avoid allocating an owned key in cases where the // map already contains that key, without introducing double // lookup? - let child_node = - child_nodes.entry(to_cow(ancestor_path)).or_default(); + let child_node = child_nodes + .make_mut() + .entry(to_cow(ancestor_path)) + .or_default(); if let Some(next) = inclusive_ancestor_paths.next() { each_ancestor(child_node); ancestor_path = next; @@ -242,9 +383,9 @@ node.entry = Some(new_entry) } - fn iter_nodes<'a>( - &'a self, - ) -> impl Iterator, &'a Node)> + 'a { + fn iter_nodes<'tree>( + &'tree self, + ) -> impl Iterator> + 'tree { // Depth first tree traversal. // // If we could afford internal iteration and recursion, @@ -265,22 +406,21 @@ // However we want an external iterator and therefore can’t use the // call stack. Use an explicit stack instead: let mut stack = Vec::new(); - let mut iter = self.root.iter(); + let mut iter = self.root.as_ref().iter(); std::iter::from_fn(move || { - while let Some((key, child_node)) = iter.next() { + while let Some(child_node) = iter.next() { // Pseudo-recursion - let new_iter = child_node.children.iter(); + let new_iter = child_node.children().iter(); let old_iter = std::mem::replace(&mut iter, new_iter); - let key = key.full_path(); - stack.push((key, child_node, old_iter)); + stack.push((child_node, old_iter)); } // Found the end of a `children.iter()` iterator. - if let Some((key, child_node, next_iter)) = stack.pop() { + if let Some((child_node, next_iter)) = stack.pop() { // "Return" from pseudo-recursion by restoring state from the // explicit stack iter = next_iter; - Some((key, child_node)) + Some(child_node) } else { // Reached the bottom of the stack, we’re done None @@ -303,7 +443,7 @@ impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { fn clear(&mut self) { - self.root.clear(); + self.root = Default::default(); self.nodes_with_entry_count = 0; self.nodes_with_copy_source_count = 0; } @@ -347,7 +487,7 @@ fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option { let (first_path_component, rest_of_path) = path.split_first_component(); - let node = nodes.get_mut(first_path_component)?; + let node = nodes.make_mut().get_mut(first_path_component)?; let dropped; if let Some(rest) = rest_of_path { dropped = recur(&mut node.children, rest)?; @@ -370,7 +510,7 @@ && node.copy_source.is_none() && node.children.is_empty() { - nodes.remove(first_path_component); + nodes.make_mut().remove(first_path_component); } Some(dropped) } @@ -401,8 +541,8 @@ fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool { self.get_node(key) - .and_then(|node| node.entry.as_ref()) - .map_or(false, DirstateEntry::is_non_normal) + .and_then(|node| node.entry()) + .map_or(false, |entry| entry.is_non_normal()) } fn non_normal_entries_remove(&mut self, _key: &HgPath) { @@ -413,13 +553,12 @@ fn non_normal_or_other_parent_paths( &mut self, ) -> Box + '_> { - Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.entry - .as_ref() + Box::new(self.iter_nodes().filter_map(|node| { + node.entry() .filter(|entry| { entry.is_non_normal() || entry.is_from_other_parent() }) - .map(|_| &**path) + .map(|_| node.full_path()) })) } @@ -437,22 +576,20 @@ fn iter_non_normal_paths_panic( &self, ) -> Box + Send + '_> { - Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.entry - .as_ref() + Box::new(self.iter_nodes().filter_map(|node| { + node.entry() .filter(|entry| entry.is_non_normal()) - .map(|_| &**path) + .map(|_| node.full_path()) })) } fn iter_other_parent_paths( &mut self, ) -> Box + Send + '_> { - Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.entry - .as_ref() + Box::new(self.iter_nodes().filter_map(|node| { + node.entry() .filter(|entry| entry.is_from_other_parent()) - .map(|_| &**path) + .map(|_| node.full_path()) })) } @@ -463,7 +600,7 @@ if let Some(node) = self.get_node(directory) { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. - Ok(node.entry.is_none() && node.tracked_descendants_count > 0) + Ok(!node.has_entry() && node.tracked_descendants_count() > 0) } else { Ok(false) } @@ -476,7 +613,7 @@ if let Some(node) = self.get_node(directory) { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. - Ok(node.entry.is_none()) + Ok(!node.has_entry()) } else { Ok(false) } @@ -493,14 +630,12 @@ // Optizimation (to be measured?): pre-compute size to avoid `Vec` // reallocations let mut size = parents.as_bytes().len(); - for (path, node) in self.iter_nodes() { - if let Some(entry) = &node.entry { - size += packed_entry_size( - path, - node.copy_source.as_ref().map(|p| &**p), - ); + for node in self.iter_nodes() { + if let Some(entry) = node.entry() { + size += + packed_entry_size(node.full_path(), node.copy_source()); if entry.mtime_is_ambiguous(now) { - ambiguous_mtimes.push(path.clone()) + ambiguous_mtimes.push(node.full_path_cow()) } } } @@ -509,12 +644,12 @@ let mut packed = Vec::with_capacity(size); packed.extend(parents.as_bytes()); - for (path, node) in self.iter_nodes() { - if let Some(entry) = &node.entry { + for node in self.iter_nodes() { + if let Some(entry) = node.entry() { pack_entry( - path, - entry, - node.copy_source.as_ref().map(|p| &**p), + node.full_path(), + &entry, + node.copy_source(), &mut packed, ); } @@ -531,10 +666,10 @@ // TODO: how do we want to handle this in 2038? let now: i32 = now.0.try_into().expect("time overflow"); let mut paths = Vec::new(); - for (path, node) in self.iter_nodes() { - if let Some(entry) = &node.entry { + for node in self.iter_nodes() { + if let Some(entry) = node.entry() { if entry.mtime_is_ambiguous(now) { - paths.push(path.clone()) + paths.push(node.full_path_cow()) } } } @@ -573,23 +708,22 @@ } fn copy_map_iter(&self) -> CopyMapIter<'_> { - Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.copy_source - .as_ref() - .map(|copy_source| (&**path, &**copy_source)) + Box::new(self.iter_nodes().filter_map(|node| { + node.copy_source() + .map(|copy_source| (node.full_path(), copy_source)) })) } fn copy_map_contains_key(&self, key: &HgPath) -> bool { if let Some(node) = self.get_node(key) { - node.copy_source.is_some() + node.copy_source().is_some() } else { false } } fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> { - self.get_node(key)?.copy_source.as_ref().map(|p| &**p) + self.get_node(key)?.copy_source() } fn copy_map_remove(&mut self, key: &HgPath) -> Option { @@ -628,12 +762,12 @@ } fn get(&self, key: &HgPath) -> Option { - self.get_node(key)?.entry + self.get_node(key)?.entry() } fn iter(&self) -> StateMapIter<'_> { - Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.entry.map(|entry| (&**path, entry)) + Box::new(self.iter_nodes().filter_map(|node| { + node.entry().map(|entry| (node.full_path(), entry)) })) } } diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -9,7 +9,7 @@ //! Nodes in turn contain slices to variable-size paths, and to their own child //! nodes (if any) for nested files and directories. -use crate::dirstate_tree::dirstate_map::{self, DirstateMap}; +use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; use crate::dirstate_tree::path_with_basename::WithBasename; use crate::errors::HgError; use crate::utils::hg_path::HgPath; @@ -200,7 +200,8 @@ .map(|node| { Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?)) }) - .collect() + .collect::>() + .map(dirstate_map::ChildNodes::InMemory) } fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result, HgError> { @@ -242,7 +243,7 @@ // actual offset for the root nodes. out.resize(header_len, 0_u8); - let root = write_nodes(&mut dirstate_map.root, &mut out)?; + let root = write_nodes(dirstate_map.root.as_ref(), &mut out)?; let header = Header { marker: *V2_FORMAT_MARKER, @@ -258,49 +259,53 @@ } fn write_nodes( - nodes: &dirstate_map::ChildNodes, + nodes: dirstate_map::ChildNodesRef, out: &mut Vec, ) -> Result { // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration // order. Sort to enable binary search in the written file. - let nodes = dirstate_map::Node::sorted(nodes); + let nodes = nodes.sorted(); // First accumulate serialized nodes in a `Vec` let mut on_disk_nodes = Vec::with_capacity(nodes.len()); - for (full_path, node) in nodes { - on_disk_nodes.push(Node { - children: write_nodes(&node.children, out)?, - tracked_descendants_count: node.tracked_descendants_count.into(), - full_path: write_slice::( - full_path.full_path().as_bytes(), - out, - ), - base_name_start: u32::try_from(full_path.base_name_start()) - // Could only panic for paths over 4 GiB - .expect("dirstate-v2 offset overflow") - .into(), - copy_source: if let Some(source) = &node.copy_source { - write_slice::(source.as_bytes(), out) - } else { - Slice { - start: 0.into(), - len: 0.into(), - } - }, - entry: if let Some(entry) = &node.entry { - OptEntry { - state: entry.state.into(), - mode: entry.mode.into(), - mtime: entry.mtime.into(), - size: entry.size.into(), - } - } else { - OptEntry { - state: b'\0', - mode: 0.into(), - mtime: 0.into(), - size: 0.into(), - } + for node in nodes { + let children = write_nodes(node.children(), out)?; + let full_path = write_slice::(node.full_path().as_bytes(), out); + let copy_source = if let Some(source) = node.copy_source() { + write_slice::(source.as_bytes(), out) + } else { + Slice { + start: 0.into(), + len: 0.into(), + } + }; + on_disk_nodes.push(match node { + NodeRef::InMemory(path, node) => Node { + children, + copy_source, + full_path, + base_name_start: u32::try_from(path.base_name_start()) + // Could only panic for paths over 4 GiB + .expect("dirstate-v2 offset overflow") + .into(), + tracked_descendants_count: node + .tracked_descendants_count + .into(), + entry: if let Some(entry) = &node.entry { + OptEntry { + state: entry.state.into(), + mode: entry.mode.into(), + mtime: entry.mtime.into(), + size: entry.size.into(), + } + } else { + OptEntry { + state: b'\0', + mode: 0.into(), + mtime: 0.into(), + size: 0.into(), + } + }, }, }) } diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs +++ b/rust/hg-core/src/dirstate_tree/status.rs @@ -1,7 +1,7 @@ use crate::dirstate::status::IgnoreFnType; -use crate::dirstate_tree::dirstate_map::ChildNodes; +use crate::dirstate_tree::dirstate_map::ChildNodesRef; use crate::dirstate_tree::dirstate_map::DirstateMap; -use crate::dirstate_tree::dirstate_map::Node; +use crate::dirstate_tree::dirstate_map::NodeRef; use crate::matchers::get_ignore_function; use crate::matchers::Matcher; use crate::utils::files::get_bytes_from_os_string; @@ -56,7 +56,7 @@ let has_ignored_ancestor = false; common.traverse_fs_directory_and_dirstate( has_ignored_ancestor, - &dmap.root, + dmap.root.as_ref(), hg_path, &root_dir, is_at_repo_root, @@ -93,7 +93,7 @@ fn traverse_fs_directory_and_dirstate( &self, has_ignored_ancestor: bool, - dirstate_nodes: &'tree ChildNodes, + dirstate_nodes: ChildNodesRef<'tree, '_>, directory_hg_path: &'tree HgPath, directory_fs_path: &Path, is_at_repo_root: bool, @@ -110,7 +110,7 @@ // `merge_join_by` requires both its input iterators to be sorted: - let dirstate_nodes = Node::sorted(dirstate_nodes); + let dirstate_nodes = dirstate_nodes.sorted(); // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: // https://github.com/rust-lang/rust/issues/34162 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); @@ -118,26 +118,24 @@ itertools::merge_join_by( dirstate_nodes, &fs_entries, - |(full_path, _node), fs_entry| { - full_path.base_name().cmp(&fs_entry.base_name) + |dirstate_node, fs_entry| { + dirstate_node.base_name().cmp(&fs_entry.base_name) }, ) .par_bridge() .for_each(|pair| { use itertools::EitherOrBoth::*; match pair { - Both((hg_path, dirstate_node), fs_entry) => { + Both(dirstate_node, fs_entry) => { self.traverse_fs_and_dirstate( fs_entry, - hg_path.full_path(), dirstate_node, has_ignored_ancestor, ); } - Left((hg_path, dirstate_node)) => self.traverse_dirstate_only( - hg_path.full_path(), - dirstate_node, - ), + Left(dirstate_node) => { + self.traverse_dirstate_only(dirstate_node) + } Right(fs_entry) => self.traverse_fs_only( has_ignored_ancestor, directory_hg_path, @@ -150,10 +148,10 @@ fn traverse_fs_and_dirstate( &self, fs_entry: &DirEntry, - hg_path: &'tree HgPath, - dirstate_node: &'tree Node, + dirstate_node: NodeRef<'tree, '_>, has_ignored_ancestor: bool, ) { + let hg_path = dirstate_node.full_path(); let file_type = fs_entry.metadata.file_type(); let file_or_symlink = file_type.is_file() || file_type.is_symlink(); if !file_or_symlink { @@ -161,7 +159,7 @@ // `hg rm` or similar) or deleted before it could be // replaced by a directory or something else. self.mark_removed_or_deleted_if_file( - hg_path, + dirstate_node.full_path(), dirstate_node.state(), ); } @@ -173,7 +171,7 @@ let is_at_repo_root = false; self.traverse_fs_directory_and_dirstate( is_ignored, - &dirstate_node.children, + dirstate_node.children(), hg_path, &fs_entry.full_path, is_at_repo_root, @@ -181,8 +179,8 @@ } else { if file_or_symlink && self.matcher.matches(hg_path) { let full_path = Cow::from(hg_path); - if let Some(entry) = &dirstate_node.entry { - match entry.state { + if let Some(state) = dirstate_node.state() { + match state { EntryState::Added => { self.outcome.lock().unwrap().added.push(full_path) } @@ -199,12 +197,7 @@ .modified .push(full_path), EntryState::Normal => { - self.handle_normal_file( - full_path, - dirstate_node, - entry, - fs_entry, - ); + self.handle_normal_file(&dirstate_node, fs_entry); } // This variant is not used in DirstateMap // nodes @@ -220,11 +213,8 @@ } } - for (child_hg_path, child_node) in &dirstate_node.children { - self.traverse_dirstate_only( - child_hg_path.full_path(), - child_node, - ) + for child_node in dirstate_node.children().iter() { + self.traverse_dirstate_only(child_node) } } } @@ -233,9 +223,7 @@ /// filesystem fn handle_normal_file( &self, - full_path: Cow<'tree, HgPath>, - dirstate_node: &Node, - entry: &crate::DirstateEntry, + dirstate_node: &NodeRef<'tree, '_>, fs_entry: &DirEntry, ) { // Keep the low 31 bits @@ -246,6 +234,10 @@ (value & 0x7FFF_FFFF) as i32 } + let entry = dirstate_node + .entry() + .expect("handle_normal_file called with entry-less node"); + let full_path = Cow::from(dirstate_node.full_path()); let mode_changed = || { self.options.check_exec && entry.mode_changed(&fs_entry.metadata) }; @@ -257,7 +249,7 @@ // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? self.outcome.lock().unwrap().unsure.push(full_path) - } else if dirstate_node.copy_source.is_some() + } else if dirstate_node.copy_source().is_some() || entry.is_from_other_parent() || (entry.size >= 0 && (size_changed || mode_changed())) { @@ -275,20 +267,15 @@ } /// A node in the dirstate tree has no corresponding filesystem entry - fn traverse_dirstate_only( - &self, - hg_path: &'tree HgPath, - dirstate_node: &'tree Node, - ) { - self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); - dirstate_node.children.par_iter().for_each( - |(child_hg_path, child_node)| { - self.traverse_dirstate_only( - child_hg_path.full_path(), - child_node, - ) - }, - ) + fn traverse_dirstate_only(&self, dirstate_node: NodeRef<'tree, '_>) { + self.mark_removed_or_deleted_if_file( + dirstate_node.full_path(), + dirstate_node.state(), + ); + dirstate_node + .children() + .par_iter() + .for_each(|child_node| self.traverse_dirstate_only(child_node)) } /// A node in the dirstate tree has no corresponding *file* on the