diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -8,12 +8,15 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
-use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
-    dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
-    FastHashMap,
+    dirstate::EntryState,
+    utils::{
+        files,
+        hg_path::{HgPath, HgPathBuf},
+    },
+    DirstateEntry, DirstateMapError, FastHashMap,
 };
-use std::collections::hash_map::{self, Entry};
+use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
 
 // could be encapsulated if we care API stability more seriously
 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
@@ -129,6 +132,72 @@
     }
 }
 
+/// This is basically a reimplementation of `DirsMultiset` that stores the
+/// children instead of just a count of them, plus a small optional
+/// optimization to avoid some directories we don't need.
+#[derive(PartialEq, Debug)]
+pub struct DirsChildrenMultiset<'a> {
+    /// Maps each tracked directory to the basenames of its direct children.
+    inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
+    /// When set, only these directories get an entry in `inner`.
+    only_include: Option<HashSet<&'a HgPath>>,
+}
+
+impl<'a> DirsChildrenMultiset<'a> {
+    /// Builds the multiset from `paths`; empty paths are ignored.
+    ///
+    /// If `only_include` is `Some`, only the directories it contains are
+    /// recorded; if it is `None`, every ancestor directory is recorded.
+    pub fn new(
+        paths: impl Iterator<Item = &'a HgPathBuf>,
+        only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
+    ) -> Self {
+        let mut new = Self {
+            inner: HashMap::default(),
+            only_include: only_include
+                .map(|s| s.iter().map(|p| p.as_ref()).collect()),
+        };
+
+        for path in paths {
+            new.add_path(path)
+        }
+
+        new
+    }
+
+    /// Records each component of `path` as a child of its parent directory.
+    fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
+        if path.as_ref().is_empty() {
+            return;
+        }
+        for (directory, basename) in files::find_dirs_with_base(path.as_ref())
+        {
+            if !self.is_dir_included(directory) {
+                continue;
+            }
+            self.inner.entry(directory).or_default().insert(basename);
+        }
+    }
+
+    /// Returns `true` if `dir` may get an entry: always when there is no
+    /// `only_include` filter, otherwise only if the filter contains it.
+    fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
+        match &self.only_include {
+            // No filter means nothing is excluded.
+            None => true,
+            Some(i) => i.contains(dir.as_ref()),
+        }
+    }
+
+    /// Returns the children recorded for the directory `path`, if any.
+    pub fn get(
+        &self,
+        path: impl AsRef<HgPath>,
+    ) -> Option<&HashSet<&'a HgPath>> {
+        self.inner.get(path.as_ref())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -10,11 +10,11 @@
 //! Functions for fiddling with files.
 
 use crate::utils::hg_path::{HgPath, HgPathBuf};
-use std::iter::FusedIterator;
 
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
+use std::iter::FusedIterator;
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
@@ -64,6 +64,28 @@
 
 impl<'a> FusedIterator for Ancestors<'a> {}
 
+/// An iterator over a repository path and its ancestors, with basenames.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct AncestorsWithBase<'a> {
+    /// Pair yielded by the following call to `next()`, if any.
+    next: Option<(&'a HgPath, &'a HgPath)>,
+}
+
+impl<'a> Iterator for AncestorsWithBase<'a> {
+    type Item = (&'a HgPath, &'a HgPath);
+
+    /// Yields the pending pair, then queues the split of its directory.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (dir, base) = self.next.take()?;
+        if !dir.is_empty() {
+            self.next = Some(dir.split_filename());
+        }
+        Some((dir, base))
+    }
+}
+
+impl<'a> FusedIterator for AncestorsWithBase<'a> {}
+
 /// Returns an iterator yielding ancestor directories of the given repository
 /// path.
 ///
@@ -79,6 +101,25 @@
     dirs
 }
 
+/// Returns an iterator yielding `(directory, basename)` pairs for each
+/// ancestor directory of the given repository path, innermost first.
+///
+/// The path is separated by '/', and must not start with '/'.
+///
+/// The path itself isn't included unless it is b"" (meaning the root
+/// directory), in which case the only item is `(b"", b"")`.
+pub(crate) fn find_dirs_with_base<'a>(
+    path: &'a HgPath,
+) -> AncestorsWithBase<'a> {
+    let first = if path.is_empty() {
+        (path, HgPath::new(b""))
+    } else {
+        // Skip the path itself: start from its parent directory.
+        path.split_filename()
+    };
+    AncestorsWithBase { next: Some(first) }
+}
+
 /// TODO more than ASCII?
 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
     #[cfg(windows)] // NTFS compares via upper()
@@ -170,4 +211,30 @@
         assert_eq!(dirs.next(), None);
         assert_eq!(dirs.next(), None);
     }
+
+    #[test]
+    fn test_find_dirs_with_base_some() {
+        // Expected item for an ancestor directory and its child's basename.
+        let pair = |dir: &'static [u8], base: &'static [u8]| {
+            Some((HgPath::new(dir), HgPath::new(base)))
+        };
+        let mut ancestors =
+            super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
+        assert_eq!(ancestors.next(), pair(b"foo/bar", b"baz"));
+        assert_eq!(ancestors.next(), pair(b"foo", b"bar"));
+        assert_eq!(ancestors.next(), pair(b"", b"foo"));
+        // The iterator is fused: it keeps returning `None` once exhausted.
+        assert_eq!(ancestors.next(), None);
+        assert_eq!(ancestors.next(), None);
+    }
+
+    #[test]
+    fn test_find_dirs_with_base_empty() {
+        let root = HgPath::new(b"");
+        let mut ancestors = super::find_dirs_with_base(root);
+        // The root directory is yielded once, paired with an empty basename.
+        assert_eq!(ancestors.next(), Some((root, root)));
+        assert_eq!(ancestors.next(), None);
+        assert_eq!(ancestors.next(), None);
+    }
 }
diff --git a/rust/hg-core/src/utils/hg_path.rs b/rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/utils/hg_path.rs
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -183,6 +183,32 @@
             &self.inner[..]
         })
     }
+    /// Splits the path at its rightmost `/` and returns the two halves as
+    /// `(base, filename)`; the separator itself is dropped.
+    ///
+    /// When the path contains no `/`, `base` is empty and `filename` is the
+    /// whole path.
+    ///
+    /// # Examples:
+    ///
+    /// ```
+    /// use hg::utils::hg_path::HgPath;
+    ///
+    /// let path = HgPath::new(b"cool/hg/path").split_filename();
+    /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
+    ///
+    /// let path = HgPath::new(b"pathwithoutsep").split_filename();
+    /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
+    /// ```
+    pub fn split_filename(&self) -> (&Self, &Self) {
+        match self.inner.iter().rposition(|&byte| byte == b'/') {
+            Some(sep) => (
+                HgPath::new(&self.inner[..sep]),
+                HgPath::new(&self.inner[sep + 1..]),
+            ),
+            None => (HgPath::new(""), self),
+        }
+    }
     pub fn join>(&self, other: &T) -> HgPathBuf {
         let mut inner = self.inner.to_owned();
         if inner.len() != 0 && inner.last() != Some(&b'/') {