diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs --- a/rust/hg-core/src/dirstate/dirstate_map.rs +++ b/rust/hg-core/src/dirstate/dirstate_map.rs @@ -284,10 +284,10 @@ } #[timed] - pub fn read<'a>( + pub fn read( &mut self, - file_contents: &'a [u8], - ) -> Result, DirstateError> { + file_contents: &[u8], + ) -> Result, DirstateError> { if file_contents.is_empty() { return Ok(None); } @@ -303,7 +303,7 @@ .into_iter() .map(|(path, copy)| (path.to_owned(), copy.to_owned())), ); - Ok(Some(parents)) + Ok(Some(parents.clone())) } pub fn pack( diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -24,7 +24,10 @@ use crate::StatusError; use crate::StatusOptions; -pub struct DirstateMap { +pub struct DirstateMap<'on_disk> { + /// Contents of the `.hg/dirstate` file + on_disk: &'on_disk [u8], + pub(super) root: ChildNodes, /// Number of nodes anywhere in the tree that have `.entry.is_some()`. @@ -69,13 +72,58 @@ &'a mut Option, ); -impl DirstateMap { - pub fn new() -> Self { - Self { +impl<'on_disk> DirstateMap<'on_disk> { + pub fn new( + on_disk: &'on_disk [u8], + ) -> Result<(Self, Option), DirstateError> { + let mut map = Self { + on_disk, root: ChildNodes::default(), nodes_with_entry_count: 0, nodes_with_copy_source_count: 0, + }; + let parents = map.read()?; + Ok((map, parents)) + } + + /// Should only be called in `new` + #[timed] + fn read(&mut self) -> Result, DirstateError> { + if self.on_disk.is_empty() { + return Ok(None); } + + let parents = parse_dirstate_entries( + self.on_disk, + |path, entry, copy_source| { + let tracked = entry.state.is_tracked(); + let node = Self::get_or_insert_node_tracing_ancestors( + &mut self.root, + path, + |ancestor| { + if tracked { + ancestor.tracked_descendants_count += 1 + } + }, + ); + assert!( + node.entry.is_none(), + "duplicate dirstate entry in read" + ); + assert!( + node.copy_source.is_none(), + "duplicate dirstate entry in read" + ); + node.entry = Some(*entry); + node.copy_source = copy_source.map(HgPath::to_owned); + self.nodes_with_entry_count += 1; + if copy_source.is_some() { + self.nodes_with_copy_source_count += 1 + } + }, + )?; + + Ok(Some(parents.clone())) } fn get_node(&self, path: &HgPath) -> Option<&Node> { @@ -280,7 +328,7 @@ } } -impl super::dispatch::DirstateMapMethods for DirstateMap { +impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { fn clear(&mut self) { self.root.clear(); self.nodes_with_entry_count = 0; @@ -443,48 +491,6 @@ } } - #[timed] - fn read<'a>( - &mut self, - file_contents: &'a [u8], - ) -> Result, DirstateError> { - if file_contents.is_empty() { - return Ok(None); - } - - let parents = parse_dirstate_entries( - file_contents, - |path, entry, copy_source| { - let tracked = entry.state.is_tracked(); - let node = Self::get_or_insert_node_tracing_ancestors( - &mut self.root, - path, - |ancestor| { - if tracked { - ancestor.tracked_descendants_count += 1 - } - }, - ); - assert!( - node.entry.is_none(), - "duplicate dirstate entry in read" - ); - assert!( - node.copy_source.is_none(), - "duplicate dirstate entry in read" - ); - node.entry = Some(*entry); - node.copy_source = copy_source.map(HgPath::to_owned); - self.nodes_with_entry_count += 1; - if copy_source.is_some() { - self.nodes_with_copy_source_count += 1 - } - }, - )?; - - Ok(Some(parents)) - } - fn pack( &mut self, parents: DirstateParents, diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs --- a/rust/hg-core/src/dirstate_tree/dispatch.rs +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -73,11 +73,6 @@ directory: &HgPath, ) -> Result; - fn read<'a>( - &mut self, - file_contents: &'a [u8], - ) -> Result, DirstateError>; - fn pack( &mut self, parents: DirstateParents, @@ -216,13 +211,6 @@ self.has_dir(directory) } - fn read<'a>( - &mut self, - file_contents: &'a [u8], - ) -> Result, DirstateError> { - self.read(file_contents) - } - fn pack( &mut self, parents: DirstateParents, diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs --- a/rust/hg-cpython/src/dirstate.rs +++ b/rust/hg-cpython/src/dirstate.rs @@ -12,7 +12,9 @@ mod copymap; mod dirs_multiset; mod dirstate_map; +mod dispatch; mod non_normal_entries; +mod owning; mod status; use crate::{ dirstate::{ diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -22,6 +22,7 @@ dirstate::non_normal_entries::{ NonNormalEntries, NonNormalEntriesIterator, }, + dirstate::owning::OwningDirstateMap, dirstate::{dirs_multiset::Dirs, make_dirstate_tuple}, parsers::dirstate_parents_to_pytuple, }; @@ -58,12 +59,13 @@ let dirstate_error = |_: DirstateError| { PyErr::new::(py, "Dirstate error".to_string()) }; - let bytes = on_disk.data(py); let (inner, parents) = if use_dirstate_tree { - let mut map = hg::dirstate_tree::dirstate_map::DirstateMap::new(); - let parents = map.read(bytes).map_err(dirstate_error)?; + let (map, parents) = + OwningDirstateMap::new(py, on_disk) + .map_err(dirstate_error)?; (Box::new(map) as _, parents) } else { + let bytes = on_disk.data(py); let mut map = RustDirstateMap::default(); let parents = map.read(bytes).map_err(dirstate_error)?; (Box::new(map) as _, parents) diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/dirstate/dispatch.rs @@ -0,0 +1,175 @@ +use crate::dirstate::owning::OwningDirstateMap; +use hg::dirstate::parsers::Timestamp; +use hg::dirstate_tree::dispatch::DirstateMapMethods; +use hg::matchers::Matcher; +use hg::utils::hg_path::{HgPath, HgPathBuf}; +use hg::CopyMapIter; +use hg::DirstateEntry; +use hg::DirstateError; +use hg::DirstateMapError; +use hg::DirstateParents; +use hg::DirstateStatus; +use hg::EntryState; +use hg::PatternFileWarning; +use hg::StateMapIter; +use hg::StatusError; +use hg::StatusOptions; +use std::path::PathBuf; + +impl DirstateMapMethods for OwningDirstateMap { + fn clear(&mut self) { + self.get_mut().clear() + } + + fn add_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + entry: DirstateEntry, + ) -> Result<(), DirstateMapError> { + self.get_mut().add_file(filename, old_state, entry) + } + + fn remove_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + size: i32, + ) -> Result<(), DirstateMapError> { + self.get_mut().remove_file(filename, old_state, size) + } + + fn drop_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + ) -> Result { + self.get_mut().drop_file(filename, old_state) + } + + fn clear_ambiguous_times(&mut self, filenames: Vec, now: i32) { + self.get_mut().clear_ambiguous_times(filenames, now) + } + + fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool { + self.get_mut().non_normal_entries_contains(key) + } + + fn non_normal_entries_remove(&mut self, key: &HgPath) { + self.get_mut().non_normal_entries_remove(key) + } + + fn non_normal_or_other_parent_paths( + &mut self, + ) -> Box + '_> { + self.get_mut().non_normal_or_other_parent_paths() + } + + fn set_non_normal_other_parent_entries(&mut self, force: bool) { + self.get_mut().set_non_normal_other_parent_entries(force) + } + + fn iter_non_normal_paths( + &mut self, + ) -> Box + Send + '_> { + self.get_mut().iter_non_normal_paths() + } + + fn iter_non_normal_paths_panic( + &self, + ) -> Box + Send + '_> { + self.get().iter_non_normal_paths_panic() + } + + fn iter_other_parent_paths( + &mut self, + ) -> Box + Send + '_> { + self.get_mut().iter_other_parent_paths() + } + + fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.get_mut().has_tracked_dir(directory) + } + + fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.get_mut().has_dir(directory) + } + + fn pack( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result, DirstateError> { + self.get_mut().pack(parents, now) + } + + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + self.get_mut().set_all_dirs() + } + + fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + self.get_mut().set_dirs() + } + + fn status<'a>( + &'a mut self, + matcher: &'a (dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec, + options: StatusOptions, + ) -> Result<(DirstateStatus<'a>, Vec), StatusError> + { + self.get_mut() + .status(matcher, root_dir, ignore_files, options) + } + + fn copy_map_len(&self) -> usize { + self.get().copy_map_len() + } + + fn copy_map_iter(&self) -> CopyMapIter<'_> { + self.get().copy_map_iter() + } + + fn copy_map_contains_key(&self, key: &HgPath) -> bool { + self.get().copy_map_contains_key(key) + } + + fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf> { + self.get().copy_map_get(key) + } + + fn copy_map_remove(&mut self, key: &HgPath) -> Option { + self.get_mut().copy_map_remove(key) + } + + fn copy_map_insert( + &mut self, + key: HgPathBuf, + value: HgPathBuf, + ) -> Option { + self.get_mut().copy_map_insert(key, value) + } + + fn len(&self) -> usize { + self.get().len() + } + + fn contains_key(&self, key: &HgPath) -> bool { + self.get().contains_key(key) + } + + fn get(&self, key: &HgPath) -> Option<&DirstateEntry> { + self.get().get(key) + } + + fn iter(&self) -> StateMapIter<'_> { + self.get().iter() + } +} diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/dirstate/owning.rs @@ -0,0 +1,97 @@ +use cpython::PyBytes; +use cpython::Python; +use hg::dirstate_tree::dirstate_map::DirstateMap; +use hg::DirstateError; +use hg::DirstateParents; + +/// Keep a `DirstateMap<'on_disk>` next to the `on_disk` buffer that it +/// borrows. This is similar to the owning-ref crate. +/// +/// This is similar to [`OwningRef`] which is more limited because it +/// represents exactly one `&T` reference next to the value it borrows, as +/// opposed to a struct that may contain an arbitrary number of references in +/// arbitrarily-nested data structures. +/// +/// [`OwningRef`]: https://docs.rs/owning_ref/0.4.1/owning_ref/struct.OwningRef.html +pub(super) struct OwningDirstateMap { + /// Owned handle to a bytes buffer with a stable address. + /// + /// See . + on_disk: PyBytes, + + /// Pointer for `Box>`, typed-erased because the + /// language cannot represent a lifetime referencing a sibling field. + /// This is not quite a self-referencial struct (moving this struct is not + /// a problem as it doesn’t change the address of the bytes buffer owned + /// by `PyBytes`) but touches similar borrow-checker limitations. + ptr: *mut (), +} + +impl OwningDirstateMap { + pub fn new( + py: Python, + on_disk: PyBytes, + ) -> Result<(Self, Option), DirstateError> { + let bytes: &'_ [u8] = on_disk.data(py); + let (map, parents) = DirstateMap::new(bytes)?; + + // Like in `bytes` above, this `'_` lifetime parameter borrows from + // the bytes buffer owned by `on_disk`. + let ptr: *mut DirstateMap<'_> = Box::into_raw(Box::new(map)); + + // Erase the pointed type entirely in order to erase the lifetime. + let ptr: *mut () = ptr.cast(); + + Ok((Self { on_disk, ptr }, parents)) + } + + pub fn get_mut<'a>(&'a mut self) -> &'a mut DirstateMap<'a> { + // SAFETY: We cast the type-erased pointer back to the same type it had + // in `new`, except with a different lifetime parameter. This time we + // connect the lifetime to that of `self`. This cast is valid because + // `self` owns the same `PyBytes` whose buffer `DirstateMap` + // references. That buffer has a stable memory address because the byte + // string value of a `PyBytes` is immutable. + let ptr: *mut DirstateMap<'a> = self.ptr.cast(); + // SAFETY: we dereference that pointer, connecting the lifetime of the + // new `&mut` to that of `self`. This is valid because the + // raw pointer is to a boxed value, and `self` owns that box. + unsafe { &mut *ptr } + } + + pub fn get<'a>(&'a self) -> &'a DirstateMap<'a> { + // SAFETY: same reasoning as in `get_mut` above. + let ptr: *mut DirstateMap<'a> = self.ptr.cast(); + unsafe { &*ptr } + } +} + +impl Drop for OwningDirstateMap { + fn drop(&mut self) { + // Silence a "field is never read" warning, and demonstrate that this + // value is still alive. + let _ = &self.on_disk; + // SAFETY: this cast is the same as in `get_mut`, and is valid for the + // same reason. `self.on_disk` still exists at this point, drop glue + // will drop it implicitly after this `drop` method returns. + let ptr: *mut DirstateMap<'_> = self.ptr.cast(); + // SAFETY: `Box::from_raw` takes ownership of the box away from `self`. + // This is fine because drop glue does nothig for `*mut ()` and we’re + // in `drop`, so `get` and `get_mut` cannot be called again. + unsafe { drop(Box::from_raw(ptr)) } + } +} + +fn _static_assert_is_send() {} + +fn _static_assert_fields_are_send() { + _static_assert_is_send::(); + _static_assert_is_send::>>(); +} + +// SAFETY: we don’t get this impl implicitly because `*mut (): !Send` because +// thread-safety of raw pointers is unknown in the general case. However this +// particular raw pointer represents a `Box>` that we +// own. Since that `Box` and `PyBytes` are both `Send` as shown in above, it +// is sound to mark this struct as `Send` too. +unsafe impl Send for OwningDirstateMap {}