diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "adler" version = "0.2.3" @@ -390,6 +392,7 @@ "log", "memmap2", "micro-timer", + "openat", "pretty_assertions", "rand", "rand_distr", @@ -598,6 +601,15 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] +name = "openat" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95aa7c05907b3ebde2610d602f4ddd992145cc6a84493647c30396f30ba83abe" +dependencies = [ + "libc", +] + +[[package]] name = "output_vt100" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -34,6 +34,7 @@ memmap2 = {version = "0.4", features = ["stable_deref_trait"]} zstd = "0.5.3" format-bytes = "0.2.2" +openat = "0.1.21" # We don't use the `miniz-oxide` backend to not change rhg benchmarks and until # we have a clearer view of which backend is the fastest. diff --git a/rust/hg-core/src/dirstate/entry.rs b/rust/hg-core/src/dirstate/entry.rs --- a/rust/hg-core/src/dirstate/entry.rs +++ b/rust/hg-core/src/dirstate/entry.rs @@ -2,7 +2,6 @@ use crate::errors::HgError; use bitflags::bitflags; use std::convert::{TryFrom, TryInto}; -use std::fs; use std::io; use std::time::{SystemTime, UNIX_EPOCH}; @@ -76,18 +75,18 @@ } } - pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result { + pub fn for_mtime_of(metadata: &openat::Metadata) -> io::Result { #[cfg(unix)] { - use std::os::unix::fs::MetadataExt; - let seconds = metadata.mtime(); - // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range - let nanoseconds = metadata.mtime_nsec().try_into().unwrap(); + let stat = metadata.stat(); + let seconds = stat.st_mtime as libc::time_t; + let nanoseconds = stat.st_mtime_nsec as libc::c_long; + let nanoseconds : u32 = nanoseconds.try_into().unwrap(); Ok(Self::new_truncate(seconds, nanoseconds)) } #[cfg(not(unix))] { - metadata.modified().map(Self::from) + panic!("not supported") } } @@ -130,10 +129,11 @@ pub fn likely_equal_to_mtime_of( self, - metadata: &fs::Metadata, + metadata: &openat::Metadata, ) -> io::Result { Ok(self.likely_equal(Self::for_mtime_of(metadata)?)) } + } impl From for TruncatedTimestamp { @@ -578,12 +578,11 @@ #[cfg(unix)] pub fn mode_changed( &self, - filesystem_metadata: &std::fs::Metadata, + filesystem_metadata: &openat::Metadata, ) -> bool { - use std::os::unix::fs::MetadataExt; const EXEC_BIT_MASK: u32 = 0o100; let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK; - let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK; + let fs_exec_bit = filesystem_metadata.stat().st_mode & EXEC_BIT_MASK; dirstate_exec_bit != fs_exec_bit } diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs +++ b/rust/hg-core/src/dirstate_tree/status.rs @@ -8,13 +8,10 @@ use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::matchers::get_ignore_function; use crate::matchers::Matcher; -use crate::utils::files::get_bytes_from_os_string; -use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::BadMatch; use crate::DirstateStatus; use crate::EntryState; -use crate::HgPathBuf; use crate::PatternFileWarning; use crate::StatusError; use crate::StatusOptions; @@ -27,6 +24,11 @@ use std::path::PathBuf; use std::sync::Mutex; use std::time::SystemTime; +use openat::{Dir}; +use std::ffi::CStr; +use std::os::unix::ffi::OsStrExt; +use std::ffi::CString; +use std::convert::TryInto; /// Returns the status of the working directory compared to its parent /// changeset. @@ -80,6 +82,7 @@ // If the path we have for the repository root is a symlink, do follow it. // (As opposed to symlinks within the working directory which are not // followed, using `std::fs::symlink_metadata`.) + let root_dir = openat::Dir::open(&root_dir)?; common.traverse_fs_directory_and_dirstate( has_ignored_ancestor, dmap.root.as_ref(), @@ -141,14 +144,94 @@ filesystem_time_at_status_start: Option, } +enum MetadataOrType<'a> { + Metadata(&'a openat::Metadata), + FileType(openat::SimpleType) +} + +struct FilesystemEntry<'a> { + dir : &'a Dir, + name : &'a CStr, + metadata_or_type : MetadataOrType<'a> +} + +enum TORREF <'a, T> { + Owned(T), + Borrowed(&'a T) +} + +impl <'a, T> TORREF <'a, T> { + fn borrow(&self) -> &T { + match self { + TORREF::Owned(t) => &t, + TORREF::Borrowed(r) => r + } + } +} + +impl <'a> FilesystemEntry<'a> { + + fn of_read_dir(dir : &'a Dir, entry : &'a openat::Entry) -> Self { + FilesystemEntry { + dir, name : <& openat::Entry as openat::AsPath>::to_path(entry).unwrap(), metadata_or_type : MetadataOrType::FileType(entry.simple_type().unwrap()) // TODO: not unwrap + } + } + + fn of_metadata(dir : &'a Dir, name : &'a CStr, metadata : &'a openat::Metadata) -> Self { + FilesystemEntry { + dir, name, metadata_or_type : MetadataOrType::Metadata(metadata) + } + } + + fn file_type(&self) -> openat::SimpleType { + match self.metadata_or_type { + MetadataOrType::Metadata(metadata) => metadata.simple_type (), + MetadataOrType::FileType(file_type) => file_type + } + } + + // Returns None if the entry is not a directory + fn as_directory(&self) -> io::Result> { + match self.file_type() { + openat::SimpleType::Dir => { + let dir = self.dir.sub_dir(self.name)?; + let metadata = dir.self_metadata()?; + Ok(Some((dir, metadata))) + }, + _ => Ok(None) + } + } + + fn metadata(&self) -> io::Result> { + match self.metadata_or_type { + MetadataOrType::Metadata(m) => Ok(TORREF::Borrowed(m)), + MetadataOrType::FileType(_) => { + let metadata = self.dir.metadata(self.name)?; + Ok(TORREF::Owned(metadata)) } + } + } +} + + fn stat_mtime(metadata : &openat::Metadata) -> SystemTime { + let stat = metadata.stat(); + let nsec : i64 = stat.st_mtime_nsec as libc::c_long; + let nsec : u32 = nsec.try_into().unwrap(); + let sec : i64 = stat.st_mtime as libc::time_t; + let sec : u64 = sec.try_into().unwrap(); + let dur = std::time::Duration::new(sec, nsec); + SystemTime::UNIX_EPOCH.checked_add(dur).unwrap() + } + + + impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { fn read_dir( &self, hg_path: &HgPath, - fs_path: &Path, + fs_dir: &Dir, is_at_repo_root: bool, - ) -> Result, ()> { - DirEntry::read_dir(fs_path, is_at_repo_root) + ) -> Result, ()> { + read_dir(fs_dir, is_at_repo_root) .map_err(|error| self.io_error(error, hg_path)) } @@ -182,7 +265,7 @@ /// need to call `read_dir`. fn can_skip_fs_readdir( &self, - directory_metadata: Option<&std::fs::Metadata>, + directory_metadata: Option<&openat::Metadata>, cached_directory_mtime: Option, ) -> bool { if !self.options.list_unknown && !self.options.list_ignored { @@ -221,8 +304,8 @@ has_ignored_ancestor: bool, dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, directory_hg_path: &BorrowedPath<'tree, 'on_disk>, - directory_fs_path: &Path, - directory_metadata: Option<&std::fs::Metadata>, + directory_fd: &Dir, + directory_metadata: Option<&openat::Metadata>, cached_directory_mtime: Option, is_at_repo_root: bool, ) -> Result { @@ -231,13 +314,11 @@ dirstate_nodes .par_iter() .map(|dirstate_node| { - let fs_path = directory_fs_path.join(get_path_from_bytes( - dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), - )); - match std::fs::symlink_metadata(&fs_path) { + let base_name = dirstate_node.base_name(self.dmap.on_disk)?; + let base_name_cstring = CString::new(base_name.as_bytes()).expect("NULL BYTE"); + match directory_fd.metadata(base_name_cstring.as_ref()) { Ok(fs_metadata) => self.traverse_fs_and_dirstate( - &fs_path, - &fs_metadata, + &FilesystemEntry::of_metadata(directory_fd, base_name_cstring.as_ref(), &fs_metadata), dirstate_node, has_ignored_ancestor, ), @@ -261,7 +342,7 @@ let mut fs_entries = if let Ok(entries) = self.read_dir( directory_hg_path, - directory_fs_path, + directory_fd, is_at_repo_root, ) { entries @@ -277,7 +358,7 @@ let dirstate_nodes = dirstate_nodes.sorted(); // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: // https://github.com/rust-lang/rust/issues/34162 - fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); + fs_entries.sort_unstable_by(|e1, e2| e1.file_name().as_bytes().cmp(&e2.file_name().as_bytes())); // Propagate here any error that would happen inside the comparison // callback below @@ -293,7 +374,8 @@ dirstate_node .base_name(self.dmap.on_disk) .unwrap() - .cmp(&fs_entry.base_name) + .as_bytes() + .cmp(&fs_entry.file_name().as_bytes()) }, ) .par_bridge() @@ -303,8 +385,7 @@ match pair { Both(dirstate_node, fs_entry) => { self.traverse_fs_and_dirstate( - &fs_entry.full_path, - &fs_entry.metadata, + &FilesystemEntry::of_read_dir(directory_fd, &fs_entry), dirstate_node, has_ignored_ancestor, )?; @@ -318,6 +399,7 @@ has_dirstate_node_or_is_ignored = self.traverse_fs_only( has_ignored_ancestor, directory_hg_path, + directory_fd, fs_entry, ) } @@ -329,15 +411,18 @@ fn traverse_fs_and_dirstate( &self, - fs_path: &Path, - fs_metadata: &std::fs::Metadata, + fs_entry: &FilesystemEntry, dirstate_node: NodeRef<'tree, 'on_disk>, has_ignored_ancestor: bool, ) -> Result<(), DirstateV2ParseError> { self.check_for_outdated_directory_cache(&dirstate_node)?; let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?; - let file_type = fs_metadata.file_type(); - let file_or_symlink = file_type.is_file() || file_type.is_symlink(); + let file_type = fs_entry.file_type(); + let file_or_symlink = + match file_type { + openat::SimpleType::File | openat::SimpleType::Symlink => true, + openat::SimpleType::Dir | openat::SimpleType::Other => false, + }; if !file_or_symlink { // If we previously had a file here, it was removed (with // `hg rm` or similar) or deleted before it could be @@ -347,7 +432,8 @@ dirstate_node.state()?, ); } - if file_type.is_dir() { + match fs_entry.as_directory().expect("TODO: handle") { + Some ((dir, fs_metadata)) => { if self.options.collect_traversed_dirs { self.outcome .lock() @@ -362,17 +448,18 @@ is_ignored, dirstate_node.children(self.dmap.on_disk)?, hg_path, - fs_path, - Some(fs_metadata), + &dir, + Some(&fs_metadata), dirstate_node.cached_directory_mtime()?, is_at_repo_root, )?; self.maybe_save_directory_mtime( children_all_have_dirstate_node_or_are_ignored, - fs_metadata, + &fs_metadata, dirstate_node, )? - } else { + }, + None => { if file_or_symlink && self.matcher.matches(hg_path) { if let Some(state) = dirstate_node.state()? { match state { @@ -394,8 +481,13 @@ .unwrap() .modified .push(hg_path.detach_from_tree()), - EntryState::Normal => self - .handle_normal_file(&dirstate_node, fs_metadata)?, + EntryState::Normal => { + let fs_metadata = fs_entry.metadata().expect("TODO"); + self.handle_normal_file( + &dirstate_node, + fs_metadata.borrow(), + )? + } } } else { // `node.entry.is_none()` indicates a "directory" @@ -411,23 +503,25 @@ { self.traverse_dirstate_only(child_node)? } - } + }, + }; Ok(()) } + fn maybe_save_directory_mtime( &self, children_all_have_dirstate_node_or_are_ignored: bool, - directory_metadata: &std::fs::Metadata, + directory_metadata: &openat::Metadata, dirstate_node: NodeRef<'tree, 'on_disk>, ) -> Result<(), DirstateV2ParseError> { if children_all_have_dirstate_node_or_are_ignored { // All filesystem directory entries from `read_dir` have a // corresponding node in the dirstate, so we can reconstitute the // names of those entries without calling `read_dir` again. - if let (Some(status_start), Ok(directory_mtime)) = ( + if let (Some(status_start), directory_mtime) = ( &self.filesystem_time_at_status_start, - directory_metadata.modified(), + stat_mtime(directory_metadata), ) { // Although the Rust standard library’s `SystemTime` type // has nanosecond precision, the times reported for a @@ -495,7 +589,7 @@ fn handle_normal_file( &self, dirstate_node: &NodeRef<'tree, 'on_disk>, - fs_metadata: &std::fs::Metadata, + fs_metadata: &openat::Metadata, ) -> Result<(), DirstateV2ParseError> { // Keep the low 31 bits fn truncate_u64(value: u64) -> i32 { @@ -510,7 +604,7 @@ || self.options.check_exec && entry.mode_changed(fs_metadata); let size = entry.size(); let size_changed = size != truncate_u64(fs_metadata.len()); - if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() { + if size >= 0 && size_changed && fs_metadata.simple_type() == openat::SimpleType::Symlink { // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? self.outcome @@ -607,12 +701,15 @@ &self, has_ignored_ancestor: bool, directory_hg_path: &HgPath, - fs_entry: &DirEntry, + parent_dir : &Dir, + fs_entry: &openat::Entry, ) -> bool { - let hg_path = directory_hg_path.join(&fs_entry.base_name); - let file_type = fs_entry.metadata.file_type(); - let file_or_symlink = file_type.is_file() || file_type.is_symlink(); - if file_type.is_dir() { + let hg_path = directory_hg_path.join(HgPath::new(&fs_entry.file_name ().as_bytes())); + let file_type = fs_entry.simple_type().unwrap(); // TODO: fix unwrap + let file_or_symlink = match file_type { + openat::SimpleType::File | openat::SimpleType::Symlink => true, + _ => false }; + if openat::SimpleType::Dir == file_type { let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); let traverse_children = if is_ignored { @@ -625,15 +722,17 @@ }; if traverse_children { let is_at_repo_root = false; + let dir = parent_dir.sub_dir(fs_entry.file_name()).expect("TODO"); if let Ok(children_fs_entries) = self.read_dir( &hg_path, - &fs_entry.full_path, + &dir, is_at_repo_root, ) { children_fs_entries.par_iter().for_each(|child_fs_entry| { self.traverse_fs_only( is_ignored, &hg_path, + &dir, child_fs_entry, ); }) @@ -694,46 +793,34 @@ } } -struct DirEntry { - base_name: HgPathBuf, - full_path: PathBuf, - metadata: std::fs::Metadata, -} - -impl DirEntry { - /// Returns **unsorted** entries in the given directory, with name and - /// metadata. - /// - /// If a `.hg` sub-directory is encountered: - /// - /// * At the repository root, ignore that sub-directory - /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty - /// list instead. - fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result> { - let mut results = Vec::new(); - for entry in path.read_dir()? { - let entry = entry?; - let metadata = entry.metadata()?; - let name = get_bytes_from_os_string(entry.file_name()); - // FIXME don't do this when cached - if name == b".hg" { - if is_at_repo_root { - // Skip the repo’s own .hg (might be a symlink) - continue; - } else if metadata.is_dir() { - // A .hg sub-directory at another location means a subrepo, - // skip it entirely. - return Ok(Vec::new()); - } +/// Returns **unsorted** entries in the given directory, with name and +/// metadata. +/// +/// If a `.hg` sub-directory is encountered: +/// +/// * At the repository root, ignore that sub-directory +/// * Elsewhere, we’re listing the content of a sub-repo. Return an empty +/// list instead. +fn read_dir(dir: &openat::Dir, is_at_repo_root: bool) -> io::Result> { + let mut results = Vec::new(); + for entry in dir.list_dir(".")? { + let entry = entry?; + let name = entry.file_name(); + // FIXME don't do this when cached + if name.as_bytes() == b".hg" { + if is_at_repo_root { + // Skip the repo’s own .hg (might be a symlink) + continue; + // TODO: handle simple_type returning None + } else if Some(openat::SimpleType::Dir) == entry.simple_type() { + // A .hg sub-directory at another location means a subrepo, + // skip it entirely. + return Ok(Vec::new()); } - results.push(DirEntry { - base_name: name.into(), - full_path: entry.path(), - metadata, - }) } - Ok(results) + results.push(entry) } + Ok(results) } /// Return the `mtime` of a temporary file newly-created in the `.hg` directory