diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs +++ b/rust/hg-core/src/dirstate/status.rs @@ -11,22 +11,31 @@ use crate::{ dirstate::SIZE_FROM_OTHER_PARENT, - matchers::{Matcher, VisitChildrenSet}, + filepatterns::PatternFileWarning, + matchers::{get_ignore_function, Matcher, VisitChildrenSet}, utils::{ - files::HgMetadata, + files::{find_dirs, HgMetadata}, hg_path::{ hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, + HgPathError, }, + path_auditor::PathAuditor, }, CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, + PatternError, }; +use lazy_static::lazy_static; use rayon::prelude::*; -use std::borrow::Cow; -use std::collections::{HashSet, VecDeque}; -use std::fs::{read_dir, DirEntry}; -use std::io::ErrorKind; -use std::ops::Deref; -use std::path::Path; +use std::collections::VecDeque; +use std::{ + borrow::Cow, + collections::HashSet, + fs::{read_dir, DirEntry}, + io::ErrorKind, + ops::Deref, + path::Path, + sync::mpsc, +}; /// Wrong type of file from a `BadMatch` /// Note: a lot of those don't exist on all platforms. @@ -50,6 +59,7 @@ /// Marker enum used to dispatch new status entries into the right collections. /// Is similar to `crate::EntryState`, but represents the transient state of /// entries during the lifetime of a command. +#[derive(Debug)] enum Dispatch { Unsure, Modified, @@ -150,7 +160,7 @@ } else if options.list_clean { Dispatch::Clean } else { - Dispatch::Unknown + Dispatch::None } } EntryState::Merged => Dispatch::Modified, @@ -174,57 +184,95 @@ } } +lazy_static! { + static ref DEFAULT_WORK: HashSet<&'static HgPath> = { + let mut h = HashSet::new(); + h.insert(HgPath::new(b"")); + h + }; +} + /// Get stat data about the files explicitly specified by match. /// TODO subrepos fn walk_explicit<'a>( - files: &'a HashSet<&HgPath>, + files: Option<&'a HashSet<&HgPath>>, dmap: &'a DirstateMap, root_dir: impl AsRef + Sync + Send, + work: mpsc::Sender<&'a HgPath>, options: StatusOptions, -) -> impl ParallelIterator> { - files.par_iter().filter_map(move |filename| { - // TODO normalization - let normalized = filename.as_ref(); +) -> impl ParallelIterator, Dispatch)>> { + files + .unwrap_or(&DEFAULT_WORK) + .par_iter() + .map_with(work, move |work, filename| { + // TODO normalization + let normalized = filename.as_ref(); - let buf = match hg_path_to_path_buf(normalized) { - Ok(x) => x, - Err(e) => return Some(Err(e.into())), - }; - let target = root_dir.as_ref().join(buf); - let st = target.symlink_metadata(); - match st { - Ok(meta) => { - let file_type = meta.file_type(); - if file_type.is_file() || file_type.is_symlink() { - if let Some(entry) = dmap.get(normalized) { + let buf = match hg_path_to_path_buf(normalized) { + Ok(x) => x, + Err(e) => return Some(Err(e.into())), + }; + let target = root_dir.as_ref().join(buf); + let st = target.symlink_metadata(); + let in_dmap = dmap.get(normalized); + match st { + Ok(meta) => { + let file_type = meta.file_type(); + if file_type.is_file() || file_type.is_symlink() { + if let Some(entry) = in_dmap { + return Some(Ok(( + Cow::Borrowed(normalized), + dispatch_found( + &normalized, + *entry, + HgMetadata::from_metadata(meta), + &dmap.copy_map, + options, + ), + ))); + } return Some(Ok(( - normalized, - dispatch_found( - &normalized, - *entry, - HgMetadata::from_metadata(meta), - &dmap.copy_map, - options, - ), + Cow::Borrowed(normalized), + Dispatch::Unknown, + ))); + } else { + if file_type.is_dir() { + // The channel always outlives the sender, unwrap + work.send(normalized).unwrap() + } else { + return Some(Ok(( + Cow::Borrowed(normalized), + Dispatch::Bad(BadMatch::BadType( + // TODO do more than unknown + // Support for all `BadType` variant + // varies greatly between platforms. + // So far, no tests check the type and this + // should be good enough for most users. + BadType::Unknown, + )), + ))); + } + if in_dmap.is_some() { + return Some(Ok(( + Cow::Borrowed(normalized), + Dispatch::Removed, + ))); + } + } + } + Err(_) => { + if let Some(entry) = in_dmap { + return Some(Ok(( + Cow::Borrowed(normalized), + dispatch_missing(entry.state), ))); } - } else { - if dmap.contains_key(normalized) { - return Some(Ok((normalized, Dispatch::Removed))); - } } - } - Err(_) => { - if let Some(entry) = dmap.get(normalized) { - return Some(Ok(( - normalized, - dispatch_missing(entry.state), - ))); - } - } - }; - None - }) + }; + None + }) + .filter(|s| s.is_some()) + .map(|s| s.unwrap()) } #[derive(Debug, Copy, Clone)] @@ -396,13 +444,12 @@ Ok(new_results) } -/// Stat all entries in the `DirstateMap` and mark them for dispatch into -/// the relevant collections. +/// Stat all entries in the `DirstateMap` and mark them for dispatch. fn stat_dmap_entries( dmap: &DirstateMap, root_dir: impl AsRef + Sync + Send, options: StatusOptions, -) -> impl ParallelIterator> { +) -> impl ParallelIterator, Dispatch)>> { dmap.par_iter().map(move |(filename, entry)| { let filename: &HgPath = filename; let filename_as_path = hg_path_to_path_buf(filename)?; @@ -413,10 +460,10 @@ if !(m.file_type().is_file() || m.file_type().is_symlink()) => { - Ok((filename, dispatch_missing(entry.state))) + Ok((Cow::Borrowed(filename), dispatch_missing(entry.state))) } Ok(m) => Ok(( - filename, + Cow::Borrowed(filename), dispatch_found( filename, *entry, @@ -426,14 +473,14 @@ ), )), Err(ref e) - if e.kind() == std::io::ErrorKind::NotFound + if e.kind() == ErrorKind::NotFound || e.raw_os_error() == Some(20) => { // Rust does not yet have an `ErrorKind` for // `NotADirectory` (errno 20) // It happens if the dirstate contains `foo/bar` and // foo is not a directory - Ok((filename, dispatch_missing(entry.state))) + Ok((Cow::Borrowed(filename), dispatch_missing(entry.state))) } Err(e) => Err(e), } @@ -441,21 +488,19 @@ } pub struct DirstateStatus<'a> { - pub modified: Vec<&'a HgPath>, - pub added: Vec<&'a HgPath>, - pub removed: Vec<&'a HgPath>, - pub deleted: Vec<&'a HgPath>, - pub clean: Vec<&'a HgPath>, - pub ignored: Vec<&'a HgPath>, - pub unknown: Vec<&'a HgPath>, - pub bad: Vec<(&'a HgPath, BadMatch)>, - /* TODO ignored - * TODO unknown */ + pub modified: Vec>, + pub added: Vec>, + pub removed: Vec>, + pub deleted: Vec>, + pub clean: Vec>, + pub ignored: Vec>, + pub unknown: Vec>, + pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>, } fn build_response<'a>( - results: impl IntoIterator>, -) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> { + results: impl IntoIterator, Dispatch)>, +) -> (Vec>, DirstateStatus<'a>) { let mut lookup = vec![]; let mut modified = vec![]; let mut added = vec![]; @@ -466,8 +511,7 @@ let mut unknown = vec![]; let mut bad = vec![]; - for res in results.into_iter() { - let (filename, dispatch) = res?; + for (filename, dispatch) in results.into_iter() { match dispatch { Dispatch::Unknown => unknown.push(filename), Dispatch::Unsure => lookup.push(filename), @@ -482,7 +526,7 @@ } } - Ok(( + ( lookup, DirstateStatus { modified, @@ -494,25 +538,189 @@ unknown, bad, }, - )) + ) +} + +pub enum StatusError { + IO(std::io::Error), + Path(HgPathError), + Pattern(PatternError), } +pub type StatusResult = Result; + +impl From for StatusError { + fn from(e: PatternError) -> Self { + StatusError::Pattern(e) + } +} +impl From for StatusError { + fn from(e: HgPathError) -> Self { + StatusError::Path(e) + } +} +impl From for StatusError { + fn from(e: std::io::Error) -> Self { + StatusError::IO(e) + } +} + +impl ToString for StatusError { + fn to_string(&self) -> String { + match self { + StatusError::IO(e) => e.to_string(), + StatusError::Path(e) => e.to_string(), + StatusError::Pattern(e) => e.to_string(), + } + } +} + +/// Get the status of files in the working directory. +/// +/// This is the current entry-point for `hg-core` and is realistically unusable +/// outside of a Python context because its arguments need to provide a lot of +/// information that will not be necessary in the future. pub fn status<'a: 'c, 'b: 'c, 'c>( dmap: &'a DirstateMap, - matcher: &'b impl Matcher, + matcher: &'b (impl Matcher + Sync), root_dir: impl AsRef + Sync + Send + Copy, + ignore_files: &[impl AsRef], options: StatusOptions, -) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> { +) -> StatusResult<( + (Vec>, DirstateStatus<'c>), + Vec, +)> { + let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?; + let dir_ignore_fn = |dir: &_| { + if ignore_fn(dir) { + true + } else { + for p in find_dirs(dir) { + if ignore_fn(p) { + return true; + } + } + false + } + }; + let files = matcher.file_set(); + let mut results = vec![]; - if let Some(files) = files { - results.par_extend(walk_explicit(&files, &dmap, root_dir, options)); + let mut work = vec![]; + + // Step 1: check the files explicitly mentioned by the user + let (tx, rx) = mpsc::channel(); + results.par_extend(walk_explicit(files, &dmap, root_dir, tx, options)); + while let Ok(dir) = rx.recv() { + if options.list_ignored || options.list_unknown && !dir_ignore_fn(dir) + { + work.push(dir) + } + } + + let mut results = results.into_iter().flatten().collect(); + + // Step 2: recursively check the working directory for changes if needed + for dir in work { + if dir_ignore_fn(dir) { + continue; + } + results = traverse( + matcher, root_dir, &dmap, dir, results, &ignore_fn, options, + )?; } if !matcher.is_exact() { - let stat_results = stat_dmap_entries(&dmap, root_dir, options); - results.par_extend(stat_results); + // Step 3: Check the remaining files from the dmap. + // If a dmap file is not in results yet, it was either + // a) not matched b) ignored, c) missing, or d) under a + // symlink directory. + + let to_visit: Box> = + if results.is_empty() && matcher.matches_everything() { + Box::new(dmap.iter()) + } else { + Box::new(dmap.iter().filter_map(|(f, e)| { + if !results.contains_key(f.deref()) && matcher.matches(f) { + Some((f, e)) + } else { + None + } + })) + }; + let mut to_visit: Vec<_> = to_visit.collect(); + to_visit.sort_by(|a, b| a.0.cmp(&b.0)); + + if options.list_unknown { + // We walked all dirs under the roots that weren't ignored, and + // everything that matched was stat'ed and is already in results. + // The rest must thus be ignored or under a symlink. + let mut path_auditor = PathAuditor::new(root_dir); + + for (ref filename, entry) in to_visit { + // Report ignored items in the dmap as long as they are not + // under a symlink directory. + if path_auditor.check(filename) { + // TODO normalize for case-insensitive filesystems + let buf = hg_path_to_path_buf(filename)?; + results.insert( + Cow::Borrowed(filename), + match root_dir.as_ref().join(&buf).symlink_metadata() { + // File was just ignored, no links, and exists + Ok(meta) => { + let metadata = HgMetadata::from_metadata(meta); + dispatch_found( + filename, + *entry, + metadata, + &dmap.copy_map, + options, + ) + } + // File doesn't exist + Err(_) => dispatch_missing(entry.state), + }, + ); + } else { + // It's either missing or under a symlink directory which + // we, in this case, report as missing. + results.insert( + Cow::Borrowed(filename), + dispatch_missing(entry.state), + ); + } + } + } else { + // We may not have walked the full directory tree above, so stat + // and check everything we missed. + let stat_results = stat_dmap_entries(&dmap, root_dir, options); + results.par_extend(stat_results.flatten()); + } } - build_response(results) + let results = results.into_iter().filter_map(|(filename, dispatch)| { + match dispatch { + Dispatch::Bad(_) => return Some((filename, dispatch)), + _ => {} + }; + // TODO do this in //, not at the end + if !dmap.contains_key(filename.deref()) { + if (options.list_ignored || matcher.exact_match(&filename)) + && dir_ignore_fn(&filename) + { + if options.list_ignored { + return Some((filename.to_owned(), Dispatch::Ignored)); + } + } else { + if !ignore_fn(&filename) { + return Some((filename.to_owned(), Dispatch::Unknown)); + } + } + return None; + } + Some((filename, dispatch)) + }); + + Ok((build_response(results), warnings)) } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -13,7 +13,9 @@ dirs_multiset::{DirsMultiset, DirsMultisetIter}, dirstate_map::DirstateMap, parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE}, - status::{status, DirstateStatus, StatusOptions}, + status::{ + status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions, + }, CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState, StateMap, StateMapIter, };