This prevents unnecessary fallbacks to Python, which improves the performance
of commands such as `hg update`.
On Mozilla-Central, a no-op update goes from 1.6s down to 700ms.
hg-reviewers |
This prevents unnecessary fallbacks to Python, which improves the performance
of commands such as `hg update`.
On Mozilla-Central, a no-op update goes from 1.6s down to 700ms.
Automatic diff as part of commit; lint not applicable. |
Automatic diff as part of commit; unit tests not applicable. |
| Status | Path | Packages |
|---|---|---|
| M | rust/hg-core/src/dirstate/status.rs (58 lines) | |
| M | rust/hg-core/src/matchers.rs (19 lines) | |
| M | rust/hg-cpython/src/dirstate/status.rs (6 lines) | |
use micro_timer::timed; | use micro_timer::timed; | ||||
use rayon::prelude::*; | use rayon::prelude::*; | ||||
use std::{ | use std::{ | ||||
borrow::Cow, | borrow::Cow, | ||||
collections::HashSet, | collections::HashSet, | ||||
fs::{read_dir, DirEntry}, | fs::{read_dir, DirEntry}, | ||||
io::ErrorKind, | io::ErrorKind, | ||||
ops::Deref, | ops::Deref, | ||||
path::Path, | path::{Path, PathBuf}, | ||||
}; | }; | ||||
/// Wrong type of file from a `BadMatch` | /// Wrong type of file from a `BadMatch` | ||||
/// Note: a lot of those don't exist on all platforms. | /// Note: a lot of those don't exist on all platforms. | ||||
#[derive(Debug, Copy, Clone)] | #[derive(Debug, Copy, Clone)] | ||||
pub enum BadType { | pub enum BadType { | ||||
CharacterDevice, | CharacterDevice, | ||||
BlockDevice, | BlockDevice, | ||||
Directory { | Directory { | ||||
/// True if the directory used to be a file in the dmap so we can say | /// True if the directory used to be a file in the dmap so we can say | ||||
/// that it's been removed. | /// that it's been removed. | ||||
was_file: bool, | was_file: bool, | ||||
}, | }, | ||||
} | } | ||||
type IoResult<T> = std::io::Result<T>; | type IoResult<T> = std::io::Result<T>; | ||||
/// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add | |||||
/// an explicit lifetime here to not fight `'static` bounds "out of nowhere". | |||||
type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>; | |||||
/// Dates and times that are outside the 31-bit signed range are compared | /// Dates and times that are outside the 31-bit signed range are compared | ||||
/// modulo 2^31. This should prevent hg from behaving badly with very large | /// modulo 2^31. This should prevent hg from behaving badly with very large | ||||
/// files or corrupt dates while still having a high probability of detecting | /// files or corrupt dates while still having a high probability of detecting | ||||
/// changes. (issue2608) | /// changes. (issue2608) | ||||
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>` | /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>` | ||||
/// is not defined for `i32`, and there is no `As` trait. This forces the | /// is not defined for `i32`, and there is no `As` trait. This forces the | ||||
/// caller to cast `b` as `i32`. | /// caller to cast `b` as `i32`. | ||||
fn handle_traversed_entry<'a>( | fn handle_traversed_entry<'a>( | ||||
scope: &rayon::Scope<'a>, | scope: &rayon::Scope<'a>, | ||||
files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | ||||
dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | ||||
ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &'a IgnoreFnType, | ||||
dir_ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &'a IgnoreFnType, | ||||
options: StatusOptions, | options: StatusOptions, | ||||
filename: HgPathBuf, | filename: HgPathBuf, | ||||
dir_entry: DirEntry, | dir_entry: DirEntry, | ||||
) -> IoResult<()> { | ) -> IoResult<()> { | ||||
let file_type = dir_entry.file_type()?; | let file_type = dir_entry.file_type()?; | ||||
let entry_option = dmap.get(&filename); | let entry_option = dmap.get(&filename); | ||||
if file_type.is_dir() { | if file_type.is_dir() { | ||||
/// A directory was found in the filesystem and needs to be traversed | /// A directory was found in the filesystem and needs to be traversed | ||||
fn handle_traversed_dir<'a>( | fn handle_traversed_dir<'a>( | ||||
scope: &rayon::Scope<'a>, | scope: &rayon::Scope<'a>, | ||||
files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | ||||
dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | ||||
ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &'a IgnoreFnType, | ||||
dir_ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &'a IgnoreFnType, | ||||
options: StatusOptions, | options: StatusOptions, | ||||
entry_option: Option<&'a DirstateEntry>, | entry_option: Option<&'a DirstateEntry>, | ||||
directory: HgPathBuf, | directory: HgPathBuf, | ||||
) { | ) { | ||||
scope.spawn(move |_| { | scope.spawn(move |_| { | ||||
// Nested `if` until `rust-lang/rust#53668` is stable | // Nested `if` until `rust-lang/rust#53668` is stable | ||||
if let Some(entry) = entry_option { | if let Some(entry) = entry_option { | ||||
// Used to be a file, is now a folder | // Used to be a file, is now a folder | ||||
/// entries in a separate thread. | /// entries in a separate thread. | ||||
fn traverse_dir<'a>( | fn traverse_dir<'a>( | ||||
files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
root_dir: impl AsRef<Path> + Sync + Send + Copy, | root_dir: impl AsRef<Path> + Sync + Send + Copy, | ||||
dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
directory: impl AsRef<HgPath>, | directory: impl AsRef<HgPath>, | ||||
old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | ||||
ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &IgnoreFnType, | ||||
dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &IgnoreFnType, | ||||
options: StatusOptions, | options: StatusOptions, | ||||
) -> IoResult<()> { | ) -> IoResult<()> { | ||||
let directory = directory.as_ref(); | let directory = directory.as_ref(); | ||||
if directory.as_bytes() == b".hg" { | if directory.as_bytes() == b".hg" { | ||||
return Ok(()); | return Ok(()); | ||||
} | } | ||||
let visit_entries = match matcher.visit_children_set(directory) { | let visit_entries = match matcher.visit_children_set(directory) { | ||||
VisitChildrenSet::Empty => return Ok(()), | VisitChildrenSet::Empty => return Ok(()), | ||||
/// in timings | /// in timings | ||||
#[timed] | #[timed] | ||||
fn traverse<'a>( | fn traverse<'a>( | ||||
matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
root_dir: impl AsRef<Path> + Sync + Send + Copy, | root_dir: impl AsRef<Path> + Sync + Send + Copy, | ||||
dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
path: impl AsRef<HgPath>, | path: impl AsRef<HgPath>, | ||||
old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | ||||
ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &IgnoreFnType, | ||||
dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &IgnoreFnType, | ||||
options: StatusOptions, | options: StatusOptions, | ||||
results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, | results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, | ||||
) -> IoResult<()> { | ) -> IoResult<()> { | ||||
let root_dir = root_dir.as_ref(); | let root_dir = root_dir.as_ref(); | ||||
// The traversal is done in parallel, so use a channel to gather entries. | // The traversal is done in parallel, so use a channel to gather entries. | ||||
// `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. | // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. | ||||
let (files_transmitter, files_receiver) = crossbeam::channel::unbounded(); | let (files_transmitter, files_receiver) = crossbeam::channel::unbounded(); | ||||
/// This is the current entry-point for `hg-core` and is realistically unusable | /// This is the current entry-point for `hg-core` and is realistically unusable | ||||
/// outside of a Python context because its arguments need to provide a lot of | /// outside of a Python context because its arguments need to provide a lot of | ||||
/// information that will not be necessary in the future. | /// information that will not be necessary in the future. | ||||
#[timed] | #[timed] | ||||
pub fn status<'a: 'c, 'b: 'c, 'c>( | pub fn status<'a: 'c, 'b: 'c, 'c>( | ||||
dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
matcher: &'b (impl Matcher + Sync), | matcher: &'b (impl Matcher + Sync), | ||||
root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, | ||||
ignore_files: &[impl AsRef<Path> + 'c], | ignore_files: Vec<PathBuf>, | ||||
options: StatusOptions, | options: StatusOptions, | ||||
) -> StatusResult<( | ) -> StatusResult<( | ||||
(Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), | (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), | ||||
Vec<PatternFileWarning>, | Vec<PatternFileWarning>, | ||||
)> { | )> { | ||||
let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?; | // Needs to outlive `dir_ignore_fn` since it's captured. | ||||
let mut ignore_fn: IgnoreFnType; | |||||
// Only involve real ignore mechanism if we're listing unknowns or ignored. | |||||
let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored | |||||
|| options.list_unknown | |||||
{ | |||||
let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?; | |||||
ignore_fn = ignore; | |||||
let dir_ignore_fn = Box::new(|dir: &_| { | |||||
// Is the path or one of its ancestors ignored? | // Is the path or one of its ancestors ignored? | ||||
let dir_ignore_fn = |dir: &_| { | |||||
if ignore_fn(dir) { | if ignore_fn(dir) { | ||||
true | true | ||||
} else { | } else { | ||||
for p in find_dirs(dir) { | for p in find_dirs(dir) { | ||||
if ignore_fn(p) { | if ignore_fn(p) { | ||||
return true; | return true; | ||||
} | } | ||||
} | } | ||||
false | false | ||||
} | } | ||||
}); | |||||
(dir_ignore_fn, warnings) | |||||
} else { | |||||
ignore_fn = Box::new(|&_| true); | |||||
(Box::new(|&_| true), vec![]) | |||||
}; | }; | ||||
let files = matcher.file_set(); | let files = matcher.file_set(); | ||||
// Step 1: check the files explicitly mentioned by the user | // Step 1: check the files explicitly mentioned by the user | ||||
let explicit = walk_explicit(files, &dmap, root_dir, options); | let explicit = walk_explicit(files, &dmap, root_dir, options); | ||||
// Collect results into a `Vec` because we do very few lookups in most | // Collect results into a `Vec` because we do very few lookups in most |
files::find_dirs, | files::find_dirs, | ||||
hg_path::{HgPath, HgPathBuf}, | hg_path::{HgPath, HgPathBuf}, | ||||
Escaped, | Escaped, | ||||
}, | }, | ||||
DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, | DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, | ||||
PatternSyntax, | PatternSyntax, | ||||
}; | }; | ||||
use micro_timer::timed; | use std::borrow::ToOwned; | ||||
use std::collections::HashSet; | use std::collections::HashSet; | ||||
use std::fmt::{Display, Error, Formatter}; | use std::fmt::{Display, Error, Formatter}; | ||||
use std::iter::FromIterator; | use std::iter::FromIterator; | ||||
use std::ops::Deref; | use std::ops::Deref; | ||||
use std::path::Path; | use std::path::{Path, PathBuf}; | ||||
#[derive(Debug, PartialEq)] | #[derive(Debug, PartialEq)] | ||||
pub enum VisitChildrenSet<'a> { | pub enum VisitChildrenSet<'a> { | ||||
/// Don't visit anything | /// Don't visit anything | ||||
Empty, | Empty, | ||||
/// Only visit this directory | /// Only visit this directory | ||||
This, | This, | ||||
/// Visit this directory and these subdirectories | /// Visit this directory and these subdirectories | ||||
filter_subincludes(ignore_patterns, root_dir)?; | filter_subincludes(ignore_patterns, root_dir)?; | ||||
if !subincludes.is_empty() { | if !subincludes.is_empty() { | ||||
// Build prefix-based matcher functions for subincludes | // Build prefix-based matcher functions for subincludes | ||||
let mut submatchers = FastHashMap::default(); | let mut submatchers = FastHashMap::default(); | ||||
let mut prefixes = vec![]; | let mut prefixes = vec![]; | ||||
for SubInclude { prefix, root, path } in subincludes.into_iter() { | for SubInclude { prefix, root, path } in subincludes.into_iter() { | ||||
let (match_fn, warnings) = get_ignore_function(&[path], root)?; | let (match_fn, warnings) = | ||||
get_ignore_function(vec![path.to_path_buf()], root)?; | |||||
all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
prefixes.push(prefix.to_owned()); | prefixes.push(prefix.to_owned()); | ||||
submatchers.insert(prefix.to_owned(), match_fn); | submatchers.insert(prefix.to_owned(), match_fn); | ||||
} | } | ||||
let match_subinclude = move |filename: &HgPath| { | let match_subinclude = move |filename: &HgPath| { | ||||
for prefix in prefixes.iter() { | for prefix in prefixes.iter() { | ||||
if let Some(rel) = filename.relative_to(prefix) { | if let Some(rel) = filename.relative_to(prefix) { | ||||
all_warnings, | all_warnings, | ||||
) | ) | ||||
}) | }) | ||||
} | } | ||||
/// Parses all "ignore" files with their recursive includes and returns a | /// Parses all "ignore" files with their recursive includes and returns a | ||||
/// function that checks whether a given file (in the general sense) should be | /// function that checks whether a given file (in the general sense) should be | ||||
/// ignored. | /// ignored. | ||||
#[timed] | |||||
pub fn get_ignore_function<'a>( | pub fn get_ignore_function<'a>( | ||||
all_pattern_files: &[impl AsRef<Path>], | all_pattern_files: Vec<PathBuf>, | ||||
root_dir: impl AsRef<Path>, | root_dir: impl AsRef<Path>, | ||||
) -> PatternResult<( | ) -> PatternResult<( | ||||
impl for<'r> Fn(&'r HgPath) -> bool + Sync, | Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>, | ||||
Vec<PatternFileWarning>, | Vec<PatternFileWarning>, | ||||
)> { | )> { | ||||
let mut all_patterns = vec![]; | let mut all_patterns = vec![]; | ||||
let mut all_warnings = vec![]; | let mut all_warnings = vec![]; | ||||
for pattern_file in all_pattern_files.into_iter() { | for pattern_file in all_pattern_files.into_iter() { | ||||
let (patterns, warnings) = | let (patterns, warnings) = | ||||
get_patterns_from_file(pattern_file, &root_dir)?; | get_patterns_from_file(pattern_file, &root_dir)?; | ||||
all_patterns.extend(patterns); | all_patterns.extend(patterns.to_owned()); | ||||
all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
} | } | ||||
let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; | let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; | ||||
all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
Ok((move |path: &HgPath| matcher.matches(path), all_warnings)) | Ok(( | ||||
Box::new(move |path: &HgPath| matcher.matches(path)), | |||||
all_warnings, | |||||
)) | |||||
} | } | ||||
impl<'a> IncludeMatcher<'a> { | impl<'a> IncludeMatcher<'a> { | ||||
pub fn new( | pub fn new( | ||||
ignore_patterns: Vec<IgnorePattern>, | ignore_patterns: Vec<IgnorePattern>, | ||||
root_dir: impl AsRef<Path>, | root_dir: impl AsRef<Path>, | ||||
) -> PatternResult<(Self, Vec<PatternFileWarning>)> { | ) -> PatternResult<(Self, Vec<PatternFileWarning>)> { | ||||
let (patterns, match_fn, warnings) = | let (patterns, match_fn, warnings) = |
match matcher.get_type(py).name(py).borrow() { | match matcher.get_type(py).name(py).borrow() { | ||||
"alwaysmatcher" => { | "alwaysmatcher" => { | ||||
let matcher = AlwaysMatcher; | let matcher = AlwaysMatcher; | ||||
let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
&dmap, | &dmap, | ||||
&matcher, | &matcher, | ||||
&root_dir, | &root_dir, | ||||
&ignore_files, | ignore_files, | ||||
StatusOptions { | StatusOptions { | ||||
check_exec, | check_exec, | ||||
last_normal_time, | last_normal_time, | ||||
list_clean, | list_clean, | ||||
list_ignored, | list_ignored, | ||||
list_unknown, | list_unknown, | ||||
}, | }, | ||||
) | ) | ||||
let files = files?; | let files = files?; | ||||
let matcher = FileMatcher::new(&files) | let matcher = FileMatcher::new(&files) | ||||
.map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; | .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; | ||||
let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
&dmap, | &dmap, | ||||
&matcher, | &matcher, | ||||
&root_dir, | &root_dir, | ||||
&ignore_files, | ignore_files, | ||||
StatusOptions { | StatusOptions { | ||||
check_exec, | check_exec, | ||||
last_normal_time, | last_normal_time, | ||||
list_clean, | list_clean, | ||||
list_ignored, | list_ignored, | ||||
list_unknown, | list_unknown, | ||||
}, | }, | ||||
) | ) | ||||
IncludeMatcher::new(ignore_patterns, &root_dir) | IncludeMatcher::new(ignore_patterns, &root_dir) | ||||
.map_err(|e| handle_fallback(py, e.into()))?; | .map_err(|e| handle_fallback(py, e.into()))?; | ||||
all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
&dmap, | &dmap, | ||||
&matcher, | &matcher, | ||||
&root_dir, | &root_dir, | ||||
&ignore_files, | ignore_files, | ||||
StatusOptions { | StatusOptions { | ||||
check_exec, | check_exec, | ||||
last_normal_time, | last_normal_time, | ||||
list_clean, | list_clean, | ||||
list_ignored, | list_ignored, | ||||
list_unknown, | list_unknown, | ||||
}, | }, | ||||
) | ) |