This prevents unnecessary fallbacks to Python, improving performance for
commands such as `hg update`.
On Mozilla-Central, a no-op update goes from 1.6s down to 700ms.
| hg-reviewers |
This prevents unnecessary fallbacks to Python, improving performance for
commands such as `hg update`.
On Mozilla-Central, a no-op update goes from 1.6s down to 700ms.
| Automatic diff as part of commit; lint not applicable. |
| Automatic diff as part of commit; unit tests not applicable. |
| | Path | Packages |
|---|---|---|
| M | rust/hg-core/src/dirstate/status.rs (58 lines) | |
| M | rust/hg-core/src/matchers.rs (19 lines) | |
| M | rust/hg-cpython/src/dirstate/status.rs (6 lines) | |
| use micro_timer::timed; | use micro_timer::timed; | ||||
| use rayon::prelude::*; | use rayon::prelude::*; | ||||
| use std::{ | use std::{ | ||||
| borrow::Cow, | borrow::Cow, | ||||
| collections::HashSet, | collections::HashSet, | ||||
| fs::{read_dir, DirEntry}, | fs::{read_dir, DirEntry}, | ||||
| io::ErrorKind, | io::ErrorKind, | ||||
| ops::Deref, | ops::Deref, | ||||
| path::Path, | path::{Path, PathBuf}, | ||||
| }; | }; | ||||
| /// Wrong type of file from a `BadMatch` | /// Wrong type of file from a `BadMatch` | ||||
| /// Note: a lot of those don't exist on all platforms. | /// Note: a lot of those don't exist on all platforms. | ||||
| #[derive(Debug, Copy, Clone)] | #[derive(Debug, Copy, Clone)] | ||||
| pub enum BadType { | pub enum BadType { | ||||
| CharacterDevice, | CharacterDevice, | ||||
| BlockDevice, | BlockDevice, | ||||
| Directory { | Directory { | ||||
| /// True if the directory used to be a file in the dmap so we can say | /// True if the directory used to be a file in the dmap so we can say | ||||
| /// that it's been removed. | /// that it's been removed. | ||||
| was_file: bool, | was_file: bool, | ||||
| }, | }, | ||||
| } | } | ||||
| type IoResult<T> = std::io::Result<T>; | type IoResult<T> = std::io::Result<T>; | ||||
| /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add | |||||
| /// an explicit lifetime here to not fight `'static` bounds "out of nowhere". | |||||
| type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>; | |||||
| /// Dates and times that are outside the 31-bit signed range are compared | /// Dates and times that are outside the 31-bit signed range are compared | ||||
| /// modulo 2^31. This should prevent hg from behaving badly with very large | /// modulo 2^31. This should prevent hg from behaving badly with very large | ||||
| /// files or corrupt dates while still having a high probability of detecting | /// files or corrupt dates while still having a high probability of detecting | ||||
| /// changes. (issue2608) | /// changes. (issue2608) | ||||
| /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>` | /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>` | ||||
| /// is not defined for `i32`, and there is no `As` trait. This forces the | /// is not defined for `i32`, and there is no `As` trait. This forces the | ||||
| /// caller to cast `b` as `i32`. | /// caller to cast `b` as `i32`. | ||||
| fn handle_traversed_entry<'a>( | fn handle_traversed_entry<'a>( | ||||
| scope: &rayon::Scope<'a>, | scope: &rayon::Scope<'a>, | ||||
| files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
| matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
| root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | ||||
| dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
| old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | ||||
| ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &'a IgnoreFnType, | ||||
| dir_ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &'a IgnoreFnType, | ||||
| options: StatusOptions, | options: StatusOptions, | ||||
| filename: HgPathBuf, | filename: HgPathBuf, | ||||
| dir_entry: DirEntry, | dir_entry: DirEntry, | ||||
| ) -> IoResult<()> { | ) -> IoResult<()> { | ||||
| let file_type = dir_entry.file_type()?; | let file_type = dir_entry.file_type()?; | ||||
| let entry_option = dmap.get(&filename); | let entry_option = dmap.get(&filename); | ||||
| if file_type.is_dir() { | if file_type.is_dir() { | ||||
| /// A directory was found in the filesystem and needs to be traversed | /// A directory was found in the filesystem and needs to be traversed | ||||
| fn handle_traversed_dir<'a>( | fn handle_traversed_dir<'a>( | ||||
| scope: &rayon::Scope<'a>, | scope: &rayon::Scope<'a>, | ||||
| files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
| matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
| root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, | ||||
| dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
| old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, | ||||
| ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &'a IgnoreFnType, | ||||
| dir_ignore_fn: &'a (impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &'a IgnoreFnType, | ||||
| options: StatusOptions, | options: StatusOptions, | ||||
| entry_option: Option<&'a DirstateEntry>, | entry_option: Option<&'a DirstateEntry>, | ||||
| directory: HgPathBuf, | directory: HgPathBuf, | ||||
| ) { | ) { | ||||
| scope.spawn(move |_| { | scope.spawn(move |_| { | ||||
| // Nested `if` until `rust-lang/rust#53668` is stable | // Nested `if` until `rust-lang/rust#53668` is stable | ||||
| if let Some(entry) = entry_option { | if let Some(entry) = entry_option { | ||||
| // Used to be a file, is now a folder | // Used to be a file, is now a folder | ||||
| /// entries in a separate thread. | /// entries in a separate thread. | ||||
| fn traverse_dir<'a>( | fn traverse_dir<'a>( | ||||
| files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, | ||||
| matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
| root_dir: impl AsRef<Path> + Sync + Send + Copy, | root_dir: impl AsRef<Path> + Sync + Send + Copy, | ||||
| dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
| directory: impl AsRef<HgPath>, | directory: impl AsRef<HgPath>, | ||||
| old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | ||||
| ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &IgnoreFnType, | ||||
| dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &IgnoreFnType, | ||||
| options: StatusOptions, | options: StatusOptions, | ||||
| ) -> IoResult<()> { | ) -> IoResult<()> { | ||||
| let directory = directory.as_ref(); | let directory = directory.as_ref(); | ||||
| if directory.as_bytes() == b".hg" { | if directory.as_bytes() == b".hg" { | ||||
| return Ok(()); | return Ok(()); | ||||
| } | } | ||||
| let visit_entries = match matcher.visit_children_set(directory) { | let visit_entries = match matcher.visit_children_set(directory) { | ||||
| VisitChildrenSet::Empty => return Ok(()), | VisitChildrenSet::Empty => return Ok(()), | ||||
| /// in timings | /// in timings | ||||
| #[timed] | #[timed] | ||||
| fn traverse<'a>( | fn traverse<'a>( | ||||
| matcher: &'a (impl Matcher + Sync), | matcher: &'a (impl Matcher + Sync), | ||||
| root_dir: impl AsRef<Path> + Sync + Send + Copy, | root_dir: impl AsRef<Path> + Sync + Send + Copy, | ||||
| dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
| path: impl AsRef<HgPath>, | path: impl AsRef<HgPath>, | ||||
| old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, | ||||
| ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | ignore_fn: &IgnoreFnType, | ||||
| dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync), | dir_ignore_fn: &IgnoreFnType, | ||||
| options: StatusOptions, | options: StatusOptions, | ||||
| results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, | results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, | ||||
| ) -> IoResult<()> { | ) -> IoResult<()> { | ||||
| let root_dir = root_dir.as_ref(); | let root_dir = root_dir.as_ref(); | ||||
| // The traversal is done in parallel, so use a channel to gather entries. | // The traversal is done in parallel, so use a channel to gather entries. | ||||
| // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. | // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. | ||||
| let (files_transmitter, files_receiver) = crossbeam::channel::unbounded(); | let (files_transmitter, files_receiver) = crossbeam::channel::unbounded(); | ||||
| /// This is the current entry-point for `hg-core` and is realistically unusable | /// This is the current entry-point for `hg-core` and is realistically unusable | ||||
| /// outside of a Python context because its arguments need to provide a lot of | /// outside of a Python context because its arguments need to provide a lot of | ||||
| /// information that will not be necessary in the future. | /// information that will not be necessary in the future. | ||||
| #[timed] | #[timed] | ||||
| pub fn status<'a: 'c, 'b: 'c, 'c>( | pub fn status<'a: 'c, 'b: 'c, 'c>( | ||||
| dmap: &'a DirstateMap, | dmap: &'a DirstateMap, | ||||
| matcher: &'b (impl Matcher + Sync), | matcher: &'b (impl Matcher + Sync), | ||||
| root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, | ||||
| ignore_files: &[impl AsRef<Path> + 'c], | ignore_files: Vec<PathBuf>, | ||||
| options: StatusOptions, | options: StatusOptions, | ||||
| ) -> StatusResult<( | ) -> StatusResult<( | ||||
| (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), | (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), | ||||
| Vec<PatternFileWarning>, | Vec<PatternFileWarning>, | ||||
| )> { | )> { | ||||
| let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?; | // Needs to outlive `dir_ignore_fn` since it's captured. | ||||
| let mut ignore_fn: IgnoreFnType; | |||||
| // Only involve real ignore mechanism if we're listing unknowns or ignored. | |||||
| let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored | |||||
| || options.list_unknown | |||||
| { | |||||
| let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?; | |||||
| ignore_fn = ignore; | |||||
| let dir_ignore_fn = Box::new(|dir: &_| { | |||||
| // Is the path or one of its ancestors ignored? | // Is the path or one of its ancestors ignored? | ||||
| let dir_ignore_fn = |dir: &_| { | |||||
| if ignore_fn(dir) { | if ignore_fn(dir) { | ||||
| true | true | ||||
| } else { | } else { | ||||
| for p in find_dirs(dir) { | for p in find_dirs(dir) { | ||||
| if ignore_fn(p) { | if ignore_fn(p) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| } | } | ||||
| false | false | ||||
| } | } | ||||
| }); | |||||
| (dir_ignore_fn, warnings) | |||||
| } else { | |||||
| ignore_fn = Box::new(|&_| true); | |||||
| (Box::new(|&_| true), vec![]) | |||||
| }; | }; | ||||
| let files = matcher.file_set(); | let files = matcher.file_set(); | ||||
| // Step 1: check the files explicitly mentioned by the user | // Step 1: check the files explicitly mentioned by the user | ||||
| let explicit = walk_explicit(files, &dmap, root_dir, options); | let explicit = walk_explicit(files, &dmap, root_dir, options); | ||||
| // Collect results into a `Vec` because we do very few lookups in most | // Collect results into a `Vec` because we do very few lookups in most | ||||
| files::find_dirs, | files::find_dirs, | ||||
| hg_path::{HgPath, HgPathBuf}, | hg_path::{HgPath, HgPathBuf}, | ||||
| Escaped, | Escaped, | ||||
| }, | }, | ||||
| DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, | DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, | ||||
| PatternSyntax, | PatternSyntax, | ||||
| }; | }; | ||||
| use micro_timer::timed; | use std::borrow::ToOwned; | ||||
| use std::collections::HashSet; | use std::collections::HashSet; | ||||
| use std::fmt::{Display, Error, Formatter}; | use std::fmt::{Display, Error, Formatter}; | ||||
| use std::iter::FromIterator; | use std::iter::FromIterator; | ||||
| use std::ops::Deref; | use std::ops::Deref; | ||||
| use std::path::Path; | use std::path::{Path, PathBuf}; | ||||
| #[derive(Debug, PartialEq)] | #[derive(Debug, PartialEq)] | ||||
| pub enum VisitChildrenSet<'a> { | pub enum VisitChildrenSet<'a> { | ||||
| /// Don't visit anything | /// Don't visit anything | ||||
| Empty, | Empty, | ||||
| /// Only visit this directory | /// Only visit this directory | ||||
| This, | This, | ||||
| /// Visit this directory and these subdirectories | /// Visit this directory and these subdirectories | ||||
| filter_subincludes(ignore_patterns, root_dir)?; | filter_subincludes(ignore_patterns, root_dir)?; | ||||
| if !subincludes.is_empty() { | if !subincludes.is_empty() { | ||||
| // Build prefix-based matcher functions for subincludes | // Build prefix-based matcher functions for subincludes | ||||
| let mut submatchers = FastHashMap::default(); | let mut submatchers = FastHashMap::default(); | ||||
| let mut prefixes = vec![]; | let mut prefixes = vec![]; | ||||
| for SubInclude { prefix, root, path } in subincludes.into_iter() { | for SubInclude { prefix, root, path } in subincludes.into_iter() { | ||||
| let (match_fn, warnings) = get_ignore_function(&[path], root)?; | let (match_fn, warnings) = | ||||
| get_ignore_function(vec![path.to_path_buf()], root)?; | |||||
| all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
| prefixes.push(prefix.to_owned()); | prefixes.push(prefix.to_owned()); | ||||
| submatchers.insert(prefix.to_owned(), match_fn); | submatchers.insert(prefix.to_owned(), match_fn); | ||||
| } | } | ||||
| let match_subinclude = move |filename: &HgPath| { | let match_subinclude = move |filename: &HgPath| { | ||||
| for prefix in prefixes.iter() { | for prefix in prefixes.iter() { | ||||
| if let Some(rel) = filename.relative_to(prefix) { | if let Some(rel) = filename.relative_to(prefix) { | ||||
| all_warnings, | all_warnings, | ||||
| ) | ) | ||||
| }) | }) | ||||
| } | } | ||||
| /// Parses all "ignore" files with their recursive includes and returns a | /// Parses all "ignore" files with their recursive includes and returns a | ||||
| /// function that checks whether a given file (in the general sense) should be | /// function that checks whether a given file (in the general sense) should be | ||||
| /// ignored. | /// ignored. | ||||
| #[timed] | |||||
| pub fn get_ignore_function<'a>( | pub fn get_ignore_function<'a>( | ||||
| all_pattern_files: &[impl AsRef<Path>], | all_pattern_files: Vec<PathBuf>, | ||||
| root_dir: impl AsRef<Path>, | root_dir: impl AsRef<Path>, | ||||
| ) -> PatternResult<( | ) -> PatternResult<( | ||||
| impl for<'r> Fn(&'r HgPath) -> bool + Sync, | Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>, | ||||
| Vec<PatternFileWarning>, | Vec<PatternFileWarning>, | ||||
| )> { | )> { | ||||
| let mut all_patterns = vec![]; | let mut all_patterns = vec![]; | ||||
| let mut all_warnings = vec![]; | let mut all_warnings = vec![]; | ||||
| for pattern_file in all_pattern_files.into_iter() { | for pattern_file in all_pattern_files.into_iter() { | ||||
| let (patterns, warnings) = | let (patterns, warnings) = | ||||
| get_patterns_from_file(pattern_file, &root_dir)?; | get_patterns_from_file(pattern_file, &root_dir)?; | ||||
| all_patterns.extend(patterns); | all_patterns.extend(patterns.to_owned()); | ||||
| all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
| } | } | ||||
| let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; | let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; | ||||
| all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
| Ok((move |path: &HgPath| matcher.matches(path), all_warnings)) | Ok(( | ||||
| Box::new(move |path: &HgPath| matcher.matches(path)), | |||||
| all_warnings, | |||||
| )) | |||||
| } | } | ||||
| impl<'a> IncludeMatcher<'a> { | impl<'a> IncludeMatcher<'a> { | ||||
| pub fn new( | pub fn new( | ||||
| ignore_patterns: Vec<IgnorePattern>, | ignore_patterns: Vec<IgnorePattern>, | ||||
| root_dir: impl AsRef<Path>, | root_dir: impl AsRef<Path>, | ||||
| ) -> PatternResult<(Self, Vec<PatternFileWarning>)> { | ) -> PatternResult<(Self, Vec<PatternFileWarning>)> { | ||||
| let (patterns, match_fn, warnings) = | let (patterns, match_fn, warnings) = | ||||
| match matcher.get_type(py).name(py).borrow() { | match matcher.get_type(py).name(py).borrow() { | ||||
| "alwaysmatcher" => { | "alwaysmatcher" => { | ||||
| let matcher = AlwaysMatcher; | let matcher = AlwaysMatcher; | ||||
| let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
| &dmap, | &dmap, | ||||
| &matcher, | &matcher, | ||||
| &root_dir, | &root_dir, | ||||
| &ignore_files, | ignore_files, | ||||
| StatusOptions { | StatusOptions { | ||||
| check_exec, | check_exec, | ||||
| last_normal_time, | last_normal_time, | ||||
| list_clean, | list_clean, | ||||
| list_ignored, | list_ignored, | ||||
| list_unknown, | list_unknown, | ||||
| }, | }, | ||||
| ) | ) | ||||
| let files = files?; | let files = files?; | ||||
| let matcher = FileMatcher::new(&files) | let matcher = FileMatcher::new(&files) | ||||
| .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; | .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; | ||||
| let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
| &dmap, | &dmap, | ||||
| &matcher, | &matcher, | ||||
| &root_dir, | &root_dir, | ||||
| &ignore_files, | ignore_files, | ||||
| StatusOptions { | StatusOptions { | ||||
| check_exec, | check_exec, | ||||
| last_normal_time, | last_normal_time, | ||||
| list_clean, | list_clean, | ||||
| list_ignored, | list_ignored, | ||||
| list_unknown, | list_unknown, | ||||
| }, | }, | ||||
| ) | ) | ||||
| IncludeMatcher::new(ignore_patterns, &root_dir) | IncludeMatcher::new(ignore_patterns, &root_dir) | ||||
| .map_err(|e| handle_fallback(py, e.into()))?; | .map_err(|e| handle_fallback(py, e.into()))?; | ||||
| all_warnings.extend(warnings); | all_warnings.extend(warnings); | ||||
| let ((lookup, status_res), warnings) = status( | let ((lookup, status_res), warnings) = status( | ||||
| &dmap, | &dmap, | ||||
| &matcher, | &matcher, | ||||
| &root_dir, | &root_dir, | ||||
| &ignore_files, | ignore_files, | ||||
| StatusOptions { | StatusOptions { | ||||
| check_exec, | check_exec, | ||||
| last_normal_time, | last_normal_time, | ||||
| list_clean, | list_clean, | ||||
| list_ignored, | list_ignored, | ||||
| list_unknown, | list_unknown, | ||||
| }, | }, | ||||
| ) | ) | ||||