diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs --- a/rust/hg-core/src/filepatterns.rs +++ b/rust/hg-core/src/filepatterns.rs @@ -176,9 +176,14 @@ return vec![]; } match syntax { - PatternSyntax::Regexp => pattern.to_owned(), + // The `regex` crate adds `.*` to the start and end of expressions + // if there are no anchors, so add them. + PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(), PatternSyntax::RelRegexp => { - if pattern[0] == b'^' { + // The `regex` crate accepts `**` while `re2` and Python's `re` + // do not. Checking for `*` correctly triggers the same error in all + // engines. + if pattern[0] == b'^' || pattern[0] == b'*' { return pattern.to_owned(); } [&b".*"[..], pattern].concat() @@ -191,14 +196,15 @@ } PatternSyntax::RootFiles => { let mut res = if pattern == b"." { - vec![] + vec![b'^'] } else { // Pattern is a directory name. - [escape_pattern(pattern).as_slice(), b"/"].concat() + [b"^", escape_pattern(pattern).as_slice(), b"/"].concat() }; // Anything after the pattern must be a non-directory. res.extend(b"[^/]+$"); + res.push(b'$'); res } PatternSyntax::RelGlob => { @@ -206,11 +212,11 @@ if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { [b".*", rest, GLOB_SUFFIX].concat() } else { - [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat() + [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() } } PatternSyntax::Glob | PatternSyntax::RootGlob => { - [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() + [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() } PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), } @@ -282,7 +288,10 @@ if *syntax == PatternSyntax::RootGlob && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) { - let mut escaped = escape_pattern(&pattern); + // The `regex` crate adds `.*` to the start and end of expressions + // if there are no anchors, so add the start anchor. 
+ let mut escaped = vec![b'^']; + escaped.extend(escape_pattern(&pattern)); escaped.extend(GLOB_SUFFIX); Ok(escaped) } else { @@ -619,7 +628,7 @@ Path::new("") )) .unwrap(), - br"(?:|.*/)rust/target(?:/|$)".to_vec(), + br"(?:.*/)?rust/target(?:/|$)".to_vec(), ); } @@ -632,7 +641,7 @@ Path::new("") )) .unwrap(), - br"\.(?:/|$)".to_vec(), + br"^\.(?:/|$)".to_vec(), ); assert_eq!( build_single_regex(&IgnorePattern::new( @@ -641,7 +650,7 @@ Path::new("") )) .unwrap(), - br"whatever(?:/|$)".to_vec(), + br"^whatever(?:/|$)".to_vec(), ); assert_eq!( build_single_regex(&IgnorePattern::new( @@ -650,7 +659,7 @@ Path::new("") )) .unwrap(), - br"[^/]*\.o(?:/|$)".to_vec(), + br"^[^/]*\.o(?:/|$)".to_vec(), ); } } diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs --- a/rust/hg-core/src/matchers.rs +++ b/rust/hg-core/src/matchers.rs @@ -331,8 +331,37 @@ } #[cfg(not(feature = "with-re2"))] -fn re_matcher(_: &[u8]) -> PatternResult bool + Sync>> { - Err(PatternError::Re2NotInstalled) +/// Returns a function that matches an `HgPath` against the given regex +/// pattern. +/// +/// This can fail when the pattern is invalid or not supported by the +/// underlying engine (the `regex` crate), for instance anything with +/// back-references. +fn re_matcher( + pattern: &[u8], +) -> PatternResult bool + Sync> { + use std::io::Write; + + let mut escaped_bytes = vec![]; + for byte in pattern { + if *byte > 127 { + write!(escaped_bytes, "\\x{:x}", *byte).unwrap(); + } else { + escaped_bytes.push(*byte); + } + } + + // Avoid the cost of UTF8 checking + // + // # Safety + // This is safe because we escaped all non-ASCII bytes. 
+ let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; + let re = regex::bytes::RegexBuilder::new(&pattern_string) + .unicode(false) + .build() + .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; + + Ok(move |path: &HgPath| re.is_match(path.as_bytes())) } /// Returns the regex pattern and a function that matches an `HgPath` against