diff --git a/rust/hg-core/src/configparser/c_api.rs b/rust/hg-core/src/configparser/c_api.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/c_api.rs @@ -0,0 +1,151 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +//! This module exports some symbols to allow calling the config parser from C/C++ +use std::ffi::{CStr, OsStr}; +use std::os::raw::c_char; +use std::path::Path; +use std::ptr; +use std::slice; + +use bytes::Bytes; + +use crate::config::{ConfigSet, Options}; +use crate::error::Error; +use crate::hg::ConfigSetHgExt; +use crate::hg::OptionsHgExt; + +/// Create and return a new, empty ConfigSet +#[no_mangle] +pub extern "C" fn hgrc_configset_new() -> *mut ConfigSet { + Box::into_raw(Box::new(ConfigSet::new())) +} + +/// Free a ConfigSet instance created via hgrc_configset_new(). +/// Releases all associated resources. +#[no_mangle] +pub extern "C" fn hgrc_configset_free(cfg: *mut ConfigSet) { + debug_assert!(!cfg.is_null()); + let cfg = unsafe { Box::from_raw(cfg) }; + drop(cfg); +} + +fn errors_to_bytes(errors: Vec) -> *mut Bytes { + if errors.is_empty() { + // Success! + return ptr::null_mut(); + } + + // Failed; convert the errors into an error string + let mut error_text = String::new(); + for (idx, err) in errors.iter().enumerate() { + if idx > 0 { + error_text.push_str("\n"); + } + error_text.push_str(&err.to_string()); + } + + Box::into_raw(Box::new(error_text.into())) +} + +fn load_path(cfg: &mut ConfigSet, path: &Path) -> *mut Bytes { + let errors = cfg.load_path(path, &Options::new().process_hgplain()); + + errors_to_bytes(errors) +} + +/// Attempt to load and parse the config file at the specified path. +/// If successful, returns a nullptr. 
+/// Returns a Bytes object containing the error reason on failure; the +/// error object is UTF-8 encoded text, and errors can span multiple lines. +#[cfg(unix)] +#[no_mangle] +pub extern "C" fn hgrc_configset_load_path(cfg: *mut ConfigSet, path: *const c_char) -> *mut Bytes { + debug_assert!(!path.is_null()); + debug_assert!(!cfg.is_null()); + + use std::os::unix::ffi::OsStrExt; + + let path_cstr = unsafe { CStr::from_ptr(path) }; + let path_bytes = path_cstr.to_bytes(); + let path = Path::new(OsStr::from_bytes(&path_bytes)); + + let cfg = unsafe { &mut *cfg }; + + load_path(cfg, path) +} + +/// Load system config files +#[no_mangle] +pub extern "C" fn hgrc_configset_load_system(cfg: *mut ConfigSet) -> *mut Bytes { + debug_assert!(!cfg.is_null()); + let cfg = unsafe { &mut *cfg }; + + // Forces datapath to be the empty string as it doesn't + // appear to play a useful role in simply resolving config + // settings for Eden. + errors_to_bytes(cfg.load_system()) +} + +/// Load user config files +#[no_mangle] +pub extern "C" fn hgrc_configset_load_user(cfg: *mut ConfigSet) -> *mut Bytes { + debug_assert!(!cfg.is_null()); + let cfg = unsafe { &mut *cfg }; + + errors_to_bytes(cfg.load_user()) +} + +/// Returns a Bytes object holding the configuration value for the corresponding +/// section name and key. If there is no matching section/key pair, returns nullptr. 
+#[no_mangle] +pub extern "C" fn hgrc_configset_get( + cfg: *const ConfigSet, + section: *const u8, + section_len: usize, + name: *const u8, + name_len: usize, +) -> *mut Bytes { + debug_assert!(!section.is_null()); + debug_assert!(!name.is_null()); + debug_assert!(!cfg.is_null()); + + let section = unsafe { slice::from_raw_parts(section, section_len) }; + let name = unsafe { slice::from_raw_parts(name, name_len) }; + let cfg = unsafe { &*cfg }; + + match cfg.get(section, name) { + None => ptr::null_mut(), + Some(bytes) => Box::into_raw(Box::new(bytes)), + } +} + +#[repr(C)] +pub struct ByteData { + ptr: *const u8, + len: usize, +} + +/// Returns the data pointer and length for a Bytes object, suitable for constructing +/// a folly::ByteRange. +#[no_mangle] +pub extern "C" fn hgrc_bytes_data(bytes: *const Bytes) -> ByteData { + debug_assert!(!bytes.is_null()); + let bytes = unsafe { &*bytes }; + ByteData { + ptr: bytes.as_ptr(), + len: bytes.len(), + } +} + +/// Frees a Bytes object, releasing any associated resources +#[no_mangle] +pub extern "C" fn hgrc_bytes_free(bytes: *mut Bytes) { + debug_assert!(!bytes.is_null()); + let bytes = unsafe { Box::from_raw(bytes) }; + drop(bytes); +} diff --git a/rust/hg-core/src/configparser/config.rs b/rust/hg-core/src/configparser/config.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/config.rs @@ -0,0 +1,944 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use std::collections::HashSet; +use std::convert::AsRef; +use std::fs; +use std::io::Read; +use std::ops::Range; +use std::path::{Path, PathBuf}; +use std::str; +use std::sync::Arc; + +use bytes::Bytes; +use indexmap::IndexMap; +use pest::{self, Parser, Span}; +use util::path::expand_path; + +use crate::error::Error; +use crate::parser::{ConfigParser, Rule}; + +type Pair<'a> = pest::iterators::Pair<'a, Rule>; + +/// Collection of config sections loaded from various sources. +#[derive(Clone, Default, Debug)] +pub struct ConfigSet { + sections: IndexMap, +} + +/// Internal representation of a config section. +#[derive(Clone, Default, Debug)] +struct Section { + items: IndexMap>, +} + +/// A config value with associated metadata like where it comes from. +#[derive(Clone, Debug)] +pub struct ValueSource { + value: Option, + source: Bytes, // global, user, repo, "--config", or an extension name, etc. + location: Option, +} + +/// The on-disk file name and byte offsets that provide the config value. +/// Useful if applications want to edit config values in-place. +#[derive(Clone, Debug)] +struct ValueLocation { + path: Arc, + content: Bytes, + location: Range, +} + +/// Options that affects config setting functions like `load_path`, `parse`, +/// and `set`. +#[derive(Default)] +pub struct Options { + source: Bytes, + filters: Vec) -> Option<(Bytes, Bytes, Option)>>>, +} + +impl ConfigSet { + /// Return an empty `ConfigSet`. + pub fn new() -> Self { + Default::default() + } + + /// Load config files at given path. The path is a file. + /// + /// If `path` is a directory, it is ignored. + /// If `path` is a file, it will be loaded directly. + /// + /// A config file can use `%include` to load other paths (directories or files). They will + /// be loaded recursively. Includes take effect in place, instead of deferred. 
For example, + /// with the following two files: + /// + /// ```plain,ignore + /// # This is 1.rc + /// [section] + /// x = 1 + /// %include 2.rc + /// y = 2 + /// + /// # This is 2.rc + /// [section] + /// x = 3 + /// y = 4 + /// ``` + /// + /// After loading `1.rc`, `x` is set to 3 and `y` is set to 2. + /// + /// Loading a file that is already parsed or being parsed by this `load_path` call is ignored, + /// to avoid infinite loops. A separate `load_path` call would not ignore files loaded by + /// other `load_path` calls. + /// + /// Return a list of errors. An error parsing a file will stop that file from loading, without + /// affecting other files. + pub fn load_path>(&mut self, path: P, opts: &Options) -> Vec { + let mut visited = HashSet::new(); + let mut errors = Vec::new(); + self.load_file(path.as_ref(), opts, &mut visited, &mut errors); + errors + } + + /// Load content of an unnamed config file. The `ValueLocation`s of loaded config items will + /// have an empty `path`. + /// + /// Return a list of errors. + pub fn parse>(&mut self, content: B, opts: &Options) -> Vec { + let mut visited = HashSet::new(); + let mut errors = Vec::new(); + let buf = content.into(); + self.load_file_content(Path::new(""), buf, opts, &mut visited, &mut errors); + errors + } + + /// Get config sections. + pub fn sections(&self) -> Vec { + self.sections.keys().cloned().collect() + } + + /// Get config names in the given section. Sorted by insertion order. + pub fn keys>(&self, section: S) -> Vec { + self.sections + .get(§ion.into()) + .map(|section| section.items.keys().cloned().collect()) + .unwrap_or(Vec::new()) + } + + /// Get config value for a given config. + /// Return `None` if the config item does not exist or is unset. 
+ pub fn get, N: Into>(&self, section: S, name: N) -> Option { + self.sections.get(§ion.into()).and_then(|section| { + section + .items + .get(&name.into()) + .and_then(|values| values.last().and_then(|value| value.value.clone())) + }) + } + + /// Get detailed sources of a given config, including overrides, and source information. + /// The last item in the returned vector is the latest value that is considered effective. + /// + /// Return an empty vector if the config does not exist. + pub fn get_sources, N: Into>( + &self, + section: S, + name: N, + ) -> Vec { + self.sections + .get(§ion.into()) + .and_then(|section| section.items.get(&name.into()).map(|values| values.clone())) + .unwrap_or(Vec::new()) + } + + /// Set a config item directly. `section` and `name` locate the config. `value` is the new value. + /// The source recorded for the item is taken from `opts`; it is an annotation about who set it, e.g. "reporc", "userrc", "--config", etc. + pub fn set, N: Into>( + &mut self, + section: T, + name: N, + value: Option<&[u8]>, + opts: &Options, + ) { + let section = section.into(); + let name = name.into(); + let value = value.map(|v| Bytes::from(v)); + self.set_internal(section, name, value, None, &opts) + } + + fn set_internal( + &mut self, + section: Bytes, + name: Bytes, + value: Option, + location: Option, + opts: &Options, + ) { + let filtered = opts + .filters + .iter() + .fold(Some((section, name, value)), move |acc, func| { + acc.and_then(|(section, name, value)| func(section, name, value)) + }); + if let Some((section, name, value)) = filtered { + self.sections + .entry(section) + .or_insert_with(|| Default::default()) + .items + .entry(name) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ValueSource { + value, + location, + source: opts.source.clone(), + }) + } + } + + fn load_file( + &mut self, + path: &Path, + opts: &Options, + visited: &mut HashSet, + errors: &mut Vec, + ) { + if let Ok(path) = path.canonicalize() { + let path = &path; + debug_assert!(path.is_absolute()); + + if
!visited.insert(path.to_path_buf()) { + // skip - visited before + return; + } + + match fs::File::open(path) { + Ok(mut file) => { + let mut buf = Vec::with_capacity(256); + if let Err(error) = file.read_to_end(&mut buf) { + errors.push(Error::Io(path.to_path_buf(), error)); + return; + } + buf.push(b'\n'); + let buf = Bytes::from(buf); + + self.load_file_content(path, buf, opts, visited, errors); + } + Err(error) => errors.push(Error::Io(path.to_path_buf(), error)), + } + } else { + // On Windows, a UNC path `\\?\C:\foo\.\x` will fail to canonicalize + // because it contains `.`. That path can be constructed by using + // `PathBuf::join` to concatenate a UNC path `\\?\C:\foo` with + // a "normal" path `.\x`. + // Try to fix it automatically by stripping the UNC prefix and retry + // `canonicalize`. `C:\foo\.\x` would be canonicalized without errors. + #[cfg(windows)] + { + if let Some(path_str) = path.to_str() { + if path_str.starts_with(r"\\?\") { + let path = Path::new(&path_str[4..]); + self.load_file(&path, opts, visited, errors); + } + } + } + } + + // If `path.canonicalize` reports an error. It's usually the path cannot + // be resolved (ex. does not exist). It is considered normal and is not + // reported in `errors`. + } + + fn load_file_content( + &mut self, + path: &Path, + buf: Bytes, + opts: &Options, + visited: &mut HashSet, + errors: &mut Vec, + ) { + let mut section = Bytes::new(); + let shared_path = Arc::new(path.to_path_buf()); // use Arc to do shallow copy + let skip_include = path.parent().is_none(); // skip handling %include if path is empty + + // Utilities to avoid too much indentation. 
+ let handle_value = |this: &mut ConfigSet, + pair: Pair, + section: Bytes, + name: Bytes, + location: ValueLocation| { + let pairs = pair.into_inner(); + let mut lines = Vec::with_capacity(1); + for pair in pairs { + if Rule::line == pair.as_rule() { + lines.push(extract(&buf, pair.as_span())); + } + } + + let value = match lines.len() { + 1 => lines[0].clone(), + _ => Bytes::from(lines.join(&b'\n')), + }; + + let (start, end) = strip_offsets(&value, 0, value.len()); + let value = value.slice(start, end); + + this.set_internal(section, name, value.into(), location.into(), opts) + }; + + let handle_config_item = |this: &mut ConfigSet, pair: Pair, section: Bytes| { + let pairs = pair.into_inner(); + let mut name = Bytes::new(); + for pair in pairs { + match pair.as_rule() { + Rule::config_name => name = extract(&buf, pair.as_span()), + Rule::value => { + let span = pair.as_span(); + let location = ValueLocation { + path: shared_path.clone(), + content: buf.clone(), + location: span.start()..span.end(), + }; + return handle_value(this, pair, section, name, location); + } + _ => (), + } + } + unreachable!(); + }; + + let handle_section = |pair: Pair, section: &mut Bytes| { + let pairs = pair.into_inner(); + for pair in pairs { + match pair.as_rule() { + Rule::section_name => { + *section = extract(&buf, pair.as_span()); + return; + } + _ => (), + } + } + unreachable!(); + }; + + let mut handle_include = |this: &mut ConfigSet, pair: Pair, errors: &mut Vec| { + let pairs = pair.into_inner(); + for pair in pairs { + match pair.as_rule() { + Rule::line => { + if !skip_include { + let include_path = pair.as_str(); + let full_include_path = + path.parent().unwrap().join(expand_path(include_path)); + this.load_file(&full_include_path, opts, visited, errors); + } + } + _ => (), + } + } + }; + + let handle_unset = |this: &mut ConfigSet, pair: Pair, section: &Bytes| { + let unset_span = pair.as_span(); + let pairs = pair.into_inner(); + for pair in pairs { + match 
pair.as_rule() { + Rule::config_name => { + let name = extract(&buf, pair.as_span()); + let location = ValueLocation { + path: shared_path.clone(), + content: buf.clone(), + location: unset_span.start()..unset_span.end(), + }; + return this.set_internal( + section.clone(), + name, + None, + location.into(), + opts, + ); + } + _ => (), + } + } + unreachable!(); + }; + + let mut handle_directive = + |this: &mut ConfigSet, pair: Pair, section: &Bytes, errors: &mut Vec| { + let pairs = pair.into_inner(); + for pair in pairs { + match pair.as_rule() { + Rule::include => handle_include(this, pair, errors), + Rule::unset => handle_unset(this, pair, section), + _ => (), + } + } + }; + + let text = match str::from_utf8(&buf) { + Ok(text) => text, + Err(error) => return errors.push(Error::Utf8(path.to_path_buf(), error)), + }; + + let pairs = match ConfigParser::parse(Rule::file, &text) { + Ok(pairs) => pairs, + Err(error) => { + return errors.push(Error::Parse(path.to_path_buf(), format!("{}", error))); + } + }; + + for pair in pairs { + match pair.as_rule() { + Rule::config_item => handle_config_item(self, pair, section.clone()), + Rule::section => handle_section(pair, &mut section), + Rule::directive => handle_directive(self, pair, §ion, errors), + Rule::blank_line | Rule::comment_line | Rule::new_line | Rule::EOI => (), + + Rule::comment_start + | Rule::compound + | Rule::config_name + | Rule::equal_sign + | Rule::file + | Rule::include + | Rule::left_bracket + | Rule::line + | Rule::right_bracket + | Rule::section_name + | Rule::space + | Rule::unset + | Rule::value => unreachable!(), + } + } + } +} + +impl ValueSource { + /// Return the actual value stored in this config value, or `None` if unset. + pub fn value(&self) -> &Option { + &self.value + } + + /// Return the "source" information for the config value. It's usually who sets the config, + /// like "--config", "user_hgrc", "system_hgrc", etc. 
+ pub fn source(&self) -> &Bytes { + &self.source + } + + /// Return the file path and byte range for the exact config value, + /// or `None` if there is no such information. + /// + /// If the value is `None`, the byte range is for the "%unset" statement. + pub fn location(&self) -> Option<(PathBuf, Range)> { + match self.location { + Some(ref src) => Some((src.path.as_ref().to_path_buf(), src.location.clone())), + None => None, + } + } + + /// Return the file content. Or `None` if there is no such information. + pub fn file_content(&self) -> Option { + match self.location { + Some(ref src) => Some(src.content.clone()), + None => None, + } + } +} + +impl Options { + /// Create a default `Options`. + pub fn new() -> Self { + Self::default() + } + + /// Append a filter. A filter can decide to ignore a config item, or change its section, + /// config name, or even value. The filter function takes a tuple of `(section, name, value)` + /// and outputs `None` to prevent inserting that value, or `Some((section, name, value))` to + /// insert it with optionally different name or values. + /// + /// Filters inserted first will be executed first. + pub fn append_filter( + mut self, + filter: Box) -> Option<(Bytes, Bytes, Option)>>, + ) -> Self { + self.filters.push(filter); + self + } + + /// Set `source` information. It is about who initialized the config loading. For example, + /// "user_hgrc" indicates it is from the user config file, "--config" indicates it is from the + /// global "--config" command line flag, "env" indicates it is translated from an environment + /// variable (ex. "PAGER"), etc. + pub fn source>(mut self, source: B) -> Self { + self.source = source.into(); + self + } +} + +/// Convert a "source" string to an `Options`. +impl> From for Options { + fn from(source: S) -> Options { + Options::new().source(source.into()) + } +} + +/// Remove space characters from both ends. Remove newline characters from the end. 
+/// `start` position is inclusive, `end` is exclusive. +/// Return the stripped `start` and `end` offsets. +#[inline] +fn strip_offsets(buf: &Bytes, start: usize, end: usize) -> (usize, usize) { + let mut start = start; + let mut end = end; + while start < end && b" \t".contains(&buf[start]) { + start += 1 + } + while start < end && b" \t\r\n".contains(&buf[end - 1]) { + end -= 1 + } + (start, end) +} + +#[inline] +fn extract<'a>(buf: &Bytes, span: Span<'a>) -> Bytes { + let (start, end) = strip_offsets(buf, span.start(), span.end()); + buf.slice(start, end) +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use std::io::Write; + use tempdir::TempDir; + + #[test] + fn test_empty() { + let cfg = ConfigSet::new(); + assert!(cfg.sections().is_empty()); + assert!(cfg.keys("foo").is_empty()); + assert!(cfg.get("foo", "bar").is_none()); + assert!(cfg.get_sources("foo", "bar").is_empty()); + } + + #[test] + fn test_set() { + let mut cfg = ConfigSet::new(); + cfg.set("y", "b", Some(b"1"), &"set1".into()); + cfg.set("y", "b", Some(b"2"), &"set2".into()); + cfg.set("y", "a", Some(b"3"), &"set3".into()); + cfg.set("z", "p", Some(b"4"), &"set4".into()); + cfg.set("z", "p", None, &"set5".into()); + assert_eq!(cfg.sections(), vec![Bytes::from("y"), Bytes::from("z")]); + assert_eq!(cfg.keys("y"), vec![Bytes::from("b"), Bytes::from("a")]); + assert_eq!(cfg.get("y", "b"), Some(Bytes::from("2"))); + assert_eq!(cfg.get("y", "a"), Some(Bytes::from("3"))); + assert_eq!(cfg.get("z", "p"), None); + + let sources = cfg.get_sources("z", "p"); + assert_eq!(sources.len(), 2); + assert_eq!(sources[0].value(), &Some(Bytes::from("4"))); + assert_eq!(sources[1].value(), &None); + assert_eq!(sources[0].source(), "set4"); + assert_eq!(sources[1].source(), "set5"); + assert_eq!(sources[0].location(), None); + assert_eq!(sources[1].location(), None); + assert_eq!(sources[1].file_content(), None); + } + + #[test] + fn test_clone() { + let mut cfg = ConfigSet::new(); + 
assert!(cfg.clone().sections().is_empty()); + cfg.set("x", "a", Some(b"1"), &"set1".into()); + assert_eq!(cfg.clone().sections(), vec![Bytes::from("x")]); + assert_eq!(cfg.clone().get("x", "a"), Some("1".into())); + } + + #[test] + fn test_parse_basic() { + let mut cfg = ConfigSet::new(); + cfg.parse( + "[y]\n\ + a = 0\n\ + b=1\n\ + # override a to 2\n\ + a = 2 \n\ + \n\ + [x]\n\ + m = this\n \ + value has\r\n \ + multi lines\n\ + ; comment again\n\ + n =\n", + &"test_parse_basic".into(), + ); + + assert_eq!(cfg.sections(), vec![Bytes::from("y"), Bytes::from("x")]); + assert_eq!(cfg.keys("y"), vec![Bytes::from("a"), Bytes::from("b")]); + assert_eq!(cfg.keys("x"), vec![Bytes::from("m"), Bytes::from("n")]); + + assert_eq!(cfg.get("y", "a"), Some(Bytes::from("2"))); + assert_eq!(cfg.get("y", "b"), Some(Bytes::from("1"))); + assert_eq!(cfg.get("x", "n"), Some(Bytes::new())); + assert_eq!( + cfg.get("x", "m"), + Some(Bytes::from(&b"this\nvalue has\nmulti lines"[..])) + ); + + let sources = cfg.get_sources("y", "a"); + assert_eq!(sources.len(), 2); + assert_eq!(sources[0].value(), &Some(Bytes::from("0"))); + assert_eq!(sources[1].value(), &Some(Bytes::from("2"))); + assert_eq!(sources[0].source(), "test_parse_basic"); + assert_eq!(sources[1].source(), "test_parse_basic"); + assert_eq!(sources[0].location().unwrap(), (PathBuf::new(), 8..9)); + assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 38..40)); + assert_eq!(sources[1].file_content().unwrap().len(), 100); + } + + #[test] + fn test_parse_spaces() { + let mut cfg = ConfigSet::new(); + + cfg.parse( + "# space after section name\n\ + [a] \n\ + # empty lines\n \n\t\n\n\ + x=1\n\ + # space in config name\n\ + y y \t =2\n\ + # space in multi-line config value, with trailing spaces\n\ + z=\t \n 3 3 \n \n 4 \n\t5 \n \n\ + # empty values\n\ + e1 =\n\ + e2 = \n\ + e3 =\n \n\ + \n\ + # space in section name\n\ + [ b c\t]\n\ + # space in unset\n\ + y y =\n\ + %unset y y \n\ + # no space at EOF\n\ + x=4", + &"".into(), 
+ ); + + assert_eq!(cfg.get("a", "x"), Some("1".into())); + assert_eq!(cfg.get("a", "y y"), Some("2".into())); + assert_eq!(cfg.get("a", "z"), Some("\n3 3\n\n4\n5".into())); + assert_eq!(cfg.get("a", "e1"), Some("".into())); + assert_eq!(cfg.get("a", "e2"), Some("".into())); + assert_eq!(cfg.get("a", "e3"), Some("".into())); + assert_eq!(cfg.get("b c", "y y"), None); + assert_eq!(cfg.get("b c", "x"), Some("4".into())); + } + + #[test] + fn test_corner_cases() { + let mut cfg = ConfigSet::new(); + let errors = cfg.parse( + "# section looks like a config assignment\n\ + [a=b]\n\ + # comments look like config assignments\n\ + # a = b\n\ + ; a = b\n\ + # multiple equal signs in a config assignment\n\ + c = d = e\n\ + #", + &"".into(), + ); + + assert_eq!(format!("{:?}", errors), "[]"); + assert_eq!(cfg.get("a=b", "c"), Some("d = e".into())); + assert_eq!(cfg.get("a=b", "a"), None); + assert_eq!(cfg.get("a=b", "# a"), None); + assert_eq!(cfg.get("a=b", "; a"), None); + } + + #[test] + fn test_parse_errors() { + let mut cfg = ConfigSet::new(); + let errors = cfg.parse("=foo", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:1 + | +1 | =foo + | ^--- + | + = expected EOI, new_line, config_name, left_bracket, comment_line, or directive" + ); + + let errors = cfg.parse(" a=b", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:2 + | +1 | a=b + | ^--- + | + = expected EOI or new_line" + ); + + let errors = cfg.parse("%unset =foo", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:8 + | +1 | %unset =foo + | ^--- + | + = expected space or config_name" + ); + + let errors = cfg.parse("[", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:2 + | +1 | [ + | ^--- + | + = expected section_name" + ); + + let errors = cfg.parse("[]", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:2 + | +1 | 
[] + | ^--- + | + = expected section_name" + ); + + let errors = cfg.parse("[a]]", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 1:4 + | +1 | [a]] + | ^--- + | + = expected EOI, new_line, or space" + ); + + let errors = cfg.parse("# foo\n[y", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 2:3 + | +2 | [y + | ^--- + | + = expected right_bracket" + ); + + let mut cfg = ConfigSet::new(); + let errors = cfg.parse("\n\n%unknown", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 3:2 + | +3 | %unknown + | ^--- + | + = expected include or unset" + ); + + let mut cfg = ConfigSet::new(); + let errors = cfg.parse("[section]\nabc", &"test_parse_errors".into()); + assert_eq!( + format!("{}", errors[0]), + "\"\": + --> 2:4 + | +2 | abc + | ^--- + | + = expected equal_sign" + ); + } + + #[test] + fn test_parse_unset() { + let mut cfg = ConfigSet::new(); + cfg.parse( + "[x]\n\ + a = 1\n\ + %unset b\n\ + b = 2\n\ + %unset a \n\ + c = 3\n\ + d = 4\n\ + [y]\n\ + %unset c\n\ + [x]\n\ + %unset d ", + &"test_parse_unset".into(), + ); + + assert_eq!(cfg.get("x", "a"), None); + assert_eq!(cfg.get("x", "b"), Some(Bytes::from("2"))); + assert_eq!(cfg.get("x", "c"), Some(Bytes::from("3"))); + assert_eq!(cfg.get("x", "d"), None); + + let sources = cfg.get_sources("x", "a"); + assert_eq!(sources.len(), 2); + assert_eq!(sources[0].location().unwrap(), (PathBuf::new(), 8..9)); + assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 26..35)); + } + + #[test] + fn test_filters() { + fn blacklist_section_x( + section: Bytes, + name: Bytes, + value: Option, + ) -> Option<(Bytes, Bytes, Option)> { + if section.as_ref() == b"x" { + None + } else { + Some((section, name, value)) + } + } + + fn swap_name_value( + section: Bytes, + name: Bytes, + value: Option, + ) -> Option<(Bytes, Bytes, Option)> { + Some((section, value.unwrap(), name.into())) + } + + fn rename_section_to_z( + 
_section: Bytes, + name: Bytes, + value: Option, + ) -> Option<(Bytes, Bytes, Option)> { + Some(("z".into(), name, value)) + } + + let mut cfg = ConfigSet::new(); + let opts = Options::new() + .append_filter(Box::new(blacklist_section_x)) + .append_filter(Box::new(swap_name_value)) + .append_filter(Box::new(rename_section_to_z)); + cfg.parse( + "[x]\n\ + a=1\n\ + [y]\n\ + b=c", + &opts, + ); + assert_eq!(cfg.get("x", "a"), None); + assert_eq!(cfg.get("y", "b"), None); + assert_eq!(cfg.get("z", "c"), Some(Bytes::from("b"))); + } + + pub(crate) fn write_file(path: PathBuf, content: &str) { + fs::create_dir_all(path.parent().unwrap()).unwrap(); + let mut f = fs::File::create(path).unwrap(); + f.write_all(content.as_bytes()).unwrap(); + } + + #[test] + fn test_parse_include() { + let dir = TempDir::new("test_parse_include").unwrap(); + write_file( + dir.path().join("rootrc"), + "[x]\n\ + b=1\n\ + a=1\n\ + %include dir/abc.rc\n\ + %include dir/y.rc\n\ + %include dir/loop.rc\n\ + %include b.rc\n\ + [y]\n\ + b=1\n\ + [x]\n\ + %unset f", + ); + + write_file(dir.path().join("dir/abc.rc"), "[x]\na=2\nb=2"); + write_file(dir.path().join("dir/y.rc"), "[y]\ny=1\n%include ../e.rc"); + write_file(dir.path().join("dir/loop.rc"), "%include ../rootrc"); + + // Won't be loaded because it's not inside dir/ directly. + write_file(dir.path().join("dir/unused/unused.rc"), "[unused]\na=1"); + + // Won't be loaded because it does not have ".rc" extension. + write_file(dir.path().join("dir/unusedrc"), "[unused]\na=1"); + + // Will be loaded. `%include` shouldn't cause cycles. + write_file( + dir.path().join("b.rc"), + "[x]\nb=4\n\ + %include dir/abc.rc\n\ + %include dir/y.rc\n\ + %include dir/loop.rc", + ); + + // Will be loaded. Shouldn't cause cycles. 
+ write_file(dir.path().join("e.rc"), "[x]\ne=e\n%include f.rc"); + write_file( + dir.path().join("f.rc"), + "[x]\nf=f\n%include e.rc\n%include rootrc", + ); + + let mut cfg = ConfigSet::new(); + let errors = cfg.load_path(dir.path().join("rootrc"), &"test_parse_include".into()); + assert!(errors.is_empty()); + + assert_eq!(cfg.sections(), vec![Bytes::from("x"), Bytes::from("y")]); + assert_eq!( + cfg.keys("x"), + vec![ + Bytes::from("b"), + Bytes::from("a"), + Bytes::from("e"), + Bytes::from("f"), + ] + ); + assert_eq!(cfg.get("x", "a"), Some(Bytes::from("2"))); + assert_eq!(cfg.get("x", "b"), Some(Bytes::from("4"))); + assert_eq!(cfg.get("x", "e"), Some(Bytes::from("e"))); + assert_eq!(cfg.get("x", "f"), None); + assert_eq!(cfg.get("y", "b"), Some(Bytes::from("1"))); + } + + #[test] + fn test_parse_include_expand() { + use std::env; + env::set_var("FOO", "f"); + + let dir = TempDir::new("test_parse_include_expand").unwrap(); + write_file( + dir.path().join("rootrc"), + "%include ./${FOO}1/$FOO/3.rc\n\ + %include ./%FOO%2/%FOO%/4.rc\n", + ); + + write_file(dir.path().join("f1/f/3.rc"), "[x]\na=1\n"); + write_file(dir.path().join("f2/f/4.rc"), "[y]\nb=2\n"); + + let mut cfg = ConfigSet::new(); + let errors = cfg.load_path(dir.path().join("rootrc"), &"include_expand".into()); + assert!(errors.is_empty()); + + assert_eq!(cfg.get("x", "a"), Some(Bytes::from("1"))); + assert_eq!(cfg.get("y", "b"), Some(Bytes::from("2"))); + } +} diff --git a/rust/hg-core/src/configparser/error.rs b/rust/hg-core/src/configparser/error.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/error.rs @@ -0,0 +1,32 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +use std::io; +use std::path::PathBuf; +use std::str; + +use thiserror::Error; + +/// The error type for parsing config files. 
+#[derive(Error, Debug)] +pub enum Error { + /// Unable to convert to a type. + #[error("{0}")] + Convert(String), + + /// Unable to parse a file due to syntax. + #[error("{0:?}:\n{1}")] + Parse(PathBuf, String), + + /// Unable to read a file due to IO errors. + #[error("{0:?}: {1}")] + Io(PathBuf, #[source] io::Error), + + /// Config file contains invalid UTF-8. + #[error("{0:?}: {1}")] + Utf8(PathBuf, #[source] str::Utf8Error), +} diff --git a/rust/hg-core/src/configparser/hg.rs b/rust/hg-core/src/configparser/hg.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/hg.rs @@ -0,0 +1,1054 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +//! Mercurial-specific config postprocessing + +use std::cmp::Eq; +use std::collections::{HashMap, HashSet}; +use std::env; +use std::hash::Hash; +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use bytes::Bytes; +use util::path::expand_path; + +use crate::config::{ConfigSet, Options}; +use crate::error::Error; + +pub const HGPLAIN: &str = "HGPLAIN"; +pub const HGPLAINEXCEPT: &str = "HGPLAINEXCEPT"; +pub const HGRCPATH: &str = "HGRCPATH"; + +pub trait OptionsHgExt { + /// Drop configs according to `$HGPLAIN` and `$HGPLAINEXCEPT`. + fn process_hgplain(self) -> Self; + + /// Set read-only config items. `items` contains a list of tuple `(section, name)`. + /// Setting those items to new value will be ignored. + fn readonly_items, N: Into>(self, items: Vec<(S, N)>) -> Self; + + /// Set section remap. If a section name matches an entry key, it will be treated as if the + /// name is the entry value. The remap wouldn't happen recursively. For example, with a + /// `{"A": "B", "B": "C"}` map, section name "A" will be treated as "B", not "C". + /// This is implemented via `append_filter`. 
+ fn remap_sections, V: Into>( + self, + remap: HashMap, + ) -> Self; + + /// Set section whitelist. Sections outside the whitelist won't be loaded. + /// This is implemented via `append_filter`. + fn whitelist_sections>(self, sections: Vec) -> Self; +} + +pub trait ConfigSetHgExt { + /// Load system config files if `$HGRCPATH` is not set. + /// Return errors parsing files. + fn load_system(&mut self) -> Vec; + + /// Load user config files (and environment variables). If `$HGRCPATH` is + /// set, load files listed in that environment variable instead. + /// Return errors parsing files. + fn load_user(&mut self) -> Vec; + + /// Load a specified config file. Respect HGPLAIN environment variables. + /// Return errors parsing files. + fn load_hgrc(&mut self, path: impl AsRef, source: &'static str) -> Vec; + + /// Get a config item. Convert to type `T`. + fn get_opt(&self, section: &str, name: &str) -> Result>; + + /// Get a config item. Convert to type `T`. + /// + /// If the config item is not set, calculate it using `default_func`. + fn get_or( + &self, + section: &str, + name: &str, + default_func: impl Fn() -> T, + ) -> Result { + Ok(self.get_opt(section, name)?.unwrap_or_else(default_func)) + } + + /// Get a config item. Convert to type `T`. + /// + /// If the config item is not set, return `T::default()`. + fn get_or_default(&self, section: &str, name: &str) -> Result { + self.get_or(section, name, Default::default) + } +} + +pub trait FromConfigValue: Sized { + fn try_from_bytes(bytes: &[u8]) -> Result; +} + +/// Load system, user config files. 
+pub fn load() -> Result { + let mut set = ConfigSet::new(); + if let Some(error) = set.load_system().pop() { + return Err(error.into()); + } + if let Some(error) = set.load_user().pop() { + return Err(error.into()); + } + Ok(set) +} + +impl OptionsHgExt for Options { + fn process_hgplain(self) -> Self { + let plain_set = env::var(HGPLAIN).is_ok(); + let plain_except = env::var(HGPLAINEXCEPT); + if plain_set || plain_except.is_ok() { + let (section_blacklist, ui_blacklist) = { + let plain_exceptions: HashSet = plain_except + .unwrap_or_else(|_| "".to_string()) + .split(',') + .map(|s| s.to_string()) + .collect(); + + // [defaults] and [commands] are always blacklisted. + let mut section_blacklist: HashSet = + ["defaults", "commands"].iter().map(|&s| s.into()).collect(); + + // [alias], [revsetalias], [templatealias] are blacklisted if they are outside + // HGPLAINEXCEPT. + for &name in ["alias", "revsetalias", "templatealias"].iter() { + if !plain_exceptions.contains(name) { + section_blacklist.insert(Bytes::from(name)); + } + } + + // These configs under [ui] are always blacklisted. + let mut ui_blacklist: HashSet = [ + "debug", + "fallbackencoding", + "quiet", + "slash", + "logtemplate", + "statuscopies", + "style", + "traceback", + "verbose", + ] + .iter() + .map(|&s| s.into()) + .collect(); + // exitcodemask is blacklisted if exitcode is outside HGPLAINEXCEPT. + if !plain_exceptions.contains("exitcode") { + ui_blacklist.insert("exitcodemask".into()); + } + + (section_blacklist, ui_blacklist) + }; + + let filter = move |section: Bytes, name: Bytes, value: Option| { + if section_blacklist.contains(§ion) + || (section.as_ref() == b"ui" && ui_blacklist.contains(&name)) + { + None + } else { + Some((section, name, value)) + } + }; + + self.append_filter(Box::new(filter)) + } else { + self + } + } + + /// Set section whitelist. Sections outside the whitelist won't be loaded. + /// This is implemented via `append_filter`. 
+ fn whitelist_sections>(self, sections: Vec) -> Self { + let whitelist: HashSet = sections + .iter() + .cloned() + .map(|section| section.into()) + .collect(); + + let filter = move |section: Bytes, name: Bytes, value: Option| { + if whitelist.contains(§ion) { + Some((section, name, value)) + } else { + None + } + }; + + self.append_filter(Box::new(filter)) + } + + /// Set section remap. If a section name matches an entry key, it will be treated as if the + /// name is the entry value. The remap wouldn't happen recursively. For example, with a + /// `{"A": "B", "B": "C"}` map, section name "A" will be treated as "B", not "C". + /// This is implemented via `append_filter`. + fn remap_sections(self, remap: HashMap) -> Self + where + K: Eq + Hash + Into, + V: Into, + { + let remap: HashMap = remap + .into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(); + + let filter = move |section: Bytes, name: Bytes, value: Option| { + let section = remap.get(§ion).cloned().unwrap_or(section); + Some((section, name, value)) + }; + + self.append_filter(Box::new(filter)) + } + + fn readonly_items, N: Into>(self, items: Vec<(S, N)>) -> Self { + let readonly_items: HashSet<(Bytes, Bytes)> = items + .into_iter() + .map(|(section, name)| (section.into(), name.into())) + .collect(); + + let filter = move |section: Bytes, name: Bytes, value: Option| { + if readonly_items.contains(&(section.clone(), name.clone())) { + None + } else { + Some((section, name, value)) + } + }; + + self.append_filter(Box::new(filter)) + } +} + +impl ConfigSetHgExt for ConfigSet { + fn load_system(&mut self) -> Vec { + let opts = Options::new().source("system").process_hgplain(); + let mut errors = Vec::new(); + + if env::var(HGRCPATH).is_err() { + #[cfg(unix)] + { + errors.append(&mut self.load_path("/etc/mercurial/system.rc", &opts)); + // TODO(T40519286): Remove this after the tupperware overrides move out of hgrc.d + errors.append( + &mut 
self.load_path("/etc/mercurial/hgrc.d/tupperware_overrides.rc", &opts), + ); + // TODO(quark): Remove this after packages using system.rc are rolled out + errors.append(&mut self.load_path("/etc/mercurial/hgrc.d/include.rc", &opts)); + } + + #[cfg(windows)] + { + if let Ok(program_data_path) = env::var("PROGRAMDATA") { + use std::path::Path; + let hgrc_dir = Path::new(&program_data_path).join("Facebook\\Mercurial"); + errors.append(&mut self.load_path(hgrc_dir.join("system.rc"), &opts)); + // TODO(quark): Remove this after packages using system.rc are rolled out + errors.append(&mut self.load_path(hgrc_dir.join("hgrc"), &opts)); + } + } + } + + errors + } + + fn load_user(&mut self) -> Vec { + let mut errors = Vec::new(); + + // Convert "$VISUAL", "$EDITOR" to "ui.editor". + // + // Unlike Mercurial, don't convert the "$PAGER" environment variable + // to "pager.pager" config. + // + // The environment variable could be from the system profile (ex. + // /etc/profile.d/...), or the user shell rc (ex. ~/.bashrc). There is + // no clean way to tell which one it is from. The value might be + // tweaked for sysadmin usecases (ex. -n), which are different from + // SCM's usecases. + for name in ["VISUAL", "EDITOR"].iter() { + if let Ok(editor) = env::var(name) { + self.set( + "ui", + "editor", + Some(editor.as_bytes()), + &Options::new().source(format!("${}", name)), + ); + break; + } + } + + // Convert $HGPROF to profiling.type + if let Ok(profiling_type) = env::var("HGPROF") { + self.set( + "profiling", + "type", + Some(profiling_type.as_bytes()), + &"$HGPROF".into(), + ); + } + + let opts = Options::new().source("user").process_hgplain(); + + // If $HGRCPATH is set, use it instead. 
+ if let Ok(rcpath) = env::var("HGRCPATH") { + #[cfg(unix)] + let paths = rcpath.split(':'); + #[cfg(windows)] + let paths = rcpath.split(';'); + for path in paths { + errors.append(&mut self.load_path(expand_path(path), &opts)); + } + } else { + if let Some(home_dir) = dirs::home_dir() { + errors.append(&mut self.load_path(home_dir.join(".hgrc"), &opts)); + + #[cfg(windows)] + { + errors.append(&mut self.load_path(home_dir.join("mercurial.ini"), &opts)); + } + } + if let Some(config_dir) = dirs::config_dir() { + errors.append(&mut self.load_path(config_dir.join("hg/hgrc"), &opts)); + } + } + + errors + } + + fn load_hgrc(&mut self, path: impl AsRef, source: &'static str) -> Vec { + let opts = Options::new().source(source).process_hgplain(); + self.load_path(path, &opts) + } + + fn get_opt(&self, section: &str, name: &str) -> Result> { + ConfigSet::get(self, section, name) + .map(|bytes| T::try_from_bytes(&bytes)) + .transpose() + } +} + +impl FromConfigValue for bool { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.to_lowercase(); + match value.as_ref() { + "1" | "yes" | "true" | "on" | "always" => Ok(true), + "0" | "no" | "false" | "off" | "never" => Ok(false), + _ => Err(Error::Convert(format!("invalid bool: {}", value)).into()), + } + } +} + +impl FromConfigValue for i8 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for i16 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for i32 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for i64 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for isize { + fn try_from_bytes(bytes: &[u8]) -> Result { + let 
value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for u8 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for u16 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for u32 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for u64 { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for usize { + fn try_from_bytes(bytes: &[u8]) -> Result { + let value = std::str::from_utf8(bytes)?.parse()?; + Ok(value) + } +} + +impl FromConfigValue for String { + fn try_from_bytes(bytes: &[u8]) -> Result { + String::from_utf8(bytes.to_vec()) + .map_err(|_| Error::Convert(format!("{:?} is not utf8 encoded", bytes)).into()) + } +} + +/// Byte count specified with a unit. For example: `1.5 MB`. +#[derive(Copy, Clone, Default)] +pub struct ByteCount(u64); + +impl ByteCount { + /// Get the value of bytes. For example, `1K` has a value of `1024`. 
+ pub fn value(self) -> u64 { + self.0 + } +} + +impl From for ByteCount { + fn from(value: u64) -> ByteCount { + ByteCount(value) + } +} + +impl FromConfigValue for ByteCount { + fn try_from_bytes(bytes: &[u8]) -> Result { + // This implementation matches mercurial/util.py:sizetoint + let sizeunits = [ + ("kb", 1u64 << 10), + ("mb", 1 << 20), + ("gb", 1 << 30), + ("tb", 1 << 40), + ("k", 1 << 10), + ("m", 1 << 20), + ("g", 1 << 30), + ("t", 1 << 40), + ("b", 1), + ("", 1), + ]; + + let value = std::str::from_utf8(bytes)?.to_lowercase(); + for (suffix, unit) in sizeunits.iter() { + if value.ends_with(suffix) { + let number_str: &str = value[..value.len() - suffix.len()].trim(); + let number: f64 = number_str.parse()?; + if number < 0.0 { + return Err(Error::Convert(format!( + "byte size '{:?}' cannot be negative", + value + )) + .into()); + } + let unit = *unit as f64; + return Ok(ByteCount((number * unit) as u64)); + } + } + + Err(Error::Convert(format!("'{:?}' cannot be parsed as a byte size", value)).into()) + } +} + +impl FromConfigValue for PathBuf { + fn try_from_bytes(bytes: &[u8]) -> Result { + let st = std::str::from_utf8(&bytes)?; + + Ok(expand_path(st)) + } +} + +impl FromConfigValue for Vec { + fn try_from_bytes(bytes: &[u8]) -> Result { + let items = parse_list(bytes); + items.into_iter().map(|s| T::try_from_bytes(&s)).collect() + } +} + +impl FromConfigValue for Option { + fn try_from_bytes(bytes: &[u8]) -> Result { + T::try_from_bytes(&bytes).map(Option::Some) + } +} + +/// Parse a configuration value as a list of comma/space separated strings. +/// It is ported from `mercurial.config.parselist`. +/// +/// The function never complains about syntax and always returns some result. 
+/// +/// Example: +/// +/// ``` +/// use configparser::hg::parse_list; +/// +/// assert_eq!( +/// parse_list(b"this,is \"a small\" ,test"), +/// vec![b"this".to_vec(), b"is".to_vec(), b"a small".to_vec(), b"test".to_vec()] +/// ); +/// ``` +pub fn parse_list>(value: B) -> Vec { + let mut value = value.as_ref(); + + // ```python + // if value is not None and isinstance(value, bytes): + // result = _configlist(value.lstrip(' ,\n')) + // ``` + + while b" ,\n".iter().any(|b| value.starts_with(&[*b])) { + value = &value[1..] + } + + parse_list_internal(value) + .into_iter() + .map(Bytes::from) + .collect() +} + +fn parse_list_internal(value: &[u8]) -> Vec> { + let mut value = value; + + // ```python + // def _configlist(s): + // s = s.rstrip(' ,') + // if not s: + // return [] + // parser, parts, offset = _parse_plain, [''], 0 + // while parser: + // parser, parts, offset = parser(parts, s, offset) + // return parts + // ``` + + while b" ,\n".iter().any(|b| value.ends_with(&[*b])) { + value = &value[..value.len() - 1] + } + + if value.is_empty() { + return Vec::new(); + } + + #[derive(Copy, Clone)] + enum State { + Plain, + Quote, + }; + + let mut offset = 0; + let mut parts: Vec> = vec![Vec::new()]; + let mut state = State::Plain; + + loop { + match state { + // ```python + // def _parse_plain(parts, s, offset): + // whitespace = False + // while offset < len(s) and (s[offset:offset + 1].isspace() + // or s[offset:offset + 1] == ','): + // whitespace = True + // offset += 1 + // if offset >= len(s): + // return None, parts, offset + // if whitespace: + // parts.append('') + // if s[offset:offset + 1] == '"' and not parts[-1]: + // return _parse_quote, parts, offset + 1 + // elif s[offset:offset + 1] == '"' and parts[-1][-1:] == '\\': + // parts[-1] = parts[-1][:-1] + s[offset:offset + 1] + // return _parse_plain, parts, offset + 1 + // parts[-1] += s[offset:offset + 1] + // return _parse_plain, parts, offset + 1 + // ``` + State::Plain => { + let mut whitespace = 
false; + while offset < value.len() && b" \n\r\t,".contains(&value[offset]) { + whitespace = true; + offset += 1; + } + if offset >= value.len() { + break; + } + if whitespace { + parts.push(Vec::new()); + } + if value[offset] == b'"' { + let branch = { + match parts.last() { + None => 1, + Some(last) => { + if last.is_empty() { + 1 + } else if last.ends_with(b"\\") { + 2 + } else { + 3 + } + } + } + }; // manual NLL, to drop reference on "parts". + if branch == 1 { + // last.is_empty() + state = State::Quote; + offset += 1; + continue; + } else if branch == 2 { + // last.ends_with(b"\\") + let last = parts.last_mut().unwrap(); + last.pop(); + last.push(value[offset]); + offset += 1; + continue; + } + } + let last = parts.last_mut().unwrap(); + last.push(value[offset]); + offset += 1; + } + + // ```python + // def _parse_quote(parts, s, offset): + // if offset < len(s) and s[offset:offset + 1] == '"': # "" + // parts.append('') + // offset += 1 + // while offset < len(s) and (s[offset:offset + 1].isspace() or + // s[offset:offset + 1] == ','): + // offset += 1 + // return _parse_plain, parts, offset + // while offset < len(s) and s[offset:offset + 1] != '"': + // if (s[offset:offset + 1] == '\\' and offset + 1 < len(s) + // and s[offset + 1:offset + 2] == '"'): + // offset += 1 + // parts[-1] += '"' + // else: + // parts[-1] += s[offset:offset + 1] + // offset += 1 + // if offset >= len(s): + // real_parts = _configlist(parts[-1]) + // if not real_parts: + // parts[-1] = '"' + // else: + // real_parts[0] = '"' + real_parts[0] + // parts = parts[:-1] + // parts.extend(real_parts) + // return None, parts, offset + // offset += 1 + // while offset < len(s) and s[offset:offset + 1] in [' ', ',']: + // offset += 1 + // if offset < len(s): + // if offset + 1 == len(s) and s[offset:offset + 1] == '"': + // parts[-1] += '"' + // offset += 1 + // else: + // parts.append('') + // else: + // return None, parts, offset + // return _parse_plain, parts, offset + // ``` + 
State::Quote => { + if offset < value.len() && value[offset] == b'"' { + parts.push(Vec::new()); + offset += 1; + while offset < value.len() && b" \n\r\t,".contains(&value[offset]) { + offset += 1; + } + state = State::Plain; + continue; + } + while offset < value.len() && value[offset] != b'"' { + if value[offset] == b'\\' + && offset + 1 < value.len() + && value[offset + 1] == b'"' + { + offset += 1; + parts.last_mut().unwrap().push(b'"'); + } else { + parts.last_mut().unwrap().push(value[offset]); + } + offset += 1; + } + if offset >= value.len() { + let mut real_parts: Vec> = parse_list_internal(parts.last().unwrap()) + .iter() + .map(|b| b.to_vec()) + .collect(); + if real_parts.is_empty() { + parts.pop(); + parts.push(vec![b'"']); + } else { + real_parts[0].insert(0, b'"'); + parts.pop(); + parts.append(&mut real_parts); + } + break; + } + offset += 1; + while offset < value.len() && b" ,".contains(&value[offset]) { + offset += 1; + } + if offset < value.len() { + if offset + 1 == value.len() && value[offset] == b'"' { + parts.last_mut().unwrap().push(b'"'); + offset += 1; + } else { + parts.push(Vec::new()); + } + } else { + break; + } + state = State::Plain; + } + } + } + + parts +} + +#[cfg(test)] +mod tests { + use super::*; + + use tempdir::TempDir; + + use crate::config::tests::write_file; + + use lazy_static::lazy_static; + use parking_lot::Mutex; + + lazy_static! { + /// Lock for the environment. This should be acquired by tests that rely on particular + /// environment variable values that might be overwritten by other tests. 
+ static ref ENV_LOCK: Mutex<()> = Mutex::new(()); + } + + #[test] + fn test_basic_hgplain() { + let _guard = ENV_LOCK.lock(); + env::set_var(HGPLAIN, "1"); + env::remove_var(HGPLAINEXCEPT); + + let opts = Options::new().process_hgplain(); + let mut cfg = ConfigSet::new(); + cfg.parse( + "[defaults]\n\ + commit = commit -d 0\n\ + [ui]\n\ + verbose = true\n\ + username = test\n\ + [alias]\n\ + l = log\n", + &opts, + ); + + assert!(cfg.keys("defaults").is_empty()); + assert_eq!(cfg.get("ui", "verbose"), None); + assert_eq!(cfg.get("ui", "username"), Some("test".into())); + assert_eq!(cfg.get("alias", "l"), None); + } + + #[test] + fn test_hgplainexcept() { + let _guard = ENV_LOCK.lock(); + env::remove_var(HGPLAIN); + env::set_var(HGPLAINEXCEPT, "alias,revsetalias"); + + let opts = Options::new().process_hgplain(); + let mut cfg = ConfigSet::new(); + cfg.parse( + "[defaults]\n\ + commit = commit -d 0\n\ + [alias]\n\ + l = log\n\ + [templatealias]\n\ + u = user\n\ + [revsetalias]\n\ + @ = master\n", + &opts, + ); + + assert!(cfg.keys("defaults").is_empty()); + assert_eq!(cfg.get("alias", "l"), Some("log".into())); + assert_eq!(cfg.get("revsetalias", "@"), Some("master".into())); + assert_eq!(cfg.get("templatealias", "u"), None); + } + + #[test] + fn test_hgrcpath() { + let dir = TempDir::new("test_hgrcpath").unwrap(); + + write_file(dir.path().join("1.rc"), "[x]\na=1"); + write_file(dir.path().join("2.rc"), "[y]\nb=2"); + + #[cfg(unix)] + let hgrcpath = "$T/1.rc:$T/2.rc"; + #[cfg(windows)] + let hgrcpath = "$T/1.rc;%T%/2.rc"; + + env::set_var("T", dir.path()); + env::set_var(HGRCPATH, hgrcpath); + + let mut cfg = ConfigSet::new(); + + cfg.load_system(); + assert!(cfg.sections().is_empty()); + + cfg.load_user(); + assert_eq!(cfg.get("x", "a"), Some("1".into())); + assert_eq!(cfg.get("y", "b"), Some("2".into())); + } + + #[test] + fn test_load_hgrc() { + let dir = TempDir::new("test_hgrcpath").unwrap(); + let path = dir.path().join("1.rc"); + + write_file(path.clone(), 
"[x]\na=1\n[alias]\nb=c\n"); + + let _guard = ENV_LOCK.lock(); + env::set_var(HGPLAIN, "1"); + env::remove_var(HGPLAINEXCEPT); + + let mut cfg = ConfigSet::new(); + cfg.load_hgrc(&path, "hgrc"); + + assert!(cfg.keys("alias").is_empty()); + assert!(cfg.get("alias", "b").is_none()); + assert_eq!(cfg.get("x", "a").unwrap(), "1"); + + env::remove_var(HGPLAIN); + cfg.load_hgrc(&path, "hgrc"); + + assert_eq!(cfg.get("alias", "b").unwrap(), "c"); + } + + #[test] + fn test_section_whitelist() { + let opts = Options::new().whitelist_sections(vec!["x", "y"]); + let mut cfg = ConfigSet::new(); + cfg.parse( + "[x]\n\ + a=1\n\ + [y]\n\ + b=2\n\ + [z]\n\ + c=3", + &opts, + ); + + assert_eq!(cfg.sections(), vec![Bytes::from("x"), Bytes::from("y")]); + assert_eq!(cfg.get("z", "c"), None); + } + + #[test] + fn test_section_remap() { + let mut remap = HashMap::new(); + remap.insert("x", "y"); + remap.insert("y", "z"); + + let opts = Options::new().remap_sections(remap); + let mut cfg = ConfigSet::new(); + cfg.parse( + "[x]\n\ + a=1\n\ + [y]\n\ + b=2\n\ + [z]\n\ + c=3", + &opts, + ); + + assert_eq!(cfg.get("y", "a"), Some("1".into())); + assert_eq!(cfg.get("z", "b"), Some("2".into())); + assert_eq!(cfg.get("z", "c"), Some("3".into())); + } + + #[test] + fn test_readonly_items() { + let opts = Options::new().readonly_items(vec![("x", "a"), ("y", "b")]); + let mut cfg = ConfigSet::new(); + cfg.parse( + "[x]\n\ + a=1\n\ + [y]\n\ + b=2\n\ + [z]\n\ + c=3", + &opts, + ); + + assert_eq!(cfg.get("x", "a"), None); + assert_eq!(cfg.get("y", "b"), None); + assert_eq!(cfg.get("z", "c"), Some("3".into())); + } + + #[test] + fn test_parse_list() { + fn b>(bytes: B) -> Bytes { + Bytes::from(bytes.as_ref()) + } + + // From test-ui-config.py + assert_eq!(parse_list(b"foo"), vec![b("foo")]); + assert_eq!( + parse_list(b"foo bar baz"), + vec![b("foo"), b("bar"), b("baz")] + ); + assert_eq!(parse_list(b"alice, bob"), vec![b("alice"), b("bob")]); + assert_eq!( + parse_list(b"foo bar baz alice, bob"), + 
vec![b("foo"), b("bar"), b("baz"), b("alice"), b("bob")] + ); + assert_eq!( + parse_list(b"abc d\"ef\"g \"hij def\""), + vec![b("abc"), b("d\"ef\"g"), b("hij def")] + ); + assert_eq!( + parse_list(b"\"hello world\", \"how are you?\""), + vec![b("hello world"), b("how are you?")] + ); + assert_eq!( + parse_list(b"Do\"Not\"Separate"), + vec![b("Do\"Not\"Separate")] + ); + assert_eq!(parse_list(b"\"Do\"Separate"), vec![b("Do"), b("Separate")]); + assert_eq!( + parse_list(b"\"Do\\\"NotSeparate\""), + vec![b("Do\"NotSeparate")] + ); + assert_eq!( + parse_list(&b"string \"with extraneous\" quotation mark\""[..]), + vec![ + b("string"), + b("with extraneous"), + b("quotation"), + b("mark\""), + ] + ); + assert_eq!(parse_list(b"x, y"), vec![b("x"), b("y")]); + assert_eq!(parse_list(b"\"x\", \"y\""), vec![b("x"), b("y")]); + assert_eq!( + parse_list(b"\"\"\" key = \"x\", \"y\" \"\"\""), + vec![b(""), b(" key = "), b("x\""), b("y"), b(""), b("\"")] + ); + assert_eq!(parse_list(b",,,, "), Vec::::new()); + assert_eq!( + parse_list(b"\" just with starting quotation"), + vec![b("\""), b("just"), b("with"), b("starting"), b("quotation")] + ); + assert_eq!( + parse_list(&b"\"longer quotation\" with \"no ending quotation"[..]), + vec![ + b("longer quotation"), + b("with"), + b("\"no"), + b("ending"), + b("quotation"), + ] + ); + assert_eq!( + parse_list(&b"this is \\\" \"not a quotation mark\""[..]), + vec![b("this"), b("is"), b("\""), b("not a quotation mark")] + ); + assert_eq!(parse_list(b"\n \n\nding\ndong"), vec![b("ding"), b("dong")]); + + // Other manually written cases + assert_eq!(parse_list("a,b,,c"), vec![b("a"), b("b"), b("c")]); + assert_eq!(parse_list("a b c"), vec![b("a"), b("b"), b("c")]); + assert_eq!( + parse_list(" , a , , b, , c , "), + vec![b("a"), b("b"), b("c")] + ); + assert_eq!(parse_list("a,\"b,c\" d"), vec![b("a"), b("b,c"), b("d")]); + assert_eq!(parse_list("a,\",c"), vec![b("a"), b("\""), b("c")]); + assert_eq!(parse_list("a,\" c\" \""), vec![b("a"), 
b(" c\"")]); + assert_eq!( + parse_list("a,\" c\" \" d"), + vec![b("a"), b(" c"), b("\""), b("d")] + ); + } + + #[test] + fn test_get_or() { + let mut cfg = ConfigSet::new(); + cfg.parse( + "[foo]\n\ + bool1 = yes\n\ + bool2 = unknown\n\ + bools = 1, TRUE, On, aLwAys, 0, false, oFF, never\n\ + int1 = -33\n\ + list1 = x y z\n\ + list3 = 2, 3, 1\n\ + byte1 = 1.5 KB\n\ + byte2 = 500\n\ + byte3 = 0.125M\n\ + ", + &"test".into(), + ); + + assert_eq!(cfg.get_or("foo", "bar", || 3).unwrap(), 3); + assert_eq!(cfg.get_or("foo", "bool1", || false).unwrap(), true); + assert_eq!( + format!("{}", cfg.get_or("foo", "bool2", || true).unwrap_err()), + "invalid bool: unknown" + ); + assert_eq!(cfg.get_or("foo", "int1", || 42).unwrap(), -33); + assert_eq!( + cfg.get_or("foo", "list1", || vec!["x".to_string()]) + .unwrap(), + vec!["x", "y", "z"] + ); + assert_eq!( + cfg.get_or("foo", "list3", || vec![0]).unwrap(), + vec![2, 3, 1] + ); + + assert_eq!(cfg.get_or_default::("foo", "bool1").unwrap(), true); + assert_eq!( + cfg.get_or_default::>("foo", "bools").unwrap(), + vec![true, true, true, true, false, false, false, false] + ); + + assert_eq!( + cfg.get_or_default::("foo", "byte1") + .unwrap() + .value(), + 1536 + ); + assert_eq!( + cfg.get_or_default::("foo", "byte2") + .unwrap() + .value(), + 500 + ); + assert_eq!( + cfg.get_or_default::("foo", "byte3") + .unwrap() + .value(), + 131072 + ); + assert_eq!( + cfg.get_or("foo", "missing", || ByteCount::from(3)) + .unwrap() + .value(), + 3 + ); + } +} diff --git a/rust/hg-core/src/configparser/lib.rs b/rust/hg-core/src/configparser/lib.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/lib.rs @@ -0,0 +1,75 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +//! # ConfigParser +//! +//! ConfigParser is a utility to parse hgrc-like config files. +//! +//! ## Features +//! +//! 
- Parse valid hgrc-like config files efficiently. +//! - Track source locations of config values. Keep multiple locations of +//! the same config if it is overridden. +//! +//! ## Config Format +//! +//! hgrc files are similar to INI files: +//! +//! ```plain,ignore +//! [section1] +//! name1 = value1 +//! name2 = value2 +//! +//! [section2] +//! name3 = value3 +//! +//! ; This is a comment. +//! # This is also a comment. +//! ``` +//! +//! But with some additional features. +//! +//! ### Include other config files +//! +//! Use `%include` to include other config files: +//! +//! ```plain,ignore +//! %include path/to/another/hgrc +//! %include path/to/another/hgrc.d +//! ``` +//! +//! The include path is relative to the directory of the current config +//! file being parsed. If it's a directory, files with names ending +//! with `.rc` in it will be read. +//! +//! ### Unset a config +//! +//! Use `%unset` to unset a config: +//! +//! ```plain,ignore +//! [section] +//! %unset name1 +//! ``` +//! +//! ### Multi-line values +//! +//! Indent non-first lines with a space: +//! +//! ```plain,ignore +//! [section] +//! name1 = value +//! line2 +//! line3 +//! ``` + +pub mod c_api; +pub mod config; +pub mod error; +pub mod hg; +pub mod parser; + +pub use error::Error; diff --git a/rust/hg-core/src/configparser/spec.pest b/rust/hg-core/src/configparser/spec.pest new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/configparser/spec.pest @@ -0,0 +1,63 @@ +// "comment" and "whitespace" have special meaning in pest. They cause more +// trouble than benefit here. Therefore, avoid them. +// See https://pest-parser.github.io/book/grammars/syntax.html +// +// Names are used in error messages. Certain rules are used to improve UX. 
+// For example, +// +// equal_sign = { space* ~ "=" ~ space* } +// config_item = { name ~ equal_sign ~ value } +// +// is more friendly than: +// +// config_item = { name ~ space* ~ "=" ~ space* ~ value } +// +// because the former shows "expect equal_sign", while the latter shows +// "expect space", for the following illegal content: +// +// [section] +// lack-of-equal-sign +// ^ error shows here +// +// Same applies to "directive" and "bracket"s. + + +new_line = { "\n" | "\r\n" } +space = { " " | "\t" } +comment_start = { ("#" | ";") } + +line = @{ (!new_line ~ ANY)* } + +value = ${ line ~ (new_line ~ space+ ~ line)* } +equal_sign = @{ "=" ~ space* } + +// Excluding special prefixes explicitly from config_name affects error +// messages. For example: +// +// [] +// ^ expect section_name (with "[" excluded) +// ^ expect equal_sign (without "[" excluded) +// +// %unknown +// ^ expect unset or include (with "%" excluded) +// ^ expect equal_sign (without "%" excluded) +// +// The "expect equal_sign" version is less friendly. +config_name = @{ !("[" | "=" | "%" | space | comment_start | new_line) ~ ANY ~ (!("=" | new_line) ~ ANY)* } +config_item = ${ config_name ~ equal_sign ~ value } + +left_bracket = @{ "[" } +right_bracket = @{ "]" } + +section_name = @{ (!("]" | new_line) ~ ANY)+ } +section = ${ left_bracket ~ section_name ~ right_bracket ~ space* } + +comment_line = @{ comment_start ~ line } +blank_line = @{ space* } + +directive = ${ "%" ~ (include | unset) } +include = ${ "include" ~ space+ ~ line } +unset = ${ "unset" ~ space+ ~ config_name ~ space* } + +compound = _{ (config_item | section | comment_line | directive | blank_line ) } +file = _{ SOI ~ compound ~ (new_line ~ compound)* ~ EOI }