diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -1,409 +1,417 @@ use crate::changelog::Changelog; use crate::config::{Config, ConfigError, ConfigParseError}; use crate::dirstate::DirstateParents; use crate::dirstate_tree::dirstate_map::DirstateMap; use crate::dirstate_tree::owning::OwningDirstateMap; use crate::errors::HgError; use crate::errors::HgResultExt; use crate::exit_codes; use crate::manifest::{Manifest, Manifestlog}; use crate::revlog::filelog::Filelog; use crate::revlog::revlog::RevlogError; use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::{is_dir, is_file, Vfs}; use crate::{requirements, NodePrefix}; use crate::{DirstateError, Revision}; use std::cell::{Cell, Ref, RefCell, RefMut}; use std::collections::HashSet; use std::path::{Path, PathBuf}; /// A repository on disk pub struct Repo { working_directory: PathBuf, dot_hg: PathBuf, store: PathBuf, requirements: HashSet, config: Config, // None means not known/initialized yet dirstate_parents: Cell>, dirstate_map: LazyCell, changelog: LazyCell, manifestlog: LazyCell, } #[derive(Debug, derive_more::From)] pub enum RepoError { NotFound { at: PathBuf, }, #[from] ConfigParseError(ConfigParseError), #[from] Other(HgError), } impl From for RepoError { fn from(error: ConfigError) -> Self { match error { ConfigError::Parse(error) => error.into(), ConfigError::Other(error) => error.into(), } } } impl Repo { /// tries to find nearest repository root in current working directory or /// its ancestors pub fn find_repo_root() -> Result { let current_directory = crate::utils::current_dir()?; // ancestors() is inclusive: it first yields `current_directory` // as-is. for ancestor in current_directory.ancestors() { if is_dir(ancestor.join(".hg"))? 
{ return Ok(ancestor.to_path_buf()); } } Err(RepoError::NotFound { at: current_directory, }) } /// Find a repository, either at the given path (which must contain a `.hg` /// sub-directory) or by searching the current directory and its /// ancestors. /// /// A method with two very different "modes" like this is usually a code /// smell that calls for two methods instead, but in this case an `Option` /// is what rhg sub-commands get from Clap for the `-R` / `--repository` CLI /// argument. Having two methods would just move that `if` to almost all /// callers. /// /// An explicit path without a `.hg` sub-directory yields /// `RepoError::NotFound`, except when it is a regular file, which is /// reported as an unsupported "bundle repository". pub fn find( config: &Config, explicit_path: Option, ) -> Result { if let Some(root) = explicit_path { if is_dir(root.join(".hg"))? { Self::new_at_path(root.to_owned(), config) } else if is_file(&root)? { Err(HgError::unsupported("bundle repository").into()) } else { Err(RepoError::NotFound { at: root.to_owned(), }) } } else { let root = Self::find_repo_root()?; Self::new_at_path(root, config) } } /// To be called after checking that `.hg` is a sub-directory fn new_at_path( working_directory: PathBuf, config: &Config, ) -> Result { let dot_hg = working_directory.join(".hg"); let mut repo_config_files = Vec::new(); repo_config_files.push(dot_hg.join("hgrc")); repo_config_files.push(dot_hg.join("hgrc-not-shared")); let hg_vfs = Vfs { base: &dot_hg }; let mut reqs = requirements::load_if_exists(hg_vfs)?; let relative = reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT); let shared = reqs.contains(requirements::SHARED_REQUIREMENT) || relative; // From `mercurial/localrepo.py`: // // if .hg/requires contains the sharesafe requirement, it means // there exists a `.hg/store/requires` too and we should read it // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement // is present. 
We never write SHARESAFE_REQUIREMENT for a repo if store // is not present, refer checkrequirementscompat() for that // // However, if SHARESAFE_REQUIREMENT is not present, it means that the // repository was shared the old way. We check the share source // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the // current repository needs to be reshared let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT); let store_path; if !shared { store_path = dot_hg.join("store"); } else { let bytes = hg_vfs.read("sharedpath")?; let mut shared_path = get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n')) .to_owned(); if relative { shared_path = dot_hg.join(shared_path) } if !is_dir(&shared_path)? { return Err(HgError::corrupted(format!( ".hg/sharedpath points to nonexistent directory {}", shared_path.display() )) .into()); } store_path = shared_path.join("store"); let source_is_share_safe = requirements::load(Vfs { base: &shared_path })? .contains(requirements::SHARESAFE_REQUIREMENT); if share_safe && !source_is_share_safe { return Err(match config .get(b"share", b"safe-mismatch.source-not-safe") { Some(b"abort") | None => HgError::abort( "abort: share source does not support share-safe requirement\n\ (see `hg help config.format.use-share-safe` for more information)", exit_codes::ABORT, ), _ => HgError::unsupported("share-safe downgrade"), } .into()); } else if source_is_share_safe && !share_safe { return Err( match config.get(b"share", b"safe-mismatch.source-safe") { Some(b"abort") | None => HgError::abort( "abort: version mismatch: source uses share-safe \ functionality while the current share does not\n\ (see `hg help config.format.use-share-safe` for more information)", exit_codes::ABORT, ), _ => HgError::unsupported("share-safe upgrade"), } .into(), ); } if share_safe { repo_config_files.insert(0, shared_path.join("hgrc")) } } if share_safe { reqs.extend(requirements::load(Vfs { base: &store_path })?); } let repo_config = if 
std::env::var_os("HGRCSKIPREPO").is_none() { config.combine_with_repo(&repo_config_files)? } else { config.clone() }; let repo = Self { requirements: reqs, working_directory, store: store_path, dot_hg, config: repo_config, dirstate_parents: Cell::new(None), dirstate_map: LazyCell::new(Self::new_dirstate_map), changelog: LazyCell::new(Changelog::open), manifestlog: LazyCell::new(Manifestlog::open), }; requirements::check(&repo)?; Ok(repo) } pub fn working_directory_path(&self) -> &Path { &self.working_directory } pub fn requirements(&self) -> &HashSet { &self.requirements } pub fn config(&self) -> &Config { &self.config } /// For accessing repository files (in `.hg`), except for the store /// (`.hg/store`). pub fn hg_vfs(&self) -> Vfs<'_> { Vfs { base: &self.dot_hg } } /// For accessing repository store files (in `.hg/store`) pub fn store_vfs(&self) -> Vfs<'_> { Vfs { base: &self.store } } /// For accessing the working copy pub fn working_directory_vfs(&self) -> Vfs<'_> { Vfs { base: &self.working_directory, } } pub fn has_dirstate_v2(&self) -> bool { self.requirements .contains(requirements::DIRSTATE_V2_REQUIREMENT) } + /// Whether the repository requirements include the sparse requirement. + /// Commands that do not handle sparse checkouts should check this and + /// opt out. + pub fn has_sparse(&self) -> bool { + self.requirements.contains(requirements::SPARSE_REQUIREMENT) + } + + /// Whether the repository requirements include the narrow requirement. + /// Commands that do not handle narrow clones should check this and + /// opt out. + pub fn has_narrow(&self) -> bool { + self.requirements.contains(requirements::NARROW_REQUIREMENT) + } + + /// Raw contents of `.hg/dirstate`; a missing file is treated as empty. fn dirstate_file_contents(&self) -> Result, HgError> { Ok(self .hg_vfs() .read("dirstate") .io_not_found_as_none()? .unwrap_or_default()) } pub fn dirstate_parents(&self) -> Result { if let Some(parents) = self.dirstate_parents.get() { return Ok(parents); } let dirstate = self.dirstate_file_contents()?; let parents = if dirstate.is_empty() { DirstateParents::NULL } else if self.has_dirstate_v2() { crate::dirstate_tree::on_disk::read_docket(&dirstate)?.parents() } else { crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? 
.clone() }; self.dirstate_parents.set(Some(parents)); Ok(parents) } fn new_dirstate_map(&self) -> Result { let dirstate_file_contents = self.dirstate_file_contents()?; if dirstate_file_contents.is_empty() { self.dirstate_parents.set(Some(DirstateParents::NULL)); Ok(OwningDirstateMap::new_empty(Vec::new())) } else if self.has_dirstate_v2() { let docket = crate::dirstate_tree::on_disk::read_docket( &dirstate_file_contents, )?; self.dirstate_parents.set(Some(docket.parents())); let data_size = docket.data_size(); let metadata = docket.tree_metadata(); let mut map = if let Some(data_mmap) = self .hg_vfs() .mmap_open(docket.data_filename()) .io_not_found_as_none()? { OwningDirstateMap::new_empty(data_mmap) } else { OwningDirstateMap::new_empty(Vec::new()) }; let (on_disk, placeholder) = map.get_pair_mut(); *placeholder = DirstateMap::new_v2(on_disk, data_size, metadata)?; Ok(map) } else { let mut map = OwningDirstateMap::new_empty(dirstate_file_contents); let (on_disk, placeholder) = map.get_pair_mut(); let (inner, parents) = DirstateMap::new_v1(on_disk)?; self.dirstate_parents .set(Some(parents.unwrap_or(DirstateParents::NULL))); *placeholder = inner; Ok(map) } } pub fn dirstate_map( &self, ) -> Result, DirstateError> { self.dirstate_map.get_or_init(self) } pub fn dirstate_map_mut( &self, ) -> Result, DirstateError> { self.dirstate_map.get_mut_or_init(self) } pub fn changelog(&self) -> Result, HgError> { self.changelog.get_or_init(self) } pub fn changelog_mut(&self) -> Result, HgError> { self.changelog.get_mut_or_init(self) } pub fn manifestlog(&self) -> Result, HgError> { self.manifestlog.get_or_init(self) } pub fn manifestlog_mut(&self) -> Result, HgError> { self.manifestlog.get_mut_or_init(self) } /// Returns the manifest of the *changeset* with the given node ID pub fn manifest_for_node( &self, node: impl Into, ) -> Result { self.manifestlog()?.data_for_node( self.changelog()? .data_for_node(node.into())? .manifest_node()? 
.into(), ) } /// Returns the manifest of the *changeset* with the given revision number pub fn manifest_for_rev( &self, revision: Revision, ) -> Result { self.manifestlog()?.data_for_node( self.changelog()? .data_for_rev(revision)? .manifest_node()? .into(), ) } pub fn filelog(&self, path: &HgPath) -> Result { Filelog::open(self, path) } } /// Lazily-initialized component of `Repo` with interior mutability /// /// This differs from `OnceCell` in that the value can still be "deinitialized" /// later by setting its inner `Option` to `None`. struct LazyCell { value: RefCell>, // `Fn`s that don’t capture environment are zero-size, so this box does // not allocate: init: Box Result>, } impl LazyCell { fn new(init: impl Fn(&Repo) -> Result + 'static) -> Self { Self { value: RefCell::new(None), init: Box::new(init), } } fn get_or_init(&self, repo: &Repo) -> Result, E> { let mut borrowed = self.value.borrow(); if borrowed.is_none() { drop(borrowed); // Only use `borrow_mut` if it is really needed to avoid panic in // case there is another outstanding borrow but mutation is not // needed. 
*self.value.borrow_mut() = Some((self.init)(repo)?); borrowed = self.value.borrow() } Ok(Ref::map(borrowed, |option| option.as_ref().unwrap())) } pub fn get_mut_or_init(&self, repo: &Repo) -> Result, E> { let mut borrowed = self.value.borrow_mut(); if borrowed.is_none() { *borrowed = Some((self.init)(repo)?); } Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap())) } } diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs --- a/rust/hg-core/src/requirements.rs +++ b/rust/hg-core/src/requirements.rs @@ -1,157 +1,161 @@ use crate::errors::{HgError, HgResultExt}; use crate::repo::Repo; use crate::utils::join_display; use crate::vfs::Vfs; use std::collections::HashSet; fn parse(bytes: &[u8]) -> Result, HgError> { // The Python code reading this file uses `str.splitlines` // which looks for a number of line separators (even including a couple of // non-ASCII ones), but Python code writing it always uses `\n`. let lines = bytes.split(|&byte| byte == b'\n'); lines .filter(|line| !line.is_empty()) .map(|line| { // Python uses Unicode `str.isalnum` but feature names are all // ASCII if line[0].is_ascii_alphanumeric() && line.is_ascii() { Ok(String::from_utf8(line.into()).unwrap()) } else { Err(HgError::corrupted("parse error in 'requires' file")) } }) .collect() } pub(crate) fn load(hg_vfs: Vfs) -> Result, HgError> { parse(&hg_vfs.read("requires")?) } pub(crate) fn load_if_exists(hg_vfs: Vfs) -> Result, HgError> { if let Some(bytes) = hg_vfs.read("requires").io_not_found_as_none()? { parse(&bytes) } else { // Treat a missing file the same as an empty file. // From `mercurial/localrepo.py`: // > requires file contains a newline-delimited list of // > features/capabilities the opener (us) must have in order to use // > the repository. This file was introduced in Mercurial 0.9.2, // > which means very old repositories may not have one. We assume // > a missing file translates to no requirements. 
Ok(HashSet::new()) } } pub(crate) fn check(repo: &Repo) -> Result<(), HgError> { let unknown: Vec<_> = repo .requirements() .iter() .map(String::as_str) // .filter(|feature| !ALL_SUPPORTED.contains(feature.as_str())) .filter(|feature| { !REQUIRED.contains(feature) && !SUPPORTED.contains(feature) }) .collect(); if !unknown.is_empty() { return Err(HgError::unsupported(format!( "repository requires feature unknown to this Mercurial: {}", join_display(&unknown, ", ") ))); } let missing: Vec<_> = REQUIRED .iter() .filter(|&&feature| !repo.requirements().contains(feature)) .collect(); if !missing.is_empty() { return Err(HgError::unsupported(format!( "repository is missing feature required by this Mercurial: {}", join_display(&missing, ", ") ))); } Ok(()) } /// rhg does not support repositories that are *missing* any of these features const REQUIRED: &[&str] = &["revlogv1", "store", "fncache", "dotencode"]; /// rhg supports repository with or without these const SUPPORTED: &[&str] = &[ "generaldelta", SHARED_REQUIREMENT, SHARESAFE_REQUIREMENT, SPARSEREVLOG_REQUIREMENT, RELATIVE_SHARED_REQUIREMENT, REVLOG_COMPRESSION_ZSTD, DIRSTATE_V2_REQUIREMENT, // As of this writing everything rhg does is read-only. // When it starts writing to the repository, it’ll need to either keep the // persistent nodemap up to date or remove this entry: NODEMAP_REQUIREMENT, + // Not all commands support `sparse` and `narrow`. The commands that do + // not should opt out by checking `has_sparse` and `has_narrow`. + SPARSE_REQUIREMENT, + NARROW_REQUIREMENT, ]; // Copied from mercurial/requirements.py: pub(crate) const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2"; /// When narrowing is finalized and no longer subject to format changes, /// we should move this to just "narrow" or similar. 
pub(crate) const NARROW_REQUIREMENT: &str = "narrowhg-experimental";

/// Enables sparse working directory usage
pub(crate) const SPARSE_REQUIREMENT: &str = "exp-sparse";

/// Enables the internal phase which is used to hide changesets instead
/// of stripping them
// Kept to mirror mercurial/requirements.py; no use of it is visible in this
// change, hence the `allow`.
#[allow(unused)]
pub(crate) const INTERNAL_PHASE_REQUIREMENT: &str = "internal-phase";

/// Stores manifest in Tree structure
// Kept to mirror mercurial/requirements.py; no use of it is visible in this
// change, hence the `allow`.
#[allow(unused)]
pub(crate) const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";

/// Increment the sub-version when the revlog v2 format changes to lock out old
/// clients.
// Kept to mirror mercurial/requirements.py; no use of it is visible in this
// change, hence the `allow`.
#[allow(unused)]
pub(crate) const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";

/// A repository with the sparserevlog feature will have delta chains that
/// can spread over a larger span. Sparse reading cuts these large spans into
/// pieces, so that each piece isn't too big.
/// Without the sparserevlog capability, reading from the repository could use
/// huge amounts of memory, because the whole span would be read at once,
/// including all the intermediate revisions that aren't pertinent for the
/// chain. This is why once a repository has enabled sparse-read, it becomes
/// required.
pub(crate) const SPARSEREVLOG_REQUIREMENT: &str = "sparserevlog";

/// A repository with the copies-sidedata-changeset requirement will store
/// copies related information in changeset's sidedata.
// Kept to mirror mercurial/requirements.py; no use of it is visible in this
// change, hence the `allow`.
#[allow(unused)]
pub(crate) const COPIESSDC_REQUIREMENT: &str = "exp-copies-sidedata-changeset";

/// The repository uses persistent nodemap for the changelog and the manifest.
#[allow(unused)] pub(crate) const NODEMAP_REQUIREMENT: &str = "persistent-nodemap"; /// Denotes that the current repository is a share #[allow(unused)] pub(crate) const SHARED_REQUIREMENT: &str = "shared"; /// Denotes that current repository is a share and the shared source path is /// relative to the current repository root path #[allow(unused)] pub(crate) const RELATIVE_SHARED_REQUIREMENT: &str = "relshared"; /// A repository with share implemented safely. The repository has different /// store and working copy requirements i.e. both `.hg/requires` and /// `.hg/store/requires` are present. #[allow(unused)] pub(crate) const SHARESAFE_REQUIREMENT: &str = "share-safe"; /// A repository that use zstd compression inside its revlog #[allow(unused)] pub(crate) const REVLOG_COMPRESSION_ZSTD: &str = "revlog-compression-zstd"; diff --git a/rust/rhg/src/commands/files.rs b/rust/rhg/src/commands/files.rs --- a/rust/rhg/src/commands/files.rs +++ b/rust/rhg/src/commands/files.rs @@ -1,72 +1,100 @@ use crate::error::CommandError; use crate::ui::Ui; use crate::ui::UiError; use crate::utils::path_utils::relativize_paths; use clap::Arg; use hg::errors::HgError; use hg::operations::list_rev_tracked_files; use hg::operations::Dirstate; use hg::repo::Repo; use hg::utils::hg_path::HgPath; use std::borrow::Cow; pub const HELP_TEXT: &str = " List tracked files. Returns 0 on success. 
"; pub fn args() -> clap::App<'static, 'static> { clap::SubCommand::with_name("files") .arg( Arg::with_name("rev") .help("search the repository as it is in REV") .short("-r") .long("--revision") .value_name("REV") .takes_value(true), ) .about(HELP_TEXT) } pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> { let relative = invocation.config.get(b"ui", b"relative-paths"); if relative.is_some() { return Err(CommandError::unsupported( "non-default ui.relative-paths", )); } let rev = invocation.subcommand_args.value_of("rev"); let repo = invocation.repo?; + + // It seems better if this check is removed: this would correspond to + // automatically enabling the extension if the repo requires it. + // However we need this check to be in sync with vanilla hg so hg tests + // pass. + if repo.has_sparse() + && invocation.config.get(b"extensions", b"sparse").is_none() + { + return Err(CommandError::unsupported( + "repo is using sparse, but sparse extension is not enabled", + )); + } + if let Some(rev) = rev { + if repo.has_narrow() { + return Err(CommandError::unsupported( + "rhg files -r is not supported in narrow clones", + )); + } let files = list_rev_tracked_files(repo, rev).map_err(|e| (e, rev))?; display_files(invocation.ui, repo, files.iter()) } else { + // The dirstate always reflects the sparse narrowspec, so if + // we only have sparse without narrow all is fine. + // If we have narrow, then [hg files] needs to check if + // the store narrowspec is in sync with the one of the dirstate, + // so we can't support that without explicit code. 
+ if repo.has_narrow() { + return Err(CommandError::unsupported( + "rhg files is not supported in narrow clones", + )); + } let distate = Dirstate::new(repo)?; let files = distate.tracked_files()?; display_files(invocation.ui, repo, files.into_iter().map(Ok)) } } fn display_files<'a>( ui: &Ui, repo: &Repo, files: impl IntoIterator>, ) -> Result<(), CommandError> { let mut stdout = ui.stdout_buffer(); let mut any = false; relativize_paths(repo, files, |path: Cow<[u8]>| -> Result<(), UiError> { any = true; stdout.write_all(path.as_ref())?; stdout.write_all(b"\n") })?; stdout.flush()?; if any { Ok(()) } else { Err(CommandError::Unsuccessful) } } diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs +++ b/rust/rhg/src/commands/status.rs @@ -1,396 +1,403 @@ // status.rs // // Copyright 2020, Georges Racinet // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. use crate::error::CommandError; use crate::ui::Ui; use crate::utils::path_utils::relativize_paths; use clap::{Arg, SubCommand}; use format_bytes::format_bytes; use hg; use hg::config::Config; use hg::dirstate::has_exec_bit; use hg::errors::HgError; use hg::manifest::Manifest; use hg::matchers::AlwaysMatcher; use hg::repo::Repo; use hg::utils::files::get_bytes_from_os_string; use hg::utils::hg_path::{hg_path_to_os_string, HgPath}; use hg::{HgPathCow, StatusOptions}; use log::{info, warn}; pub const HELP_TEXT: &str = " Show changed files in the working directory This is a pure Rust version of `hg status`. Some options might be missing, check the list below. 
"; pub fn args() -> clap::App<'static, 'static> { SubCommand::with_name("status") .alias("st") .about(HELP_TEXT) .arg( Arg::with_name("all") .help("show status of all files") .short("-A") .long("--all"), ) .arg( Arg::with_name("modified") .help("show only modified files") .short("-m") .long("--modified"), ) .arg( Arg::with_name("added") .help("show only added files") .short("-a") .long("--added"), ) .arg( Arg::with_name("removed") .help("show only removed files") .short("-r") .long("--removed"), ) .arg( Arg::with_name("clean") .help("show only clean files") .short("-c") .long("--clean"), ) .arg( Arg::with_name("deleted") .help("show only deleted files") .short("-d") .long("--deleted"), ) .arg( Arg::with_name("unknown") .help("show only unknown (not tracked) files") .short("-u") .long("--unknown"), ) .arg( Arg::with_name("ignored") .help("show only ignored files") .short("-i") .long("--ignored"), ) .arg( Arg::with_name("no-status") .help("hide status prefix") .short("-n") .long("--no-status"), ) } /// Pure data type allowing the caller to specify file states to display #[derive(Copy, Clone, Debug)] pub struct DisplayStates { pub modified: bool, pub added: bool, pub removed: bool, pub clean: bool, pub deleted: bool, pub unknown: bool, pub ignored: bool, } pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates { modified: true, added: true, removed: true, clean: false, deleted: true, unknown: true, ignored: false, }; pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates { modified: true, added: true, removed: true, clean: true, deleted: true, unknown: true, ignored: true, }; impl DisplayStates { pub fn is_empty(&self) -> bool { !(self.modified || self.added || self.removed || self.clean || self.deleted || self.unknown || self.ignored) } } pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> { let status_enabled_default = false; let status_enabled = invocation.config.get_option(b"rhg", b"status")?; if 
!status_enabled.unwrap_or(status_enabled_default) { return Err(CommandError::unsupported( "status is experimental in rhg (enable it with 'rhg.status = true' \ or enable fallback with 'rhg.on-unsupported = fallback')" )); } // TODO: lift these limitations if invocation.config.get_bool(b"ui", b"tweakdefaults")? { return Err(CommandError::unsupported( "ui.tweakdefaults is not yet supported with rhg status", )); } if invocation.config.get_bool(b"ui", b"statuscopies")? { return Err(CommandError::unsupported( "ui.statuscopies is not yet supported with rhg status", )); } if invocation .config .get(b"commands", b"status.terse") .is_some() { return Err(CommandError::unsupported( "status.terse is not yet supported with rhg status", )); } let ui = invocation.ui; let config = invocation.config; let args = invocation.subcommand_args; let display_states = if args.is_present("all") { // TODO when implementing `--quiet`: it excludes clean files // from `--all` ALL_DISPLAY_STATES } else { let requested = DisplayStates { modified: args.is_present("modified"), added: args.is_present("added"), removed: args.is_present("removed"), clean: args.is_present("clean"), deleted: args.is_present("deleted"), unknown: args.is_present("unknown"), ignored: args.is_present("ignored"), }; if requested.is_empty() { DEFAULT_DISPLAY_STATES } else { requested } }; let no_status = args.is_present("no-status"); let repo = invocation.repo?; + + if repo.has_sparse() || repo.has_narrow() { + return Err(CommandError::unsupported( + "rhg status is not supported for sparse checkouts or narrow clones yet" + )); + } + let mut dmap = repo.dirstate_map_mut()?; let options = StatusOptions { // we're currently supporting file systems with exec flags only // anyway check_exec: true, list_clean: display_states.clean, list_unknown: display_states.unknown, list_ignored: display_states.ignored, collect_traversed_dirs: false, }; let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded let (mut 
ds_status, pattern_warnings) = dmap.status( &AlwaysMatcher, repo.working_directory_path().to_owned(), vec![ignore_file], options, )?; if !pattern_warnings.is_empty() { warn!("Pattern warnings: {:?}", &pattern_warnings); } if !ds_status.bad.is_empty() { warn!("Bad matches {:?}", &(ds_status.bad)) } if !ds_status.unsure.is_empty() { info!( "Files to be rechecked by retrieval from filelog: {:?}", &ds_status.unsure ); } if !ds_status.unsure.is_empty() && (display_states.modified || display_states.clean) { let p1 = repo.dirstate_parents()?.p1; let manifest = repo.manifest_for_node(p1).map_err(|e| { CommandError::from((e, &*format!("{:x}", p1.short()))) })?; for to_check in ds_status.unsure { if unsure_is_modified(repo, &manifest, &to_check)? { if display_states.modified { ds_status.modified.push(to_check); } } else { if display_states.clean { ds_status.clean.push(to_check); } } } } if display_states.modified { display_status_paths( ui, repo, config, no_status, &mut ds_status.modified, b"M", )?; } if display_states.added { display_status_paths( ui, repo, config, no_status, &mut ds_status.added, b"A", )?; } if display_states.removed { display_status_paths( ui, repo, config, no_status, &mut ds_status.removed, b"R", )?; } if display_states.deleted { display_status_paths( ui, repo, config, no_status, &mut ds_status.deleted, b"!", )?; } if display_states.unknown { display_status_paths( ui, repo, config, no_status, &mut ds_status.unknown, b"?", )?; } if display_states.ignored { display_status_paths( ui, repo, config, no_status, &mut ds_status.ignored, b"I", )?; } if display_states.clean { display_status_paths( ui, repo, config, no_status, &mut ds_status.clean, b"C", )?; } Ok(()) } // Probably more elegant to use a Deref or Borrow trait rather than // harcode HgPathBuf, but probably not really useful at this point fn display_status_paths( ui: &Ui, repo: &Repo, config: &Config, no_status: bool, paths: &mut [HgPathCow], status_prefix: &[u8], ) -> Result<(), CommandError> { 
paths.sort_unstable(); let mut relative: bool = config.get_bool(b"ui", b"relative-paths")?; relative = config .get_option(b"commands", b"status.relative")? .unwrap_or(relative); let print_path = |path: &[u8]| { // TODO optim, probably lots of unneeded copies here, especially // if out stream is buffered if no_status { ui.write_stdout(&format_bytes!(b"{}\n", path)) } else { ui.write_stdout(&format_bytes!(b"{} {}\n", status_prefix, path)) } }; if relative && !ui.plain() { relativize_paths(repo, paths.iter().map(Ok), |path| { print_path(&path) })?; } else { for path in paths { print_path(path.as_bytes())? } } Ok(()) } /// Check if a file is modified by comparing actual repo store and file system. /// /// This is meant to be used for files whose status the dirstate cannot /// resolve, due to time resolution limits. /// /// Panics if `hg_path` cannot be converted to an OS string or has no entry /// in `manifest`. fn unsure_is_modified( repo: &Repo, manifest: &Manifest, hg_path: &HgPath, ) -> Result { let vfs = repo.working_directory_vfs(); let fs_path = hg_path_to_os_string(hg_path).expect("HgPath conversion"); let fs_metadata = vfs.symlink_metadata(&fs_path)?; let is_symlink = fs_metadata.file_type().is_symlink(); // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the // dirstate let fs_flags = if is_symlink { Some(b'l') } else if has_exec_bit(&fs_metadata) { Some(b'x') } else { None }; let entry = manifest .find_file(hg_path)? .expect("ambiguous file not in p1"); if entry.flags != fs_flags { return Ok(true); } let filelog = repo.filelog(hg_path)?; let filelog_entry = filelog.data_for_node(entry.node_id()?).map_err(|_| { HgError::corrupted("filelog missing node from manifest") })?; let contents_in_p1 = filelog_entry.data()?; let fs_contents = if is_symlink { get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string()) } else { vfs.read(fs_path)? 
}; return Ok(contents_in_p1 != &*fs_contents); } diff --git a/rust/rhg/src/main.rs b/rust/rhg/src/main.rs --- a/rust/rhg/src/main.rs +++ b/rust/rhg/src/main.rs @@ -1,652 +1,653 @@ extern crate log; use crate::error::CommandError; use crate::ui::Ui; use clap::App; use clap::AppSettings; use clap::Arg; use clap::ArgMatches; use format_bytes::{format_bytes, join}; use hg::config::{Config, ConfigSource}; use hg::exit_codes; use hg::repo::{Repo, RepoError}; use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes}; use hg::utils::SliceExt; use std::ffi::OsString; use std::path::PathBuf; use std::process::Command; mod blackbox; mod error; mod ui; pub mod utils { pub mod path_utils; } fn main_with_result( process_start_time: &blackbox::ProcessStartTime, ui: &ui::Ui, repo: Result<&Repo, &NoRepoInCwdError>, config: &Config, ) -> Result<(), CommandError> { check_unsupported(config, ui)?; let app = App::new("rhg") .global_setting(AppSettings::AllowInvalidUtf8) .global_setting(AppSettings::DisableVersion) .setting(AppSettings::SubcommandRequired) .setting(AppSettings::VersionlessSubcommands) .arg( Arg::with_name("repository") .help("repository root directory") .short("-R") .long("--repository") .value_name("REPO") .takes_value(true) // Both ok: `hg -R ./foo log` or `hg log -R ./foo` .global(true), ) .arg( Arg::with_name("config") .help("set/override config option (use 'section.name=value')") .long("--config") .value_name("CONFIG") .takes_value(true) .global(true) // Ok: `--config section.key1=val --config section.key2=val2` .multiple(true) // Not ok: `--config section.key1=val section.key2=val2` .number_of_values(1), ) .arg( Arg::with_name("cwd") .help("change working directory") .long("--cwd") .value_name("DIR") .takes_value(true) .global(true), ) .version("0.0.1"); let app = add_subcommand_args(app); let matches = app.clone().get_matches_safe()?; let (subcommand_name, subcommand_matches) = matches.subcommand(); // Mercurial allows users to define "defaults" for 
commands, fallback // if a default is detected for the current command let defaults = config.get_str(b"defaults", subcommand_name.as_bytes()); if defaults?.is_some() { let msg = "`defaults` config set"; return Err(CommandError::unsupported(msg)); } for prefix in ["pre", "post", "fail"].iter() { // Mercurial allows users to define generic hooks for commands, // fallback if any are detected let item = format!("{}-{}", prefix, subcommand_name); let hook_for_command = config.get_str(b"hooks", item.as_bytes())?; if hook_for_command.is_some() { let msg = format!("{}-{} hook defined", prefix, subcommand_name); return Err(CommandError::unsupported(msg)); } } let run = subcommand_run_fn(subcommand_name) .expect("unknown subcommand name from clap despite AppSettings::SubcommandRequired"); let subcommand_args = subcommand_matches .expect("no subcommand arguments from clap despite AppSettings::SubcommandRequired"); let invocation = CliInvocation { ui, subcommand_args, config, repo, }; if let Ok(repo) = repo { // We don't support subrepos, fallback if the subrepos file is present if repo.working_directory_vfs().join(".hgsub").exists() { let msg = "subrepos (.hgsub is present)"; return Err(CommandError::unsupported(msg)); } } let blackbox = blackbox::Blackbox::new(&invocation, process_start_time)?; blackbox.log_command_start(); let result = run(&invocation); blackbox.log_command_end(exit_code( &result, // TODO: show a warning or combine with original error if `get_bool` // returns an error config .get_bool(b"ui", b"detailed-exit-code") .unwrap_or(false), )); result } fn main() { // Run this first, before we find out if the blackbox extension is even // enabled, in order to include everything in-between in the duration // measurements. Reading config files can be slow if they’re on NFS. 
let process_start_time = blackbox::ProcessStartTime::now(); env_logger::init(); let ui = ui::Ui::new(); let early_args = EarlyArgs::parse(std::env::args_os()); let initial_current_dir = early_args.cwd.map(|cwd| { let cwd = get_path_from_bytes(&cwd); std::env::current_dir() .and_then(|initial| { std::env::set_current_dir(cwd)?; Ok(initial) }) .unwrap_or_else(|error| { exit( &None, &ui, OnUnsupported::Abort, Err(CommandError::abort(format!( "abort: {}: '{}'", error, cwd.display() ))), false, ) }) }); let mut non_repo_config = Config::load_non_repo().unwrap_or_else(|error| { // Normally this is decided based on config, but we don’t have that // available. As of this writing config loading never returns an // "unsupported" error but that is not enforced by the type system. let on_unsupported = OnUnsupported::Abort; exit( &initial_current_dir, &ui, on_unsupported, Err(error.into()), false, ) }); non_repo_config .load_cli_args_config(early_args.config) .unwrap_or_else(|error| { exit( &initial_current_dir, &ui, OnUnsupported::from_config(&non_repo_config), Err(error.into()), non_repo_config .get_bool(b"ui", b"detailed-exit-code") .unwrap_or(false), ) }); if let Some(repo_path_bytes) = &early_args.repo { lazy_static::lazy_static! 
{ static ref SCHEME_RE: regex::bytes::Regex = // Same as `_matchscheme` in `mercurial/util.py` regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap(); } if SCHEME_RE.is_match(&repo_path_bytes) { exit( &initial_current_dir, &ui, OnUnsupported::from_config(&non_repo_config), Err(CommandError::UnsupportedFeature { message: format_bytes!( b"URL-like --repository {}", repo_path_bytes ), }), // TODO: show a warning or combine with original error if // `get_bool` returns an error non_repo_config .get_bool(b"ui", b"detailed-exit-code") .unwrap_or(false), ) } } let repo_arg = early_args.repo.unwrap_or(Vec::new()); let repo_path: Option = { if repo_arg.is_empty() { None } else { let local_config = { if std::env::var_os("HGRCSKIPREPO").is_none() { // TODO: handle errors from find_repo_root if let Ok(current_dir_path) = Repo::find_repo_root() { let config_files = vec![ ConfigSource::AbsPath( current_dir_path.join(".hg/hgrc"), ), ConfigSource::AbsPath( current_dir_path.join(".hg/hgrc-not-shared"), ), ]; // TODO: handle errors from // `load_from_explicit_sources` Config::load_from_explicit_sources(config_files).ok() } else { None } } else { None } }; let non_repo_config_val = { let non_repo_val = non_repo_config.get(b"paths", &repo_arg); match &non_repo_val { Some(val) if val.len() > 0 => home::home_dir() .unwrap_or_else(|| PathBuf::from("~")) .join(get_path_from_bytes(val)) .canonicalize() // TODO: handle error and make it similar to python // implementation maybe? 
.ok(), _ => None, } }; let config_val = match &local_config { None => non_repo_config_val, Some(val) => { let local_config_val = val.get(b"paths", &repo_arg); match &local_config_val { Some(val) if val.len() > 0 => { // presence of a local_config assures that // current_dir // wont result in an Error let canpath = hg::utils::current_dir() .unwrap() .join(get_path_from_bytes(val)) .canonicalize(); canpath.ok().or(non_repo_config_val) } _ => non_repo_config_val, } } }; config_val.or(Some(get_path_from_bytes(&repo_arg).to_path_buf())) } }; let repo_result = match Repo::find(&non_repo_config, repo_path.to_owned()) { Ok(repo) => Ok(repo), Err(RepoError::NotFound { at }) if repo_path.is_none() => { // Not finding a repo is not fatal yet, if `-R` was not given Err(NoRepoInCwdError { cwd: at }) } Err(error) => exit( &initial_current_dir, &ui, OnUnsupported::from_config(&non_repo_config), Err(error.into()), // TODO: show a warning or combine with original error if // `get_bool` returns an error non_repo_config .get_bool(b"ui", b"detailed-exit-code") .unwrap_or(false), ), }; let config = if let Ok(repo) = &repo_result { repo.config() } else { &non_repo_config }; let on_unsupported = OnUnsupported::from_config(config); let result = main_with_result( &process_start_time, &ui, repo_result.as_ref(), config, ); exit( &initial_current_dir, &ui, on_unsupported, result, // TODO: show a warning or combine with original error if `get_bool` // returns an error config .get_bool(b"ui", b"detailed-exit-code") .unwrap_or(false), ) } fn exit_code( result: &Result<(), CommandError>, use_detailed_exit_code: bool, ) -> i32 { match result { Ok(()) => exit_codes::OK, Err(CommandError::Abort { message: _, detailed_exit_code, }) => { if use_detailed_exit_code { *detailed_exit_code } else { exit_codes::ABORT } } Err(CommandError::Unsuccessful) => exit_codes::UNSUCCESSFUL, // Exit with a specific code and no error message to let a potential // wrapper script fallback to Python-based Mercurial. 
Err(CommandError::UnsupportedFeature { .. }) => { exit_codes::UNIMPLEMENTED } } } fn exit( initial_current_dir: &Option, ui: &Ui, mut on_unsupported: OnUnsupported, result: Result<(), CommandError>, use_detailed_exit_code: bool, ) -> ! { if let ( OnUnsupported::Fallback { executable }, Err(CommandError::UnsupportedFeature { .. }), ) = (&on_unsupported, &result) { let mut args = std::env::args_os(); let executable = match executable { None => { exit_no_fallback( ui, OnUnsupported::Abort, Err(CommandError::abort( "abort: 'rhg.on-unsupported=fallback' without \ 'rhg.fallback-executable' set.", )), false, ); } Some(executable) => executable, }; let executable_path = get_path_from_bytes(&executable); let this_executable = args.next().expect("exepcted argv[0] to exist"); if executable_path == &PathBuf::from(this_executable) { // Avoid spawning infinitely many processes until resource // exhaustion. let _ = ui.write_stderr(&format_bytes!( b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \ points to `rhg` itself.\n", executable )); on_unsupported = OnUnsupported::Abort } else { // `args` is now `argv[1..]` since we’ve already consumed // `argv[0]` let mut command = Command::new(executable_path); command.args(args); if let Some(initial) = initial_current_dir { command.current_dir(initial); } let result = command.status(); match result { Ok(status) => std::process::exit( status.code().unwrap_or(exit_codes::ABORT), ), Err(error) => { let _ = ui.write_stderr(&format_bytes!( b"tried to fall back to a '{}' sub-process but got error {}\n", executable, format_bytes::Utf8(error) )); on_unsupported = OnUnsupported::Abort } } } } exit_no_fallback(ui, on_unsupported, result, use_detailed_exit_code) } fn exit_no_fallback( ui: &Ui, on_unsupported: OnUnsupported, result: Result<(), CommandError>, use_detailed_exit_code: bool, ) -> ! 
{ match &result { Ok(_) => {} Err(CommandError::Unsuccessful) => {} Err(CommandError::Abort { message, detailed_exit_code: _, }) => { if !message.is_empty() { // Ignore errors when writing to stderr, we’re already exiting // with failure code so there’s not much more we can do. let _ = ui.write_stderr(&format_bytes!(b"{}\n", message)); } } Err(CommandError::UnsupportedFeature { message }) => { match on_unsupported { OnUnsupported::Abort => { let _ = ui.write_stderr(&format_bytes!( b"unsupported feature: {}\n", message )); } OnUnsupported::AbortSilent => {} OnUnsupported::Fallback { .. } => unreachable!(), } } } std::process::exit(exit_code(&result, use_detailed_exit_code)) } macro_rules! subcommands { ($( $command: ident )+) => { mod commands { $( pub mod $command; )+ } fn add_subcommand_args<'a, 'b>(app: App<'a, 'b>) -> App<'a, 'b> { app $( .subcommand(commands::$command::args()) )+ } pub type RunFn = fn(&CliInvocation) -> Result<(), CommandError>; fn subcommand_run_fn(name: &str) -> Option { match name { $( stringify!($command) => Some(commands::$command::run), )+ _ => None, } } }; } subcommands! { cat debugdata debugrequirements debugignorerhg files root config status } pub struct CliInvocation<'a> { ui: &'a Ui, subcommand_args: &'a ArgMatches<'a>, config: &'a Config, /// References inside `Result` is a bit peculiar but allow /// `invocation.repo?` to work out with `&CliInvocation` since this /// `Result` type is `Copy`. repo: Result<&'a Repo, &'a NoRepoInCwdError>, } struct NoRepoInCwdError { cwd: PathBuf, } /// CLI arguments to be parsed "early" in order to be able to read /// configuration before using Clap. Ideally we would also use Clap for this, /// see . /// /// These arguments are still declared when we do use Clap later, so that Clap /// does not return an error for their presence. struct EarlyArgs { /// Values of all `--config` arguments. (Possibly none) config: Vec>, /// Value of the `-R` or `--repository` argument, if any. 
repo: Option>, /// Value of the `--cwd` argument, if any. cwd: Option>, } impl EarlyArgs { fn parse(args: impl IntoIterator) -> Self { let mut args = args.into_iter().map(get_bytes_from_os_str); let mut config = Vec::new(); let mut repo = None; let mut cwd = None; // Use `while let` instead of `for` so that we can also call // `args.next()` inside the loop. while let Some(arg) = args.next() { if arg == b"--config" { if let Some(value) = args.next() { config.push(value) } } else if let Some(value) = arg.drop_prefix(b"--config=") { config.push(value.to_owned()) } if arg == b"--cwd" { if let Some(value) = args.next() { cwd = Some(value) } } else if let Some(value) = arg.drop_prefix(b"--cwd=") { cwd = Some(value.to_owned()) } if arg == b"--repository" || arg == b"-R" { if let Some(value) = args.next() { repo = Some(value) } } else if let Some(value) = arg.drop_prefix(b"--repository=") { repo = Some(value.to_owned()) } else if let Some(value) = arg.drop_prefix(b"-R") { repo = Some(value.to_owned()) } } Self { config, repo, cwd } } } /// What to do when encountering some unsupported feature. /// /// See `HgError::UnsupportedFeature` and `CommandError::UnsupportedFeature`. enum OnUnsupported { /// Print an error message describing what feature is not supported, /// and exit with code 252. Abort, /// Silently exit with code 252. 
AbortSilent, /// Try running a Python implementation Fallback { executable: Option> }, } impl OnUnsupported { const DEFAULT: Self = OnUnsupported::Abort; fn from_config(config: &Config) -> Self { match config .get(b"rhg", b"on-unsupported") .map(|value| value.to_ascii_lowercase()) .as_deref() { Some(b"abort") => OnUnsupported::Abort, Some(b"abort-silent") => OnUnsupported::AbortSilent, Some(b"fallback") => OnUnsupported::Fallback { executable: config .get(b"rhg", b"fallback-executable") .map(|x| x.to_owned()), }, None => Self::DEFAULT, Some(_) => { // TODO: warn about unknown config value Self::DEFAULT } } } } -const SUPPORTED_EXTENSIONS: &[&[u8]] = &[b"blackbox", b"share"]; +const SUPPORTED_EXTENSIONS: &[&[u8]] = + &[b"blackbox", b"share", b"sparse", b"narrow"]; fn check_extensions(config: &Config) -> Result<(), CommandError> { let enabled = config.get_section_keys(b"extensions"); let mut unsupported = enabled; for supported in SUPPORTED_EXTENSIONS { unsupported.remove(supported); } if let Some(ignored_list) = config.get_list(b"rhg", b"ignored-extensions") { for ignored in ignored_list { unsupported.remove(ignored.as_slice()); } } if unsupported.is_empty() { Ok(()) } else { Err(CommandError::UnsupportedFeature { message: format_bytes!( b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)", join(unsupported, b", ") ), }) } } fn check_unsupported( config: &Config, ui: &ui::Ui, ) -> Result<(), CommandError> { check_extensions(config)?; if std::env::var_os("HG_PENDING").is_some() { // TODO: only if the value is `== repo.working_directory`? // What about relative v.s. absolute paths? Err(CommandError::unsupported("$HG_PENDING"))? } if config.has_non_empty_section(b"encode") { Err(CommandError::unsupported("[encode] config"))? } if config.has_non_empty_section(b"decode") { Err(CommandError::unsupported("[decode] config"))? 
} if let Some(color) = config.get(b"ui", b"color") { if (color == b"always" || color == b"debug") && !ui.plain() { Err(CommandError::unsupported("colored output"))? } } Ok(()) } diff --git a/tests/test-rhg-sparse-narrow.t b/tests/test-rhg-sparse-narrow.t new file mode 100644 --- /dev/null +++ b/tests/test-rhg-sparse-narrow.t @@ -0,0 +1,120 @@ +#require rhg + + $ NO_FALLBACK="env RHG_ON_UNSUPPORTED=abort" + +Rhg works well when sparse working copy is enabled. + + $ cd "$TESTTMP" + $ hg init repo-sparse + $ cd repo-sparse + $ cat > .hg/hgrc < [extensions] + > sparse= + > EOF + + $ echo a > show + $ echo x > hide + $ mkdir dir1 dir2 + $ echo x > dir1/x + $ echo y > dir1/y + $ echo z > dir2/z + + $ hg ci -Aqm 'initial' + $ hg debugsparse --include 'show' + $ ls -A + .hg + show + + $ tip=$(hg log -r . --template '{node}') + $ $NO_FALLBACK rhg files -r "$tip" + dir1/x + dir1/y + dir2/z + hide + show + $ $NO_FALLBACK rhg files + show + + $ $NO_FALLBACK rhg cat -r "$tip" hide + x + + $ cd .. + +We support most things when narrow is enabled, too, with a couple of caveats. + + $ . "$TESTDIR/narrow-library.sh" + $ real_hg=$RHG_FALLBACK_EXECUTABLE + + $ cat >> $HGRCPATH < [extensions] + > narrow= + > EOF + + $ hg clone --narrow ./repo-sparse repo-narrow --include dir1 + requesting all changes + adding changesets + adding manifests + adding file changes + added 1 changesets with 2 changes to 2 files + new changesets 6d714a4a2998 + updating to branch default + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved + + $ cd repo-narrow + + $ $NO_FALLBACK rhg cat -r "$tip" dir1/x + x + $ "$real_hg" cat -r "$tip" dir1/x + x + +TODO: bad error message + + $ $NO_FALLBACK rhg cat -r "$tip" hide + abort: invalid revision identifier: 6d714a4a2998cbfd0620db44da58b749f6565d63 + [255] + $ "$real_hg" cat -r "$tip" hide + [1] + +A naive implementation of [rhg files] leaks the paths that are supposed to be +hidden by narrow, so we just fall back to hg. 
+ + $ $NO_FALLBACK rhg files -r "$tip" + unsupported feature: rhg files -r is not supported in narrow clones + [252] + $ "$real_hg" files -r "$tip" + dir1/x + dir1/y + +Hg status needs to do some filtering based on narrow spec, so we don't +support it in rhg for narrow clones yet. + + $ mkdir dir2 + $ touch dir2/q + $ "$real_hg" status + $ $NO_FALLBACK rhg --config rhg.status=true status + unsupported feature: rhg status is not supported for sparse checkouts or narrow clones yet + [252] + +Adding "orphaned" index files: + + $ (cd ..; cp repo-sparse/.hg/store/data/hide.i repo-narrow/.hg/store/data/hide.i) + $ (cd ..; mkdir repo-narrow/.hg/store/data/dir2; cp repo-sparse/.hg/store/data/dir2/z.i repo-narrow/.hg/store/data/dir2/z.i) + $ "$real_hg" verify + checking changesets + checking manifests + crosschecking files in changesets and manifests + checking files + checked 1 changesets with 2 changes to 2 files + + $ "$real_hg" files -r "$tip" + dir1/x + dir1/y + +# TODO: even though [hg files] hides the orphaned dir2/z, [hg cat] still shows it. +# rhg has the same issue, but at least it's not specific to rhg. +# This is despite [hg verify] succeeding above. + + $ $NO_FALLBACK rhg cat -r "$tip" dir2/z + z + $ "$real_hg" cat -r "$tip" dir2/z + z