diff --git a/rust/hg-core/src/errors.rs b/rust/hg-core/src/errors.rs --- a/rust/hg-core/src/errors.rs +++ b/rust/hg-core/src/errors.rs @@ -35,6 +35,9 @@ impl HgError { pub fn corrupted(explanation: impl Into) -> Self { + // TODO: capture a backtrace here and keep it in the error value + // to aid debugging? + // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html HgError::CorruptedRepository(explanation.into()) } } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,6 +3,7 @@ // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. + mod ancestors; pub mod dagops; pub mod errors; diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs --- a/rust/hg-core/src/operations/cat.rs +++ b/rust/hg-core/src/operations/cat.rs @@ -33,8 +33,8 @@ let changelog = Changelog::open(repo)?; let manifest = Manifest::open(repo)?; let changelog_entry = changelog.get_rev(rev)?; - let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?) - .map_err(|_| RevlogError::Corrupted)?; + let manifest_node = + Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; let manifest_entry = manifest.get_node(manifest_node.into())?; let mut bytes = vec![]; @@ -46,8 +46,7 @@ let file_log = Revlog::open(repo, &index_path, Some(&data_path))?; - let file_node = Node::from_hex(node_bytes) - .map_err(|_| RevlogError::Corrupted)?; + let file_node = Node::from_hex_for_repo(node_bytes)?; let file_rev = file_log.get_node_rev(file_node.into())?; let data = file_log.get_rev_data(file_rev)?; if data.starts_with(&METADATA_DELIMITER) { diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs --- a/rust/hg-core/src/operations/list_tracked_files.rs +++ b/rust/hg-core/src/operations/list_tracked_files.rs @@ -6,7 +6,7 @@ // GNU General Public License version 2 or any later version. use crate::dirstate::parsers::parse_dirstate; -use crate::errors::{HgError, IoResultExt}; +use crate::errors::HgError; use crate::repo::Repo; use crate::revlog::changelog::Changelog; use crate::revlog::manifest::{Manifest, ManifestEntry}; @@ -25,12 +25,7 @@ impl Dirstate { pub fn new(repo: &Repo) -> Result { - let content = repo - .hg_vfs() - .read("dirstate") - // TODO: this will be more accurate when we use `HgError` in - // `Vfs::read`. - .for_file("dirstate".as_ref())?; + let content = repo.hg_vfs().read("dirstate")?; Ok(Self { content }) } @@ -57,8 +52,8 @@ let changelog = Changelog::open(repo)?; let manifest = Manifest::open(repo)?; let changelog_entry = changelog.get_rev(rev)?; - let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?) - .map_err(|_| RevlogError::Corrupted)?; + let manifest_node = + Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; let manifest_entry = manifest.get_node(manifest_node.into())?; Ok(FilesForRev(manifest_entry)) } diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -1,4 +1,4 @@ -use crate::errors::HgError; +use crate::errors::{HgError, IoResultExt}; use crate::operations::{find_root, FindRootError}; use crate::requirements; use memmap::{Mmap, MmapOptions}; @@ -68,24 +68,19 @@ pub(crate) fn read( &self, relative_path: impl AsRef, - ) -> std::io::Result> { - std::fs::read(self.base.join(relative_path)) - } - - pub(crate) fn open( - &self, - relative_path: impl AsRef, - ) -> std::io::Result { - std::fs::File::open(self.base.join(relative_path)) + ) -> Result, HgError> { + let path = self.base.join(relative_path); + std::fs::read(&path).for_file(&path) } pub(crate) fn mmap_open( &self, relative_path: impl AsRef, - ) -> std::io::Result { - let file = self.open(relative_path)?; + ) -> Result { + let path = self.base.join(relative_path); + let file = std::fs::File::open(&path).for_file(&path)?; // TODO: what are the safety requirements here? - let mmap = unsafe { MmapOptions::new().map(&file) }?; + let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?; Ok(mmap) } } diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs --- a/rust/hg-core/src/requirements.rs +++ b/rust/hg-core/src/requirements.rs @@ -1,4 +1,4 @@ -use crate::errors::{HgError, HgResultExt, IoResultExt}; +use crate::errors::{HgError, HgResultExt}; use crate::repo::Repo; fn parse(bytes: &[u8]) -> Result, HgError> { @@ -22,11 +22,8 @@ } pub fn load(repo: &Repo) -> Result, HgError> { - if let Some(bytes) = repo - .hg_vfs() - .read("requires") - .for_file("requires".as_ref()) - .io_not_found_as_none()? + if let Some(bytes) = + repo.hg_vfs().read("requires").io_not_found_as_none()? { parse(&bytes) } else { diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs --- a/rust/hg-core/src/revlog/changelog.rs +++ b/rust/hg-core/src/revlog/changelog.rs @@ -1,3 +1,4 @@ +use crate::errors::HgError; use crate::repo::Repo; use crate::revlog::revlog::{Revlog, RevlogError}; use crate::revlog::NodePrefix; @@ -53,6 +54,8 @@ /// Return the node id of the `manifest` referenced by this `changelog` /// entry. pub fn manifest_node(&self) -> Result<&[u8], RevlogError> { - self.lines().next().ok_or(RevlogError::Corrupted) + self.lines() + .next() + .ok_or_else(|| HgError::corrupted("empty changelog entry").into()) } } diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs +++ b/rust/hg-core/src/revlog/index.rs @@ -3,6 +3,7 @@ use byteorder::{BigEndian, ByteOrder}; +use crate::errors::HgError; use crate::revlog::node::Node; use crate::revlog::revlog::RevlogError; use crate::revlog::{Revision, NULL_REVISION}; @@ -44,7 +45,8 @@ offsets: Some(offsets), }) } else { - Err(RevlogError::Corrupted) + Err(HgError::corrupted("unexpected inline revlog length") + .into()) } } else { Ok(Self { diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs --- a/rust/hg-core/src/revlog/node.rs +++ b/rust/hg-core/src/revlog/node.rs @@ -8,6 +8,7 @@ //! In Mercurial code base, it is customary to call "a node" the binary SHA //! of a revision. +use crate::errors::HgError; use bytes_cast::BytesCast; use std::convert::{TryFrom, TryInto}; use std::fmt; @@ -136,6 +137,19 @@ } } + /// `from_hex`, but for input from an internal file of the repository such + /// as a changelog or manifest entry. + /// + /// An error is treated as repository corruption. + pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result { + Self::from_hex(hex.as_ref()).map_err(|FromHexError| { + HgError::CorruptedRepository(format!( + "Expected a full hexadecimal node ID, found {}", + String::from_utf8_lossy(hex.as_ref()) + )) + }) + } + /// Provide access to binary data /// /// This is needed by FFI layers, for instance to return expected diff --git a/rust/hg-core/src/revlog/nodemap_docket.rs b/rust/hg-core/src/revlog/nodemap_docket.rs --- a/rust/hg-core/src/revlog/nodemap_docket.rs +++ b/rust/hg-core/src/revlog/nodemap_docket.rs @@ -1,3 +1,4 @@ +use crate::errors::{HgError, HgResultExt}; use bytes_cast::{unaligned, BytesCast}; use memmap::Mmap; use std::path::{Path, PathBuf}; @@ -38,12 +39,12 @@ index_path: &Path, ) -> Result, RevlogError> { let docket_path = index_path.with_extension("n"); - let docket_bytes = match repo.store_vfs().read(&docket_path) { - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - return Ok(None) - } - Err(e) => return Err(RevlogError::IoError(e)), - Ok(bytes) => bytes, + let docket_bytes = if let Some(bytes) = + repo.store_vfs().read(&docket_path).io_not_found_as_none()? + { + bytes + } else { + return Ok(None); }; let input = if let Some((&ONDISK_VERSION, rest)) = @@ -54,36 +55,40 @@ return Ok(None); }; - let (header, rest) = DocketHeader::from_bytes(input)?; + /// Treat any error as a parse error + fn parse(result: Result) -> Result { + result.map_err(|_| { + HgError::corrupted("nodemap docket parse error").into() + }) + } + + let (header, rest) = parse(DocketHeader::from_bytes(input))?; let uid_size = header.uid_size as usize; // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit // systems? let tip_node_size = header.tip_node_size.get() as usize; let data_length = header.data_length.get() as usize; - let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?; - let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?; - let uid = - std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?; + let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?; + let (_tip_node, _rest) = + parse(u8::slice_from_bytes(rest, tip_node_size))?; + let uid = parse(std::str::from_utf8(uid))?; let docket = NodeMapDocket { data_length }; let data_path = rawdata_path(&docket_path, uid); - // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap` + // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap` // config is false? - match repo.store_vfs().mmap_open(&data_path) { - Ok(mmap) => { - if mmap.len() >= data_length { - Ok(Some((docket, mmap))) - } else { - Err(RevlogError::Corrupted) - } + if let Some(mmap) = repo + .store_vfs() + .mmap_open(&data_path) + .io_not_found_as_none()? + { + if mmap.len() >= data_length { + Ok(Some((docket, mmap))) + } else { + Err(HgError::corrupted("persistent nodemap too short").into()) } - Err(error) => { - if error.kind() == std::io::ErrorKind::NotFound { - Ok(None) - } else { - Err(RevlogError::IoError(error)) - } - } + } else { + Ok(None) } } } diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs --- a/rust/hg-core/src/revlog/revlog.rs +++ b/rust/hg-core/src/revlog/revlog.rs @@ -13,25 +13,34 @@ use super::index::Index; use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE}; use super::nodemap; -use super::nodemap::NodeMap; +use super::nodemap::{NodeMap, NodeMapError}; use super::nodemap_docket::NodeMapDocket; use super::patch; +use crate::errors::HgError; use crate::repo::Repo; use crate::revlog::Revision; +#[derive(derive_more::From)] pub enum RevlogError { - IoError(std::io::Error), - UnsuportedVersion(u16), InvalidRevision, /// Found more than one entry whose ID match the requested prefix AmbiguousPrefix, - Corrupted, - UnknowDataFormat(u8), + #[from] + Other(HgError), } -impl From for RevlogError { - fn from(_: bytes_cast::FromBytesError) -> Self { - RevlogError::Corrupted +impl From for RevlogError { + fn from(error: NodeMapError) -> Self { + match error { + NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, + NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(), + } + } +} + +impl RevlogError { + fn corrupted() -> Self { + RevlogError::Other(HgError::corrupted("corrupted revlog")) } } @@ -59,14 +68,12 @@ data_path: Option<&Path>, ) -> Result { let index_path = index_path.as_ref(); - let index_mmap = repo - .store_vfs() - .mmap_open(&index_path) - .map_err(RevlogError::IoError)?; + let index_mmap = repo.store_vfs().mmap_open(&index_path)?; let version = get_version(&index_mmap); if version != 1 { - return Err(RevlogError::UnsuportedVersion(version)); + // A proper new version should have had a repo/store requirement. + return Err(RevlogError::corrupted()); } let index = Index::new(Box::new(index_mmap))?; @@ -80,10 +87,7 @@ None } else { let data_path = data_path.unwrap_or(&default_data_path); - let data_mmap = repo - .store_vfs() - .mmap_open(data_path) - .map_err(RevlogError::IoError)?; + let data_mmap = repo.store_vfs().mmap_open(data_path)?; Some(Box::new(data_mmap)) }; @@ -121,9 +125,7 @@ ) -> Result { if let Some(nodemap) = &self.nodemap { return nodemap - .find_bin(&self.index, node) - // TODO: propagate details of this error: - .map_err(|_| RevlogError::Corrupted)? + .find_bin(&self.index, node)? .ok_or(RevlogError::InvalidRevision); } @@ -136,7 +138,9 @@ let mut found_by_prefix = None; for rev in (0..self.len() as Revision).rev() { let index_entry = - self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?; + self.index.get_entry(rev).ok_or(HgError::corrupted( + "revlog references a revision not in the index", + ))?; if node == *index_entry.hash() { return Ok(rev); } @@ -167,8 +171,9 @@ let mut delta_chain = vec![]; while let Some(base_rev) = entry.base_rev { delta_chain.push(entry); - entry = - self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?; + entry = self + .get_entry(base_rev) + .map_err(|_| RevlogError::corrupted())?; } // TODO do not look twice in the index @@ -191,7 +196,7 @@ ) { Ok(data) } else { - Err(RevlogError::Corrupted) + Err(RevlogError::corrupted()) } } @@ -301,7 +306,8 @@ b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), // zstd data. b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), - format_type => Err(RevlogError::UnknowDataFormat(format_type)), + // A proper new format should have had a repo/store requirement. + _format_type => Err(RevlogError::corrupted()), } } @@ -311,13 +317,13 @@ let mut buf = Vec::with_capacity(self.compressed_len); decoder .read_to_end(&mut buf) - .or(Err(RevlogError::Corrupted))?; + .map_err(|_| RevlogError::corrupted())?; Ok(buf) } else { let mut buf = vec![0; self.uncompressed_len]; decoder .read_exact(&mut buf) - .or(Err(RevlogError::Corrupted))?; + .map_err(|_| RevlogError::corrupted())?; Ok(buf) } } @@ -326,14 +332,14 @@ if self.is_delta() { let mut buf = Vec::with_capacity(self.compressed_len); zstd::stream::copy_decode(self.bytes, &mut buf) - .or(Err(RevlogError::Corrupted))?; + .map_err(|_| RevlogError::corrupted())?; Ok(buf) } else { let mut buf = vec![0; self.uncompressed_len]; let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) - .or(Err(RevlogError::Corrupted))?; + .map_err(|_| RevlogError::corrupted())?; if len != self.uncompressed_len { - Err(RevlogError::Corrupted) + Err(RevlogError::corrupted()) } else { Ok(buf) } diff --git a/rust/rhg/src/error.rs b/rust/rhg/src/error.rs --- a/rust/rhg/src/error.rs +++ b/rust/rhg/src/error.rs @@ -103,9 +103,6 @@ impl From<(RevlogError, &str)> for CommandError { fn from((err, rev): (RevlogError, &str)) -> CommandError { match err { - RevlogError::IoError(err) => CommandError::Abort(Some( - utf8_to_local(&format!("abort: {}\n", err)).into(), - )), RevlogError::InvalidRevision => CommandError::Abort(Some( utf8_to_local(&format!( "abort: invalid revision identifier {}\n", @@ -120,27 +117,7 @@ )) .into(), )), - RevlogError::UnsuportedVersion(version) => { - CommandError::Abort(Some( - utf8_to_local(&format!( - "abort: unsupported revlog version {}\n", - version - )) - .into(), - )) - } - RevlogError::Corrupted => { - CommandError::Abort(Some("abort: corrupted revlog\n".into())) - } - RevlogError::UnknowDataFormat(format) => { - CommandError::Abort(Some( - utf8_to_local(&format!( - "abort: unknow revlog dataformat {:?}\n", - format - )) - .into(), - )) - } + RevlogError::Other(err) => CommandError::Other(err), } } }