diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs --- a/rust/hg-core/src/operations/cat.rs +++ b/rust/hg-core/src/operations/cat.rs @@ -5,15 +5,11 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use std::path::PathBuf; - use crate::repo::Repo; -use crate::revlog::path_encode::path_encode; -use crate::revlog::revlog::Revlog; use crate::revlog::revlog::RevlogError; use crate::revlog::Node; -use crate::utils::files::get_path_from_bytes; -use crate::utils::hg_path::{HgPath, HgPathBuf}; + +use crate::utils::hg_path::HgPathBuf; pub struct CatOutput { /// Whether any file in the manifest matched the paths given as CLI @@ -27,8 +23,6 @@ pub node: Node, } -const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n']; - /// Output the given revision of files /// /// * `root`: Repository root @@ -54,26 +48,10 @@ if cat_file.as_bytes() == manifest_file.as_bytes() { *is_matched = true; found_any = true; - let index_path = store_path(manifest_file, b".i"); - let data_path = store_path(manifest_file, b".d"); - - let file_log = - Revlog::open(repo, &index_path, Some(&data_path))?; + let file_log = repo.filelog(manifest_file)?; let file_node = Node::from_hex_for_repo(node_bytes)?; - let file_rev = file_log.get_node_rev(file_node.into())?; - let data = file_log.get_rev_data(file_rev)?; - if data.starts_with(&METADATA_DELIMITER) { - let end_delimiter_position = data - [METADATA_DELIMITER.len()..] - .windows(METADATA_DELIMITER.len()) - .position(|bytes| bytes == METADATA_DELIMITER); - if let Some(position) = end_delimiter_position { - let offset = METADATA_DELIMITER.len() * 2; - bytes.extend(data[position + offset..].iter()); - } - } else { - bytes.extend(data); - } + let entry = file_log.get_node(file_node)?; + bytes.extend(entry.data()?) } } } @@ -91,9 +69,3 @@ node, }) } - -fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { - let encoded_bytes = - path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); - get_path_from_bytes(&encoded_bytes).into() -} diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -7,8 +7,10 @@ use crate::errors::HgResultExt; use crate::manifest::{Manifest, Manifestlog}; use crate::requirements; +use crate::revlog::filelog::Filelog; use crate::revlog::revlog::RevlogError; use crate::utils::files::get_path_from_bytes; +use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::{is_dir, is_file, Vfs}; use crate::{exit_codes, Node}; @@ -346,6 +348,10 @@ Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; manifest.get_node(manifest_node.into()) } + + pub fn filelog(&self, path: &HgPath) -> Result { + Filelog::open(self, path) + } } /// Lazily-initialized component of `Repo` with interior mutability diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs --- a/rust/hg-core/src/revlog.rs +++ b/rust/hg-core/src/revlog.rs @@ -11,6 +11,7 @@ pub mod path_encode; pub use node::{FromHexError, Node, NodePrefix}; pub mod changelog; +pub mod filelog; pub mod index; pub mod manifest; pub mod patch; diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/revlog/filelog.rs @@ -0,0 +1,79 @@ +use crate::errors::HgError; +use crate::repo::Repo; +use crate::revlog::path_encode::path_encode; +use crate::revlog::revlog::{Revlog, RevlogError}; +use crate::revlog::NodePrefix; +use crate::revlog::Revision; +use crate::utils::files::get_path_from_bytes; +use crate::utils::hg_path::HgPath; +use crate::utils::SliceExt; +use std::borrow::Cow; +use std::path::PathBuf; + +/// A specialized `Revlog` to work with file data logs. +pub struct Filelog { + /// The generic `revlog` format. + revlog: Revlog, +} + +impl Filelog { + pub fn open(repo: &Repo, file_path: &HgPath) -> Result { + let index_path = store_path(file_path, b".i"); + let data_path = store_path(file_path, b".d"); + let revlog = Revlog::open(repo, index_path, Some(&data_path))?; + Ok(Self { revlog }) + } + + /// The given node ID is that of the file as found in a manifest, not of a + /// changeset. + pub fn get_node( + &self, + file_node: impl Into, + ) -> Result { + let file_rev = self.revlog.get_node_rev(file_node.into())?; + self.get_rev(file_rev) + } + + /// The given revision is that of the file as found in a manifest, not of a + /// changeset. + pub fn get_rev( + &self, + file_rev: Revision, + ) -> Result { + let data = self.revlog.get_rev_data(file_rev)?; + Ok(FilelogEntry(data.into())) + } +} + +fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { + let encoded_bytes = + path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); + get_path_from_bytes(&encoded_bytes).into() +} + +pub struct FilelogEntry<'filelog>(Cow<'filelog, [u8]>); + +impl<'filelog> FilelogEntry<'filelog> { + /// Split into metadata and data + pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { + const DELIMITER: &[u8; 2] = &[b'\x01', b'\n']; + + if let Some(rest) = self.0.drop_prefix(DELIMITER) { + if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { + Ok((Some(metadata), data)) + } else { + Err(HgError::corrupted( + "Missing metadata end delimiter in filelog entry", + )) + } + } else { + Ok((None, &self.0)) + } + } + + /// Returns the file contents at this revision, stripped of any metadata + pub fn data(&self) -> Result<&[u8], HgError> { + let (_metadata, data) = self.split()?; + Ok(data) + } +} diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs --- a/rust/hg-core/src/utils.rs +++ b/rust/hg-core/src/utils.rs @@ -74,6 +74,7 @@ fn trim(&self) -> &Self; fn drop_prefix(&self, needle: &Self) -> Option<&Self>; fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; + fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>; } impl SliceExt for [u8] { @@ -134,6 +135,14 @@ let b = iter.next()?; Some((a, b)) } + + fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { + if let Some(pos) = find_slice_in_slice(self, separator) { + Some((&self[..pos], &self[pos + separator.len()..])) + } else { + None + } + } } pub trait Escaped {