diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs
--- a/rust/hg-core/src/revlog/filelog.rs
+++ b/rust/hg-core/src/revlog/filelog.rs
@@ -1,6 +1,7 @@
 use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::path_encode::path_encode;
+use crate::revlog::revlog::RevlogEntry;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
@@ -23,7 +24,7 @@
         Ok(Self { revlog })
     }
 
-    /// The given node ID is that of the file as found in a manifest, not of a
+    /// The given node ID is that of the file as found in a filelog, not of a
     /// changeset.
     pub fn data_for_node(
         &self,
@@ -33,7 +34,7 @@
         self.data_for_rev(file_rev)
     }
 
-    /// The given revision is that of the file as found in a manifest, not of a
+    /// The given revision is that of the file as found in a filelog, not of a
     /// changeset.
     pub fn data_for_rev(
         &self,
@@ -42,6 +43,25 @@
         let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
         Ok(FilelogRevisionData(data.into()))
     }
+
+    /// The given node ID is that of the file as found in a filelog, not of a
+    /// changeset.
+    pub fn entry_for_node(
+        &self,
+        file_node: impl Into<NodePrefix>,
+    ) -> Result<FilelogEntry, RevlogError> {
+        let file_rev = self.revlog.rev_from_node(file_node.into())?;
+        self.entry_for_rev(file_rev)
+    }
+
+    /// The given revision is that of the file as found in a filelog, not of a
+    /// changeset.
+    pub fn entry_for_rev(
+        &self,
+        file_rev: Revision,
+    ) -> Result<FilelogEntry, RevlogError> {
+        Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
+    }
 }
 
 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
@@ -50,6 +70,14 @@
     get_path_from_bytes(&encoded_bytes).into()
 }
 
+pub struct FilelogEntry<'a>(RevlogEntry<'a>);
+
+impl FilelogEntry<'_> {
+    pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
+        Ok(FilelogRevisionData(self.0.data()?.into_owned()))
+    }
+}
+
 /// The data for one revision in a filelog, uncompressed and delta-resolved.
 pub struct FilelogRevisionData(Vec<u8>);
 
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -39,9 +39,13 @@
     }
 }
 
+fn corrupted() -> HgError {
+    HgError::corrupted("corrupted revlog")
+}
+
 impl RevlogError {
     fn corrupted() -> Self {
-        RevlogError::Other(HgError::corrupted("corrupted revlog"))
+        RevlogError::Other(corrupted())
     }
 }
 
@@ -191,7 +195,7 @@
         if rev == NULL_REVISION {
             return Ok(Cow::Borrowed(&[]));
         };
-        self.get_entry(rev)?.data()
+        Ok(self.get_entry(rev)?.data()?)
     }
 
     /// Check the hash of some given data against the recorded hash.
@@ -222,13 +226,13 @@
     fn build_data_from_deltas(
         snapshot: RevlogEntry,
         deltas: &[RevlogEntry],
-    ) -> Result<Vec<u8>, RevlogError> {
+    ) -> Result<Vec<u8>, HgError> {
         let snapshot = snapshot.data_chunk()?;
         let deltas = deltas
             .iter()
             .rev()
             .map(RevlogEntry::data_chunk)
-            .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
+            .collect::<Result<Vec<_>, _>>()?;
         let patches: Vec<_> =
             deltas.iter().map(|d| patch::PatchList::new(d)).collect();
         let patch = patch::fold_patch_lists(&patches);
@@ -246,7 +250,10 @@
     }
 
     /// Get an entry of the revlog.
-    fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
+    pub fn get_entry(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
         let index_entry = self
             .index
             .get_entry(rev)
@@ -281,8 +288,8 @@
     fn get_entry_internal(
         &self,
         rev: Revision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+    ) -> Result<RevlogEntry, HgError> {
+        return self.get_entry(rev).map_err(|_| corrupted());
     }
 }
 
@@ -304,7 +311,7 @@
     }
 
     /// The data for this entry, after resolving deltas if any.
-    pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+    pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
         let mut entry = self.clone();
         let mut delta_chain = vec![];
 
@@ -328,7 +335,7 @@
             .revlog
             .index
             .get_entry(self.rev)
-            .ok_or(RevlogError::InvalidRevision)?;
+            .ok_or_else(corrupted)?;
 
         let data = if delta_chain.is_empty() {
             entry.data_chunk()?
@@ -344,13 +351,13 @@
         ) {
             Ok(data)
         } else {
-            Err(RevlogError::corrupted())
+            Err(corrupted())
         }
     }
 
     /// Extract the data contained in the entry.
     /// This may be a delta. (See `is_delta`.)
-    fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+    fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
         if self.bytes.is_empty() {
             return Ok(Cow::Borrowed(&[]));
         }
@@ -365,39 +372,35 @@
             // zstd data.
             b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
             // A proper new format should have had a repo/store requirement.
-            _format_type => Err(RevlogError::corrupted()),
+            _format_type => Err(corrupted()),
         }
     }
 
-    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
+    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
         let mut decoder = ZlibDecoder::new(self.bytes);
         if self.is_delta() {
             let mut buf = Vec::with_capacity(self.compressed_len);
-            decoder
-                .read_to_end(&mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+            decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
-            decoder
-                .read_exact(&mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+            decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
             Ok(buf)
         }
     }
 
-    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
+    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
         if self.is_delta() {
             let mut buf = Vec::with_capacity(self.compressed_len);
             zstd::stream::copy_decode(self.bytes, &mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+                .map_err(|_| corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
             let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+                .map_err(|_| corrupted())?;
             if len != self.uncompressed_len {
-                Err(RevlogError::corrupted())
+                Err(corrupted())
             } else {
                 Ok(buf)
             }
diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -512,17 +512,18 @@
     }
     let filelog = repo.filelog(hg_path)?;
     let fs_len = fs_metadata.len();
+    let filelog_entry =
+        filelog.entry_for_node(entry.node_id()?).map_err(|_| {
+            HgError::corrupted("filelog missing node from manifest")
+        })?;
     // TODO: check `fs_len` here like below, but based on
     // `RevlogEntry::uncompressed_len` without decompressing the full filelog
     // contents where possible. This is only valid if the revlog data does not
     // contain metadata. See how Python’s `revlog.rawsize` calls
     // `storageutil.filerevisioncopied`.
     // (Maybe also check for content-modifying flags? See `revlog.size`.)
-    let filelog_entry =
-        filelog.data_for_node(entry.node_id()?).map_err(|_| {
-            HgError::corrupted("filelog missing node from manifest")
-        })?;
-    let contents_in_p1 = filelog_entry.file_data()?;
+    let filelog_data = filelog_entry.data()?;
+    let contents_in_p1 = filelog_data.file_data()?;
     if contents_in_p1.len() as u64 != fs_len {
         // No need to read the file contents:
         // it cannot be equal if it has a different length.