diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -3,6 +3,10 @@
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::Revision;
 use crate::revlog::{Node, NodePrefix};
+use crate::utils::hg_path::HgPath;
+use std::ascii::escape_default;
+use std::fmt::{Debug, Formatter};
+use std::ops::Range;
 
 /// A specialized `Revlog` to work with `changelog` data format.
 pub struct Changelog {
@@ -35,7 +39,12 @@
         if bytes.is_empty() {
             Ok(ChangelogRevisionData::null())
         } else {
-            Ok(ChangelogRevisionData::new(bytes))
+            Ok(ChangelogRevisionData::new(bytes).ok_or_else(|| {
+                RevlogError::Other(HgError::CorruptedRepository(format!(
+                    "Invalid changelog data for revision {}",
+                    rev
+                )))
+            })?)
         }
     }
 
@@ -45,21 +54,80 @@
 }
 
 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
-#[derive(Debug)]
+#[derive(PartialEq)]
 pub struct ChangelogRevisionData {
     /// The data bytes of the `changelog` entry.
     bytes: Vec<u8>,
+    /// The byte range for the hex manifest (not including the newline)
+    manifest_range: Range<usize>,
+    /// The byte range for the user+email (not including the newline)
+    user_range: Range<usize>,
+    /// The byte range for the timestamp+timezone+extras (not including the
+    /// newline)
+    timestamp_range: Range<usize>,
+    /// The byte range for the file list (including newlines between, but not
+    /// after); empty when the entry lists no files
+    files_range: Range<usize>,
+    /// The byte range for the description (including newlines)
+    description_range: Range<usize>,
 }
 
 impl ChangelogRevisionData {
-    fn new(bytes: Vec<u8>) -> Self {
-        Self { bytes }
+    /// Split raw changelog entry `bytes` into its fields.
+    ///
+    /// Returns `None` when the entry is truncated: a header line is
+    /// missing, or the blank line terminating the file list is absent.
+    fn new(bytes: Vec<u8>) -> Option<Self> {
+        let mut line_iter = bytes.split(|b| b == &b'\n');
+        // `split` yields at least one (possibly empty) slice, so this `?`
+        // never actually bails out.
+        let manifest_range = 0..line_iter.next()?.len();
+        let mut start_pos = manifest_range.end + 1;
+        let user_slice = line_iter.next()?;
+        let user_range = start_pos..start_pos + user_slice.len();
+        start_pos += user_slice.len() + 1;
+        let timestamp_slice = line_iter.next()?;
+        let timestamp_range = start_pos..start_pos + timestamp_slice.len();
+        start_pos += timestamp_slice.len() + 1;
+        let mut files_end_pos = start_pos;
+        loop {
+            // This line intentionally returns `None` if the list does not end
+            // in a newline
+            let line = line_iter.next()?;
+            if line.is_empty() {
+                break;
+            }
+            files_end_pos += line.len() + 1;
+        }
+        // `files_end_pos` is where the blank line ending the file list
+        // starts. Exclude the newline after the last file name, unless the
+        // list is empty: `start_pos..start_pos - 1` would be an inverted
+        // range, and slicing `bytes` with it panics.
+        let files_range = if files_end_pos == start_pos {
+            start_pos..start_pos
+        } else {
+            start_pos..files_end_pos - 1
+        };
+        if files_end_pos >= bytes.len() {
+            return None;
+        }
+        let description_range = files_end_pos + 1..bytes.len();
+
+        Some(Self {
+            bytes,
+            manifest_range,
+            user_range,
+            timestamp_range,
+            files_range,
+            description_range,
+        })
     }
 
     fn null() -> Self {
         Self::new(
             b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
         )
+        .expect("hard-coded null entry must be well-formed")
     }
 
     /// Return an iterator over the lines of the entry.
@@ -70,7 +138,110 @@
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
     pub fn manifest_node(&self) -> Result<Node, HgError> {
-        let manifest_node_hex = self.lines().next().unwrap();
+        let manifest_node_hex = &self.bytes[self.manifest_range.clone()];
         Node::from_hex_for_repo(manifest_node_hex)
     }
+
+    /// Return an iterator over the paths of the files listed in this
+    /// `changelog` entry (empty if the entry lists no files).
+    pub fn files(&self) -> impl Iterator<Item = &HgPath> {
+        self.bytes[self.files_range.clone()]
+            .split(|b| b == &b'\n')
+            // Splitting an empty file list still yields one empty slice;
+            // no listed file name is empty, so filter it out.
+            .filter(|path| !path.is_empty())
+            .map(HgPath::new)
+    }
+
+    /// Return the description of this `changelog` entry, as raw bytes.
+    pub fn description(&self) -> &[u8] {
+        &self.bytes[self.description_range.clone()]
+    }
 }
+
+impl Debug for ChangelogRevisionData {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ChangelogRevisionData")
+            .field("bytes", &debug_bytes(&self.bytes))
+            .field(
+                "manifest",
+                &debug_bytes(&self.bytes[self.manifest_range.clone()]),
+            )
+            .field("user", &debug_bytes(&self.bytes[self.user_range.clone()]))
+            .field(
+                "timestamp",
+                &debug_bytes(&self.bytes[self.timestamp_range.clone()]),
+            )
+            .field(
+                "files",
+                &debug_bytes(&self.bytes[self.files_range.clone()]),
+            )
+            .field(
+                "description",
+                &debug_bytes(&self.bytes[self.description_range.clone()]),
+            )
+            .finish()
+    }
+}
+
+/// Escape `bytes` into printable ASCII, for use in `Debug` output.
+fn debug_bytes(bytes: &[u8]) -> String {
+    bytes
+        .iter()
+        .flat_map(|b| escape_default(*b))
+        // `escape_default` only yields ASCII, so each byte is one `char`.
+        .map(char::from)
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use itertools::Itertools;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_create_changelogrevisiondata_invalid() {
+        assert_eq!(
+            ChangelogRevisionData::new(b"abcd\n\n0 0\nfile1".to_vec()),
+            None
+        );
+        assert_eq!(
+            ChangelogRevisionData::new(b"abcd\n\n0 0\nfile1\n".to_vec()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_create_changelogrevisiondata() {
+        let data = ChangelogRevisionData::new(
+            b"abcd
+Some One
+0 0
+file1
+file2
+
+some
+commit
+message"
+            .to_vec(),
+        )
+        .unwrap();
+        assert_eq!(
+            data.files().collect_vec(),
+            vec![HgPath::new("file1"), HgPath::new("file2")]
+        );
+        assert_eq!(data.description(), b"some\ncommit\nmessage");
+    }
+
+    #[test]
+    fn test_create_changelogrevisiondata_no_files() {
+        let data = ChangelogRevisionData::new(
+            b"abcd\nSome One\n0 0\n\nmessage".to_vec(),
+        )
+        .unwrap();
+        assert_eq!(data.files().count(), 0);
+        assert_eq!(data.description(), b"message");
+        assert_eq!(ChangelogRevisionData::null().files().count(), 0);
+    }
+}