diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs --- a/rust/hg-core/src/requirements.rs +++ b/rust/hg-core/src/requirements.rs @@ -69,4 +69,8 @@ "revlogv1", "sparserevlog", "store", + // As of this writing everything rhg does is read-only. + // When it starts writing to the repository, it’ll need to either keep the + // persistent nodemap up to date or remove this entry: + "persistent-nodemap", ]; diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs --- a/rust/hg-core/src/revlog.rs +++ b/rust/hg-core/src/revlog.rs @@ -7,6 +7,7 @@ pub mod node; pub mod nodemap; +mod nodemap_docket; pub mod path_encode; pub use node::{Node, NodeError, NodePrefix, NodePrefixRef}; pub mod changelog; diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs +++ b/rust/hg-core/src/revlog/index.rs @@ -132,6 +132,16 @@ } } +impl super::RevlogIndex for Index { + fn len(&self) -> usize { + self.len() + } + + fn node(&self, rev: Revision) -> Option<&Node> { + self.get_entry(rev).map(|entry| entry.hash()) + } +} + #[derive(Debug)] pub struct IndexEntry<'a> { bytes: &'a [u8], @@ -190,7 +200,7 @@ /// /// Currently, SHA-1 is used and only the first 20 bytes of this field /// are used. - pub fn hash(&self) -> &Node { + pub fn hash(&self) -> &'a Node { (&self.bytes[32..52]).try_into().unwrap() } } diff --git a/rust/hg-core/src/revlog/nodemap_docket.rs b/rust/hg-core/src/revlog/nodemap_docket.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/revlog/nodemap_docket.rs @@ -0,0 +1,112 @@ +use memmap::Mmap; +use std::convert::TryInto; +use std::path::{Path, PathBuf}; + +use super::revlog::{mmap_open, RevlogError}; + +const ONDISK_VERSION: u8 = 1; + +pub(super) struct NodeMapDocket { + pub data_length: usize, + // TODO: keep here more of the data from `parse()` when we need it +} + +impl NodeMapDocket { + pub fn read_from_file( + index_path: &Path, + ) -> Result, RevlogError> { + let docket_path = index_path.with_extension("n"); + let docket_bytes = match std::fs::read(&docket_path) { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + return Ok(None) + } + Err(e) => return Err(RevlogError::IoError(e)), + Ok(bytes) => bytes, + }; + + let mut input = if let Some((&ONDISK_VERSION, rest)) = + docket_bytes.split_first() + { + rest + } else { + return Ok(None); + }; + let input = &mut input; + + let uid_size = read_u8(input)? as usize; + let _tip_rev = read_be_u64(input)?; + // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit + // systems? + let data_length = read_be_u64(input)? as usize; + let _data_unused = read_be_u64(input)?; + let tip_node_size = read_be_u64(input)? as usize; + let uid = read_bytes(input, uid_size)?; + let _tip_node = read_bytes(input, tip_node_size)?; + + let uid = + std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?; + let docket = NodeMapDocket { data_length }; + + // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap` + // config is false? + let mmap = mmap_open(&rawdata_path(&docket_path, uid)) + // TODO: do we want to silently ignore a file not found here and + // return `None`? The equivalent Python code has a + // branch that might have intended to do that, but looks like it + // causes a NameError exception. + .map_err(RevlogError::IoError)?; + if mmap.len() < data_length { + return Ok(None); + } + + Ok(Some((docket, mmap))) + } +} + +fn read_bytes<'a>( + input: &mut &'a [u8], + count: usize, +) -> Result<&'a [u8], RevlogError> { + if let Some(start) = input.get(..count) { + *input = &input[count..]; + Ok(start) + } else { + Err(RevlogError::Corrupted) + } +} + +fn read_u8<'a>(input: &mut &[u8]) -> Result { + Ok(read_bytes(input, 1)?[0]) +} + +fn read_be_u64<'a>(input: &mut &[u8]) -> Result { + let array = read_bytes(input, std::mem::size_of::())? + .try_into() + .unwrap(); + Ok(u64::from_be_bytes(array)) +} + +fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf { + let docket_name = docket_path + .file_name() + .expect("expected a base name") + .to_str() + .expect("expected an ASCII file name in the store"); + let prefix = strip_suffix(docket_name, ".n.a") + .or_else(|| strip_suffix(docket_name, ".n")) + .expect("expected docket path in .n or .n.a"); + let name = format!("{}-{}.nd", prefix, uid); + docket_path + .parent() + .expect("expected a non-root path") + .join(name) +} + +// TODO: use the str method when we require Rust 1.45 +fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> { + if s.ends_with(suffix) { + Some(&s[..s.len() - suffix.len()]) + } else { + None + } +} diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs --- a/rust/hg-core/src/revlog/revlog.rs +++ b/rust/hg-core/src/revlog/revlog.rs @@ -14,6 +14,9 @@ use super::index::Index; use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE}; +use super::nodemap; +use super::nodemap::NodeMap; +use super::nodemap_docket::NodeMapDocket; use super::patch; use crate::revlog::Revision; @@ -27,7 +30,7 @@ UnknowDataFormat(u8), } -fn mmap_open(path: &Path) -> Result { +pub(super) fn mmap_open(path: &Path) -> Result { let file = File::open(path)?; let mmap = unsafe { MmapOptions::new().map(&file) }?; Ok(mmap) @@ -41,6 +44,7 @@ index: Index, /// When index and data are not interleaved: bytes of the revlog data data_bytes: Option + Send>>, + nodemap: Option, } impl Revlog { @@ -77,7 +81,20 @@ Some(Box::new(data_mmap)) }; - Ok(Revlog { index, data_bytes }) + let nodemap = NodeMapDocket::read_from_file(index_path)?.map( + |(docket, data)| { + nodemap::NodeTree::load_bytes( + Box::new(data), + docket.data_length, + ) + }, + ); + + Ok(Revlog { + index, + data_bytes, + nodemap, + }) } /// Return number of entries of the `Revlog`. @@ -96,6 +113,13 @@ &self, node: NodePrefixRef, ) -> Result { + if let Some(nodemap) = &self.nodemap { + return nodemap + .find_bin(&self.index, node) + // TODO: propagate details of this error: + .map_err(|_| RevlogError::Corrupted)? + .ok_or(RevlogError::InvalidRevision); + } // This is brute force. But it is fast enough for now. // Optimization will come later. let mut found_by_prefix = None; diff --git a/tests/test-rhg.t b/tests/test-rhg.t --- a/tests/test-rhg.t +++ b/tests/test-rhg.t @@ -197,4 +197,6 @@ .hg/store/00changelog.i .hg/store/00changelog.n $ rhg files -r c3ae8dec9fad - [252] + of + $ rhg cat -r c3ae8dec9fad of + r5000