Changeset View
Changeset View
Standalone View
Standalone View
rust/hg-core/src/dirstate_tree/on_disk.rs
Show First 20 Lines • Show All 41 Lines • ▼ Show 20 Line(s) | |||||
const STORED_NODE_ID_BYTES: usize = 32; | const STORED_NODE_ID_BYTES: usize = 32; | ||||
/// … even though only 160 bits are used for now, with SHA-1 | /// … even though only 160 bits are used for now, with SHA-1 | ||||
const USED_NODE_ID_BYTES: usize = 20; | const USED_NODE_ID_BYTES: usize = 20; | ||||
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; | pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; | ||||
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; | pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; | ||||
/// Must match the constant of the same name in | |||||
/// `mercurial/dirstateutils/docket.py` | |||||
const TREE_METADATA_SIZE: usize = 40; | |||||
/// Make sure that size-affecting changes are made knowingly | |||||
#[allow(unused)] | |||||
fn static_assert_size_of() { | |||||
let _ = std::mem::transmute::<DocketHeader, [u8; 121]>; | |||||
let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>; | |||||
let _ = std::mem::transmute::<Node, [u8; 43]>; | |||||
} | |||||
// Must match `HEADER` in `mercurial/dirstateutils/docket.py` | // Must match `HEADER` in `mercurial/dirstateutils/docket.py` | ||||
#[derive(BytesCast)] | #[derive(BytesCast)] | ||||
#[repr(C)] | #[repr(C)] | ||||
struct DocketHeader { | struct DocketHeader { | ||||
marker: [u8; V2_FORMAT_MARKER.len()], | marker: [u8; V2_FORMAT_MARKER.len()], | ||||
parent_1: [u8; STORED_NODE_ID_BYTES], | parent_1: [u8; STORED_NODE_ID_BYTES], | ||||
parent_2: [u8; STORED_NODE_ID_BYTES], | parent_2: [u8; STORED_NODE_ID_BYTES], | ||||
/// Counted in bytes | /// Counted in bytes | ||||
data_size: Size, | data_size: Size, | ||||
metadata: TreeMetadata, | |||||
uuid_size: u8, | uuid_size: u8, | ||||
} | } | ||||
pub struct Docket<'on_disk> { | pub struct Docket<'on_disk> { | ||||
header: &'on_disk DocketHeader, | header: &'on_disk DocketHeader, | ||||
uuid: &'on_disk [u8], | uuid: &'on_disk [u8], | ||||
} | } | ||||
#[derive(BytesCast)] | #[derive(BytesCast)] | ||||
#[repr(C)] | #[repr(C)] | ||||
struct Root { | struct TreeMetadata { | ||||
root_nodes: ChildNodes, | root_nodes: ChildNodes, | ||||
nodes_with_entry_count: Size, | nodes_with_entry_count: Size, | ||||
nodes_with_copy_source_count: Size, | nodes_with_copy_source_count: Size, | ||||
/// How many bytes of this data file are not used anymore | /// How many bytes of this data file are not used anymore | ||||
unreachable_bytes: Size, | unreachable_bytes: Size, | ||||
/// If non-zero, a hash of ignore files that were used for some previous | /// If non-zero, a hash of ignore files that were used for some previous | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Line(s) | pub(super) struct Node { | ||||
/// - That timestamp was already strictly in the past when observed, | /// - That timestamp was already strictly in the past when observed, | ||||
/// meaning that later changes cannot happen in the same clock tick | /// meaning that later changes cannot happen in the same clock tick | ||||
/// and must cause a different modification time (unless the system | /// and must cause a different modification time (unless the system | ||||
/// clock jumps back and we get unlucky, which is not impossible but | /// clock jumps back and we get unlucky, which is not impossible but | ||||
/// but deemed unlikely enough). | /// but deemed unlikely enough). | ||||
/// - All direct children of this directory (as returned by | /// - All direct children of this directory (as returned by | ||||
/// `std::fs::read_dir`) either have a corresponding dirstate node, or | /// `std::fs::read_dir`) either have a corresponding dirstate node, or | ||||
/// are ignored by ignore patterns whose hash is in | /// are ignored by ignore patterns whose hash is in | ||||
/// `Root::ignore_patterns_hash`. | /// `TreeMetadata::ignore_patterns_hash`. | ||||
/// | /// | ||||
/// This means that if `std::fs::symlink_metadata` later reports the | /// This means that if `std::fs::symlink_metadata` later reports the | ||||
/// same modification time and ignored patterns haven’t changed, a run | /// same modification time and ignored patterns haven’t changed, a run | ||||
/// of status that is not listing ignored files can skip calling | /// of status that is not listing ignored files can skip calling | ||||
/// `std::fs::read_dir` again for this directory, iterate child | /// `std::fs::read_dir` again for this directory, iterate child | ||||
/// dirstate nodes instead. | /// dirstate nodes instead. | ||||
state: u8, | state: u8, | ||||
data: Entry, | data: Entry, | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Line(s) | |||||
struct PathSlice { | struct PathSlice { | ||||
start: Offset, | start: Offset, | ||||
len: PathSize, | len: PathSize, | ||||
} | } | ||||
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes | ||||
type OptPathSlice = PathSlice; | type OptPathSlice = PathSlice; | ||||
/// Make sure that size-affecting changes are made knowingly | |||||
fn _static_assert_size_of() { | |||||
let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; | |||||
let _ = std::mem::transmute::<Root, [u8; 40]>; | |||||
let _ = std::mem::transmute::<Node, [u8; 43]>; | |||||
} | |||||
/// Unexpected file format found in `.hg/dirstate` with the "v2" format. | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. | ||||
/// | /// | ||||
/// This should only happen if Mercurial is buggy or a repository is corrupted. | /// This should only happen if Mercurial is buggy or a repository is corrupted. | ||||
#[derive(Debug)] | #[derive(Debug)] | ||||
pub struct DirstateV2ParseError; | pub struct DirstateV2ParseError; | ||||
impl From<DirstateV2ParseError> for HgError { | impl From<DirstateV2ParseError> for HgError { | ||||
fn from(_: DirstateV2ParseError) -> Self { | fn from(_: DirstateV2ParseError) -> Self { | ||||
Show All 14 Lines | pub fn parents(&self) -> DirstateParents { | ||||
.unwrap() | .unwrap() | ||||
.clone(); | .clone(); | ||||
let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) | let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) | ||||
.unwrap() | .unwrap() | ||||
.clone(); | .clone(); | ||||
DirstateParents { p1, p2 } | DirstateParents { p1, p2 } | ||||
} | } | ||||
pub fn tree_metadata(&self) -> &[u8] { | |||||
self.header.metadata.as_bytes() | |||||
} | |||||
pub fn data_size(&self) -> usize { | pub fn data_size(&self) -> usize { | ||||
// This `unwrap` could only panic on a 16-bit CPU | // This `unwrap` could only panic on a 16-bit CPU | ||||
self.header.data_size.get().try_into().unwrap() | self.header.data_size.get().try_into().unwrap() | ||||
} | } | ||||
pub fn data_filename(&self) -> String { | pub fn data_filename(&self) -> String { | ||||
String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap() | String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap() | ||||
} | } | ||||
} | } | ||||
pub fn read_docket( | pub fn read_docket( | ||||
on_disk: &[u8], | on_disk: &[u8], | ||||
) -> Result<Docket<'_>, DirstateV2ParseError> { | ) -> Result<Docket<'_>, DirstateV2ParseError> { | ||||
let (header, uuid) = | let (header, uuid) = | ||||
DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; | DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; | ||||
let uuid_size = header.uuid_size as usize; | let uuid_size = header.uuid_size as usize; | ||||
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { | if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { | ||||
Ok(Docket { header, uuid }) | Ok(Docket { header, uuid }) | ||||
} else { | } else { | ||||
Err(DirstateV2ParseError) | Err(DirstateV2ParseError) | ||||
} | } | ||||
} | } | ||||
fn read_root<'on_disk>( | |||||
on_disk: &'on_disk [u8], | |||||
) -> Result<&'on_disk Root, DirstateV2ParseError> { | |||||
// Find the `Root` at the end of the given slice | |||||
let root_offset = on_disk | |||||
.len() | |||||
.checked_sub(std::mem::size_of::<Root>()) | |||||
// A non-empty slice too short is an error | |||||
.ok_or(DirstateV2ParseError)?; | |||||
let (root, _) = Root::from_bytes(&on_disk[root_offset..]) | |||||
.map_err(|_| DirstateV2ParseError)?; | |||||
Ok(root) | |||||
} | |||||
pub(super) fn read<'on_disk>( | pub(super) fn read<'on_disk>( | ||||
on_disk: &'on_disk [u8], | on_disk: &'on_disk [u8], | ||||
metadata: &[u8], | |||||
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { | ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { | ||||
if on_disk.is_empty() { | if on_disk.is_empty() { | ||||
return Ok(DirstateMap::empty(on_disk)); | return Ok(DirstateMap::empty(on_disk)); | ||||
} | } | ||||
let root = read_root(on_disk)?; | let (meta, _) = TreeMetadata::from_bytes(metadata) | ||||
let mut unreachable_bytes = root.unreachable_bytes.get(); | .map_err(|_| DirstateV2ParseError)?; | ||||
// Each append writes a new `Root`, so it’s never reused | |||||
unreachable_bytes += std::mem::size_of::<Root>() as u32; | |||||
let dirstate_map = DirstateMap { | let dirstate_map = DirstateMap { | ||||
on_disk, | on_disk, | ||||
root: dirstate_map::ChildNodes::OnDisk(read_nodes( | root: dirstate_map::ChildNodes::OnDisk(read_nodes( | ||||
on_disk, | on_disk, | ||||
root.root_nodes, | meta.root_nodes, | ||||
)?), | )?), | ||||
nodes_with_entry_count: root.nodes_with_entry_count.get(), | nodes_with_entry_count: meta.nodes_with_entry_count.get(), | ||||
nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), | nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), | ||||
ignore_patterns_hash: root.ignore_patterns_hash, | ignore_patterns_hash: meta.ignore_patterns_hash, | ||||
unreachable_bytes, | unreachable_bytes: meta.unreachable_bytes.get(), | ||||
}; | }; | ||||
Ok(dirstate_map) | Ok(dirstate_map) | ||||
} | } | ||||
impl Node { | impl Node { | ||||
pub(super) fn full_path<'on_disk>( | pub(super) fn full_path<'on_disk>( | ||||
&self, | &self, | ||||
on_disk: &'on_disk [u8], | on_disk: &'on_disk [u8], | ||||
▲ Show 20 Lines • Show All 215 Lines • ▼ Show 20 Line(s) | on_disk | ||||
.get(start..) | .get(start..) | ||||
.and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) | ||||
.map(|(slice, _rest)| slice) | .map(|(slice, _rest)| slice) | ||||
.ok_or_else(|| DirstateV2ParseError) | .ok_or_else(|| DirstateV2ParseError) | ||||
} | } | ||||
pub(crate) fn for_each_tracked_path<'on_disk>( | pub(crate) fn for_each_tracked_path<'on_disk>( | ||||
on_disk: &'on_disk [u8], | on_disk: &'on_disk [u8], | ||||
metadata: &[u8], | |||||
mut f: impl FnMut(&'on_disk HgPath), | mut f: impl FnMut(&'on_disk HgPath), | ||||
) -> Result<(), DirstateV2ParseError> { | ) -> Result<(), DirstateV2ParseError> { | ||||
let root = read_root(on_disk)?; | let (meta, _) = TreeMetadata::from_bytes(metadata) | ||||
.map_err(|_| DirstateV2ParseError)?; | |||||
fn recur<'on_disk>( | fn recur<'on_disk>( | ||||
on_disk: &'on_disk [u8], | on_disk: &'on_disk [u8], | ||||
nodes: ChildNodes, | nodes: ChildNodes, | ||||
f: &mut impl FnMut(&'on_disk HgPath), | f: &mut impl FnMut(&'on_disk HgPath), | ||||
) -> Result<(), DirstateV2ParseError> { | ) -> Result<(), DirstateV2ParseError> { | ||||
for node in read_nodes(on_disk, nodes)? { | for node in read_nodes(on_disk, nodes)? { | ||||
if let Some(state) = node.state()? { | if let Some(state) = node.state()? { | ||||
if state.is_tracked() { | if state.is_tracked() { | ||||
f(node.full_path(on_disk)?) | f(node.full_path(on_disk)?) | ||||
} | } | ||||
} | } | ||||
recur(on_disk, node.children, f)? | recur(on_disk, node.children, f)? | ||||
} | } | ||||
Ok(()) | Ok(()) | ||||
} | } | ||||
recur(on_disk, root.root_nodes, &mut f) | recur(on_disk, meta.root_nodes, &mut f) | ||||
} | } | ||||
/// Returns new data together with whether that data should be appended to the | /// Returns new data and metadata, together with whether that data should be | ||||
/// existing data file whose content is at `dirstate_map.on_disk` (true), | /// appended to the existing data file whose content is at | ||||
/// instead of written to a new data file (false). | /// `dirstate_map.on_disk` (true), instead of written to a new data file | ||||
/// (false). | |||||
pub(super) fn write( | pub(super) fn write( | ||||
dirstate_map: &mut DirstateMap, | dirstate_map: &mut DirstateMap, | ||||
can_append: bool, | can_append: bool, | ||||
) -> Result<(Vec<u8>, bool), DirstateError> { | ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> { | ||||
let append = can_append && dirstate_map.write_should_append(); | let append = can_append && dirstate_map.write_should_append(); | ||||
// This ignores the space for paths, and for nodes without an entry. | // This ignores the space for paths, and for nodes without an entry. | ||||
// TODO: better estimate? Skip the `Vec` and write to a file directly? | // TODO: better estimate? Skip the `Vec` and write to a file directly? | ||||
let size_guess = std::mem::size_of::<Root>() | let size_guess = std::mem::size_of::<Node>() | ||||
+ std::mem::size_of::<Node>() | |||||
* dirstate_map.nodes_with_entry_count as usize; | * dirstate_map.nodes_with_entry_count as usize; | ||||
let mut writer = Writer { | let mut writer = Writer { | ||||
dirstate_map, | dirstate_map, | ||||
append, | append, | ||||
out: Vec::with_capacity(size_guess), | out: Vec::with_capacity(size_guess), | ||||
}; | }; | ||||
let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?; | let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?; | ||||
let root = Root { | let meta = TreeMetadata { | ||||
root_nodes, | root_nodes, | ||||
nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), | nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), | ||||
nodes_with_copy_source_count: dirstate_map | nodes_with_copy_source_count: dirstate_map | ||||
.nodes_with_copy_source_count | .nodes_with_copy_source_count | ||||
.into(), | .into(), | ||||
unreachable_bytes: dirstate_map.unreachable_bytes.into(), | unreachable_bytes: dirstate_map.unreachable_bytes.into(), | ||||
ignore_patterns_hash: dirstate_map.ignore_patterns_hash, | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, | ||||
}; | }; | ||||
writer.out.extend(root.as_bytes()); | Ok((writer.out, meta.as_bytes().to_vec(), append)) | ||||
Ok((writer.out, append)) | |||||
} | } | ||||
struct Writer<'dmap, 'on_disk> { | struct Writer<'dmap, 'on_disk> { | ||||
dirstate_map: &'dmap DirstateMap<'on_disk>, | dirstate_map: &'dmap DirstateMap<'on_disk>, | ||||
append: bool, | append: bool, | ||||
out: Vec<u8>, | out: Vec<u8>, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 161 Lines • Show Last 20 Lines |