diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -10,3 +10,7 @@ [dev-dependencies] rand = "*" rand_pcg = "*" + +[dependencies] +memchr = "2.2.0" +byteorder = "1.3.1" \ No newline at end of file diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/dirstate.rs @@ -0,0 +1,292 @@ +// Copyright 2019 Raphaël Gomès +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +use byteorder::{BigEndian, ByteOrder, WriteBytesExt}; +use std::collections::HashMap; +use {DirstatePackError, DirstateParseError}; + +pub type DirStateParents<'a> = ((&'a [u8], &'a [u8])); +/// The C implementation uses all signed types. This will be an issue +/// either when 4GB+ source files are commonplace or in 2038, whichever +/// comes first. +pub type DirstateTuple = (i8, i32, i32, i32); +pub type DirstateVec = Vec<(Vec, DirstateTuple)>; +pub type CopyMap<'a> = Vec<(&'a [u8], &'a [u8])>; + +/// Parents are stored in the dirstate as byte hashes. +const PARENT_SIZE: usize = 20; +/// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. +const MIN_ENTRY_SIZE: usize = 17; + +pub fn parse_dirstate( + contents: &[u8], +) -> Result<(DirStateParents, DirstateVec, CopyMap), DirstateParseError> { + if contents.len() < PARENT_SIZE { + return Err(DirstateParseError::TooLittleData); + } + + let mut dirstate_vec = vec![]; + let mut copies = vec![]; + let parents = ( + &contents[..PARENT_SIZE], + &contents[PARENT_SIZE..PARENT_SIZE * 2], + ); + let mut i = PARENT_SIZE * 2; + + while i < contents.len() { + if i + MIN_ENTRY_SIZE > contents.len() { + return Err(DirstateParseError::Overflow); + } + + let slice = &contents[i..]; + let state = slice[0] as i8; + let mode = BigEndian::read_i32(&slice[1..]); + let size = BigEndian::read_i32(&slice[5..]); + let mtime = BigEndian::read_i32(&slice[9..]); + let path_len = BigEndian::read_i32(&slice[13..]); + + if path_len as usize > contents.len() - i { + return Err(DirstateParseError::Overflow); + } + let path = + &slice[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len as usize)]; + + let (path, copy) = match memchr::memchr(0, &path) { + None => (path, None), + Some(i) => (&path[..i], Some(&path[(i + 1)..])), + }; + + if let Some(copy_name) = copy { + copies.push((path, copy_name)); + }; + dirstate_vec.push((path.to_owned(), (state, mode, size, mtime))); + i = i + MIN_ENTRY_SIZE + (path_len as usize); + } + + Ok((parents, dirstate_vec, copies)) +} + +pub fn pack_dirstate( + dirstate_vec: &DirstateVec, + copymap: &HashMap, Vec>, + parents: DirStateParents, + now: i32, +) -> Result<(Vec, DirstateVec), DirstatePackError> { + if parents.0.len() != PARENT_SIZE || parents.1.len() != PARENT_SIZE { + return Err(DirstatePackError::CorruptedParent); + } + + let expected_size: usize = dirstate_vec + .iter() + .map(|(ref filename, _)| { + let mut length = MIN_ENTRY_SIZE + filename.len(); + if let Some(ref copy) = copymap.get(filename) { + length += copy.len() + 1; + } + length + }) + .sum(); + let expected_size = expected_size + PARENT_SIZE * 2; + + let mut packed = Vec::with_capacity(expected_size); + let mut new_dirstate_vec = vec![]; + + packed.extend(parents.0); + packed.extend(parents.1); + + for (ref filename, (state, mode, size, mtime)) in dirstate_vec { + let mut new_filename: Vec = filename.to_owned(); + let mut new_mtime: i32 = *mtime; + if *state == 'n' as i8 && *mtime == now.into() { + // The file was last modified "simultaneously" with the current + // write to dirstate (i.e. within the same second for file- + // systems with a granularity of 1 sec). This commonly happens + // for at least a couple of files on 'update'. + // The user could change the file without changing its size + // within the same second. Invalidate the file's mtime in + // dirstate, forcing future 'status' calls to compare the + // contents of the file if the size is the same. This prevents + // mistakenly treating such files as clean. + new_mtime = -1; + new_dirstate_vec.push(( + filename.to_owned(), + (*state, *mode, *size, new_mtime), + )); + } + + if let Some(copy) = copymap.get(filename) { + new_filename.push('\0' as u8); + new_filename.extend(copy); + } + + packed.write_i8(*state)?; + packed.write_i32::(*mode)?; + packed.write_i32::(*size)?; + packed.write_i32::(new_mtime)?; + packed.write_i32::(new_filename.len() as i32)?; + packed.extend(new_filename) + } + + if packed.len() != expected_size { + return Err(DirstatePackError::BadSize(expected_size, packed.len())); + } + + Ok((packed, new_dirstate_vec)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pack_dirstate_empty() { + let dirstate_vec: DirstateVec = vec![]; + let copymap = HashMap::new(); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let expected = + (b"1234567891011121314100000000000000000000".to_vec(), vec![]); + + assert_eq!( + expected, + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() + ); + } + #[test] + fn test_pack_dirstate_one_entry() { + let dirstate_vec: DirstateVec = vec![( + vec!['f' as u8, '1' as u8], + ('n' as i8, 0o644, 0, 791231220), + )]; + let copymap = HashMap::new(); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let expected = ( + [ + 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, + 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, + 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49, + ] + .to_vec(), + vec![], + ); + + assert_eq!( + expected, + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() + ); + } + #[test] + fn test_pack_dirstate_one_entry_with_copy() { + let dirstate_vec: DirstateVec = + vec![(b"f1".to_vec(), ('n' as i8, 0o644, 0, 791231220))]; + let mut copymap = HashMap::new(); + copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let expected = ( + [ + 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, + 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, + 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, + 112, 121, 110, 97, 109, 101, + ] + .to_vec(), + vec![], + ); + + assert_eq!( + expected, + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() + ); + } + + #[test] + fn test_parse_pack_one_entry_with_copy() { + let dirstate_vec: DirstateVec = + vec![(b"f1".to_vec(), ('n' as i8, 0o644, 0, 791231220))]; + let mut copymap = HashMap::new(); + copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let result = + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); + + assert_eq!( + ( + parents, + dirstate_vec, + copymap + .iter() + .map(|(k, v)| (k.as_slice(), v.as_slice())) + .collect() + ), + parse_dirstate(result.0.as_slice()).unwrap() + ) + } + + #[test] + fn test_parse_pack_multiple_entries_with_copy() { + let dirstate_vec: DirstateVec = vec![ + (b"f1".to_vec(), ('n' as i8, 0o644, 123, 791231220)), + (b"f2".to_vec(), ('m' as i8, 0o777, 1000, 791231220)), + (b"f3".to_vec(), ('r' as i8, 0o644, 234553, 791231220)), + (b"f4\xF6".to_vec(), ('a' as i8, 0o644, -1, -1)), + ]; + let mut copymap = HashMap::new(); + copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); + copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec()); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let result = + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); + + assert_eq!( + (parents, dirstate_vec, copymap), + parse_dirstate(result.0.as_slice()) + .and_then(|(p, dvec, cvec)| Ok(( + p, + dvec, + cvec.iter() + .map(|(k, v)| (k.to_vec(), v.to_vec())) + .collect() + ))) + .unwrap() + ) + } + + #[test] + /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 + fn test_parse_pack_one_entry_with_copy_and_time_conflict() { + let dirstate_vec: DirstateVec = + vec![(b"f1".to_vec(), ('n' as i8, 0o644, 0, 15000000))]; + let mut copymap = HashMap::new(); + copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); + let parents: DirStateParents = + (b"12345678910111213141", b"00000000000000000000"); + let now: i32 = 15000000; + let result = + pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); + + assert_eq!( + ( + parents, + vec![(b"f1".to_vec(), ('n' as i8, 0o644, 0, -1))], + copymap + .iter() + .map(|(k, v)| (k.as_slice(), v.as_slice())) + .collect() + ), + parse_dirstate(result.0.as_slice()).unwrap() + ) + } +} diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -2,6 +2,9 @@ // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +extern crate byteorder; +extern crate memchr; + mod ancestors; pub mod dagops; pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; @@ -40,3 +43,22 @@ ParentOutOfRange(Revision), WorkingDirectoryUnsupported, } + +#[derive(Clone, Debug, PartialEq)] +pub enum DirstateParseError { + TooLittleData, + Overflow, +} + +#[derive(Debug, PartialEq)] +pub enum DirstatePackError { + CorruptedEntry(String), + CorruptedParent, + BadSize(usize, usize), +} + +impl From for DirstatePackError { + fn from(e: std::io::Error) -> Self { + DirstatePackError::CorruptedEntry(e.to_string()) + } +}