Returning a vec is faster than updating a hashmap when the hashmap is not needed,
as in `hg files`, which just lists tracked files.
Returning references avoids copying data when not needed, improving performance
for large repositories.
( )
Alphare | |
indygreg |
hg-reviewers |
Returning a vec is faster than updating a hashmap when the hashmap is not needed,
as in `hg files`, which just lists tracked files.
Returning references avoids copying data when not needed, improving performance
for large repositories.
No Linters Available |
No Unit Test Coverage |
Path | Packages | |||
---|---|---|---|---|
M | rust/hg-core/src/dirstate/dirstate_map.rs (16 lines) | |||
M | rust/hg-core/src/dirstate/parsers.rs (82 lines) | |||
M | rust/hg-cpython/src/parsers.rs (18 lines) |
Commit | Parents | Author | Summary | Date |
---|---|---|---|---|
6c1efe522c35 | 0041a42c6f28 | Antoine Cezar | Aug 4 2020, 4:59 AM |
Status | Author | Revision | |
---|---|---|---|
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | D9049 hg-core: add path_encode | |
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | D8962 rhg: Add debug timing | |
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar | ||
Closed | acezar |
pub fn read( | pub fn read( | ||||
&mut self, | &mut self, | ||||
file_contents: &[u8], | file_contents: &[u8], | ||||
) -> Result<Option<DirstateParents>, DirstateError> { | ) -> Result<Option<DirstateParents>, DirstateError> { | ||||
if file_contents.is_empty() { | if file_contents.is_empty() { | ||||
return Ok(None); | return Ok(None); | ||||
} | } | ||||
let parents = parse_dirstate( | let (parents, entries, copies) = parse_dirstate(file_contents)?; | ||||
&mut self.state_map, | self.state_map.extend( | ||||
&mut self.copy_map, | entries | ||||
file_contents, | .into_iter() | ||||
)?; | .map(|(path, entry)| (path.to_owned(), entry)), | ||||
); | |||||
self.copy_map.extend( | |||||
copies | |||||
.into_iter() | |||||
.map(|(path, copy)| (path.to_owned(), copy.to_owned())), | |||||
); | |||||
if !self.dirty_parents { | if !self.dirty_parents { | ||||
self.set_parents(&parents); | self.set_parents(&parents); | ||||
} | } | ||||
Ok(Some(parents)) | Ok(Some(parents)) | ||||
} | } | ||||
use std::io::Cursor; | use std::io::Cursor; | ||||
use std::time::Duration; | use std::time::Duration; | ||||
/// Parents are stored in the dirstate as byte hashes. | /// Parents are stored in the dirstate as byte hashes. | ||||
pub const PARENT_SIZE: usize = 20; | pub const PARENT_SIZE: usize = 20; | ||||
/// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | ||||
const MIN_ENTRY_SIZE: usize = 17; | const MIN_ENTRY_SIZE: usize = 17; | ||||
// TODO parse/pack: is mutate-on-loop better for performance? | type ParseResult<'a> = ( | ||||
DirstateParents, | |||||
Vec<(&'a HgPath, DirstateEntry)>, | |||||
Vec<(&'a HgPath, &'a HgPath)>, | |||||
); | |||||
#[timed] | #[timed] | ||||
pub fn parse_dirstate( | pub fn parse_dirstate( | ||||
state_map: &mut StateMap, | |||||
copy_map: &mut CopyMap, | |||||
contents: &[u8], | contents: &[u8], | ||||
) -> Result<DirstateParents, DirstateParseError> { | ) -> Result<ParseResult, DirstateParseError> { | ||||
if contents.len() < PARENT_SIZE * 2 { | if contents.len() < PARENT_SIZE * 2 { | ||||
return Err(DirstateParseError::TooLittleData); | return Err(DirstateParseError::TooLittleData); | ||||
} | } | ||||
let mut copies = vec![]; | |||||
let mut entries = vec![]; | |||||
let mut curr_pos = PARENT_SIZE * 2; | let mut curr_pos = PARENT_SIZE * 2; | ||||
let parents = DirstateParents { | let parents = DirstateParents { | ||||
p1: contents[..PARENT_SIZE].try_into().unwrap(), | p1: contents[..PARENT_SIZE].try_into().unwrap(), | ||||
p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), | p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), | ||||
}; | }; | ||||
while curr_pos < contents.len() { | while curr_pos < contents.len() { | ||||
let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | ||||
let (path, copy) = match memchr::memchr(0, path) { | let (path, copy) = match memchr::memchr(0, path) { | ||||
None => (path, None), | None => (path, None), | ||||
Some(i) => (&path[..i], Some(&path[(i + 1)..])), | Some(i) => (&path[..i], Some(&path[(i + 1)..])), | ||||
}; | }; | ||||
if let Some(copy_path) = copy { | if let Some(copy_path) = copy { | ||||
copy_map.insert( | copies.push((HgPath::new(path), HgPath::new(copy_path))); | ||||
HgPath::new(path).to_owned(), | |||||
HgPath::new(copy_path).to_owned(), | |||||
); | |||||
}; | }; | ||||
state_map.insert( | entries.push(( | ||||
HgPath::new(path).to_owned(), | HgPath::new(path), | ||||
DirstateEntry { | DirstateEntry { | ||||
state, | state, | ||||
mode, | mode, | ||||
size, | size, | ||||
mtime, | mtime, | ||||
}, | }, | ||||
); | )); | ||||
curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | ||||
} | } | ||||
Ok(parents) | Ok((parents, entries, copies)) | ||||
} | } | ||||
/// `now` is the duration in seconds since the Unix epoch | /// `now` is the duration in seconds since the Unix epoch | ||||
pub fn pack_dirstate( | pub fn pack_dirstate( | ||||
state_map: &mut StateMap, | state_map: &mut StateMap, | ||||
copy_map: &CopyMap, | copy_map: &CopyMap, | ||||
parents: DirstateParents, | parents: DirstateParents, | ||||
now: Duration, | now: Duration, | ||||
p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
}; | }; | ||||
let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
let result = | let result = | ||||
pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
.unwrap(); | .unwrap(); | ||||
let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
&mut new_state_map, | .into_iter() | ||||
&mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
result.as_slice(), | .collect(); | ||||
) | let new_copy_map: CopyMap = copies | ||||
.unwrap(); | .into_iter() | ||||
.map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
.collect(); | |||||
assert_eq!( | assert_eq!( | ||||
(parents, state_map, copymap), | (parents, state_map, copymap), | ||||
(new_parents, new_state_map, new_copy_map) | (new_parents, new_state_map, new_copy_map) | ||||
) | ) | ||||
} | } | ||||
#[test] | #[test] | ||||
fn test_parse_pack_multiple_entries_with_copy() { | fn test_parse_pack_multiple_entries_with_copy() { | ||||
p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
}; | }; | ||||
let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
let result = | let result = | ||||
pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
.unwrap(); | .unwrap(); | ||||
let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
&mut new_state_map, | .into_iter() | ||||
&mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
result.as_slice(), | .collect(); | ||||
) | let new_copy_map: CopyMap = copies | ||||
.unwrap(); | .into_iter() | ||||
.map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
.collect(); | |||||
assert_eq!( | assert_eq!( | ||||
(parents, state_map, copymap), | (parents, state_map, copymap), | ||||
(new_parents, new_state_map, new_copy_map) | (new_parents, new_state_map, new_copy_map) | ||||
) | ) | ||||
} | } | ||||
#[test] | #[test] | ||||
/// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | ||||
p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
}; | }; | ||||
let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
let result = | let result = | ||||
pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
.unwrap(); | .unwrap(); | ||||
let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
&mut new_state_map, | .into_iter() | ||||
&mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
result.as_slice(), | .collect(); | ||||
) | let new_copy_map: CopyMap = FastHashMap::default(); | ||||
.unwrap(); | copies | ||||
.into_iter() | |||||
.map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
.collect(); | |||||
assert_eq!( | assert_eq!( | ||||
( | ( | ||||
parents, | parents, | ||||
[( | [( | ||||
HgPathBuf::from_bytes(b"f1"), | HgPathBuf::from_bytes(b"f1"), | ||||
DirstateEntry { | DirstateEntry { | ||||
state: EntryState::Normal, | state: EntryState::Normal, |
These should be a collect() instead of default + extend.