Returning a vec is faster than updating a hashmap when the hashmap is not needed,
as in `hg files`, which just lists tracked files.
Returning references avoids copying data when not needed, improving performance
for large repositories.
( )
| Alphare | |
| indygreg |
| hg-reviewers |
Returning a vec is faster than updating a hashmap when the hashmap is not needed,
as in `hg files`, which just lists tracked files.
Returning references avoids copying data when not needed, improving performance
for large repositories.
| No Linters Available |
| No Unit Test Coverage |
| Path | Packages | |||
|---|---|---|---|---|
| M | rust/hg-core/src/dirstate/dirstate_map.rs (16 lines) | |||
| M | rust/hg-core/src/dirstate/parsers.rs (82 lines) | |||
| M | rust/hg-cpython/src/parsers.rs (18 lines) |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| 6c1efe522c35 | 0041a42c6f28 | Antoine Cezar | Aug 4 2020, 4:59 AM |
| Status | Author | Revision | |
|---|---|---|---|
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | D9049 hg-core: add path_encode | |
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | D8962 rhg: Add debug timing | |
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar | ||
| Closed | acezar |
| pub fn read( | pub fn read( | ||||
| &mut self, | &mut self, | ||||
| file_contents: &[u8], | file_contents: &[u8], | ||||
| ) -> Result<Option<DirstateParents>, DirstateError> { | ) -> Result<Option<DirstateParents>, DirstateError> { | ||||
| if file_contents.is_empty() { | if file_contents.is_empty() { | ||||
| return Ok(None); | return Ok(None); | ||||
| } | } | ||||
| let parents = parse_dirstate( | let (parents, entries, copies) = parse_dirstate(file_contents)?; | ||||
| &mut self.state_map, | self.state_map.extend( | ||||
| &mut self.copy_map, | entries | ||||
| file_contents, | .into_iter() | ||||
| )?; | .map(|(path, entry)| (path.to_owned(), entry)), | ||||
| ); | |||||
| self.copy_map.extend( | |||||
| copies | |||||
| .into_iter() | |||||
| .map(|(path, copy)| (path.to_owned(), copy.to_owned())), | |||||
| ); | |||||
| if !self.dirty_parents { | if !self.dirty_parents { | ||||
| self.set_parents(&parents); | self.set_parents(&parents); | ||||
| } | } | ||||
| Ok(Some(parents)) | Ok(Some(parents)) | ||||
| } | } | ||||
| use std::io::Cursor; | use std::io::Cursor; | ||||
| use std::time::Duration; | use std::time::Duration; | ||||
| /// Parents are stored in the dirstate as byte hashes. | /// Parents are stored in the dirstate as byte hashes. | ||||
| pub const PARENT_SIZE: usize = 20; | pub const PARENT_SIZE: usize = 20; | ||||
| /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | ||||
| const MIN_ENTRY_SIZE: usize = 17; | const MIN_ENTRY_SIZE: usize = 17; | ||||
| // TODO parse/pack: is mutate-on-loop better for performance? | type ParseResult<'a> = ( | ||||
| DirstateParents, | |||||
| Vec<(&'a HgPath, DirstateEntry)>, | |||||
| Vec<(&'a HgPath, &'a HgPath)>, | |||||
| ); | |||||
| #[timed] | #[timed] | ||||
| pub fn parse_dirstate( | pub fn parse_dirstate( | ||||
| state_map: &mut StateMap, | |||||
| copy_map: &mut CopyMap, | |||||
| contents: &[u8], | contents: &[u8], | ||||
| ) -> Result<DirstateParents, DirstateParseError> { | ) -> Result<ParseResult, DirstateParseError> { | ||||
| if contents.len() < PARENT_SIZE * 2 { | if contents.len() < PARENT_SIZE * 2 { | ||||
| return Err(DirstateParseError::TooLittleData); | return Err(DirstateParseError::TooLittleData); | ||||
| } | } | ||||
| let mut copies = vec![]; | |||||
| let mut entries = vec![]; | |||||
| let mut curr_pos = PARENT_SIZE * 2; | let mut curr_pos = PARENT_SIZE * 2; | ||||
| let parents = DirstateParents { | let parents = DirstateParents { | ||||
| p1: contents[..PARENT_SIZE].try_into().unwrap(), | p1: contents[..PARENT_SIZE].try_into().unwrap(), | ||||
| p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), | p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), | ||||
| }; | }; | ||||
| while curr_pos < contents.len() { | while curr_pos < contents.len() { | ||||
| let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | ||||
| let (path, copy) = match memchr::memchr(0, path) { | let (path, copy) = match memchr::memchr(0, path) { | ||||
| None => (path, None), | None => (path, None), | ||||
| Some(i) => (&path[..i], Some(&path[(i + 1)..])), | Some(i) => (&path[..i], Some(&path[(i + 1)..])), | ||||
| }; | }; | ||||
| if let Some(copy_path) = copy { | if let Some(copy_path) = copy { | ||||
| copy_map.insert( | copies.push((HgPath::new(path), HgPath::new(copy_path))); | ||||
| HgPath::new(path).to_owned(), | |||||
| HgPath::new(copy_path).to_owned(), | |||||
| ); | |||||
| }; | }; | ||||
| state_map.insert( | entries.push(( | ||||
| HgPath::new(path).to_owned(), | HgPath::new(path), | ||||
| DirstateEntry { | DirstateEntry { | ||||
| state, | state, | ||||
| mode, | mode, | ||||
| size, | size, | ||||
| mtime, | mtime, | ||||
| }, | }, | ||||
| ); | )); | ||||
| curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | ||||
| } | } | ||||
| Ok(parents) | Ok((parents, entries, copies)) | ||||
| } | } | ||||
| /// `now` is the duration in seconds since the Unix epoch | /// `now` is the duration in seconds since the Unix epoch | ||||
| pub fn pack_dirstate( | pub fn pack_dirstate( | ||||
| state_map: &mut StateMap, | state_map: &mut StateMap, | ||||
| copy_map: &CopyMap, | copy_map: &CopyMap, | ||||
| parents: DirstateParents, | parents: DirstateParents, | ||||
| now: Duration, | now: Duration, | ||||
| p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
| p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
| }; | }; | ||||
| let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
| let result = | let result = | ||||
| pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
| .unwrap(); | .unwrap(); | ||||
| let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
| let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
| let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
| &mut new_state_map, | .into_iter() | ||||
| &mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
| result.as_slice(), | .collect(); | ||||
| ) | let new_copy_map: CopyMap = copies | ||||
| .unwrap(); | .into_iter() | ||||
| .map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
| .collect(); | |||||
| assert_eq!( | assert_eq!( | ||||
| (parents, state_map, copymap), | (parents, state_map, copymap), | ||||
| (new_parents, new_state_map, new_copy_map) | (new_parents, new_state_map, new_copy_map) | ||||
| ) | ) | ||||
| } | } | ||||
| #[test] | #[test] | ||||
| fn test_parse_pack_multiple_entries_with_copy() { | fn test_parse_pack_multiple_entries_with_copy() { | ||||
| p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
| p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
| }; | }; | ||||
| let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
| let result = | let result = | ||||
| pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
| .unwrap(); | .unwrap(); | ||||
| let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
| let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
| let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
| &mut new_state_map, | .into_iter() | ||||
| &mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
| result.as_slice(), | .collect(); | ||||
| ) | let new_copy_map: CopyMap = copies | ||||
| .unwrap(); | .into_iter() | ||||
| .map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
| .collect(); | |||||
| assert_eq!( | assert_eq!( | ||||
| (parents, state_map, copymap), | (parents, state_map, copymap), | ||||
| (new_parents, new_state_map, new_copy_map) | (new_parents, new_state_map, new_copy_map) | ||||
| ) | ) | ||||
| } | } | ||||
| #[test] | #[test] | ||||
| /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | ||||
| p1: *b"12345678910111213141", | p1: *b"12345678910111213141", | ||||
| p2: *b"00000000000000000000", | p2: *b"00000000000000000000", | ||||
| }; | }; | ||||
| let now = Duration::new(15000000, 0); | let now = Duration::new(15000000, 0); | ||||
| let result = | let result = | ||||
| pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | pack_dirstate(&mut state_map, &copymap, parents.clone(), now) | ||||
| .unwrap(); | .unwrap(); | ||||
| let mut new_state_map: StateMap = FastHashMap::default(); | let (new_parents, entries, copies) = | ||||
| let mut new_copy_map: CopyMap = FastHashMap::default(); | parse_dirstate(result.as_slice()).unwrap(); | ||||
| let new_parents = parse_dirstate( | let new_state_map: StateMap = entries | ||||
| &mut new_state_map, | .into_iter() | ||||
| &mut new_copy_map, | .map(|(path, entry)| (path.to_owned(), entry)) | ||||
| result.as_slice(), | .collect(); | ||||
| ) | let new_copy_map: CopyMap = FastHashMap::default(); | ||||
| .unwrap(); | copies | ||||
| .into_iter() | |||||
| .map(|(path, copy)| (path.to_owned(), copy.to_owned())) | |||||
| .collect(); | |||||
| assert_eq!( | assert_eq!( | ||||
| ( | ( | ||||
| parents, | parents, | ||||
| [( | [( | ||||
| HgPathBuf::from_bytes(b"f1"), | HgPathBuf::from_bytes(b"f1"), | ||||
| DirstateEntry { | DirstateEntry { | ||||
| state: EntryState::Normal, | state: EntryState::Normal, | ||||
These should use `collect()` instead of `default()` + `extend()`.