diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -25,7 +25,10 @@
 
 from .utils import stringutil
 
-from .revlogutils import flagutil
+from .revlogutils import (
+    flagutil,
+    sidedata as sidedatamod,
+)
 
 rustmod = policy.importrust("copy_tracing")
 
@@ -175,7 +178,7 @@
     return cm
 
 
-def _revinfo_getter(repo):
+def _revinfo_getter(repo, match):
     """returns a function that returns the following data given a <rev>"
 
     * p1: revision number of first parent
@@ -215,20 +218,42 @@
     # time to save memory.
     merge_caches = {}
 
-    def revinfo(rev):
-        p1, p2 = parents(rev)
-        value = None
-        e = merge_caches.pop(rev, None)
-        if e is not None:
-            return e
-        changes = None
-        if flags(rev) & HASCOPIESINFO:
-            changes = changelogrevision(rev).changes
-        value = (p1, p2, changes)
-        if p1 != node.nullrev and p2 != node.nullrev:
-            # XXX some case we over cache, IGNORE
-            merge_caches[rev] = value
-        return value
+    alwaysmatch = match.always()
+
+    if rustmod is not None and alwaysmatch:
+
+        def revinfo(rev):
+            p1, p2 = parents(rev)
+            e = merge_caches.pop(rev, None)
+            if e is not None:
+                return e
+            # Rust parses the raw SD_FILES sidedata itself, so pass the
+            # undecoded bytes (None when the revision has no copy info).
+            if flags(rev) & HASCOPIESINFO:
+                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
+            else:
+                raw = None
+            value = (p1, p2, raw)
+            if p1 != node.nullrev and p2 != node.nullrev:
+                # XXX some case we over cache, IGNORE
+                merge_caches[rev] = value
+            return value
+
+    else:
+
+        def revinfo(rev):
+            p1, p2 = parents(rev)
+            e = merge_caches.pop(rev, None)
+            if e is not None:
+                return e
+            changes = None
+            if flags(rev) & HASCOPIESINFO:
+                changes = changelogrevision(rev).changes
+            value = (p1, p2, changes)
+            if p1 != node.nullrev and p2 != node.nullrev:
+                # XXX some case we over cache, IGNORE
+                merge_caches[rev] = value
+            return value
 
     return revinfo
 
@@ -289,7 +314,7 @@
     revs = sorted(iterrevs)
 
     if repo.filecopiesmode == b'changeset-sidedata':
-        revinfo = _revinfo_getter(repo)
+        revinfo = _revinfo_getter(repo, match)
         return _combine_changeset_copies(
             revs, children, b.rev(), revinfo, match, isancestor
         )
diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs
+++ b/rust/hg-core/src/copy_tracing.rs
@@ -5,8 +5,9 @@
 use im_rc::ordmap::DiffItem;
 use im_rc::ordmap::OrdMap;
 
+use std::cmp::Ordering;
 use std::collections::HashMap;
-use std::collections::HashSet;
+use std::convert::TryInto;
 
 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
 
@@ -23,18 +24,18 @@
 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
 
 /// hold parent 1, parent 2 and relevant files actions.
-pub type RevInfo = (Revision, Revision, ChangedFiles);
+pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
 
 /// represent the files affected by a changesets
 ///
 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
 /// all the data categories tracked by it.
-pub struct ChangedFiles {
-    removed: HashSet<HgPathBuf>,
-    merged: HashSet<HgPathBuf>,
-    salvaged: HashSet<HgPathBuf>,
-    copied_from_p1: PathCopies,
-    copied_from_p2: PathCopies,
+/// The struct borrows the raw serialized form: a 4 bytes big-endian item
+/// count, an index of 9 bytes entries, then the concatenated filenames.
+pub struct ChangedFiles<'a> {
+    nb_items: u32,
+    index: &'a [u8],
+    data: &'a [u8],
 }
 
 /// Represent active changes that affect the copy tracing.
@@ -62,55 +63,168 @@
     Normal,
 }
 
-impl ChangedFiles {
-    pub fn new(
-        removed: HashSet<HgPathBuf>,
-        merged: HashSet<HgPathBuf>,
-        salvaged: HashSet<HgPathBuf>,
-        copied_from_p1: PathCopies,
-        copied_from_p2: PathCopies,
-    ) -> Self {
-        ChangedFiles {
-            removed,
-            merged,
-            salvaged,
-            copied_from_p1,
-            copied_from_p2,
-        }
+type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
+
+const EMPTY: &[u8] = b"";
+const COPY_MASK: u8 = 3;
+const P1_COPY: u8 = 2;
+const P2_COPY: u8 = 3;
+const ACTION_MASK: u8 = 28;
+const REMOVED: u8 = 12;
+const MERGED: u8 = 8;
+const SALVAGED: u8 = 16;
+
+impl<'a> ChangedFiles<'a> {
+    const INDEX_START: usize = 4;
+    const ENTRY_SIZE: u32 = 9;
+    const FILENAME_START: u32 = 1;
+    const COPY_SOURCE_START: u32 = 5;
+
+    pub fn new(data: &'a [u8]) -> Self {
+        assert!(
+            data.len() >= 4,
+            "data size ({}) is too small to contain the header (4)",
+            data.len()
+        );
+        let nb_items_raw: [u8; 4] = (&data[0..=3])
+            .try_into()
+            .expect("failed to turn 4 bytes into 4 bytes");
+        let nb_items = u32::from_be_bytes(nb_items_raw);
+
+        let index_size = nb_items as usize * Self::ENTRY_SIZE as usize;
+        let index_end = Self::INDEX_START + index_size;
+
+        assert!(
+            data.len() >= index_end,
+            "data size ({}) is too small to fit the index_data ({})",
+            data.len(),
+            index_end
+        );
+
+        let ret = ChangedFiles {
+            nb_items,
+            index: &data[Self::INDEX_START..index_end],
+            data: &data[index_end..],
+        };
+        // `nb_items - 1` would underflow on an empty index, in which case no
+        // filename data is expected at all.
+        let max_data = match nb_items.checked_sub(1) {
+            Some(last) => ret.filename_end(last) as usize,
+            None => 0,
+        };
+        assert!(
+            ret.data.len() >= max_data,
+            "data size ({}) is too small to fit all data ({})",
+            data.len(),
+            index_end + max_data
+        );
+        ret
     }
 
     pub fn new_empty() -> Self {
         ChangedFiles {
-            removed: HashSet::new(),
-            merged: HashSet::new(),
-            salvaged: HashSet::new(),
-            copied_from_p1: PathCopies::new(),
-            copied_from_p2: PathCopies::new(),
+            nb_items: 0,
+            index: EMPTY,
+            data: EMPTY,
+        }
+    }
+
+    /// internal function to return an individual entry at a given index
+    fn entry(&'a self, idx: u32) -> FileChange<'a> {
+        if idx >= self.nb_items {
+            panic!(
+                "index for entry is higher that the number of file {} >= {}",
+                idx, self.nb_items
+            )
+        }
+        let flags = self.flags(idx);
+        let filename = self.filename(idx);
+        let copy_idx = self.copy_idx(idx);
+        let copy_source = self.filename(copy_idx);
+        (flags, filename, copy_source)
+    }
+
+    /// internal function to return the filename of the entry at a given index
+    fn filename(&self, idx: u32) -> &HgPath {
+        let filename_start;
+        if idx == 0 {
+            filename_start = 0;
+        } else {
+            filename_start = self.filename_end(idx - 1)
         }
+        let filename_end = self.filename_end(idx);
+        let filename_start = filename_start as usize;
+        let filename_end = filename_end as usize;
+        HgPath::new(&self.data[filename_start..filename_end])
+    }
+
+    /// internal function to return the flag field of the entry at a given
+    /// index
+    fn flags(&self, idx: u32) -> u8 {
+        let idx = idx as usize;
+        self.index[idx * (Self::ENTRY_SIZE as usize)]
+    }
+
+    /// internal function to return the end of a filename part at a given index
+    fn filename_end(&self, idx: u32) -> u32 {
+        let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
+        let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
+        let start = start as usize;
+        let end = end as usize;
+        let raw = (&self.index[start..end])
+            .try_into()
+            .expect("failed to turn 4 bytes into 4 bytes");
+        u32::from_be_bytes(raw)
+    }
+
+    /// internal function to return index of the copy source of the entry at a
+    /// given index
+    fn copy_idx(&self, idx: u32) -> u32 {
+        let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
+        let end = (idx + 1) * Self::ENTRY_SIZE;
+        let start = start as usize;
+        let end = end as usize;
+        let raw = (&self.index[start..end])
+            .try_into()
+            .expect("failed to turn 4 bytes into 4 bytes");
+        u32::from_be_bytes(raw)
     }
 
     /// Return an iterator over all the `Action` in this instance.
-    fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
-        let copies_iter = match parent {
-            1 => self.copied_from_p1.iter(),
-            2 => self.copied_from_p2.iter(),
-            _ => unreachable!(),
-        };
-        let remove_iter = self.removed.iter();
-        let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
-        let remove_iter = remove_iter.map(|x| Action::Removed(x));
-        copies_iter.chain(remove_iter)
+    fn iter_actions(&self, parent: usize) -> ActionsIterator {
+        ActionsIterator {
+            changes: &self,
+            parent: parent,
+            current: 0,
+        }
     }
 
     /// return the MergeCase value associated with a filename
     fn get_merge_case(&self, path: &HgPath) -> MergeCase {
-        if self.salvaged.contains(path) {
-            return MergeCase::Salvaged;
-        } else if self.merged.contains(path) {
-            return MergeCase::Merged;
-        } else {
+        if self.nb_items == 0 {
             return MergeCase::Normal;
         }
+        let mut low_part = 0;
+        let mut high_part = self.nb_items;
+
+        // Binary search; entries are expected to be sorted by filename in
+        // ascending order.
+        while low_part < high_part {
+            let cursor = (low_part + high_part - 1) / 2;
+            let (flags, filename, _source) = self.entry(cursor);
+            match path.cmp(filename) {
+                Ordering::Less => high_part = cursor,
+                Ordering::Greater => low_part = cursor + 1,
+                Ordering::Equal => {
+                    return match flags & ACTION_MASK {
+                        MERGED => MergeCase::Merged,
+                        SALVAGED => MergeCase::Salvaged,
+                        _ => MergeCase::Normal,
+                    };
+                }
+            }
+        }
+        MergeCase::Normal
     }
 }
 
@@ -150,6 +264,50 @@
     }
 }
 
+struct ActionsIterator<'a> {
+    changes: &'a ChangedFiles<'a>,
+    parent: usize,
+    current: u32,
+}
+
+impl<'a> Iterator for ActionsIterator<'a> {
+    type Item = Action<'a>;
+
+    fn next(&mut self) -> Option<Action<'a>> {
+        while self.current < self.changes.nb_items {
+            let (flags, file, source) = self.changes.entry(self.current);
+            self.current += 1;
+            if (flags & ACTION_MASK) == REMOVED {
+                return Some(Action::Removed(file));
+            }
+            let copy = flags & COPY_MASK;
+            if self.parent == 1 && copy == P1_COPY {
+                return Some(Action::Copied(file, source));
+            }
+            if self.parent == 2 && copy == P2_COPY {
+                return Some(Action::Copied(file, source));
+            }
+        }
+        return None;
+    }
+}
+
+/// A small struct whose purpose is to ensure lifetime of bytes referenced in
+/// ChangedFiles
+///
+/// It is passed to the RevInfoMaker callback who can assign any necessary
+/// content to the `data` attribute. The copy tracing code is responsible for
+/// keeping the DataHolder alive at least as long as the ChangedFiles object.
+pub struct DataHolder<D> {
+    /// RevInfoMaker callback should assign data referenced by the
+    /// ChangedFiles struct it return to this attribute. The DataHolder
+    /// lifetime will be at least as long as the ChangedFiles one.
+    pub data: Option<D>,
+}
+
+pub type RevInfoMaker<'a, D> =
+    Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
+
 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
 ///
 /// Arguments are:
@@ -163,11 +321,11 @@
 ///   * ChangedFiles
 /// isancestors(low_rev, high_rev): callback to check if a revision is an
 ///                                 ancestor of another
-pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
+pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
     revs: Vec<Revision>,
     children: HashMap<Revision, Vec<Revision>>,
     target_rev: Revision,
-    rev_info: &impl Fn(Revision) -> RevInfo,
+    rev_info: RevInfoMaker<D>,
     is_ancestor: &A,
 ) -> PathCopies {
     let mut all_copies = HashMap::new();
@@ -190,7 +348,8 @@
             // We will chain the copies information accumulated for `rev` with
             // the individual copies information for each of its children.
             // Creating a new PathCopies for each `rev` → `children` vertex.
-            let (p1, p2, changes) = rev_info(*child);
+            let mut d: DataHolder<D> = DataHolder { data: None };
+            let (p1, p2, changes) = rev_info(*child, &mut d);
 
             let parent = if rev == p1 {
                 1
diff --git a/rust/hg-cpython/src/copy_tracing.rs b/rust/hg-cpython/src/copy_tracing.rs
--- a/rust/hg-cpython/src/copy_tracing.rs
+++ b/rust/hg-cpython/src/copy_tracing.rs
@@ -11,8 +11,9 @@
 
 use hg::copy_tracing::combine_changeset_copies;
 use hg::copy_tracing::ChangedFiles;
+use hg::copy_tracing::DataHolder;
 use hg::copy_tracing::RevInfo;
-use hg::utils::hg_path::HgPathBuf;
+use hg::copy_tracing::RevInfoMaker;
 use hg::Revision;
 
 /// Combines copies information contained into revision `revs` to build a copy
@@ -57,184 +58,41 @@
     // happens in case of programing error or severe data corruption. Such
     // errors will raise panic and the rust-cpython harness will turn them into
     // Python exception.
-    let rev_info_maker = |rev: Revision| -> RevInfo {
-        let res: PyTuple = rev_info
-            .call(py, (rev,), None)
-            .expect("rust-copy-tracing: python call to `rev_info` failed")
-            .cast_into(py)
-            .expect(
-                "rust-copy_tracing: python call to `rev_info` returned \
-                 unexpected non-Tuple value",
-            );
-        let p1 = res.get_item(py, 0).extract(py).expect(
-            "rust-copy-tracing: \
-             rev_info return is invalid, first item is a not a revision",
-        );
-        let p2 = res.get_item(py, 1).extract(py).expect(
-            "rust-copy-tracing: \
-             rev_info return is invalid, second item is a not a revision",
-        );
-
-        let changes = res.get_item(py, 2);
-
-        let files;
-        if !changes
-            .hasattr(py, "copied_from_p1")
-            .expect("rust-copy-tracing: python call to `hasattr` failed")
-        {
-            files = ChangedFiles::new_empty();
-        } else {
-            let p1_copies: PyDict = changes
-                .getattr(py, "copied_from_p1")
-                .expect(
-                    "rust-copy-tracing: retrieval of python attribute \
-                     `copied_from_p1` failed",
-                )
-                .cast_into(py)
-                .expect(
-                    "rust-copy-tracing: failed to convert `copied_from_p1` \
-                     to PyDict",
-                );
-            let p1_copies: PyResult<_> = p1_copies
-                .items(py)
-                .iter()
-                .map(|(key, value)| {
-                    let key = key.extract::<PyBytes>(py).expect(
-                        "rust-copy-tracing: conversion of copy destination to\
-                         PyBytes failed",
-                    );
-                    let key = key.data(py);
-                    let value = value.extract::<PyBytes>(py).expect(
-                        "rust-copy-tracing: conversion of copy source to \
-                         PyBytes failed",
-                    );
-                    let value = value.data(py);
-                    Ok((
-                        HgPathBuf::from_bytes(key),
-                        HgPathBuf::from_bytes(value),
-                    ))
-                })
-                .collect();
-
-            let p2_copies: PyDict = changes
-                .getattr(py, "copied_from_p2")
-                .expect(
-                    "rust-copy-tracing: retrieval of python attribute \
-                     `copied_from_p2` failed",
-                )
+    let rev_info_maker: RevInfoMaker<PyBytes> =
+        Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
+            let res: PyTuple = rev_info
+                .call(py, (rev,), None)
+                .expect("rust-copy-tracing: python call to `rev_info` failed")
                 .cast_into(py)
                 .expect(
-                    "rust-copy-tracing: failed to convert `copied_from_p2` \
-                     to PyDict",
+                    "rust-copy_tracing: python call to `rev_info` returned \
+                     unexpected non-Tuple value",
                 );
-            let p2_copies: PyResult<_> = p2_copies
-                .items(py)
-                .iter()
-                .map(|(key, value)| {
-                    let key = key.extract::<PyBytes>(py).expect(
-                        "rust-copy-tracing: conversion of copy destination to \
-                         PyBytes failed");
-                    let key = key.data(py);
-                    let value = value.extract::<PyBytes>(py).expect(
-                        "rust-copy-tracing: conversion of copy source to \
-                         PyBytes failed",
-                    );
-                    let value = value.data(py);
-                    Ok((
-                        HgPathBuf::from_bytes(key),
-                        HgPathBuf::from_bytes(value),
-                    ))
-                })
-                .collect();
-
-            let removed: PyObject = changes.getattr(py, "removed").expect(
-                "rust-copy-tracing: retrieval of python attribute \
-                 `removed` failed",
+            let p1 = res.get_item(py, 0).extract(py).expect(
+                "rust-copy-tracing: rev_info return is invalid, first item \
+                 is a not a revision",
             );
-            let removed: PyResult<_> = removed
-                .iter(py)
-                .expect(
-                    "rust-copy-tracing: getting a python iterator over the \
-                     `removed` set failed",
-                )
-                .map(|filename| {
-                    let filename = filename
-                        .expect(
-                            "rust-copy-tracing: python iteration over the \
-                             `removed` set failed",
-                        )
-                        .extract::<PyBytes>(py)
-                        .expect(
-                            "rust-copy-tracing: \
-                             conversion of `removed` item to PyBytes failed",
-                        );
-                    let filename = filename.data(py);
-                    Ok(HgPathBuf::from_bytes(filename))
-                })
-                .collect();
-
-            let merged: PyObject = changes.getattr(py, "merged").expect(
-                "rust-copy-tracing: retrieval of python attribute \
-                 `merged` failed",
+            let p2 = res.get_item(py, 1).extract(py).expect(
+                "rust-copy-tracing: rev_info return is invalid, second item \
+                 is a not a revision",
             );
-            let merged: PyResult<_> = merged
-                .iter(py)
-                .expect(
-                    "rust-copy-tracing: getting a python iterator over the \
-                     `merged` set failed",
-                )
-                .map(|filename| {
-                    let filename = filename
-                        .expect(
-                            "rust-copy-tracing: python iteration over the \
-                             `merged` set failed",
-                        )
-                        .extract::<PyBytes>(py)
-                        .expect(
-                            "rust-copy-tracing: \
-                             conversion of `merged` item to PyBytes failed",
-                        );
-                    let filename = filename.data(py);
-                    Ok(HgPathBuf::from_bytes(filename))
-                })
-                .collect();
 
-            let salvaged: PyObject = changes.getattr(py, "salvaged").expect(
-                "rust-copy-tracing: retrieval of python attribute \
-                 `salvaged` failed",
-            );
-            let salvaged: PyResult<_> = salvaged
-                .iter(py)
-                .expect(
-                    "rust-copy-tracing: getting a python iterator over the \
-                     `salvaged` set failed",
-                )
-                .map(|filename| {
-                    let filename = filename
-                        .expect(
-                            "rust-copy-tracing: python iteration over the \
-                             `salvaged` set failed",
-                        )
-                        .extract::<PyBytes>(py)
-                        .expect(
-                            "rust-copy-tracing: \
-                             conversion of `salvaged` item to PyBytes failed",
-                        );
-                    let filename = filename.data(py);
-                    Ok(HgPathBuf::from_bytes(filename))
-                })
-                .collect();
-            files = ChangedFiles::new(
-                removed.unwrap(),
-                merged.unwrap(),
-                salvaged.unwrap(),
-                p1_copies.unwrap(),
-                p2_copies.unwrap(),
-            );
-        }
+            let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
+                Ok(raw) => {
+                    // Give responsibility for the raw bytes lifetime to
+                    // hg-core
+                    d.data = Some(raw);
+                    let addrs = d.data.as_ref().expect(
+                        "rust-copy-tracing: failed to get a reference to the \
+                         raw bytes for copy data").data(py);
+                    ChangedFiles::new(addrs)
+                }
+                // value was presumably None, meaning there was no copy data.
+                Err(_) => ChangedFiles::new_empty(),
+            };
 
-        (p1, p2, files)
-    };
+            (p1, p2, files)
+        });
     let children: PyResult<_> = children
         .items(py)
         .iter()
@@ -250,7 +108,7 @@
         revs?,
         children?,
         target_rev,
-        &rev_info_maker,
+        rev_info_maker,
         &is_ancestor_wrap,
     );
     let out = PyDict::new(py);