This deduplicates the copy-tracing-specific logic
Details
Details
Diff Detail
Diff Detail
- Repository
- rHG Mercurial
- Branch
- default
- Lint
No Linters Available - Unit
No Unit Test Coverage
This deduplicates the copy-tracing-specific logic
| No Linters Available |
| No Unit Test Coverage |
| Path | Packages | |||
|---|---|---|---|---|
| M | rust/hg-core/src/copy_tracing.rs (227 lines) | |||
| M | rust/hg-core/src/utils.rs (150 lines) |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| 53b0aab6c386 | c55914e492f6 | Simon Sapin | Dec 23 2020, 5:48 AM |
| use crate::utils::hg_path::HgPath; | use crate::utils::hg_path::HgPath; | ||||
| use crate::utils::hg_path::HgPathBuf; | use crate::utils::hg_path::HgPathBuf; | ||||
| use crate::Revision; | use crate::Revision; | ||||
| use crate::NULL_REVISION; | use crate::NULL_REVISION; | ||||
| use im_rc::ordmap::DiffItem; | |||||
| use im_rc::ordmap::Entry; | use im_rc::ordmap::Entry; | ||||
| use im_rc::ordmap::OrdMap; | use im_rc::ordmap::OrdMap; | ||||
| use im_rc::OrdSet; | use im_rc::OrdSet; | ||||
| use std::cmp::Ordering; | use std::cmp::Ordering; | ||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||
| use std::convert::TryInto; | use std::convert::TryInto; | ||||
| /// merge two copies-mapping together, minor and major | /// merge two copies-mapping together, minor and major | ||||
| /// | /// | ||||
| /// In case of conflict, value from "major" will be picked, unless in some | /// In case of conflict, value from "major" will be picked, unless in some | ||||
| /// cases. See inline documentation for details. | /// cases. See inline documentation for details. | ||||
| fn merge_copies_dict( | fn merge_copies_dict( | ||||
| path_map: &TwoWayPathMap, | path_map: &TwoWayPathMap, | ||||
| current_merge: Revision, | current_merge: Revision, | ||||
| mut minor: InternalPathCopies, | minor: InternalPathCopies, | ||||
| mut major: InternalPathCopies, | major: InternalPathCopies, | ||||
| changes: &ChangedFiles, | changes: &ChangedFiles, | ||||
| ) -> InternalPathCopies { | ) -> InternalPathCopies { | ||||
| // This closure exist as temporary help while multiple developper are | use crate::utils::{ordmap_union_with_merge, MergeResult}; | ||||
| // actively working on this code. Feel free to re-inline it once this | |||||
| // code is more settled. | ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| { | ||||
| let cmp_value = | let (pick, overwrite) = compare_value( | ||||
| |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| { | |||||
| compare_value( | |||||
| path_map, | path_map, | ||||
| current_merge, | current_merge, | ||||
| changes, | changes, | ||||
| dest, | dest, | ||||
| src_minor, | src_minor, | ||||
| src_major, | src_major, | ||||
| ) | ); | ||||
| }; | |||||
| if minor.is_empty() { | |||||
| major | |||||
| } else if major.is_empty() { | |||||
| minor | |||||
| } else if minor.len() * 2 < major.len() { | |||||
| // Lets says we are merging two InternalPathCopies instance A and B. | |||||
| // | |||||
| // If A contains N items, the merge result will never contains more | |||||
| // than N values differents than the one in A | |||||
| // | |||||
| // If B contains M items, with M > N, the merge result will always | |||||
| // result in a minimum of M - N value differents than the on in | |||||
| // A | |||||
| // | |||||
| // As a result, if N < (M-N), we know that simply iterating over A will | |||||
| // yield less difference than iterating over the difference | |||||
| // between A and B. | |||||
| // | |||||
| // This help performance a lot in case were a tiny | |||||
| // InternalPathCopies is merged with a much larger one. | |||||
| for (dest, src_minor) in minor { | |||||
| let src_major = major.get(&dest); | |||||
| match src_major { | |||||
| None => { | |||||
| major.insert(dest, src_minor); | |||||
| } | |||||
| Some(src_major) => { | |||||
| let (pick, overwrite) = | |||||
| cmp_value(&dest, &src_minor, src_major); | |||||
| if overwrite { | |||||
| let src = match pick { | |||||
| MergePick::Major => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| src_major, | |||||
| &src_minor, | |||||
| ), | |||||
| MergePick::Minor => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| &src_minor, | |||||
| src_major, | |||||
| ), | |||||
| MergePick::Any => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| src_major, | |||||
| &src_minor, | |||||
| ), | |||||
| }; | |||||
| major.insert(dest, src); | |||||
| } else { | |||||
| match pick { | |||||
| MergePick::Any | MergePick::Major => None, | |||||
| MergePick::Minor => major.insert(dest, src_minor), | |||||
| }; | |||||
| } | |||||
| } | |||||
| }; | |||||
| } | |||||
| major | |||||
| } else if major.len() * 2 < minor.len() { | |||||
| // This use the same rational than the previous block. | |||||
| // (Check previous block documentation for details.) | |||||
| for (dest, src_major) in major { | |||||
| let src_minor = minor.get(&dest); | |||||
| match src_minor { | |||||
| None => { | |||||
| minor.insert(dest, src_major); | |||||
| } | |||||
| Some(src_minor) => { | |||||
| let (pick, overwrite) = | |||||
| cmp_value(&dest, src_minor, &src_major); | |||||
| if overwrite { | if overwrite { | ||||
| let src = match pick { | let (winner, loser) = match pick { | ||||
| MergePick::Major => CopySource::new_from_merge( | MergePick::Major | MergePick::Any => (src_major, src_minor), | ||||
| current_merge, | MergePick::Minor => (src_minor, src_major), | ||||
| &src_major, | |||||
| src_minor, | |||||
| ), | |||||
| MergePick::Minor => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| src_minor, | |||||
| &src_major, | |||||
| ), | |||||
| MergePick::Any => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| &src_major, | |||||
| src_minor, | |||||
| ), | |||||
| }; | |||||
| minor.insert(dest, src); | |||||
| } else { | |||||
| match pick { | |||||
| MergePick::Any | MergePick::Minor => None, | |||||
| MergePick::Major => minor.insert(dest, src_major), | |||||
| }; | |||||
| } | |||||
| } | |||||
| }; | }; | ||||
| } | MergeResult::UseNewValue(CopySource::new_from_merge( | ||||
| minor | |||||
| } else { | |||||
| let mut override_minor = Vec::new(); | |||||
| let mut override_major = Vec::new(); | |||||
| let mut to_major = |k: &PathToken, v: &CopySource| { | |||||
| override_major.push((k.clone(), v.clone())) | |||||
| }; | |||||
| let mut to_minor = |k: &PathToken, v: &CopySource| { | |||||
| override_minor.push((k.clone(), v.clone())) | |||||
| }; | |||||
| // The diff function leverage detection of the identical subpart if | |||||
| // minor and major has some common ancestors. This make it very | |||||
| // fast is most case. | |||||
| // | |||||
| // In case where the two map are vastly different in size, the current | |||||
| // approach is still slowish because the iteration will iterate over | |||||
| // all the "exclusive" content of the larger on. This situation can be | |||||
| // frequent when the subgraph of revision we are processing has a lot | |||||
| // of roots. Each roots adding they own fully new map to the mix (and | |||||
| // likely a small map, if the path from the root to the "main path" is | |||||
| // small. | |||||
| // | |||||
| // We could do better by detecting such situation and processing them | |||||
| // differently. | |||||
| for d in minor.diff(&major) { | |||||
| match d { | |||||
| DiffItem::Add(k, v) => to_minor(k, v), | |||||
| DiffItem::Remove(k, v) => to_major(k, v), | |||||
| DiffItem::Update { old, new } => { | |||||
| let (dest, src_major) = new; | |||||
| let (_, src_minor) = old; | |||||
| let (pick, overwrite) = | |||||
| cmp_value(dest, src_minor, src_major); | |||||
| if overwrite { | |||||
| let src = match pick { | |||||
| MergePick::Major => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| src_major, | |||||
| src_minor, | |||||
| ), | |||||
| MergePick::Minor => CopySource::new_from_merge( | |||||
| current_merge, | |||||
| src_minor, | |||||
| src_major, | |||||
| ), | |||||
| MergePick::Any => CopySource::new_from_merge( | |||||
| current_merge, | current_merge, | ||||
| src_major, | winner, | ||||
| src_minor, | loser, | ||||
| ), | )) | ||||
| }; | |||||
| to_minor(dest, &src); | |||||
| to_major(dest, &src); | |||||
| } else { | } else { | ||||
| match pick { | match pick { | ||||
| MergePick::Major => to_minor(dest, src_major), | MergePick::Any | MergePick::Major => { | ||||
| MergePick::Minor => to_major(dest, src_minor), | MergeResult::UseRightValue | ||||
| // If the two entry are identical, no need to do | |||||
| // anything (but diff should not have yield them) | |||||
| MergePick::Any => unreachable!(), | |||||
| } | |||||
| } | } | ||||
| MergePick::Minor => MergeResult::UseLeftValue, | |||||
| } | } | ||||
| }; | |||||
| } | |||||
| let updates; | |||||
| let mut result; | |||||
| if override_major.is_empty() { | |||||
| result = major | |||||
| } else if override_minor.is_empty() { | |||||
| result = minor | |||||
| } else { | |||||
| if override_minor.len() < override_major.len() { | |||||
| updates = override_minor; | |||||
| result = minor; | |||||
| } else { | |||||
| updates = override_major; | |||||
| result = major; | |||||
| } | |||||
| for (k, v) in updates { | |||||
| result.insert(k, v); | |||||
| } | |||||
| } | |||||
| result | |||||
| } | } | ||||
| }) | |||||
| } | } | ||||
| /// represent the side that should prevail when merging two | /// represent the side that should prevail when merging two | ||||
| /// InternalPathCopies | /// InternalPathCopies | ||||
| enum MergePick { | enum MergePick { | ||||
| /// The "major" (p1) side prevails | /// The "major" (p1) side prevails | ||||
| Major, | Major, | ||||
| /// The "minor" (p2) side prevails | /// The "minor" (p2) side prevails | ||||
| // utils module | // utils module | ||||
| // | // | ||||
| // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | ||||
| // | // | ||||
| // This software may be used and distributed according to the terms of the | // This software may be used and distributed according to the terms of the | ||||
| // GNU General Public License version 2 or any later version. | // GNU General Public License version 2 or any later version. | ||||
| //! Contains useful functions, traits, structs, etc. for use in core. | //! Contains useful functions, traits, structs, etc. for use in core. | ||||
| use crate::utils::hg_path::HgPath; | use crate::utils::hg_path::HgPath; | ||||
| use im_rc::ordmap::DiffItem; | |||||
| use im_rc::ordmap::OrdMap; | |||||
| use std::{io::Write, ops::Deref}; | use std::{io::Write, ops::Deref}; | ||||
| pub mod files; | pub mod files; | ||||
| pub mod hg_path; | pub mod hg_path; | ||||
| pub mod path_auditor; | pub mod path_auditor; | ||||
| /// Useful until rust/issues/56345 is stable | /// Useful until rust/issues/56345 is stable | ||||
| /// | /// | ||||
| // TODO: use the str method when we require Rust 1.45 | // TODO: use the str method when we require Rust 1.45 | ||||
| pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> { | pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> { | ||||
| if s.ends_with(suffix) { | if s.ends_with(suffix) { | ||||
| Some(&s[..s.len() - suffix.len()]) | Some(&s[..s.len() - suffix.len()]) | ||||
| } else { | } else { | ||||
| None | None | ||||
| } | } | ||||
| } | } | ||||
| pub(crate) enum MergeResult<V> { | |||||
| UseLeftValue, | |||||
| UseRightValue, | |||||
| UseNewValue(V), | |||||
| } | |||||
| /// Return the union of the two given maps, | |||||
| /// calling `merge(key, left_value, right_value)` to resolve keys that exist in | |||||
| /// both. | |||||
| /// | |||||
| /// CC https://github.com/bodil/im-rs/issues/166 | |||||
| pub(crate) fn ordmap_union_with_merge<K, V>( | |||||
| left: OrdMap<K, V>, | |||||
| right: OrdMap<K, V>, | |||||
| mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
| ) -> OrdMap<K, V> | |||||
| where | |||||
| K: Clone + Ord, | |||||
| V: Clone + PartialEq, | |||||
| { | |||||
| if left.ptr_eq(&right) { | |||||
| // One of the two maps is an unmodified clone of the other | |||||
| left | |||||
| } else if left.len() / 2 > right.len() { | |||||
| // When two maps have different sizes, | |||||
| // their size difference is a lower bound on | |||||
| // how many keys of the larger map are not also in the smaller map. | |||||
| // This in turn is a lower bound on the number of differences in | |||||
| // `OrdMap::diff` and the "amount of work" that would be done | |||||
| // by `ordmap_union_with_merge_by_diff`. | |||||
| // | |||||
| // Here `left` is more than twice the size of `right`, | |||||
| // so the number of differences is more than the total size of | |||||
| // `right`. Therefore an algorithm based on iterating `right` | |||||
| // is more efficient. | |||||
| // | |||||
| // This helps a lot when a tiny (or empty) map is merged | |||||
| // with a large one. | |||||
| ordmap_union_with_merge_by_iter(left, right, merge) | |||||
| } else if left.len() < right.len() / 2 { | |||||
| // Same as above but with `left` and `right` swapped | |||||
| ordmap_union_with_merge_by_iter(right, left, |key, a, b| { | |||||
| // Also swapped in `merge` arguments: | |||||
| match merge(key, b, a) { | |||||
| MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), | |||||
| // … and swap back in `merge` result: | |||||
| MergeResult::UseLeftValue => MergeResult::UseRightValue, | |||||
| MergeResult::UseRightValue => MergeResult::UseLeftValue, | |||||
| } | |||||
| }) | |||||
| } else { | |||||
| // For maps of similar size, use the algorithm based on `OrdMap::diff` | |||||
| ordmap_union_with_merge_by_diff(left, right, merge) | |||||
| } | |||||
| } | |||||
| /// Efficient if `right` is much smaller than `left` | |||||
| fn ordmap_union_with_merge_by_iter<K, V>( | |||||
| mut left: OrdMap<K, V>, | |||||
| right: OrdMap<K, V>, | |||||
| mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
| ) -> OrdMap<K, V> | |||||
| where | |||||
| K: Clone + Ord, | |||||
| V: Clone, | |||||
| { | |||||
| for (key, right_value) in right { | |||||
| match left.get(&key) { | |||||
| None => { | |||||
| left.insert(key, right_value); | |||||
| } | |||||
| Some(left_value) => match merge(&key, left_value, &right_value) { | |||||
| MergeResult::UseLeftValue => {} | |||||
| MergeResult::UseRightValue => { | |||||
| left.insert(key, right_value); | |||||
| } | |||||
| MergeResult::UseNewValue(new_value) => { | |||||
| left.insert(key, new_value); | |||||
| } | |||||
| }, | |||||
| } | |||||
| } | |||||
| left | |||||
| } | |||||
| /// Fallback when both maps are of similar size | |||||
| fn ordmap_union_with_merge_by_diff<K, V>( | |||||
| mut left: OrdMap<K, V>, | |||||
| mut right: OrdMap<K, V>, | |||||
| mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
| ) -> OrdMap<K, V> | |||||
| where | |||||
| K: Clone + Ord, | |||||
| V: Clone + PartialEq, | |||||
| { | |||||
| // (key, value) pairs that would need to be inserted in either map | |||||
| // in order to turn it into the union. | |||||
| // | |||||
| // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, | |||||
| // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` | |||||
| // with `left_updates` only borrowing from `right` and `right_updates` from | |||||
| // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. | |||||
| // | |||||
| // This would allow moving all `.clone()` calls to after we’ve decided | |||||
| // which of `right_updates` or `left_updates` to use | |||||
| // (value ones becoming `Cow::into_owned`), | |||||
| // and avoid making clones we don’t end up using. | |||||
| let mut left_updates = Vec::new(); | |||||
| let mut right_updates = Vec::new(); | |||||
| for difference in left.diff(&right) { | |||||
| match difference { | |||||
| DiffItem::Add(key, value) => { | |||||
| left_updates.push((key.clone(), value.clone())) | |||||
| } | |||||
| DiffItem::Remove(key, value) => { | |||||
| right_updates.push((key.clone(), value.clone())) | |||||
| } | |||||
| DiffItem::Update { | |||||
| old: (key, left_value), | |||||
| new: (_, right_value), | |||||
| } => match merge(key, left_value, right_value) { | |||||
| MergeResult::UseLeftValue => { | |||||
| right_updates.push((key.clone(), left_value.clone())) | |||||
| } | |||||
| MergeResult::UseRightValue => { | |||||
| left_updates.push((key.clone(), right_value.clone())) | |||||
| } | |||||
| MergeResult::UseNewValue(new_value) => { | |||||
| left_updates.push((key.clone(), new_value.clone())); | |||||
| right_updates.push((key.clone(), new_value)) | |||||
| } | |||||
| }, | |||||
| } | |||||
| } | |||||
| if left_updates.len() < right_updates.len() { | |||||
| for (key, value) in left_updates { | |||||
| left.insert(key, value); | |||||
| } | |||||
| left | |||||
| } else { | |||||
| for (key, value) in right_updates { | |||||
| right.insert(key, value); | |||||
| } | |||||
| right | |||||
| } | |||||
| } | |||||