This deduplicates the copy-tracing-specific logic
Details
Details
Diff Detail
Diff Detail
- Repository
- rHG Mercurial
- Branch
- default
- Lint
No Linters Available - Unit
No Unit Test Coverage
This deduplicates the copy-tracing-specific logic
No Linters Available |
No Unit Test Coverage |
Path | Packages | |||
---|---|---|---|---|
M | rust/hg-core/src/copy_tracing.rs (227 lines) | |||
M | rust/hg-core/src/utils.rs (150 lines) |
Commit | Parents | Author | Summary | Date |
---|---|---|---|---|
1816f8bd5e2a | 84247c32d358 | Simon Sapin | Dec 23 2020, 5:48 AM |
use crate::utils::hg_path::HgPath; | use crate::utils::hg_path::HgPath; | ||||
use crate::utils::hg_path::HgPathBuf; | use crate::utils::hg_path::HgPathBuf; | ||||
use crate::Revision; | use crate::Revision; | ||||
use crate::NULL_REVISION; | use crate::NULL_REVISION; | ||||
use im_rc::ordmap::DiffItem; | |||||
use im_rc::ordmap::Entry; | use im_rc::ordmap::Entry; | ||||
use im_rc::ordmap::OrdMap; | use im_rc::ordmap::OrdMap; | ||||
use im_rc::OrdSet; | use im_rc::OrdSet; | ||||
use std::cmp::Ordering; | use std::cmp::Ordering; | ||||
use std::collections::HashMap; | use std::collections::HashMap; | ||||
use std::convert::TryInto; | use std::convert::TryInto; | ||||
/// merge two copies-mapping together, minor and major | /// merge two copies-mapping together, minor and major | ||||
/// | /// | ||||
/// In case of conflict, value from "major" will be picked, unless in some | /// In case of conflict, value from "major" will be picked, unless in some | ||||
/// cases. See inline documentation for details. | /// cases. See inline documentation for details. | ||||
fn merge_copies_dict( | fn merge_copies_dict( | ||||
path_map: &TwoWayPathMap, | path_map: &TwoWayPathMap, | ||||
current_merge: Revision, | current_merge: Revision, | ||||
mut minor: InternalPathCopies, | minor: InternalPathCopies, | ||||
mut major: InternalPathCopies, | major: InternalPathCopies, | ||||
changes: &ChangedFiles, | changes: &ChangedFiles, | ||||
) -> InternalPathCopies { | ) -> InternalPathCopies { | ||||
// This closure exist as temporary help while multiple developper are | use crate::utils::{ordmap_union_with_merge, MergeResult}; | ||||
// actively working on this code. Feel free to re-inline it once this | |||||
// code is more settled. | ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| { | ||||
let cmp_value = | let (pick, overwrite) = compare_value( | ||||
|dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| { | |||||
compare_value( | |||||
path_map, | path_map, | ||||
current_merge, | current_merge, | ||||
changes, | changes, | ||||
dest, | dest, | ||||
src_minor, | src_minor, | ||||
src_major, | src_major, | ||||
) | ); | ||||
}; | |||||
if minor.is_empty() { | |||||
major | |||||
} else if major.is_empty() { | |||||
minor | |||||
} else if minor.len() * 2 < major.len() { | |||||
// Lets says we are merging two InternalPathCopies instance A and B. | |||||
// | |||||
// If A contains N items, the merge result will never contains more | |||||
// than N values differents than the one in A | |||||
// | |||||
// If B contains M items, with M > N, the merge result will always | |||||
// result in a minimum of M - N value differents than the on in | |||||
// A | |||||
// | |||||
// As a result, if N < (M-N), we know that simply iterating over A will | |||||
// yield less difference than iterating over the difference | |||||
// between A and B. | |||||
// | |||||
// This help performance a lot in case were a tiny | |||||
// InternalPathCopies is merged with a much larger one. | |||||
for (dest, src_minor) in minor { | |||||
let src_major = major.get(&dest); | |||||
match src_major { | |||||
None => { | |||||
major.insert(dest, src_minor); | |||||
} | |||||
Some(src_major) => { | |||||
let (pick, overwrite) = | |||||
cmp_value(&dest, &src_minor, src_major); | |||||
if overwrite { | |||||
let src = match pick { | |||||
MergePick::Major => CopySource::new_from_merge( | |||||
current_merge, | |||||
src_major, | |||||
&src_minor, | |||||
), | |||||
MergePick::Minor => CopySource::new_from_merge( | |||||
current_merge, | |||||
&src_minor, | |||||
src_major, | |||||
), | |||||
MergePick::Any => CopySource::new_from_merge( | |||||
current_merge, | |||||
src_major, | |||||
&src_minor, | |||||
), | |||||
}; | |||||
major.insert(dest, src); | |||||
} else { | |||||
match pick { | |||||
MergePick::Any | MergePick::Major => None, | |||||
MergePick::Minor => major.insert(dest, src_minor), | |||||
}; | |||||
} | |||||
} | |||||
}; | |||||
} | |||||
major | |||||
} else if major.len() * 2 < minor.len() { | |||||
// This use the same rational than the previous block. | |||||
// (Check previous block documentation for details.) | |||||
for (dest, src_major) in major { | |||||
let src_minor = minor.get(&dest); | |||||
match src_minor { | |||||
None => { | |||||
minor.insert(dest, src_major); | |||||
} | |||||
Some(src_minor) => { | |||||
let (pick, overwrite) = | |||||
cmp_value(&dest, src_minor, &src_major); | |||||
if overwrite { | if overwrite { | ||||
let src = match pick { | let (winner, loser) = match pick { | ||||
MergePick::Major => CopySource::new_from_merge( | MergePick::Major | MergePick::Any => (src_major, src_minor), | ||||
current_merge, | MergePick::Minor => (src_minor, src_major), | ||||
&src_major, | |||||
src_minor, | |||||
), | |||||
MergePick::Minor => CopySource::new_from_merge( | |||||
current_merge, | |||||
src_minor, | |||||
&src_major, | |||||
), | |||||
MergePick::Any => CopySource::new_from_merge( | |||||
current_merge, | |||||
&src_major, | |||||
src_minor, | |||||
), | |||||
}; | |||||
minor.insert(dest, src); | |||||
} else { | |||||
match pick { | |||||
MergePick::Any | MergePick::Minor => None, | |||||
MergePick::Major => minor.insert(dest, src_major), | |||||
}; | |||||
} | |||||
} | |||||
}; | }; | ||||
} | MergeResult::UseNewValue(CopySource::new_from_merge( | ||||
minor | |||||
} else { | |||||
let mut override_minor = Vec::new(); | |||||
let mut override_major = Vec::new(); | |||||
let mut to_major = |k: &PathToken, v: &CopySource| { | |||||
override_major.push((k.clone(), v.clone())) | |||||
}; | |||||
let mut to_minor = |k: &PathToken, v: &CopySource| { | |||||
override_minor.push((k.clone(), v.clone())) | |||||
}; | |||||
// The diff function leverage detection of the identical subpart if | |||||
// minor and major has some common ancestors. This make it very | |||||
// fast is most case. | |||||
// | |||||
// In case where the two map are vastly different in size, the current | |||||
// approach is still slowish because the iteration will iterate over | |||||
// all the "exclusive" content of the larger on. This situation can be | |||||
// frequent when the subgraph of revision we are processing has a lot | |||||
// of roots. Each roots adding they own fully new map to the mix (and | |||||
// likely a small map, if the path from the root to the "main path" is | |||||
// small. | |||||
// | |||||
// We could do better by detecting such situation and processing them | |||||
// differently. | |||||
for d in minor.diff(&major) { | |||||
match d { | |||||
DiffItem::Add(k, v) => to_minor(k, v), | |||||
DiffItem::Remove(k, v) => to_major(k, v), | |||||
DiffItem::Update { old, new } => { | |||||
let (dest, src_major) = new; | |||||
let (_, src_minor) = old; | |||||
let (pick, overwrite) = | |||||
cmp_value(dest, src_minor, src_major); | |||||
if overwrite { | |||||
let src = match pick { | |||||
MergePick::Major => CopySource::new_from_merge( | |||||
current_merge, | |||||
src_major, | |||||
src_minor, | |||||
), | |||||
MergePick::Minor => CopySource::new_from_merge( | |||||
current_merge, | |||||
src_minor, | |||||
src_major, | |||||
), | |||||
MergePick::Any => CopySource::new_from_merge( | |||||
current_merge, | current_merge, | ||||
src_major, | winner, | ||||
src_minor, | loser, | ||||
), | )) | ||||
}; | |||||
to_minor(dest, &src); | |||||
to_major(dest, &src); | |||||
} else { | } else { | ||||
match pick { | match pick { | ||||
MergePick::Major => to_minor(dest, src_major), | MergePick::Any | MergePick::Major => { | ||||
MergePick::Minor => to_major(dest, src_minor), | MergeResult::UseRightValue | ||||
// If the two entry are identical, no need to do | |||||
// anything (but diff should not have yield them) | |||||
MergePick::Any => unreachable!(), | |||||
} | |||||
} | } | ||||
MergePick::Minor => MergeResult::UseLeftValue, | |||||
} | } | ||||
}; | |||||
} | |||||
let updates; | |||||
let mut result; | |||||
if override_major.is_empty() { | |||||
result = major | |||||
} else if override_minor.is_empty() { | |||||
result = minor | |||||
} else { | |||||
if override_minor.len() < override_major.len() { | |||||
updates = override_minor; | |||||
result = minor; | |||||
} else { | |||||
updates = override_major; | |||||
result = major; | |||||
} | |||||
for (k, v) in updates { | |||||
result.insert(k, v); | |||||
} | |||||
} | |||||
result | |||||
} | } | ||||
}) | |||||
} | } | ||||
/// represent the side that should prevail when merging two | /// represent the side that should prevail when merging two | ||||
/// InternalPathCopies | /// InternalPathCopies | ||||
enum MergePick { | enum MergePick { | ||||
/// The "major" (p1) side prevails | /// The "major" (p1) side prevails | ||||
Major, | Major, | ||||
/// The "minor" (p2) side prevails | /// The "minor" (p2) side prevails |
// utils module | // utils module | ||||
// | // | ||||
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | ||||
// | // | ||||
// This software may be used and distributed according to the terms of the | // This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | // GNU General Public License version 2 or any later version. | ||||
//! Contains useful functions, traits, structs, etc. for use in core. | //! Contains useful functions, traits, structs, etc. for use in core. | ||||
use crate::errors::{HgError, IoErrorContext}; | use crate::errors::{HgError, IoErrorContext}; | ||||
use crate::utils::hg_path::HgPath; | use crate::utils::hg_path::HgPath; | ||||
use im_rc::ordmap::DiffItem; | |||||
use im_rc::ordmap::OrdMap; | |||||
use std::{io::Write, ops::Deref}; | use std::{io::Write, ops::Deref}; | ||||
pub mod files; | pub mod files; | ||||
pub mod hg_path; | pub mod hg_path; | ||||
pub mod path_auditor; | pub mod path_auditor; | ||||
/// Useful until rust/issues/56345 is stable | /// Useful until rust/issues/56345 is stable | ||||
/// | /// | ||||
} | } | ||||
pub fn current_exe() -> Result<std::path::PathBuf, HgError> { | pub fn current_exe() -> Result<std::path::PathBuf, HgError> { | ||||
std::env::current_exe().map_err(|error| HgError::IoError { | std::env::current_exe().map_err(|error| HgError::IoError { | ||||
error, | error, | ||||
context: IoErrorContext::CurrentExe, | context: IoErrorContext::CurrentExe, | ||||
}) | }) | ||||
} | } | ||||
pub(crate) enum MergeResult<V> { | |||||
UseLeftValue, | |||||
UseRightValue, | |||||
UseNewValue(V), | |||||
} | |||||
/// Return the union of the two given maps, | |||||
/// calling `merge(key, left_value, right_value)` to resolve keys that exist in | |||||
/// both. | |||||
/// | |||||
/// CC https://github.com/bodil/im-rs/issues/166 | |||||
pub(crate) fn ordmap_union_with_merge<K, V>( | |||||
left: OrdMap<K, V>, | |||||
right: OrdMap<K, V>, | |||||
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
) -> OrdMap<K, V> | |||||
where | |||||
K: Clone + Ord, | |||||
V: Clone + PartialEq, | |||||
{ | |||||
if left.ptr_eq(&right) { | |||||
// One of the two maps is an unmodified clone of the other | |||||
left | |||||
} else if left.len() / 2 > right.len() { | |||||
// When two maps have different sizes, | |||||
// their size difference is a lower bound on | |||||
// how many keys of the larger map are not also in the smaller map. | |||||
// This in turn is a lower bound on the number of differences in | |||||
// `OrdMap::diff` and the "amount of work" that would be done | |||||
// by `ordmap_union_with_merge_by_diff`. | |||||
// | |||||
// Here `left` is more than twice the size of `right`, | |||||
// so the number of differences is more than the total size of | |||||
// `right`. Therefore an algorithm based on iterating `right` | |||||
// is more efficient. | |||||
// | |||||
// This helps a lot when a tiny (or empty) map is merged | |||||
// with a large one. | |||||
ordmap_union_with_merge_by_iter(left, right, merge) | |||||
} else if left.len() < right.len() / 2 { | |||||
// Same as above but with `left` and `right` swapped | |||||
ordmap_union_with_merge_by_iter(right, left, |key, a, b| { | |||||
// Also swapped in `merge` arguments: | |||||
match merge(key, b, a) { | |||||
MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), | |||||
// … and swap back in `merge` result: | |||||
MergeResult::UseLeftValue => MergeResult::UseRightValue, | |||||
MergeResult::UseRightValue => MergeResult::UseLeftValue, | |||||
} | |||||
}) | |||||
} else { | |||||
// For maps of similar size, use the algorithm based on `OrdMap::diff` | |||||
ordmap_union_with_merge_by_diff(left, right, merge) | |||||
} | |||||
} | |||||
/// Efficient if `right` is much smaller than `left` | |||||
fn ordmap_union_with_merge_by_iter<K, V>( | |||||
mut left: OrdMap<K, V>, | |||||
right: OrdMap<K, V>, | |||||
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
) -> OrdMap<K, V> | |||||
where | |||||
K: Clone + Ord, | |||||
V: Clone, | |||||
{ | |||||
for (key, right_value) in right { | |||||
match left.get(&key) { | |||||
None => { | |||||
left.insert(key, right_value); | |||||
} | |||||
Some(left_value) => match merge(&key, left_value, &right_value) { | |||||
MergeResult::UseLeftValue => {} | |||||
MergeResult::UseRightValue => { | |||||
left.insert(key, right_value); | |||||
} | |||||
MergeResult::UseNewValue(new_value) => { | |||||
left.insert(key, new_value); | |||||
} | |||||
}, | |||||
} | |||||
} | |||||
left | |||||
} | |||||
/// Fallback when both maps are of similar size | |||||
fn ordmap_union_with_merge_by_diff<K, V>( | |||||
mut left: OrdMap<K, V>, | |||||
mut right: OrdMap<K, V>, | |||||
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |||||
) -> OrdMap<K, V> | |||||
where | |||||
K: Clone + Ord, | |||||
V: Clone + PartialEq, | |||||
{ | |||||
// (key, value) pairs that would need to be inserted in either map | |||||
// in order to turn it into the union. | |||||
// | |||||
// TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, | |||||
// change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` | |||||
// with `left_updates` only borrowing from `right` and `right_updates` from | |||||
// `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. | |||||
// | |||||
// This would allow moving all `.clone()` calls to after we’ve decided | |||||
// which of `right_updates` or `left_updates` to use | |||||
// (value ones becoming `Cow::into_owned`), | |||||
// and avoid making clones we don’t end up using. | |||||
let mut left_updates = Vec::new(); | |||||
let mut right_updates = Vec::new(); | |||||
for difference in left.diff(&right) { | |||||
match difference { | |||||
DiffItem::Add(key, value) => { | |||||
left_updates.push((key.clone(), value.clone())) | |||||
} | |||||
DiffItem::Remove(key, value) => { | |||||
right_updates.push((key.clone(), value.clone())) | |||||
} | |||||
DiffItem::Update { | |||||
old: (key, left_value), | |||||
new: (_, right_value), | |||||
} => match merge(key, left_value, right_value) { | |||||
MergeResult::UseLeftValue => { | |||||
right_updates.push((key.clone(), left_value.clone())) | |||||
} | |||||
MergeResult::UseRightValue => { | |||||
left_updates.push((key.clone(), right_value.clone())) | |||||
} | |||||
MergeResult::UseNewValue(new_value) => { | |||||
left_updates.push((key.clone(), new_value.clone())); | |||||
right_updates.push((key.clone(), new_value)) | |||||
} | |||||
}, | |||||
} | |||||
} | |||||
if left_updates.len() < right_updates.len() { | |||||
for (key, value) in left_updates { | |||||
left.insert(key, value); | |||||
} | |||||
left | |||||
} else { | |||||
for (key, value) in right_updates { | |||||
right.insert(key, value); | |||||
} | |||||
right | |||||
} | |||||
} |