This lets rhg support censor.policy=ignore without having
to duplicate that logic.
Also, change the censor test so that it now exercises rhg:
extensions are disabled except when we call [hg censor].
This lets rhg support censor.policy=ignore without having
to duplicate that logic.
Also, change the censor test so that it now exercises rhg:
extensions are disabled except when we call [hg censor].
Automatic diff as part of commit; lint not applicable. |
Automatic diff as part of commit; unit tests not applicable. |
FYI, this broke the format test; I'll amend it: https://foss.heptapod.net/mercurial/mercurial-devel/-/jobs/550120#L59
Path | Packages | |||
---|---|---|---|---|
M | rust/hg-core/src/errors.rs (6 lines) | |||
M | rust/hg-core/src/revlog/filelog.rs (2 lines) | |||
M | rust/hg-core/src/revlog/revlog.rs (19 lines) | |||
M | rust/rhg/src/error.rs (3 lines) | |||
M | tests/test-censor.t (26 lines) |
/// A configuration value is not in the expected syntax. | /// A configuration value is not in the expected syntax. | ||||
/// | /// | ||||
/// These errors can happen in many places in the code because values are | /// These errors can happen in many places in the code because values are | ||||
/// parsed lazily as the file-level parser does not know the expected type | /// parsed lazily as the file-level parser does not know the expected type | ||||
/// and syntax of each value. | /// and syntax of each value. | ||||
#[from] | #[from] | ||||
ConfigValueParseError(ConfigValueParseError), | ConfigValueParseError(ConfigValueParseError), | ||||
/// Censored revision data. | |||||
CensoredNodeError, | |||||
} | } | ||||
/// Details about where an I/O error happened | /// Details about where an I/O error happened | ||||
#[derive(Debug)] | #[derive(Debug)] | ||||
pub enum IoErrorContext { | pub enum IoErrorContext { | ||||
/// `std::fs::metadata` | /// `std::fs::metadata` | ||||
ReadingMetadata(std::path::PathBuf), | ReadingMetadata(std::path::PathBuf), | ||||
ReadingFile(std::path::PathBuf), | ReadingFile(std::path::PathBuf), | ||||
write!(f, "abort: {}: {}", context, error) | write!(f, "abort: {}: {}", context, error) | ||||
} | } | ||||
HgError::CorruptedRepository(explanation) => { | HgError::CorruptedRepository(explanation) => { | ||||
write!(f, "abort: {}", explanation) | write!(f, "abort: {}", explanation) | ||||
} | } | ||||
HgError::UnsupportedFeature(explanation) => { | HgError::UnsupportedFeature(explanation) => { | ||||
write!(f, "unsupported feature: {}", explanation) | write!(f, "unsupported feature: {}", explanation) | ||||
} | } | ||||
HgError::CensoredNodeError => { | |||||
write!(f, "encountered a censored node") | |||||
} | |||||
HgError::ConfigValueParseError(error) => error.fmt(f), | HgError::ConfigValueParseError(error) => error.fmt(f), | ||||
} | } | ||||
} | } | ||||
} | } | ||||
// TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly? | // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly? | ||||
impl fmt::Display for IoErrorContext { | impl fmt::Display for IoErrorContext { | ||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | ||||
// Relevant code that implement this behavior in Python code: | // Relevant code that implement this behavior in Python code: | ||||
// basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | ||||
// revlog.size, revlog.rawsize | // revlog.size, revlog.rawsize | ||||
// Let’s call `file_data_len` what would be returned by | // Let’s call `file_data_len` what would be returned by | ||||
// `self.data().file_data().len()`. | // `self.data().file_data().len()`. | ||||
if self.0.is_cencored() { | if self.0.is_censored() { | ||||
let file_data_len = 0; | let file_data_len = 0; | ||||
return other_len != file_data_len; | return other_len != file_data_len; | ||||
} | } | ||||
if self.0.has_length_affecting_flag_processor() { | if self.0.has_length_affecting_flag_processor() { | ||||
// We can’t conclude anything about `file_data_len`. | // We can’t conclude anything about `file_data_len`. | ||||
return false; | return false; | ||||
} | } |
pub fn p2(&self) -> Option<Revision> { | pub fn p2(&self) -> Option<Revision> { | ||||
if self.p2 == NULL_REVISION { | if self.p2 == NULL_REVISION { | ||||
None | None | ||||
} else { | } else { | ||||
Some(self.p2) | Some(self.p2) | ||||
} | } | ||||
} | } | ||||
pub fn is_cencored(&self) -> bool { | pub fn is_censored(&self) -> bool { | ||||
(self.flags & REVISION_FLAG_CENSORED) != 0 | (self.flags & REVISION_FLAG_CENSORED) != 0 | ||||
} | } | ||||
pub fn has_length_affecting_flag_processor(&self) -> bool { | pub fn has_length_affecting_flag_processor(&self) -> bool { | ||||
// Relevant Python code: revlog.size() | // Relevant Python code: revlog.size() | ||||
// note: ELLIPSIS is known to not change the content | // note: ELLIPSIS is known to not change the content | ||||
(self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | ||||
} | } | ||||
/// The data for this entry, after resolving deltas if any. | /// The data for this entry, after resolving deltas if any. | ||||
pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { | pub fn rawdata(&self) -> Result<Cow<'a, [u8]>, HgError> { | ||||
let mut entry = self.clone(); | let mut entry = self.clone(); | ||||
let mut delta_chain = vec![]; | let mut delta_chain = vec![]; | ||||
// The meaning of `base_rev_or_base_of_delta_chain` depends on | // The meaning of `base_rev_or_base_of_delta_chain` depends on | ||||
// generaldelta. See the doc on `ENTRY_DELTA_BASE` in | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in | ||||
// `mercurial/revlogutils/constants.py` and the code in | // `mercurial/revlogutils/constants.py` and the code in | ||||
// [_chaininfo] and in [index_deltachain]. | // [_chaininfo] and in [index_deltachain]. | ||||
let uses_generaldelta = self.revlog.index.uses_generaldelta(); | let uses_generaldelta = self.revlog.index.uses_generaldelta(); | ||||
while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { | ||||
let base_rev = if uses_generaldelta { | let base_rev = if uses_generaldelta { | ||||
base_rev | base_rev | ||||
} else { | } else { | ||||
entry.rev - 1 | entry.rev - 1 | ||||
}; | }; | ||||
delta_chain.push(entry); | delta_chain.push(entry); | ||||
entry = self.revlog.get_entry_internal(base_rev)?; | entry = self.revlog.get_entry_internal(base_rev)?; | ||||
} | } | ||||
let data = if delta_chain.is_empty() { | let data = if delta_chain.is_empty() { | ||||
entry.data_chunk()? | entry.data_chunk()? | ||||
} else { | } else { | ||||
Revlog::build_data_from_deltas(entry, &delta_chain)?.into() | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() | ||||
}; | }; | ||||
Ok(data) | |||||
} | |||||
fn check_data( | |||||
&self, | |||||
data: Cow<'a, [u8]>, | |||||
) -> Result<Cow<'a, [u8]>, HgError> { | |||||
if self.revlog.check_hash( | if self.revlog.check_hash( | ||||
self.p1, | self.p1, | ||||
self.p2, | self.p2, | ||||
self.hash.as_bytes(), | self.hash.as_bytes(), | ||||
&data, | &data, | ||||
) { | ) { | ||||
Ok(data) | Ok(data) | ||||
} else { | } else { | ||||
Err(corrupted()) | Err(corrupted()) | ||||
} | } | ||||
} | } | ||||
pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> { | |||||
let data = self.rawdata()?; | |||||
if self.is_censored() { | |||||
return Err(HgError::CensoredNodeError) | |||||
} | |||||
self.check_data(data) | |||||
} | |||||
/// Extract the data contained in the entry. | /// Extract the data contained in the entry. | ||||
/// This may be a delta. (See `is_delta`.) | /// This may be a delta. (See `is_delta`.) | ||||
fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> { | fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> { | ||||
if self.bytes.is_empty() { | if self.bytes.is_empty() { | ||||
return Ok(Cow::Borrowed(&[])); | return Ok(Cow::Borrowed(&[])); | ||||
} | } | ||||
match self.bytes[0] { | match self.bytes[0] { | ||||
// Revision data is the entirety of the entry, including this | // Revision data is the entirety of the entry, including this |
} | } | ||||
impl From<HgError> for CommandError { | impl From<HgError> for CommandError { | ||||
fn from(error: HgError) -> Self { | fn from(error: HgError) -> Self { | ||||
match error { | match error { | ||||
HgError::UnsupportedFeature(message) => { | HgError::UnsupportedFeature(message) => { | ||||
CommandError::unsupported(message) | CommandError::unsupported(message) | ||||
} | } | ||||
HgError::CensoredNodeError => { | |||||
CommandError::unsupported("Encountered a censored node") | |||||
} | |||||
HgError::Abort { | HgError::Abort { | ||||
message, | message, | ||||
detailed_exit_code, | detailed_exit_code, | ||||
} => { | } => { | ||||
CommandError::abort_with_exit_code(message, detailed_exit_code) | CommandError::abort_with_exit_code(message, detailed_exit_code) | ||||
} | } | ||||
_ => CommandError::abort(error.to_string()), | _ => CommandError::abort(error.to_string()), | ||||
} | } |
#require no-reposimplestore | #require no-reposimplestore | ||||
#testcases revlogv1 revlogv2 | #testcases revlogv1 revlogv2 | ||||
#if revlogv2 | #if revlogv2 | ||||
$ cat >> $HGRCPATH <<EOF | $ cat >> $HGRCPATH <<EOF | ||||
> [experimental] | > [experimental] | ||||
> revlogv2=enable-unstable-format-and-corrupt-my-data | > revlogv2=enable-unstable-format-and-corrupt-my-data | ||||
> EOF | > EOF | ||||
#endif | #endif | ||||
$ cat >> $HGRCPATH <<EOF | |||||
> [extensions] | |||||
> censor= | |||||
> EOF | |||||
$ cp $HGRCPATH $HGRCPATH.orig | $ cp $HGRCPATH $HGRCPATH.orig | ||||
Create repo with unimpeachable content | Create repo with unimpeachable content | ||||
$ hg init r | $ hg init r | ||||
$ cd r | $ cd r | ||||
$ echo 'Initially untainted file' > target | $ echo 'Initially untainted file' > target | ||||
$ echo 'Normal file here' > bystander | $ echo 'Normal file here' > bystander | ||||
$ hg cat -r 0 target | head -n 10 | $ hg cat -r 0 target | head -n 10 | ||||
Initially untainted file | Initially untainted file | ||||
Censor revision with 2 offenses | Censor revision with 2 offenses | ||||
(this also tests file pattern matching: path relative to cwd case) | (this also tests file pattern matching: path relative to cwd case) | ||||
$ mkdir -p foo/bar/baz | $ mkdir -p foo/bar/baz | ||||
$ hg --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target | $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target | ||||
$ hg cat -r $H1 target | head -n 10 | $ hg cat -r $H1 target | head -n 10 | ||||
Tainted file is now sanitized | Tainted file is now sanitized | ||||
$ hg cat -r $H2 target | head -n 10 | $ hg cat -r $H2 target | head -n 10 | ||||
Tainted file now super sanitized | Tainted file now super sanitized | ||||
$ hg cat -r $C2 target | head -n 10 | $ hg cat -r $C2 target | head -n 10 | ||||
abort: censored node: 1e0247a9a4b7 | abort: censored node: 1e0247a9a4b7 | ||||
(set censor.policy to ignore errors) | (set censor.policy to ignore errors) | ||||
$ hg cat -r $C1 target | head -n 10 | $ hg cat -r $C1 target | head -n 10 | ||||
Tainted file | Tainted file | ||||
Passwords: hunter2 | Passwords: hunter2 | ||||
$ hg cat -r 0 target | head -n 10 | $ hg cat -r 0 target | head -n 10 | ||||
Initially untainted file | Initially untainted file | ||||
Censor revision with 1 offense | Censor revision with 1 offense | ||||
(this also tests file pattern matching: with 'path:' scheme) | (this also tests file pattern matching: with 'path:' scheme) | ||||
$ hg --cwd foo/bar/baz censor -r $C1 path:target | $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C1 path:target | ||||
$ hg cat -r $H1 target | head -n 10 | $ hg cat -r $H1 target | head -n 10 | ||||
Tainted file is now sanitized | Tainted file is now sanitized | ||||
$ hg cat -r $H2 target | head -n 10 | $ hg cat -r $H2 target | head -n 10 | ||||
Tainted file now super sanitized | Tainted file now super sanitized | ||||
$ hg cat -r $C2 target | head -n 10 | $ hg cat -r $C2 target | head -n 10 | ||||
abort: censored node: 1e0247a9a4b7 | abort: censored node: 1e0247a9a4b7 | ||||
(set censor.policy to ignore errors) | (set censor.policy to ignore errors) | ||||
$ hg cat -r $C1 target | head -n 10 | $ hg cat -r $C1 target | head -n 10 | ||||
with the file censored, but we can't censor at a head, so advance H1. | with the file censored, but we can't censor at a head, so advance H1. | ||||
$ hg update -r $H1 | $ hg update -r $H1 | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
$ C3=$H1 | $ C3=$H1 | ||||
$ echo 'advanced head H1' > target | $ echo 'advanced head H1' > target | ||||
$ hg ci -m 'advance head H1' target | $ hg ci -m 'advance head H1' target | ||||
$ H1=`hg id --debug -i` | $ H1=`hg id --debug -i` | ||||
$ hg censor -r $C3 target | $ hg --config extensions.censor= censor -r $C3 target | ||||
$ hg update -r $H2 | $ hg update -r $H2 | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
$ hg merge -r $C3 | $ hg merge -r $C3 | ||||
merging target | merging target | ||||
0 files updated, 1 files merged, 0 files removed, 0 files unresolved | 0 files updated, 1 files merged, 0 files removed, 0 files unresolved | ||||
(branch merge, don't forget to commit) | (branch merge, don't forget to commit) | ||||
Revisions present in repository heads may not be censored | Revisions present in repository heads may not be censored | ||||
$ hg update -C -r $H2 | $ hg update -C -r $H2 | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
$ hg censor -r $H2 target | $ hg --config extensions.censor= censor -r $H2 target | ||||
abort: cannot censor file in heads (78a8fc215e79) | abort: cannot censor file in heads (78a8fc215e79) | ||||
(clean/delete and commit first) | (clean/delete and commit first) | ||||
[255] | [255] | ||||
$ echo 'twiddling thumbs' > bystander | $ echo 'twiddling thumbs' > bystander | ||||
$ hg ci -m 'bystander commit' | $ hg ci -m 'bystander commit' | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ hg censor -r "$H2^" target | $ hg --config extensions.censor= censor -r "$H2^" target | ||||
abort: cannot censor file in heads (efbe78065929) | abort: cannot censor file in heads (efbe78065929) | ||||
(clean/delete and commit first) | (clean/delete and commit first) | ||||
[255] | [255] | ||||
Cannot censor working directory | Cannot censor working directory | ||||
$ echo 'seriously no passwords' > target | $ echo 'seriously no passwords' > target | ||||
$ hg ci -m 'extend second head arbitrarily' target | $ hg ci -m 'extend second head arbitrarily' target | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ hg update -r "$H2^" | $ hg update -r "$H2^" | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
$ hg censor -r . target | $ hg --config extensions.censor= censor -r . target | ||||
abort: cannot censor working directory | abort: cannot censor working directory | ||||
(clean/delete/update first) | (clean/delete/update first) | ||||
[255] | [255] | ||||
$ hg update -r $H2 | $ hg update -r $H2 | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
Can re-add file after being deleted + censored | Can re-add file after being deleted + censored | ||||
$ C4=$H2 | $ C4=$H2 | ||||
$ hg rm target | $ hg rm target | ||||
$ hg ci -m 'delete target so it may be censored' | $ hg ci -m 'delete target so it may be censored' | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ hg censor -r $C4 target | $ hg --config extensions.censor= censor -r $C4 target | ||||
$ hg cat -r $C4 target | head -n 10 | $ hg cat -r $C4 target | head -n 10 | ||||
$ hg cat -r "$H2^^" target | head -n 10 | $ hg cat -r "$H2^^" target | head -n 10 | ||||
Tainted file now super sanitized | Tainted file now super sanitized | ||||
$ echo 'fresh start' > target | $ echo 'fresh start' > target | ||||
$ hg add target | $ hg add target | ||||
$ hg ci -m reincarnated target | $ hg ci -m reincarnated target | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ hg cat -r $H2 target | head -n 10 | $ hg cat -r $H2 target | head -n 10 | ||||
> echo "Password: hunter$x" >> target | > echo "Password: hunter$x" >> target | ||||
> done | > done | ||||
$ hg ci -m 'add 100k passwords' | $ hg ci -m 'add 100k passwords' | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ C5=$H2 | $ C5=$H2 | ||||
$ hg revert -r "$H2^" target | $ hg revert -r "$H2^" target | ||||
$ hg ci -m 'cleaned 100k passwords' | $ hg ci -m 'cleaned 100k passwords' | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ hg censor -r $C5 target | $ hg --config extensions.censor= censor -r $C5 target | ||||
$ hg cat -r $C5 target | head -n 10 | $ hg cat -r $C5 target | head -n 10 | ||||
$ hg cat -r $H2 target | head -n 10 | $ hg cat -r $H2 target | head -n 10 | ||||
fresh start | fresh start | ||||
Repo with censored nodes can be cloned and cloned nodes are censored | Repo with censored nodes can be cloned and cloned nodes are censored | ||||
$ cd .. | $ cd .. | ||||
$ hg clone r rclone | $ hg clone r rclone | ||||
$ H3=`hg id --debug -i` | $ H3=`hg id --debug -i` | ||||
$ REV=$H3 | $ REV=$H3 | ||||
$ echo 'Re-sanitized; nothing to see here' > target | $ echo 'Re-sanitized; nothing to see here' > target | ||||
$ hg ci -m 're-sanitized' target | $ hg ci -m 're-sanitized' target | ||||
$ H2=`hg id --debug -i` | $ H2=`hg id --debug -i` | ||||
$ CLEANREV=$H2 | $ CLEANREV=$H2 | ||||
$ hg cat -r $REV target | head -n 10 | $ hg cat -r $REV target | head -n 10 | ||||
Passwords: hunter2hunter2 | Passwords: hunter2hunter2 | ||||
$ hg censor -r $REV target | $ hg --config extensions.censor= censor -r $REV target | ||||
$ hg cat -r $REV target | head -n 10 | $ hg cat -r $REV target | head -n 10 | ||||
$ hg cat -r $CLEANREV target | head -n 10 | $ hg cat -r $CLEANREV target | head -n 10 | ||||
Re-sanitized; nothing to see here | Re-sanitized; nothing to see here | ||||
$ hg push -f -r $H2 | $ hg push -f -r $H2 | ||||
pushing to $TESTTMP/r | pushing to $TESTTMP/r | ||||
searching for changes | searching for changes | ||||
adding changesets | adding changesets | ||||
adding manifests | adding manifests | ||||
crosschecking files in changesets and manifests | crosschecking files in changesets and manifests | ||||
checking files | checking files | ||||
checked 14 changesets with 15 changes to 2 files | checked 14 changesets with 15 changes to 2 files | ||||
$ cd ../r | $ cd ../r | ||||
Can import bundle where first revision of a file is censored | Can import bundle where first revision of a file is censored | ||||
$ hg init ../rinit | $ hg init ../rinit | ||||
$ hg censor -r 0 target | $ hg --config extensions.censor= censor -r 0 target | ||||
$ hg bundle -r 0 --base null ../rinit/initbundle | $ hg bundle -r 0 --base null ../rinit/initbundle | ||||
1 changesets found | 1 changesets found | ||||
$ cd ../rinit | $ cd ../rinit | ||||
$ hg unbundle initbundle | $ hg unbundle initbundle | ||||
adding changesets | adding changesets | ||||
adding manifests | adding manifests | ||||
adding file changes | adding file changes | ||||
added 1 changesets with 2 changes to 2 files | added 1 changesets with 2 changes to 2 files | ||||
0 1 1 -1 | 0 1 1 -1 | ||||
1 2 1 -1 | 1 2 1 -1 | ||||
2 2 2 1 | 2 2 2 1 | ||||
Censor the file | Censor the file | ||||
$ hg cat -r $B1 target | wc -l | $ hg cat -r $B1 target | wc -l | ||||
*50002 (re) | *50002 (re) | ||||
$ hg censor -r $B1 target | $ hg --config extensions.censor= censor -r $B1 target | ||||
$ hg cat -r $B1 target | wc -l | $ hg cat -r $B1 target | wc -l | ||||
*0 (re) | *0 (re) | ||||
Check the children is fine | Check the children is fine | ||||
$ hg cat -r $B2 target | wc -l | $ hg cat -r $B2 target | wc -l | ||||
*50003 (re) | *50003 (re) | ||||
#endif | #endif |