Some filelog-specific logic is moved out of the `rhg cat` code and into a new
`Filelog` struct, where it can be reused more easily.
Additionally, a missing end delimiter for metadata now causes an error
to be returned instead of being silently ignored.
Alphare |
hg-reviewers |
Some filelog-specific logic is moved out of the `rhg cat` code and into a new
`Filelog` struct, where it can be reused more easily.
Additionally, a missing end delimiter for metadata now causes an error
to be returned instead of being silently ignored.
Automatic diff as part of commit; lint not applicable. |
Automatic diff as part of commit; unit tests not applicable. |
rust/hg-core/src/revlog/filelog.rs | ||
---|---|---|
40 | The more I look at `get_rev`, the more confusing it feels to me. I'm saying this for all revlogs: having a `get_rev(rev)` feels weird; maybe it should be `get_entry(rev)` or something. This would be for another patch, of course, but I thought I'd discuss it. |
rust/hg-core/src/revlog/filelog.rs | ||
---|---|---|
40 | How about `get_by_rev` and `get_by_node`? But yes, that is for another patch. |
rust/hg-core/src/revlog/filelog.rs | ||
---|---|---|
40 | Sounds good to me. |
Path | Packages | |||
---|---|---|---|---|
M | rust/hg-core/src/operations/cat.rs (38 lines) | |||
M | rust/hg-core/src/repo.rs (6 lines) | |||
M | rust/hg-core/src/revlog.rs (1 line) | |||
A | M | rust/hg-core/src/revlog/filelog.rs (79 lines) | ||
M | rust/hg-core/src/utils.rs (9 lines) |
Status | Author | Revision | |
---|---|---|---|
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin | ||
Closed | SimonSapin |
// list_tracked_files.rs | // list_tracked_files.rs | ||||
// | // | ||||
// Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> | ||||
// | // | ||||
// This software may be used and distributed according to the terms of the | // This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | // GNU General Public License version 2 or any later version. | ||||
use std::path::PathBuf; | |||||
use crate::repo::Repo; | use crate::repo::Repo; | ||||
use crate::revlog::path_encode::path_encode; | |||||
use crate::revlog::revlog::Revlog; | |||||
use crate::revlog::revlog::RevlogError; | use crate::revlog::revlog::RevlogError; | ||||
use crate::revlog::Node; | use crate::revlog::Node; | ||||
use crate::utils::files::get_path_from_bytes; | |||||
use crate::utils::hg_path::{HgPath, HgPathBuf}; | use crate::utils::hg_path::HgPathBuf; | ||||
pub struct CatOutput { | pub struct CatOutput { | ||||
/// Whether any file in the manifest matched the paths given as CLI | /// Whether any file in the manifest matched the paths given as CLI | ||||
/// arguments | /// arguments | ||||
pub found_any: bool, | pub found_any: bool, | ||||
/// The contents of matching files, in manifest order | /// The contents of matching files, in manifest order | ||||
pub concatenated: Vec<u8>, | pub concatenated: Vec<u8>, | ||||
/// Which of the CLI arguments did not match any manifest file | /// Which of the CLI arguments did not match any manifest file | ||||
pub missing: Vec<HgPathBuf>, | pub missing: Vec<HgPathBuf>, | ||||
/// The node ID that the given revset was resolved to | /// The node ID that the given revset was resolved to | ||||
pub node: Node, | pub node: Node, | ||||
} | } | ||||
const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n']; | |||||
/// Output the given revision of files | /// Output the given revision of files | ||||
/// | /// | ||||
/// * `root`: Repository root | /// * `root`: Repository root | ||||
/// * `rev`: The revision to cat the files from. | /// * `rev`: The revision to cat the files from. | ||||
/// * `files`: The files to output. | /// * `files`: The files to output. | ||||
pub fn cat<'a>( | pub fn cat<'a>( | ||||
repo: &Repo, | repo: &Repo, | ||||
revset: &str, | revset: &str, | ||||
let mut matched = vec![false; files.len()]; | let mut matched = vec![false; files.len()]; | ||||
let mut found_any = false; | let mut found_any = false; | ||||
for (manifest_file, node_bytes) in manifest.files_with_nodes() { | for (manifest_file, node_bytes) in manifest.files_with_nodes() { | ||||
for (cat_file, is_matched) in files.iter().zip(&mut matched) { | for (cat_file, is_matched) in files.iter().zip(&mut matched) { | ||||
if cat_file.as_bytes() == manifest_file.as_bytes() { | if cat_file.as_bytes() == manifest_file.as_bytes() { | ||||
*is_matched = true; | *is_matched = true; | ||||
found_any = true; | found_any = true; | ||||
let index_path = store_path(manifest_file, b".i"); | let file_log = repo.filelog(manifest_file)?; | ||||
let data_path = store_path(manifest_file, b".d"); | |||||
let file_log = | |||||
Revlog::open(repo, &index_path, Some(&data_path))?; | |||||
let file_node = Node::from_hex_for_repo(node_bytes)?; | let file_node = Node::from_hex_for_repo(node_bytes)?; | ||||
let file_rev = file_log.get_node_rev(file_node.into())?; | let entry = file_log.get_node(file_node)?; | ||||
let data = file_log.get_rev_data(file_rev)?; | bytes.extend(entry.data()?) | ||||
if data.starts_with(&METADATA_DELIMITER) { | |||||
let end_delimiter_position = data | |||||
[METADATA_DELIMITER.len()..] | |||||
.windows(METADATA_DELIMITER.len()) | |||||
.position(|bytes| bytes == METADATA_DELIMITER); | |||||
if let Some(position) = end_delimiter_position { | |||||
let offset = METADATA_DELIMITER.len() * 2; | |||||
bytes.extend(data[position + offset..].iter()); | |||||
} | |||||
} else { | |||||
bytes.extend(data); | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
let missing: Vec<_> = files | let missing: Vec<_> = files | ||||
.iter() | .iter() | ||||
.zip(&matched) | .zip(&matched) | ||||
.filter(|pair| !*pair.1) | .filter(|pair| !*pair.1) | ||||
.map(|pair| pair.0.clone()) | .map(|pair| pair.0.clone()) | ||||
.collect(); | .collect(); | ||||
Ok(CatOutput { | Ok(CatOutput { | ||||
found_any, | found_any, | ||||
concatenated: bytes, | concatenated: bytes, | ||||
missing, | missing, | ||||
node, | node, | ||||
}) | }) | ||||
} | } | ||||
fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { | |||||
let encoded_bytes = | |||||
path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); | |||||
get_path_from_bytes(&encoded_bytes).into() | |||||
} |
use crate::changelog::Changelog; | use crate::changelog::Changelog; | ||||
use crate::config::{Config, ConfigError, ConfigParseError}; | use crate::config::{Config, ConfigError, ConfigParseError}; | ||||
use crate::dirstate::DirstateParents; | use crate::dirstate::DirstateParents; | ||||
use crate::dirstate_tree::dirstate_map::DirstateMap; | use crate::dirstate_tree::dirstate_map::DirstateMap; | ||||
use crate::dirstate_tree::owning::OwningDirstateMap; | use crate::dirstate_tree::owning::OwningDirstateMap; | ||||
use crate::errors::HgError; | use crate::errors::HgError; | ||||
use crate::errors::HgResultExt; | use crate::errors::HgResultExt; | ||||
use crate::manifest::{Manifest, Manifestlog}; | use crate::manifest::{Manifest, Manifestlog}; | ||||
use crate::requirements; | use crate::requirements; | ||||
use crate::revlog::filelog::Filelog; | |||||
use crate::revlog::revlog::RevlogError; | use crate::revlog::revlog::RevlogError; | ||||
use crate::utils::files::get_path_from_bytes; | use crate::utils::files::get_path_from_bytes; | ||||
use crate::utils::hg_path::HgPath; | |||||
use crate::utils::SliceExt; | use crate::utils::SliceExt; | ||||
use crate::vfs::{is_dir, is_file, Vfs}; | use crate::vfs::{is_dir, is_file, Vfs}; | ||||
use crate::{exit_codes, Node}; | use crate::{exit_codes, Node}; | ||||
use crate::{DirstateError, Revision}; | use crate::{DirstateError, Revision}; | ||||
use std::cell::{Cell, Ref, RefCell, RefMut}; | use std::cell::{Cell, Ref, RefCell, RefMut}; | ||||
use std::collections::HashSet; | use std::collections::HashSet; | ||||
use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||
) -> Result<Manifest, RevlogError> { | ) -> Result<Manifest, RevlogError> { | ||||
let changelog = self.changelog()?; | let changelog = self.changelog()?; | ||||
let manifest = self.manifestlog()?; | let manifest = self.manifestlog()?; | ||||
let changelog_entry = changelog.get_rev(revision)?; | let changelog_entry = changelog.get_rev(revision)?; | ||||
let manifest_node = | let manifest_node = | ||||
Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; | Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; | ||||
manifest.get_node(manifest_node.into()) | manifest.get_node(manifest_node.into()) | ||||
} | } | ||||
pub fn filelog(&self, path: &HgPath) -> Result<Filelog, RevlogError> { | |||||
Filelog::open(self, path) | |||||
} | |||||
} | } | ||||
/// Lazily-initialized component of `Repo` with interior mutability | /// Lazily-initialized component of `Repo` with interior mutability | ||||
/// | /// | ||||
/// This differs from `OnceCell` in that the value can still be "deinitialized" | /// This differs from `OnceCell` in that the value can still be "deinitialized" | ||||
/// later by setting its inner `Option` to `None`. | /// later by setting its inner `Option` to `None`. | ||||
struct LazyCell<T, E> { | struct LazyCell<T, E> { | ||||
value: RefCell<Option<T>>, | value: RefCell<Option<T>>, |
// Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> | ||||
// and Mercurial contributors | // and Mercurial contributors | ||||
// | // | ||||
// This software may be used and distributed according to the terms of the | // This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | // GNU General Public License version 2 or any later version. | ||||
//! Mercurial concepts for handling revision history | //! Mercurial concepts for handling revision history | ||||
pub mod node; | pub mod node; | ||||
pub mod nodemap; | pub mod nodemap; | ||||
mod nodemap_docket; | mod nodemap_docket; | ||||
pub mod path_encode; | pub mod path_encode; | ||||
pub use node::{FromHexError, Node, NodePrefix}; | pub use node::{FromHexError, Node, NodePrefix}; | ||||
pub mod changelog; | pub mod changelog; | ||||
pub mod filelog; | |||||
pub mod index; | pub mod index; | ||||
pub mod manifest; | pub mod manifest; | ||||
pub mod patch; | pub mod patch; | ||||
pub mod revlog; | pub mod revlog; | ||||
/// Mercurial revision numbers | /// Mercurial revision numbers | ||||
/// | /// | ||||
/// As noted in revlog.c, revision numbers are actually encoded in | /// As noted in revlog.c, revision numbers are actually encoded in |
pub trait SliceExt { | pub trait SliceExt { | ||||
fn trim_end(&self) -> &Self; | fn trim_end(&self) -> &Self; | ||||
fn trim_start(&self) -> &Self; | fn trim_start(&self) -> &Self; | ||||
fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | ||||
fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | ||||
fn trim(&self) -> &Self; | fn trim(&self) -> &Self; | ||||
fn drop_prefix(&self, needle: &Self) -> Option<&Self>; | fn drop_prefix(&self, needle: &Self) -> Option<&Self>; | ||||
fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; | ||||
fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>; | |||||
} | } | ||||
impl SliceExt for [u8] { | impl SliceExt for [u8] { | ||||
fn trim_end(&self) -> &[u8] { | fn trim_end(&self) -> &[u8] { | ||||
self.trim_end_matches(|byte| byte.is_ascii_whitespace()) | self.trim_end_matches(|byte| byte.is_ascii_whitespace()) | ||||
} | } | ||||
fn trim_start(&self) -> &[u8] { | fn trim_start(&self) -> &[u8] { | ||||
} | } | ||||
fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { | ||||
let mut iter = self.splitn(2, |&byte| byte == separator); | let mut iter = self.splitn(2, |&byte| byte == separator); | ||||
let a = iter.next()?; | let a = iter.next()?; | ||||
let b = iter.next()?; | let b = iter.next()?; | ||||
Some((a, b)) | Some((a, b)) | ||||
} | } | ||||
fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { | |||||
if let Some(pos) = find_slice_in_slice(self, separator) { | |||||
Some((&self[..pos], &self[pos + separator.len()..])) | |||||
} else { | |||||
None | |||||
} | |||||
} | |||||
} | } | ||||
pub trait Escaped { | pub trait Escaped { | ||||
/// Return bytes escaped for display to the user | /// Return bytes escaped for display to the user | ||||
fn escaped_bytes(&self) -> Vec<u8>; | fn escaped_bytes(&self) -> Vec<u8>; | ||||
} | } | ||||
impl Escaped for u8 { | impl Escaped for u8 { |
The more I look at `get_rev`, the more confusing it feels to me. I'm saying this for all revlogs: having a `get_rev(rev)` feels weird; maybe it should be `get_entry(rev)` or something. This would be for another patch, of course, but I thought I'd discuss it.