This crate casts pointers to custom structs, with compile-time safety checks,
for easy and efficient binary data parsing.
See https://crates.io/crates/bytes-cast and
https://docs.rs/bytes-cast/0.1.0/bytes_cast/
This crate casts pointers to custom structs, with compile-time safety checks,
for easy and efficient binary data parsing.
See https://crates.io/crates/bytes-cast and
https://docs.rs/bytes-cast/0.1.0/bytes_cast/
No Linters Available |
No Unit Test Coverage |
rust/hg-core/src/revlog/nodemap.rs | ||
---|---|---|
225 | This comment is now obsolete. | |
225 | Could that be kept as the following and reused later? `pub const BLOCK_SIZE: usize = size_of::<Block>();` | |
388 | If you're not going to use a constant (as per my comment above), use `size_of`, as `mem` is already imported. | |
614 | I'd use `expect` to explain better why this cannot happen. |
Path | Packages | |||
---|---|---|---|---|
M | rust/Cargo.lock (21 lines) | |||
M | rust/hg-core/Cargo.toml (1 line) | |||
M | rust/hg-core/src/revlog/nodemap.rs (83 lines) | |||
M | rust/hg-core/src/revlog/nodemap_docket.rs (52 lines) | |||
M | rust/hg-core/src/revlog/revlog.rs (6 lines) |
Commit | Parents | Author | Summary | Date |
---|---|---|---|---|
382697282cdb | 172b294b6d65 | Simon Sapin | | Jan 15 2021, 10:11 AM |
Status | Author | Revision | |
---|---|---|---|
Closed | SimonSapin | ||
Closed | SimonSapin |
] | ] | ||||
[[package]] | [[package]] | ||||
name = "byteorder" | name = "byteorder" | ||||
version = "1.3.4" | version = "1.3.4" | ||||
source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
[[package]] | [[package]] | ||||
name = "bytes-cast" | |||||
version = "0.1.0" | |||||
source = "registry+https://github.com/rust-lang/crates.io-index" | |||||
dependencies = [ | |||||
"bytes-cast-derive 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
] | |||||
[[package]] | |||||
name = "bytes-cast-derive" | |||||
version = "0.1.0" | |||||
source = "registry+https://github.com/rust-lang/crates.io-index" | |||||
dependencies = [ | |||||
"proc-macro2 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"syn 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
] | |||||
[[package]] | |||||
name = "cc" | name = "cc" | ||||
version = "1.0.66" | version = "1.0.66" | ||||
source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
dependencies = [ | dependencies = [ | ||||
"jobserver 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", | "jobserver 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
] | ] | ||||
[[package]] | [[package]] | ||||
version = "0.4.2" | version = "0.4.2" | ||||
source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
[[package]] | [[package]] | ||||
name = "hg-core" | name = "hg-core" | ||||
version = "0.1.0" | version = "0.1.0" | ||||
dependencies = [ | dependencies = [ | ||||
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", | "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"bytes-cast 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)", | "clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", | "crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"flate2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)", | "flate2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"format-bytes 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", | "format-bytes 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", | "hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"im-rc 15.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | "im-rc 15.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", | "log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" | "checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" | ||||
"checksum aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" | "checksum aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" | ||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" | "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" | ||||
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" | "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" | ||||
"checksum autocfg 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" | "checksum autocfg 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" | ||||
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" | "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" | ||||
"checksum bitmaps 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" | "checksum bitmaps 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" | ||||
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" | "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" | ||||
"checksum bytes-cast 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52" | |||||
"checksum bytes-cast-derive 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb936af9de38476664d6b58e529aff30d482e4ce1c5e150293d00730b0d81fdb" | |||||
"checksum cc 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" | "checksum cc 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" | ||||
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" | ||||
"checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | "checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | ||||
"checksum clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)" = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" | "checksum clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)" = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" | ||||
"checksum const_fn 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826" | "checksum const_fn 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826" | ||||
"checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" | "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" | ||||
"checksum crc32fast 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" | "checksum crc32fast 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" | ||||
"checksum crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" | "checksum crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" |
[package] | [package] | ||||
name = "hg-core" | name = "hg-core" | ||||
version = "0.1.0" | version = "0.1.0" | ||||
authors = ["Georges Racinet <gracinet@anybox.fr>"] | authors = ["Georges Racinet <gracinet@anybox.fr>"] | ||||
description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" | ||||
edition = "2018" | edition = "2018" | ||||
[lib] | [lib] | ||||
name = "hg" | name = "hg" | ||||
[dependencies] | [dependencies] | ||||
bytes-cast = "0.1" | |||||
byteorder = "1.3.4" | byteorder = "1.3.4" | ||||
hex = "0.4.2" | hex = "0.4.2" | ||||
im-rc = "15.0.*" | im-rc = "15.0.*" | ||||
lazy_static = "1.4.0" | lazy_static = "1.4.0" | ||||
memchr = "2.3.3" | memchr = "2.3.3" | ||||
rand = "0.7.3" | rand = "0.7.3" | ||||
rand_pcg = "0.2.1" | rand_pcg = "0.2.1" | ||||
rand_distr = "0.2.2" | rand_distr = "0.2.2" |
use bytes_cast::{unaligned, BytesCast}; | |||||
use memmap::Mmap; | use memmap::Mmap; | ||||
use std::convert::TryInto; | |||||
use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||
use super::revlog::RevlogError; | use super::revlog::RevlogError; | ||||
use crate::repo::Repo; | use crate::repo::Repo; | ||||
use crate::utils::strip_suffix; | use crate::utils::strip_suffix; | ||||
const ONDISK_VERSION: u8 = 1; | const ONDISK_VERSION: u8 = 1; | ||||
pub(super) struct NodeMapDocket { | pub(super) struct NodeMapDocket { | ||||
pub data_length: usize, | pub data_length: usize, | ||||
// TODO: keep here more of the data from `parse()` when we need it | // TODO: keep here more of the data from `parse()` when we need it | ||||
} | } | ||||
#[derive(BytesCast)] | |||||
#[repr(C)] | |||||
struct DocketHeader { | |||||
uid_size: u8, | |||||
_tip_rev: unaligned::U64Be, | |||||
data_length: unaligned::U64Be, | |||||
_data_unused: unaligned::U64Be, | |||||
tip_node_size: unaligned::U64Be, | |||||
} | |||||
impl NodeMapDocket { | impl NodeMapDocket { | ||||
/// Return `Ok(None)` when the caller should proceed without a persistent | /// Return `Ok(None)` when the caller should proceed without a persistent | ||||
/// nodemap: | /// nodemap: | ||||
/// | /// | ||||
/// * This revlog does not have a `.n` docket file (it is not generated for | /// * This revlog does not have a `.n` docket file (it is not generated for | ||||
/// small revlogs), or | /// small revlogs), or | ||||
/// * The docket has an unsupported version number (repositories created by | /// * The docket has an unsupported version number (repositories created by | ||||
/// later hg, maybe that should be a requirement instead?), or | /// later hg, maybe that should be a requirement instead?), or | ||||
/// * The docket file points to a missing (likely deleted) data file (this | /// * The docket file points to a missing (likely deleted) data file (this | ||||
/// can happen in a rare race condition). | /// can happen in a rare race condition). | ||||
pub fn read_from_file( | pub fn read_from_file( | ||||
repo: &Repo, | repo: &Repo, | ||||
index_path: &Path, | index_path: &Path, | ||||
) -> Result<Option<(Self, Mmap)>, RevlogError> { | ) -> Result<Option<(Self, Mmap)>, RevlogError> { | ||||
let docket_path = index_path.with_extension("n"); | let docket_path = index_path.with_extension("n"); | ||||
let docket_bytes = match repo.store_vfs().read(&docket_path) { | let docket_bytes = match repo.store_vfs().read(&docket_path) { | ||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => { | Err(e) if e.kind() == std::io::ErrorKind::NotFound => { | ||||
return Ok(None) | return Ok(None) | ||||
} | } | ||||
Err(e) => return Err(RevlogError::IoError(e)), | Err(e) => return Err(RevlogError::IoError(e)), | ||||
Ok(bytes) => bytes, | Ok(bytes) => bytes, | ||||
}; | }; | ||||
let mut input = if let Some((&ONDISK_VERSION, rest)) = | let input = if let Some((&ONDISK_VERSION, rest)) = | ||||
docket_bytes.split_first() | docket_bytes.split_first() | ||||
{ | { | ||||
rest | rest | ||||
} else { | } else { | ||||
return Ok(None); | return Ok(None); | ||||
}; | }; | ||||
let input = &mut input; | |||||
let uid_size = read_u8(input)? as usize; | let (header, rest) = DocketHeader::from_bytes(input)?; | ||||
let _tip_rev = read_be_u64(input)?; | let uid_size = header.uid_size as usize; | ||||
// TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit | // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit | ||||
// systems? | // systems? | ||||
let data_length = read_be_u64(input)? as usize; | let tip_node_size = header.tip_node_size.get() as usize; | ||||
let _data_unused = read_be_u64(input)?; | let data_length = header.data_length.get() as usize; | ||||
let tip_node_size = read_be_u64(input)? as usize; | let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?; | ||||
let uid = read_bytes(input, uid_size)?; | let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?; | ||||
let _tip_node = read_bytes(input, tip_node_size)?; | |||||
let uid = | let uid = | ||||
std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?; | std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?; | ||||
let docket = NodeMapDocket { data_length }; | let docket = NodeMapDocket { data_length }; | ||||
let data_path = rawdata_path(&docket_path, uid); | let data_path = rawdata_path(&docket_path, uid); | ||||
// TODO: use `std::fs::read` here when the `persistent-nodemap.mmap` | // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap` | ||||
// config is false? | // config is false? | ||||
match repo.store_vfs().mmap_open(&data_path) { | match repo.store_vfs().mmap_open(&data_path) { | ||||
} else { | } else { | ||||
Err(RevlogError::IoError(error)) | Err(RevlogError::IoError(error)) | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
fn read_bytes<'a>( | |||||
input: &mut &'a [u8], | |||||
count: usize, | |||||
) -> Result<&'a [u8], RevlogError> { | |||||
if let Some(start) = input.get(..count) { | |||||
*input = &input[count..]; | |||||
Ok(start) | |||||
} else { | |||||
Err(RevlogError::Corrupted) | |||||
} | |||||
} | |||||
fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> { | |||||
Ok(read_bytes(input, 1)?[0]) | |||||
} | |||||
fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> { | |||||
let array = read_bytes(input, std::mem::size_of::<u64>())? | |||||
.try_into() | |||||
.unwrap(); | |||||
Ok(u64::from_be_bytes(array)) | |||||
} | |||||
fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf { | fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf { | ||||
let docket_name = docket_path | let docket_name = docket_path | ||||
.file_name() | .file_name() | ||||
.expect("expected a base name") | .expect("expected a base name") | ||||
.to_str() | .to_str() | ||||
.expect("expected an ASCII file name in the store"); | .expect("expected an ASCII file name in the store"); | ||||
let prefix = strip_suffix(docket_name, ".n.a") | let prefix = strip_suffix(docket_name, ".n.a") | ||||
.or_else(|| strip_suffix(docket_name, ".n")) | .or_else(|| strip_suffix(docket_name, ".n")) | ||||
.expect("expected docket path in .n or .n.a"); | .expect("expected docket path in .n or .n.a"); | ||||
let name = format!("{}-{}.nd", prefix, uid); | let name = format!("{}-{}.nd", prefix, uid); | ||||
docket_path | docket_path | ||||
.parent() | .parent() | ||||
.expect("expected a non-root path") | .expect("expected a non-root path") | ||||
.join(name) | .join(name) | ||||
} | } |
UnsuportedVersion(u16), | UnsuportedVersion(u16), | ||||
InvalidRevision, | InvalidRevision, | ||||
/// Found more than one entry whose ID match the requested prefix | /// Found more than one entry whose ID match the requested prefix | ||||
AmbiguousPrefix, | AmbiguousPrefix, | ||||
Corrupted, | Corrupted, | ||||
UnknowDataFormat(u8), | UnknowDataFormat(u8), | ||||
} | } | ||||
impl From<bytes_cast::FromBytesError> for RevlogError { | |||||
fn from(_: bytes_cast::FromBytesError) -> Self { | |||||
RevlogError::Corrupted | |||||
} | |||||
} | |||||
/// Read only implementation of revlog. | /// Read only implementation of revlog. | ||||
pub struct Revlog { | pub struct Revlog { | ||||
/// When index and data are not interleaved: bytes of the revlog index. | /// When index and data are not interleaved: bytes of the revlog index. | ||||
/// When index and data are interleaved: bytes of the revlog index and | /// When index and data are interleaved: bytes of the revlog index and | ||||
/// data. | /// data. | ||||
index: Index, | index: Index, | ||||
/// When index and data are not interleaved: bytes of the revlog data | /// When index and data are not interleaved: bytes of the revlog data | ||||
data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
Could that be kept as the following and reused later?