diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1,6 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "aho-corasick" version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -42,6 +47,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "cc" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "jobserver 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -92,6 +105,14 @@ ] [[package]] +name = "crc32fast" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "crossbeam" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -176,6 +197,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "flate2" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)", + "libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz_oxide 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "getrandom" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -186,6 +219,11 @@ ] [[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "hermit-abi" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -205,6 +243,7 @@ "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -220,6 +259,7 @@ "same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "twox-hash 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "zstd 0.5.3+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -234,6 +274,22 @@ ] [[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "jobserver" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -244,6 +300,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "libz-sys" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.58 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "log" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -299,6 +366,14 @@ ] [[package]] +name = "miniz_oxide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "num-integer" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -333,6 +408,11 @@ ] [[package]] +name = "pkg-config" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "ppv-lite86" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -613,6 +693,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] +name = "vcpkg" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -649,18 +734,49 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "zstd" +version = "0.5.3+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "zstd-safe 2.0.5+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "zstd-safe" +version = "2.0.5+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)", + "zstd-sys 1.4.17+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "zstd-sys" +version = "1.4.17+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.58 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] +"checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +"checksum cc 1.0.58 (registry+https://github.com/rust-lang/crates.io-index)" = "f9a06fb2e53271d7c279ec1efea6ab691c35a2ae67ec0d91d7acec0caf13b518" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" "checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" "checksum colored 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59" "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" +"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum crossbeam 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e" "checksum crossbeam-channel 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "cced8691919c02aac3cb0a1bc2e9b73d89e832bf9a06fc579d4e71b68a2da061" "checksum crossbeam-deque 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" @@ -670,11 +786,16 @@ "checksum ctor 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "47c5e5ac752e18207b12e16b10631ae5f7f68f8805f335f9b817ead83d9ffce1" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +"checksum flate2 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "68c90b0fc46cf89d227cc78b40e494ff81287a92dd07631e5af0d06fe3cf885e" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" "checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" "checksum hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" +"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +"checksum jobserver 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "5c71313ebb9439f74b00d9d2dcec36440beaf57a6aa0623068441dd7cd81a7f2" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" +"checksum libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "2eb5e43362e38e2bca2fd5f5134c4d4564a23a5c28e9b95411652021a8675ebe" "checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" "checksum maybe-uninit 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" "checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" @@ -682,10 +803,12 @@ "checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum micro-timer 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "25b31d6cb9112984323d05d7a353f272ae5d7a307074f9ab9b25c00121b8c947" "checksum micro-timer-macros 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5694085dd384bb9e824207facc040c248d9df653f55e28c3ad0686958b448504" +"checksum miniz_oxide 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "be0f75932c1f6cfae3c04000e40114adf955636e19040f9c0a2c380702aa1c7f" "checksum num-integer 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" "checksum num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" "checksum num_cpus 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" "checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" +"checksum pkg-config 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)" = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" "checksum proc-macro2 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)" = "6c09721c6781493a2a492a96b5a5bf19b65917fe6728884e7c44dd0c60ca3435" @@ -719,9 +842,13 @@ "checksum twox-hash 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3bfd5b7557925ce778ff9b9ef90e3ade34c524b5ff10e239c69a42d546d2af56" "checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +"checksum vcpkg 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +"checksum zstd 0.5.3+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "01b32eaf771efa709e8308605bbf9319bf485dc1503179ec0469b611937c0cd8" +"checksum zstd-safe 2.0.5+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "1cfb642e0d27f64729a639c52db457e0ae906e7bc6f5fe8f5c453230400f1055" +"checksum zstd-sys 1.4.17+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b89249644df056b522696b1bb9e7c18c87e8ffa3e2f0dc3b0155875d6498f01b" diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -23,9 +23,10 @@ crossbeam = "0.7.3" micro-timer = "0.3.0" log = "0.4.8" +memmap = "0.7.0" +zstd = "0.5.3" [dev-dependencies] clap = "*" -memmap = "0.7.0" pretty_assertions = "0.6.1" tempfile = "3.1.0" diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs --- a/rust/hg-core/src/revlog.rs +++ b/rust/hg-core/src/revlog.rs @@ -8,6 +8,9 @@ pub mod node; pub mod nodemap; pub use node::{Node, NodeError, NodePrefix, NodePrefixRef}; +pub mod index; +pub mod patch; +pub mod revlog; /// Mercurial revision numbers /// diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/revlog/index.rs @@ -0,0 +1,299 @@ +use crate::revlog::{Revision, NULL_REVISION}; +use byteorder::{BigEndian, ByteOrder}; + +pub const INDEX_ENTRY_SIZE: usize = 64; + +/// A Revlog index +#[derive(Debug)] +pub struct Index<'a> { + bytes: &'a [u8], + /// Offsets of starts of index blocks. + /// Only needed when the index is interleaved with data. + offsets: Option>, +} + +impl<'a> Index<'a> { + /// Create an index from bytes. + /// Calculate the start of each entry when is_inline is true. + pub fn new(bytes: &'a [u8], is_inline: bool) -> Self { + if is_inline { + let mut offset: usize = 0; + let mut offsets = Vec::new(); + + while (bytes.len() - offset) >= INDEX_ENTRY_SIZE { + offsets.push(offset); + let end = offset + INDEX_ENTRY_SIZE; + let entry = IndexEntry { + bytes: &bytes[offset..end], + offset_override: None, + }; + + offset += INDEX_ENTRY_SIZE + entry.compressed_len(); + } + + Self { + bytes, + offsets: Some(offsets), + } + } else { + Self { + bytes, + offsets: None, + } + } + } + + /// Return the index entry corresponding to the given revision if it + /// exists. + pub fn get_entry(&self, rev: Revision) -> Option { + if rev == NULL_REVISION { + return None; + } + if let Some(offsets) = &self.offsets { + self.get_entry_inline(rev, offsets) + } else { + self.get_entry_separated(rev) + } + } + + fn get_entry_inline( + &self, + rev: Revision, + offsets: &[usize], + ) -> Option { + let start = *offsets.get(rev as usize)?; + let end = start.checked_add(INDEX_ENTRY_SIZE)?; + let bytes = &self.bytes[start..end]; + + // See IndexEntry for an explanation of this override. + let offset_override = Some(end); + + Some(IndexEntry { + bytes, + offset_override, + }) + } + + fn get_entry_separated(&self, rev: Revision) -> Option { + let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; + if rev as usize >= max_rev { + return None; + } + let start = rev as usize * INDEX_ENTRY_SIZE; + let end = start + INDEX_ENTRY_SIZE; + let bytes = &self.bytes[start..end]; + + // See IndexEntry for an explanation of this override. + let offset_override = match rev { + 0 => Some(0), + _ => None, + }; + + Some(IndexEntry { + bytes, + offset_override, + }) + } +} + +#[derive(Debug)] +pub struct IndexEntry<'a> { + bytes: &'a [u8], + /// Allows to override the offset value of the entry. + /// + /// For interleaved index and data, the offset stored in the index + /// corresponds to the separated data offset. + /// It has to be overridden with the actual offset in the interleaved + /// index which is just after the index block. + /// + /// For separated index and data, the offset stored in the first index + /// entry is mixed with the index headers. + /// It has to be overridden with 0. + offset_override: Option, +} + +impl<'a> IndexEntry<'a> { + /// Return the offset of the data if not overridden by offset_override. + pub fn offset(&self) -> usize { + if let Some(offset_override) = self.offset_override { + offset_override + } else { + let mut bytes = [0; 8]; + bytes[2..8].copy_from_slice(&self.bytes[0..=5]); + BigEndian::read_u64(&bytes[..]) as usize + } + } + + /// Return the compressed length of the data. + pub fn compressed_len(&self) -> usize { + BigEndian::read_u32(&self.bytes[8..=11]) as usize + } + + /// Return the uncompressed length of the data. + pub fn uncompressed_len(&self) -> usize { + BigEndian::read_u32(&self.bytes[12..=15]) as usize + } + + /// Return the revision upon which the data has been derived. + pub fn base_revision(&self) -> Revision { + // TODO Maybe return an Option when base_revision == rev? + // Requires to add rev to IndexEntry + + BigEndian::read_i32(&self.bytes[16..]) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(test)] + #[derive(Debug, Copy, Clone)] + pub struct IndexEntryBuilder { + is_first: bool, + is_inline: bool, + is_general_delta: bool, + version: u16, + offset: usize, + compressed_len: usize, + uncompressed_len: usize, + base_revision: Revision, + } + + #[cfg(test)] + impl IndexEntryBuilder { + pub fn new() -> Self { + Self { + is_first: false, + is_inline: false, + is_general_delta: true, + version: 2, + offset: 0, + compressed_len: 0, + uncompressed_len: 0, + base_revision: 0, + } + } + + pub fn is_first(&mut self, value: bool) -> &mut Self { + self.is_first = value; + self + } + + pub fn with_inline(&mut self, value: bool) -> &mut Self { + self.is_inline = value; + self + } + + pub fn with_general_delta(&mut self, value: bool) -> &mut Self { + self.is_general_delta = value; + self + } + + pub fn with_version(&mut self, value: u16) -> &mut Self { + self.version = value; + self + } + + pub fn with_offset(&mut self, value: usize) -> &mut Self { + self.offset = value; + self + } + + pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { + self.compressed_len = value; + self + } + + pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { + self.uncompressed_len = value; + self + } + + pub fn with_base_revision(&mut self, value: Revision) -> &mut Self { + self.base_revision = value; + self + } + + pub fn build(&self) -> Vec { + let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); + if self.is_first { + bytes.extend(&match (self.is_general_delta, self.is_inline) { + (false, false) => [0u8, 0], + (false, true) => [0u8, 1], + (true, false) => [0u8, 2], + (true, true) => [0u8, 3], + }); + bytes.extend(&self.version.to_be_bytes()); + // Remaining offset bytes. + bytes.extend(&[0u8; 2]); + } else { + // Offset is only 6 bytes will usize is 8. + bytes.extend(&self.offset.to_be_bytes()[2..]); + } + bytes.extend(&[0u8; 2]); // Revision flags. + bytes.extend(&self.compressed_len.to_be_bytes()[4..]); + bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]); + bytes.extend(&self.base_revision.to_be_bytes()); + bytes + } + } + + #[test] + fn test_offset() { + let bytes = IndexEntryBuilder::new().with_offset(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.offset(), 1) + } + + #[test] + fn test_with_overridden_offset() { + let bytes = IndexEntryBuilder::new().with_offset(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: Some(2), + }; + + assert_eq!(entry.offset(), 2) + } + + #[test] + fn test_compressed_len() { + let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.compressed_len(), 1) + } + + #[test] + fn test_uncompressed_len() { + let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.uncompressed_len(), 1) + } + + #[test] + fn test_base_revision() { + let bytes = IndexEntryBuilder::new().with_base_revision(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.base_revision(), 1) + } +} + +#[cfg(test)] +pub use tests::IndexEntryBuilder; diff --git a/rust/hg-core/src/revlog/patch.rs b/rust/hg-core/src/revlog/patch.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/revlog/patch.rs @@ -0,0 +1,367 @@ +use byteorder::{BigEndian, ByteOrder}; + +/// A chunk of data to insert, delete or replace in a patch +/// +/// A chunk is: +/// - an insertion when `!data.is_empty() && start == end` +/// - an deletion when `data.is_empty() && start < end` +/// - a replacement when `!data.is_empty() && start < end` +/// - not doing anything when `data.is_empty() && start == end` +#[derive(Debug, Clone)] +struct PatchFrag<'a> { + /// The start position of the chunk of data to replace + start: i32, + /// The end position of the chunk of data to replace (open end interval) + end: i32, + /// The data replacing the chunk + data: &'a [u8], +} + +impl<'a> PatchFrag<'a> { + /// Adjusted start of the chunk to replace. + /// + /// Offset allow to take into account the growth/shrinkage of data + /// induced by previously applied chunks. + fn start_offseted_by(&self, offset: i32) -> i32 { + self.start + offset + } + + /// Adjusted end of the chunk to replace. + /// + /// Offset allow to take into account the growth/shrinkage of data + /// induced by previously applied chunks. + fn end_offseted_by(&self, offset: i32) -> i32 { + self.start_offseted_by(offset) + (self.data.len() as i32) + } + + /// Length of the replaced chunk. + fn replaced_len(&self) -> i32 { + self.end - self.start + } + + /// Length difference between the replacing data and the replaced data. + fn len_diff(&self) -> i32 { + (self.data.len() as i32) - self.replaced_len() + } +} + +/// The delta between two revisions data. +#[derive(Debug, Clone)] +pub struct PatchList<'a> { + /// A collection of chunks to apply. + /// + /// Those chunks are: + /// - ordered from the left-most replacement to the right-most replacement + /// - non-overlapping, meaning that two chucks can not change the same + /// chunk of the patched data + frags: Vec>, +} + +impl<'a> PatchList<'a> { + /// Create a `PatchList` from bytes. + pub fn new(data: &'a [u8]) -> Self { + let mut frags = vec![]; + let mut data = data; + while !data.is_empty() { + let start = BigEndian::read_i32(&data[0..]); + let end = BigEndian::read_i32(&data[4..]); + let len = BigEndian::read_i32(&data[8..]); + assert!(0 <= start && start <= end && len >= 0); + frags.push(PatchFrag { + start, + end, + data: &data[12..12 + (len as usize)], + }); + data = &data[12 + (len as usize)..]; + } + PatchList { frags } + } + + /// Return the final length of data after patching + /// given its initial length . + fn size(&self, initial_size: i32) -> i32 { + self.frags + .iter() + .fold(initial_size, |acc, frag| acc + frag.len_diff()) + } + + /// Apply the patch to some data. + pub fn apply(&self, initial: &[u8]) -> Vec { + let mut last: usize = 0; + let mut vec = + Vec::with_capacity(self.size(initial.len() as i32) as usize); + for PatchFrag { start, end, data } in self.frags.iter() { + vec.extend(&initial[last..(*start as usize)]); + vec.extend(data.iter()); + last = *end as usize; + } + vec.extend(&initial[last..]); + vec + } + + /// Combine two patch lists into a single patch list. + /// + /// Applying consecutive patches can lead to waste of time and memory + /// as the changes introduced by one patch can be overridden by the next. + /// Combining patches optimizes the whole patching sequence. + fn combine(&mut self, other: &mut Self) -> Self { + let mut frags = vec![]; + + // Keep track of each growth/shrinkage resulting from applying a chunk + // in order to adjust the start/end of subsequent chunks. + let mut offset = 0i32; + + // Keep track of the chunk of self.chunks to process. + let mut pos = 0; + + // For each chunk of `other`, chunks of `self` are processed + // until they start after the end of the current chunk. + for PatchFrag { start, end, data } in other.frags.iter() { + // Add chunks of `self` that start before this chunk of `other` + // without overlap. + while pos < self.frags.len() + && self.frags[pos].end_offseted_by(offset) <= *start + { + let first = self.frags[pos].clone(); + offset += first.len_diff(); + frags.push(first); + pos += 1; + } + + // The current chunk of `self` starts before this chunk of `other` + // with overlap. + // The left-most part of data is added as an insertion chunk. + // The right-most part data is kept in the chunk. + if pos < self.frags.len() + && self.frags[pos].start_offseted_by(offset) < *start + { + let first = &mut self.frags[pos]; + + let (data_left, data_right) = first.data.split_at( + (*start - first.start_offseted_by(offset)) as usize, + ); + let left = PatchFrag { + start: first.start, + end: first.start, + data: data_left, + }; + + first.data = data_right; + + offset += left.len_diff(); + + frags.push(left); + + // There is no index incrementation because the right-most part + // needs further examination. + } + + // At this point remaining chunks of `self` starts after + // the current chunk of `other`. + + // `start_offset` will be used to adjust the start of the current + // chunk of `other`. + // Offset tracking continues with `end_offset` to adjust the end + // of the current chunk of `other`. + let mut next_offset = offset; + + // Discard the chunks of `self` that are totally overridden + // by the current chunk of `other` + while pos < self.frags.len() + && self.frags[pos].end_offseted_by(next_offset) <= *end + { + let first = &self.frags[pos]; + next_offset += first.len_diff(); + pos += 1; + } + + // Truncate the left-most part of chunk of `self` that overlaps + // the current chunk of `other`. + if pos < self.frags.len() + && self.frags[pos].start_offseted_by(next_offset) < *end + { + let first = &mut self.frags[pos]; + + let how_much_to_discard = + *end - first.start_offseted_by(next_offset); + + first.data = &first.data[(how_much_to_discard as usize)..]; + + next_offset += how_much_to_discard; + } + + // Add the chunk of `other` with adjusted position. + frags.push(PatchFrag { + start: *start - offset, + end: *end - next_offset, + data, + }); + + // Go back to normal offset tracking for the next `o` chunk + offset = next_offset; + } + + // Add remaining chunks of `self`. + for elt in &self.frags[pos..] { + frags.push(elt.clone()); + } + PatchList { frags } + } +} + +/// Combine a list of patch list into a single patch optimized patch list. +pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> { + if lists.len() <= 1 { + if lists.is_empty() { + PatchList { frags: vec![] } + } else { + lists[0].clone() + } + } else { + let (left, right) = lists.split_at(lists.len() / 2); + let mut left_res = fold_patch_lists(left); + let mut right_res = fold_patch_lists(right); + left_res.combine(&mut right_res) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + struct PatchDataBuilder { + data: Vec, + } + + impl PatchDataBuilder { + pub fn new() -> Self { + Self { data: vec![] } + } + + pub fn replace( + &mut self, + start: usize, + end: usize, + data: &[u8], + ) -> &mut Self { + assert!(start <= end); + self.data.extend(&(start as i32).to_be_bytes()); + self.data.extend(&(end as i32).to_be_bytes()); + self.data.extend(&(data.len() as i32).to_be_bytes()); + self.data.extend(data.iter()); + self + } + + pub fn get(&mut self) -> &[u8] { + &self.data[..] + } + } + + #[test] + fn test_ends_before() { + let data = vec![0u8, 0u8, 0u8]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(0, 1, &[1, 2]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(2, 4, &[3, 4]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![1u8, 2, 3, 4]); + } + + #[test] + fn test_starts_after() { + let data = vec![0u8, 0u8, 0u8]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(2, 3, &[3]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(1, 2, &[1, 2]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![0u8, 1, 2, 3]); + } + + #[test] + fn test_overridden() { + let data = vec![0u8, 0, 0]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(1, 2, &[3, 4]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(1, 4, &[1, 2, 3]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![0u8, 1, 2, 3]); + } + + #[test] + fn test_right_most_part_is_overridden() { + let data = vec![0u8, 0, 0]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(0, 1, &[1, 3]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(1, 4, &[2, 3, 4]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![1u8, 2, 3, 4]); + } + + #[test] + fn test_left_most_part_is_overridden() { + let data = vec![0u8, 0, 0]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(1, 3, &[1, 3, 4]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(0, 2, &[1, 2]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![1u8, 2, 3, 4]); + } + + #[test] + fn test_mid_is_overridden() { + let data = vec![0u8, 0, 0]; + let mut patch1_data = PatchDataBuilder::new(); + patch1_data.replace(0, 3, &[1, 3, 3, 4]); + let mut patch1 = PatchList::new(patch1_data.get()); + + let mut patch2_data = PatchDataBuilder::new(); + patch2_data.replace(1, 3, &[2, 3]); + let mut patch2 = PatchList::new(patch2_data.get()); + + let patch = patch1.combine(&mut patch2); + + let result = patch.apply(&data); + + assert_eq!(result, vec![1u8, 2, 3, 4]); + } +} diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/revlog/revlog.rs @@ -0,0 +1,353 @@ +use std::borrow::Cow; +use std::fs::File; +use std::io::Read; +use std::ops::Deref; +use std::path::Path; + +use byteorder::{BigEndian, ByteOrder}; +use flate2::read::ZlibDecoder; +use memmap::{Mmap, MmapOptions}; +use micro_timer::timed; +use zstd; + +use super::index::Index; +use super::patch; +use crate::revlog::Revision; + +pub enum RevlogError { + IoError(std::io::Error), + UnsuportedVersion(u16), + InvalidRevision, + Corrupted, + UnknowDataFormat(u8), +} + +fn mmap_open(path: &Path) -> Result { + let file = File::open(path)?; + let mmap = unsafe { MmapOptions::new().map(&file) }?; + Ok(mmap) +} + +/// Read only implementation of revlog. +pub struct Revlog { + /// When index and data are not interleaved: bytes of the revlog index. + /// When index and data are interleaved: bytes of the revlog index and + /// data. + index_bytes: Box + Send>, + /// When index and data are not interleaved: bytes of the revlog data + data_bytes: Option + Send>>, +} + +impl Revlog { + /// Open a revlog index file. + /// + /// It will also open the associated data file if index and data are not + /// interleaved. + #[timed] + pub fn open(index_path: &Path) -> Result { + let index_mmap = + mmap_open(&index_path).map_err(RevlogError::IoError)?; + + let version = get_version(&index_mmap); + if version != 1 { + return Err(RevlogError::UnsuportedVersion(version)); + } + + let is_inline = is_inline(&index_mmap); + + let index_bytes = Box::new(index_mmap); + + // TODO load data only when needed // + // type annotation required + // won't recognize Mmap as Deref + let data_bytes: Option + Send>> = + if is_inline { + None + } else { + let data_path = index_path.with_extension("d"); + let data_mmap = + mmap_open(&data_path).map_err(RevlogError::IoError)?; + Some(Box::new(data_mmap)) + }; + + Ok(Revlog { + index_bytes, + data_bytes, + }) + } + + /// Return the full data associated to a revision. + /// + /// All entries required to build the final data out of deltas will be + /// retrieved as needed, and the deltas will be applied to the inital + /// snapshot to rebuild the final data. + #[timed] + pub fn get_rev_data(&self, rev: Revision) -> Result, RevlogError> { + // Todo return -> Cow + let mut entry = self.get_entry(rev)?; + let mut delta_chain = vec![]; + while let Some(base_rev) = entry.base_rev { + delta_chain.push(entry); + entry = self + .get_entry(base_rev) + .map_err(|_| RevlogError::Corrupted)?; + } + + if delta_chain.is_empty() { + Ok(entry.data()?.into()) + } else { + Revlog::build_data_from_deltas(entry, &delta_chain) + } + } + + /// Build the full data of a revision out its snapshot + /// and its deltas. + #[timed] + fn build_data_from_deltas( + snapshot: RevlogEntry, + deltas: &[RevlogEntry], + ) -> Result, RevlogError> { + let snapshot = snapshot.data()?; + let deltas = deltas + .iter() + .rev() + .map(RevlogEntry::data) + .collect::>, RevlogError>>()?; + let patches: Vec<_> = + deltas.iter().map(|d| patch::PatchList::new(d)).collect(); + let patch = patch::fold_patch_lists(&patches); + Ok(patch.apply(&snapshot)) + } + + /// Return the revlog index. + fn index(&self) -> Index { + let is_inline = self.data_bytes.is_none(); + Index::new(&self.index_bytes, is_inline) + } + + /// Return the revlog data. + fn data(&self) -> &[u8] { + match self.data_bytes { + Some(ref data_bytes) => &data_bytes, + None => &self.index_bytes, + } + } + + /// Get an entry of the revlog. + fn get_entry(&self, rev: Revision) -> Result { + let index = self.index(); + let index_entry = + index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; + let start = index_entry.offset(); + let end = start + index_entry.compressed_len(); + let entry = RevlogEntry { + rev, + bytes: &self.data()[start..end], + compressed_len: index_entry.compressed_len(), + uncompressed_len: index_entry.uncompressed_len(), + base_rev: if index_entry.base_revision() == rev { + None + } else { + Some(index_entry.base_revision()) + }, + }; + Ok(entry) + } +} + +/// The revlog entry's bytes and the necessary informations to extract +/// the entry's data. +#[derive(Debug)] +pub struct RevlogEntry<'a> { + rev: Revision, + bytes: &'a [u8], + compressed_len: usize, + uncompressed_len: usize, + base_rev: Option, +} + +impl<'a> RevlogEntry<'a> { + /// Extract the data contained in the entry. + pub fn data(&self) -> Result, RevlogError> { + if self.bytes.is_empty() { + return Ok(Cow::Borrowed(&[])); + } + match self.bytes[0] { + // Revision data is the entirety of the entry, including this + // header. + b'\0' => Ok(Cow::Borrowed(self.bytes)), + // Raw revision data follows. + b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), + // zlib (RFC 1950) data. + b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data())), + // zstd data. + b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data())), + format_type => Err(RevlogError::UnknowDataFormat(format_type)), + } + } + + fn uncompressed_zlib_data(&self) -> Vec { + let mut decoder = ZlibDecoder::new(self.bytes); + if self.is_delta() { + let mut buf = Vec::with_capacity(self.compressed_len); + decoder.read_to_end(&mut buf).expect("corrupted zlib data"); + buf + } else { + let mut buf = vec![0; self.uncompressed_len]; + decoder.read_exact(&mut buf).expect("corrupted zlib data"); + buf + } + } + + fn uncompressed_zstd_data(&self) -> Vec { + if self.is_delta() { + let mut buf = Vec::with_capacity(self.compressed_len); + zstd::stream::copy_decode(self.bytes, &mut buf) + .expect("corrupted zstd data"); + buf + } else { + let mut buf = vec![0; self.uncompressed_len]; + let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) + .expect("corrupted zstd data"); + assert_eq!(len, self.uncompressed_len, "corrupted zstd data"); + buf + } + } + + /// Tell if the entry is a snapshot or a delta + /// (influences on decompression). + fn is_delta(&self) -> bool { + self.base_rev.is_some() + } +} + +/// Value of the inline flag. +pub fn is_inline(index_bytes: &[u8]) -> bool { + match &index_bytes[0..=1] { + [0, 0] | [0, 2] => false, + _ => true, + } +} + +/// Format version of the revlog. +pub fn get_version(index_bytes: &[u8]) -> u16 { + BigEndian::read_u16(&index_bytes[2..=3]) +} + +#[cfg(test)] +mod tests { + use super::*; + + use super::super::index::IndexEntryBuilder; + + #[cfg(test)] + pub struct RevlogBuilder { + version: u16, + is_general_delta: bool, + is_inline: bool, + offset: usize, + index: Vec>, + data: Vec>, + } + + #[cfg(test)] + impl RevlogBuilder { + pub fn new() -> Self { + Self { + version: 2, + is_inline: false, + is_general_delta: true, + offset: 0, + index: vec![], + data: vec![], + } + } + + pub fn with_inline(&mut self, value: bool) -> &mut Self { + self.is_inline = value; + self + } + + pub fn with_general_delta(&mut self, value: bool) -> &mut Self { + self.is_general_delta = value; + self + } + + pub fn with_version(&mut self, value: u16) -> &mut Self { + self.version = value; + self + } + + pub fn push( + &mut self, + mut index: IndexEntryBuilder, + data: Vec, + ) -> &mut Self { + if self.index.is_empty() { + index.is_first(true); + index.with_general_delta(self.is_general_delta); + index.with_inline(self.is_inline); + index.with_version(self.version); + } else { + index.with_offset(self.offset); + } + self.index.push(index.build()); + self.offset += data.len(); + self.data.push(data); + self + } + + pub fn build_inline(&self) -> Vec { + let mut bytes = + Vec::with_capacity(self.index.len() + self.data.len()); + for (index, data) in self.index.iter().zip(self.data.iter()) { + bytes.extend(index); + bytes.extend(data); + } + bytes + } + } + + #[test] + fn is_not_inline_when_no_inline_flag_test() { + let bytes = RevlogBuilder::new() + .with_general_delta(false) + .with_inline(false) + .push(IndexEntryBuilder::new(), vec![]) + .build_inline(); + + assert_eq!(is_inline(&bytes), false) + } + + #[test] + fn is_inline_when_inline_flag_test() { + let bytes = RevlogBuilder::new() + .with_general_delta(false) + .with_inline(true) + .push(IndexEntryBuilder::new(), vec![]) + .build_inline(); + + assert_eq!(is_inline(&bytes), true) + } + + #[test] + fn is_inline_when_inline_and_generaldelta_flags_test() { + let bytes = RevlogBuilder::new() + .with_general_delta(true) + .with_inline(true) + .push(IndexEntryBuilder::new(), vec![]) + .build_inline(); + + assert_eq!(is_inline(&bytes), true) + } + + #[test] + fn version_test() { + let bytes = RevlogBuilder::new() + .with_version(1) + .push(IndexEntryBuilder::new(), vec![]) + .build_inline(); + + assert_eq!(get_version(&bytes), 1) + } +}