diff --git a/rust/treedirstate/Cargo.lock b/rust/treedirstate/Cargo.lock
--- a/rust/treedirstate/Cargo.lock
+++ b/rust/treedirstate/Cargo.lock
@@ -35,6 +35,11 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "byteorder"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "cc"
 version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -171,9 +176,19 @@
 name = "rusttreedirstate"
 version = "0.1.0"
 dependencies = [
+ "byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "itertools 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "quickcheck 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "tempdir"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "rand 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -213,6 +228,7 @@
 "checksum backtrace 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "99f2ce94e22b8e664d95c57fff45b98a966c2252b60691d0b7aeeccd88d70983"
 "checksum backtrace-sys 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "44585761d6161b0f57afc49482ab6bd067e4edef48c12a152c237eb0203f7661"
 "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
+"checksum byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff81738b726f5d099632ceaffe7fb65b90212e8dce59d518729e7e8634032d3d"
 "checksum cc 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a9b13a57efd6b30ecd6598ebdb302cca617930b5470647570468a65d12ef9719"
 "checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"
 "checksum dbghelp-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "97590ba53bcb8ac28279161ca943a924d1fd4a8fb3fa63302591647c4fc5b850"
@@ -231,6 +247,7 @@
 "checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
 "checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
 "checksum rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "aee45432acc62f7b9a108cc054142dac51f979e69e71ddce7d6fc7adf29e817e"
+"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
 "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
 "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
 "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
diff --git a/rust/treedirstate/Cargo.toml b/rust/treedirstate/Cargo.toml
--- a/rust/treedirstate/Cargo.toml
+++ b/rust/treedirstate/Cargo.toml
@@ -11,8 +11,10 @@
 crate-type = ["cdylib"]
 
 [dependencies]
+byteorder = "*"
 error-chain = "*"
 
 [dev-dependencies]
 itertools = "0.7.2"
 quickcheck = "*"
+tempdir = "*"
diff --git a/rust/treedirstate/src/errors.rs b/rust/treedirstate/src/errors.rs
--- a/rust/treedirstate/src/errors.rs
+++ b/rust/treedirstate/src/errors.rs
@@ -3,10 +3,20 @@
 
 error_chain! {
     errors {
+        NotAStoreFile {
+            description("the provided store file is not a valid store file"),
+        }
+        UnsupportedVersion(v: u32) {
+            description("store file version not supported"),
+            display("store file version not supported: {}", v),
+        }
         InvalidStoreId(id: u64) {
             description("invalid store id"),
             display("invalid store id: {}", id),
         }
+        ReadOnlyStore {
+            description("store is read-only"),
+        }
     }
     foreign_links {
         Io(::std::io::Error);
diff --git a/rust/treedirstate/src/filestore.rs b/rust/treedirstate/src/filestore.rs
new file mode 100644
--- /dev/null
+++ b/rust/treedirstate/src/filestore.rs
@@ -0,0 +1,316 @@
+// Copyright Facebook, Inc. 2017
+//! Implementation of a store using file I/O.
+
+use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use errors::*;
+use std;
+use std::borrow::Cow;
+use std::cell::RefCell;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::{BufWriter, Read, Seek, SeekFrom, Write};
+use std::path::Path;
+use store::{BlockId, Store, StoreView};
+
+// File storage format:
+//
+// Header:  Magic string: 'appendstore\n'
+//          Version: BigEndian u32 (Current version: 1)
+//
+// Entries: Length: BigEndian u32
+//          Data: "Length" bytes of data
+
+const MAGIC_LEN: usize = 12;
+const MAGIC: [u8; MAGIC_LEN] = *b"appendstore\n";
+const VERSION: u32 = 1;
+const HEADER_LEN: u64 = (MAGIC_LEN + 4) as u64;
+
+/// Implementation of a store using file I/O to read and write blocks to a file.
+pub struct FileStore {
+    /// The underlying file.  This is stored in a RefCell so that we can seek during reads.
+    file: RefCell<BufWriter<File>>,
+
+    /// The position in the file to which new items will be written.
+    position: u64,
+
+    /// Whether the file handle is currently at the end of the file.  This is used to avoid seeking
+    /// to the end each time a block is written, as seeking causes the BufWriter to flush, which
+    /// hurts performance.  This is stored in a RefCell so that we can seek away from the end
+    /// during reads.
+    at_end: RefCell<bool>,
+
+    /// True if the file is read-only.
+    read_only: bool,
+}
+
+impl FileStore {
+    /// Create a new FileStore, overwriting any existing file.
+    pub fn create<P: AsRef<Path>>(path: P) -> Result<FileStore> {
+        let mut file = BufWriter::new(OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&path)?);
+        file.write_all(&MAGIC)?;
+        file.write_u32::<BigEndian>(VERSION)?;
+        Ok(FileStore {
+            file: RefCell::new(file),
+            position: HEADER_LEN,
+            at_end: RefCell::new(true),
+            read_only: false,
+        })
+    }
+
+    /// Open an existing FileStore.  Attempts to open the file in read/write mode.  If write
+    /// access is not permitted, falls back to opening the file in read-only mode.  When open
+    /// in read-only mode, new blocks of data cannot be appended.
+    pub fn open<P: AsRef<Path>>(path: P) -> Result<FileStore> {
+        let mut read_only = false;
+        let file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .open(&path)
+            .or_else(|_e| {
+                read_only = true;
+                OpenOptions::new().read(true).open(&path)
+            })?;
+        let mut file = BufWriter::new(file);
+
+        // Check the file header is as expected.
+        let mut buffer = [0; MAGIC_LEN];
+        file.get_ref()
+            .read_exact(&mut buffer)
+            .map_err(|_e| ErrorKind::NotAStoreFile)?;
+        if buffer != MAGIC {
+            bail!(ErrorKind::NotAStoreFile);
+        }
+        let version = file.get_ref().read_u32::<BigEndian>()?;
+        if version != VERSION {
+            bail!(ErrorKind::UnsupportedVersion(version));
+        }
+
+        // Find the size of the file (and hence the position to write new blocks of data)
+        // by seeking to the end.
+        let position = file.seek(SeekFrom::End(0))?;
+
+        Ok(FileStore {
+            file: RefCell::new(file),
+            position,
+            at_end: RefCell::new(true),
+            read_only,
+        })
+    }
+}
+
+impl Store for FileStore {
+    fn append(&mut self, data: &[u8]) -> Result<BlockId> {
+        if self.read_only {
+            bail!(ErrorKind::ReadOnlyStore);
+        }
+        let id = BlockId(self.position);
+        let file = self.file.get_mut();
+        let at_end = self.at_end.get_mut();
+        if !*at_end {
+            file.seek(SeekFrom::Start(self.position))?;
+            *at_end = true;
+        }
+        assert!(data.len() <= std::u32::MAX as usize, "data too long");
+        file.write_u32::<BigEndian>(data.len() as u32)?;
+        file.write_all(data)?;
+        self.position += 4 + data.len() as u64;
+        debug_assert!(self.position == file.seek(SeekFrom::End(0))?);
+        Ok(id)
+    }
+
+    fn flush(&mut self) -> Result<()> {
+        self.file.get_mut().flush()?;
+        Ok(())
+    }
+}
+
+impl StoreView for FileStore {
+    fn read<'a>(&'a self, id: BlockId) -> Result<Cow<'a, [u8]>> {
+        // Check the ID is in range.
+        if id.0 < HEADER_LEN || id.0 > self.position - 4 {
+            bail!(ErrorKind::InvalidStoreId(id.0));
+        }
+
+        // Get mutable access to the file, and seek to the right location.
+        let mut file = self.file.borrow_mut();
+        file.seek(SeekFrom::Start(id.0))?;
+        *self.at_end.borrow_mut() = false;
+
+        // Read the block of data from the file.
+        let size = file.get_mut().read_u32::<BigEndian>()?;
+        if size as u64 > self.position - id.0 - 4 {
+            // The stored size of this block exceeds the number of bytes left in the file after
+            // the 4-byte length field.  We must have been given an invalid ID.
+            bail!(ErrorKind::InvalidStoreId(id.0));
+        }
+        // Allocate a zero-filled buffer for the block.  The buffer must be initialised
+        // before it is handed to `read_exact`: passing uninitialised memory (as
+        // `Vec::with_capacity` + `set_len` would) to a `Read` implementation is undefined
+        // behaviour.
+        let mut buffer: Vec<u8> = vec![0; size as usize];
+        file.get_mut().read_exact(&mut buffer[..])?;
+
+        Ok(Cow::from(buffer))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tempdir::TempDir;
+    use filestore::FileStore;
+    use store::{BlockId, Store, StoreView};
+    use std::fs;
+    use std::io::Write;
+
+    #[test]
+    fn goodpath() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut s = FileStore::create(&p).expect("create store");
+        let id1 = s.append("data block 1".as_bytes()).expect("write block 1");
+        let id2 = s.append("data block two".as_bytes())
+            .expect("write block 2");
+        s.flush().expect("flush");
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        drop(s);
+        let mut s = FileStore::open(&p).expect("open store");
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        let id3 = s.append("third data block".as_bytes())
+            .expect("write block 3");
+        s.flush().expect("flush");
+        drop(s);
+        let s = FileStore::open(p.clone()).expect("open store");
+        assert_eq!(s.read(id3).expect("read 3"), "third data block".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        dir.close().expect("clean up temp dir");
+    }
+
+    #[test]
+    fn readonly() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut s = FileStore::create(&p).expect("create store");
+        let id1 = s.append("data block 1".as_bytes()).expect("write block 1");
+        s.flush().expect("flush");
+        drop(s);
+        let mut perms = fs::metadata(&p).unwrap().permissions();
+        perms.set_readonly(true);
+        fs::set_permissions(&p, perms).unwrap();
+        let mut s = FileStore::open(&p).expect("open store");
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        assert_eq!(
+            s.append("third data block".as_bytes())
+                .unwrap_err()
+                .to_string(),
+            "store is read-only"
+        );
+    }
+
+    #[test]
+    fn empty_file() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let file = fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&p)
+            .unwrap();
+        drop(file);
+        assert_eq!(
+            FileStore::open(&p)
+                .err()
+                .expect("file should not be opened")
+                .to_string(),
+            "the provided store file is not a valid store file"
+        );
+    }
+
+    #[test]
+    fn invalid_file() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut file = fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&p)
+            .unwrap();
+        file.write_all(b"not a store file").unwrap();
+        drop(file);
+        assert_eq!(
+            FileStore::open(&p)
+                .err()
+                .expect("file should not be opened")
+                .to_string(),
+            "the provided store file is not a valid store file"
+        );
+    }
+
+    #[test]
+    fn unsupported_version() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut file = fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&p)
+            .unwrap();
+        // Version 0 is not supported.
+        file.write_all(b"appendstore\n\x00\x00\x00\x00").unwrap();
+        drop(file);
+        assert_eq!(
+            FileStore::open(&p)
+                .err()
+                .expect("file should not be opened")
+                .to_string(),
+            "store file version not supported: 0"
+        );
+    }
+
+    #[test]
+    fn invalid_store_ids() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut file = fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&p)
+            .unwrap();
+        // Valid version 1 header, then a block whose length (0xffff) exceeds the data present.
+        file.write_all(b"appendstore\n\x00\x00\x00\x01\x00\x00\xff\xffdata")
+            .unwrap();
+        drop(file);
+        let f = FileStore::open(&p).expect("file should be opened");
+        // Store ID 2 is inside the header.
+        assert_eq!(
+            f.read(BlockId(2)).unwrap_err().to_string(),
+            "invalid store id: 2"
+        );
+        // Store ID 16 has an invalid length.
+        assert_eq!(
+            f.read(BlockId(16)).unwrap_err().to_string(),
+            "invalid store id: 16"
+        );
+        // Store ID 22 is within 4 bytes of the end of the file.
+        assert_eq!(
+            f.read(BlockId(22)).unwrap_err().to_string(),
+            "invalid store id: 22"
+        );
+        // Store ID 64 is after the end of the file.
+        assert_eq!(
+            f.read(BlockId(64)).unwrap_err().to_string(),
+            "invalid store id: 64"
+        );
+    }
+}
diff --git a/rust/treedirstate/src/lib.rs b/rust/treedirstate/src/lib.rs
--- a/rust/treedirstate/src/lib.rs
+++ b/rust/treedirstate/src/lib.rs
@@ -12,6 +12,8 @@
 //! The directory state also stores files that are in the working copy parent manifest but have
 //! been marked as removed.
 
+extern crate byteorder;
+
 #[macro_use]
 extern crate error_chain;
 
@@ -22,7 +24,11 @@
 #[macro_use]
 extern crate quickcheck;
 
+#[cfg(test)]
+extern crate tempdir;
+
 pub mod errors;
+pub mod filestore;
 pub mod store;
 pub mod vecmap;
 
diff --git a/rust/treedirstate/src/store.rs b/rust/treedirstate/src/store.rs
--- a/rust/treedirstate/src/store.rs
+++ b/rust/treedirstate/src/store.rs
@@ -5,7 +5,7 @@
 use std::borrow::Cow;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub struct BlockId(u64);
+pub struct BlockId(pub u64);
 
 /// Append-only storage.  Blocks of data may be stored in an instance of a Store.  Once written,
 /// blocks are immutable.