diff --git a/rust/treedirstate/Cargo.toml b/rust/treedirstate/Cargo.toml
--- a/rust/treedirstate/Cargo.toml
+++ b/rust/treedirstate/Cargo.toml
@@ -11,4 +11,8 @@
 crate-type = ["cdylib"]
 
 [dependencies]
+byteorder = "*"
 error-chain = "*"
+
+[dev-dependencies]
+tempdir = "*"
diff --git a/rust/treedirstate/src/errors.rs b/rust/treedirstate/src/errors.rs
--- a/rust/treedirstate/src/errors.rs
+++ b/rust/treedirstate/src/errors.rs
@@ -3,6 +3,22 @@
 
 error_chain! {
     errors {
+        NotAStoreFile {
+            description("the provided store file is not a valid store file"),
+            display("the provided store file is not a valid store file"),
+        }
+        UnsupportedVersion(v: u32) {
+            description("store file version not supported"),
+            display("store file version not supported: {}", v),
+        }
+        ReadOnlyStore {
+            description("store is read-only"),
+            display("store is read-only"),
+        }
+        BlockTooLarge(len: usize) {
+            description("block is too large to store"),
+            display("block is too large to store: {} bytes", len),
+        }
         InvalidStoreId(id: usize) {
             description("invalid store id"),
             display("invalid store id: {}", id),
diff --git a/rust/treedirstate/src/filestore.rs b/rust/treedirstate/src/filestore.rs
new file mode 100644
--- /dev/null
+++ b/rust/treedirstate/src/filestore.rs
@@ -0,0 +1,228 @@
+// Copyright Facebook, Inc. 2017
+//! Implementation of a store using file I/O.
+
+use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use errors::*;
+use std::borrow::Cow;
+use std::cell::RefCell;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::{BufWriter, Read, Seek, SeekFrom, Write};
+use std::path::Path;
+use store::{BlockId, Store, StoreView};
+
+// File storage format:
+//
+// Header: Magic string: 'appendstore\n'
+//         Version: BigEndian u32 (Current version: 1)
+//
+// Entries: Length: BigEndian u32
+//          Data: "Length" bytes of data
+
+const MAGIC: &[u8] = b"appendstore\n";
+const MAGIC_LEN: usize = 12;
+const VERSION: u32 = 1;
+const HEADER_LEN: usize = MAGIC_LEN + 4;
+
+/// Size in bytes of the length prefix that precedes the data of every entry.
+const LENGTH_PREFIX_LEN: u64 = 4;
+
+/// Implementation of a store using file I/O to read and write blocks to a file.
+pub struct FileStore {
+    /// The underlying file. This is stored in a RefCell so that we can seek during reads.
+    file: RefCell<BufWriter<File>>,
+
+    /// The position in the file to which new items will be written.
+    position: u64,
+
+    /// Whether the file handle is currently at the end of the file. This is used to avoid seeking
+    /// to the end each time a block is written, as seeking causes the BufWriter to flush, which
+    /// hurts performance. This is stored in a RefCell so that we can seek away from the end
+    /// during reads.
+    at_end: RefCell<bool>,
+
+    /// True if the file is read-only.
+    read_only: bool,
+}
+
+impl FileStore {
+    /// Create a new FileStore, overwriting any existing file.
+    ///
+    /// The store header (magic string and format version) is written immediately.
+    pub fn create<P: AsRef<Path>>(path: P) -> Result<FileStore> {
+        let mut file = BufWriter::new(OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(path)?);
+        // Use `write_all`, not `write`: a short write would leave a truncated header
+        // while `position` below assumes that the whole header is present.
+        file.write_all(MAGIC)?;
+        file.write_u32::<BigEndian>(VERSION)?;
+        Ok(FileStore {
+            file: RefCell::new(file),
+            position: HEADER_LEN as u64,
+            at_end: RefCell::new(true),
+            read_only: false,
+        })
+    }
+
+    /// Open an existing FileStore.
+    ///
+    /// The file is opened read-write if possible. If that fails it is opened read-only
+    /// instead, in which case appending to the store returns an error.
+    pub fn open<P: AsRef<Path>>(path: P) -> Result<FileStore> {
+        let mut read_only = false;
+        let file = OpenOptions::new().read(true).write(true).open(&path).or_else(|_e| {
+            read_only = true;
+            OpenOptions::new().read(true).open(&path)
+        })?;
+        let mut file = BufWriter::new(file);
+
+        // Check the file header is as expected.
+        let mut buffer = [0; MAGIC_LEN];
+        file.get_ref().read_exact(&mut buffer)?;
+        if buffer != MAGIC {
+            bail!(ErrorKind::NotAStoreFile);
+        }
+        let version = file.get_ref().read_u32::<BigEndian>()?;
+        if version != VERSION {
+            bail!(ErrorKind::UnsupportedVersion(version));
+        }
+
+        // Find the size of the file (and hence the position to write new blocks of data)
+        // by seeking to the end.
+        let position = file.seek(SeekFrom::End(0))?;
+
+        Ok(FileStore {
+            file: RefCell::new(file),
+            position,
+            at_end: RefCell::new(true),
+            read_only,
+        })
+    }
+}
+
+impl Store for FileStore {
+    /// Append a block of data to the end of the store, returning the ID of the new block.
+    ///
+    /// Fails with `ReadOnlyStore` if the store file could only be opened for reading, and
+    /// with `BlockTooLarge` if the length of `data` does not fit in the u32 length prefix.
+    fn append(&mut self, data: &[u8]) -> Result<BlockId> {
+        if self.read_only {
+            bail!(ErrorKind::ReadOnlyStore);
+        }
+        // The length is stored as a u32. Refuse longer blocks rather than letting the cast
+        // below truncate the length and corrupt the store.
+        if data.len() as u64 > u32::max_value() as u64 {
+            bail!(ErrorKind::BlockTooLarge(data.len()));
+        }
+        let id = self.position as BlockId;
+        let file = self.file.get_mut();
+        let at_end = self.at_end.get_mut();
+        if !*at_end {
+            file.seek(SeekFrom::Start(self.position))?;
+        }
+        // Clear the flag while the block is being written, so that after a failed write
+        // the next append seeks back to `position` instead of trusting the handle.
+        *at_end = false;
+        file.write_u32::<BigEndian>(data.len() as u32)?;
+        // Use `write_all`, not `write`: a short write would leave the block truncated.
+        file.write_all(data)?;
+        *at_end = true;
+        self.position += LENGTH_PREFIX_LEN + data.len() as u64;
+        Ok(id)
+    }
+
+    /// Flush any buffered writes through to the underlying file.
+    fn flush(&mut self) -> Result<()> {
+        self.file.get_mut().flush()?;
+        Ok(())
+    }
+}
+
+impl StoreView for FileStore {
+    /// Read the block of data with the given ID.
+    ///
+    /// Fails with `InvalidStoreId` if `id` does not lie within the block area of the file,
+    /// or if the length stored at `id` extends past the end of the store.
+    fn read<'a>(&'a self, id: BlockId) -> Result<Cow<'a, [u8]>> {
+        // Check the ID is in range, and that a complete length prefix fits after it.
+        let start = id as u64;
+        if id < HEADER_LEN || start >= self.position {
+            bail!(ErrorKind::InvalidStoreId(id));
+        }
+        let available = self.position - start;
+        if available < LENGTH_PREFIX_LEN {
+            bail!(ErrorKind::InvalidStoreId(id));
+        }
+
+        // Get mutable access to the file, and seek to the right location. The flag is
+        // cleared first so a failed seek cannot leave it claiming we are still at the end.
+        let mut file = self.file.borrow_mut();
+        *self.at_end.borrow_mut() = false;
+        file.seek(SeekFrom::Start(start))?;
+
+        // Read the block of data from the file.
+        let size = file.get_mut().read_u32::<BigEndian>()?;
+        if size as u64 > available - LENGTH_PREFIX_LEN {
+            // The stored size of this block exceeds the number of bytes left in the file. We
+            // must have been given an invalid ID.
+            bail!(ErrorKind::InvalidStoreId(id));
+        }
+        // Zero-initialise the buffer: passing uninitialised memory to `read_exact` is unsound.
+        let mut buffer = vec![0u8; size as usize];
+        file.get_mut().read_exact(&mut buffer)?;
+
+        Ok(Cow::from(buffer))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tempdir::TempDir;
+    use filestore::FileStore;
+    use store::{Store, StoreView};
+
+    #[test]
+    fn basic_test() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut s = FileStore::create(p.clone()).expect("create store");
+        let id1 = s.append("data block 1".as_bytes()).expect("write block 1");
+        let id2 = s.append("data block two".as_bytes())
+            .expect("write block 2");
+        s.flush().expect("flush");
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        drop(s);
+        let mut s = FileStore::open(p.clone()).expect("open store");
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        let id3 = s.append("third data block".as_bytes())
+            .expect("write block 3");
+        s.flush().expect("flush");
+        drop(s);
+        let s = FileStore::open(p.clone()).expect("open store");
+        assert_eq!(s.read(id3).expect("read 3"), "third data block".as_bytes());
+        assert_eq!(s.read(id2).expect("read 2"), "data block two".as_bytes());
+        assert_eq!(s.read(id1).expect("read 1"), "data block 1".as_bytes());
+        dir.close().expect("clean up temp dir");
+    }
+
+    #[test]
+    fn invalid_ids_are_rejected() {
+        let dir = TempDir::new("filestore_test").expect("create temp dir");
+        let p = dir.path().join("store");
+        let mut s = FileStore::create(p).expect("create store");
+        let id = s.append("data".as_bytes()).expect("write block");
+        s.flush().expect("flush");
+        // IDs inside the header, inside a block's data, and past the end are all invalid.
+        assert!(s.read(0).is_err());
+        assert!(s.read(id + 5).is_err());
+        assert!(s.read(id + 8).is_err());
+        drop(s);
+        dir.close().expect("clean up temp dir");
+    }
+}
diff --git a/rust/treedirstate/src/lib.rs b/rust/treedirstate/src/lib.rs
--- a/rust/treedirstate/src/lib.rs
+++ b/rust/treedirstate/src/lib.rs
@@ -12,10 +12,16 @@
 //! The directory state also stores files that are in the working copy parent manifest but have
 //! been marked as removed.
 
+extern crate byteorder;
+
+#[cfg(test)]
+extern crate tempdir;
+
 #[macro_use]
 extern crate error_chain;
 
 pub mod errors;
+pub mod filestore;
 pub mod store;
 pub mod vecmap;
 