diff --git a/rust/radix204/Cargo.toml b/rust/radix204/Cargo.toml --- a/rust/radix204/Cargo.toml +++ b/rust/radix204/Cargo.toml @@ -4,9 +4,12 @@ [dependencies] error-chain = "0.11" +libc = "0.2" +memmap = "0.6" [dev-dependencies] quickcheck = "0.4" rand = "0.3" rand_derive = "0.3" rustc-test = "0.2" +tempdir = "0.3" diff --git a/rust/radix204/src/errors.rs b/rust/radix204/src/errors.rs --- a/rust/radix204/src/errors.rs +++ b/rust/radix204/src/errors.rs @@ -8,6 +8,7 @@ error_chain! { foreign_links { BorrowMut(::std::cell::BorrowMutError); + Io(::std::io::Error); } errors { diff --git a/rust/radix204/src/lib.rs b/rust/radix204/src/lib.rs --- a/rust/radix204/src/lib.rs +++ b/rust/radix204/src/lib.rs @@ -9,6 +9,8 @@ #[macro_use] extern crate error_chain; +extern crate memmap; + #[cfg(test)] #[macro_use] extern crate quickcheck; @@ -20,9 +22,13 @@ #[macro_use] extern crate rand_derive; +#[cfg(test)] +extern crate tempdir; + pub mod types; pub mod errors; pub mod base16; pub mod radix; +pub mod mmapvec; pub type RadixBuffer = radix::RadixBuffer; diff --git a/rust/radix204/src/mmapvec.rs b/rust/radix204/src/mmapvec.rs new file mode 100644 --- /dev/null +++ b/rust/radix204/src/mmapvec.rs @@ -0,0 +1,229 @@ +// Copyright 2017 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Simple vector based on mmap + +use errors::Result; +use std::borrow::Borrow; +use std::fs::{File, OpenOptions}; +use std::io::{Seek, SeekFrom}; +use std::marker::PhantomData; +use std::mem::size_of; +use std::ops::{Deref, DerefMut, Drop, Index, IndexMut, Range, RangeFrom, RangeTo, RangeFull}; +use std::path::PathBuf; +use std::ptr; +use std::slice; +use memmap::{MmapMut, MmapOptions}; + +/// A subset of Vec features based on a mmap-ed file. Application should take care of +/// locking so `MVec`s backed by a same file would either have multiple readers, or have +/// a unique reader. +pub struct MmapVec { + mmap: MmapMut, + file: File, + len: usize, + cap: usize, + _t: PhantomData, +} + +impl MmapVec { + pub fn from_file(mut file: File) -> Result { + let min_size: usize = size_of::().max(4096); + + // Make sure the file is not empty and has a size of multiples of size_of::() + file.seek(SeekFrom::Start(0))?; + let mut size = file.seek(SeekFrom::End(0))? as usize; + let len = size / size_of::(); + if size < min_size { + file.set_len(min_size as u64)?; + size = min_size; + } + let cap = size / size_of::(); + size = cap * size_of::(); + + // mmap the file + let mmap = unsafe { MmapOptions::new().len(size).map_mut(&file) }?; + Ok(Self { + mmap, + file, + len, + cap, + _t: PhantomData, + }) + } + + pub fn from_path>(path: P) -> Result { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(path.borrow())?; + Self::from_file(file) + } + + #[inline] + pub fn len(&self) -> usize { self.len } + + #[inline] + pub fn cap(&self) -> usize { self.cap } + + pub fn extend_from_slice(&mut self, other: &[T]) { + let len = self.len(); + let slice_len = other.len(); + self.reserve(slice_len); + self.len += slice_len; + self[len..(len + slice_len)].copy_from_slice(other); + } + + pub fn resize(&mut self, new_len: usize, value: T) { + let len = self.len; + if new_len > len { + let n = new_len - len; + self.reserve(n); + unsafe { + let mut ptr = self.mmap.as_mut_ptr() as *mut T; + ptr = ptr.offset(self.len as isize); + for _ in 0..n { + ptr::write(ptr, value); + ptr = ptr.offset(1); + } + } + } + self.len = new_len; + } + + #[inline] + pub fn reserve(&mut self, additional: usize) { + let required_cap = self.len + additional; + if required_cap > self.cap { + // Preallocate more space to reduce disk fragments. Neither too big nor too small. + let new_cap = ((required_cap + 0x40_0000) & !0x3f_ffff) + .min(self.cap * 4) + .max(required_cap); + // Always align to 4KB + let new_cap = (new_cap + 0x1000) & !0xfff; + let size = new_cap * size_of::(); + self.file.set_len(size as u64).expect("disk full"); + self.mmap = unsafe { MmapOptions::new().len(size).map_mut(&self.file) }.expect("mmap"); + self.cap = new_cap; + } + } + + pub fn sync(&self) -> Result<()> { Ok(self.mmap.flush()?) } +} + +impl Deref for MmapVec { + type Target = [T]; + + #[inline] + fn deref(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.mmap.as_ptr() as *const T, self.len) } + } +} + +impl DerefMut for MmapVec { + #[inline] + fn deref_mut(&mut self) -> &mut [T] { + unsafe { slice::from_raw_parts_mut(self.mmap.as_mut_ptr() as *mut T, self.len) } + } +} + +impl Drop for MmapVec { + fn drop(&mut self) { + if self.cap != self.len { + // Truncate the file to actual size + let size = self.len * size_of::(); + self.file.set_len(size as u64).expect("truncate failed"); + } + } +} + +// Mark MmapVec as supporting Index/IndexMut interface +macro_rules! impl_index { + ($I: ty, $T: tt, $O: tt) => { + impl<$T: Copy> Index<$I> for MmapVec { + type Output = $O; + #[inline] + fn index(&self, index: $I) -> &$O { self.deref().index(index) } + } + + impl<$T: Copy> IndexMut<$I> for MmapVec { + #[inline] + fn index_mut(&mut self, index: $I) -> &mut $O { self.deref_mut().index_mut(index) } + } + } +} + +impl_index!(usize, T, T); +impl_index!(Range, T, [T]); +impl_index!(RangeFrom, T, [T]); +impl_index!(RangeTo, T, [T]); +impl_index!(RangeFull, T, [T]); + +#[cfg(test)] +mod tests { + use super::*; + use std::fmt::Debug; + use tempdir::TempDir; + + fn check_with_vec(v: &Vec) -> Result<()> { + let tempdir = TempDir::new("mvec")?; + let path = tempdir.path().join("mvec"); + { + let mut m: MmapVec = MmapVec::from_path(&path)?; + + assert_eq!(m.len(), 0); + assert_ne!(m.cap(), 0); + + m.extend_from_slice(&v[..]); + assert_eq!(m.len(), v.len()); + assert!(m.cap() >= v.len()); + assert_eq!(m[..], v[..]); + } + { + // Reload the file + let m: MmapVec = MmapVec::::from_path(&path)?; + assert_eq!(m[..], v[..]); + } + Ok(()) + } + + #[test] + fn check_large_buffer() { + assert!(check_with_vec(&vec![0x12345678abcdef01u64; 10000]).is_ok()); + } + + #[test] + fn check_resize() { + let tempdir = TempDir::new("mvec").unwrap(); + let path = tempdir.path().join("resize"); + + let mut v = Vec::::new(); // reference + { + let mut m: MmapVec = MmapVec::from_path(path).unwrap(); + for &(size, default) in [(10, 101), (100, 201), (1000, 301), (500, 401)].iter() { + m.resize(size, default); + v.resize(size, default); + assert_eq!(m[..], v[..]); + } + } + + assert!(check_with_vec(&vec![0x12345678abcdef01u64; 10000]).is_ok()); + } + + quickcheck! { + fn test_compare_with_vec_i8(v: Vec) -> bool { + check_with_vec(&v).is_ok() + } + + fn test_compare_with_vec_u32(v: Vec) -> bool { + check_with_vec(&v).is_ok() + } + + fn test_compare_with_vec_i64(v: Vec) -> bool { + check_with_vec(&v).is_ok() + } + } +}