diff --git a/rust/radixbuf/Cargo.toml b/rust/radixbuf/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/radixbuf/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "radixbuf" +version = "0.1.0" + +[dependencies] +vlqencoding = { path = "../vlqencoding" } + +[dev-dependencies] +quickcheck = "0.4" diff --git a/rust/radixbuf/README.md b/rust/radixbuf/README.md new file mode 100644 --- /dev/null +++ b/rust/radixbuf/README.md @@ -0,0 +1,10 @@ +# radixbuf + +This is a typical [Radix Tree](https://en.wikipedia.org/wiki/Radix_tree) implementation focused on efficient on-disk access. + +Namely, it has the following properties: + + - Looking up a key is not O(length of the file). + - Making changes to the tree is not O(length of the file). + +This is still a work-in-progress. diff --git a/rust/radixbuf/src/lib.rs b/rust/radixbuf/src/lib.rs new file mode 100644 --- /dev/null +++ b/rust/radixbuf/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +#[macro_use] +extern crate quickcheck; + +extern crate vlqencoding; + +mod serialize; diff --git a/rust/radixbuf/src/serialize.rs b/rust/radixbuf/src/serialize.rs new file mode 100644 --- /dev/null +++ b/rust/radixbuf/src/serialize.rs @@ -0,0 +1,146 @@ +//! Simple serialization that converts between T and an offset + +use std::cell::RefCell; +use std::io::{self, Read, Write, Seek, SeekFrom, ErrorKind}; +use std::marker::PhantomData; +use std::mem::transmute; +use std::ops::DerefMut; +use std::rc::Rc; + +/// Minimal serializable abstraction. +pub trait Serialize: Sized { + fn write_to(&self, w: &mut W) -> io::Result<()>; + fn read_from(r: &mut R) -> io::Result; +} + +/// Indicates a type will be serialized into fixed number of bytes. +/// This decides whether the type could support rewrite-in-place or not. +pub trait FixedSizedSerialize: Serialize {} + +/// Optimized for compact storage. +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub struct Offset(u64, PhantomData); + +impl Offset { + pub unsafe fn from_raw(offset: u64) -> Self { Offset::(offset, PhantomData) } +} + +impl Serialize for Offset { + fn write_to(&self, w: &mut W) -> io::Result<()> { self.0.write_to(w) } + + fn read_from(r: &mut R) -> io::Result { + Ok(unsafe { Offset::from_raw(u64::read_from(r)?) }) + } +} + +/// Buffer that supports read and write serializable objects. +pub struct Buffer(Rc>); + +impl Buffer { + pub fn new(buf: &Rc>) -> Self { Buffer(buf.clone()) } + + /// Append a Serialize. Returns its offset. + pub fn push(&mut self, data: &S) -> io::Result> { + let mut cur = self.0.try_borrow_mut().or( + Err(ErrorKind::ConnectionRefused), + )?; + let pos = cur.seek(SeekFrom::End(0))?; + data.write_to(cur.deref_mut())?; + Ok(unsafe { Offset::::from_raw(pos) }) + } + + /// In-place rewrite a FixedSizedSerialize. + pub fn rewrite(&mut self, pos: Offset, data: &S) -> io::Result<()> { + let mut cur = self.0.try_borrow_mut().or( + Err(ErrorKind::ConnectionRefused), + )?; + cur.seek(SeekFrom::Start(pos.0 as u64))?; + data.write_to(cur.deref_mut()) + } + + /// Parse a Serialize from a given offset. + pub fn retrieve(&mut self, pos: Offset) -> io::Result { + let mut cur = self.0.try_borrow_mut().or( + Err(ErrorKind::ConnectionRefused), + )?; + cur.seek(SeekFrom::Start(pos.0 as u64))?; + S::read_from(cur.deref_mut()) + } +} + +macro_rules! impl_serialize_unsigned { + ($T: ty, $S: expr) => { + impl Serialize for $T { + fn write_to(&self, w: &mut W) -> io::Result<()> { + let buf: [u8; $S] = unsafe { transmute(self.to_be()) }; + w.write_all(&buf) + } + + fn read_from(r: &mut R) -> io::Result { + let mut buf = [0u8; $S]; + r.read_exact(&mut buf)?; + let value: $T = unsafe { transmute(buf) }; + Ok(value.to_be()) + } + } + impl FixedSizedSerialize for $T {} + } +} + +impl_serialize_unsigned!(u8, 1); +impl_serialize_unsigned!(u16, 2); +impl_serialize_unsigned!(u32, 4); +impl_serialize_unsigned!(u64, 8); + +#[cfg(test)] +mod tests { + use serialize::*; + use std::boxed::Box; + use std::cell::RefCell; + use std::fmt::Debug; + use std::io::Cursor; + use std::rc::Rc; + + fn check_round_trip(keys: Vec) -> bool { + let mut buf = Buffer(Rc::new(RefCell::new(Cursor::new(vec![])))); + let mut offsets: Vec> = vec![]; + + // Push keys + for k in keys.iter() { + offsets.push(buf.push(k).unwrap()); + } + + // Check keys + offsets.iter().enumerate().all(|(i, offset)| { + (buf.retrieve::(offset.clone())).unwrap() == keys[i] + }) + } + + quickcheck! { + fn test_round_trip_u8(v: Vec) -> bool { + check_round_trip(v) + } + + fn test_round_trip_u16(v: Vec) -> bool { + check_round_trip(v) + } + + fn test_round_trip_u32(v: Vec) -> bool { + check_round_trip(v) + } + + fn test_round_trip_u64(v: Vec) -> bool { + check_round_trip(v) + } + } + + #[test] + fn test_rewrite() { + let mut buf = Buffer(Rc::new(RefCell::new(Cursor::new(vec![])))); + let offsets: Vec> = (200..203u32).map(|i| buf.push(&i).unwrap()).collect(); + buf.rewrite(offsets[1], &400u32).expect("rewrite"); + assert_eq!(buf.retrieve::(offsets[0]).unwrap(), 200); + assert_eq!(buf.retrieve::(offsets[1]).unwrap(), 400); + assert_eq!(buf.retrieve::(offsets[2]).unwrap(), 202); + } +}