diff --git a/rust/radixbuf/Cargo.toml b/rust/radixbuf/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/radixbuf/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "radixbuf" +version = "0.1.0" + +[dependencies] +vlqencoding = { path = "../vlqencoding" } + +[dev-dependencies] +quickcheck = "0.4" diff --git a/rust/radixbuf/README.md b/rust/radixbuf/README.md new file mode 100644 --- /dev/null +++ b/rust/radixbuf/README.md @@ -0,0 +1,10 @@ +# radixbuf + +This is a typical [Radix Tree](https://en.wikipedia.org/wiki/Radix_tree) implementation focused on efficient on-disk access. + +Namely, it has the following properties: + + - Looking up a key is not O(length of the file). + - Making changes to the tree is not O(length of the file). + +This is still a work-in-progress. diff --git a/rust/radixbuf/src/keyid/mod.rs b/rust/radixbuf/src/keyid/mod.rs new file mode 100644 --- /dev/null +++ b/rust/radixbuf/src/keyid/mod.rs @@ -0,0 +1,162 @@ +//! Key (byte array) <-> KeyId conversion + +use std::cell::RefCell; +use std::ops::DerefMut; +use std::rc::Rc; +use std::fmt::Debug; +use std::io::{self, Read, Write, Seek, SeekFrom, ErrorKind}; + +/// A KeyId takes 4 bytes. Optimized for compact storage. +pub type KeyId = u32; + +/// A Key is a byte array with serialization support. +/// +/// There are 2 types implemented: +/// - TwentyByteKey: 20-byte key. Serialized as 20-byte content. +/// - VariantKey: Variant-length key. Serialized as vlq-encoded-length + content. +pub trait Key: Debug + Sized + PartialEq { + fn write_to(&self, w: &mut W) -> io::Result<()>; + fn read_from(r: &mut R) -> io::Result; +} + +#[derive(Debug, PartialEq, Clone)] +pub struct TwentyByteKey([u8; 20]); + +#[derive(Debug, PartialEq, Clone)] +pub struct VariantKey(Vec); + +impl Key for TwentyByteKey { + fn write_to(&self, w: &mut W) -> io::Result<()> { w.write_all(&self.0) } + + fn read_from(r: &mut R) -> io::Result { + let mut buf = [0u8; 20]; + r.read_exact(&mut buf)?; + Ok(TwentyByteKey(buf)) + } +} + +impl Key for VariantKey { + fn write_to(&self, w: &mut W) -> io::Result<()> { + use vlqencoding::VLQEncode; + w.write_vlq(self.0.len())?; + w.write_all(&self.0) + } + + fn read_from(r: &mut R) -> io::Result { + use vlqencoding::VLQDecode; + let len: usize = r.read_vlq()?; + let mut buf = vec![0u8; len]; + r.read_exact(&mut buf)?; + Ok(VariantKey(buf)) + } +} + +/// A KeyBuffer stores keys in an append-only buffer. +pub struct KeyBuffer { + buf: Rc>, +} + +impl KeyBuffer { + /// Store a new key and return its KeyId. Do not take care of de-duplication. + fn push_key(&mut self, key: &K) -> io::Result { + let mut cur = self.buf.try_borrow_mut().or( + Err(ErrorKind::ConnectionRefused), + )?; + let pos = cur.seek(SeekFrom::End(0))?; + if pos > KeyId::max_value() as u64 { + return Err(ErrorKind::Other.into()); + } + key.write_to(cur.deref_mut())?; + Ok(pos as KeyId) + } + + /// Given a KeyId, get the stored Key. + fn get_key(&mut self, id: KeyId) -> io::Result { + let mut cur = self.buf.try_borrow_mut().or( + Err(ErrorKind::ConnectionRefused), + )?; + cur.seek(SeekFrom::Start(id as u64))?; + K::read_from(cur.deref_mut()) + } +} + +#[cfg(test)] +mod tests { + use std::cell::RefCell; + use std::rc::Rc; + use std::boxed::Box; + use std::io::Cursor; + use quickcheck::{Arbitrary, Gen}; + use keyid::*; + + impl Arbitrary for TwentyByteKey { + fn arbitrary(gen: &mut G) -> Self { + let x = u64::arbitrary(gen); + let y = u64::arbitrary(gen); + let z = u32::arbitrary(gen); + TwentyByteKey( + [ + ((z >> 24) & 0xff) as u8, + ((z >> 16) & 0xff) as u8, + ((z >> 8) & 0xff) as u8, + ((z >> 0) & 0xff) as u8, + ((y >> 56) & 0xff) as u8, + ((y >> 48) & 0xff) as u8, + ((y >> 40) & 0xff) as u8, + ((y >> 32) & 0xff) as u8, + ((y >> 24) & 0xff) as u8, + ((y >> 16) & 0xff) as u8, + ((y >> 8) & 0xff) as u8, + ((y >> 0) & 0xff) as u8, + ((x >> 56) & 0xff) as u8, + ((x >> 48) & 0xff) as u8, + ((x >> 40) & 0xff) as u8, + ((x >> 32) & 0xff) as u8, + ((x >> 24) & 0xff) as u8, + ((x >> 16) & 0xff) as u8, + ((x >> 8) & 0xff) as u8, + ((x >> 0) & 0xff) as u8, + ], + ) + } + + fn shrink(&self) -> Box> { + // XXX: This does not do "shrink". + Box::new(vec![].into_iter()) + } + } + + impl Arbitrary for VariantKey { + fn arbitrary(gen: &mut G) -> Self { VariantKey(Vec::::arbitrary(gen)) } + + fn shrink(&self) -> Box> { + // XXX: This does not do "shrink". + Box::new(vec![].into_iter()) + } + } + + fn check_round_trip(keys: Vec) -> bool { + let mut buf = KeyBuffer { buf: Rc::new(RefCell::new(Cursor::new(vec![]))) }; + let mut offsets: Vec = vec![]; + + // Push keys + for k in keys.iter() { + offsets.push(buf.push_key(k).unwrap()); + } + + // Check keys + offsets.iter().enumerate().all(|(i, &offset)| { + (buf.get_key::(offset)).unwrap() == keys[i] + }) + } + + quickcheck! { + fn test_round_trip_twentybytekey(keys: Vec) -> bool { + check_round_trip(keys) + } + + fn test_round_trip_variantkey(keys: Vec) -> bool { + check_round_trip(keys) + } + } +} diff --git a/rust/radixbuf/src/lib.rs b/rust/radixbuf/src/lib.rs new file mode 100644 --- /dev/null +++ b/rust/radixbuf/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +#[macro_use] +extern crate quickcheck; + +extern crate vlqencoding; + +mod keyid;