diff --git a/rust/radix204/src/lib.rs b/rust/radix204/src/lib.rs --- a/rust/radix204/src/lib.rs +++ b/rust/radix204/src/lib.rs @@ -31,4 +31,4 @@ pub mod radix; pub mod mmapvec; -pub type RadixBuffer = radix::RadixBuffer; +pub type RadixBuffer = radix::RadixBuffer; diff --git a/rust/radix204/src/radix.rs b/rust/radix204/src/radix.rs --- a/rust/radix204/src/radix.rs +++ b/rust/radix204/src/radix.rs @@ -5,30 +5,31 @@ //! Main radix buffer implementation that maintains `Key` to `KeyId` mapping. -use std::ops::DerefMut; +use std::ops::{DerefMut, Index, IndexMut, Range}; +use std::marker::PhantomData; use base16::ToBase16Iter; use errors::{Result, ErrorKind}; +use mmapvec::MmapVec; use types::{Key, KeyId, Offset, Shared}; -// KeyBuffer and RadixBuffer. -// They might be changed to type parameters. ex. they might be backed by mmap. -type KB = Vec; -type RB = Vec; - /// Represents pointer to 16 offsets that are rewritable in-place. -struct RadixNode(Offset); +struct RadixNode(Offset, PhantomData<(KB, RB)>); /// The return value of `RadixNode::follow`. -enum FollowResult { - Radix(RadixNode), +enum FollowResult { + Radix(RadixNode), KeyId(KeyId), Missing, } -impl RadixNode { +impl RadixNode +where + KB: VecLike, + RB: VecLike, +{ #[inline] - pub fn new(offset: Offset) -> Self { RadixNode(offset) } + pub fn new(offset: Offset) -> Self { RadixNode(offset, PhantomData) } #[inline] pub fn create(vec: &mut RB) -> Result { @@ -41,27 +42,24 @@ return Err(ErrorKind::OffsetOverflow(pos).into()); } vec.resize(pos + 16, 0); - Ok(RadixNode(Offset::from(pos as u32))) + Ok(RadixNode(Offset::from(pos as u32), PhantomData)) } /// Follow index by one step. #[inline] - pub fn follow(&self, vec: &RB, index: u8) -> Result { + pub fn follow(&self, vec: &RB, index: u8) -> Result> { debug_assert!(index < 16); let pos = self.0.to_usize() + usize::from(index); - let o = vec.get(pos); - match o { - None => Err(ErrorKind::OffsetOverflow(pos).into()), - Some(v) => { - let v = v.to_le(); - if v == 0 { - Ok(FollowResult::Missing) - } else if v & 1 != 0 { - Ok(FollowResult::KeyId((v >> 1).into())) - } else { - Ok(FollowResult::Radix(RadixNode::new(v.into()))) - } - } + if pos >= vec.len() { + return Err(ErrorKind::OffsetOverflow(pos).into()); + } + let v = vec[pos].to_le(); + if v == 0 { + Ok(FollowResult::Missing) + } else if v & 1 != 0 { + Ok(FollowResult::KeyId((v >> 1).into())) + } else { + Ok(FollowResult::Radix(RadixNode::::new(v.into()))) } } @@ -85,7 +83,7 @@ /// Rewrite an index entry to point to another `RadixNode`. #[inline] - pub fn write_radix(&self, vec: &mut RB, index: u8, node: &RadixNode) -> Result<()> { + pub fn write_radix(&self, vec: &mut RB, index: u8, node: &RadixNode) -> Result<()> { self.write_raw_offset(vec, index, node.0) } @@ -111,9 +109,13 @@ /// `RadixBuffer` provides efficient `Key` to `KeyId` lookups. It maintains two buffers: /// - A key buffer for `Key` lookups. Caller is responsible for appending new keys there. /// - A radix buffer for storing radix tree offsets. -pub struct RadixBuffer(Shared, Shared, Offset); +pub struct RadixBuffer(Shared, Shared, Offset); -impl RadixBuffer { +impl RadixBuffer +where + KB: VecLike, + RB: VecLike, +{ pub fn new(key_buf: Shared, radix_buf: Shared, root: Offset) -> Self { RadixBuffer(key_buf, radix_buf, root) } @@ -134,7 +136,7 @@ /// Find `KeyId` from a `Key`. Do not check if `KeyId` and `Key` matches. pub fn lookup_unchecked(&self, key: &Key) -> Result> { borrow_shared(&self.1, |rbuf| { - let mut radix = RadixNode::new(self.2); + let mut radix = RadixNode::::new(self.2); for b in key.to_base16iter() { match radix.follow(rbuf, b)? { FollowResult::Radix(next_radix) => { @@ -152,7 +154,7 @@ pub fn lookup_prefix(&self, base16seq: &[u8]) -> Result> { borrow_shared(&self.1, |rbuf| { - let mut radix = RadixNode::new(self.2); + let mut radix = RadixNode::::new(self.2); for &b in base16seq { debug_assert!(b < 16); match radix.follow(rbuf, b)? { @@ -205,7 +207,7 @@ /// Insert when both `KeyId` and `Key` are known pub fn insert_with_key(&self, new_key_id: KeyId, new_key: &Key) -> Result<()> { borrow_shared(&self.1, |rbuf| { - let mut radix = RadixNode::new(self.2); + let mut radix = RadixNode::::new(self.2); for (i, b) in new_key.to_base16iter().enumerate() { match radix.follow(rbuf, b)? { FollowResult::Radix(next_radix) => { @@ -283,6 +285,33 @@ func(borrowed.deref_mut()) } +/// Vec-like interface +pub trait VecLike + : IndexMut + + Index + + IndexMut> + + Index, Output = [T]> { + fn extend_from_slice(&mut self, other: &[T]); + fn resize(&mut self, new_len: usize, value: T); + fn len(&self) -> usize; +} + +macro_rules! impl_vec_like { + ($T: ident) => { + impl VecLike for $T { + #[inline] + fn extend_from_slice(&mut self, other: &[T]) { $T::extend_from_slice(self, other) } + #[inline] + fn resize(&mut self, new_len: usize, value: T) { $T::resize(self, new_len, value) } + #[inline] + fn len(&self) -> usize { $T::len(self) } + } + } +} + +impl_vec_like!(Vec); +impl_vec_like!(MmapVec); + #[cfg(test)] mod tests { use super::*; @@ -290,21 +319,37 @@ use std::ops::Deref; use std::rc::Rc; use std::cell::RefCell; - use rand::{ChaChaRng, Rng}; + use rand::{ChaChaRng, thread_rng, Rng}; use test::Bencher; use quickcheck::{Arbitrary, Gen}; + use tempdir::TempDir; type KB = Vec; type RB = Vec; - fn prepare_kbuf_radix() -> (Shared, Shared, RadixBuffer) { + fn prepare_kbuf_radix() -> (Shared, Shared, RadixBuffer) { let kbuf = Rc::new(RefCell::new(vec![])); let rbuf = Rc::new(RefCell::new(vec![0u32; 17])); let radix = RadixBuffer::new(Rc::clone(&kbuf), Rc::clone(&rbuf), 1.into()); (kbuf, rbuf, radix) } - fn append_key(kbuf: &Shared, key: &Key) -> KeyId { + type MKB = MmapVec; + type MRB = MmapVec; + + fn prepare_mmap_kbuf_radix(tempdir: TempDir) + -> (Shared, Shared, RadixBuffer) { + let kbuf = Rc::new(RefCell::new( + MKB::from_path(tempdir.path().join("kbuf")).unwrap(), + )); + let mut rvec = MRB::from_path(tempdir.path().join("rbuf")).unwrap(); + rvec.extend_from_slice(&[0u32; 17]); + let rbuf = Rc::new(RefCell::new(rvec)); + let radix = RadixBuffer::new(Rc::clone(&kbuf), Rc::clone(&rbuf), 1.into()); + (kbuf, rbuf, radix) + } + + fn append_key>(kbuf: &Shared, key: &Key) -> KeyId { borrow_shared(&kbuf, |kvec| { let pos = kvec.len(); kvec.extend_from_slice(key.deref()); @@ -321,14 +366,22 @@ } fn check_with_stdset(std_set: HashSet) -> bool { - let (kbuf, _rbuf, radix) = prepare_kbuf_radix(); + let (kbuf, rbuf, radix) = prepare_kbuf_radix(); + let tempdir = TempDir::new("mradix").unwrap(); + let (mkbuf, mrbuf, mradix) = prepare_mmap_kbuf_radix(tempdir); for &key in &std_set { if radix.lookup(&key).unwrap().is_some() { return false; } + if mradix.lookup(&key).unwrap().is_some() { + return false; + } let key_id = append_key(&kbuf, &key); radix.insert(key_id).expect("insert"); radix.insert_with_key(key_id, &key).expect("insert"); + let key_id = append_key(&mkbuf, &key); + mradix.insert(key_id).expect("insert"); + mradix.insert_with_key(key_id, &key).expect("insert"); } for &orig in &std_set { for bit in 0..161 { @@ -338,11 +391,16 @@ buf[(bit - 1) / 8] ^= 1u8 << ((bit - 1) % 8); key = buf.into(); } - if std_set.contains(&key) ^ radix.lookup(&key).unwrap().is_some() { + let expected = std_set.contains(&key); + if expected != radix.lookup(&key).unwrap().is_some() || + expected != mradix.lookup(&key).unwrap().is_some() + { return false; } } } + assert_eq!(mkbuf.borrow()[..], kbuf.borrow()[..]); + assert_eq!(mrbuf.borrow()[..], rbuf.borrow()[..]); true } @@ -409,7 +467,32 @@ }) } - fn prepare_radix_keys(count: usize) -> (RadixBuffer, Vec) { + #[bench] + fn bench_50k_insert_mmap(b: &mut Bencher) { + let tempdir = TempDir::new("mradix").unwrap(); + const COUNT: u32 = 51200; + let key_buf: Vec = ChaChaRng::new_unseeded() + .gen_iter() + .take(COUNT as usize * Key::fixed_len()) + .collect(); + let mut kvec = MKB::from_path(tempdir.path().join("kbuf")).unwrap(); + kvec.extend_from_slice(&key_buf); + let kbuf = Rc::new(RefCell::new(kvec)); + b.iter(|| { + let mut rvec = MRB::from_path(tempdir.path().join( + format!("rbuf-{}", thread_rng().next_u64()), + )).unwrap(); + rvec.extend_from_slice(&[0u32; 17]); + let rbuf = Rc::new(RefCell::new(rvec)); + let radix = RadixBuffer::new(Rc::clone(&kbuf), Rc::clone(&rbuf), 1.into()); + for key_id in 0..COUNT { + let v = key_id * Key::fixed_len() as u32; + radix.insert(v.into()).expect("insert"); + } + }) + } + + fn prepare_radix_keys(count: usize) -> (RadixBuffer, Vec) { let mut rng = ChaChaRng::new_unseeded(); let (kbuf, _rbuf, radix) = prepare_kbuf_radix(); let mut keys = vec![];