diff --git a/rust/radixbuf/src/base16.rs b/rust/radixbuf/src/base16.rs new file mode 100644 --- /dev/null +++ b/rust/radixbuf/src/base16.rs @@ -0,0 +1,136 @@ +// Copyright 2017 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Base16 iterators. The main radix index uses base16 to make space usage more efficient +//! and support "prefix" lookups by hex strings. + +pub trait ToBase16Iter<'a, T> { + fn to_base16iter(&'a self) -> Base16Iter<'a, T>; +} + +impl<'a, T: AsRef<[u8]>> ToBase16Iter<'a, T> for T { + #[inline] + fn to_base16iter(&'a self) -> Base16Iter<'a, T> { + let len = self.as_ref().len() * 2; + Base16Iter(self, 0, len) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct Base16Iter<'a, T: 'a>(&'a T, usize, usize); + +/// Base16 iterator for [u8] +impl<'a, T: AsRef<[u8]>> Iterator for Base16Iter<'a, T> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option { + if self.2 <= self.1 { + None + } else { + let i = self.1; + self.1 = i + 1; + let v = self.0.as_ref()[i / 2]; + if i & 1 == 0 { v >> 4 } else { v & 0xf }.into() + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } + + #[inline] + fn count(self) -> usize { self.len() } +} + +impl<'a, T: AsRef<[u8]>> DoubleEndedIterator for Base16Iter<'a, T> { + #[inline] + fn next_back(&mut self) -> Option { + if self.2 <= self.1 { + None + } else { + let i = self.2 - 1; + self.2 = i; + let v = self.0.as_ref()[i / 2]; + if i & 1 == 0 { v >> 4 } else { v & 0xf }.into() + } + } +} + +impl<'a, T: AsRef<[u8]>> ExactSizeIterator for Base16Iter<'a, T> { + #[inline] + fn len(&self) -> usize { self.2 - self.1 } +} + +impl<'a, T: AsRef<[u8]>> Base16Iter<'a, T> { + #[inline] + pub fn skip(self, n: usize) -> Self { Base16Iter(self.0, self.1 + n, self.2) } + + #[inline] + pub fn take(self, n: usize) -> Self { + // TODO: Use (self.1 + n).min(self.2) once ord_max_min is available (Rust 1.22) + let mut end = self.1 + n; + if self.2 < end { + end = self.2 + } + Base16Iter(self.0, self.1, end) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use test::Bencher; + + quickcheck! { + fn check_skip_rev(src: Vec) -> bool { + let iter = src.to_base16iter(); + let full: Vec = iter.clone().collect(); + let rev: Vec = iter.clone().rev().collect(); + (0..full.len()).all(|i| { + let v: Vec = iter.clone().skip(i).collect(); + let r: Vec = iter.clone().skip(i).rev().rev().rev().collect(); + v.capacity() == v.len() && v[..] == full[i..] && + r.capacity() == r.len() && r[..] == rev[..(rev.len() - i)] + }) + } + } + + // The below patterns (skip, zip, rev; skip, take, rev) are used in radix.rs. + // Make sure they work at iterator level without needing an extra container. + + #[test] + fn test_zip_skip_rev() { + let x = [0x12, 0x34, 0x56u8, 0x21u8]; + let y = [0x78, 0x90, 0xab, 0xcdu8]; + let i = x.to_base16iter() + .skip(2) + .zip(y.to_base16iter().skip(3)) + .rev(); // .rev() works directly + let v: Vec<(u8, u8)> = i.collect(); + assert_eq!(v.capacity(), v.len()); + assert_eq!(v, vec![(2, 0xd), (6, 0xc), (5, 0xb), (4, 0xa), (3, 0)]); + } + + #[test] + fn test_skip_take_rev() { + let x = [0x12, 0x34, 0x56u8]; + let i = x.to_base16iter().skip(3).take(3).rev(); // .rev() works directly + let v: Vec = i.collect(); + assert_eq!(v.capacity(), v.len()); + assert_eq!(v, vec![6, 5, 4]); + } + + #[bench] + fn bench_key_base64(b: &mut Bencher) { + let x = [4u8; 20]; + b.iter(|| { + let y: u8 = x.to_base16iter().sum(); + assert_eq!(y, 4 * 20 as u8); + }) + } +} diff --git a/rust/radixbuf/src/lib.rs b/rust/radixbuf/src/lib.rs --- a/rust/radixbuf/src/lib.rs +++ b/rust/radixbuf/src/lib.rs @@ -19,3 +19,4 @@ extern crate vlqencoding; pub mod errors; +pub mod base16;