diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -18,3 +18,4 @@ subinclude:cfastmanifest/.hgignore subinclude:linelog/.hgignore +subinclude:rust/.hgignore diff --git a/rust/.hgignore b/rust/.hgignore new file mode 100644 --- /dev/null +++ b/rust/.hgignore @@ -0,0 +1,2 @@ +target/ +vlqencoding/Cargo.lock diff --git a/rust/vlqencoding/Cargo.toml b/rust/vlqencoding/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/vlqencoding/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "vlqencoding" +version = "0.1.0" + +[dependencies] + +[dev-dependencies] +quickcheck = "0.4" diff --git a/rust/vlqencoding/src/lib.rs b/rust/vlqencoding/src/lib.rs new file mode 100644 --- /dev/null +++ b/rust/vlqencoding/src/lib.rs @@ -0,0 +1,237 @@ +//! VLQ (Variable-length quantity) encoding. + +#[cfg(test)] +#[macro_use] +extern crate quickcheck; + +use std::mem::size_of; +use std::io::{self, Read, Write}; + +pub trait VLQEncode { + /// Encode an integer to a VLQ byte array and write it directly to a stream. + /// + /// # Examples + /// + /// ``` + /// use vlqencoding::VLQEncode; + /// let mut v = vec![]; + /// + /// let x = 120u8; + /// v.write_vlq(x).expect("writing an encoded u8 to a vec should work"); + /// assert_eq!(v, vec![120]); + /// + /// let x = 22742734291u64; + /// v.write_vlq(x).expect("writing an encoded u64 to a vec should work"); + /// + /// assert_eq!(v, vec![120, 211, 171, 202, 220, 84]); + /// ``` + /// + /// Signed integers are encoded via zag-zip: + /// + /// ``` + /// use vlqencoding::VLQEncode; + /// let mut v = vec![]; + /// + /// let x = -3i8; + /// v.write_vlq(x).expect("writing an encoded i8 to a vec should work"); + /// assert_eq!(v, vec![5]); + /// + /// let x = 1000i16; + /// v.write_vlq(x).expect("writing an encoded i16 to a vec should work"); + /// assert_eq!(v, vec![5, 208, 15]); + /// ``` + fn write_vlq(&mut self, value: T) -> io::Result<()>; +} + +pub trait VLQDecode { + /// Read a VLQ byte array from stream and decode it to an integer. + /// + /// # Examples + /// + /// ``` + /// use vlqencoding::VLQDecode; + /// use std::io::{Cursor,Seek,SeekFrom,ErrorKind}; + /// + /// let mut c = Cursor::new(vec![120u8, 211, 171, 202, 220, 84]); + /// + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap(), 120u8); + /// + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap_err().kind(), ErrorKind::InvalidData); + /// + /// c.seek(SeekFrom::Start(1)).expect("seek should work"); + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap(), 22742734291u64); + /// ``` + /// + /// Signed integers are decoded via zig-zap: + /// + /// ``` + /// use vlqencoding::VLQDecode; + /// use std::io::{Cursor,Seek,SeekFrom,ErrorKind}; + /// + /// let mut c = Cursor::new(vec![5u8, 208, 15]); + /// + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap(), -3i8); + /// + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap_err().kind(), ErrorKind::InvalidData); + /// + /// c.seek(SeekFrom::Start(1)).expect("seek should work"); + /// let x: Result = c.read_vlq(); + /// assert_eq!(x.unwrap(), 1000i32); + /// ``` + fn read_vlq(&mut self) -> io::Result; +} + +macro_rules! impl_unsigned_primitive { + ($T: ty) => ( + impl VLQEncode<$T> for W { + fn write_vlq(&mut self, value: $T) -> io::Result<()> { + let mut buf = [0u8]; + let mut value = value; + loop { + let mut byte = (value & 127) as u8; + let next = value >> 7; + if next != 0 { + byte |= 128; + } + buf[0] = byte; + self.write_all(&buf)?; + value = next; + if value == 0 { + break; + } + } + Ok(()) + } + } + + impl VLQDecode<$T> for R { + fn read_vlq(&mut self) -> io::Result<$T> { + let mut buf = [0u8]; + let mut value = 0 as $T; + let mut base = 1 as $T; + let base_multiplier = (1 << 7) as $T; + loop { + self.read_exact(&mut buf)?; + let byte = buf[0]; + value = ((byte & 127) as $T).checked_mul(base) + .and_then(|v| v.checked_add(value)) + .ok_or(io::ErrorKind::InvalidData)?; + if byte & 128 == 0 { + break; + } + base = base.checked_mul(base_multiplier).ok_or(io::ErrorKind::InvalidData)?; + } + Ok(value) + } + } + ) +} + +impl_unsigned_primitive!(u64); +impl_unsigned_primitive!(u32); +impl_unsigned_primitive!(u16); +impl_unsigned_primitive!(u8); + +macro_rules! impl_signed_primitive { + ($T: ty, $U: ty) => ( + impl VLQEncode<$T> for W { + fn write_vlq(&mut self, v: $T) -> io::Result<()> { + self.write_vlq(((v << 1) ^ (v >> (size_of::<$U>() * 8 - 1))) as $U) + } + } + + impl VLQDecode<$T> for R { + fn read_vlq(&mut self) -> io::Result<$T> { + (self.read_vlq() as Result<$U, _>).map (|n| { + if n & 1 == 0 { + (n >> 1) as $T + } else { + - (((n >> 1) + 1) as $T) + } + }) + } + } + ) +} + +impl_signed_primitive!(i64, u64); +impl_signed_primitive!(i32, u32); +impl_signed_primitive!(i16, u16); +impl_signed_primitive!(i8, u8); + +#[cfg(test)] +mod tests { + use std::io::{self, Cursor, Seek, SeekFrom}; + use {VLQDecode, VLQEncode}; + + macro_rules! impl_check_round_trip { + ($N: ident, $T: ty) => ( + fn $N(x: $T) -> bool { + let mut v = vec![]; + v.write_vlq(x).expect("write to vec should succeed"); + + let mut c = Cursor::new(v); + let y: $T = c.read_vlq().unwrap(); + x == y + } + ) + } + + impl_check_round_trip!(check_round_trip_u64, u64); + impl_check_round_trip!(check_round_trip_i64, i64); + + #[test] + fn test_round_trip_manual() { + for i in vec![ + 0, + 1, + 17, + 63, + 127, + 128, + 129, + 255, + 256, + 65535, + 65536, + 65537, + 12345678901234, + 12345678901234567890, + 18446744073709551615u64, + ] { + assert!(check_round_trip_u64(i)); + assert!(check_round_trip_i64(i as i64)); + assert!(check_round_trip_i64(-(i as i64))); + } + } + + #[test] + fn test_read_errors() { + let mut c = Cursor::new(vec![]); + assert_eq!((c.read_vlq() as io::Result).unwrap_err().kind(), + io::ErrorKind::UnexpectedEof); + + let mut c = Cursor::new(vec![255, 129]); + assert_eq!((c.read_vlq() as io::Result).unwrap_err().kind(), + io::ErrorKind::UnexpectedEof); + + c.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!((c.read_vlq() as io::Result).unwrap_err().kind(), + io::ErrorKind::InvalidData); + } + + quickcheck! { + fn test_round_trip_u64_quickcheck(x: u64) -> bool { + check_round_trip_u64(x) + } + + fn test_round_trip_i64_quickcheck(x: i64) -> bool { + check_round_trip_i64(x) + } + } +} \ No newline at end of file