diff --git a/rust/indexes/src/childmap.rs b/rust/indexes/src/childmap.rs new file mode 100644 --- /dev/null +++ b/rust/indexes/src/childmap.rs @@ -0,0 +1,467 @@ +// Copyright 2017 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +use errors::{Result, ErrorKind}; +use std::mem::transmute; +use std::slice; +use std::u32; +use radixbuf::key::{FixedKey, KeyId}; +use radixbuf::radix::{radix_lookup, radix_insert_with_key, RADIX_NCHILDREN}; +use radixbuf::errors as rerrors; +use changelog_utils::{changelog_end_rev, rev_to_node, CHANGELOG_ENTRY_SIZE, + CHANGELOG_ENTRY_P1_OFFSET, CHANGELOG_ENTRY_NODE_OFFSET}; + +/// An index with rev -> a list of rev mapping intended for storing children. +/// +/// Revision numbers are used to make the index compact. Insertion does not +/// have to be topo order. +/// +/// Assume revision r+1 is likely a child of revision r so that does not +/// have to be stored in the index but will be checked using changelog.i +/// during lookups. +/// +/// Similar to nodemap, it has an on-disk immutable index and an im-memory +/// side index. The "key buffer" (see radixbuf::radix for what a key buffer +/// is) is not changelog.i but the following format: +/// +/// key-buffer := key-buffer-len (u32) | key-buffer + key-entry +/// key-entry := rev (u32) + key-entry-offset (u32) +/// +/// Child revs are stored in a linked list. key-entry-offset might be +/// rewritten in-place so the "key buffer" is not append-only. +/// +/// Similar to nodemap, the main radix buffer has a tiny header that is +/// used to verify if the index is broken or not. That is: +/// +/// raidx-buffer := radix-buffer-size (u32) + next-rev (u32) +/// + last-node ([u8; 20]) +/// +/// When serializing to disk, the two buffers need to be merged into one: +/// +/// childmap := radix-buffer + key-buffer +pub struct ChildRevMap { + changelogi: C, + end_rev: u32, + main_index: I, // Immutable main index + side_index: (Vec, Vec), // Mutable side index (key, radix) +} + +// Offsets in the main radix and key buffers +const RADIX_LEN_OFFSET: usize = 0; +const RADIX_NEXT_REV_OFFSET: usize = 1; +const RADIX_LAST_NODE_OFFSET: usize = 2; +const RADIX_LAST_NODE_LEN: usize = 5; +const RADIX_HEADER_LEN: usize = RADIX_LAST_NODE_OFFSET + RADIX_LAST_NODE_LEN; +const KEY_LEN_OFFSET: usize = 0; + +// Offsets of root nodes in radix buffers +const MAIN_RADIX_OFFSET: u32 = RADIX_HEADER_LEN as u32; +const SIDE_RADIX_OFFSET: u32 = 0; + +impl ChildRevMap +where + C: AsRef<[u8]>, + I: AsRef<[u32]>, +{ + /// Initialize ChildRevMap from a non-inlined version of changelog.i and an incomplete index. + pub fn new(changelogi: C, main_index: I) -> Result { + // The index must contain at least 24 elements: + // - index[0]: radix buffer size + // - index[1]: next rev + // - index[2..7]: last node + // - index[7..7+16]: first radix node entry + // - index[23]: padding in key-buffer + if main_index.as_ref().len() < RADIX_HEADER_LEN + RADIX_NCHILDREN { + bail!(ErrorKind::IndexCorrupted); + } + + // Check if length matches + let radix_buf_len = u32::from_be(main_index.as_ref()[RADIX_LEN_OFFSET]) as usize; + if radix_buf_len >= main_index.as_ref().len() { + bail!(ErrorKind::IndexCorrupted); + } + let key_buf_len = u32::from_be(main_index.as_ref()[radix_buf_len as usize]) as usize; + if key_buf_len == 0 || radix_buf_len + key_buf_len != main_index.as_ref().len() { + bail!(ErrorKind::IndexCorrupted); + } + + // Check if next_rev and last_node in index match changelog.i + let next_rev = u32::from_be(main_index.as_ref()[RADIX_NEXT_REV_OFFSET]); + let end_rev = changelog_end_rev(&changelogi); + + if next_rev > end_rev { + bail!(ErrorKind::IndexCorrupted); + } else if next_rev > 0 { + let node_index: [u8; 20] = unsafe { + let mut buf = [0u32; RADIX_LAST_NODE_LEN]; + buf.copy_from_slice( + &main_index.as_ref()[RADIX_LAST_NODE_OFFSET.. + RADIX_LAST_NODE_OFFSET + RADIX_LAST_NODE_LEN], + ); + transmute(buf) + }; + let node_changelog = rev_to_node(&changelogi, (next_rev - 1).into())?; + if &node_index[..] != node_changelog { + bail!(ErrorKind::IndexCorrupted); + } + } + + // Build side_index for the revisions not in the main index + let mut side_index = (vec![0u32; 16], vec![0u32; 1]); + build( + changelogi.as_ref(), + &mut side_index.0, + &mut side_index.1, + SIDE_RADIX_OFFSET, + next_rev, + end_rev, + )?; + + Ok(ChildRevMap { + changelogi, + end_rev, + main_index, + side_index, + }) + } + + /// Return an empty index that can be used as "main_index" when passed to `new`. + pub fn empty_index_buffer() -> Vec { + let mut result = vec![0u32; RADIX_HEADER_LEN + RADIX_NCHILDREN]; + result[RADIX_LEN_OFFSET] = (result.len() as u32).to_be(); + let mut key_buf = vec![0u32]; + key_buf[KEY_LEN_OFFSET] = (key_buf.len() as u32).to_be(); + result.append(&mut key_buf); + return result; + } + + /// Return parent revisions. + /// This is made compatible with hg for convenience. If the source of truth + /// supports more than 2 parents. The signature of this function should + /// change. + pub fn parentrevs(&self, rev: u32) -> Option<(i32, i32)> { + if rev >= self.end_rev { + None + } else { + Some(parents(self.changelogi.as_ref(), rev)) + } + } + + /// Get a list of child revisions for a given revision. + pub fn lookup(&self, rev: i32) -> Result> { + let mut childrevs = Vec::with_capacity(1); + // Check main index + lookup( + &self.main_index, + &self.main_key_buf(), + MAIN_RADIX_OFFSET, + rev, + &mut childrevs, + )?; + + // Check side index + lookup( + &self.side_index.0, + &self.side_index.1, + SIDE_RADIX_OFFSET, + rev, + &mut childrevs, + )?; + + // Check changelog.i, rev + 1 + match self.parentrevs((rev + 1) as u32) { + None => (), + Some((p1, p2)) => { + if (p1 == rev) || (p2 >= 0 && p2 == rev) { + childrevs.push(rev + 1) + } + } + } + + childrevs.sort(); + Ok(childrevs) + } + + /// How many revisions the side index has. + pub fn lag(&self) -> u32 { + let next_rev = u32::from_be(self.main_index.as_ref()[RADIX_NEXT_REV_OFFSET]); + self.end_rev - next_rev + } + + /// Incrementally build the main index based on the existing one. + pub fn build_incrementally(&self) -> Result> { + // Copy the main index since we need to modify it. + let radix_len = self.main_radix_len(); + let mut radix_buf = self.main_index.as_ref()[0..radix_len].to_vec(); + let mut key_buf = self.main_key_buf().to_vec(); + let next_rev = u32::from_be(radix_buf[RADIX_NEXT_REV_OFFSET]); + build( + self.changelogi.as_ref(), + &mut radix_buf, + &mut key_buf, + MAIN_RADIX_OFFSET, + next_rev, + self.end_rev, + )?; + + assert!(radix_buf.len() <= u32::MAX as usize || key_buf.len() <= u32::MAX as usize); + + // Rewrite headers + radix_buf[RADIX_LEN_OFFSET] = (radix_buf.len() as u32).to_be(); + radix_buf[RADIX_NEXT_REV_OFFSET] = self.end_rev.to_be(); + if self.end_rev > 0 { + let offset = (self.end_rev - 1) * CHANGELOG_ENTRY_SIZE as u32 + + CHANGELOG_ENTRY_NODE_OFFSET as u32; + let node = FixedKey::read(&self.changelogi, offset.into())?; + let mut buf = [0u8; 20]; + buf.copy_from_slice(&node); + let buf: [u32; RADIX_LAST_NODE_LEN] = unsafe { transmute(buf) }; + radix_buf[RADIX_LAST_NODE_OFFSET..RADIX_LAST_NODE_OFFSET + RADIX_LAST_NODE_LEN] + .copy_from_slice(&buf); + } + key_buf[KEY_LEN_OFFSET] = (key_buf.len() as u32).to_be(); + + radix_buf.append(&mut key_buf); + Ok(radix_buf) + } + + #[inline] + fn main_radix_len(&self) -> usize { + u32::from_be(self.main_index.as_ref()[RADIX_LEN_OFFSET]) as usize + } + + #[inline] + fn main_key_buf(&self) -> &[u32] { &self.main_index.as_ref()[self.main_radix_len()..] } +} + +#[inline] +fn parents(changelog: &[u8], rev: u32) -> (i32, i32) { + let revs: [i32; 2] = unsafe { + let mut buf = [0u8; 8]; + let offset = rev as usize * CHANGELOG_ENTRY_SIZE as usize + CHANGELOG_ENTRY_P1_OFFSET; + buf.copy_from_slice(&changelog[offset..offset + 8]); + transmute(buf) + }; + (revs[0].to_be(), revs[1].to_be()) +} + +fn insert(radix_buf: &mut Vec, key_buf: &mut Vec, offset: u32, parent: i32, child: i32) + -> Result<()> { + assert_ne!(parent, child); + // Find the existing slot of child. Creat an entry on demand. + let key = rev_to_key(parent); + let existing = radix_lookup(radix_buf, offset, &key, &key_reader, &key_buf)?; + let offset: u32 = match existing { + Some(v) => v.into(), + None => { + let key_id = append_linked_node(key_buf, parent, 0); + radix_insert_with_key(radix_buf, offset, key_id.into(), &key, &key_reader, key_buf)?; + key_id + } + }; + + // Insert the parent to the linked list. + // + // Before: After: + // + // key_id (offset) key_id (offset) pos + // v v v + // +---------------+ +---------------+ +--------------+ + // | parent | next | | parent | new | | child | next | + // +---------------+ +-----------v---+ +---^----------+ + // | | + // +----------+ + if offset as usize + 1 >= key_buf.len() { + bail!(ErrorKind::IndexCorrupted) + } + let next = u32::from_be(key_buf[offset as usize + 1]); + let pos = append_linked_node(key_buf, child, next); + key_buf[offset as usize + 1] = pos.to_be(); + + Ok(()) +} + +/// Read parent revs from radix index -> linked list. Append them to result. +fn lookup(radix_buf: &R, key_buf: &K, offset: u32, parent: i32, result: &mut Vec) + -> Result<()> +where + R: AsRef<[u32]>, + K: AsRef<[u32]>, +{ + let key = rev_to_key(parent); + let radix_buf = radix_buf.as_ref(); + let key_buf = key_buf.as_ref(); + match radix_lookup(&radix_buf, offset, &key, &key_reader, &key_buf)? { + Some(id) => { + // Read the linked list from key_buf + let mut offset: usize = id.into(); + while offset != 0 { + if offset + 1 > key_buf.as_ref().len() { + bail!(ErrorKind::IndexCorrupted); + } + let new_parent = i32::from_be(key_buf[offset] as i32); + let next = u32::from_be(key_buf[offset + 1]); + if new_parent != parent { + result.push(new_parent); + } + offset = next as usize; + } + Ok(()) + } + None => Ok(()), + } +} + +/// Read parent -> child relationships for parent in xrange(start_rev, end_rev) from changelog, +/// insert them to the radix index. +fn build( + changelog: &[u8], radix_buf: &mut Vec, key_buf: &mut Vec, offset: u32, start_rev: u32, + end_rev: u32 +) -> Result<()> { + for i in start_rev..end_rev { + let (p1, p2) = parents(changelog, i); + // Skip "parent + 1 == child" case to save space. + if p1 + 1 != i as i32 { + insert(radix_buf, key_buf, offset, p1, i as i32)?; + } + if p2 >= 0 && p2 + 1 != i as i32 { + insert(radix_buf, key_buf, offset, p2, i as i32)?; + } + } + Ok(()) +} + +/// Append (rev: u32, next_offset: u32) to the key buffer. Return the offset of rev. +fn append_linked_node(key_buf: &mut Vec, rev: i32, next: u32) -> u32 { + let pos = key_buf.len() as u64; + assert!(pos < 0xffff_ffff); + key_buf.push(rev.to_be() as u32); + key_buf.push(next.to_be()); + pos as u32 +} + +/// Read fixed 4-byte sequence from key buffer +fn key_reader>(key_buf: &K, key_id: KeyId) -> rerrors::Result<&[u8]> { + let key_buf = key_buf.as_ref(); + let start_pos: usize = key_id.into(); + let end_pos = start_pos + 1; + if key_buf.len() < end_pos { + bail!(rerrors::ErrorKind::InvalidKeyId(key_id)); + } + let result = &key_buf[start_pos..end_pos]; + // Cast [u32] to [u8] + Ok(&unsafe { + slice::from_raw_parts(result.as_ptr() as *const u8, 4) + }) +} + +/// Convert revision number to a key ([u8]) +fn rev_to_key(rev: i32) -> [u8; 4] { unsafe { transmute(rev.to_be()) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_manual_case() { + let mut changelogi: Vec = vec![]; + let empty_index = ChildRevMap::, Vec>::empty_index_buffer(); + { + // Take a snapshot of empty changelog.i and do some tests. + let changelogi = changelogi.clone(); + let map = ChildRevMap::new(&changelogi, empty_index.clone()).unwrap(); + assert_eq!(map.lag(), 0); + assert_eq!(map.build_incrementally().unwrap(), empty_index.clone()); + } + + // Insert 2 revisions: + // 1 2 + // |/ + // 0 + let mut index_partial: Vec = vec![]; + append_changelog(&mut changelogi, 0, -1, -1); + append_changelog(&mut changelogi, 1, 0, -1); + append_changelog(&mut changelogi, 2, 0, -1); + { + let changelogi = changelogi.clone(); + let map1 = ChildRevMap::new(&changelogi, empty_index.clone()).unwrap(); + let index = map1.build_incrementally().unwrap(); + index_partial.extend(index.clone()); // used by the next test + let map2 = ChildRevMap::new(&changelogi, index).unwrap(); + + assert_eq!(map1.lag(), 3); + assert_eq!(map2.lag(), 0); + for map in [&map1, &map2].iter() { + assert_eq!(map.lookup(-1).unwrap(), vec![0]); + assert_eq!(map.lookup(0).unwrap(), vec![1, 2]); + assert_eq!(map.lookup(1).unwrap(), vec![]); + assert_eq!(map.lookup(2).unwrap(), vec![]); + assert_eq!(map.lookup(3).unwrap(), vec![]); + } + } + + // Insert more revisions: + // 6 7 5 + // |\|\ | + // 1 2 3 4 + // \|/ / + // |/ + // 0 8 + append_changelog(&mut changelogi, 3, 0, -1); + append_changelog(&mut changelogi, 4, 0, -1); + append_changelog(&mut changelogi, 5, 4, -1); + append_changelog(&mut changelogi, 6, 1, 2); + append_changelog(&mut changelogi, 7, 2, 3); + append_changelog(&mut changelogi, 8, -1, -1); + { + let changelogi = changelogi.clone(); + + // build from scratch + let map_from_scratch = ChildRevMap::new(&changelogi, empty_index.clone()).unwrap(); + + // build incrementally from a childmap with 3 revisions + let map_from_partial = ChildRevMap::new(&changelogi, index_partial).unwrap(); + + let index_complete = map_from_scratch.build_incrementally().unwrap(); + assert_eq!( + index_complete, + map_from_partial.build_incrementally().unwrap() + ); + + // build from an up-to-date childmap + let map_from_complete = ChildRevMap::new(&changelogi, index_complete).unwrap(); + + assert_eq!(map_from_scratch.lag(), 9); + assert_eq!(map_from_partial.lag(), 6); + assert_eq!(map_from_complete.lag(), 0); + for map in [&map_from_scratch, &map_from_partial, &map_from_complete].iter() { + assert_eq!(map.lookup(-1).unwrap(), vec![0, 8]); + assert_eq!(map.lookup(0).unwrap(), vec![1, 2, 3, 4]); + assert_eq!(map.lookup(1).unwrap(), vec![6]); + assert_eq!(map.lookup(2).unwrap(), vec![6, 7]); + assert_eq!(map.lookup(3).unwrap(), vec![7]); + assert_eq!(map.lookup(4).unwrap(), vec![5]); + assert_eq!(map.lookup(5).unwrap(), vec![]); + assert_eq!(map.lookup(6).unwrap(), vec![]); + assert_eq!(map.lookup(7).unwrap(), vec![]); + assert_eq!(map.lookup(8).unwrap(), vec![]); + assert_eq!(map.lookup(9).unwrap(), vec![]); + } + } + } + + fn append_changelog(changelogi: &mut Vec, rev: u32, p1: i32, p2: i32) { + assert_eq!( + rev as usize, + changelogi.len() / CHANGELOG_ENTRY_SIZE as usize + ); + let mut entry = [0u8; CHANGELOG_ENTRY_SIZE as usize]; + let p1buf: [u8; 4] = unsafe { transmute(p1.to_be()) }; + let p2buf: [u8; 4] = unsafe { transmute(p2.to_be()) }; + entry[24..28].copy_from_slice(&p1buf); + entry[28..32].copy_from_slice(&p2buf); + changelogi.extend_from_slice(&entry); + } +} diff --git a/rust/indexes/src/lib.rs b/rust/indexes/src/lib.rs --- a/rust/indexes/src/lib.rs +++ b/rust/indexes/src/lib.rs @@ -12,6 +12,7 @@ extern crate radixbuf; pub mod errors; +pub mod childmap; pub mod nodemap; mod changelog_utils; mod pybuf; diff --git a/rust/indexes/src/pyext.rs b/rust/indexes/src/pyext.rs --- a/rust/indexes/src/pyext.rs +++ b/rust/indexes/src/pyext.rs @@ -3,13 +3,15 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use cpython::{PyResult, PyBytes, PyObject}; +use childmap::ChildRevMap; +use cpython::{PyResult, PyBytes, PyObject, PyInt}; use nodemap::NodeRevMap; use pybuf::SimplePyBuf; use std::slice; py_module_initializer!(indexes, initindexes, PyInit_indexes, |py, m| { try!(m.add_class::(py)); + try!(m.add_class::(py)); Ok(()) }); @@ -52,3 +54,40 @@ Ok(PyBytes::new(py, slice)) } }); + +py_class!(class childmap |py| { + data childmap: ChildRevMap, SimplePyBuf>; + + def __new__(_cls, changelog: &PyObject, index: &PyObject) -> PyResult {; + let changelog_buf = SimplePyBuf::new(py, changelog); + let index_buf = SimplePyBuf::new(py, index); + let map = ChildRevMap::new(changelog_buf, index_buf)?; + childmap::create_instance(py, map) + } + + def __getitem__(&self, rev: i32) -> PyResult> { + Ok(self.childmap(py).lookup(rev)?) + } + + def parentrevs(&self, rev: PyInt) -> PyResult> { + let revs = self.childmap(py).parentrevs(rev.value(py) as u32); + Ok(revs) + } + + def build(&self) -> PyResult { + let buf = self.childmap(py).build_incrementally()?; + let slice = unsafe { slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 4) }; + Ok(PyBytes::new(py, slice)) + } + + def lag(&self) -> PyResult { + Ok(self.childmap(py).lag()) + } + + @staticmethod + def emptyindexbuffer() -> PyResult { + let buf = ChildRevMap::, Vec>::empty_index_buffer(); + let slice = unsafe { slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 4) }; + Ok(PyBytes::new(py, slice)) + } +});