diff --git a/rust/indexes/src/childmap.rs b/rust/indexes/src/childmap.rs new file mode 100644 --- /dev/null +++ b/rust/indexes/src/childmap.rs @@ -0,0 +1,235 @@ +// Copyright 2017 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +use errors::{Result, ErrorKind}; +use std::mem::transmute; +use radixbuf::key::{KeyId, FixedKey}; +use radixbuf::radix::{KeyReader, radix_lookup, radix_insert_with_key}; +use radixbuf::errors as rerrors; +use utils::{changelog_next_rev, rev_to_node}; + +/// An index with rev -> a list of rev mapping intended for storing children. +/// Use revision numbers to make the index compact. But insertion order does not have to be +/// topo-sorted. +pub struct ChildRevMap { + changelog: C, + end_rev: u32, + main_index: I, // Immutable main index + side_index: Vec, // Mutable side index +} + +impl ChildRevMap +where + C: AsRef<[u8]>, + I: AsRef<[u32]>, +{ + /// Initialize ChildRevMap from a non-inlined version of changelog.i and an incomplete index. + pub fn new(changelog: C, main_index: I) -> Result { + // Sanity check. Like NodeRevMap. + + // The index must contain at least 22 elements. index[1..6] tracks the last node. index[0] + // tracks the last rev. index[6..6+16] tracks the radix node. + if main_index.as_ref().len() < 22 { + bail!(ErrorKind::IndexCorrupted); + } + + // Check if the index is behind and build incrementally + let next_rev = main_index.as_ref()[0].to_le(); + let end_rev = changelog_next_rev(&changelog); + + if next_rev > end_rev { + bail!(ErrorKind::IndexCorrupted); + } else if next_rev > 0 { + let node_index: [u8; 20] = unsafe { + let mut buf = [0u32; 5]; + buf.copy_from_slice(&main_index.as_ref()[1..6]); + transmute(buf) + }; + let node_changelog = rev_to_node(changelog.as_ref(), (next_rev - 1).into())?; + if &node_index[..] != node_changelog { + bail!(ErrorKind::IndexCorrupted); + } + } + + // Build side_index for the revisions not in the main index + let mut side_index = vec![0u32; 16]; + build(changelog.as_ref(), &mut side_index, 0, next_rev, end_rev)?; + + Ok(ChildRevMap { + changelog, + end_rev, + main_index, + side_index, + }) + } + + // This is made compatible with hg for convenience. If the source of truth + // supports more than 2 parents. The signature of this function should + // change. + pub fn parentrevs(&self, rev: u32) -> Option<(i32, i32)> { + if rev >= self.end_rev { + None + } else { + Some(parents(self.changelog.as_ref(), rev)) + } + } + + pub fn lookup(&self, rev: u32) -> Result> { + let mut childrevs = Vec::with_capacity(1); + + // Combine both indexes + lookup(&self.main_index.as_ref(), 6, rev, &mut childrevs)?; + lookup(&self.side_index, 0, rev, &mut childrevs)?; + + // Common case + if rev + 1 < self.end_rev { + let (p1, p2) = self.parentrevs(rev + 1).unwrap(); + if (p1 >= 0 && p1 as u32 == rev) || (p2 >= 0 && p2 as u32 == rev) { + childrevs.push(rev + 1) + } + } + + Ok(childrevs) + } + + /// How many revisions the side index has. + pub fn lag(&self) -> u32 { + let next_rev = self.main_index.as_ref()[0].to_le(); + self.end_rev - next_rev + } + + /// Incrementally build the main index based on the existing one. + pub fn build_incrementally(&self) -> Result> { + // Copy the main index since we need to modify it. + let mut index = self.main_index.as_ref().to_vec(); + let next_rev = index[0].to_le(); + let cl = self.changelog.as_ref(); + build(cl, &mut index, 6, next_rev, self.end_rev)?; + // Rewrite header + index[0] = self.end_rev.to_le(); + if self.end_rev > 0 { + let offset = self.end_rev * 64 - 32; + let node = FixedKey::read(self.changelog.as_ref(), offset.into())?; + let mut buf = [0u8; 20]; + buf.copy_from_slice(&node); + let buf: [u32; 5] = unsafe {transmute(buf)}; + index[1..6].copy_from_slice(&buf); + } + Ok(index) + } +} + +#[inline] +fn parents(changelog: &[u8], rev: u32) -> (i32, i32) { + let revs: [i32; 2] = unsafe { + let mut buf = [0u8; 8]; + let offset = rev as usize * 64 + 24; + buf.copy_from_slice(&changelog[offset..offset + 8]); + transmute(buf) + }; + (revs[0].to_be(), revs[1].to_be()) +} + +#[inline] +fn insert(index: &mut Vec, offset: u32, parent: u32, child: u32) -> Result<()> { + assert_ne!(parent, child); + // Find the existing slot of child. + let key_reader = KeyReader::RadixBufferAllocated(read_key); + let key = rev_to_buf(parent); + let existing = radix_lookup(index, offset, &key, &key_reader)?; + let offset: u32 = match existing { + Some(v) => v.into(), + None => { + let key_id = append_node(index, parent, 0); + radix_insert_with_key(index, offset, key_id.into(), &key, &key_reader)?; + key_id + } + }.to_le(); + + // Insert the parent to the linked list. + // + // Before: After: + // + // key_id (offset) key_id (unchanged) + // v v + // +---------------+ +---------------+ +--------------+ + // | parent | next | | parent | new | | child | next | + // +---------------+ +---------------+ +---^----------+ + // | | + // +-----------------+ + // + if offset as usize + 1 >= index.len() { + bail!(ErrorKind::IndexCorrupted) + } + let next = index[offset as usize + 1].to_le(); + let pos = append_node(index, child, next); + index[offset as usize + 1] = pos.to_le(); + Ok(()) +} + +#[inline] +fn lookup(index: &[u32], offset: u32, parent: u32, result: &mut Vec) -> Result<()> { + let key_reader = KeyReader::RadixBufferAllocated(read_key); + + let key = rev_to_buf(parent); + match radix_lookup(&index, offset, &key, &key_reader)? { + Some(id) => { + let mut offset: usize = id.into(); + while offset != 0 { + if offset + 1 > index.len() { + bail!(ErrorKind::IndexCorrupted); + } + let new_parent = index[offset].to_le(); + let next = index[offset + 1].to_le(); + if new_parent != parent { + result.push(new_parent); + } + offset = next as usize; + } + Ok(()) + } + None => Ok(()), + } +} + +fn build(changelog: &[u8], index: &mut Vec, offset: u32, start_rev: u32, end_rev: u32) + -> Result<()> { + for i in start_rev..end_rev { + let (p1, p2) = parents(changelog, i); + // Skip "parent + 1 == child" case to save space. + if p1 >= 0 && p1 + 1 != i as i32 { + insert(index, offset, p1 as u32, i)?; + } + if p2 >= 0 && p2 + 1 != i as i32 { + insert(index, offset, p2 as u32, i)?; + } + } + Ok(()) +} + +/// Append (rev: u32, next_offset: u32) to the buffer. Return the offset of rev. +fn append_node(buf: &mut Vec, rev: u32, next: u32) -> u32 { + let pos = buf.len() as u64; + assert!(pos < 0xffff_ffff); + buf.push(rev.to_le()); + buf.push(next.to_le()); + pos as u32 +} + +fn read_key(radix_buf: &[u32], key_id: KeyId) -> rerrors::Result> { + // `key_id` is a pointer + let offset: usize = key_id.into(); + if offset >= radix_buf.len() { + bail!(rerrors::ErrorKind::InvalidKeyId(key_id)) + } + let rev = radix_buf[offset].to_le(); + Ok(rev_to_buf(rev)) +} + +#[inline] +fn rev_to_buf(rev: u32) -> Box<[u8]> { + let buf: [u8; 4] = unsafe { transmute(rev.to_le()) }; + buf.to_vec().into_boxed_slice() +} diff --git a/rust/indexes/src/lib.rs b/rust/indexes/src/lib.rs --- a/rust/indexes/src/lib.rs +++ b/rust/indexes/src/lib.rs @@ -12,6 +12,7 @@ extern crate radixbuf; pub mod errors; +pub mod childmap; pub mod nodemap; mod utils; mod pybuf; diff --git a/rust/indexes/src/pyext.rs b/rust/indexes/src/pyext.rs --- a/rust/indexes/src/pyext.rs +++ b/rust/indexes/src/pyext.rs @@ -3,13 +3,15 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use cpython::{PyResult, PyBytes, PyObject}; +use childmap::ChildRevMap; +use cpython::{PyResult, PyBytes, PyObject, PyInt}; use nodemap::NodeRevMap; use pybuf::SimplePyBuf; use std::slice; py_module_initializer!(indexes, initindexes, PyInit_indexes, |py, m| { try!(m.add_class::(py)); + try!(m.add_class::(py)); Ok(()) }); @@ -45,3 +47,33 @@ Ok(self.nodemap(py).lag()) } }); + +py_class!(class childmap |py| { + data childmap: ChildRevMap, SimplePyBuf>; + + def __new__(_cls, changelog: &PyObject, index: &PyObject) -> PyResult {; + let changelog_buf = SimplePyBuf::new(py, changelog); + let index_buf = SimplePyBuf::new(py, index); + let map = ChildRevMap::new(changelog_buf, index_buf)?; + childmap::create_instance(py, map) + } + + def __getitem__(&self, rev: u32) -> PyResult> { + Ok(self.childmap(py).lookup(rev)?) + } + + def parentrevs(&self, rev: PyInt) -> PyResult> { + let revs = self.childmap(py).parentrevs(rev.value(py) as u32); + Ok(revs) + } + + def build(&self) -> PyResult { + let buf = self.childmap(py).build_incrementally()?; + let slice = unsafe { slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 4) }; + Ok(PyBytes::new(py, slice)) + } + + def lag(&self) -> PyResult { + Ok(self.childmap(py).lag()) + } +});