diff --git a/rust/indexes/Cargo.toml b/rust/indexes/Cargo.toml --- a/rust/indexes/Cargo.toml +++ b/rust/indexes/Cargo.toml @@ -12,3 +12,11 @@ [dependencies] radixbuf = { path = "../radixbuf" } error-chain = "0.11" + +[dependencies.cpython] +git = "https://github.com/dgrunwald/rust-cpython.git" +default-features = false +features = ["extension-module-2-7"] + +[dependencies.python27-sys] +git = "https://github.com/dgrunwald/rust-cpython.git" diff --git a/rust/indexes/src/lib.rs b/rust/indexes/src/lib.rs --- a/rust/indexes/src/lib.rs +++ b/rust/indexes/src/lib.rs @@ -3,9 +3,14 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +extern crate python27_sys; + +#[macro_use] +extern crate cpython; #[macro_use] extern crate error_chain; extern crate radixbuf; pub mod errors; pub mod nodemap; +mod pybuf; diff --git a/rust/indexes/src/pybuf.rs b/rust/indexes/src/pybuf.rs new file mode 100644 --- /dev/null +++ b/rust/indexes/src/pybuf.rs @@ -0,0 +1,100 @@ +// Copyright 2017 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! A simple `Py_buffer` wrapper that allows zero-copy reading of Python +//! owned memory. + +// The objects in memory have a relationship like: +// +// ```text +// SimplePyBuf | Raw Data Python object +// +-----------+ | +-------+ +-----------+ +// | Py_buffer | | | | <-- owns -- _ | +// | +-------+ | | | | +-----------+ +// | | *buf -- points to --> | | +// | | len | | | | | +// | +-------+ | | +-------+ +// +-----------+ | +// | +// Rust-managed | Python-managed +// ``` +// +// Notes: +// - Raw data is owned by (embedded in, or pointed by) the Python object. +// Raw data gets freed when the Python object is destructed. +// - Py_buffer is not a Python object but a Python-defined C struct allowing +// native code to access "Raw data" directly. When constructing Py_buffer +// from a Python object, the refcount of that Python object increases. +// The refcount decreases when Py_buffer gets destructed via PyBuffer_Release. +// - Py_buffer is used to expose the raw pointer and length. +// - Memory alignment is up to the actual implementation of "Python object". +// For a mmap buffer, the libc mmap function guarantees that. + +use std::marker::PhantomData; +use std::mem; +use std::slice; +use cpython::{Python, PyObject}; +use python27_sys as cpy; + +pub struct SimplePyBuf(cpy::Py_buffer, PhantomData); + +// Since the buffer is read-only and Python cannot move the raw buffer (because +// we own the Py_buffer struct). It's safe to share and use SimplePyBuf in other +// threads. +unsafe impl Send for SimplePyBuf {} +unsafe impl Sync for SimplePyBuf {} + +impl SimplePyBuf { + pub fn new(_py: Python, obj: &PyObject) -> Self { + // Note about GC on obj: + // + // Practically, obj here is some low-level, non-container ones like + // bytes or memoryview that does not support GC (i.e. do not have + // Py_TPFLAGS_HAVE_GC set). refcount is the only way to release them. + // So no need to pay extra attention on them - SimplePyBuf will get + // refcount right and that's enough. + // + // Otherwise (obj is a container type that does support GC), whoever + // owns this SimplePyBuf in the Rust world needs to do one of the + // following: + // - implement tp_traverse in its Python class + // - call PyObject_GC_UnTrack to let GC ignore obj + + // Note about buffer mutability: + // + // The code here wants to access the buffer without taking Python GIL. + // Therefore `obj` should be a read-only object. That is true for Python + // bytes or buffer(some_other_immutable_obj). For now, explicitly + // whitelist those two types. Beware that `PyBuffer_Check` won't guarnatee + // its inner object is also immutable. + unsafe { + if cpy::PyBytes_Check(obj.as_ptr()) == 0 && cpy::PyBuffer_Check(obj.as_ptr()) == 0 { + panic!("potentially unsafe type"); + } + + let mut buf = mem::zeroed::>(); + let r = cpy::PyObject_GetBuffer(obj.as_ptr(), &mut buf.0, cpy::PyBUF_SIMPLE); + if r == -1 { + panic!("failed to get Py_buffer"); + } + buf + } + } +} + +impl AsRef<[T]> for SimplePyBuf { + #[inline] + fn as_ref(&self) -> &[T] { + let len = self.0.len as usize / mem::size_of::(); + unsafe { slice::from_raw_parts(self.0.buf as *const T, len) } + } +} + +impl Drop for SimplePyBuf { + fn drop(&mut self) { + let _gil = Python::acquire_gil(); + unsafe { cpy::PyBuffer_Release(&mut self.0) } + } +}