diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs --- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs +++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs @@ -8,22 +8,25 @@ //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the //! `hg-core` package. -use std::cell::RefCell; +use std::cell::{RefCell, RefMut}; +use std::collections::hash_map::Iter; +use std::convert::TryInto; use cpython::{ - exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyObject, PyResult, - ToPyObject, + exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult, + Python, }; use dirstate::extract_dirstate; +use exceptions::AlreadyBorrowed; use hg::{ DirsIterable, DirsMultiset, DirstateMapError, DirstateParseError, EntryState, }; -use std::convert::TryInto; py_class!(pub class Dirs |py| { - data dirs_map: RefCell; + data inner: RefCell; + data leak_count: RefCell; // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes // a `list`) @@ -59,18 +62,18 @@ ) }; - Self::create_instance(py, RefCell::new(inner)) + Self::create_instance(py, RefCell::new(inner), RefCell::new(0)) } def addpath(&self, path: PyObject) -> PyResult { - self.dirs_map(py).borrow_mut().add_path( + self.borrow_mut(py)?.add_path( path.extract::(py)?.data(py), ); Ok(py.None()) } def delpath(&self, path: PyObject) -> PyResult { - self.dirs_map(py).borrow_mut().delete_path( + self.borrow_mut(py)?.delete_path( path.extract::(py)?.data(py), ) .and(Ok(py.None())) @@ -89,30 +92,43 @@ }) } - // This is really inefficient on top of being ugly, but it's an easy way - // of having it work to continue working on the rest of the module - // hopefully bypassing Python entirely pretty soon. - def __iter__(&self) -> PyResult { - let dict = PyDict::new(py); - - for (key, value) in self.dirs_map(py).borrow().iter() { - dict.set_item( - py, - PyBytes::new(py, &key[..]), - value.to_py_object(py), - )?; - } - - let locals = PyDict::new(py); - locals.set_item(py, "obj", dict)?; - - py.eval("iter(obj)", None, Some(&locals)) + def __iter__(&self) -> PyResult { + DirsMultisetKeysIterator::create_instance( + py, + RefCell::new(Some(DirsMultisetLeakedRef::new(py, &self))), + RefCell::new(self.leak_immutable(py).iter()), + ) } def __contains__(&self, item: PyObject) -> PyResult { Ok(self - .dirs_map(py) + .inner(py) .borrow() .contains_key(item.extract::(py)?.data(py).as_ref())) } }); + +py_shared_ref!(Dirs, DirsMultiset, inner, DirsMultisetLeakedRef); + +impl Dirs { + pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult { + Self::create_instance(py, RefCell::new(d), RefCell::new(0)) + } + + fn translate_key( + py: Python, + res: (&Vec, &u32), + ) -> PyResult> { + Ok(Some(PyBytes::new(py, res.0))) + } +} + +py_shared_mapping_iterator!( + DirsMultisetKeysIterator, + DirsMultisetLeakedRef, + Iter, + Vec, + u32, + Dirs::translate_key, + Option +); diff --git a/rust/hg-cpython/src/dirstate/mod.rs b/rust/hg-cpython/src/dirstate/mod.rs --- a/rust/hg-cpython/src/dirstate/mod.rs +++ b/rust/hg-cpython/src/dirstate/mod.rs @@ -29,6 +29,7 @@ mod dirs_multiset; use dirstate::dirs_multiset::Dirs; use std::convert::TryFrom; +use exceptions::AlreadyBorrowed; /// C code uses a custom `dirstate_tuple` type, checks in multiple instances /// for this type, and raises a Python `Exception` if the check does not pass. @@ -99,6 +100,7 @@ m.add(py, "__doc__", "Dirstate - Rust implementation")?; m.add_class::(py)?; + m.add(py, "AlreadyBorrowed", py.get_type::())?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs --- a/rust/hg-cpython/src/exceptions.rs +++ b/rust/hg-cpython/src/exceptions.rs @@ -65,3 +65,5 @@ } } } + +py_exception!(shared_ref, AlreadyBorrowed, RuntimeError); diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -27,6 +27,8 @@ pub mod ancestors; mod cindex; mod conversion; +#[macro_use] +pub mod macros; pub mod dagops; pub mod dirstate; pub mod parsers; diff --git a/rust/hg-cpython/src/macros.rs b/rust/hg-cpython/src/macros.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/macros.rs @@ -0,0 +1,254 @@ +// macros.rs +// +// Copyright 2019 Raphaël Gomès +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Macros for use in the `hg-cpython` bridge library. + +/// Allows a `py_class!` generated struct to share references to one of its +/// data members with Python. +/// +/// # Warning +/// +/// The targeted `py_class!` needs to have the +/// `data leak_count: RefCell;` data attribute to compile. +/// A better, more complicated macro is needed to automatically insert the +/// leak count, which is not yet really battle tested (what happens when +/// multiple references are needed?). See the example below. +/// +/// TODO allow Python container types: for now, integration with the garbage +/// collector does not extend to Rust structs holding references to Python +/// objects. Should the need surface, `__traverse__` and `__clear__` will +/// need to be written as per the `rust-cpython` docs on GC integration. +/// +/// # Parameters +/// +/// * `$name` is the same identifier used in for `py_class!` macro call. +/// * `$inner_struct` is the identifier of the underlying Rust struct +/// * `$data_member` is the identifier of the data member of `$inner_struct` +/// that will be shared. +/// * `$leaked` is the identifier to give to the struct that will manage +/// references to `$name`, to be used for example in other macros like +/// `py_shared_mapping_iterator`. +/// +/// # Example +/// +/// ``` +/// struct MyStruct { +/// inner: Vec; +/// } +/// +/// py_class!(pub class MyType |py| { +/// data inner: RefCell; +/// data leak_count: RefCell; +/// }); +/// +/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef); +/// ``` +macro_rules! py_shared_ref { + ( + $name: ident, + $inner_struct: ident, + $data_member: ident, + $leaked: ident + ) => { + impl $name { + fn leak_immutable(&self, py: Python) -> &'static $inner_struct { + let ptr = self.$data_member(py).as_ptr(); + *self.leak_count(py).borrow_mut() += 1; + unsafe { &*ptr } + } + + fn borrow_mut<'a>( + &'a self, + py: Python<'a>, + ) -> PyResult> { + match *self.leak_count(py).borrow() { + 0 => Ok(self.$data_member(py).borrow_mut()), + // TODO + // For now, this works differently than Python references + // in the case of iterators. + // Python does not complain when the data an iterator + // points to is modified if the iterator is never used + // afterwards. + // Here, we are stricter than this by refusing to give a + // mutable reference if it is already borrowed. + // While the additional safety might be argued for, it + // breaks valid programming patterns in Python and we need + // to fix this issue down the line. + _ => Err(AlreadyBorrowed::new( + py, + "Cannot borrow mutably while there are \ + immutable references in Python objects", + )), + } + } + + fn decrease_leak_count(&self, py: Python) { + *self.leak_count(py).borrow_mut() -= 1; + } + } + + /// Manage immutable references to `$name` leaked into Python + /// iterators. + /// + /// In truth, this does not represent leaked references themselves; + /// it is instead useful alongside them to manage them. + pub struct $leaked { + inner: $name, + } + + impl $leaked { + fn new(py: Python, inner: &$name) -> Self { + Self { + inner: inner.clone_ref(py), + } + } + } + + impl Drop for $leaked { + fn drop(&mut self) { + let gil = Python::acquire_gil(); + let py = gil.python(); + self.inner.decrease_leak_count(py); + } + } + }; +} + +/// Defines a `py_class!` that acts as a Python iterator over a Rust iterator. +macro_rules! py_shared_iterator_impl { + ( + $name: ident, + $leaked: ident, + $iterator_type: ty, + $success_func: expr, + $success_type: ty + ) => { + py_class!(pub class $name |py| { + data inner: RefCell>; + data it: RefCell<$iterator_type>; + + def __next__(&self) -> PyResult<$success_type> { + let mut inner_opt = self.inner(py).borrow_mut(); + if inner_opt.is_some() { + match self.it(py).borrow_mut().next() { + None => { + // replace Some(inner) by None, drop $leaked + inner_opt.take(); + Ok(None) + } + Some(res) => { + $success_func(py, res) + } + } + } else { + Ok(None) + } + } + + def __iter__(&self) -> PyResult { + Ok(self.clone_ref(py)) + } + }); + + impl $name { + pub fn from_inner( + py: Python, + leaked: Option<$leaked>, + it: $iterator_type + ) -> PyResult { + Self::create_instance( + py, + RefCell::new(leaked), + RefCell::new(it) + ) + } + } + }; +} + +/// Defines a `py_class!` that acts as a Python mapping iterator over a Rust +/// iterator. +/// +/// TODO: this is a bit awkward to use, and a better (more complicated) +/// procedural macro would simplify the interface a lot. +/// +/// # Parameters +/// +/// * `$name` is the identifier to give to the resulting Rust struct. +/// * `$leaked` corresponds to `$leaked` in the matching `py_shared_ref!` call. +/// * `$iterator_type` is the iterator type +/// (like `std::collections::hash_map::Iter`). +/// * `$key_type` is the type of the key in the mapping +/// * `$value_type` is the type of the value in the mapping +/// * `$success_func` is a function for processing the Rust `(key, value)` +/// tuple on iteration success, turning it into something Python understands. +/// * `$success_func` is the return type of `$success_func` +/// +/// # Example +/// +/// ``` +/// struct MyStruct { +/// inner: HashMap, Vec>; +/// } +/// +/// py_class!(pub class MyType |py| { +/// data inner: RefCell; +/// data leak_count: RefCell; +/// +/// def __iter__(&self) -> PyResult { +/// MyTypeItemsIterator::create_instance( +/// py, +/// RefCell::new(Some(MyTypeLeakedRef::new(py, &self))), +/// RefCell::new(self.leak_immutable(py).iter()), +/// ) +/// } +/// }); +/// +/// impl MyType { +/// fn translate_key_value( +/// py: Python, +/// res: (&Vec, &Vec), +/// ) -> PyResult> { +/// let (f, entry) = res; +/// Ok(Some(( +/// PyBytes::new(py, f), +/// PyBytes::new(py, entry), +/// ))) +/// } +/// } +/// +/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef); +/// +/// py_shared_mapping_iterator!( +/// MyTypeItemsIterator, +/// MyTypeLeakedRef, +/// std::collections::hash_map::Iter, +/// Vec, +/// Vec, +/// MyType::translate_key_value, +/// Option<(PyBytes, PyBytes)> +/// ); +/// ``` +macro_rules! py_shared_mapping_iterator { + ( + $name:ident, + $leaked:ident, + $iterator_type: ident, + $key_type: ty, + $value_type: ty, + $success_func: path, + $success_type: ty + ) => { + py_shared_iterator_impl!( + $name, + $leaked, + $iterator_type<'static, $key_type, $value_type>, + $success_func, + $success_type + ); + }; +} \ No newline at end of file