diff --git a/rust/hgcli/src/hgext/base85.rs b/rust/hgcli/src/hgext/base85.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/base85.rs
@@ -0,0 +1,317 @@
+use cpython::{PyObject, PyResult, Python, Py_ssize_t, PythonObject, PyBytes, PyErr, exc};
+use cpython::_detail::ffi;
+
+use std;
+use std::{mem, sync};
+use super::cpython_ext;
+
+/// The 85 symbols of the alphabet, indexed by digit value.
+const B85CHARS: &[u8; 85] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
+
+/// Reverse table: symbol byte -> digit value plus one; zero marks a byte
+/// outside the alphabet. Written only by `b85prep`.
+static mut B85DEC: [u8; 256] = [0; 256];
+/// Guards the one-time fill of `B85DEC`.
+static B85DEC_START: sync::Once = sync::ONCE_INIT;
+
+/// Fills the reverse lookup table `B85DEC` from `B85CHARS`, at most once.
+///
+/// Safe to call repeatedly and from several threads; only the first call
+/// writes the table.
+fn b85prep() {
+    B85DEC_START.call_once(|| {
+        for (value, &symbol) in B85CHARS.iter().enumerate() {
+            // SAFETY: `call_once` runs this closure a single time and blocks
+            // concurrent callers of `b85prep` until it returns, so the table
+            // is written by one thread only.
+            unsafe {
+                B85DEC[symbol as usize] = (value + 1) as u8;
+            }
+        }
+    });
+}
+
+/// Encodes `text` as base85 and returns the result as a Python bytes object.
+///
+/// Every group of up to four input bytes becomes five output characters.
+/// When `pad` is zero, the characters that only encode the zero fill of a
+/// short final group are dropped; otherwise the output keeps whole groups.
+///
+/// * `py` - interpreter handle, obtained while holding the GIL.
+/// * `text` - data to encode, taken as its raw UTF-8 bytes.
+/// * `pad` - non-zero keeps the output a multiple of five characters.
+///
+/// An empty `text` yields an empty bytes object.
+///
+/// The output is assembled in a Rust buffer and copied into the Python
+/// object once, so the object never has to be resized in place.
+///
+/// This function does not raise; it always returns `Ok`.
+pub fn b85encode(py: Python, text: &str, pad: i32) -> PyResult<PyObject> {
+    let bytes = text.as_bytes();
+
+    // Five output characters per (possibly partial) group of four bytes.
+    let mut encoded: Vec<u8> = Vec::with_capacity((bytes.len() + 3) / 4 * 5);
+
+    for chunk in bytes.chunks(4) {
+        // Pack the chunk big-endian; missing trailing bytes stay zero.
+        let mut acc: u32 = 0;
+        for (idx, &byte) in chunk.iter().enumerate() {
+            acc |= (byte as u32) << (24 - 8 * idx);
+        }
+
+        // Emit the five base85 digits, most significant digit first.
+        // Filled back to front because `% 85` yields the lowest digit first.
+        let mut group = [0u8; 5];
+        for slot in group.iter_mut().rev() {
+            *slot = B85CHARS[(acc % 85) as usize];
+            acc /= 85;
+        }
+        encoded.extend_from_slice(&group);
+    }
+
+    if pad == 0 {
+        // A final group of `tail` bytes needs only `tail + 1` characters.
+        let tail = bytes.len() % 4;
+        if tail > 0 {
+            let keep = encoded.len() - (4 - tail);
+            encoded.truncate(keep);
+        }
+    }
+
+    Ok(PyBytes::new(py, &encoded).into_object())
+}
+
+/// Decodes base85 `text` and returns the bytes as a Python bytes object.
+///
+/// Five characters decode to four bytes; a shorter final group of `n`
+/// characters decodes to `n - 1` bytes. Empty input yields empty bytes.
+///
+/// * `py` - interpreter handle, obtained while holding the GIL.
+/// * `text` - base85 characters to decode.
+///
+/// # Errors
+///
+/// Raises `ValueError` when a byte is outside the alphabet or when a group
+/// does not fit in 32 bits. The reported position is the index of the
+/// offending character, or index plus one for the last character of a group.
+///
+/// NOTE(review): one trailing zero byte is dropped from the result, which
+/// lets padded encoder output round-trip (the padded decoder tests in this
+/// file rely on it) but also removes a genuine zero byte at the end of a
+/// payload. Confirm this is intended.
+pub fn b85decode(py: Python, text: &str) -> PyResult<PyObject> {
+    // Populate the table even when the module initializer has not run.
+    b85prep();
+    // SAFETY: `b85prep` has returned, so the one-time write to `B85DEC` is
+    // finished and the table is only read from here on.
+    let table = unsafe { B85DEC };
+    let bad = |pos: usize| {
+        PyErr::new::<exc::ValueError, _>(py, format!("bad base85 character at position {}", pos))
+    };
+
+    let input = text.as_bytes();
+    let len = input.len();
+    let tail = len % 5;
+    let total = len / 5 * 4 + if tail > 0 { tail - 1 } else { 0 };
+    let mut decoded: Vec<u8> = Vec::with_capacity(total);
+
+    let mut i = 0;
+    while i < len {
+        let mut acc: u32 = 0;
+        // Every character of the group except its last one.
+        let lead = std::cmp::min(len - i - 1, 4);
+        for _ in 0..lead {
+            let digit = table[input[i] as usize] as i32 - 1;
+            if digit < 0 {
+                return Err(bad(i));
+            }
+            acc = acc * 85 + (digit as u32);
+            i += 1;
+        }
+
+        // Last character of the group; `lead <= len - i - 1` guarantees
+        // that one character is still left to read.
+        let digit = table[input[i] as usize] as i32 - 1;
+        i += 1;
+        if digit < 0 {
+            return Err(bad(i));
+        }
+        // Overflow detection: 0xffffffff == "|NsC0", "|NsC" == 0x03030303.
+        if acc > 0x03030303 {
+            return Err(bad(i));
+        }
+        acc *= 85;
+        if acc > (0xffffffff_u32 - (digit as u32)) {
+            return Err(bad(i));
+        }
+        acc += digit as u32;
+
+        let want = std::cmp::min(total - decoded.len(), 4);
+
+        // Scale a short final group up to five digits. Malformed input can
+        // exceed 32 bits here, so wrap explicitly instead of panicking in
+        // builds with overflow checks.
+        for _ in 0..(4 - want) {
+            acc = acc.wrapping_mul(85);
+        }
+        if want > 0 && want < 4 {
+            // Round up so the dropped digits cannot lower the kept bytes.
+            acc = acc.wrapping_add(0xffffff_u32 >> ((want - 1) * 8));
+        }
+        // Emit the top `want` bytes, most significant first.
+        for _ in 0..want {
+            acc = acc.rotate_left(8);
+            decoded.push(acc as u8);
+        }
+    }
+
+    if decoded.last() == Some(&b'\0') {
+        decoded.pop();
+    }
+
+    Ok(PyBytes::new(py, &decoded).into_object())
+}
+
+py_module_initializer!(oxidized_base85, initoxidized_base85, PyInit_oxidized_base85, |py, m| {
+    b85prep();
+    m.add(py, "__doc__", "Oxidized base85 module")?;
+    m.add(py, "b85encode", py_fn!(py, b85encode(text: &str, pad: i32)))?;
+    m.add(py, "b85decode", py_fn!(py, b85decode(text: &str)))?;
+    Ok(())
+});
+
+#[cfg(test)]
+mod test {
+    use hgext;
+    use cpython::Python;
+
+
+    #[test]
+    fn test_encoder_abc_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85encode(py, "abc", 1).unwrap().extract(py).unwrap();
+        assert_eq!(res, "VPazd");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py,"b85encode", ("abc", 1), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "VPazd");
+    }
+
+    #[test]
+    fn test_encoder_chinese_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85encode(py, "这是一个测试的例子", 1).unwrap().extract(py).unwrap();
+        assert_eq!(res, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py,"b85encode", ("这是一个测试的例子", 1), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa");
+    }
+
+    #[test]
+    fn test_encoder_abc_no_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85encode(py, "abc", 0).unwrap().extract(py).unwrap();
+        assert_eq!(res, "VPaz");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py,"b85encode", ("abc", 0), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "VPaz");
+    }
+
+    #[test]
+    fn test_encoder_chinese_no_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85encode(py, "这是一个测试的例子", 0).unwrap().extract(py).unwrap();
+        assert_eq!(res, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py, "b85encode", ("这是一个测试的例子", 0), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq");
+    }
+
+    #[test]
+    fn test_decoder_abc_no_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85decode(py, "VPaz").unwrap().extract(py).unwrap();
+        assert_eq!(res, "abc");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py, "b85decode", ("VPaz", ), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "abc");
+    }
+
+    #[test]
+    fn test_decoder_abc_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85decode(py, "VPazd").unwrap().extract(py).unwrap();
+        assert_eq!(res, "abc");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py, "b85decode", ("VPazd", ), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "abc");
+    }
+
+    #[test]
+    fn test_decoder_chinese_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85decode(py, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa").unwrap().extract(py).unwrap();
+        assert_eq!(res, "这是一个测试的例子");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py, "b85decode", ("=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa", ), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "这是一个测试的例子");
+    }
+
+    #[test]
+    fn test_decoder_chinese_no_pad() -> () {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let res: String = super::b85decode(py, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq").unwrap().extract(py).unwrap();
+        assert_eq!(res, "这是一个测试的例子");
+
+        let base85 = py.import("oxidized_base85").unwrap();
+        let res: String = base85.call(py, "b85decode", ("=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq", ), None).unwrap().extract(py).unwrap();
+        assert_eq!(res, "这是一个测试的例子");
+    }
+}
diff --git a/rust/hgcli/src/hgext/cpython_ext.rs b/rust/hgcli/src/hgext/cpython_ext.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/cpython_ext.rs
@@ -0,0 +1,39 @@
+use cpython::{PyObject, Python, Py_ssize_t, PyBytes, PythonObjectWithCheckedDowncast};
+
+use python27_sys as ffi;
+
+use std;
+
+/// Takes ownership of `p` and downcasts it to `T`.
+///
+/// # Safety
+///
+/// `p` must be null or an owned reference to a live Python object.
+///
+/// # Panics
+///
+/// Panics when `p` is null or when the object is not a `T`.
+#[inline]
+pub unsafe fn cast_from_owned_ptr_or_panic<T>(py : Python, p : *mut ffi::PyObject) -> T
+    where T: PythonObjectWithCheckedDowncast {
+    if p.is_null() {
+        panic!("NULL pointer detected.")
+    } else {
+        PyObject::from_owned_ptr(py, p).cast_into(py).unwrap()
+    }
+}
+
+/// Allocates a Python bytes object of `len` bytes without supplying contents.
+///
+/// # Panics
+///
+/// Panics when `len` is not positive or when the allocation returns NULL.
+pub fn pybytes_new_without_copying(py: Python, len: Py_ssize_t) -> PyBytes {
+    unsafe {
+        if len <= 0 {
+            panic!("the request bytes length should be > 0.")
+        }
+        cast_from_owned_ptr_or_panic(py,
+            ffi::PyBytes_FromStringAndSize(std::ptr::null(), len))
+    }
+}
\ No newline at end of file
diff --git a/rust/hgcli/src/hgext/mod.rs b/rust/hgcli/src/hgext/mod.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/mod.rs
@@ -0,0 +1,137 @@
+extern crate libc;
+
+pub mod base85;
+pub mod cpython_ext;
+
+use std;
+use std::{env, sync};
+use std::path::{PathBuf};
+use std::ffi::{CString, OsStr};
+use python27_sys as ffi;
+use cpython;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::{OsStrExt};
+
+/// Guards the one-time registration done by `init_all_hg_ext`.
+static HG_EXT_REG: sync::Once = sync::ONCE_INIT;
+
+/// Runs the init function of each extension module (currently only
+/// `oxidized_base85`). Only the first call does any work.
+#[no_mangle]
+pub fn init_all_hg_ext(_py: cpython::Python) {
+    HG_EXT_REG.call_once(|| {
+        unsafe {
+            base85::initoxidized_base85();
+        }
+    });
+}
+
+/// Paths used to configure the embedded interpreter.
+#[derive(Debug)]
+pub struct Environment {
+    _exe: PathBuf,
+    python_exe: PathBuf,
+    python_home: PathBuf,
+    mercurial_modules: PathBuf,
+}
+
+// On UNIX, platform string is just bytes and should not contain NUL.
+#[cfg(target_family = "unix")]
+fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
+    CString::new(s.as_ref().as_bytes()).unwrap()
+}
+
+// On Windows, the conversion panics unless the string is valid Unicode.
+#[cfg(target_family = "windows")]
+fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
+    CString::new(s.as_ref().to_str().unwrap()).unwrap()
+}
+
+/// Passes `env.python_home` to `Py_SetPythonHome`.
+fn set_python_home(env: &Environment) {
+    // `into_raw` gives up ownership, so the string is never freed and stays
+    // valid for as long as the interpreter may read it.
+    let raw = cstring_from_os(&env.python_home).into_raw();
+    unsafe {
+        ffi::Py_SetPythonHome(raw);
+    }
+}
+
+/// Guards the one-time interpreter setup done by `set_py_env`.
+static PYTHON_ENV_START: sync::Once = sync::ONCE_INIT;
+
+/// Configures and starts the embedded Python interpreter, once per process.
+///
+/// Reads `HGROOT`, `HGRUST_PYTHONEXE` and `HGRUST_PYTHONHOME` from the
+/// environment and panics when one of them is not set.
+///
+/// The second half of the initialization code is copied from rust-cpython's
+/// `pythonrun::prepare_freethreaded_python()`, because this function is
+/// called mainly by `cargo test` and the multi-thread nature requires to
+/// properly set up threads and GIL. In the corresponding version,
+/// `prepare_freethreaded_python()` is turned off, so the cargo test features
+/// must be properly called.
+pub fn set_py_env() {
+    PYTHON_ENV_START.call_once(|| {
+        let env = {
+            let exe = env::current_exe().unwrap();
+
+            let mercurial_modules = std::env::var("HGROOT").expect("must set HGROOT to mercurial's root folder (one layer above mercurial folder itself)");
+
+            let python_exe = std::env::var("HGRUST_PYTHONEXE").expect("set HGRUST_PYTHONEXE to the full path of the python.exe file");
+
+            let python_home = std::env::var("HGRUST_PYTHONHOME").expect("set HGRUST_PYTHONHOME to the Python home directory (see PYTHONHOME in the python doc)");
+
+            Environment {
+                _exe: exe,
+                python_exe: PathBuf::from(python_exe),
+                python_home: PathBuf::from(python_home),
+                mercurial_modules: PathBuf::from(mercurial_modules),
+            }
+        };
+
+        // Tell Python where it is installed.
+        set_python_home(&env);
+
+        // Set program name. The backing memory needs to live for the duration of the
+        // interpreter, so ownership is released with `into_raw()` and the string is
+        // never freed. Calling `as_ptr()` on the temporary `CString` instead would
+        // hand Python a pointer that dangles as soon as the statement ends.
+        //
+        // Yes, we use the path to the Python interpreter not argv[0] here. The
+        // reason is because Python uses the given path to find the location of
+        // Python files. Apparently we could define our own ``Py_GetPath()``
+        // implementation. But this may require statically linking Python, which is
+        // not desirable.
+        let program_name = cstring_from_os(&env.python_exe).into_raw();
+        unsafe {
+            ffi::Py_SetProgramName(program_name);
+        }
+
+        unsafe {
+            if ffi::Py_IsInitialized() != 0 {
+                // If Python is already initialized, we expect Python threading to also be initialized,
+                // as we can't make the existing Python main thread acquire the GIL.
+                assert!(ffi::PyEval_ThreadsInitialized() != 0);
+            } else {
+                // If Python isn't initialized yet, we expect that Python threading isn't initialized either.
+                assert!(ffi::PyEval_ThreadsInitialized() == 0);
+                // Initialize Python.
+                // We use Py_InitializeEx() with initsigs=0 to disable Python signal handling.
+                // Signal handling depends on the notion of a 'main thread', which doesn't exist in this case.
+                // Note that the 'main thread' notion in Python isn't documented properly;
+                // and running Python without one is not officially supported.
+                ffi::Py_InitializeEx(0);
+                ffi::PyEval_InitThreads();
+                // PyEval_InitThreads() will acquire the GIL,
+                // but we don't want to hold it at this point
+                // (it's not acquired in the other code paths)
+                // So immediately release the GIL:
+                let _thread_state = ffi::PyEval_SaveThread();
+                // Note that the PyThreadState returned by PyEval_SaveThread is also held in TLS by the Python runtime,
+                // and will be restored by PyGILState_Ensure.
+            }
+        }
+    });
+}
diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs
--- a/rust/hgcli/src/main.rs
+++ b/rust/hgcli/src/main.rs
@@ -6,9 +6,11 @@
 // GNU General Public License version 2 or any later version.
extern crate libc; -extern crate cpython; +#[macro_use] extern crate cpython; extern crate python27_sys; +pub mod hgext; + use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python}; use libc::{c_char, c_int};