//! Byte-oriented, repository-relative paths: [`HgPath`] (borrowed) and
//! [`HgPathBuf`] (owned).
//!
//! This file is `rust/hg-core/src/utils/hg_path.rs`; it is made available to
//! the rest of the crate by the `pub mod hg_path;` declaration in
//! `rust/hg-core/src/utils.rs`.

use std::borrow::Borrow;
use std::convert::TryFrom;
use std::ffi::OsString;
use std::io;
use std::iter::FusedIterator;
use std::ops::{Deref, Index, Range, RangeFrom, RangeFull, RangeTo};
use std::path::PathBuf;
use std::slice;

/// This is a repository-relative path (or canonical path):
///
/// - no null characters
/// - `/` separates directories
/// - no consecutive slashes
/// - no leading slash
/// - no `.` nor `..` of special meaning
/// - stored in repository and shared across platforms
///
/// This allows us to be encoding-transparent as much as possible, until
/// really needed; `HgPath` can be transformed into a platform-specific path
/// (`OsStr` or `Path`) whenever more complex operations are needed:
/// on Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
// `repr(transparent)` guarantees the same layout as `[u8]`, which the pointer
// cast in `HgPath::unchecked_new` relies on.
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}

/// The ways in which a byte string can fail to be a valid [`HgPath`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// The first byte is `/`.
    LeadingSlash,
    /// Index of the second slash
    ConsecutiveSlashes(usize),
    /// Index of the null byte
    ContainsNullByte(usize),
}

impl HgPath {
    /// Reinterprets `s` as an `HgPath` without validating its contents.
    fn unchecked_new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
        // SAFETY: `HgPath` is a `repr(transparent)` wrapper around `[u8]`, so
        // both pointers share layout and metadata, and the returned reference
        // borrows from `s` for the same lifetime.
        unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
    }

    /// Wraps `s` as an `HgPath`.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if `s` does not satisfy
    /// [`HgPath::check_state`]. No validation happens otherwise.
    pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
        let new = Self::unchecked_new(s);
        debug_assert_eq!(Ok(()), new.check_state());
        new
    }

    /// Returns `true` if the path contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the length of the path in bytes.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns a double-ended iterator over the bytes of the path.
    pub fn bytes(&self) -> HgPathBytesIterator<'_> {
        HgPathBytesIterator {
            bytes: self.inner.iter(),
        }
    }

    /// Returns an owned copy with ASCII letters mapped to upper case.
    pub fn to_ascii_uppercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_uppercase())
    }

    /// Returns an owned copy with ASCII letters mapped to lower case.
    pub fn to_ascii_lowercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_lowercase())
    }

    /// Returns the raw bytes of the path.
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }

    /// Checks for errors in the path, short-circuiting at the first one.
    /// Useful to get finer-grained errors. To simply check if the path is
    /// valid, use `is_valid`.
    ///
    /// # Errors
    ///
    /// - [`HgPathError::LeadingSlash`] if the first byte is `/` (checked
    ///   before anything else),
    /// - [`HgPathError::ContainsNullByte`] with the index of the first null
    ///   byte,
    /// - [`HgPathError::ConsecutiveSlashes`] with the index of the second
    ///   slash of the first `//` pair.
    pub fn check_state(&self) -> Result<(), HgPathError> {
        let bytes = self.as_bytes();
        if bytes.first() == Some(&b'/') {
            return Err(HgPathError::LeadingSlash);
        }

        let mut previous_byte = None;
        for (index, &byte) in bytes.iter().enumerate() {
            match byte {
                0 => return Err(HgPathError::ContainsNullByte(index)),
                b'/' if previous_byte == Some(b'/') => {
                    return Err(HgPathError::ConsecutiveSlashes(index));
                }
                _ => {}
            }
            previous_byte = Some(byte);
        }
        Ok(())
    }

    /// Returns `true` if [`HgPath::check_state`] finds no error.
    pub fn is_valid(&self) -> bool {
        self.check_state().is_ok()
    }
}

impl Index<usize> for HgPath {
    type Output = u8;

    fn index(&self, i: usize) -> &Self::Output {
        &self.inner[i]
    }
}

impl Index<RangeFull> for HgPath {
    type Output = HgPath;

    #[inline]
    fn index(&self, _index: RangeFull) -> &HgPath {
        self
    }
}

// The range-based impls below go through `HgPath::new`, so with debug
// assertions enabled they panic when the selected sub-slice is not itself a
// valid path (for instance when it starts with `/`).

impl Index<RangeTo<usize>> for HgPath {
    type Output = HgPath;

    #[inline]
    fn index(&self, range_to: RangeTo<usize>) -> &HgPath {
        HgPath::new(&self.inner[range_to])
    }
}

impl Index<RangeFrom<usize>> for HgPath {
    type Output = HgPath;

    #[inline]
    fn index(&self, range_from: RangeFrom<usize>) -> &HgPath {
        HgPath::new(&self.inner[range_from])
    }
}

impl Index<Range<usize>> for HgPath {
    type Output = HgPath;

    #[inline]
    fn index(&self, range: Range<usize>) -> &HgPath {
        HgPath::new(&self.inner[range])
    }
}

impl Index<usize> for HgPathBuf {
    type Output = u8;

    fn index(&self, i: usize) -> &Self::Output {
        &self.inner[i]
    }
}

impl Index<RangeFull> for HgPathBuf {
    type Output = HgPath;

    #[inline]
    fn index(&self, _index: RangeFull) -> &HgPath {
        HgPath::new(self.inner.as_slice())
    }
}

impl Index<RangeTo<usize>> for HgPathBuf {
    type Output = HgPath;

    #[inline]
    fn index(&self, range_to: RangeTo<usize>) -> &HgPath {
        HgPath::new(&self.inner[range_to])
    }
}

impl Index<RangeFrom<usize>> for HgPathBuf {
    type Output = HgPath;

    #[inline]
    fn index(&self, range_from: RangeFrom<usize>) -> &HgPath {
        HgPath::new(&self.inner[range_from])
    }
}

// Mirrors `Index<Range<usize>> for HgPath` so that owned and borrowed paths
// accept the same range types.
impl Index<Range<usize>> for HgPathBuf {
    type Output = HgPath;

    #[inline]
    fn index(&self, range: Range<usize>) -> &HgPath {
        HgPath::new(&self.inner[range])
    }
}

/// Iterator over the bytes of an [`HgPath`], created by [`HgPath::bytes`].
///
/// It walks the underlying byte slice directly rather than re-slicing the
/// path: a strict suffix of a valid path may start with `/`, which is not a
/// valid `HgPath` and would trip the debug assertion in `HgPath::new`.
#[derive(Clone, Debug)]
pub struct HgPathBytesIterator<'a> {
    bytes: slice::Iter<'a, u8>,
}

impl<'a> Iterator for HgPathBytesIterator<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<u8> {
        self.bytes.next().copied()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.bytes.size_hint()
    }
}

impl<'a> ExactSizeIterator for HgPathBytesIterator<'a> {
    // We can easily calculate the remaining number of iterations.
    fn len(&self) -> usize {
        self.bytes.len()
    }
}

impl<'a> DoubleEndedIterator for HgPathBytesIterator<'a> {
    fn next_back(&mut self) -> Option<u8> {
        self.bytes.next_back().copied()
    }
}

impl<'a> FusedIterator for HgPathBytesIterator<'a> {}

/// Owned, growable counterpart of [`HgPath`].
#[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash, Default)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}

impl HgPathBuf {
    /// Creates an empty path.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends a single byte.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if the path is no longer
    /// valid afterwards.
    pub fn push(&mut self, byte: u8) {
        self.inner.push(byte);
        debug_assert_eq!(Ok(()), self.check_state())
    }

    /// Returns a new path made of the bytes of `self` followed by the bytes
    /// of `other`. No separator is inserted between the two.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if the result is not a
    /// valid path.
    pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> Self {
        let suffix = other.as_ref().as_bytes();
        let mut inner = Vec::with_capacity(self.inner.len() + suffix.len());
        inner.extend_from_slice(&self.inner);
        inner.extend_from_slice(suffix);
        let res = Self { inner };
        debug_assert_eq!(Ok(()), res.check_state());
        res
    }

    /// Returns `true` if the path contains the byte `other`.
    pub fn contains(&self, other: u8) -> bool {
        self.inner.contains(&other)
    }

    /// Copies `s` into a new owned path (validated like [`HgPath::new`]).
    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
        HgPath::new(s).to_owned()
    }

    /// Consumes the path and returns its bytes.
    pub fn into_vec(self) -> Vec<u8> {
        self.inner
    }

    /// Borrows the underlying byte vector.
    pub fn as_vec(&self) -> &Vec<u8> {
        &self.inner
    }

    /// Borrows the raw bytes.
    ///
    /// This inherent method is kept for existing callers; it is distinct
    /// from the `AsRef<HgPath>` implementation, which generic code sees.
    pub fn as_ref(&self) -> &[u8] {
        self.inner.as_ref()
    }
}

impl Deref for HgPathBuf {
    type Target = HgPath;

    #[inline]
    fn deref(&self) -> &HgPath {
        &self[..]
    }
}

impl From<Vec<u8>> for HgPathBuf {
    fn from(vec: Vec<u8>) -> Self {
        let new = Self { inner: vec };
        debug_assert_eq!(Ok(()), new.check_state());
        new
    }
}

impl<'a, T: ?Sized + AsRef<HgPath>> From<&'a T> for HgPathBuf {
    fn from(s: &'a T) -> HgPathBuf {
        let new = s.as_ref().to_owned();
        debug_assert_eq!(Ok(()), new.check_state());
        new
    }
}

// `From` rather than `Into`: the blanket impl still gives callers
// `Into<Vec<u8>> for HgPathBuf`.
impl From<HgPathBuf> for Vec<u8> {
    fn from(path: HgPathBuf) -> Self {
        path.inner
    }
}

impl Borrow<HgPath> for HgPathBuf {
    fn borrow(&self) -> &HgPath {
        &self[..]
    }
}

impl ToOwned for HgPath {
    type Owned = HgPathBuf;

    fn to_owned(&self) -> HgPathBuf {
        HgPathBuf {
            inner: self.inner.to_owned(),
        }
    }
}

impl AsRef<HgPath> for HgPath {
    fn as_ref(&self) -> &HgPath {
        self
    }
}

impl AsRef<HgPath> for HgPathBuf {
    fn as_ref(&self) -> &HgPath {
        self
    }
}

impl Extend<u8> for HgPathBuf {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        self.inner.extend(iter);
        debug_assert_eq!(Ok(()), self.check_state())
    }
}

/// Turns repository path bytes into the platform's native string type.
/// On Unix the bytes are moved as-is, without copying.
#[cfg(unix)]
fn os_string_from_bytes(bytes: Vec<u8>) -> OsString {
    use std::os::unix::ffi::OsStringExt;
    OsString::from_vec(bytes)
}

/// Turns repository path bytes into the platform's native string type.
#[cfg(windows)]
fn os_string_from_bytes(_bytes: Vec<u8>) -> OsString {
    // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
    // Perhaps, the return type would have to be a `Result`.
    unimplemented!()
}

// `TryFrom` rather than `TryInto`: the blanket impl still gives callers
// `TryInto<PathBuf> for HgPathBuf`, so `.try_into()` keeps working.
impl TryFrom<HgPathBuf> for PathBuf {
    type Error = io::Error;

    fn try_from(path: HgPathBuf) -> Result<Self, Self::Error> {
        Ok(PathBuf::from(os_string_from_bytes(path.inner)))
    }
}

impl TryFrom<HgPathBuf> for OsString {
    type Error = io::Error;

    fn try_from(path: HgPathBuf) -> Result<Self, Self::Error> {
        Ok(os_string_from_bytes(path.inner))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_path_states() {
        assert_eq!(
            Err(HgPathError::LeadingSlash),
            HgPath::unchecked_new(b"/").check_state()
        );
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(4)),
            HgPath::unchecked_new(b"a/b//c").check_state()
        );
        assert_eq!(
            Err(HgPathError::ContainsNullByte(4)),
            HgPath::unchecked_new(b"a/b/\0c").check_state()
        );
        assert_eq!(true, HgPath::new(b"").is_valid());
        assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
        assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
    }

    #[test]
    fn test_iter() {
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(b'a'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(b'a'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(b'a'), iter.next());
        assert_eq!(Some(b'c'), iter.next_back());
        assert_eq!(Some(b'b'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(b'a'), iter.next());
        assert_eq!(Some(b'b'), iter.next());
        assert_eq!(Some(b'c'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let iter = path.bytes();
        let mut vec = Vec::new();
        vec.extend(iter);
        assert_eq!(vec![b'a', b'b', b'c'], vec);

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(2), iter.rposition(|c| c == b'c'));

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(None, iter.rposition(|c| c == b'd'));
    }

    #[test]
    fn test_iter_across_separators() {
        // Stepping past the first component leaves a remainder that starts
        // with `/`; iteration must not validate that remainder as a path.
        let path = HgPath::new(b"a/b");
        let mut iter = path.bytes();
        assert_eq!(Some(b'a'), iter.next());
        assert_eq!(Some(b'/'), iter.next());
        assert_eq!(Some(b'b'), iter.next());
        assert_eq!(None, iter.next());
    }
}