diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -85,6 +85,88 @@
     path.to_ascii_lowercase()
 }
 
+#[cfg(windows)]
+/// Copied from the Python stdlib's `os.path.splitdrive` implementation.
+///
+/// Split a pathname into drive/UNC sharepoint and relative path specifiers.
+/// Returns a 2-tuple (drive_or_unc, path); either part may be empty.
+///
+/// If you assign
+///  result = split_drive(p)
+/// It is always true that:
+///  result[0] + result[1] == p
+///
+/// If the path contained a drive letter, drive_or_unc will contain everything
+/// up to and including the colon.
+/// e.g. split_drive("c:/dir") returns ("c:", "/dir")
+///
+/// If the path contained a UNC path, the drive_or_unc will contain the host
+/// name and share up to but not including the fourth directory separator
+/// character.
+/// e.g. split_drive("//host/computer/dir") returns ("//host/computer", "/dir")
+///
+/// Paths cannot contain both a drive letter and a UNC path.
+///
+/// Both `/` and `\` are accepted as directory separators; the returned parts
+/// keep the separators exactly as they were written in `path`.
+pub fn split_drive(path: impl AsRef<HgPath>) -> (HgPathBuf, HgPathBuf) {
+    let path = path.as_ref();
+    let sep = std::path::MAIN_SEPARATOR as u8;
+    let bytes = path.as_bytes();
+    // Normalize the alternate separator (`/`) to the main one (`\`) so that
+    // the checks below only have to compare against `sep`. The original
+    // `bytes` are used to build the result, so the caller's spelling is kept.
+    let norm_bytes: Vec<_> = bytes
+        .iter()
+        .map(|c| if *c == b'/' { sep } else { *c })
+        .collect();
+    if norm_bytes.len() > 1 {
+        if norm_bytes[0] == sep
+            && norm_bytes[1] == sep
+            && (norm_bytes.len() < 3 || norm_bytes[2] != sep)
+        {
+            // Is a UNC path:
+            // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
+            // \\machine\mountpoint\directory\etc\...
+            //           directory ^^^^^^^^^^^^^^^
+            let index = match norm_bytes[2..].iter().position(|b| *b == sep) {
+                Some(i) => i + 2,
+                // No separator after the host name: not a valid UNC path
+                None => return (HgPathBuf::new(), path.to_owned()),
+            };
+
+            let index2 =
+                norm_bytes[index + 1..].iter().position(|b| *b == sep);
+            // A UNC path can't have two slashes in a row
+            // (after the initial two)
+            if index2 == Some(0) {
+                return (HgPathBuf::new(), path.to_owned());
+            }
+            let index2 = match index2 {
+                Some(i) => i + index + 1,
+                None => norm_bytes.len(),
+            };
+            return (
+                HgPathBuf::from_bytes(&bytes[..index2]),
+                HgPathBuf::from_bytes(&bytes[index2..]),
+            );
+        }
+        if norm_bytes[1] == b':' {
+            return (
+                HgPathBuf::from_bytes(&bytes[..2]),
+                HgPathBuf::from_bytes(&bytes[2..]),
+            );
+        }
+    }
+    (HgPathBuf::new(), path.to_owned())
+}
+
+#[cfg(unix)]
+/// Split a pathname into drive and path. On Posix, drive is always empty.
+pub fn split_drive(path: impl AsRef<HgPath>) -> (HgPathBuf, HgPathBuf) {
+    (HgPathBuf::new(), path.as_ref().into())
+}
+
 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
 pub struct HgMetadata {
     pub st_dev: u64,
@@ -133,4 +215,101 @@
         assert_eq!(dirs.next(), None);
         assert_eq!(dirs.next(), None);
     }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_split_drive() {
+        // Taken from the Python stdlib's tests
+        assert_eq!(
+            split_drive(HgPath::new(br"/foo/bar")),
+            (HgPathBuf::new(), HgPathBuf::from_bytes(br"/foo/bar"))
+        );
+        assert_eq!(
+            split_drive(HgPath::new(br"foo:bar")),
+            (HgPathBuf::new(), HgPathBuf::from_bytes(br"foo:bar"))
+        );
+        assert_eq!(
+            split_drive(HgPath::new(br":foo:bar")),
+            (HgPathBuf::new(), HgPathBuf::from_bytes(br":foo:bar"))
+        );
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_split_drive() {
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(br"c:\foo\bar")),
+            (
+                HgPathBuf::from_bytes(br"c:"),
+                HgPathBuf::from_bytes(br"\foo\bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(b"c:/foo/bar")),
+            (
+                HgPathBuf::from_bytes(br"c:"),
+                HgPathBuf::from_bytes(br"/foo/bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(br"\\conky\mountpoint\foo\bar")),
+            (
+                HgPathBuf::from_bytes(br"\\conky\mountpoint"),
+                HgPathBuf::from_bytes(br"\foo\bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(br"//conky/mountpoint/foo/bar")),
+            (
+                HgPathBuf::from_bytes(br"//conky/mountpoint"),
+                HgPathBuf::from_bytes(br"/foo/bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(
+                br"\\\conky\mountpoint\foo\bar"
+            )),
+            (
+                HgPathBuf::from_bytes(br""),
+                HgPathBuf::from_bytes(br"\\\conky\mountpoint\foo\bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(
+                br"///conky/mountpoint/foo/bar"
+            )),
+            (
+                HgPathBuf::from_bytes(br""),
+                HgPathBuf::from_bytes(br"///conky/mountpoint/foo/bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(
+                br"\\conky\\mountpoint\foo\bar"
+            )),
+            (
+                HgPathBuf::from_bytes(br""),
+                HgPathBuf::from_bytes(br"\\conky\\mountpoint\foo\bar")
+            )
+        );
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(
+                br"//conky//mountpoint/foo/bar"
+            )),
+            (
+                HgPathBuf::from_bytes(br""),
+                HgPathBuf::from_bytes(br"//conky//mountpoint/foo/bar")
+            )
+        );
+        // UNC part containing U+0130
+        assert_eq!(
+            split_drive(HgPathBuf::from_bytes(
+                b"//conky/MOUNTPO\xc4\xb0NT/foo/bar"
+            )),
+            (
+                HgPathBuf::from_bytes(b"//conky/MOUNTPO\xc4\xb0NT"),
+                HgPathBuf::from_bytes(br"/foo/bar")
+            )
+        );
+    }
 }