diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt --- a/mercurial/helptext/internals/dirstate-v2.txt +++ b/mercurial/helptext/internals/dirstate-v2.txt @@ -372,7 +372,7 @@ This counter is used to implement `has_tracked_dir`. * Offset 30: - Some boolean values packed as bits of a single byte. + A single `flags` byte that packs some boolean values as bits. Starting from least-significant, bit masks are:: WDIR_TRACKED = 1 << 0 @@ -381,110 +381,116 @@ HAS_MODE_AND_SIZE = 1 << 3 HAS_MTIME = 1 << 4 MODE_EXEC_PERM = 1 << 5 - MODE_IS_SYMLINK = 1 << 7 - - - Other bits are unset. The meaning of these bits are: - - `WDIR_TRACKED` - Set if the working directory contains a tracked file at this node’s path. - This is typically set and unset by `hg add` and `hg rm`. - - `P1_TRACKED` - set if the working directory’s first parent changeset - (whose node identifier is found in tree metadata) - contains a tracked file at this node’s path. - This is a cache to reduce manifest lookups. - - `P2_INFO` - Set if the file has been involved in some merge operation. - Either because it was actually merged, - or because the version in the second parent p2 version was ahead, - or because some rename moved it there. - In either case `hg status` will want it displayed as modified. + MODE_IS_SYMLINK = 1 << 6 - Files that would be mentioned at all in the `dirstate-v1` file format - have a node with at least one of the above three bits set in `dirstate-v2`. - Let’s call these files "tracked anywhere", - and "untracked" the nodes with all three of these bits unset. - Untracked nodes are typically for directories: - they hold child nodes and form the tree structure. - Additional untracked nodes may also exist. - Although implementations should strive to clean up nodes - that are entirely unused, other untracked nodes may also exist. - For example, a future version of Mercurial might in some cases - add nodes for untracked files or/and ignored files in the working directory - in order to optimize `hg status` - by enabling it to skip `readdir` in more cases. + The meaning of each bit is described below. + Other bits are unset. - When a node is for a file tracked anywhere: - - If `HAS_MODE_AND_SIZE` is set, the file is expected - to be a symbolic link or a normal file based on `MODE_IS_SYMLINK`. - - If `HAS_MODE_AND_SIZE` is set, the file’s owner is expected - to have execute permission or not based on `MODE_EXEC_PERM`. - - If `HAS_MODE_AND_SIZE` is unset, - the expected type of file and permission are unknown. - The rest of the node data is three fields: - - * Offset 31: - 4 unused bytes, set to zero +* Offset 31: + A `size` field described below, as a 32-bit integer. + Unlike in dirstate-v1, negative values are not used. - * Offset 35: - If `HAS_MODE_AND_SIZE` is unset, four zero bytes. - Otherwise, a 32-bit integer for expected size of the file - truncated to its 31 least-significant bits. - Unlike in dirstate-v1, negative values are not used. - - * Offset 39: - If `HAS_MTIME` is unset, four zero bytes. - Otherwise, a 32-bit integer for expected modified time of the file - (as in `stat_result.st_mtime`), - truncated to its 31 least-significant bits. - Unlike in dirstate-v1, negative values are not used. - - If an untracked node `HAS_MTIME` *unset*, this space is unused: - - * Offset 31: - 12 unused bytes, set to zero - - If an untracked node `HAS_MTIME` *set*, - what follows is the modification time of a directory - represented similarly to the C `timespec` struct: - - * Offset 31: - 4 unused bytes, set to zero +* Offset 35: + The seconds component of an `mtime` field described below, + as a 32-bit integer. + Unlike in dirstate-v1, negative values are not used. - * Offset 35: - The number of seconds elapsed since the Unix epoch, - truncated to its lower 31 bits, - as a 32-bit integer. - - * Offset 39: - The sub-second number of nanoseconds elapsed since the Unix epoch, - as 32-bit integer. - Always greater than or equal to zero, and strictly less than a billion. - - The presence of a directory modification time means that at some point, - this path in the working directory was observed: - - - To be a directory - - With the given modification time - - That time was already strictly in the past when observed, - meaning that later changes cannot happen in the same clock tick - and must cause a different modification time - (unless the system clock jumps back and we get unlucky, - which is not impossible but deemed unlikely enough). - - All direct children of this directory - (as returned by `std::fs::read_dir`) - either have a corresponding dirstate node, - or are ignored by ignore patterns whose hash is in tree metadata. - - This means that if `std::fs::symlink_metadata` later reports - the same modification time - and ignored patterns haven’t changed, - a run of status that is not listing ignored files - can skip calling `std::fs::read_dir` again for this directory, - and iterate child dirstate nodes instead. - +* Offset 39: + The nanoseconds component of an `mtime` field described below, + as a 32-bit integer. * (Offset 43: end of this node) + +The meaning of the boolean values packed in `flags` is: + +`WDIR_TRACKED` + Set if the working directory contains a tracked file at this node’s path. + This is typically set and unset by `hg add` and `hg rm`. + +`P1_TRACKED` + Set if the working directory’s first parent changeset + (whose node identifier is found in tree metadata) + contains a tracked file at this node’s path. + This is a cache to reduce manifest lookups. + +`P2_INFO` + Set if the file has been involved in some merge operation. + Either because it was actually merged, + or because the version in the second parent p2 version was ahead, + or because some rename moved it there. + In either case `hg status` will want it displayed as modified. + +Files that would be mentioned at all in the `dirstate-v1` file format +have a node with at least one of the above three bits set in `dirstate-v2`. +Let’s call these files "tracked anywhere", +and "untracked" the nodes with all three of these bits unset. +Untracked nodes are typically for directories: +they hold child nodes and form the tree structure. +Additional untracked nodes may also exist. +Although implementations should strive to clean up nodes +that are entirely unused, other untracked nodes may also exist. +For example, a future version of Mercurial might in some cases +add nodes for untracked files or/and ignored files in the working directory +in order to optimize `hg status` +by enabling it to skip `readdir` in more cases. + +`HAS_MODE_AND_SIZE` + Must be unset for untracked nodes. + For files tracked anywhere, if this is set: + - The `size` field is the expected file size, + in bytes truncated its lower to 31 bits, + for the file to be clean. + - The expected execute permission for the file’s owner + is given by `MODE_EXEC_PERM` + - The expected file type is given by `MODE_IS_SIMLINK`: + a symbolic link if set, or a normal file if unset. + If this is unset the expected size, permission, and file type are unknown. + The `size` field is unused (set to zero). + +`HAS_MTIME` + If unset, the `mtime` field is unused (set to zero). + If set, it contains a timestamp represented as + - the number of seconds since the Unix epoch, + truncated to its lower 31 bits. + - and the number of nanoseconds since `mtime.seconds`, + always stritctly less than one billion. + This may be zero if more precision is not available. + (This can happen because of limitations in any of Mercurial, Python, + libc, the operating system, …) + + If set for a file tracked anywhere, + `mtime` is the expected modification time for the file to be clean. + + If set for an untracked node, at some point, + this path in the working directory was observed: + + - To be a directory + - With the modification time given in `mtime` + - That time was already strictly in the past when observed, + meaning that later changes cannot happen in the same clock tick + and must cause a different modification time + (unless the system clock jumps back and we get unlucky, + which is not impossible but deemed unlikely enough). + - All direct children of this directory + (as returned by `std::fs::read_dir`) + either have a corresponding dirstate node, + or are ignored by ignore patterns whose hash is in tree metadata. + + This means that if `std::fs::symlink_metadata` later reports + the same modification time + and ignored patterns haven’t changed, + a run of status that is not listing ignored files + can skip calling `std::fs::read_dir` again for this directory, + and iterate child dirstate nodes instead. + +`MODE_EXEC_PERM` + Must be unset if `HAS_MODE_AND_SIZE` is unset. + If `HAS_MODE_AND_SIZE` is set, + this indicates whether the file’s own is expected + to have execute permission. + +`MODE_IS_SYMLINK` + Must be unset if `HAS_MODE_AND_SIZE` is unset. + If `HAS_MODE_AND_SIZE` is set, + this indicates whether the file is expected to be a symlink + as opposed to a normal file. diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -97,7 +97,8 @@ pub(super) descendants_with_entry_count: Size, pub(super) tracked_descendants_count: Size, flags: Flags, - data: Entry, + size: U32Be, + mtime: PackedTruncatedTimestamp, } bitflags! { @@ -110,23 +111,14 @@ const HAS_MODE_AND_SIZE = 1 << 3; const HAS_MTIME = 1 << 4; const MODE_EXEC_PERM = 1 << 5; - const MODE_IS_SYMLINK = 1 << 7; + const MODE_IS_SYMLINK = 1 << 6; } } -#[derive(BytesCast, Copy, Clone, Debug)] -#[repr(C)] -struct Entry { - _padding: U32Be, - size: U32Be, - mtime: U32Be, -} - /// Duration since the Unix epoch #[derive(BytesCast, Copy, Clone)] #[repr(C)] -struct PackedTimestamp { - _padding: U32Be, +struct PackedTruncatedTimestamp { truncated_seconds: U32Be, nanoseconds: U32Be, } @@ -329,7 +321,7 @@ ) -> Result, DirstateV2ParseError> { Ok( if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() { - Some(self.data.as_timestamp()?) + Some(self.mtime.try_into()?) } else { None }, @@ -356,12 +348,12 @@ let p1_tracked = self.flags.contains(Flags::P1_TRACKED); let p2_info = self.flags.contains(Flags::P2_INFO); let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) { - Some((self.synthesize_unix_mode(), self.data.size.into())) + Some((self.synthesize_unix_mode(), self.size.into())) } else { None }; let mtime = if self.flags.contains(Flags::HAS_MTIME) { - Some(self.data.mtime.into()) + Some(self.mtime.truncated_seconds.into()) } else { None }; @@ -407,10 +399,10 @@ tracked_descendants_count: self.tracked_descendants_count.get(), }) } -} -impl Entry { - fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) { + fn from_dirstate_entry( + entry: &DirstateEntry, + ) -> (Flags, U32Be, PackedTruncatedTimestamp) { let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) = entry.v2_data(); // TODO: convert throug raw flag bits instead? @@ -418,53 +410,26 @@ flags.set(Flags::WDIR_TRACKED, wdir_tracked); flags.set(Flags::P1_TRACKED, p1_tracked); flags.set(Flags::P2_INFO, p2_info); - let (size, mtime); - if let Some((m, s)) = mode_size_opt { + let size = if let Some((m, s)) = mode_size_opt { let exec_perm = m & libc::S_IXUSR != 0; let is_symlink = m & libc::S_IFMT == libc::S_IFLNK; flags.set(Flags::MODE_EXEC_PERM, exec_perm); flags.set(Flags::MODE_IS_SYMLINK, is_symlink); - size = s; - flags.insert(Flags::HAS_MODE_AND_SIZE) + flags.insert(Flags::HAS_MODE_AND_SIZE); + s.into() } else { - size = 0; - } - if let Some(m) = mtime_opt { - mtime = m; - flags.insert(Flags::HAS_MTIME); - } else { - mtime = 0; - } - let raw_entry = Entry { - _padding: 0.into(), - size: size.into(), - mtime: mtime.into(), + 0.into() }; - (flags, raw_entry) - } - - fn from_timestamp(timestamp: TruncatedTimestamp) -> Self { - let packed = PackedTimestamp { - _padding: 0.into(), - truncated_seconds: timestamp.truncated_seconds().into(), - nanoseconds: timestamp.nanoseconds().into(), + let mtime = if let Some(m) = mtime_opt { + flags.insert(Flags::HAS_MTIME); + PackedTruncatedTimestamp { + truncated_seconds: m.into(), + nanoseconds: 0.into(), + } + } else { + PackedTruncatedTimestamp::null() }; - // Safety: both types implement the `ByteCast` trait, so we could - // safely use `as_bytes` and `from_bytes` to do this conversion. Using - // `transmute` instead makes the compiler check that the two types - // have the same size, which eliminates the error case of - // `from_bytes`. - unsafe { std::mem::transmute::(packed) } - } - - fn as_timestamp(self) -> Result { - // Safety: same as above in `from_timestamp` - let packed = - unsafe { std::mem::transmute::(self) }; - TruncatedTimestamp::from_already_truncated( - packed.truncated_seconds.get(), - packed.nanoseconds.get(), - ) + (flags, size, mtime) } } @@ -610,20 +575,17 @@ }; on_disk_nodes.push(match node { NodeRef::InMemory(path, node) => { - let (flags, data) = match &node.data { + let (flags, size, mtime) = match &node.data { dirstate_map::NodeData::Entry(entry) => { - Entry::from_dirstate_entry(entry) + Node::from_dirstate_entry(entry) } dirstate_map::NodeData::CachedDirectory { mtime } => { - (Flags::HAS_MTIME, Entry::from_timestamp(*mtime)) + (Flags::HAS_MTIME, 0.into(), (*mtime).into()) } dirstate_map::NodeData::None => ( Flags::empty(), - Entry { - _padding: 0.into(), - size: 0.into(), - mtime: 0.into(), - }, + 0.into(), + PackedTruncatedTimestamp::null(), ), }; Node { @@ -641,7 +603,8 @@ .tracked_descendants_count .into(), flags, - data, + size, + mtime, } } NodeRef::OnDisk(node) => Node { @@ -725,3 +688,33 @@ .expect("dirstate-v2 path length overflow") .into() } + +impl From for PackedTruncatedTimestamp { + fn from(timestamp: TruncatedTimestamp) -> Self { + Self { + truncated_seconds: timestamp.truncated_seconds().into(), + nanoseconds: timestamp.nanoseconds().into(), + } + } +} + +impl TryFrom for TruncatedTimestamp { + type Error = DirstateV2ParseError; + + fn try_from( + timestamp: PackedTruncatedTimestamp, + ) -> Result { + Self::from_already_truncated( + timestamp.truncated_seconds.get(), + timestamp.nanoseconds.get(), + ) + } +} +impl PackedTruncatedTimestamp { + fn null() -> Self { + Self { + truncated_seconds: 0.into(), + nanoseconds: 0.into(), + } + } +}