diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -133,7 +133,7 @@ t->size = 0; } if (has_meaningful_mtime) { - t->flags |= dirstate_flag_has_file_mtime; + t->flags |= dirstate_flag_has_mtime; t->mtime = mtime; } else { t->mtime = 0; @@ -248,7 +248,7 @@ { if (dirstate_item_c_removed(self)) { return 0; - } else if (!(self->flags & dirstate_flag_has_file_mtime) || + } else if (!(self->flags & dirstate_flag_has_mtime) || !(self->flags & dirstate_flag_p1_tracked) || !(self->flags & dirstate_flag_wc_tracked) || (self->flags & dirstate_flag_p2_info)) { @@ -357,7 +357,7 @@ t->flags = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | dirstate_flag_has_meaningful_data | - dirstate_flag_has_file_mtime); + dirstate_flag_has_mtime); t->mode = mode; t->size = size; t->mtime = mtime; @@ -401,7 +401,7 @@ if (t->flags & dirstate_flag_expected_state_is_modified) { t->flags &= ~(dirstate_flag_expected_state_is_modified | dirstate_flag_has_meaningful_data | - dirstate_flag_has_file_mtime); + dirstate_flag_has_mtime); } t->mode = 0; if (t->flags & dirstate_flag_has_meaningful_data) { @@ -423,7 +423,7 @@ to make sure it is correct. */ static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self) { - self->flags &= ~dirstate_flag_has_file_mtime; + self->flags &= ~dirstate_flag_has_mtime; Py_RETURN_NONE; } @@ -437,7 +437,7 @@ } self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | dirstate_flag_has_meaningful_data | - dirstate_flag_has_file_mtime; + dirstate_flag_has_mtime; self->mode = mode; self->size = size; self->mtime = mtime; @@ -447,7 +447,7 @@ static PyObject *dirstate_item_set_tracked(dirstateItemObject *self) { self->flags |= dirstate_flag_wc_tracked; - self->flags &= ~dirstate_flag_has_file_mtime; + self->flags &= ~dirstate_flag_has_mtime; Py_RETURN_NONE; } @@ -465,7 +465,7 @@ if (self->flags & dirstate_flag_p2_info) { self->flags &= ~(dirstate_flag_p2_info | dirstate_flag_has_meaningful_data | - dirstate_flag_has_file_mtime); + dirstate_flag_has_mtime); self->mode = 0; self->mtime = 0; self->size = 0; diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h --- a/mercurial/cext/util.h +++ b/mercurial/cext/util.h @@ -35,8 +35,8 @@ static const int dirstate_flag_p1_tracked = 1 << 1; static const int dirstate_flag_p2_info = 1 << 2; static const int dirstate_flag_has_meaningful_data = 1 << 3; -static const int dirstate_flag_has_file_mtime = 1 << 4; -static const int dirstate_flag_has_directory_mtime = 1 << 5; +static const int dirstate_flag_has_mtime = 1 << 4; +static const int dirstate_flag_directory = 1 << 5; static const int dirstate_flag_mode_exec_perm = 1 << 6; static const int dirstate_flag_mode_is_symlink = 1 << 7; static const int dirstate_flag_expected_state_is_modified = 1 << 8; diff --git a/mercurial/dirstateutils/v2.py b/mercurial/dirstateutils/v2.py --- a/mercurial/dirstateutils/v2.py +++ b/mercurial/dirstateutils/v2.py @@ -56,6 +56,9 @@ assert TREE_METADATA_SIZE == TREE_METADATA.size assert NODE_SIZE == NODE.size +# match constant in mercuria/pure/parsers.py +DIRSTATE_V2_DIRECTORY = 1 << 5 + def parse_dirstate(map, copy_map, data, tree_metadata): """parse a full v2-dirstate from a binary data into dictionnaries: @@ -83,7 +86,7 @@ This is used by parse_dirstate to recursively fill `map` and `copy_map`. All directory specific information is ignored and do not need any - processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED) + processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED) """ for i in range(len): node_start = start + NODE_SIZE * i @@ -151,7 +154,7 @@ mtime_ns = 0 else: # There are no mtime-cached directories in the Python implementation - flags = 0 + flags = DIRSTATE_V2_DIRECTORY size = 0 mtime_s = 0 mtime_ns = 0 diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt --- a/mercurial/helptext/internals/dirstate-v2.txt +++ b/mercurial/helptext/internals/dirstate-v2.txt @@ -379,8 +379,8 @@ P1_TRACKED = 1 << 1 P2_INFO = 1 << 2 HAS_MODE_AND_SIZE = 1 << 3 - HAS_FILE_MTIME = 1 << 4 - HAS_DIRECTORY_MTIME = 1 << 5 + HAS_MTIME = 1 << 4 + DIRECTORY = 1 << 5 MODE_EXEC_PERM = 1 << 6 MODE_IS_SYMLINK = 1 << 7 EXPECTED_STATE_IS_MODIFIED = 1 << 8 @@ -476,37 +476,44 @@ If this is unset the expected size, permission, and file type are unknown. The `size` field is unused (set to zero). -`HAS_FILE_MTIME` - Must be unset for untracked nodes. - If this and `HAS_DIRECTORY_MTIME` are both unset, - the `mtime` field is unused (set to zero). - If this is set, `mtime` is the expected modification time. +`HAS_MTIME` + The nodes contains a "valid" last modification time in the `mtime` field. + -`HAS_DIRECTORY_MTIME` - Must be unset for file tracked anywhere. - If this and `HAS_DIRECTORY_MTIME` are both unset, - the `mtime` field is unused (set to zero). - If this is set, at some point, - this path in the working directory was observed: - - - To be a directory - - With the modification time given in `mtime` - - That time was already strictly in the past when observed, - meaning that later changes cannot happen in the same clock tick - and must cause a different modification time - (unless the system clock jumps back and we get unlucky, - which is not impossible but deemed unlikely enough). - - All direct children of this directory - (as returned by `std::fs::read_dir`) - either have a corresponding dirstate node, - or are ignored by ignore patterns whose hash is in tree metadata. + It means the `mtime` was already strictly in the past when observed, + meaning that later changes cannot happen in the same clock tick + and must cause a different modification time + (unless the system clock jumps back and we get unlucky, + which is not impossible but deemed unlikely enough). This means that if `std::fs::symlink_metadata` later reports the same modification time and ignored patterns haven’t changed, - a run of status that is not listing ignored files - can skip calling `std::fs::read_dir` again for this directory, + we can assume the node to be unchangeset on disk. + + The `mtime` field can then be used to skip more expensive lookup when + checking the status of "tracked" nodes. + + It can also be set for node where `DIRECTORY` is set. + See `DIRECTORY` documentation for details. + +`DIRECTORY` + When set, this entry will match a directory that exist or existed on the + file system. + + * When `HAS_MTIME` is set a directory has been seen on the file system and + `mtime` matches its last modificiation time. However, `HAS_MTIME` not being set + does not indicate the lack of directory on the file system. + + * When not tracked anywhere, this node does not represent an ignored or + unknown files on disk. + + If `HAS_MTIME` is set + and `mtime` match the last modification time of the directory on disk, + the directory is unchanged + and we can skip calling `std::fs::read_dir` again for this directory, and iterate child dirstate nodes instead. + (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken in account) `MODE_EXEC_PERM` Must be unset if `HAS_MODE_AND_SIZE` is unset. @@ -524,7 +531,7 @@ Must be unset for untracked nodes. For: - a file tracked anywhere - - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`) + - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`) - if that metadata matches metadata found in the working directory with `stat` This bit indicates the status of the file. @@ -540,7 +547,7 @@ `ALL_UNKNOWN_RECORDED` If set, all "unknown" children existing on disk (at the time of the last status) have been recorded and the `mtime` associated with - `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" file + `DIRECTORY` can be used for optimization even when "unknown" file are listed. Note that the amount recorded "unknown" children can still be zero if None @@ -553,7 +560,7 @@ `ALL_IGNORED_RECORDED` If set, all "ignored" children existing on disk (at the time of the last status) have been recorded and the `mtime` associated with - `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" file + `DIRECTORY` can be used for optimization even when "ignored" file are listed. Note that the amount recorded "ignored" children can still be zero if None diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -49,8 +49,8 @@ DIRSTATE_V2_P1_TRACKED = 1 << 1 DIRSTATE_V2_P2_INFO = 1 << 2 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3 -DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4 -_DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available +DIRSTATE_V2_HAS_MTIME = 1 << 4 +DIRSTATE_V2_DIRECTORY = 1 << 5 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8 @@ -137,7 +137,7 @@ def from_v2_data(cls, flags, size, mtime): """Build a new DirstateItem object from V2 data""" has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE) - has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME) + has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME) mode = None if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED: @@ -441,7 +441,7 @@ if stat.S_ISLNK(self.mode): flags |= DIRSTATE_V2_MODE_IS_SYMLINK if self._mtime is not None: - flags |= DIRSTATE_V2_HAS_FILE_MTIME + flags |= DIRSTATE_V2_HAS_MTIME if self._fallback_exec is not None: flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -106,8 +106,8 @@ const P1_TRACKED = 1 << 1; const P2_INFO = 1 << 2; const HAS_MODE_AND_SIZE = 1 << 3; - const HAS_FILE_MTIME = 1 << 4; - const HAS_DIRECTORY_MTIME = 1 << 5; + const HAS_MTIME = 1 << 4; + const DIRECTORY = 1 << 5; const MODE_EXEC_PERM = 1 << 6; const MODE_IS_SYMLINK = 1 << 7; const EXPECTED_STATE_IS_MODIFIED = 1 << 8; @@ -328,16 +328,13 @@ pub(super) fn cached_directory_mtime( &self, ) -> Result, DirstateV2ParseError> { - // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we - // ignore the mtime if the flag is set. - if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) + // For now we do not have code to handle lacks of ALL_UNKNOWN_RECORDED, + // so we ignore the mtime if the flag is unset. + if self.flags().contains(Flags::DIRECTORY) + && self.flags().contains(Flags::HAS_MTIME) && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED) { - if self.flags().contains(Flags::HAS_FILE_MTIME) { - Err(DirstateV2ParseError) - } else { - Ok(Some(self.mtime.try_into()?)) - } + Ok(Some(self.mtime.try_into()?)) } else { Ok(None) } @@ -369,7 +366,8 @@ } else { None }; - let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) + let mtime = if self.flags().contains(Flags::HAS_MTIME) + && !self.flags().contains(Flags::DIRECTORY) && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) { Some(self.mtime.truncated_seconds.into()) @@ -449,7 +447,7 @@ 0.into() }; let mtime = if let Some(m) = mtime_opt { - flags.insert(Flags::HAS_FILE_MTIME); + flags.insert(Flags::HAS_MTIME); PackedTruncatedTimestamp { truncated_seconds: m.into(), nanoseconds: 0.into(), @@ -630,13 +628,14 @@ // We never set ALL_IGNORED_RECORDED since we // don't track that case // currently. - Flags::HAS_DIRECTORY_MTIME + Flags::DIRECTORY + | Flags::HAS_MTIME | Flags::ALL_UNKNOWN_RECORDED, 0.into(), (*mtime).into(), ), dirstate_map::NodeData::None => ( - Flags::empty(), + Flags::DIRECTORY, 0.into(), PackedTruncatedTimestamp::null(), ),