diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py +++ b/hgext/largefiles/lfutil.py @@ -244,7 +244,7 @@ def lfdirstatestatus(lfdirstate, repo): pctx = repo[b'.'] match = matchmod.always() - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( match, subrepos=[], ignored=False, clean=False, unknown=False ) modified, clean = s.modified, s.clean @@ -263,6 +263,10 @@ size = st.st_size mtime = timestamp.mtime_of(st) cache_data = (mode, size, mtime) + # We should consider using the mtime_boundary + # logic here, but largefile never actually had + # ambiguity protection before, so this confuse + # the tests and need more thinking. lfdirstate.set_clean(lfile, cache_data) return s @@ -670,7 +674,7 @@ # large. lfdirstate = openlfdirstate(ui, repo) dirtymatch = matchmod.always() - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False ) modifiedfiles = unsure + s.modified + s.added + s.removed diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py --- a/hgext/largefiles/overrides.py +++ b/hgext/largefiles/overrides.py @@ -1519,7 +1519,7 @@ return orig(repo, matcher, prefix, uipathfn, opts) # Get the list of missing largefiles so we can remove them lfdirstate = lfutil.openlfdirstate(repo.ui, repo) - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( matchmod.always(), subrepos=[], ignored=False, @@ -1746,7 +1746,7 @@ # (*1) deprecated, but used internally (e.g: "rebase --collapse") lfdirstate = lfutil.openlfdirstate(repo.ui, repo) - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( matchmod.always(), subrepos=[], ignored=False, diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py +++ b/hgext/largefiles/reposetup.py @@ -197,7 +197,7 @@ match._files = [f for f in match._files if sfindirstate(f)] # Don't waste time getting the ignored and unknown # files from lfdirstate - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( match, subrepos=[], ignored=False, @@ -230,6 +230,10 @@ size = s.st_size mtime = timestamp.mtime_of(s) cache_data = (mode, size, mtime) + # We should consider using the mtime_boundary + # logic here, but largefile never actually had + # ambiguity protection before, so this confuse + # the tests and need more thinking. lfdirstate.set_clean(lfile, cache_data) else: tocheck = unsure + modified + added + clean diff --git a/hgext/remotefilelog/__init__.py b/hgext/remotefilelog/__init__.py --- a/hgext/remotefilelog/__init__.py +++ b/hgext/remotefilelog/__init__.py @@ -520,7 +520,7 @@ # Prefetch files before status attempts to look at their size and contents -def checklookup(orig, self, files): +def checklookup(orig, self, files, mtime_boundary): repo = self._repo if isenabled(repo): prefetchfiles = [] @@ -530,7 +530,7 @@ prefetchfiles.append((f, hex(parent.filenode(f)))) # batch fetch the needed files from the server repo.fileservice.prefetch(prefetchfiles) - return orig(self, files) + return orig(self, files, mtime_boundary) # Prefetch the logic that compares added and removed files for renames diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -1796,13 +1796,14 @@ sane.append(f) return sane - def _checklookup(self, files): + def _checklookup(self, files, mtime_boundary): # check for any possibly clean files if not files: - return [], [], [] + return [], [], [], [] modified = [] deleted = [] + clean = [] fixup = [] pctx = self._parents[0] # do a full compare of any files that might have changed @@ -1816,22 +1817,35 @@ or pctx[f].cmp(self[f]) ): modified.append(f) + elif mtime_boundary is None: + clean.append(f) else: - # XXX note that we have a race windows here since we gather - # the stats after we compared so the file might have - # changed. - # - # However this have always been the case and the - # refactoring moving the code here is improving the - # situation by narrowing the race and moving the two steps - # (comparison + stat) in the same location. - # - # Making this code "correct" is now possible. s = self[f].lstat() mode = s.st_mode size = s.st_size - mtime = timestamp.mtime_of(s) - fixup.append((f, (mode, size, mtime))) + file_mtime = timestamp.mtime_of(s) + cache_info = (mode, size, file_mtime) + + file_second = file_mtime[0] + boundary_second = mtime_boundary[0] + # If the mtime of the ambiguous file is younger (or equal) + # to the starting point of the `status` walk, we cannot + # garantee that another, racy, write will not happen right + # after with the same mtime and we cannot cache the + # information. + # + # However is the mtime is far away in the future, this is + # likely some mismatch between the current clock and + # previous file system operation. So mtime more than one days + # in the future are considered fine. + if ( + boundary_second + <= file_second + < (3600 * 24 + boundary_second) + ): + clean.append(f) + else: + fixup.append((f, cache_info)) except (IOError, OSError): # A file become inaccessible in between? Mark it as deleted, # matching dirstate behavior (issue5584). @@ -1841,7 +1855,7 @@ # it's in the dirstate. deleted.append(f) - return modified, deleted, fixup + return modified, deleted, clean, fixup def _poststatusfixup(self, status, fixup): """update dirstate for files that are actually clean""" @@ -1895,17 +1909,21 @@ subrepos = [] if b'.hgsub' in self: subrepos = sorted(self.substate) - cmp, s = self._repo.dirstate.status( + cmp, s, mtime_boundary = self._repo.dirstate.status( match, subrepos, ignored=ignored, clean=clean, unknown=unknown ) # check for any possibly clean files fixup = [] if cmp: - modified2, deleted2, fixup = self._checklookup(cmp) + modified2, deleted2, clean_set, fixup = self._checklookup( + cmp, mtime_boundary + ) s.modified.extend(modified2) s.deleted.extend(deleted2) + if clean_set and clean: + s.clean.extend(clean_set) if fixup and clean: s.clean.extend((f for f, _ in fixup)) diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -1308,11 +1308,20 @@ # Some matchers have yet to be implemented use_rust = False + # Get the time from the filesystem so we can disambiguate files that + # appear modified in the present or future. + try: + mtime_boundary = timestamp.get_fs_now(self._opener) + except OSError: + # In largefiles or readonly context + mtime_boundary = None + if use_rust: try: - return self._rust_status( + res = self._rust_status( match, listclean, listignored, listunknown ) + return res + (mtime_boundary,) except rustmod.FallbackError: pass @@ -1402,7 +1411,7 @@ status = scmutil.status( modified, added, removed, deleted, unknown, ignored, clean ) - return (lookup, status) + return (lookup, status, mtime_boundary) def matches(self, match): """ diff --git a/mercurial/narrowspec.py b/mercurial/narrowspec.py --- a/mercurial/narrowspec.py +++ b/mercurial/narrowspec.py @@ -323,7 +323,7 @@ removedmatch = matchmod.differencematcher(oldmatch, newmatch) ds = repo.dirstate - lookup, status = ds.status( + lookup, status, _mtime_boundary = ds.status( removedmatch, subrepos=[], ignored=True, clean=True, unknown=True ) trackeddirty = status.modified + status.added diff --git a/tests/test-dirstate-race.t b/tests/test-dirstate-race.t --- a/tests/test-dirstate-race.t +++ b/tests/test-dirstate-race.t @@ -66,11 +66,11 @@ > ) > def extsetup(ui): > extensions.wrapfunction(context.workingctx, '_checklookup', overridechecklookup) - > def overridechecklookup(orig, self, files): + > def overridechecklookup(orig, self, *args, **kwargs): > # make an update that changes the dirstate from underneath > self._repo.ui.system(br"sh '$TESTTMP/dirstaterace.sh'", > cwd=self._repo.root) - > return orig(self, files) + > return orig(self, *args, **kwargs) > EOF $ hg debugrebuilddirstate