diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -380,7 +380,7 @@ class changelog(revlog.revlog): - def __init__(self, opener, trypending=False): + def __init__(self, opener, trypending=False, concurrencychecker=None): """Load a changelog revlog using an opener. If ``trypending`` is true, we attempt to load the index from a @@ -389,6 +389,9 @@ revision) data for a transaction that hasn't been finalized yet. It exists in a separate file to facilitate readers (such as hooks processes) accessing data before a transaction is finalized. + + ``concurrencychecker`` will be passed to the revlog init function, see + the documentation there. """ if trypending and opener.exists(b'00changelog.i.a'): indexfile = b'00changelog.i.a' @@ -404,6 +407,7 @@ checkambig=True, mmaplargeindex=True, persistentnodemap=opener.options.get(b'persistent-nodemap', False), + concurrencychecker=concurrencychecker, ) if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1): diff --git a/mercurial/configitems.py b/mercurial/configitems.py --- a/mercurial/configitems.py +++ b/mercurial/configitems.py @@ -580,6 +580,11 @@ default=0, ) coreconfigitem( + b'debug', + b'revlog.verifyposition.changelog', + default=b'', +) +coreconfigitem( b'defaults', b'.*', default=None, diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -84,7 +84,10 @@ stringutil, ) -from .revlogutils import constants as revlogconst +from .revlogutils import ( + concurrency_checker as revlogchecker, + constants as revlogconst, +) release = lockmod.release urlerr = util.urlerr @@ -1639,7 +1642,10 @@ def changelog(self): # load dirstate before changelog to avoid race see issue6303 self.dirstate.prefetch_parents() - return self.store.changelog(txnutil.mayhavepending(self.root)) + return self.store.changelog( + txnutil.mayhavepending(self.root), + concurrencychecker=revlogchecker.get_checker(self.ui, b'changelog'), + ) @storecache(b'00manifest.i') def manifestlog(self): diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -421,6 +421,11 @@ If `upperboundcomp` is not None, this is the expected maximal gain from compression for the data content. + + `concurrencychecker` is an optional function that receives 3 arguments: a + file handle, a filename, and an expected position. It should check whether + the current position in the file handle is valid, and log/warn/fail (by + raising). """ _flagserrorclass = error.RevlogError @@ -435,6 +440,7 @@ censorable=False, upperboundcomp=None, persistentnodemap=False, + concurrencychecker=None, ): """ create a revlog object @@ -495,6 +501,8 @@ self._loadindex() + self._concurrencychecker = concurrencychecker + def _loadindex(self): mmapindexthreshold = None opts = self.opener.options @@ -2288,6 +2296,21 @@ curr = len(self) prev = curr - 1 offset = self.end(prev) + + if self._concurrencychecker: + if self._inline: + # offset is "as if" it were in the .d file, so we need to add on + # the size of the entry metadata. + self._concurrencychecker( + ifh, self.indexfile, offset + curr * self._io.size + ) + else: + # Entries in the .i are a consistent size. + self._concurrencychecker( + ifh, self.indexfile, curr * self._io.size + ) + self._concurrencychecker(dfh, self.datafile, offset) + p1r, p2r = self.rev(p1), self.rev(p2) # full versions are inserted when the needed deltas diff --git a/mercurial/revlogutils/concurrency_checker.py b/mercurial/revlogutils/concurrency_checker.py new file mode 100644 --- /dev/null +++ b/mercurial/revlogutils/concurrency_checker.py @@ -0,0 +1,38 @@ +from ..i18n import _ +from .. import error + + +def get_checker(ui, revlog_name=b'changelog'): + """Get a function that checks file handle position is as expected. + + This is used to ensure that files haven't been modified outside of our + knowledge (such as on a networked filesystem, if `hg debuglocks` was used, + or writes to .hg that ignored locks happened). + + Due to revlogs supporting a concept of buffered, delayed, or diverted + writes, we're allowing the files to be shorter than expected (the data may + not have been written yet), but they can't be longer. + + Please note that this check is not perfect; it can't detect all cases (there + may be false-negatives/false-OKs), but it should never claim there's an + issue when there isn't (false-positives/false-failures). + """ + + vpos = ui.config(b'debug', b'revlog.verifyposition.' + revlog_name) + # Avoid any `fh.tell` cost if this isn't enabled. + if not vpos or vpos not in [b'log', b'warn', b'fail']: + return None + + def _checker(fh, fn, expected): + if fh.tell() <= expected: + return + + msg = _(b'%s: file cursor at position %d, expected %d') + # Always log if we're going to warn or fail. + ui.log(b'debug', msg + b'\n', fn, fh.tell(), expected) + if vpos == b'warn': + ui.warn((msg + b'\n') % (fn, fh.tell(), expected)) + elif vpos == b'fail': + raise error.RevlogError(msg % (fn, fh.tell(), expected)) + + return _checker diff --git a/mercurial/store.py b/mercurial/store.py --- a/mercurial/store.py +++ b/mercurial/store.py @@ -433,8 +433,12 @@ l.sort() return l - def changelog(self, trypending): - return changelog.changelog(self.vfs, trypending=trypending) + def changelog(self, trypending, concurrencychecker=None): + return changelog.changelog( + self.vfs, + trypending=trypending, + concurrencychecker=concurrencychecker, + ) def manifestlog(self, repo, storenarrowmatch): rootstore = manifest.manifestrevlog(self.vfs) diff --git a/tests/test-racy-mutations.t b/tests/test-racy-mutations.t new file mode 100644 --- /dev/null +++ b/tests/test-racy-mutations.t @@ -0,0 +1,102 @@ +#testcases skip-detection fail-if-detected + +Test situations that "should" only be reproducible: +- on networked filesystems, or +- user using `hg debuglocks` to eliminate the lock file, or +- something (that doesn't respect the lock file) writing to the .hg directory +while we're running + + $ hg init a + $ cd a + + $ cat > "$TESTTMP/waitlock_editor.sh" < [ -n "\${WAITLOCK_ANNOUNCE:-}" ] && touch "\${WAITLOCK_ANNOUNCE}" + > f="\${WAITLOCK_FILE}" + > start=\`date +%s\` + > timeout=5 + > while [ \\( ! -f \$f \\) -a \\( ! -L \$f \\) ]; do + > now=\`date +%s\` + > if [ "\`expr \$now - \$start\`" -gt \$timeout ]; then + > echo "timeout: \$f was not created in \$timeout seconds (it is now \$(date +%s))" + > exit 1 + > fi + > sleep 0.1 + > done + > if [ \$# -gt 1 ]; then + > cat "\$@" + > fi + > EOF + $ chmod +x "$TESTTMP/waitlock_editor.sh" + +Things behave differently if we don't already have a 00changelog.i file when +this all starts, so let's make one. + + $ echo r0 > r0 + $ hg commit -qAm 'r0' + +Start an hg commit that will take a while + $ EDITOR_STARTED="$(pwd)/.editor_started" + $ MISCHIEF_MANAGED="$(pwd)/.mischief_managed" + $ JOBS_FINISHED="$(pwd)/.jobs_finished" + +#if fail-if-detected + $ cat >> .hg/hgrc << EOF + > [debug] + > revlog.verifyposition.changelog = fail + > EOF +#endif + + $ echo foo > foo + $ (WAITLOCK_ANNOUNCE="${EDITOR_STARTED}" \ + > WAITLOCK_FILE="${MISCHIEF_MANAGED}" \ + > HGEDITOR="$TESTTMP/waitlock_editor.sh" \ + > hg commit -qAm 'r1 (foo)' --edit foo > .foo_commit_out 2>&1 ; touch "${JOBS_FINISHED}") & + +Wait for the "editor" to actually start + $ WAITLOCK_FILE="${EDITOR_STARTED}" "$TESTTMP/waitlock_editor.sh" + +Break the locks, and make another commit. + $ hg debuglocks -LW + $ echo bar > bar + $ hg commit -qAm 'r2 (bar)' bar + $ hg debugrevlogindex -c + rev linkrev nodeid p1 p2 + 0 0 222799e2f90b 000000000000 000000000000 + 1 1 6f124f6007a0 222799e2f90b 000000000000 + +Awaken the editor from that first commit + $ touch "${MISCHIEF_MANAGED}" +And wait for it to finish + $ WAITLOCK_FILE="${JOBS_FINISHED}" "$TESTTMP/waitlock_editor.sh" + +#if skip-detection +(Ensure there was no output) + $ cat .foo_commit_out +And observe a corrupted repository -- rev 2's linkrev is 1, which should never +happen for the changelog (the linkrev should always refer to itself). + $ hg debugrevlogindex -c + rev linkrev nodeid p1 p2 + 0 0 222799e2f90b 000000000000 000000000000 + 1 1 6f124f6007a0 222799e2f90b 000000000000 + 2 1 ac80e6205bb2 222799e2f90b 000000000000 +#endif + +#if fail-if-detected + $ cat .foo_commit_out + transaction abort! + rollback completed + note: commit message saved in .hg/last-message.txt + note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it + abort: 00changelog.i: file cursor at position 249, expected 121 +And no corruption in the changelog. + $ hg debugrevlogindex -c + rev linkrev nodeid p1 p2 + 0 0 222799e2f90b 000000000000 000000000000 + 1 1 6f124f6007a0 222799e2f90b 000000000000 +And, because of transactions, there's none in the manifestlog either. + $ hg debugrevlogindex -m + rev linkrev nodeid p1 p2 + 0 0 7b7020262a56 000000000000 000000000000 + 1 1 ad3fe36d86d9 7b7020262a56 000000000000 +#endif +