diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -26,6 +26,14 @@ raise error.RevlogError(b'unknown node: %s' % x) +def test_race_hook_1(): + """hook point for test + + This let tests to have things happens between the docket reading and the + data reading""" + pass + + def persisted_data(revlog): """read the nodemap for a revlog from disk""" if revlog._nodemap_file is None: @@ -50,6 +58,8 @@ filename = _rawdata_filepath(revlog, docket) use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap") + + test_race_hook_1() try: with revlog.opener(filename) as fd: if use_mmap: diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -613,9 +613,207 @@ $ hg id -r . --traceback 90d5d3ba2fc4 tip +(be a good citizen and regenerate the nodemap) + $ hg debugupdatecaches + $ hg debugnodemap --metadata + uid: * (glob) + tip-rev: 5005 + tip-node: 90d5d3ba2fc47db50f712570487cb261a68c8ffe + data-length: 121088 + data-unused: 0 + data-unused: 0.000% + +Check race condition when multiple process write new data to the repository +--------------------------------------------------------------------------- + +In this test, we check that two writers touching the repositories will not +overwrite each other data. This test is prompted by the existent of issue6554. +Where a writer ended up using and outdated docket to update the repository. See +the dedicated extension for details on the race windows and read/write schedule +necessary to end up in this situation: testlib/persistent-nodemap-race-ext.py + +The issue was initially observed on a server with a high push trafic, but it +can be reproduced using a share and two commiting process which seems simpler. + +The test is Rust only as the other implementation does not use the same +read/write patterns. + + $ cd .. + +#if rust + + $ cp -R test-repo race-repo + $ hg share race-repo ./other-wc --config format.use-share-safe=yes + updating working directory + 5001 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg debugformat -R ./race-repo | egrep 'share-safe|persistent-nodemap' + share-safe: yes + persistent-nodemap: yes + $ hg debugformat -R ./other-wc/ | egrep 'share-safe|persistent-nodemap' + share-safe: yes + persistent-nodemap: yes + $ hg -R ./other-wc update 'min(head())' + 3 files updated, 0 files merged, 2 files removed, 0 files unresolved + $ hg -R ./race-repo debugnodemap --metadata + uid: 43c37dde + tip-rev: 5005 + tip-node: 90d5d3ba2fc47db50f712570487cb261a68c8ffe + data-length: 121088 + data-unused: 0 + data-unused: 0.000% + $ hg -R ./race-repo log -G -r 'head()' + @ changeset: 5005:90d5d3ba2fc4 + | tag: tip + ~ user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: a2 + + o changeset: 5001:16395c3cf7e2 + | user: test + ~ date: Thu Jan 01 00:00:00 1970 +0000 + summary: foo + + $ hg -R ./other-wc log -G -r 'head()' + o changeset: 5005:90d5d3ba2fc4 + | tag: tip + ~ user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: a2 + + @ changeset: 5001:16395c3cf7e2 + | user: test + ~ date: Thu Jan 01 00:00:00 1970 +0000 + summary: foo + + $ echo left-side-race > race-repo/left-side-race + $ hg -R ./race-repo/ add race-repo/left-side-race + + $ echo right-side-race > ./other-wc/right-side-race + $ hg -R ./other-wc/ add ./other-wc/right-side-race + + $ mkdir sync-files + $ mkdir outputs + $ ( + > hg -R ./race-repo/ commit -m left-side-commit \ + > --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \ + > --config 'devel.nodemap-race.role=left'; + > touch sync-files/left-done + > ) > outputs/left.txt 2>&1 & + $ ( + > hg -R ./other-wc/ commit -m right-side-commit \ + > --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \ + > --config 'devel.nodemap-race.role=right'; + > touch sync-files/right-done + > ) > outputs/right.txt 2>&1 & + $ ( + > hg -R ./race-repo/ check-nodemap-race \ + > --config "extensions.race=${RUNTESTDIR}/testlib/persistent-nodemap-race-ext.py" \ + > --config 'devel.nodemap-race.role=reader'; + > touch sync-files/reader-done + > ) > outputs/reader.txt 2>&1 & + $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/left-done + $ cat outputs/left.txt + docket-details: + uid: 43c37dde + actual-tip: 5005 + tip-rev: 5005 + data-length: 121088 + nodemap-race: left side locked and ready to commit + docket-details: + uid: 43c37dde + actual-tip: 5005 + tip-rev: 5005 + data-length: 121088 + finalized changelog write + persisting changelog nodemap + new data start at 121088 + persisted changelog nodemap + docket-details: + uid: 43c37dde + actual-tip: 5006 + tip-rev: 5006 + data-length: 121280 + $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/right-done + $ cat outputs/right.txt + nodemap-race: right side start of the locking sequence + nodemap-race: right side reading changelog + nodemap-race: right side reading of changelog is done + docket-details: + uid: 43c37dde + actual-tip: 5006 + tip-rev: 5005 + data-length: 121088 + nodemap-race: right side ready to wait for the lock + nodemap-race: right side locked and ready to commit + docket-details: + uid: 43c37dde + actual-tip: 5006 + tip-rev: 5005 + data-length: 121088 + right ready to write, waiting for reader + right proceeding with writing its changelog index and nodemap + finalized changelog write + persisting changelog nodemap + new data start at 121088 + persisted changelog nodemap + docket-details: + uid: 43c37dde + actual-tip: 5007 + tip-rev: 5007 + data-length: 121472 + $ sh "$RUNTESTDIR"/testlib/wait-on-file 10 sync-files/reader-done + $ cat outputs/reader.txt + reader: reading changelog + reader ready to read the changelog, waiting for right + reader: nodemap docket read + record-data-length: 121280 + actual-data-length: 121280 + file-actual-length: 121472 + reader: changelog read + docket-details: + uid: 43c37dde + actual-tip: 5006 + tip-rev: 5006 + data-length: 121280 + tip-rev: 5006 + tip-node: 492901161367 + node-rev: 5006 + error while checking revision: 18 (known-bad-output !) + Inconsistency: Revision 5007 found in nodemap is not in revlog indexi (known-bad-output !) + + $ hg -R ./race-repo log -G -r 'head()' + o changeset: 5007:ac4a2abde241 + | tag: tip + ~ parent: 5001:16395c3cf7e2 + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: right-side-commit + + @ changeset: 5006:492901161367 + | user: test + ~ date: Thu Jan 01 00:00:00 1970 +0000 + summary: left-side-commit + + $ hg -R ./other-wc log -G -r 'head()' + @ changeset: 5007:ac4a2abde241 + | tag: tip + ~ parent: 5001:16395c3cf7e2 + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: right-side-commit + + o changeset: 5006:492901161367 + | user: test + ~ date: Thu Jan 01 00:00:00 1970 +0000 + summary: left-side-commit + +#endif + Test upgrade / downgrade ======================== + $ cd ./test-repo/ + downgrading $ cat << EOF >> .hg/hgrc diff --git a/tests/testlib/persistent-nodemap-race-ext.py b/tests/testlib/persistent-nodemap-race-ext.py new file mode 100644 --- /dev/null +++ b/tests/testlib/persistent-nodemap-race-ext.py @@ -0,0 +1,294 @@ +"""Create the race condition for issue6554 + +The persistent nodemap issues had an issue where a second writer could +overwrite the data that a previous write just wrote. The would break the append +only garantee of the persistent nodemap and could confuse reader. This +extensions create all the necessary synchronisation point to the race condition +to happen. + +It involves 3 process (a writer) (a writer) and + +[1] take the lock and start a transaction +[2] updated `00changelog.i` with the new data +[3] reads: + - the new changelog index `00changelog.i` + - the old `00changelog.n` +[4] update the persistent nodemap: + - writing new data from the last valid offset + - updating the docket (00changelog.n) +[5] release the lock +[6] grab the lock and run `repo.invalidate` +[7] reads: + - the changelog index after write + - the nodemap docket after write +[8] reload the changelog since `00changelog.n` changed + /!\ This is the faulty part in issue 6554, the outdated docket is kept +[9] write: + - the changelog index (00changelog.i) + - the nodemap data (00changelog*.nd) + /!\ if the outdated docket is used, the write starts from the same ofset + /!\ as in [4], overwriting data that wrote in step [4]. + - the nodemap docket (00changelog.n) +[10] reads the nodemap data from `00changelog*.nd` + /!\ if step [9] was wrong, the data matching the docket that + /!\ loaded have been overwritten and the expected root-nodes is no longer + /!\ valid. +""" + +from __future__ import print_function + +import os + +from mercurial.revlogutils.constants import KIND_CHANGELOG + +from mercurial import ( + changelog, + encoding, + extensions, + localrepo, + node, + pycompat, + registrar, + testing, + util, +) + +from mercurial.revlogutils import ( + nodemap as nodemaputil, +) + +configtable = {} +configitem = registrar.configitem(configtable) + +configitem(b'devel', b'nodemap-race.role', default=None) + +cmdtable = {} +command = registrar.command(cmdtable) + +LEFT = b'left' +RIGHT = b'right' +READER = b'reader' + +SYNC_DIR = os.path.join(encoding.environ[b'TESTTMP'], b'sync-files') + +# mark the end of step [1] +FILE_LEFT_LOCKED = os.path.join(SYNC_DIR, b'left-locked') +# mark that step [3] is ready to run. +FILE_RIGHT_READY_TO_LOCK = os.path.join(SYNC_DIR, b'right-ready-to-lock') + +# mark the end of step [2] +FILE_LEFT_CL_DATA_WRITE = os.path.join(SYNC_DIR, b'left-data') +# mark the end of step [4] +FILE_LEFT_CL_NODEMAP_WRITE = os.path.join(SYNC_DIR, b'left-nodemap') +# mark the end of step [3] +FILE_RIGHT_CL_NODEMAP_READ = os.path.join(SYNC_DIR, b'right-nodemap') +# mark that step [9] is read to run +FILE_RIGHT_CL_NODEMAP_PRE_WRITE = os.path.join( + SYNC_DIR, b'right-pre-nodemap-write' +) +# mark that step [9] has run. +FILE_RIGHT_CL_NODEMAP_POST_WRITE = os.path.join( + SYNC_DIR, b'right-post-nodemap-write' +) +# mark that step [7] is ready to run +FILE_READER_READY = os.path.join(SYNC_DIR, b'reader-ready') +# mark that step [7] has run +FILE_READER_READ_DOCKET = os.path.join(SYNC_DIR, b'reader-read-docket') + + +def _print(*args, **kwargs): + print(*args, **kwargs) + + +def _role(repo): + """find the role associated with the process""" + return repo.ui.config(b'devel', b'nodemap-race.role') + + +def wrap_changelog_finalize(orig, cl, tr): + """wrap the update of `00changelog.i` during transaction finalization + + This is useful for synchronisation before or after the file is updated on disk. + """ + role = getattr(tr, '_race_role', None) + if role == RIGHT: + print('right ready to write, waiting for reader') + testing.wait_file(FILE_READER_READY) + testing.write_file(FILE_RIGHT_CL_NODEMAP_PRE_WRITE) + testing.wait_file(FILE_READER_READ_DOCKET) + print('right proceeding with writing its changelog index and nodemap') + ret = orig(cl, tr) + print("finalized changelog write") + if role == LEFT: + testing.write_file(FILE_LEFT_CL_DATA_WRITE) + return ret + + +def wrap_persist_nodemap(orig, tr, revlog, *args, **kwargs): + """wrap the update of `00changelog.n` and `*.nd` during tr finalization + + This is useful for synchronisation before or after the files are updated on + disk. + """ + is_cl = revlog.target[0] == KIND_CHANGELOG + role = getattr(tr, '_race_role', None) + if is_cl: + if role == LEFT: + testing.wait_file(FILE_RIGHT_CL_NODEMAP_READ) + if is_cl: + print("persisting changelog nodemap") + print(" new data start at", revlog._nodemap_docket.data_length) + ret = orig(tr, revlog, *args, **kwargs) + if is_cl: + print("persisted changelog nodemap") + print_nodemap_details(revlog) + if role == LEFT: + testing.write_file(FILE_LEFT_CL_NODEMAP_WRITE) + elif role == RIGHT: + testing.write_file(FILE_RIGHT_CL_NODEMAP_POST_WRITE) + return ret + + +def print_nodemap_details(cl): + """print relevant information about the nodemap docket currently in memory""" + dkt = cl._nodemap_docket + print('docket-details:') + if dkt is None: + print(' ') + return + print(' uid: ', pycompat.sysstr(dkt.uid)) + print(' actual-tip: ', cl.tiprev()) + print(' tip-rev: ', dkt.tip_rev) + print(' data-length:', dkt.data_length) + + +def wrap_persisted_data(orig, revlog): + """print some information about the nodemap information we just read + + Used by the process only. + """ + ret = orig(revlog) + if ret is not None: + docket, data = ret + file_path = nodemaputil._rawdata_filepath(revlog, docket) + file_path = revlog.opener.join(file_path) + file_size = os.path.getsize(file_path) + print('record-data-length:', docket.data_length) + print('actual-data-length:', len(data)) + print('file-actual-length:', file_size) + return ret + + +def sync_read(orig): + """used by to force the race window + + This make sure we read the docker from while reading the datafile + after write. + """ + orig() + testing.write_file(FILE_READER_READ_DOCKET) + print('reader: nodemap docket read') + testing.wait_file(FILE_RIGHT_CL_NODEMAP_POST_WRITE) + + +def uisetup(ui): + class RacedRepo(localrepo.localrepository): + def lock(self, wait=True): + # make sure as the "Wrong" information in memory before + # grabbing the lock + newlock = self._currentlock(self._lockref) is None + if newlock and _role(self) == LEFT: + cl = self.unfiltered().changelog + print_nodemap_details(cl) + elif newlock and _role(self) == RIGHT: + testing.write_file(FILE_RIGHT_READY_TO_LOCK) + print('nodemap-race: right side start of the locking sequence') + testing.wait_file(FILE_LEFT_LOCKED) + testing.wait_file(FILE_LEFT_CL_DATA_WRITE) + self.invalidate(clearfilecache=True) + print('nodemap-race: right side reading changelog') + cl = self.unfiltered().changelog + tiprev = cl.tiprev() + tip = cl.node(tiprev) + tiprev2 = cl.rev(tip) + if tiprev != tiprev2: + raise RuntimeError( + 'bad tip -round-trip %d %d' % (tiprev, tiprev2) + ) + testing.write_file(FILE_RIGHT_CL_NODEMAP_READ) + print('nodemap-race: right side reading of changelog is done') + print_nodemap_details(cl) + testing.wait_file(FILE_LEFT_CL_NODEMAP_WRITE) + print('nodemap-race: right side ready to wait for the lock') + ret = super(RacedRepo, self).lock(wait=wait) + if newlock and _role(self) == LEFT: + print('nodemap-race: left side locked and ready to commit') + testing.write_file(FILE_LEFT_LOCKED) + testing.wait_file(FILE_RIGHT_READY_TO_LOCK) + cl = self.unfiltered().changelog + print_nodemap_details(cl) + elif newlock and _role(self) == RIGHT: + print('nodemap-race: right side locked and ready to commit') + cl = self.unfiltered().changelog + print_nodemap_details(cl) + return ret + + def transaction(self, *args, **kwargs): + # duck punch the role on the transaction to help other pieces of code + tr = super(RacedRepo, self).transaction(*args, **kwargs) + tr._race_role = _role(self) + return tr + + localrepo.localrepository = RacedRepo + + extensions.wrapfunction( + nodemaputil, 'persist_nodemap', wrap_persist_nodemap + ) + extensions.wrapfunction( + changelog.changelog, '_finalize', wrap_changelog_finalize + ) + + +def reposetup(ui, repo): + if _role(repo) == READER: + extensions.wrapfunction( + nodemaputil, 'persisted_data', wrap_persisted_data + ) + extensions.wrapfunction(nodemaputil, 'test_race_hook_1', sync_read) + + class ReaderRepo(repo.__class__): + @util.propertycache + def changelog(self): + print('reader ready to read the changelog, waiting for right') + testing.write_file(FILE_READER_READY) + testing.wait_file(FILE_RIGHT_CL_NODEMAP_PRE_WRITE) + return super(ReaderRepo, self).changelog + + repo.__class__ = ReaderRepo + + +@command(b'check-nodemap-race') +def cmd_check_nodemap_race(ui, repo): + """Run proper access in the race Windows and check nodemap content""" + repo = repo.unfiltered() + print('reader: reading changelog') + cl = repo.changelog + print('reader: changelog read') + print_nodemap_details(cl) + tip_rev = cl.tiprev() + tip_node = cl.node(tip_rev) + print('tip-rev: ', tip_rev) + print('tip-node:', node.short(tip_node).decode('ascii')) + print('node-rev:', cl.rev(tip_node)) + for r in cl.revs(): + n = cl.node(r) + try: + r2 = cl.rev(n) + except ValueError as exc: + print('error while checking revision:', r) + print(' ', exc) + return 1 + else: + if r2 != r: + print('revision %d is missing from the nodemap' % r) + return 1