diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -71,6 +71,7 @@
     registrar,
     repair,
     repoview,
+    requirements,
     revlog,
     revset,
     revsetlang,
@@ -105,6 +106,7 @@
 from .revlogutils import (
     deltas as deltautil,
     nodemap,
+    rewrite,
     sidedata,
 )
 
@@ -1451,6 +1453,60 @@
         ui.write(b"%s\n" % f)
 
 
+@command(
+    b"debug-repair-issue6528",
+    [
+        (
+            b'',
+            b'to-report',
+            b'',
+            _(b'build a report of corrupted revisions to this file'),
+            _(b'FILE'),
+        ),
+        (
+            b'',
+            b'from-report',
+            b'',
+            _(b'repair revisions listed in this report file'),
+            _(b'FILE'),
+        ),
+    ]
+    + cmdutil.dryrunopts,
+)
+def debug_repair_issue6528(ui, repo, **opts):
+    """find corrupted revisions and repair them. See issue6528 for more details.
+
+    The `--to-report` and `--from-report` flags allow you to cache and reuse the
+    computation of corrupted revisions for a given repository across clones.
+    The report format is line-based (with empty lines ignored):
+
+    ```
+    HEX_FILENODE [HEX_FILENODE ...]:FILE_PATH
+    ```
+
+    There can be multiple broken revisions per filelog.
+
+    Note that this does *not* mean that this repairs future corrupted revisions,
+    that needs a separate fix at the exchange level that hasn't been written yet
+    (as of 5.9rc0).
+    """
+    cmdutil.check_incompatible_arguments(
+        opts, 'to_report', ['from_report', 'dry_run']
+    )
+    dry_run = opts.get('dry_run')
+    to_report = opts.get('to_report')
+    from_report = opts.get('from_report')
+    # TODO maybe add filelog pattern and revision pattern parameters to help
+    # narrow down the search for users that know what they're looking for?
+
+    if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
+        raise error.Abort(_(b"can only repair revlogv1 repositories"))
+
+    rewrite.repair_issue6528(
+        ui, repo, dry_run=dry_run, to_report=to_report, from_report=from_report
+    )
+
+
 @command(b'debugformat', [] + cmdutil.formatteropts)
 def debugformat(ui, repo, **opts):
     """display format information about the current repository
diff --git a/mercurial/revlogutils/rewrite.py b/mercurial/revlogutils/rewrite.py
--- a/mercurial/revlogutils/rewrite.py
+++ b/mercurial/revlogutils/rewrite.py
@@ -7,6 +7,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
+import binascii
 import contextlib
 import os
 
@@ -472,3 +473,246 @@
                     new_index_file.write(entry_bin)
                 revlog._docket.index_end = new_index_file.tell()
                 revlog._docket.data_end = new_data_file.tell()
+
+
+def _get_filename_from_filelog_index(path):
+    """Return the filelog name for the store index file `path`.
+
+    This is `path` without its extension and without its first directory
+    component. Aborts if no `/` is left once the extension is dropped.
+    """
+    # Drop the extension and the `data/` prefix
+    path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
+    if len(path_part) < 2:
+        msg = _(b"cannot recognize filelog from filename: '%s'")
+        msg %= path
+        raise error.Abort(msg)
+
+    return path_part[1]
+
+
+def _filelog_from_filename(repo, path):
+    """Returns the filelog for the given `path`. Stolen from `engine.py`"""
+
+    from .. import filelog  # avoid cycle
+
+    fl = filelog.filelog(repo.svfs, path)
+    rl = fl._revlog
+    if rl._format_version != constants.REVLOGV1:
+        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
+        raise error.ProgrammingError(msg)
+    return fl
+
+
+def _write_swapped_parents(repo, rl, rev, offset, fp):
+    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
+    from ..pure import parsers  # avoid cycle
+
+    if repo._currentlock(repo._lockref) is None:
+        # Let's be paranoid about it
+        msg = "repo needs to be locked to rewrite parents"
+        raise error.ProgrammingError(msg)
+
+    index_format = parsers.IndexObject.index_format
+    entry = rl.index[rev]
+    new_entry = list(entry)
+    # Fields 5 and 6 of the index entry are the parent revisions; only the
+    # first 8 fields are handed to the packer.
+    new_entry[5], new_entry[6] = entry[6], entry[5]
+    packed = index_format.pack(*new_entry[:8])
+    fp.seek(offset)
+    fp.write(packed)
+
+
+def _reorder_filelog_parents(repo, fl, to_fix):
+    """
+    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
+    new version to disk, overwriting the old one with a rename.
+    """
+    from ..pure import parsers  # avoid cycle
+
+    ui = repo.ui
+    assert len(to_fix) > 0
+    rl = fl._revlog
+
+    index_file = rl._indexfile
+    new_file_path = index_file + b'.tmp-parents-fix'
+    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
+    try:
+        util.copyfile(
+            rl.opener.join(index_file),
+            rl.opener.join(new_file_path),
+            checkambig=rl._checkambig,
+        )
+
+        with rl.opener(new_file_path, mode=b"r+") as fp:
+            if rl._inline:
+                index = parsers.InlinedIndexObject(fp.read())
+                for rev in fl.revs():
+                    if rev in to_fix:
+                        offset = index._calculate_index(rev)
+                        _write_swapped_parents(repo, rl, rev, offset, fp)
+                        ui.write(repaired_msg % (rev, index_file))
+            else:
+                index_format = parsers.IndexObject.index_format
+                for rev in to_fix:
+                    offset = rev * index_format.size
+                    _write_swapped_parents(repo, rl, rev, offset, fp)
+                    ui.write(repaired_msg % (rev, index_file))
+
+        rl.opener.rename(new_file_path, index_file)
+        rl.clearcaches()
+        rl._loadindex()
+    finally:
+        # The temporary file lives under the opener like every access above,
+        # so resolve it the same way or a failed repair leaves it behind.
+        util.tryunlink(rl.opener.join(new_file_path))
+
+
+def _is_revision_corrupted(ui, fl, filerev, path):
+    """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
+    special meaning compared to the reverse in the context of filelog-based
+    copytracing. issue6528 exists because new code assumed that parent ordering
+    didn't matter, so this detects if the revision contains metadata (since
+    it's only used for filelog-based copytracing) and its parents are in the
+    "wrong" order."""
+    try:
+        raw_text = fl.rawdata(filerev)
+    except error.CensoredNodeError:
+        # We don't care about censored nodes as they never carry metadata
+        return False
+    has_meta = raw_text.startswith(b'\x01\n')
+    if has_meta:
+        (p1, p2) = fl.parentrevs(filerev)
+        if p1 != nullrev and p2 == nullrev:
+            msg = _(b"found corrupted revision %d for filelog '%s'\n")
+            ui.warn(msg % (filerev, path))
+            return True
+    return False
+
+
+def _from_report(ui, repo, context, from_report, dry_run):
+    """
+    Fix the revisions given in the `from_report` file, but still checks if the
+    revisions are indeed corrupted to prevent an unfortunate cyclic situation
+    where we'd swap well-ordered parents again.
+
+    `context` is called without arguments and must return the context manager
+    to hold while working. No filelog is rewritten if `dry_run` is set.
+
+    See the doc for `debug_repair_issue6528` for the format documentation.
+    """
+    ui.write(_(b"loading report file '%s'\n") % from_report)
+
+    with context(), open(from_report, mode='rb') as f:
+        for line in f.read().split(b'\n'):
+            if not line:
+                continue
+            try:
+                filenodes, filename = line.split(b':', 1)
+            except ValueError:
+                msg = _(b"malformed line in report file: '%s'")
+                raise error.Abort(msg % line)
+            fl = _filelog_from_filename(repo, filename)
+            to_fix = set(
+                fl.rev(binascii.unhexlify(n.strip())) for n in filenodes.split()
+            )
+            excluded = set()
+
+            for filerev in to_fix:
+                if not _is_revision_corrupted(ui, fl, filerev, filename):
+                    msg = _(b"revision %s of file '%s' is not corrupted\n")
+                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
+                    ui.warn(msg)
+                    excluded.add(filerev)
+
+            to_fix = to_fix - excluded
+            if not to_fix:
+                msg = _(b"no corrupted revisions were found for '%s'\n")
+                ui.write(msg % filename)
+                continue
+            if not dry_run:
+                with ui.uninterruptible():
+                    _reorder_filelog_parents(repo, fl, sorted(to_fix))
+
+
+def repair_issue6528(ui, repo, dry_run=False, to_report=None, from_report=None):
+    """Find the filelog revisions affected by issue6528 and repair them.
+
+    With `from_report`, only the revisions listed in that file are checked and
+    repaired. Otherwise every filelog in the store is scanned and the corrupted
+    revisions are either listed in the `to_report` file (nothing is repaired)
+    or repaired in place. With `dry_run`, corrupted revisions are only reported
+    through warnings.
+    """
+    from .. import store  # avoid cycle
+
+    @contextlib.contextmanager
+    def context():
+        if dry_run or to_report:  # No need for locking
+            yield
+        else:
+            with repo.wlock(), repo.lock():
+                yield
+
+    if from_report:
+        return _from_report(ui, repo, context, from_report, dry_run)
+
+    report_entries = []
+
+    with context():
+        total = sum(
+            1
+            for (t, p, _e, _s) in repo.store.datafiles()
+            if p.endswith(b'.i') and t & store.FILEFLAGS_FILELOG
+        )
+
+        progress = ui.makeprogress(
+            _(b"looking for corrupted revisions"),
+            unit=_(b"filelogs"),
+            total=total,
+        )
+        found_nothing = True
+
+        for file_type, path, _encoded, _size in repo.store.datafiles():
+            if (
+                not path.endswith(b'.i')
+                or not file_type & store.FILEFLAGS_FILELOG
+            ):
+                continue
+            progress.increment()
+            filename = _get_filename_from_filelog_index(path)
+            fl = _filelog_from_filename(repo, filename)
+
+            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
+            to_fix = set()
+            for filerev in fl.revs():
+                # TODO speed up by looking at the start of the delta
+                # If it hasn't changed, it's not worth looking at the other revs
+                # in the same chain
+                corrupted = _is_revision_corrupted(ui, fl, filerev, path)
+                if corrupted:
+                    found_nothing = False
+                    if not dry_run:
+                        if to_report:
+                            to_fix.add(binascii.hexlify(fl.node(filerev)))
+                        else:
+                            to_fix.add(filerev)
+
+            if to_fix:
+                to_fix = sorted(to_fix)
+                if to_report:
+                    report_entries.append((filename, to_fix))
+                else:
+                    with repo.ui.uninterruptible():
+                        _reorder_filelog_parents(repo, fl, to_fix)
+
+        if found_nothing:
+            ui.write(_(b"no corrupted revisions were found\n"))
+
+        if to_report and report_entries:
+            with open(to_report, mode="wb") as f:
+                for path, to_fix in report_entries:
+                    f.write(b"%s:%s\n" % (b" ".join(to_fix), path))
+
+        progress.complete()
diff --git a/tests/bundles/issue6528.tar b/tests/bundles/issue6528.tar
new file mode 100644
index 0000000000000000000000000000000000000000..f92105258524076c68bbb0fadb3827fcf801edd3
GIT binary patch
literal
61440 zc%1EB30M^b`TU+JxpP2;60uAeQ;l2-dCNpH_oXnZ`oO|!wb0;_i zs&XJ4Xp-dDC`}@^>jFS1fg%`*aSIeeP?Y0VzeoVjDaojd#?Wf z4@GcDaAHCg-vec^KSH1wZ-0VDfc?dQ#r|WIj53j}s@tIS_UFEaa{Ez?#wcKaF<@tZ zt|*f@IgEufm5M{7G zg^(1mzZfXq{*^03;6F{5!TvN&;1G;e(fw!#V1Ebu>-1IO|I5aI&i@1civo-N5k#5D z=n`11J|S3B=`vi}pGJ%9k8pkh#c2Ws_7?}QUcoAJa`HI1@D_kgdN{?vj%IZx*lg73 zxE+ZstkVic|1r@2D-H_y?}!3_;D`eM;D`c$;fMnN;fMl%;)nwO;t1exss^_F z=h=@{fee@Re-d}#KTaZm|Du3~aRtjL6eJ=eQ9`9s69|U02&GnWEq02Mp=yjI2%KS2 z>0hlXKo!7=|3OC9Fq&0Wri`-lpP(tw|1Syv|Eo7_`L8fAI%OgjG+JZSD72M#tGjIf z-i(Z4+WCowSoLlgl1S8tTe|H_k5j`}}F(SZMA0Pw$hqd5L+l9SCQrt*3K z%E5mcjQknlhHd<3jg^}AUkI1&f5frG{vmdO4S-I+bJX@#FU)Er-86WlQEc5 z$rY?_Y*L6aQKMC%A^hGTjnQakDH4+fn?m@0q98sc95fC7v9OK*dVNwdV@NVqwhY(t z->LqOgZUrCfyMrb35Ke1{Q_LrALabLWBxymgZiH+XbH1pndB5LE6nX-)R^>!aj;Ua zQ)?37c1D(k+a@N!iL5roy0a~;)*Ikty@7=_I<>yNbVv$gN;D3WS}T%p7r4AZMA{PO z&jmAD4Y$w8&qAU%C9(!ssnzSaZM_;cC2EY8Nk}w0ctBilZl?j}W0!H9(ZnVT#~Zox z6sLKx@J}cGTXJ8w>fR~EY)mv8wA^nn8WXv2I8vo&!>rdQSjU>UUnT4_25~1z;tot6 zCwzhnP?iIQUsZHqNTNQO6~1Ksrtp8Mbck6qj5~mzmjPyUDws`XG+MZwR4dlmd%G4%(i7_9C8lmr8#Vik-s z$(#~Aioiyz^;&ghO$aCS@FPgc@!x6wcarAYe_;N%s>h$&-@uMCYYc2fJA_>Of84?T zG>L)szbH`YO{`9-SFuvHP8bCwO<;Abfzhg1t%;Eu*wI>jLJ-$JOyTYWBiFDD8gDeI ztoui!Qez5Z;F_1hrqsx_Kv__g{3qd0`7hYB;!!~G|7Cjr2aO^)@c&|<6!u4uWJae^ zvqn=n=Z1FS|9PX^*Z(mT1O8tmxI=E%PY72E$^OcU_9xx_!xn9g+Nf&quG_Dx>f!U_ zTi70MW`r6)E@PDn52}d0cA+2awP)W~y>sdrveK?;F6}Bt9z)6$ zIK`+a1w|4hDktS6M$2(TE~ijj$;qVDI7#6Oi@YphIow)!SswhF|9w~YH@$HF;ei!R z?*1O|w8q9prhzXQTY2qvhvdT}w@4t#^ac$xoBLmKazaNfzctm=@ zQVG<0VOH%Cf>qs`34#V%wez$V4_>=FkEg!AuT9)@zw`Az%dd4aa{soi5l-3-PnJL* zde-;-EUiw~)#GOu|7YFU9ZMIStJU>P&B><%vM=WwzF+Z8J|LH#$lVp%F0BP5af4t_PftNBlbu#ItVPhOZKLmA9&fsKb53^G z-1{lb-VVRm^M0{fP4=ws_gV8A-`8Igvvh~ybiWV3Sn!|z{c=N(B%j#Z=1~7LK~265 zy0d(>=RCHseBbcSM7hkv)A7KC{Ba< zAH_jY{m+Lh-qz0^1@%AK!`*IQKfCcTAZPpHd+U7k+s7m{j9N--%eC8VU3k`|r@ zg6eXm4rHm=2{D8}*{y|s9j{~zXoUG(Bl1r)zWZ|I4!ikA&1${g@pJ%Hic<4mRdxI) zaN3#wVEm^zDB^!#uEbhD*C^nB;9MF0{kKCROTF#&v=t 
z`1?cnY+y%sf=k^Xbdwf&5Cvn97ODb4(OfB*44SV73qB+;HGJ7i^h)+lugzgkf0nfi zx_2ei;V1EaCyXmtp*Ce{E_UNeBv(9mXH2Bd`I1*kZa{0!TRqV~TcWp?R6mZ;}x8~RXb$0rN zTyZ+*Uxm{0Ur}}Z=im13%zyCyCvi}G{FfAP%=7aaKi++CV8^D!nID~LUT=B3zJdE} z4=>~xQ?>D*XPMwb*3PE3B(dH7-;EoGe8O~Y-stJTs*e9Fgr%(uDy$wg9~poD^Pd9u zF3-ovKW=@xGEI%%diT$fc69uJu{#(J91G{?ui_@4>`=12kKgEGtVf^m{6~+I;`al1EC`riTG0?z9#Kgx9 zuRmEf>Y?r7!c(W%))GkKSXpdz!oW8h0mtA8=t#BU_zJlc5(%=pH*&U4lnGZ(H{wTvKCxKe?u)FjPtHxi-75fWu(D6S$1hN4(TCgszRqzH^i zEpb_@j8E39H0p8jN;n9%e6)mY;k=o`lFyLSLJrT5`S)SvH+*urY2=6;kAuNSGlm{| z;dFhpmn6{P?!JxZcdxOn!4O- zr!gfWLsfE0t|Apg(N$9vUNzQ`z{V>(!AgB{vc?1p+k!&xr!WgKLA9$olMntvyl z*DuTK_mKEx9=z1~otc-HraN7;ajo5>vBa9DRsa-j}_)L2VXv%<&F7{QkEa9{uDgpOX8*)%a$^j(K@S zUq9xTba(q4^VSG5d1E)t;F^IFC@iwz>Kzi~@{03GFm{iq`4^QvQor`${Y^vHPq-4k z?d;xfVw{vsyeome?e6D4f4xsi{IcPaw(Z;helTunV`M;Vtjbs z`)}XNtPvBvs#bc*djty%IsOc%NM#FD6`sadbLTifL7Mu`MU ztaECpr%=g~v=QWu;R=7X?4DTjZ%zCD?H57k+oT@vg(USkop4<4q-46;9aD?=fNN7}9ipPMm+qA(Iu(p+^^rY2E^N~U0CG{JHW3YNfWL`};v zl>#riGlgO4j`ydKMp)ck3Q&spZygv~ei<(7f0X0%KZF4Dzl#Ig&Rwan5ox=ju-%BT zZv53c15_SL!GFu-5W>HNRO>R!aLIq1bl^XRQK0@W3NR^T+rCGnP@%g=0MY;bC?)?Z zB*PW|`N>}G^S^1D2K*NV_MJP3)NZgpVi)%dHY4pf8T(B|;pSic(*R0=(#HRabOM*X z{})3^F#m%nu-Lzn61_?QZ{V`!e_<3xfbqX#z}Eg19_U>H%5Hz01o_|Mz{&m<8R%UC zT>1Z^@!y5{|0n|T|HJ_p|6Q%&%>N1u^ezFe_+K>syNLfNsQ-xrF#a2i|E{co-X*{l z|Ea?9p9TCUXabD?6bJu|@!z~dDD(L5u^De1J>-)LdwZTs$?u1x)qiUf+w^_I*Yy;p z3BK7s$S+R(^4bk-#qX0c#{4>MOw0B&7N`6-bMx!;+5rP3mtLDpD4I`+`0`0rS#9OJ)v7j%kq zL4F=tDYpVQXFcw5Kezd*h7mK*=k>|B&@SWl_)~3OjKBQq!~yN)euIp22Y1~0s9Th# z$46^g`VOPL*A3~ma-Li5n)6ZveFJ>XHnt1_dL%5t;BSD6!`A;-c%r8g;KKi72wqJ6 z59I%g12F&7zh(ZXAy7Hyf8qkv$pb!Fuy^;qBd7O50-8Q}SpUTId$rnO6ITuW!9-6z z_|zDGY~tv4Z(JC>>e?5pHC{{4TX)I9zH96C_d|Uf<$_>=K1A$o$~H| zhs|vpzGg5I6aHF2bMy2*L3#D!Q$Kr=oxbI!S>}~+Zt$Hp8@q%?-wj;9D0oi3XHMfL zvjmNj?KCRW#7|>0TA$qBFctCg3`?5vvnP`FEckF;BwTuI@8bs(w&v$exz(-rn$MvY z3G??H*b;m3ey=SdYnwm)DqAyl8!@Oxs7XETvu97d_WlqKZ%BLUp0lvYB0-~r!pR5Q zXjJM+pDfpiGhbNW2j0Fp?#O>~XWns-ncw1ekDRx+y;%D&FWda)qkge#vC}=<&aAKL 
z-+ft9oTQfYXcP2!of|LWqI=(o&b#iN5ZPeNr>SlJZ^>`5GtPfdSGKyT9mxUywXl!> z6-@Lg0j}adPB_MY5~0BJA8}B@{2$H%o!}ZcVP2kDp##pP*0>tejm+(JRWh{BBK?1d z9_tME`EquQYDLz=kw>~;nYv+DF1E;jKwb-`dB!h|`VOT>4?k4%*hin<>H22Z^hO;E z)hO!lDO3Ir=bul6O8JO!uFLx4o(mSO*}XS?ab4NWDgMuXovf=n+~=3qPDkxe?>=N| z6JxCAQjgZkMc)mpu_nHjXYJz|V9yPW-0q+7Q~1dhM0mZR(;h!Rcfa-Crr-XEXny_U zVBN+cUhmEA(_&Ga+lJq6435|o`$u~J&(A$w@~TyD^IMnwX6#K5$zJ;NwB#{sp7!?7 zKD**)&MZs{a4?A82;9E;(q32^|>3h&A%VH*N|Ew zyF-k*eeRmypoq-KbwrXnx<#956CN8v>ow@OrE8tI+VZ^hx?hi8xMKKvj?e$xJ4DF+ zI^;vL1+XAW(nydr3Mxmw&)5ud4qE3h;@y4Bio=beujlW%meVX<89I68-2R=<-FOF6 zj63q;k4JmXZNA)HN-iHXXIJ)tGXobzE*jG2%)UWxd*?+Q_L#S6(i_Xe>+FGh-b%iG z>A~ZVe;V+3a@Vvk+HZ}>&d(aVy6&8?jxXOiFL&EBD|nKi@C-pVUymtsrVnTMZ{Xa^B z_dkn+VrQ2Hs5S@=srpxTyXL>+{2wSygXjOE;P01_000000000000000000000000` GS@?f2GKtIp literal 0 Hc$@ $TESTTMP/ext/small_inline.py + > from mercurial import revlog + > revlog._maxinline = 8 + > EOF + + $ cat << EOF >> $HGRCPATH + > [extensions] + > small_inline=$TESTTMP/ext/small_inline.py + > EOF + + $ mkdir repo-to-fix-not-inline + $ cd repo-to-fix-not-inline + $ tar xf $TESTDIR/bundles/issue6528.tar + $ echo b >> b.txt + $ hg commit -qm "inline -> separate" + $ find .hg -name *b.txt.d + .hg/store/data/b.txt.d + +Status is correct, but the problem is still there, in the earlier revision + $ hg st + $ hg up 3 + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg st + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + +Run the fix on the non-inline revlog + $ hg debug-repair-issue6528 + found corrupted revision 1 for filelog 'data/D.txt.i' + repaired revision 1 of 'filelog data/D.txt.i' + found corrupted revision 1 for filelog 'data/b.txt.i' + found corrupted revision 3 for filelog 'data/b.txt.i' + repaired 
revision 1 of 'filelog data/b.txt.i' + repaired revision 3 of 'filelog data/b.txt.i' + +Check that it worked + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + $ hg debug-repair-issue6528 + no corrupted revisions were found + $ hg st