@command(
    b"debug-fix-issue6528",
    [
        (
            b'',
            b'to-report',
            b'',
            _(b'build a report of corrupted revisions to this file'),
            _(b'FILE'),
        ),
        (
            b'',
            b'from-report',
            b'',
            _(b'fix revisions listed in this report file'),
            _(b'FILE'),
        ),
    ]
    + cmdutil.dryrunopts,
)
def debug_fix_issue6528(ui, repo, **opts):
    """find corrupted revisions and fix them. See issue6528 for more details.

    The `--to-report` and `--from-report` flags allow you to cache and reuse the
    computation of corrupted revisions for a given repository across clones.
    The report format is line-based (with empty lines ignored):

    ```
    <encoded filelog index path> <hex filenode> [<hex filenode> ...]
    ```

    There is one line per affected filelog; the path and each filenode are
    separated by a single space.

    Though unlikely, there can be multiple broken revisions per filelog.

    Note that this does *not* mean that this fixes future corrupted revisions, that
    needs a separate fix at the exchange level that hasn't been written yet (as of
    5.9rc0).
    """
    # Imported locally so the command does not rely on the module-level
    # import list carrying `rewrite`.
    from .revlogutils import rewrite

    # `--to-report` only scans, so it cannot be combined with either
    # `--from-report` or `--dry-run`.
    cmdutil.check_incompatible_arguments(
        opts, 'to_report', ['from_report', 'dry_run']
    )
    dry_run = bool(opts.get('dry_run'))
    to_report = opts.get('to_report')
    from_report = opts.get('from_report')

    rewrite.fix_issue6528(
        ui, repo, dry_run=dry_run, to_report=to_report, from_report=from_report
    )
import binascii

# Byte values accepted inside a hex filenode token of a report line.
_HEX_DIGITS = frozenset(bytearray(b'0123456789abcdefABCDEF'))


def _filelog_from_path(repo, path):
    """Returns the filelog for the given `path`. Stolen from `engine.py`

    `path` is a store path such as `data/<name>.i`. Raises `error.Abort` when
    the path has no directory prefix to strip.
    """

    from .. import filelog  # avoid cycle

    # Drop the extension and the `data/` prefix
    path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
    if len(path_part) < 2:
        msg = _(b"cannot recognize filelog from filename: '%s'")
        msg %= path
        raise error.Abort(msg)
    path = path_part[1]
    fl = filelog.filelog(repo.svfs, path)
    rl = fl._revlog
    assert rl._format_version == constants.REVLOGV1, rl._format_version
    return fl


def _reorder_filelog_parents(fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the new
    version to disk, overwriting the old one with a rename.

    Only the index entries listed in `to_fix` are rewritten; every other byte
    of the copied index file is left as it was.
    """
    from ..pure import parsers

    assert len(to_fix) > 0
    rl = fl._revlog
    index_format = parsers.IndexObject.index_format

    # Work on a copy so the live index is only replaced once fully patched.
    new_file_path = rl._indexfile + b'tmp-parents-fix'
    util.copyfile(
        rl.opener.join(rl._indexfile),
        rl.opener.join(new_file_path),
        checkambig=rl._checkambig,
    )

    with rl.opener(new_file_path, mode=b"r+") as fp:
        if rl._inline:
            # Entries are not at a fixed stride in an inline revlog, so the
            # parsed index is asked for each entry's offset.
            inline_index = parsers.InlinedIndexObject(fp.read())
            offset_of = inline_index._calculate_index
        else:

            def offset_of(rev):
                return rev * index_format.size

        for rev in sorted(to_fix):
            entry = rl.index[rev]
            new_entry = list(entry)
            # Swap p1 and p2
            new_entry[5], new_entry[6] = entry[6], entry[5]
            packed = index_format.pack(*new_entry[:8])
            fp.seek(offset_of(rev))
            fp.write(packed)

    rl.opener.rename(new_file_path, rl._indexfile)
    # Drop in-memory state so later reads see the rewritten parents.
    rl.clearcaches()
    rl._loadindex()


def _is_revision_corrupted(ui, fl, filerev, path):
    """Return True if `filerev` of filelog `fl` is affected by issue6528.

    A revision is reported (with a warning naming `path`) when its raw text
    starts with the ``\\x01\\n`` metadata marker while p1 is set and p2 is
    null.
    """
    try:
        raw_text = fl.rawdata(filerev)
    except error.CensoredNodeError:
        # We don't care about censored nodes as they never carry metadata
        return False
    has_meta = raw_text.startswith(b'\x01\n')
    if has_meta:
        (p1, p2) = fl.parentrevs(filerev)
        if p1 != nullrev and p2 == nullrev:
            msg = b"found corrupted revision %d for filelog '%s'\n"
            ui.warn(msg % (filerev, path))
            return True
    return False


def _is_hex_token(token):
    """Return True if `token` is non-empty and made only of hex digits."""
    return bool(token) and all(c in _HEX_DIGITS for c in bytearray(token))


def _parse_report_line(line):
    """Split one report line into `(encoded_path, [hex_filenode, ...])`.

    The line is consumed from the right: trailing whitespace-separated tokens
    made only of hex digits are the filenodes, whatever is left is the path.
    This keeps paths that themselves contain spaces intact.

    Raises `error.Abort` if the line has no path or no filenode.
    """
    rest = line.rstrip()
    nodes = []
    while True:
        parts = rest.rsplit(None, 1)
        if len(parts) != 2 or not _is_hex_token(parts[1]):
            break
        rest = parts[0]
        nodes.append(parts[1])
    nodes.reverse()
    if not rest or not nodes:
        msg = _(b"malformed report line: '%s'")
        raise error.Abort(msg % line)
    return rest, nodes


def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still checks if the
    revisions are indeed corrupted to prevent an unfortunate cyclic situation.

    Each non-empty line is `<encoded filelog index path>` followed by one or
    more space-separated hex filenodes. Nothing is written when `dry_run` is
    set.
    """
    from .. import store  # prevent cycle

    ui.write(_(b"Loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line.strip():
                continue
            encoded, filenodes = _parse_report_line(line)
            filename = store.decodefilename(encoded)
            fl = _filelog_from_path(repo, filename)
            to_fix = set(fl.rev(binascii.unhexlify(n)) for n in filenodes)
            excluded = set()

            for filerev in to_fix:
                if not _is_revision_corrupted(ui, fl, filerev, filename):
                    msg = _(b"Revision %s of file '%s' is not corrupted\n")
                    ui.warn(
                        msg % (binascii.hexlify(fl.node(filerev)), filename)
                    )
                    excluded.add(filerev)

            to_fix = to_fix - excluded
            if not to_fix:
                msg = _(b"No corrupted revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(fl, to_fix)


def fix_issue6528(ui, repo, dry_run=False, to_report=None, from_report=None):
    """Find the filelog revisions affected by issue6528 and repair them.

    - `from_report`: fix only the revisions listed in that report file.
    - `to_report`: scan the store and write the findings to that file
      without modifying the repository.
    - `dry_run`: scan (or re-check the report) and warn, but write nothing.

    With none of these set, the whole store is scanned and every affected
    filelog is rewritten under the working-copy and store locks.
    """
    from .. import store  # avoid cycle

    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock(), ui.uninterruptible():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        # First pass only counts the filelog indexes to size the progress bar.
        total = sum(
            1
            for (t, p, _e, _s) in repo.store.datafiles()
            if p.endswith(b'.i') and t & store.FILEFLAGS_FILELOG
        )

        progress = ui.makeprogress(
            _(b"Looking for corrupted revisions"),
            unit=_(b"filelogs"),
            total=total,
        )
        found_nothing = True

        try:
            for file_type, path, encoded, _size in repo.store.datafiles():
                if (
                    not path.endswith(b'.i')
                    or not file_type & store.FILEFLAGS_FILELOG
                ):
                    continue
                progress.increment()
                fl = _filelog_from_path(repo, path)

                # Set of filerevs (or hex filenodes if `to_report`) that need
                # fixing
                to_fix = set()
                for filerev in fl.revs():
                    # TODO speed up by looking at the start of the delta
                    # If it hasn't changed, it's not worth looking at the
                    # other revs in the same chain
                    if not _is_revision_corrupted(ui, fl, filerev, path):
                        continue
                    found_nothing = False
                    if dry_run:
                        continue
                    if to_report:
                        to_fix.add(binascii.hexlify(fl.node(filerev)))
                    else:
                        to_fix.add(filerev)

                if to_fix:
                    if to_report:
                        report_entries.append((encoded, to_fix))
                    else:
                        _reorder_filelog_parents(fl, to_fix)
        finally:
            # Always close the progress bar, even if the scan aborts.
            progress.complete()

        if found_nothing:
            ui.write(_(b"No corrupted revisions were found\n"))

        if to_report and report_entries:
            with open(to_report, mode="wb") as f:
                for path, to_fix in report_entries:
                    # Sorted so the same findings always give the same file.
                    f.write(b"%s %s\n" % (path, b" ".join(sorted(to_fix))))
z!d)ez<}~Hi36+G96!G8rC+8)(sAw);+>l}XIE!3(c)*I0so5*N1h)Sf zCYqG936xe7ovEhVFdZxjLUsR#SkQ&}94M6{ZY(HhFBbXwl6=8e;xY@jLF`j5)dBx>1Ppx*MIVHuIP z^v{Vr2j#yLl=@#&q8(W|gY=FewwX3+mj6t+!vBIKh*17JK@7#^d>Tsg=}M*k@oT@? z@~^Fbp|boJL{4a!M>JGqS8Of-EV#%<^oH*u(HbkBAhxJC4G<5<1;U1zNxu z{dWwd^puir=7Zk2{zaC9_rD{6{--x%^{*ps2*sLdqjCKUVTlF(I|1l_dQ+B~zf-tP?cRRhk1pN0Y|i|x?)6taa?5+mPgcQwx>E}@m2vbR zLpdX9g`7V9b5a=e?-1kapDcXns@Qu@Ljr-;F^>MRAe1!HRn2HpAyBja&j}U!5A!ho z?-=9iABWS`B2<WXR?gyksDH#=(sJEk%y3`r_?OHzHf76U1Wgn>)%ZGZRzb@8ltOVq|9GUjhr6q)xTNkgTmzam91-v(n40`=-7hoe~`8_L4 z_$o{&r!J1XlnL5XCS-5p!1@iV2DZ|dMYfLQ(Nel1(`(&hZT~=I{klH$9%CFv4@A~R1|ofz zL@u`<$lTi7k63?PWzSU6Trscf%IroTzEZ~&zgv5|%h$G|t=iYJivZTznZtNy`<|F-9#GE_ z)E}u6)N$&4>TT)`>SY`N0000000000006*e7jM8#l5UsV?+ymMKJ(b^@w&-?+vCB1 zU2eBK7{nh30v^&$23_W_dIHo@g8GR1PwIEnd(so4%cuKm00000000000000m z9)1sLHv9u#PcT6G?K8K}=OF{6+iUGy9{gDlpLqiK#Ooo0L6<%L{{TUKLLJAg|2L=? zs3)m^#{mEU00000000000Gvib0bh``n)+^c(C-W2!=T&c3i@zxz-;ro{9YUy2nO5% zj};blx%_T3C=hV@T)tp15DfZAAHE_Gu*d%&Gw1(*K)psCr9Pzohk70d0000000000 z0002+*&*N!+T;EKZ@@b6`@LqP-|h3`0CVi$K6iQSbC=ua4F=8ee{Ucd2-@xcCkg6L z)Cp_;|C`h+)N?oh0000000000004l`9wXEL3wysk@xNmJzrAx?KM`;C}&0000000000z|>&k>;I9|+XVgx0000000000006)g<{Z*Pl7icP z$I}E$=Pw?CMM=x}tJ4iMr5`iiV9rRne2Op48-Q0u2sv zo%*hH%0QE_&tN1$F!yX&WJ>oK(fmWO0*gZ#k!K`=;TVAx37TnE2Aqn#Zpa!<6LLD2 zl(p(#v)1QRl*>2uV&~FvRm;c*%@eZ;k|b8*{2&N#VH?4XH&4kqS;y_YMCWDuXHuUK zo*8cvvmT}%oi&IL000000000009wFxf9vtL&7Dhrb(S)2I#*2&2-s!fchJ1kc!{{0000000000005YdoD;AnQMktY z>}djJv=KVp-wvq7DfHiwOvPd- zJ9!(8>tB*22K4U)p#SO3c>33oHiTlG&658_^e?gk=-&xI|I?f5`fr|%3Do~Ni39yR z0qB2vGoJoanM~eVSFuSpCZPYY$b$Zz0Q5h-8BhO;tT**JZ(RREn1TAgBY^&=H)H?* zrw&!ot@93sX^xGW`p06geE$oeeM6|xuXhb{T%!>+FTAznP+w!R^afGa#LA0EW(I_LQ()5zF8kNNSF^ z_TExj%^4{(quZC#s58-tK(w@%`Ui8elb`p#HCsEf_m$L(hjDP1 z(rFAEge-O|yd=jYMG^(k?CKS{a1YP)^hhip5Z*_L(QdH*8$vk{ht$A(7z+t{*R?JoDD{czFkfKhbzbb zd5(wrpCi;e?w2Kqy?5dL?2{P0_u3UiV2WewKc7ozatztSFr7V#)UeEld@ zl=`>)Ir+DMRsPp8{#T-Z#;pHC`QLi#@PAA;TK=-kS$p850wPOq3y8qyN z_smr9zb0{OY4kiB;zE2#2#FynRJ;vAPQ 
z|8_jUmEA0^s+G3`T1vh8S6Zllfe(+-KM(bPXBgH0CcJGd9J@yJ@BLTb2k*T9Zx8%) z@}{ppvvBsUOE#VVlhWvUG89#FyO+_{3&J|aje%t}-;JV*$goi{F2ocRW#fod*q&|_ zmL(=8#TERHm%=?LM21F75JlEP#iS5KAKnIv$t3JDbrjw7wc|Yw+eV z3!^4x^kO-Hlosxx(wg4q|Ir7Cy7Z5qdmU-2YD}~r zu%`Y;+J7bQe+KIRj!=jH8D?Z&Sc~d^;WGb+?e-tk|D0j8{>R|0*v~Uk|7$<$p1<tjh?q&7ew9Fy`rU2du2_;1Sf0CvW}xb1#}PW9_rjmNI+xI42`2-He13 zBqCW+6i!y8?(Xg$J{mL2svg93i!2W8vF+_Ev*#{srcJWSe7j0}j!nF)OaIZNoK2v# znwVGtP(%M%>|^?WI05E=I|EtN{{~{T{a4igiM#Jky1EWtLtHUW`10|&um0h?yN;Dc zU-D4br}W0fF2d!Veb&Cc^EVD$z4+dajR)7B+0NZQ_wNp#x#IBfuG`$(R{cPJi+FI& zio@Mo=Rb60!%g=d{0I5QU+MRFZ+ZK+8*baNMt%9O*wJJV#N~wD_A4tK!Yej@Wo_c6 zH_p9d-J56Kc?SLd@gu(~O-!DRM^U_65Mq2(;bUOW!(dKv0w_HYA?c-t-zO2ZpHN_(z6Bt2ctOAu`x)n+3i3&m#;esv9MVW3k z)+6=AL`4|QHKmwqNK2r>=rTI0W-=**w$AH*_JGrlUH&&|psxyzkNVF|DLH*AeVEk`dGe-ZD8R)A*z2kpn`iK61 zXMpiP82@WlM{gDC?f)&+zX-qoaR%uBh5p}Wb@W!DX8$iMRsQ}1pMd@yVUqp7!-xL- z$){#syE5QCvV6<0ul2p$zgm2E{mZWHvks`g+5W_Z^oF0_+#kE`$gv$ii@bEhKOKIT zJ1~FC@4hH6Jal5-rtRUOs~+<|@%8V$eep#X@1M7{m<+oH&~X3n@S(*|E)QMH%piWf z;@)3+zj5HMhkm^OAk}@-_47aa__{20)yxxTKfU&m{gG`qoTG0{y|`|1^uT{#>APz% z=sn|?hput`cF&=QpLp>6zE9>nx`%vb@a%6`ceUF)gTw@Se{q72vSxx-K4&Fp$E$bV zyI(D(W zqwCV&`oinRoIg80`1U_4%Rk6=4c+`@<-G;3Bp1GB+4!`WU#9b*=ND*%((-@O6TPZX ztNu4K|Er|_#|lvYbA+-fUuBcQPIt-xCn(i_J{waz?KuF}imJ`BQB(h8>VJ~CBrxdT z0dNbcSnM;P`DsU`{##+ArvAg?nDIXz*8g;bvN1oVtOkHj%X$RO)6xzCC@ww+jCuc0 zdbamiYU+RN`X6Bi-v5qJ>i-tF-lAUr7w3PnLYM>pJA&>1CN1<*h1&1`%JpCHa{%MN zj!@V7FR;$b=a$Z{)-U8%Q~zA$`j2c_g7V)HN@rXZr(Xa700000000000000000000 M0000005GNae{ufyI{*Lx literal 0 Hc$@ $TESTTMP/ext/small_inline.py + > from mercurial import revlog + > revlog._maxinline = 8 + > EOF + + $ cat << EOF >> $HGRCPATH + > [extensions] + > small_inline=$TESTTMP/ext/small_inline.py + > EOF + + $ mkdir repo-to-fix-not-inline + $ cd repo-to-fix-not-inline + $ tar xf $TESTDIR/bundles/issue6528.tar + $ echo b >> b.txt + $ hg commit -qm "inline -> separate" + $ find .hg -name *b.txt.d + .hg/store/data/b.txt.d + +Status is correct, but the problem is still there, in the 
earlier revision + $ hg st + $ hg up 3 + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg st + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 4 468d0263ae92 a58b36ad6b65 000000000000 + +Run the fix on the non-inline revlog + $ hg debug-fix-issue6528 + found corrupted revision 1 for filelog 'data/b.txt.i' + +Check that it worked + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 4 468d0263ae92 a58b36ad6b65 000000000000 + $ hg debug-fix-issue6528 + No corrupted revisions were found + $ hg st