# hgext/censor.py: drop the in-extension revlog rewriting code together with
# the imports only it used. The command now opens a b'censor' transaction and
# calls flog.censorrevision() instead.
diff --git a/hgext/censor.py b/hgext/censor.py
--- a/hgext/censor.py
+++ b/hgext/censor.py
@@ -32,11 +32,8 @@
 
 from mercurial import (
     error,
-    pycompat,
     registrar,
-    revlog,
     scmutil,
-    util,
 )
 
 cmdtable = {}
@@ -98,90 +95,5 @@
         raise error.Abort(_('cannot censor working directory'),
                          hint=_('clean/delete/update first'))
 
-    flogv = flog.version & 0xFFFF
-    if flogv != revlog.REVLOGV1:
-        raise error.Abort(
-            _('censor does not support revlog version %d') % (flogv,))
-
-    tombstone = revlog.packmeta({"censored": tombstone}, "")
-
-    crev = fctx.filerev()
-
-    if len(tombstone) > flog.rawsize(crev):
-        raise error.Abort(_(
-            'censor tombstone must be no longer than censored data'))
-
-    # Using two files instead of one makes it easy to rewrite entry-by-entry
-    idxread = repo.svfs(flog.indexfile, 'r')
-    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
-    if flog.version & revlog.FLAG_INLINE_DATA:
-        dataread, datawrite = idxread, idxwrite
-    else:
-        dataread = repo.svfs(flog.datafile, 'r')
-        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
-
-    # Copy all revlog data up to the entry to be censored.
-    rio = revlog.revlogio()
-    offset = flog.start(crev)
-
-    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
-        idxwrite.write(chunk)
-    for chunk in util.filechunkiter(dataread, limit=offset):
-        datawrite.write(chunk)
-
-    def rewriteindex(r, newoffs, newdata=None):
-        """Rewrite the index entry with a new data offset and optional new data.
-
-        The newdata argument, if given, is a tuple of three positive integers:
-        (new compressed, new uncompressed, added flag bits).
-        """
-        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
-        flags = revlog.gettype(offlags)
-        if newdata:
-            comp, uncomp, nflags = newdata
-            flags |= nflags
-        offlags = revlog.offset_type(newoffs, flags)
-        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
-        idxwrite.write(rio.packentry(e, None, flog.version, r))
-        idxread.seek(rio.size, 1)
-
-    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
-        """Write the given full text to the filelog with the given data offset.
-
-        Returns:
-            The integer number of data bytes written, for tracking data offsets.
-        """
-        flag, compdata = flog.compress(data)
-        newcomp = len(flag) + len(compdata)
-        rewriteindex(r, offs, (newcomp, len(data), nflags))
-        datawrite.write(flag)
-        datawrite.write(compdata)
-        dataread.seek(flog.length(r), 1)
-        return newcomp
-
-    # Rewrite censored revlog entry with (padded) tombstone data.
-    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
-    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
-
-    # Rewrite all following filelog revisions fixing up offsets and deltas.
-    for srev in pycompat.xrange(crev + 1, len(flog)):
-        if crev in flog.parentrevs(srev):
-            # Immediate children of censored node must be re-added as fulltext.
-            try:
-                revdata = flog.revision(srev)
-            except error.CensoredNodeError as e:
-                revdata = e.tombstone
-            dlen = rewrite(srev, offset, revdata)
-        else:
-            # Copy any other revision data verbatim after fixing up the offset.
-            rewriteindex(srev, offset)
-            dlen = flog.length(srev)
-            for chunk in util.filechunkiter(dataread, limit=dlen):
-                datawrite.write(chunk)
-        offset += dlen
-
-    idxread.close()
-    idxwrite.close()
-    if dataread is not idxread:
-        dataread.close()
-        datawrite.close()
+    with repo.transaction(b'censor') as tr:
+        flog.censorrevision(tr, fnode, tombstone=tombstone)
# mercurial/filelog.py: add filelog.censorrevision(), which forwards node and
# tombstone to the underlying revlog. The tr argument is accepted but is not
# passed on to the revlog call.
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -111,6 +111,9 @@
     def strip(self, minlink, transaction):
         return self._revlog.strip(minlink, transaction)
 
+    def censorrevision(self, tr, node, tombstone=b''):
+        return self._revlog.censorrevision(node, tombstone=tombstone)
+
     def files(self):
         return self._revlog.files()
 
# mercurial/repository.py: declare censorrevision(tr, node, tombstone=b'')
# and document its contract, including the expectation that stored deltas
# no longer reference the deleted content afterwards.
diff --git a/mercurial/repository.py b/mercurial/repository.py
--- a/mercurial/repository.py
+++ b/mercurial/repository.py
@@ -691,6 +691,23 @@
         even if it existed in the store previously.
         """
 
+    def censorrevision(tr, node, tombstone=b''):
+        """Remove the content of a single revision.
+
+        The specified ``node`` will have its content purged from storage.
+        Future attempts to access the revision data for this node will
+        result in failure.
+
+        A ``tombstone`` message can optionally be stored. This message may be
+        displayed to users when they attempt to access the missing revision
+        data.
+
+        Storage backends may have stored deltas against the previous content
+        in this revision. As part of censoring a revision, these storage
+        backends are expected to rewrite any internally stored deltas such
+        that they no longer reference the deleted content.
+        """
+
     def getstrippoint(minlink):
         """Find the minimum revision that must be stripped to strip a linkrev.
 
# mercurial/revlog.py: add revlog.censorrevision(), the revlog rewriting logic
# used for censoring. Entries copied verbatim keep their delta base, revisions
# stored as a delta against the censored revision are re-added as fulltext,
# and file handles are released if the rewrite fails.
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -2492,3 +2492,134 @@
         finally:
             destrevlog._lazydeltabase = oldlazydeltabase
             destrevlog._deltabothparents = oldamd
+
+    def censorrevision(self, node, tombstone=b''):
+        """Replace the stored content of ``node`` with a censor tombstone.
+
+        The index (and, for non-inline revlogs, the data file) is rewritten
+        entry by entry into atomic temporary files. The censored entry
+        becomes a fulltext holding the packed tombstone, padded to the
+        original raw size and flagged ``REVIDX_ISCENSORED``. Later
+        revisions that are children of, or stored as a delta against, the
+        censored revision are re-added as fulltext; all others are copied
+        verbatim with their data offset fixed up.
+
+        Raises ``error.RevlogError`` for version 0 revlogs and
+        ``error.Abort`` if the packed tombstone is longer than the raw size
+        of the censored revision.
+        """
+        if (self.version & 0xFFFF) == REVLOGV0:
+            raise error.RevlogError(_('cannot censor with version %d revlogs') %
+                                    self.version)
+
+        rev = self.rev(node)
+        tombstone = packmeta({b'censored': tombstone}, b'')
+
+        if len(tombstone) > self.rawsize(rev):
+            raise error.Abort(_('censor tombstone must be no longer than '
+                                'censored data'))
+
+        # Bound up front so the cleanup path below can tell which handles
+        # were actually opened.
+        idxread = idxwrite = dataread = datawrite = None
+
+        def rewriteindex(r, newoffs, newdata=None):
+            """Rewrite the index entry with a new data offset and new data.
+
+            The newdata argument, if given, is a tuple of three positive
+            integers: (new compressed, new uncompressed, added flag bits).
+            """
+            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
+            flags = gettype(offlags)
+            if newdata:
+                comp, uncomp, nflags = newdata
+                flags |= nflags
+                # The entry now stores a fulltext, so it is its own base.
+                base = r
+            offlags = offset_type(newoffs, flags)
+            # Entries copied verbatim keep their recorded base: their data
+            # is unchanged and may still be a delta.
+            e = (offlags, comp, uncomp, base, link, p1, p2, nodeid)
+            idxwrite.write(self._io.packentry(e, None, self.version, r))
+            idxread.seek(self._io.size, 1)
+
+        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
+            """Write the given fulltext with the given data offset.
+
+            Returns:
+                The integer number of data bytes written, for tracking data
+                offsets.
+            """
+            flag, compdata = self.compress(data)
+            newcomp = len(flag) + len(compdata)
+            rewriteindex(r, offs, (newcomp, len(data), nflags))
+            datawrite.write(flag)
+            datawrite.write(compdata)
+            dataread.seek(self.length(r), 1)
+            return newcomp
+
+        try:
+            # Using two files instead of one makes it easy to rewrite
+            # entry-by-entry
+            idxread = self.opener(self.indexfile, 'r')
+            idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
+            if self.version & FLAG_INLINE_DATA:
+                dataread, datawrite = idxread, idxwrite
+            else:
+                dataread = self.opener(self.datafile, 'r')
+                datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
+
+            # Copy all revlog data up to the entry to be censored.
+            offset = self.start(rev)
+
+            idxlimit = rev * self._io.size
+            for chunk in util.filechunkiter(idxread, limit=idxlimit):
+                idxwrite.write(chunk)
+            for chunk in util.filechunkiter(dataread, limit=offset):
+                datawrite.write(chunk)
+
+            # Rewrite censored entry with (padded) tombstone data. The
+            # tombstone is bytes, so the padding must be bytes as well.
+            pad = b' ' * (self.rawsize(rev) - len(tombstone))
+            offset += rewrite(rev, offset, tombstone + pad,
+                              REVIDX_ISCENSORED)
+
+            # Rewrite all following revisions fixing up offsets and deltas.
+            for srev in pycompat.xrange(rev + 1, len(self)):
+                if (rev in self.parentrevs(srev)
+                    or self.deltaparent(srev) == rev):
+                    # Children of the censored node, and any revision
+                    # stored as a delta against it, must be re-added as
+                    # fulltext so no stored delta references the
+                    # censored content.
+                    try:
+                        revdata = self.revision(srev)
+                    except error.CensoredNodeError as e:
+                        revdata = e.tombstone
+                    dlen = rewrite(srev, offset, revdata)
+                else:
+                    # Copy any other revision data verbatim after fixing
+                    # up the offset.
+                    rewriteindex(srev, offset)
+                    dlen = self.length(srev)
+                    for chunk in util.filechunkiter(dataread, limit=dlen):
+                        datawrite.write(chunk)
+                offset += dlen
+
+            # Each reader is closed ahead of its atomictemp writer, index
+            # first.
+            idxread.close()
+            idxwrite.close()
+            if dataread is not idxread:
+                dataread.close()
+                datawrite.close()
+        except: # re-raises
+            # Release the read handles and discard any atomictemp writer
+            # that is still pending instead of leaving them open.
+            for fh in (idxread, dataread):
+                if fh is not None:
+                    fh.close()
+            for fh in (idxwrite, datawrite):
+                if fh is not None:
+                    fh.discard()
+            raise