diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -189,6 +189,9 @@ return True + def verifyintegrity(self, state): + return self._revlog.verifyintegrity(state) + # TODO these aren't part of the interface and aren't internal methods. # Callers should be fixed to not use them. diff --git a/mercurial/repository.py b/mercurial/repository.py --- a/mercurial/repository.py +++ b/mercurial/repository.py @@ -318,6 +318,20 @@ _('cannot %s; remote repository does not support the %r ' 'capability') % (purpose, name)) +class iverifyproblem(interfaceutil.Interface): + """Represents a problem with the integrity of the repository. + + Instances of this interface are emitted to describe an integrity issue + with a repository (e.g. corrupt storage, missing data, etc). + + Instances are essentially messages associated with severity. + """ + warning = interfaceutil.Attribute( + """Message indicating a non-fatal problem.""") + + error = interfaceutil.Attribute( + """Message indicating a fatal problem.""") + class irevisiondelta(interfaceutil.Interface): """Represents a delta between one revision and another. @@ -749,6 +763,17 @@ TODO this is used by verify and it should not be part of the interface. """ + def verifyintegrity(state): + """Verifies the integrity of file storage. + + ``state`` is a dict holding state of the verifier process. It can be + used to communicate data between invocations of multiple storage + primitives. + + The method yields objects conforming to the ``iverifyproblem`` + interface. + """ + class idirs(interfaceutil.Interface): """Interface representing a collection of directories from paths. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -254,6 +254,12 @@ revision = attr.ib() delta = attr.ib() +@interfaceutil.implementer(repository.iverifyproblem) +@attr.s(frozen=True) +class revlogproblem(object): + warning = attr.ib(default=None) + error = attr.ib(default=None) + # index v0: # 4 bytes: offset # 4 bytes: compressed length @@ -2581,3 +2587,23 @@ if dataread is not idxread: dataread.close() datawrite.close() + + def verifyintegrity(self, state): + """Verifies the integrity of the revlog. + + Yields ``revlogproblem`` instances describing problems that are + found. + """ + dd, di = self.checksize() + if dd: + yield revlogproblem(error=_('data length off by %d bytes') % dd) + if di: + yield revlogproblem(error=_('index contains %d extra bytes') % di) + + if self.version != REVLOGV0: + if not state['revlogv1']: + yield revlogproblem(warning=_("warning: `%s' uses revlog " + "format 1") % self.indexfile) + elif state['revlogv1']: + yield revlogproblem(warning=_("warning: `%s' uses revlog " + "format 0") % self.indexfile) diff --git a/mercurial/verify.py b/mercurial/verify.py --- a/mercurial/verify.py +++ b/mercurial/verify.py @@ -341,6 +341,10 @@ elif (size > 0 or not revlogv1) and f.startswith('data/'): storefiles.add(_normpath(f)) + state = { + 'revlogv1': self.revlogv1, + } + files = sorted(set(filenodes) | set(filelinkrevs)) revisions = 0 progress = ui.makeprogress(_('checking'), unit=_('files'), @@ -373,7 +377,19 @@ ff) self.fncachewarned = True - self.checklog(fl, f, lr) + if not len(fl) and (self.havecl or self.havemf): + self.err(lr, _("empty or missing %s") % f) + else: + for problem in fl.verifyintegrity(state): + if problem.warning: + self.warn(problem.warning) + elif problem.error: + self.err(lr, problem.error, f) + else: + raise error.ProgrammingError( + 'problem instance does not set warning or error ' + 'attribute: %s' % problem.msg) + seen = {} rp = None for i in fl: diff --git a/tests/test-check-interfaces.py b/tests/test-check-interfaces.py --- a/tests/test-check-interfaces.py +++ b/tests/test-check-interfaces.py @@ -229,4 +229,8 @@ basenode=b'') checkzobject(rdr) + ziverify.verifyClass(repository.iverifyproblem, + revlog.revlogproblem) + checkzobject(revlog.revlogproblem()) + main()