diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -53,7 +53,7 @@ def linkrev(self, rev): return self._revlog.linkrev(rev) - # Used by verify. + # Unused. def flags(self, rev): return self._revlog.flags(rev) @@ -77,7 +77,7 @@ def iscensored(self, rev): return self._revlog.iscensored(rev) - # Used by repo verify. + # Unused. def rawsize(self, rev): return self._revlog.rawsize(rev) diff --git a/mercurial/repository.py b/mercurial/repository.py --- a/mercurial/repository.py +++ b/mercurial/repository.py @@ -341,6 +341,12 @@ error = interfaceutil.Attribute( """Message indicating a fatal problem.""") + node = interfaceutil.Attribute( + """Revision encountering the problem. + + ``None`` means the problem doesn't apply to a single revision. + """) + class irevisiondelta(interfaceutil.Interface): """Represents a delta between one revision and another. @@ -790,6 +796,10 @@ used to communicate data between invocations of multiple storage primitives. + If individual revisions cannot have their revision content resolved, + the method is expected to set the ``skipread`` key to a set of nodes + that encountered problems. + The method yields objects conforming to the ``iverifyproblem`` interface. """ diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -29,6 +29,7 @@ nullhex, nullid, nullrev, + short, wdirfilenodeids, wdirhex, wdirid, @@ -260,6 +261,7 @@ class revlogproblem(object): warning = attr.ib(default=None) error = attr.ib(default=None) + node = attr.ib(default=None) # index v0: # 4 bytes: offset @@ -2644,6 +2646,89 @@ warning=_("warning: '%s' uses revlog format %d; expected %d") % (self.indexfile, version, state['expectedversion'])) + state['skipread'] = set() + + for rev in self: + node = self.node(rev) + + # Verify contents. 4 cases to care about: + # + # common: the most common case + # rename: with a rename + # meta: file content starts with b'\1\n', the metadata + # header defined in filelog.py, but without a rename + # ext: content stored externally + # + # More formally, their differences are shown below: + # + # | common | rename | meta | ext + # ------------------------------------------------------- + # flags() | 0 | 0 | 0 | not 0 + # renamed() | False | True | False | ? + # rawtext[0:2]=='\1\n'| False | True | True | ? + # + # "rawtext" means the raw text stored in revlog data, which + # could be retrieved by "revision(rev, raw=True)". "text" + # mentioned below is "revision(rev, raw=False)". + # + # There are 3 different lengths stored physically: + # 1. L1: rawsize, stored in revlog index + # 2. L2: len(rawtext), stored in revlog data + # 3. L3: len(text), stored in revlog data if flags==0, or + # possibly somewhere else if flags!=0 + # + # L1 should be equal to L2. L3 could be different from them. + # "text" may or may not affect commit hash depending on flag + # processors (see revlog.addflagprocessor). + # + # | common | rename | meta | ext + # ------------------------------------------------- + # rawsize() | L1 | L1 | L1 | L1 + # size() | L1 | L2-LM | L1(*) | L1 (?) + # len(rawtext) | L2 | L2 | L2 | L2 + # len(text) | L2 | L2 | L2 | L3 + # len(read()) | L2 | L2-LM | L2-LM | L3 (?) + # + # LM: length of metadata, depending on rawtext + # (*): not ideal, see comment in filelog.size + # (?): could be "- len(meta)" if the resolved content has + # rename metadata + # + # Checks needed to be done: + # 1. length check: L1 == L2, in all cases. + # 2. hash check: depending on flag processor, we may need to + # use either "text" (external), or "rawtext" (in revlog). + + try: + skipflags = state.get('skipflags', 0) + if skipflags: + skipflags &= self.flags(rev) + + if skipflags: + state['skipread'].add(node) + else: + # Side-effect: read content and verify hash. + self.revision(node) + + l1 = self.rawsize(rev) + l2 = len(self.revision(node, raw=True)) + + if l1 != l2: + yield revlogproblem( + error=_('unpacked size is %d, %d expected') % (l2, l1), + node=node) + + except error.CensoredNodeError: + if state['erroroncensored']: + yield revlogproblem(error=_('censored file data'), + node=node) + state['skipread'].add(node) + except Exception as e: + yield revlogproblem( + error=_('unpacking %s: %s') % (short(node), e), + node=node) + state['skipread'].add(node) + def storageinfo(self, exclusivefiles=False, sharedfiles=False, revisionscount=False, trackedsize=False, storedsize=False): diff --git a/mercurial/verify.py b/mercurial/verify.py --- a/mercurial/verify.py +++ b/mercurial/verify.py @@ -343,7 +343,10 @@ state = { # TODO this assumes revlog storage for changelog. - 'expectedversion': self.repo.changelog.version & 0xFFFF + 'expectedversion': self.repo.changelog.version & 0xFFFF, + 'skipflags': self.skipflags, + # experimental config: censor.policy + 'erroroncensored': ui.config('censor', 'policy') == 'abort', } files = sorted(set(filenodes) | set(filelinkrevs)) @@ -381,18 +384,25 @@ if not len(fl) and (self.havecl or self.havemf): self.err(lr, _("empty or missing %s") % f) else: + # Guard against implementations not setting this. + state['skipread'] = set() for problem in fl.verifyintegrity(state): + if problem.node is not None: + linkrev = fl.linkrev(fl.rev(problem.node)) + else: + linkrev = None + if problem.warning: self.warn(problem.warning) elif problem.error: - self.err(lr, problem.error, f) + self.err(linkrev if linkrev is not None else lr, + problem.error, f) else: raise error.ProgrammingError( 'problem instance does not set warning or error ' 'attribute: %s' % problem.msg) seen = {} - rp = None for i in fl: revisions += 1 n = fl.node(i) @@ -403,75 +413,15 @@ else: del filenodes[f][n] - # Verify contents. 4 cases to care about: - # - # common: the most common case - # rename: with a rename - # meta: file content starts with b'\1\n', the metadata - # header defined in filelog.py, but without a rename - # ext: content stored externally - # - # More formally, their differences are shown below: - # - # | common | rename | meta | ext - # ------------------------------------------------------- - # flags() | 0 | 0 | 0 | not 0 - # renamed() | False | True | False | ? - # rawtext[0:2]=='\1\n'| False | True | True | ? - # - # "rawtext" means the raw text stored in revlog data, which - # could be retrieved by "revision(rev, raw=True)". "text" - # mentioned below is "revision(rev, raw=False)". - # - # There are 3 different lengths stored physically: - # 1. L1: rawsize, stored in revlog index - # 2. L2: len(rawtext), stored in revlog data - # 3. L3: len(text), stored in revlog data if flags==0, or - # possibly somewhere else if flags!=0 - # - # L1 should be equal to L2. L3 could be different from them. - # "text" may or may not affect commit hash depending on flag - # processors (see revlog.addflagprocessor). - # - # | common | rename | meta | ext - # ------------------------------------------------- - # rawsize() | L1 | L1 | L1 | L1 - # size() | L1 | L2-LM | L1(*) | L1 (?) - # len(rawtext) | L2 | L2 | L2 | L2 - # len(text) | L2 | L2 | L2 | L3 - # len(read()) | L2 | L2-LM | L2-LM | L3 (?) - # - # LM: length of metadata, depending on rawtext - # (*): not ideal, see comment in filelog.size - # (?): could be "- len(meta)" if the resolved content has - # rename metadata - # - # Checks needed to be done: - # 1. length check: L1 == L2, in all cases. - # 2. hash check: depending on flag processor, we may need to - # use either "text" (external), or "rawtext" (in revlog). - try: - skipflags = self.skipflags - if skipflags: - skipflags &= fl.flags(i) - if not skipflags: - fl.read(n) # side effect: read content and do checkhash - rp = fl.renamed(n) - # the "L1 == L2" check - l1 = fl.rawsize(i) - l2 = len(fl.revision(n, raw=True)) - if l1 != l2: - self.err(lr, _("unpacked size is %s, %s expected") % - (l2, l1), f) - except error.CensoredNodeError: - # experimental config: censor.policy - if ui.config("censor", "policy") == "abort": - self.err(lr, _("censored file data"), f) - except Exception as inst: - self.exc(lr, _("unpacking %s") % short(n), inst, f) + if n in state['skipread']: + continue # check renames try: + # This requires resolving fulltext (at least on revlogs). We + # may want ``verifyintegrity()`` to pass a set of nodes with + # rename metadata as an optimization. + rp = fl.renamed(n) if rp: if lr is not None and ui.verbose: ctx = lrugetctx(lr)