diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
--- a/mercurial/bundle2.py
+++ b/mercurial/bundle2.py
@@ -1754,7 +1754,7 @@
 def addparttagsfnodescache(repo, bundler, outgoing):
     # we include the tags fnode cache for the bundle changeset
     # (as an optional parts)
-    cache = tags.hgtagsfnodescache(repo.unfiltered())
+    cache = repo.hgtagsfnodescache()
     chunks = []
 
     # .hgtags fnodes are only relevant for head changesets. While we could
@@ -2450,27 +2450,10 @@
 
 @parthandler(b'hgtagsfnodes')
 def handlehgtagsfnodes(op, inpart):
-    """Applies .hgtags fnodes cache entries to the local repo.
-
-    Payload is pairs of 20 byte changeset nodes and filenodes.
-    """
-    # Grab the transaction so we ensure that we have the lock at this point.
-    if op.ui.configbool(b'experimental', b'bundle2lazylocking'):
-        op.gettransaction()
-    cache = tags.hgtagsfnodescache(op.repo.unfiltered())
-
-    count = 0
-    while True:
-        node = inpart.read(20)
-        fnode = inpart.read(20)
-        if len(node) < 20 or len(fnode) < 20:
-            op.ui.debug(b'ignoring incomplete received .hgtags fnodes data\n')
-            break
-        cache.setfnode(node, fnode)
-        count += 1
-
-    cache.write()
-    op.ui.debug(b'applied %i hgtags fnodes cache entries\n' % count)
+    """Legacy part, ignored for compatibility with bundles from
+    or for Mercurial before 5.9. Newer Mercurial computes the cache
+    efficiently enough during unbundling that the additional transfer
+    is unnecessary."""
 
 
 rbcstruct = struct.Struct(b'>III')
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -3864,16 +3864,14 @@
 @command(b'debugtagscache', [])
 def debugtagscache(ui, repo):
     """display the contents of .hg/cache/hgtagsfnodes1"""
-    cache = tagsmod.hgtagsfnodescache(repo.unfiltered())
+    cache = repo.hgtagsfnodescache()
     for r in repo:
         node = repo[r].node()
         tagsnode = cache.getfnode(node, computemissing=False)
         if tagsnode:
             tagsnodedisplay = hex(tagsnode)
-        elif tagsnode is False:
-            tagsnodedisplay = b'invalid'
         else:
-            tagsnodedisplay = b'missing'
+            tagsnodedisplay = b'missing or invalid'
         ui.write(b'%d %s %s\n' % (r, hex(node), tagsnodedisplay))
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
--- a/mercurial/interfaces/repository.py
+++ b/mercurial/interfaces/repository.py
@@ -1648,6 +1648,9 @@
     def revbranchcache():
         pass
 
+    def hgtagsfnodescache():
+        pass
+
     def register_changeset(rev, changelogrevision):
         """Extension point for caches for new nodes.
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1356,6 +1356,7 @@
         self._filterpats = {}
         self._datafilters = {}
         self._transref = self._lockref = self._wlockref = None
+        self._hgtagsfnodescache = None
 
         # A cache for various files under .hg/ that tracks file changes,
         # (used by the filecache decorator)
@@ -1468,6 +1469,8 @@
     def _writecaches(self):
         if self._revbranchcache:
             self._revbranchcache.write()
+        if self._hgtagsfnodescache:
+            self._hgtagsfnodescache.write()
 
     def _restrictcapabilities(self, caps):
         if self.ui.configbool(b'experimental', b'bundle2-advertise'):
@@ -2065,8 +2068,17 @@
             self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
         return self._revbranchcache
 
+    @unfilteredmethod
+    def hgtagsfnodescache(self):
+        if not self._hgtagsfnodescache:
+            self._hgtagsfnodescache = tagsmod.hgtagsfnodescache(
+                self.unfiltered()
+            )
+        return self._hgtagsfnodescache
+
     def register_changeset(self, rev, changelogrevision):
         self.revbranchcache().setdata(rev, changelogrevision)
+        self.hgtagsfnodescache().setdata(rev, changelogrevision)
 
     def branchtip(self, branch, ignoremissing=False):
         """return the tip node for a given branch
@@ -2702,6 +2714,8 @@
         self.filtered(b'served').branchmap()
         self.filtered(b'served.hidden').branchmap()
 
+        self.hgtagsfnodescache()._updatecache(full)
+
         if full:
             unfi = self.unfiltered()
@@ -2717,8 +2731,6 @@
             for ctx in self[b'.'].parents():
                 ctx.manifest()  # accessing the manifest is enough
 
-            # accessing fnode cache warms the cache
-            tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
             # accessing tags warm the cache
             self.tags()
             self.filtered(b'served').tags()
diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py
--- a/mercurial/statichttprepo.py
+++ b/mercurial/statichttprepo.py
@@ -215,6 +215,7 @@
         self.nodetagscache = None
         self._branchcaches = branchmap.BranchMapCache()
         self._revbranchcache = None
+        self._hgtagsfnodescache = None
         self.encodepats = None
         self.decodepats = None
         self._transref = None
diff --git a/mercurial/tags.py b/mercurial/tags.py
--- a/mercurial/tags.py
+++ b/mercurial/tags.py
@@ -492,7 +492,7 @@
     without a '.hgtags' file.
     """
     starttime = util.timer()
-    fnodescache = hgtagsfnodescache(repo.unfiltered())
+    fnodescache = repo.hgtagsfnodescache()
    cachefnode = {}
    for node in nodes:
        fnode = fnodescache.getfnode(node)
@@ -504,9 +504,7 @@
     duration = util.timer() - starttime
     ui.log(
         b'tagscache',
-        b'%d/%d cache hits/lookups in %0.4f seconds\n',
-        fnodescache.hitcount,
-        fnodescache.lookupcount,
+        b'cache update took %0.4f seconds\n',
         duration,
     )
     return cachefnode
@@ -695,186 +693,161 @@
     def __init__(self, repo):
         assert repo.filtername is None
-
         self._repo = repo
-
-        # Only for reporting purposes.
-        self.lookupcount = 0
-        self.hitcount = 0
-
+        self._delayed = {}
+        self._delayed_clr = {}
+        self._hgtagsfile = None
         try:
-            data = repo.cachevfs.read(_fnodescachefile)
-        except (OSError, IOError):
-            data = b""
-        self._raw = bytearray(data)
+            self._hgtagsrevs = bytearray(repo.cachevfs.read(_fnodescachefile))
+        except (IOError, OSError):
+            self._hgtagsrevs = bytearray()
+        self._hgtagsrevslen = min(
+            len(self._hgtagsrevs) // _fnodesrecsize, len(repo)
+        )
+        expected = self._hgtagsrevslen * _fnodesrecsize
+        if len(self._hgtagsrevs) != expected:
+            self._hgtagsrevs = self._hgtagsrevs[:expected]
+        self._oldhgtagsrevslen = self._hgtagsrevslen
 
-        # The end state of self._raw is an array that is of the exact length
-        # required to hold a record for every revision in the repository.
-        # We truncate or extend the array as necessary. self._dirtyoffset is
-        # defined to be the start offset at which we need to write the output
-        # file. This offset is also adjusted when new entries are calculated
-        # for array members.
-        cllen = len(repo.changelog)
-        wantedlen = cllen * _fnodesrecsize
-        rawlen = len(self._raw)
-
-        self._dirtyoffset = None
+    def _clear(self):
+        self._hgtagsrevs = bytearray()
+        self._oldhgtagsrevslen = 0
+        self._hgtagsrevslen = 0
 
-        rawlentokeep = min(
-            wantedlen, (rawlen // _fnodesrecsize) * _fnodesrecsize
-        )
-        if rawlen > rawlentokeep:
-            # There's no easy way to truncate array instances. This seems
-            # slightly less evil than copying a potentially large array slice.
-            for i in range(rawlen - rawlentokeep):
-                self._raw.pop()
-            rawlen = len(self._raw)
-            self._dirtyoffset = rawlen
-        if rawlen < wantedlen:
-            if self._dirtyoffset is None:
-                self._dirtyoffset = rawlen
-            # TODO: zero fill entire record, because it's invalid not missing?
-            self._raw.extend(b'\xff' * (wantedlen - rawlen))
+    def _validaterecord(self, record, node):
+        if record[:4] != node[:4]:
+            return False
+        if record[4:] == nullid:
+            return True
+        if self._hgtagsfile is None:
+            self._hgtagsfile = self._repo.file(b'.hgtags')
+        if not self._hgtagsfile.hasnode(bytes(record[4:])):
+            # Reopen revlog and check again as it might have been added
+            # behind our back.
+            self._hgtagsfile = self._repo.file(b'.hgtags')
+            if not self._hgtagsfile.hasnode(bytes(record[4:])):
+                return False
+        return True
 
-    def getfnode(self, node, computemissing=True):
+    def getfnode(self, nodeorrev, computemissing=True):
         """Obtain the filenode of the .hgtags file at a specified revision.
 
+        If an .hgtags does not exist at the specified revision, nullid is used
+        as filenode.
+
         If the value is in the cache, the entry will be validated and returned.
-        Otherwise, the filenode will be computed and returned unless
-        "computemissing" is False. In that case, None will be returned if
-        the entry is missing or False if the entry is invalid without
-        any potentially expensive computation being performed.
-
-        If an .hgtags does not exist at the specified revision, nullid is
-        returned.
+        If "computemissing" is False and the value is not in the cache or the
+        cached entry is invalid, None is returned.
+        If "computemissing" is True, a missing or invalid value will be
+        computed and the cache updated.
         """
-        if node == nullid:
+        if nodeorrev == nullid or nodeorrev == nullrev:
             return nullid
 
-        ctx = self._repo[node]
-        rev = ctx.rev()
-
-        self.lookupcount += 1
-
+        repo = self._repo
+        if isinstance(nodeorrev, int):
+            rev = nodeorrev
+            node = repo.changelog.node(rev)
+        else:
+            node = nodeorrev
+            rev = repo.changelog.rev(node)
         offset = rev * _fnodesrecsize
-        record = b'%s' % self._raw[offset : offset + _fnodesrecsize]
-        properprefix = node[0:4]
-
-        # Validate and return existing entry.
-        if record != _fnodesmissingrec and len(record) == _fnodesrecsize:
-            fileprefix = record[0:4]
+        if rev < self._hgtagsrevslen:
+            record = self._hgtagsrevs[offset : offset + _fnodesrecsize]
+            if self._validaterecord(record, node):
+                return bytes(record[4:])
+        if not computemissing:
+            return None
+        # If there should have been a cache entry, force a full recompute.
+        # Otherwise assume that the cache was valid and just compute new
+        # entries.
+        self._updatecache(rev < self._hgtagsrevslen)
+        record = self._hgtagsrevs[offset : offset + _fnodesrecsize]
+        assert self._validaterecord(record, node)
+        return bytes(record[4:])
-
+    def _computefnode(self, rev):
+        # Used by _updatecache
         cl = self._repo.changelog
-        p1rev, p2rev = cl._uncheckedparentrevs(rev)
-        p1node = cl.node(p1rev)
-        p1fnode = self.getfnode(p1node, computemissing=False)
-        if p2rev != nullrev:
-            # There is some no-merge changeset where p1 is null and p2 is set
-            # Processing them as merge is just slower, but still gives a good
-            # result.
-            p2node = cl.node(p1rev)
-            p2fnode = self.getfnode(p2node, computemissing=False)
-            if p1fnode != p2fnode:
-                # we cannot rely on readfast because we don't know against what
-                # parent the readfast delta is computed
-                p1fnode = None
-        if p1fnode:
-            mctx = ctx.manifestctx()
-            fnode = mctx.readfast().get(b'.hgtags')
-            if fnode is None:
-                fnode = p1fnode
-        if fnode is None:
-            # Populate missing entry.
-            try:
-                fnode = ctx.filenode(b'.hgtags')
-            except error.LookupError:
-                # No .hgtags file on this revision.
-                fnode = nullid
-
-        self._writeentry(offset, properprefix, fnode)
-        return fnode
-
-    def setfnode(self, node, fnode):
-        """Set the .hgtags filenode for a given changeset."""
-        assert len(fnode) == 20
-        ctx = self._repo[node]
+        if rev in self._delayed:
+            # New node
+            p1, p2 = cl._uncheckedparentrevs(rev)
+            return self.getfnode(p1 if p1 != nullrev else p2, False)
+        changelogrevision = self._delayed_clr.get(rev)
+        if changelogrevision is None:
+            changelogrevision = cl.changelogrevision(rev)
+        if b'.hgtags' not in changelogrevision.files:
+            # Since .hgtags didn't change from the parents,
+            # just look up the first parent. Topological order
+            # of the repository means we should already know this.
+            p1, p2 = cl._uncheckedparentrevs(rev)
+            return self.getfnode(p1 if p1 != nullrev else p2, False)
+        mfl = self._repo.manifestlog[changelogrevision.manifest]
+        mctx = self._repo[rev]._manifestctx
+        fnode = mctx.readdelta().get(b'.hgtags')
+        if fnode:
+            return fnode
+        try:
+            return mctx.find(b'.hgtags')[0]
+        except (error.ManifestLookupError, KeyError):
+            return nullid
 
-        # Do a lookup first to avoid writing if nothing has changed.
-        if self.getfnode(ctx.node(), computemissing=False) == fnode:
-            return
-
-        self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
-
-    def _writeentry(self, offset, prefix, fnode):
-        # Slices on array instances only accept other array.
-        entry = bytearray(prefix + fnode)
-        self._raw[offset : offset + _fnodesrecsize] = entry
-        # self._dirtyoffset could be None.
-        self._dirtyoffset = min(self._dirtyoffset or 0, offset or 0)
+    def _updatecache(self, full):
+        repo = self._repo
+        cl = repo.changelog
+        startrev = 0 if full else self._hgtagsrevslen
+        for rev in pycompat.xrange(startrev, self._hgtagsrevslen):
+            offset = rev * _fnodesrecsize
+            node = cl.node(rev)
+            record = self._hgtagsrevs[offset : offset + _fnodesrecsize]
+            if self._validaterecord(record, node):
+                continue
+            fnode = self._computefnode(rev)
+            if fnode is None:
+                raise error.Abort(_(b'invalid topology'))
+            record = node[:4] + fnode
+            assert self._validaterecord(record, node)
+            self._hgtagsrevs[offset : offset + _fnodesrecsize] = record
+            self._oldhgtagsrevslen = min(self._oldhgtagsrevslen, rev)
+        self._hgtagsrevs.extend(
+            b'\x00' * (len(repo) - self._hgtagsrevslen) * _fnodesrecsize
+        )
+        for rev in pycompat.xrange(self._hgtagsrevslen, len(repo)):
+            offset = rev * _fnodesrecsize
+            fnode = self._computefnode(rev)
+            if fnode is None:
+                if full:
+                    raise error.Abort(_(b'invalid topology'))
+                self._updatecache(True)
+                return
+            record = cl.node(rev)[:4] + fnode
+            assert self._validaterecord(record, cl.node(rev))
+            self._hgtagsrevs[offset : offset + _fnodesrecsize] = record
+            self._hgtagsrevslen += 1
 
-    def write(self):
-        """Perform all necessary writes to cache file.
+    def setdata(self, rev, changelogrevision):
+        """add new data information to the cache"""
+        if b'.hgtags' not in changelogrevision.files:
+            p1, p2 = self._repo.changelog._uncheckedparentrevs(rev)
+            p = p1 if p1 != nullrev else p2
+            if p in self._delayed:
+                p = self._delayed[p]
+            self._delayed[rev] = p
+        else:
+            self._delayed_clr[rev] = changelogrevision
 
-        This may no-op if no writes are needed or if a write lock could
-        not be obtained.
-        """
-        if self._dirtyoffset is None:
+    def write(self, tr=None):
+        """Save the .hgtags fnodes cache if it is dirty."""
+        self._updatecache(False)
+        if self._oldhgtagsrevslen == self._hgtagsrevslen:
             return
-
-        data = self._raw[self._dirtyoffset :]
-        if not data:
-            return
-
        repo = self._repo
-
        try:
-            lock = repo.lock(wait=False)
-        except error.LockError:
-            repo.ui.log(
-                b'tagscache',
-                b'not writing .hg/cache/%s because '
-                b'lock cannot be acquired\n' % _fnodescachefile,
+            f = repo.cachevfs.open(_fnodescachefile, b'wb', atomictemp=True)
+            f.write(self._hgtagsrevs)
+            f.close()
+        except (IOError, OSError) as inst:
+            repo.ui.debug(
+                b"couldn't write .hgtags revision cache: %s\n"
+                % stringutil.forcebytestr(inst)
             )
-            return
-
-        try:
-            f = repo.cachevfs.open(_fnodescachefile, b'ab')
-            try:
-                # if the file has been truncated
-                actualoffset = f.tell()
-                if actualoffset < self._dirtyoffset:
-                    self._dirtyoffset = actualoffset
-                    data = self._raw[self._dirtyoffset :]
-                f.seek(self._dirtyoffset)
-                f.truncate()
-                repo.ui.log(
-                    b'tagscache',
-                    b'writing %d bytes to cache/%s\n'
-                    % (len(data), _fnodescachefile),
-                )
-                f.write(data)
-                self._dirtyoffset = None
-            finally:
-                f.close()
-        except (IOError, OSError) as inst:
-            repo.ui.log(
-                b'tagscache',
-                b"couldn't write cache/%s: %s\n"
-                % (_fnodescachefile, stringutil.forcebytestr(inst)),
-            )
-        finally:
-            lock.release()