diff --git a/hgext3rd/clindex.pyx b/hgext3rd/clindex.pyx --- a/hgext3rd/clindex.pyx +++ b/hgext3rd/clindex.pyx @@ -23,6 +23,9 @@ # Path to write logs (default: $repo/.hg/cache/clindex.log) logpath = /tmp/a.log + + # build index for faster children access + childmap = True """ from __future__ import absolute_import @@ -30,6 +33,7 @@ import datetime import errno import os +import struct from mercurial import ( changelog, @@ -62,6 +66,8 @@ # for the table of node count and performance. configitem('clindex', 'lagthreshold', default=20000) +configitem('clindex', 'childmap', default=True) + origindextype = policy.importmod('parsers').index # cdef is important for performance because it avoids dict lookups: @@ -73,7 +79,7 @@ cdef readonly localconfig _config cdef readonly nodemap _nodemap cdef _origindex - cdef _vfs + cdef readonly childmap _childmap def __init__(self, data, inlined, vfs, config): assert not inlined @@ -84,7 +90,7 @@ # For example, disabling Rust nodemap temporarily if strip happens. self._config = config.copy() self._nodemap = nodemap(self._origindex, data, vfs, config) - self._vfs = vfs + self._childmap = childmap(vfs, data, config) def ancestors(self, *revs): return self._origindex.ancestors(*revs) @@ -116,6 +122,10 @@ rev = len(self._origindex) + rev self._origindex.insert(rev, entry) self._nodemap[entry[-1]] = rev + if self._config.childmap: + for p in entry[5:7]: + if p > -1: + self._childmap.insert(p, rev) def partialmatch(self, hexnode): return self._nodemap.partialmatch(hexnode) @@ -143,9 +153,11 @@ def destroying(self): _log('clindex: destroying') self._nodemap.destroying() + self._childmap.destroying() def updatecaches(self): self._nodemap.updatecache() + self._childmap.updatecache() cdef class nodemap(object): """mutable nodemap @@ -305,7 +317,7 @@ # index. Ideally we can keep changelog always up-to-date with the # index. But that requires more changes (ex. removing index.insert API # and index takes care of data writes). 
- candidates.update(k for k in self._overrides.iterkeys() + candidates.update(k for k in self._overrides.iterkeys() if hex(k).startswith(hexprefix)) if len(candidates) == 1: return list(candidates)[0] @@ -325,6 +337,102 @@ self._vfs.tryunlink('nodemap') self._config.nodemap = False
+cdef class childmap(object):
+    """parent rev -> child revs lookup backed by indexes.childmap
+
+    Children recorded through insert() are kept in an in-memory override
+    table and appended to the indexed children on lookup.
+    """
+    cdef readonly _rustchildmap
+    cdef localconfig _config
+    cdef readonly bint _updated
+    cdef readonly _overrides
+    cdef _vfs
+
+    # empty index buffer has a minimum of 22 * 4 bytes
+    emptyindex = b'\0' * 22 * 4
+
+    def __cinit__(self, vfs, changelog, config):
+        # Load the 'childmap' file from vfs. A missing or too-short file is
+        # treated as empty; if the index cannot be constructed from the
+        # loaded buffer, that is logged and an empty index is used instead.
+        self._config = config
+        self._overrides = {}
+        self._vfs = vfs
+        try:
+            # Close the file right away; the mapping does not need the file
+            # object to stay open.
+            with vfs('childmap') as f:
+                index = util.buffer(util.mmapread(f))
+            if len(index) < len(self.emptyindex):
+                index = self.emptyindex
+        except IOError as ex:
+            if ex.errno != errno.ENOENT:
+                raise
+            _log('childmap: is empty')
+            index = self.emptyindex
+        try:
+            rustchildmap = indexes.childmap(changelog, index)
+        except Exception as ex:
+            _log('childmap: corrupted: %r' % ex)
+            rustchildmap = indexes.childmap(changelog, self.emptyindex)
+        self._rustchildmap = rustchildmap
+        self._updated = False
+
+    def updatecache(self):
+        """write the rebuilt index to the 'childmap' file if it lags enough"""
+        # updatecache may get called *many* times. That is, an "outdated"
+        # changelog object being used across multiple transactions. This test
+        # avoids unnecessary re-updates.
+        if self._updated:
+            return
+        # childmap was disabled (ex. by destroying()). The changelog is now
+        # outdated. Do not rely on it for building the index.
+        if not self._config.childmap:
+            return
+        # Writing childmap has a cost. Do not update it if not lagging too much.
+        lag = self._rustchildmap.lag()
+        if lag == 0 or lag < self._config.lagthreshold:
+            return
+        _log('childmap: updating (lag=%s)' % lag)
+        with self._vfs('childmap', 'w', atomictemp=True) as f:
+            f.write(self._rustchildmap.build())
+        self._updated = True
+
+    cpdef insert(self, int parent, int child):
+        """record child as a child of parent in the in-memory overrides"""
+        self._overrides.setdefault(parent, []).append(child)
+
+    def __getitem__(self, int rev):
+        # Return the children of rev as a new list: entries from the index
+        # first, followed by children recorded through insert().
+        #
+        # Copy before extending so that appending overrides never modifies
+        # the object handed out by the underlying index.
+        children = list(self._rustchildmap[rev])
+        if self._config.verify:
+            for child in children:
+                parents = self._rustchildmap.parentrevs(child) or ()
+                if rev not in parents:
+                    _logandraise('childmap: wrong child %r for parent %r'
+                                 % (child, rev))
+        # pick up overrides
+        children.extend(self._overrides.get(rev, ()))
+        return children
+
+    @property
+    def lag(self):
+        """lag of the underlying index, or 0 if childmap is disabled"""
+        if self._config.childmap:
+            return self._rustchildmap.lag()
+        else:
+            return 0
+
+    def destroying(self):
+        """remove the on-disk 'childmap' file and disable childmap"""
+        self._vfs.tryunlink('childmap')
+        self._config.childmap = False
+
# These are unfortunate. But we need vfs access inside index.__init__. Doing # that properly requires API changes in revlog.__init__ and # revlogio.parseindex that might make things uglier, or break the (potential) @@ -338,12 +446,14 @@ # Lightweight config state that is dedicated for this extensions and is # decoupled from heavy-weight ui object.
cdef class localconfig: + cdef public bint childmap cdef public bint nodemap cdef public bint verify cdef public int lagthreshold def copy(self): rhs = localconfig() + rhs.childmap = self.childmap rhs.nodemap = self.nodemap rhs.verify = self.verify rhs.lagthreshold = self.lagthreshold @@ -352,6 +442,7 @@ @classmethod def fromui(cls, ui): self = cls() + self.childmap = ui.configbool('clindex', 'childmap') self.nodemap = ui.configbool('clindex', 'nodemap') self.verify = ui.configbool('clindex', 'verify') self.lagthreshold = ui.configint('clindex', 'lagthreshold') diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -321,7 +321,7 @@ sources=['hgext3rd/clindex.pyx'], include_dirs=['hgext3rd'], extra_compile_args=filter(None, [ - STDC99, WALL, WEXTRA, WCONVERSION, PEDANTIC, + STDC99, WALL, WEXTRA, PEDANTIC, ]), ), ],