diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py
--- a/remotefilelog/basepack.py
+++ b/remotefilelog/basepack.py
@@ -41,6 +41,9 @@
 # loaded the pack list.
 REFRESHRATE = 0.1
 
+# Default cache size limit for the pack files.
+DEFAULTCACHESIZE = 100
+
 if pycompat.isposix:
     # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
     # The 'e' flag will be ignored on older versions of glibc.
@@ -51,11 +54,12 @@
 class basepackstore(object):
     def __init__(self, ui, path):
         self.path = path
-        self.packs = []
+
         # lastrefesh is 0 so we'll immediately check for new packs on the first
         # failure.
         self.lastrefresh = 0
 
+        packs = []
         for filepath in self._getavailablepackfiles():
             try:
                 pack = self.getpack(filepath)
@@ -70,7 +74,64 @@
                 if getattr(ex, 'errno', None) != errno.ENOENT:
                     ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                 continue
-            self.packs.append(pack)
+            packs.append(pack)
+
+        self.packs = self.cachebackedpacks(packs, DEFAULTCACHESIZE)
+
+    class cachebackedpacks(object):
+        """Set of packs whose iteration order is driven by an LRU cache.
+
+        Packs present in the cache are yielded first during iteration; the
+        remaining packs follow.
+        """
+
+        def __init__(self, packs, cachesize):
+            self._packs = set(packs)
+            self._lrucache = util.lrucachedict(cachesize)
+            # Pack most recently yielded by __iter__, or None.
+            self._lastpack = None
+
+            # Avoid cold start of the cache by populating the most recent packs
+            # in the cache.
+            for i in reversed(range(min(cachesize, len(packs)))):
+                self._lrucache[packs[i]] = True
+
+        def _registerlastpackusage(self):
+            """Record the pack last yielded by __iter__ as recently used."""
+            # Compare against None explicitly so that a pack object which
+            # happens to evaluate as false is still registered.
+            if self._lastpack is not None:
+                # This effectively makes the last used pack the first entry in
+                # the cache.
+                self._lrucache[self._lastpack] = True
+                self._lastpack = None
+
+        def add(self, pack):
+            """Add ``pack`` to the collection and to the cache."""
+            self._registerlastpackusage()
+
+            # This method will mostly be called when packs are not in cache.
+            # Therefore, adding pack to the cache.
+            self._lrucache[pack] = True
+            self._packs.add(pack)
+
+        def __iter__(self):
+            """Yield the cached packs first, then every remaining pack."""
+            self._registerlastpackusage()
+
+            # Cache iteration is based on LRU.
+            for pack in self._lrucache:
+                self._lastpack = pack
+                yield pack
+
+            cachedpacks = set(self._lrucache)
+            # Yield for packs not in the cache.
+            for pack in self._packs - cachedpacks:
+                self._lastpack = pack
+                yield pack
+
+            # Data not found in any pack.
+            self._lastpack = None
 
     def _getavailablepackfiles(self):
         suffixlen = len(self.INDEXSUFFIX)
@@ -102,6 +163,11 @@
         for pack in self.packs:
             missing = pack.getmissing(missing)
 
+            # Ensures better performance of the cache by keeping the most
+            # recently accessed pack at the beginning in subsequent iterations.
+            if not missing:
+                return missing
+
         if missing:
             for pack in self.refresh():
                 missing = pack.getmissing(missing)
@@ -134,8 +200,9 @@
             new = set(self._getavailablepackfiles()) - previous
 
             for filepath in new:
-                newpacks.append(self.getpack(filepath))
-            self.packs.extend(newpacks)
+                newpack = self.getpack(filepath)
+                newpacks.append(newpack)
+                self.packs.add(newpack)
 
         return newpacks
 