diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py
--- a/remotefilelog/basepack.py
+++ b/remotefilelog/basepack.py
@@ -48,14 +48,63 @@
 else:
     PACKOPENMODE = 'rb'
 
+class _cachebackedpacks(object):
+    def __init__(self, packs, cachesize):
+        self._packs = set(packs)
+        self._lrucache = util.lrucachedict(cachesize)
+        self._lastpack = None
+
+        # Avoid cold start of the cache by populating the most recent packs
+        # in the cache.
+        for i in reversed(range(min(cachesize, len(packs)))):
+            self._movetofront(packs[i])
+
+    def _movetofront(self, pack):
+        # This effectively makes pack the first entry in the cache.
+        self._lrucache[pack] = True
+
+    def _registerlastpackusage(self):
+        if self._lastpack is not None:
+            self._movetofront(self._lastpack)
+            self._lastpack = None
+
+    def add(self, pack):
+        self._registerlastpackusage()
+
+        # This method will mostly be called when packs are not in cache.
+        # Therefore, adding pack to the cache.
+        self._movetofront(pack)
+        self._packs.add(pack)
+
+    def __iter__(self):
+        self._registerlastpackusage()
+
+        # Cache iteration is based on LRU.
+        for pack in self._lrucache:
+            self._lastpack = pack
+            yield pack
+
+        cachedpacks = set(pack for pack in self._lrucache)
+        # Yield for paths not in the cache.
+        for pack in self._packs - cachedpacks:
+            self._lastpack = pack
+            yield pack
+
+        # Data not found in any pack.
+        self._lastpack = None
+
 class basepackstore(object):
+    # Default cache size limit for the pack files.
+    DEFAULTCACHESIZE = 100
+
     def __init__(self, ui, path):
         self.path = path
-        self.packs = []
+
         # lastrefesh is 0 so we'll immediately check for new packs on the first
         # failure.
         self.lastrefresh = 0
 
+        packs = []
         for filepath in self._getavailablepackfiles():
             try:
                 pack = self.getpack(filepath)
@@ -70,7 +119,9 @@
                 if getattr(ex, 'errno', None) != errno.ENOENT:
                     ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                 continue
-            self.packs.append(pack)
+            packs.append(pack)
+
+        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
 
     def _getavailablepackfiles(self):
         suffixlen = len(self.INDEXSUFFIX)
@@ -102,6 +153,11 @@
         for pack in self.packs:
             missing = pack.getmissing(missing)
 
+            # Ensures better performance of the cache by keeping the most
+            # recently accessed pack at the beginning in subsequent iterations.
+            if not missing:
+                return missing
+
         if missing:
             for pack in self.refresh():
                 missing = pack.getmissing(missing)
@@ -134,8 +190,9 @@
             new = set(self._getavailablepackfiles()) - previous
 
             for filepath in new:
-                newpacks.append(self.getpack(filepath))
-            self.packs.extend(newpacks)
+                newpack = self.getpack(filepath)
+                newpacks.append(newpack)
+                self.packs.add(newpack)
 
         return newpacks
 