diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py --- a/remotefilelog/basepack.py +++ b/remotefilelog/basepack.py @@ -51,7 +51,8 @@ class basepackstore(object): def __init__(self, ui, path): self.path = path - self.packs = [] + self.pathtopack = self.cachebackedpacks(self.DEFAULTCACHESIZE) + # lastrefresh is 0 so we'll immediately check for new packs on the first # failure. self.lastrefresh = 0 @@ -70,7 +71,46 @@ if getattr(ex, 'errno', None) != errno.ENOENT: ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex)) continue - self.packs.append(pack) + self.pathtopack[filepath] = pack + + class cachebackedpacks(object): + def __init__(self, cachesize): + self._pathtopack = {} + self._lrucache = util.lrucachedict(cachesize) + + def __len__(self): + return len(self._pathtopack) + + def __contains__(self, path): + return path in self._pathtopack + + def __getitem__(self, path): + if path in self._lrucache: + return self._lrucache[path] + + pack = self._pathtopack[path] + self._lrucache[path] = pack + return pack + + def __setitem__(self, path, pack): + self._pathtopack[path] = pack + + def __delitem__(self, path): + if path in self._lrucache: + del self._lrucache[path] + del self._pathtopack[path] + + def __iter__(self): + cachedpaths = set() + + # cache iteration is based on LRU. + for path in self._lrucache: + cachedpaths.add(path) + yield path + + # yield the paths not in the cache. + for path in set(self._pathtopack.keys()) - cachedpaths: + yield path def _getavailablepackfiles(self): suffixlen = len(self.INDEXSUFFIX) @@ -85,6 +125,10 @@ if ex.errno != errno.ENOENT: raise + # TODO: Refactor/Remove this section because there are negligible + # performance benefits of fetching the most recent files here after + # adding the LRU cache. + # Put most recent pack files first since they contain the most recent # info. 
files = sorted(files, reverse=True) @@ -99,9 +143,16 @@ def getmissing(self, keys): missing = keys - for pack in self.packs: + + for path in self.pathtopack: + pack = self.pathtopack[path] missing = pack.getmissing(missing) + # Ensures better performance of the cache by keeping the most + # recently accessed pack at the beginning in subsequent iterations. + if not missing: + return missing + if missing: for pack in self.refresh(): missing = pack.getmissing(missing) @@ -109,7 +160,8 @@ return missing def markledger(self, ledger): - for pack in self.packs: + for path in self.pathtopack: + pack = self.pathtopack[path] pack.markledger(ledger) def markforrefresh(self): @@ -130,12 +182,13 @@ newpacks = [] if now > self.lastrefresh + REFRESHRATE: self.lastrefresh = now - previous = set(p.path for p in self.packs) + previous = set(path for path in self.pathtopack) new = set(self._getavailablepackfiles()) - previous for filepath in new: - newpacks.append(self.getpack(filepath)) - self.packs.extend(newpacks) + newpack = self.getpack(filepath) + newpacks.append(newpack) + self.pathtopack[filepath] = newpack return newpacks diff --git a/remotefilelog/datapack.py b/remotefilelog/datapack.py --- a/remotefilelog/datapack.py +++ b/remotefilelog/datapack.py @@ -25,9 +25,13 @@ PACKSUFFIX = '.datapack' class datapackstore(basepack.basepackstore): + INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX + # Default cache size limit for the pack files. 
+ DEFAULTCACHESIZE = 100 + def __init__(self, ui, path, usecdatapack=False): self.usecdatapack = usecdatapack super(datapackstore, self).__init__(ui, path) @@ -42,8 +46,9 @@ raise RuntimeError("must use getdeltachain with datapackstore") def getmeta(self, name, node): - for pack in self.packs: + for path in self.pathtopack: try: + pack = self.pathtopack[path] return pack.getmeta(name, node) except KeyError: pass @@ -57,8 +62,9 @@ raise KeyError((name, hex(node))) def getdeltachain(self, name, node): - for pack in self.packs: + for path in self.pathtopack: try: + pack = self.pathtopack[path] return pack.getdeltachain(name, node) except KeyError: pass diff --git a/remotefilelog/historypack.py b/remotefilelog/historypack.py --- a/remotefilelog/historypack.py +++ b/remotefilelog/historypack.py @@ -36,12 +36,16 @@ INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX + # Default cache size limit for the pack files. + DEFAULTCACHESIZE = 100 + def getpack(self, path): return historypack(path) def getancestors(self, name, node, known=None): - for pack in self.packs: + for path in self.pathtopack: try: + pack = self.pathtopack[path] return pack.getancestors(name, node, known=known) except KeyError: pass @@ -55,8 +59,9 @@ raise KeyError((name, node)) def getnodeinfo(self, name, node): - for pack in self.packs: + for path in self.pathtopack: try: + pack = self.pathtopack[path] return pack.getnodeinfo(name, node) except KeyError: pass