diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py --- a/remotefilelog/basepack.py +++ b/remotefilelog/basepack.py @@ -48,14 +48,63 @@ else: PACKOPENMODE = 'rb' +class _cachebackedpacks(object): + def __init__(self, packs, cachesize): + self._packs = set(packs) + self._lrucache = util.lrucachedict(cachesize) + self._lastpack = None + + # Avoid cold start of the cache by populating the most recent packs + # in the cache. + for i in reversed(range(min(cachesize, len(packs)))): + self._movetofront(packs[i]) + + def _movetofront(self, pack): + # This effectively makes pack the first entry in the cache. + self._lrucache[pack] = True + + def _registerlastpackusage(self): + if self._lastpack is not None: + self._movetofront(self._lastpack) + self._lastpack = None + + def add(self, pack): + self._registerlastpackusage() + + # This method will mostly be called when packs are not in cache. + # Therefore, adding pack to the cache. + self._movetofront(pack) + self._packs.add(pack) + + def __iter__(self): + self._registerlastpackusage() + + # Cache iteration is based on LRU. + for pack in self._lrucache: + self._lastpack = pack + yield pack + + cachedpacks = set(pack for pack in self._lrucache) + # Yield packs not in the cache. + for pack in self._packs - cachedpacks: + self._lastpack = pack + yield pack + + # Data not found in any pack. + self._lastpack = None + class basepackstore(object): + # Default cache size limit for the pack files. + DEFAULTCACHESIZE = 100 + def __init__(self, ui, path): self.path = path - self.packs = [] + # lastrefresh is 0 so we'll immediately check for new packs on the first # failure. 
self.lastrefresh = 0 + packs = [] for filepath in self._getavailablepackfiles(): try: pack = self.getpack(filepath) @@ -70,7 +119,9 @@ if getattr(ex, 'errno', None) != errno.ENOENT: ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex)) continue - self.packs.append(pack) + packs.append(pack) + + self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE) def _getavailablepackfiles(self): suffixlen = len(self.INDEXSUFFIX) @@ -102,6 +153,11 @@ for pack in self.packs: missing = pack.getmissing(missing) + # Ensures better performance of the cache by keeping the most + # recently accessed pack at the beginning in subsequent iterations. + if not missing: + return missing + if missing: for pack in self.refresh(): missing = pack.getmissing(missing) @@ -134,8 +190,9 @@ new = set(self._getavailablepackfiles()) - previous for filepath in new: - newpacks.append(self.getpack(filepath)) - self.packs.extend(newpacks) + newpack = self.getpack(filepath) + newpacks.append(newpack) + self.packs.add(newpack) return newpacks diff --git a/tests/remotefilelog-datapack.py b/tests/remotefilelog-datapack.py --- a/tests/remotefilelog-datapack.py +++ b/tests/remotefilelog-datapack.py @@ -17,6 +17,7 @@ from remotefilelog.datapack import ( datapack, + datapackstore, fastdatapack, mutabledatapack, ) @@ -267,6 +268,48 @@ actualcontent = pack.getdeltachain(filename, node)[0][4] self.assertEquals(actualcontent, content) + def testPacksCache(self): + """Test that we remember the most recent packs while fetching the delta + chain.""" + + packdir = self.makeTempDir() + packs = [] + + # Ensures that we are not keeping everything in the cache. 
+ numpacks = datapackstore.DEFAULTCACHESIZE * 2 + revisionsperpack = 100 + + for i in range(numpacks): + revisionsinpack = [] + revision = (str(i), self.getFakeHash(), nullid, "content") + + for _ in range(revisionsperpack): + revisionsinpack.append(revision) + revision = ( + str(i), + self.getFakeHash(), + revision[1], + self.getFakeHash() + ) + + self.createPack(revisionsinpack, packdir) + packs.append(revisionsinpack) + + store = datapackstore(mercurial.ui.ui(), packdir) + + random.shuffle(packs) + for pack in packs: + revision = random.choice(pack) + chain = store.getdeltachain(revision[0], revision[1]) + + mostrecentpack = next(iter(store.packs), None) + self.assertEquals( + mostrecentpack.getdeltachain(revision[0], revision[1]), + chain + ) + + self.assertEquals(pack.index(revision) + 1, len(chain)) + # perf test off by default since it's slow def _testIndexPerf(self): random.seed(0)