diff --git a/remotefilelog/__init__.py b/remotefilelog/__init__.py --- a/remotefilelog/__init__.py +++ b/remotefilelog/__init__.py @@ -20,6 +20,10 @@ ``remotefilelog.repackonhggc`` runs repack on hg gc when True ``remotefilelog.prefetchdays`` specifies the maximum age of a commit in days after which it is no longer prefetched. + ``remotefilelog.data.maxrepackpacks`` the maximum number of pack files to + include in an incremental data repack. + ``remotefilelog.history.maxrepackpacks`` the maximum number of pack files to + include in an incremental history repack. """ from . import fileserverclient, remotefilelog, remotefilectx, shallowstore diff --git a/remotefilelog/repack.py b/remotefilelog/repack.py --- a/remotefilelog/repack.py +++ b/remotefilelog/repack.py @@ -180,9 +180,11 @@ gencountlimit = ui.configint('remotefilelog', 'data.gencountlimit', 2) repacksizelimit = ui.configbytes('remotefilelog', 'data.repacksizelimit', '100MB') + maxrepackpacks = ui.configint('remotefilelog', 'data.maxrepackpacks', 50) return _computeincrementalpack(ui, files, generations, datapack.PACKSUFFIX, - datapack.INDEXSUFFIX, gencountlimit, repacksizelimit) + datapack.INDEXSUFFIX, gencountlimit, + repacksizelimit, maxrepackpacks) def _computeincrementalhistorypack(ui, files): generations = ui.configlist("remotefilelog", "history.generations", @@ -194,13 +196,14 @@ gencountlimit = ui.configint('remotefilelog', 'history.gencountlimit', 2) repacksizelimit = ui.configbytes('remotefilelog', 'history.repacksizelimit', '100MB') + maxrepackpacks = ui.configint('remotefilelog', 'history.maxrepackpacks', 50) return _computeincrementalpack(ui, files, generations, historypack.PACKSUFFIX, historypack.INDEXSUFFIX, gencountlimit, - repacksizelimit) + repacksizelimit, maxrepackpacks) def _computeincrementalpack(ui, files, limits, packsuffix, indexsuffix, - gencountlimit, repacksizelimit): + gencountlimit, repacksizelimit, maxrepackpacks): # Group the packs by generation (i.e. by size) generations = [] for i in xrange(len(limits)): @@ -224,39 +227,35 @@ generations[i].append(prefix) break - # Find the largest generation with more than 2 packs and repack it. + # Steps for picking what packs to repack: + # 1. Pick the largest generation with >2 pack files. + # 2. Take the smallest three packs. + # 3. While total-size-of-packs < repacksizelimit: add another pack + + # Find the largest generation with more than gencountlimit packs + genpacks = [] for i, limit in enumerate(limits): if len(generations[i]) > gencountlimit: - # Try to repack 3 things at once. This means if we run an - # incremental repack right after we add a new pack file, we'll still - # decrease the total number of pack files. - count = 3 - if sum(sizes[n] for n in generations[i]) < repacksizelimit: - count = len(generations[i]) - return sorted(generations[i], key=lambda x: sizes[x])[:count] + # Sort to be smallest last, for easy popping later + genpacks.extend(sorted(generations[i], reverse=True, + key=lambda x: sizes[x])) + break - # If no generation has more than 2 packs, repack as many as fit into the - # limit - small = set().union(*generations[1:]) - if len(small) > 1: - total = 0 - packs = [] - for pack in sorted(small, key=lambda x: sizes[x]): - size = sizes[pack] - if total + size < repacksizelimit: - packs.append(pack) - total += size - else: - break + # Take as many packs from the generation as we can + chosenpacks = genpacks[-3:] + genpacks = genpacks[:-3] + repacksize = sum(sizes[n] for n in chosenpacks) + while (repacksize < repacksizelimit and genpacks and + len(chosenpacks) < maxrepackpacks): + chosenpacks.append(genpacks.pop()) + repacksize += sizes[chosenpacks[-1]] - if len(packs) > 1: - return packs + # If there aren't any good candidates for a repack, + # repack the two largest ones. + if not chosenpacks and len(generations[0]) > 1: + chosenpacks = generations[0] - # If there aren't small ones to repack, repack the two largest ones. - if len(generations[0]) > 1: - return generations[0] - - return [] + return chosenpacks def _runrepack(repo, data, history, packpath, category): shallowutil.mkstickygroupdir(repo.ui, packpath) diff --git a/tests/test-treemanifest-repack.t b/tests/test-treemanifest-repack.t --- a/tests/test-treemanifest-repack.t +++ b/tests/test-treemanifest-repack.t @@ -142,12 +142,24 @@ -r--r--r-- 248 544a3b46a61732209116ae50847ec333b75e3765.datapack -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack + $ cd .hg/store/packs/manifests + $ cp d7e689a91ac63385be120a118af9ce8663748f28.datapack x7e689a91ac63385be120a118af9ce8663748f28.datapack + $ cp d7e689a91ac63385be120a118af9ce8663748f28.dataidx x7e689a91ac63385be120a118af9ce8663748f28.dataidx + $ cp 21501384df03b8489b366c5218be639fa08830e4.datapack x1501384df03b8489b366c5218be639fa08830e4.datapack + $ cp 21501384df03b8489b366c5218be639fa08830e4.dataidx x1501384df03b8489b366c5218be639fa08830e4.dataidx + $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack x47263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack + $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx x47263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx + $ cd ../../../../ - repack incremental kicks in once there are a number of packs - $ hg repack --incremental --config remotefilelog.data.generations=300,200 +- (set the repacksizelimit so that we test that we only repack up to 1500 bytes, +- and it leaves one x** datapack behind) + $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=1500B $ ls_l .hg/store/packs/manifests | grep datapack -r--r--r-- 374 201094db51b761cd78352c055b3135178aadfec5.datapack -r--r--r-- 1188 785af77b59c45dd43e3e0e63929d77665c505387.datapack + -r--r--r-- 248 x47263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack + $ rm -f .hg/store/packs/manifests/x* Test repacking from revlogs to pack files on the server $ cd ../master