diff --git a/remotefilelog/repack.py b/remotefilelog/repack.py --- a/remotefilelog/repack.py +++ b/remotefilelog/repack.py @@ -1,5 +1,6 @@ from __future__ import absolute_import +import itertools import os from hgext3rd.extutil import runshellcommand, fcntllock from mercurial import ( @@ -292,32 +293,45 @@ break # Steps for picking what packs to repack: - # 1. Pick the largest generation with >2 pack files. - # 2. Take the smallest three packs. - # 3. While total-size-of-packs < repacksizelimit: add another pack + # 1. Pick the largest generation with > gencountlimit pack files. + # 2. If no such generation exists, consider pack files across generations. + # Exclude packs from the highest generation while considering packs + # across generations because it can have huge packs. + # 3. Starting with two smallest packs, take as many packs as we can within + # the constraints. The current constraints include the total size and the + # number of packs considered for the repacking. - # Find the largest generation with more than gencountlimit packs + # Find the largest generation with more than gencountlimit packs. + # Packs will always be sorted to be smallest last, for easy popping later. genpacks = [] for i, limit in enumerate(limits): if len(generations[i]) > gencountlimit: - # Sort to be smallest last, for easy popping later genpacks.extend(sorted(generations[i], reverse=True, key=lambda x: sizes[x])) break - # Take as many packs from the generation as we can - chosenpacks = genpacks[-3:] - genpacks = genpacks[:-3] - repacksize = sum(sizes[n] for n in chosenpacks) - while (repacksize < repacksizelimit and genpacks and - len(chosenpacks) < maxrepackpacks): - chosenpacks.append(genpacks.pop()) - repacksize += sizes[chosenpacks[-1]] + if not genpacks: + # No generation has more than gencountlimit packs. Therefore, we need to select + # packs across generations. 
+ genpacks = sorted( + itertools.chain(*generations[1:]), + reverse = True, + key = lambda x: sizes[x] + ) - # If there aren't any good candidates for a repack, - # repack the two largest ones. - if not chosenpacks and len(generations[0]) > 1: - chosenpacks = generations[0] + if len(genpacks) < 2: + # There is no need to repack since we have 0 or 1 packs. + chosenpacks = [] + else: + # At least 2 packs will always be chosen irrespective of any + # constraints. + chosenpacks = genpacks[-2:] + genpacks = genpacks[:-2] + repacksize = sum(sizes[n] for n in chosenpacks) + while (repacksize < repacksizelimit and genpacks and + len(chosenpacks) < maxrepackpacks): + chosenpacks.append(genpacks.pop()) + repacksize += sizes[chosenpacks[-1]] return chosenpacks diff --git a/tests/test-treemanifest-repack.t b/tests/test-treemanifest-repack.t --- a/tests/test-treemanifest-repack.t +++ b/tests/test-treemanifest-repack.t @@ -133,42 +133,57 @@ -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack -- repack incremental does nothing here because there are so few packs - $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=300 +- repack incremental always repacks at least 2 smallest packs outside the +highest generation despite the constraints even if it has to go across +generations. 
+ $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 $ ls_l .hg/store/packs/manifests | grep datapack - -r--r--r-- 248 21501384df03b8489b366c5218be639fa08830e4.datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack - -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ ls_l .hg/store/packs/manifests | grep datapack - -r--r--r-- 248 21501384df03b8489b366c5218be639fa08830e4.datapack -r--r--r-- 248 347263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack -r--r--r-- 248 544a3b46a61732209116ae50847ec333b75e3765.datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack + -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack + +- Now, we have 3 packs in the generation with > 200 bytes. Therefore, the next +incremental repack will consider packs from that generation. Also, the size +limit will be honored and one of the packs with size 248 won't be considered for +repacking. 
+ $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 + $ ls_l .hg/store/packs/manifests | grep datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 505 75394b4a2dce16d46dcaa882386a6d8b91246f96.datapack -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack - -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack - $ cd .hg/store/packs/manifests - $ cp d7e689a91ac63385be120a118af9ce8663748f28.datapack x7e689a91ac63385be120a118af9ce8663748f28.datapack - $ cp d7e689a91ac63385be120a118af9ce8663748f28.dataidx x7e689a91ac63385be120a118af9ce8663748f28.dataidx - $ cp 21501384df03b8489b366c5218be639fa08830e4.datapack x1501384df03b8489b366c5218be639fa08830e4.datapack - $ cp 21501384df03b8489b366c5218be639fa08830e4.dataidx x1501384df03b8489b366c5218be639fa08830e4.dataidx - $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack x47263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack - $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx x47263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx - $ cd ../../../../ -- repack incremental kicks in once there are a number of packs -- (set the repacksizelimit so that we test that we only repack up to 1500 bytes, -- and it leaves one datapack behind) - $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=1500B +- Now, we have 3 packs in the generation with > 300 bytes. Therefore, the next +incremental repack will consider packs from that generation. Also, the size +limit will be honored and one of the packs with size 505 won't be considered for +repacking. 
+ $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 + $ ls_l .hg/store/packs/manifests | grep datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 890 813e64604219dfc585465b77dcd570a0bc631022.datapack + -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack + +- No generation has a sufficient number of packs to be considered for the repack. +Therefore, we will repack across generations as long as we stay within the +constraints. In this case, we have to set the size limit high enough for all +packs to be considered for the repacking and we have to ensure the packs are not +in the highest generation because they are not considered for repacking. + $ hg repack --incremental --config remotefilelog.data.repacksizelimit=1600B \ + > --config remotefilelog.data.generations=900,300,200 $ ls_l .hg/store/packs/manifests | grep datapack | wc -l - .*3 (re) - $ ls_l .hg/store/packs/manifests | grep datapack | grep 248 - -r--r--r-- 248 *.datapack (glob) -- Clean up the pile of packs we made - $ hg repack + .*1 (re) Test repacking from revlogs to pack files on the server $ cd ../master @@ -263,16 +278,9 @@ -r--r--r-- 264 e9093d2d887ff14457d43338fcb3994e92051853.datapack -r--r--r-- 154 f9657fdc11d7c9847208da3f1245b38c5981df79.datapack -- Two packs doesn't meet the bar for repack. Only turn revlogs into a pack. +- Two packs meet the bar. Repack new revlogs and old pack into one. $ echo >> a $ hg commit -m 'modify a' $ hg repack --incremental --config remotefilelog.data.generations=300,20 $ ls_l .hg/cache/packs/manifests/ | grep datapack - -r--r--r-- 154 0adbde90bc92c6f23e46180a9d7885c8e2499173.datapack - -r--r--r-- 264 e9093d2d887ff14457d43338fcb3994e92051853.datapack - -r--r--r-- 154 f9657fdc11d7c9847208da3f1245b38c5981df79.datapack - -- Three packs meets the bar. Repack new revlogs and old pack into one. 
- $ hg repack --incremental --config remotefilelog.data.generations=300,20 - $ ls_l .hg/cache/packs/manifests/ | grep datapack -r--r--r-- 496 bc6c2ebb080844d7a227dacbc847a5b375ec620c.datapack