diff --git a/remotefilelog/repack.py b/remotefilelog/repack.py --- a/remotefilelog/repack.py +++ b/remotefilelog/repack.py @@ -264,32 +264,40 @@ break # Steps for picking what packs to repack: - # 1. Pick the largest generation with >2 pack files. - # 2. Take the smallest three packs. - # 3. While total-size-of-packs < repacksizelimit: add another pack + # 1. Pick the largest generation with > gencountlimit pack files. + # 2. If no such generation exists, consider pack files across generations. + # 3. Starting with two smallest packs, take as many packs as we can within + # the constraints. The current constraints include the total size and the + # number of packs considered for the repacking. - # Find the largest generation with more than gencountlimit packs + # Find the largest generation with more than gencountlimit packs. + # Packs will always be sorted to be smallest last, for easy popping later. genpacks = [] for i, limit in enumerate(limits): if len(generations[i]) > gencountlimit: - # Sort to be smallest last, for easy popping later genpacks.extend(sorted(generations[i], reverse=True, key=lambda x: sizes[x])) break - # Take as many packs from the generation as we can - chosenpacks = genpacks[-3:] - genpacks = genpacks[:-3] - repacksize = sum(sizes[n] for n in chosenpacks) - while (repacksize < repacksizelimit and genpacks and - len(chosenpacks) < maxrepackpacks): - chosenpacks.append(genpacks.pop()) - repacksize += sizes[chosenpacks[-1]] + if not genpacks: + # No generation has more than gencountlimit packs. Therefore, we need + # to select packs across generations. + genpacks = sorted( + set().union(*generations), reverse=True, key=lambda x: sizes[x]) - # If there aren't any good candidates for a repack, - # repack the two largest ones. - if not chosenpacks and len(generations[0]) > 1: - chosenpacks = generations[0] + if len(genpacks) < 2: + # There is no need to repack since we have 0 or 1 pack. 
+ chosenpacks = [] + else: + # At least 2 packs will always be chosen irrespective of any + # constraints. + chosenpacks = genpacks[-2:] + genpacks = genpacks[:-2] + repacksize = sum(sizes[n] for n in chosenpacks) + while (repacksize < repacksizelimit and genpacks and + len(chosenpacks) < maxrepackpacks): + chosenpacks.append(genpacks.pop()) + repacksize += sizes[chosenpacks[-1]] return chosenpacks diff --git a/tests/test-remotefilelog-repack-fast.t b/tests/test-remotefilelog-repack-fast.t --- a/tests/test-remotefilelog-repack-fast.t +++ b/tests/test-remotefilelog-repack-fast.t @@ -318,11 +318,10 @@ $ ls_l $TESTTMP/hgcache/master/packs/ | grep histpack -r--r--r-- 336 094b530486dad4427a0faf6bcbc031571b99ca24.histpack -1 gen3 pack, 1 gen0 pack - does nothing +1 gen3 pack, 1 gen0 pack - packs everything $ hg repack --incremental $ ls_l $TESTTMP/hgcache/master/packs/ | grep datapack - -r--r--r-- 59 5b7dec902026f0cddb0ef8acb62f27b5698494d4.datapack - -r--r--r-- 225 8fe685c56f6f7edf550bfcec74eeecc5f3c2ba15.datapack + -r--r--r-- 301 09b8bf49256b3fc2175977ba97d6402e91a9a604.datapack $ ls_l $TESTTMP/hgcache/master/packs/ | grep histpack -r--r--r-- 336 094b530486dad4427a0faf6bcbc031571b99ca24.histpack diff --git a/tests/test-remotefilelog-repack.t b/tests/test-remotefilelog-repack.t --- a/tests/test-remotefilelog-repack.t +++ b/tests/test-remotefilelog-repack.t @@ -325,11 +325,10 @@ $ ls_l $TESTTMP/hgcache/master/packs/ | grep histpack -r--r--r-- 336 094b530486dad4427a0faf6bcbc031571b99ca24.histpack -1 gen3 pack, 1 gen0 pack - does nothing +1 gen3 pack, 1 gen0 pack - packs everything $ hg repack --incremental $ ls_l $TESTTMP/hgcache/master/packs/ | grep datapack - -r--r--r-- 59 5b7dec902026f0cddb0ef8acb62f27b5698494d4.datapack - -r--r--r-- 225 8fe685c56f6f7edf550bfcec74eeecc5f3c2ba15.datapack + -r--r--r-- 301 09b8bf49256b3fc2175977ba97d6402e91a9a604.datapack $ ls_l $TESTTMP/hgcache/master/packs/ | grep histpack -r--r--r-- 336 094b530486dad4427a0faf6bcbc031571b99ca24.histpack 
diff --git a/tests/test-treemanifest-repack.t b/tests/test-treemanifest-repack.t --- a/tests/test-treemanifest-repack.t +++ b/tests/test-treemanifest-repack.t @@ -133,42 +133,55 @@ -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack -- repack incremental does nothing here because there are so few packs - $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=300 +- repack incremental always repacks at least the 2 smallest packs despite the +constraints even if it has to go across generations. + $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 $ ls_l .hg/store/packs/manifests | grep datapack - -r--r--r-- 248 21501384df03b8489b366c5218be639fa08830e4.datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack - -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ echo b >> dir/b && hg commit -Aqm 'modify dir/b' $ ls_l .hg/store/packs/manifests | grep datapack - -r--r--r-- 248 21501384df03b8489b366c5218be639fa08830e4.datapack -r--r--r-- 248 347263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack -r--r--r-- 248 544a3b46a61732209116ae50847ec333b75e3765.datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack + -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack + +- Now, we have 3 packs in the generation with > 200 bytes. Therefore, the next +incremental repack will consider packs from that generation. Also, the size +limit will be honored and one of the packs with size 248 won't be considered for +repacking. 
+ $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 + $ ls_l .hg/store/packs/manifests | grep datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 505 75394b4a2dce16d46dcaa882386a6d8b91246f96.datapack -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack -r--r--r-- 386 d15c09a9a5a13bb689bd9764455a415a20dc885e.datapack - -r--r--r-- 248 d7e689a91ac63385be120a118af9ce8663748f28.datapack - $ cd .hg/store/packs/manifests - $ cp d7e689a91ac63385be120a118af9ce8663748f28.datapack x7e689a91ac63385be120a118af9ce8663748f28.datapack - $ cp d7e689a91ac63385be120a118af9ce8663748f28.dataidx x7e689a91ac63385be120a118af9ce8663748f28.dataidx - $ cp 21501384df03b8489b366c5218be639fa08830e4.datapack x1501384df03b8489b366c5218be639fa08830e4.datapack - $ cp 21501384df03b8489b366c5218be639fa08830e4.dataidx x1501384df03b8489b366c5218be639fa08830e4.dataidx - $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack x47263bf1efbdb5bf7e1d1565b6b504073fb9093.datapack - $ cp 347263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx x47263bf1efbdb5bf7e1d1565b6b504073fb9093.dataidx - $ cd ../../../../ -- repack incremental kicks in once there are a number of packs -- (set the repacksizelimit so that we test that we only repack up to 1500 bytes, -- and it leaves one datapack behind) - $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=1500B +- Now, we have 3 packs in the generation with > 300 bytes. Therefore, the next +incremental repack will consider packs from that generation. Also, the size +limit will be honored and one of the packs with size 505 won't be considered for +repacking. 
+ $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=300 + $ ls_l .hg/store/packs/manifests | grep datapack + -r--r--r-- 505 63e9ec504e6f48299553359c9a00bc85d562fc01.datapack + -r--r--r-- 890 813e64604219dfc585465b77dcd570a0bc631022.datapack + -r--r--r-- 248 863908ef8149261ab0d891c2344d8e8766c39441.datapack + +- No generation has a sufficient number of packs to be considered for the +repacking. Therefore, we will repack across generations until we would exceed +the constraints. In this case, we have set the size limit high enough for all +packs to be considered for the repacking. + $ hg repack --incremental --config remotefilelog.data.generations=300,200 \ + > --config remotefilelog.data.repacksizelimit=1600B $ ls_l .hg/store/packs/manifests | grep datapack | wc -l - .*3 (re) - $ ls_l .hg/store/packs/manifests | grep datapack | grep 248 - -r--r--r-- 248 *.datapack (glob) -- Clean up the pile of packs we made - $ hg repack + .*1 (re) Test repacking from revlogs to pack files on the server $ cd ../master @@ -263,16 +276,9 @@ -r--r--r-- 264 e9093d2d887ff14457d43338fcb3994e92051853.datapack -r--r--r-- 154 f9657fdc11d7c9847208da3f1245b38c5981df79.datapack -- Two packs doesn't meet the bar for repack. Only turn revlogs into a pack. +- Two packs meet the bar. Repack new revlogs and old pack into one. $ echo >> a $ hg commit -m 'modify a' $ hg repack --incremental --config remotefilelog.data.generations=300,20 $ ls_l .hg/cache/packs/manifests/ | grep datapack - -r--r--r-- 154 0adbde90bc92c6f23e46180a9d7885c8e2499173.datapack - -r--r--r-- 264 e9093d2d887ff14457d43338fcb3994e92051853.datapack - -r--r--r-- 154 f9657fdc11d7c9847208da3f1245b38c5981df79.datapack - -- Three packs meets the bar. Repack new revlogs and old pack into one. 
- $ hg repack --incremental --config remotefilelog.data.generations=300,20 - $ ls_l .hg/cache/packs/manifests/ | grep datapack -r--r--r-- 496 bc6c2ebb080844d7a227dacbc847a5b375ec620c.datapack