diff --git a/remotefilelog/repack.py b/remotefilelog/repack.py --- a/remotefilelog/repack.py +++ b/remotefilelog/repack.py @@ -142,25 +142,38 @@ files = osutil.listdir(packpath, stat=True) - datapacks = _computeincrementaldatapack(repo.ui, files) - fullpaths = list(os.path.join(packpath, p) for p in datapacks) - datapacks = list(datapack.datapack(p) for p in fullpaths) + def topacks(files, constructor): + paths = list(os.path.join(packpath, p) for p in files) + packs = list(constructor(p) for p in paths) + return packs + + datapacks = topacks(_computeincrementaldatapack(repo.ui, files), + datapack.datapack) datapacks.extend(s for s in datastore if not isinstance(s, datapack.datapackstore)) - historypacks = _computeincrementalhistorypack(repo.ui, files) - fullpaths = list(os.path.join(packpath, p) for p in historypacks) - historypacks = list(historypack.historypack(p) for p in fullpaths) + historypacks = topacks(_computeincrementalhistorypack(repo.ui, files), + historypack.historypack) historypacks.extend(s for s in historystore if not isinstance(s, historypack.historypackstore)) - datasource = contentstore.unioncontentstore( - *datapacks, - allowincomplete=allowincompletedata) - historysource = metadatastore.unionmetadatastore(*historypacks, - allowincomplete=True) - - _runrepack(repo, datasource, historysource, packpath, category) + allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX, + historypack.INDEXSUFFIX) + allhistorypacks = topacks((f for f, mode, stat in allhistoryfiles), + historypack.historypack) + allhistorypacks.extend(s for s in historystore + if not isinstance(s, historypack.historypackstore)) + _runrepack(repo, + contentstore.unioncontentstore( + *datapacks, + allowincomplete=allowincompletedata), + metadatastore.unionmetadatastore( + *historypacks, + allowincomplete=True), + packpath, category, + fullhistory=metadatastore.unionmetadatastore( + *allhistorypacks, + allowincomplete=True)) def _computeincrementaldatapack(ui, files): 
"""Given a set of pack files and a set of generation size limits, this @@ -202,13 +215,8 @@ historypack.PACKSUFFIX, historypack.INDEXSUFFIX, gencountlimit, repacksizelimit, maxrepackpacks) -def _computeincrementalpack(ui, files, limits, packsuffix, indexsuffix, - gencountlimit, repacksizelimit, maxrepackpacks): - # Group the packs by generation (i.e. by size) - generations = [] - for i in xrange(len(limits)): - generations.append([]) - sizes = {} +def _allpackfileswithsuffix(files, packsuffix, indexsuffix): + result = [] fileset = set(fn for fn, mode, stat in files) for filename, mode, stat in files: if not filename.endswith(packsuffix): @@ -219,7 +227,20 @@ # Don't process a pack if it doesn't have an index. if (prefix + indexsuffix) not in fileset: continue + result.append((filename[:-len(packsuffix)], mode, stat)) + return result + +def _computeincrementalpack(ui, files, limits, packsuffix, indexsuffix, + gencountlimit, repacksizelimit, maxrepackpacks): + # Group the packs by generation (i.e. 
by size) + generations = [] + for i in xrange(len(limits)): + generations.append([]) + + sizes = {} + for prefix, mode, stat in _allpackfileswithsuffix(files, packsuffix, + indexsuffix): size = stat.st_size sizes[prefix] = size for i, limit in enumerate(limits): @@ -257,7 +278,7 @@ return chosenpacks -def _runrepack(repo, data, history, packpath, category): +def _runrepack(repo, data, history, packpath, category, fullhistory=None): shallowutil.mkstickygroupdir(repo.ui, packpath) def isold(repo, filename, node): @@ -275,7 +296,10 @@ return filetime[0] < limit garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack') - packer = repacker(repo, data, history, category, gc=garbagecollect, + if not fullhistory: + fullhistory = history + packer = repacker(repo, data, history, fullhistory, category, + gc=garbagecollect, isold=isold) # internal config: remotefilelog.datapackversion @@ -354,10 +378,11 @@ """Class for orchestrating the repack of data and history information into a new format. 
""" - def __init__(self, repo, data, history, category, gc=False, isold=None): + def __init__(self, repo, data, history, fullhistory, category, gc=False, isold=None): self.repo = repo self.data = data self.history = history + self.fullhistory = fullhistory self.unit = constants.getunits(category) self.garbagecollect = gc if self.garbagecollect: @@ -409,7 +434,7 @@ ui.progress(_("building history"), i, unit='nodes', total=len(nodes)) try: - ancestors.update(self.history.getancestors(filename, node, + ancestors.update(self.fullhistory.getancestors(filename, node, known=ancestors)) except KeyError: # Since we're packing data entries, we may not have the @@ -420,6 +445,9 @@ # Order the nodes children first, so we can produce reverse deltas orderednodes = list(reversed(self._toposort(ancestors))) + if len(nohistory) > 0: + ui.debug('repackdata: %d nodes without history\n' % + len(nohistory)) orderednodes.extend(sorted(nohistory)) # Compute deltas and write to the pack diff --git a/tests/test-treemanifest-repack.t b/tests/test-treemanifest-repack.t --- a/tests/test-treemanifest-repack.t +++ b/tests/test-treemanifest-repack.t @@ -154,11 +154,72 @@ - repack incremental kicks in once there are a number of packs - (set the repacksizelimit so that we test that we only repack up to 1500 bytes, - and it leaves one datapack behind) + $ cp -R .hg/store/packs .hg/store/packs.bak $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=1500B $ ls_l .hg/store/packs/manifests | grep datapack | wc -l .*3 (re) $ ls_l .hg/store/packs/manifests | grep datapack | grep 248 -r--r--r-- 248 *.datapack (glob) + +- Verify that full history is used to choose node order for data repack, even if +- we actually repack fewer history packs. + $ rm -r .hg/store/packs + $ mv .hg/store/packs.bak .hg/store/packs + +- history.maxrepackpacks=0 is passed to ensure no history packs are repacked. 
If +- any entries have missing ancestry, a debug warning is printed. + $ hg repack --incremental --config remotefilelog.data.generations=300,200 --config remotefilelog.data.repacksizelimit=1500B --config remotefilelog.history.maxrepackpacks=0 --debug + +- Inspect all data packs. We expect to see none with > 1 delta bases of 000000000000 +- (which would indicate we made duplicative chains.) + $ ls .hg/store/packs/manifests/*.datapack | xargs -L1 hg debugdatapack + + + Node Delta Base Delta Length + e24ac4639900 000000000000 89 + 1e27a5956e61 e24ac4639900 58 + + dir + Node Delta Base Delta Length + f175c774e18b 000000000000 43 + + dir + Node Delta Base Delta Length + 984db90696cd 000000000000 43 + + + Node Delta Base Delta Length + 10d05e165f71 000000000000 89 + + + Node Delta Base Delta Length + edbe899c31e7 000000000000 89 + 23d6a1af5d5b edbe899c31e7 58 + 53dd6ae0d984 23d6a1af5d5b 58 + 10d05e165f71 53dd6ae0d984 58 + fba208634b61 10d05e165f71 58 + + dir + Node Delta Base Delta Length + 31a760f1fb09 000000000000 43 + d62223eee804 31a760f1fb09 55 + 6bfa2a87412d d62223eee804 55 + 984db90696cd 6bfa2a87412d 55 + 84977318a34f 984db90696cd 55 + + + + + + + + + + + + + + - Clean up the pile of packs we made $ hg repack