# Patch summary: adds a getmetrics() method to the pack stores touched below
# (the Python pack store reports 'numpacks' and 'totalpacksize'; the C
# uniondatapackstore and one treemanifest store return an empty dict), adds
# shallowutil.sumdicts/prefixkeys/reportpackmetrics, and replaces the old
# "packsizes" ui.log call in _getavailablepackfiles with reportpackmetrics().
diff --git a/cstore/py-datapackstore.h b/cstore/py-datapackstore.h --- a/cstore/py-datapackstore.h +++ b/cstore/py-datapackstore.h @@ -369,6 +369,10 @@ Py_RETURN_NONE; } +static PyObject *uniondatapackstore_getmetrics(py_uniondatapackstore *self) { + return PyDict_New(); +} + // --------- UnionDatapackStore Declaration --------- static PyMethodDef uniondatapackstore_methods[] = { @@ -376,6 +380,7 @@ {"getdeltachain", (PyCFunction)uniondatapackstore_getdeltachain, METH_VARARGS, ""}, {"getmissing", (PyCFunction)uniondatapackstore_getmissing, METH_O, ""}, {"markforrefresh", (PyCFunction)uniondatapackstore_markforrefresh, METH_NOARGS, ""}, + {"getmetrics", (PyCFunction)uniondatapackstore_getmetrics, METH_NOARGS, ""}, {NULL, NULL} }; diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py --- a/remotefilelog/basepack.py +++ b/remotefilelog/basepack.py @@ -1,6 +1,8 @@ from __future__ import absolute_import import errno, hashlib, mmap, os, struct, time + +from collections import defaultdict from mercurial import policy, pycompat, util from mercurial.i18n import _ from mercurial import vfs as vfsmod @@ -56,7 +58,7 @@ # lastrefesh is 0 so we'll immediately check for new packs on the first # failure.
self.lastrefresh = 0 - for filepath in self._getavailablepackfiles(): + for filepath, __, __ in self._getavailablepackfilessorted(): try: pack = self.getpack(filepath) except Exception as ex: @@ -73,33 +75,77 @@ self.packs.append(pack) def _getavailablepackfiles(self): - suffixlen = len(self.INDEXSUFFIX) + """For each pack file (an index/data file combo), yields: + (full path without extension, mtime, size) - totalsize = 0 - files = [] - filenames = set() + mtime will be the mtime of the index/data file (whichever is newer) + size is the combined size of index/data file + """ + indexsuffixlen = len(self.INDEXSUFFIX) + packsuffixlen = len(self.PACKSUFFIX) + + ids = set() + sizes = defaultdict(lambda: 0) + mtimes = defaultdict(lambda: []) try: for filename, size, stat in osutil.listdir(self.path, stat=True): - files.append((stat.st_mtime, filename)) - filenames.add(filename) - totalsize += size + id = None + if filename[-indexsuffixlen:] == self.INDEXSUFFIX: + id = filename[:-indexsuffixlen] + elif filename[-packsuffixlen:] == self.PACKSUFFIX: + id = filename[:-packsuffixlen] + + # Since we expect to have two files corresponding to each ID + # (the index file and the pack file), we can yield once we see + # it twice. + if id: + sizes[id] += size # Sum both files' sizes together + mtimes[id].append(stat.st_mtime) + if id in ids: + yield (os.path.join(self.path, id), max(mtimes[id]), + sizes[id]) + else: + ids.add(id) except OSError as ex: if ex.errno != errno.ENOENT: raise - numpacks = len(filenames) - self.ui.log("packsizes", "packstore %s has %d packs totaling %s\n" % - (self.path, numpacks, util.bytecount(totalsize)), - numpacks=numpacks, - totalsize=totalsize) - # Put most recent pack files first since they contain the most recent - # info. + def _getavailablepackfilessorted(self): + """Like `_getavailablepackfiles`, but also sorts the files by mtime, + yielding newest files first.
+ + This is desirable, since it is more likely newer packfiles have more + desirable data. + """ + files = [] + for path, mtime, size in self._getavailablepackfiles(): + files.append((mtime, size, path)) files = sorted(files, reverse=True) - for mtime, filename in files: - packfilename = '%s%s' % (filename[:-suffixlen], self.PACKSUFFIX) - if (filename[-suffixlen:] == self.INDEXSUFFIX - and packfilename in filenames): - yield os.path.join(self.path, filename)[:-suffixlen] + for mtime, size, path in files: + yield path, mtime, size + + def gettotalsizeandcount(self): + """Returns the total disk size (in bytes) of all the pack files in + this store, and the count of pack files. + + (This might be smaller than the total size of the ``self.path`` + directory, since this only considers fully-written pack files, and not + temporary files or other detritus on the directory.) + """ + totalsize = 0 + count = 0 + for __, __, size in self._getavailablepackfiles(): + totalsize += size + count += 1 + return totalsize, count + + def getmetrics(self): + """Returns metrics on the state of this store.""" + size, count = self.gettotalsizeandcount() + return { + 'numpacks': count, + 'totalpacksize': size, + } def getpack(self, path): raise NotImplemented() @@ -138,10 +184,9 @@ if now > self.lastrefresh + REFRESHRATE: self.lastrefresh = now previous = set(p.path for p in self.packs) - new = set(self._getavailablepackfiles()) - previous - - for filepath in new: - newpacks.append(self.getpack(filepath)) + for filepath, __, __ in self._getavailablepackfilessorted(): + if filepath not in previous: + newpacks.append(self.getpack(filepath)) self.packs.extend(newpacks) return newpacks diff --git a/remotefilelog/contentstore.py b/remotefilelog/contentstore.py --- a/remotefilelog/contentstore.py +++ b/remotefilelog/contentstore.py @@ -93,6 +93,10 @@ pass raise KeyError((name, hex(node))) + def getmetrics(self): + metrics = [s.getmetrics() for s in self.stores] + return
shallowutil.sumdicts(*metrics) + def _getpartialchain(self, name, node): """Returns a partial delta chain for the given name/node pair. diff --git a/remotefilelog/metadatastore.py b/remotefilelog/metadatastore.py --- a/remotefilelog/metadatastore.py +++ b/remotefilelog/metadatastore.py @@ -100,6 +100,10 @@ for store in self.stores: store.markledger(ledger) + def getmetrics(self): + metrics = [s.getmetrics() for s in self.stores] + return shallowutil.sumdicts(*metrics) + class remotefilelogmetadatastore(basestore.basestore): def getancestors(self, name, node, known=None): """Returns as many ancestors as we're aware of. diff --git a/remotefilelog/shallowrepo.py b/remotefilelog/shallowrepo.py --- a/remotefilelog/shallowrepo.py +++ b/remotefilelog/shallowrepo.py @@ -77,7 +77,8 @@ repo.shareddatastores.append(packcontentstore) repo.sharedhistorystores.append(packmetadatastore) - + shallowutil.reportpackmetrics(repo.ui, 'filestore', packcontentstore, + packmetadatastore) return packcontentstore, packmetadatastore def makeunionstores(repo): @@ -104,6 +105,8 @@ fileservicehistorywrite = packmetadatastore repo.fileservice.setstore(repo.contentstore, repo.metadatastore, fileservicedatawrite, fileservicehistorywrite) + shallowutil.reportpackmetrics(repo.ui, 'filestore', + packcontentstore, packmetadatastore) def wraprepo(repo): class shallowrepository(repo.__class__): diff --git a/remotefilelog/shallowutil.py b/remotefilelog/shallowutil.py --- a/remotefilelog/shallowutil.py +++ b/remotefilelog/shallowutil.py @@ -7,6 +7,8 @@ from __future__ import absolute_import import errno, hashlib, os, stat, struct, tempfile + +from collections import defaultdict from mercurial import filelog, revlog, util, error from mercurial.i18n import _ @@ -93,6 +95,30 @@ text = text[s + 2:] return meta or {}, text +def sumdicts(*dicts): + """Adds all the values of *dicts together into one dictionary. This assumes + the values in *dicts are all summable. + + e.g.
[{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1} + """ + result = defaultdict(lambda: 0) + for dict in dicts: + for k, v in dict.iteritems(): + result[k] += v + return result + +def prefixkeys(dict, prefix): + """Returns ``dict`` with ``prefix`` prepended to all its keys.""" + result = {} + for k, v in dict.iteritems(): + result[prefix + k] = v + return result + +def reportpackmetrics(ui, prefix, *stores): + dicts = [s.getmetrics() for s in stores] + dict = prefixkeys(sumdicts(*dicts), prefix + '_') + ui.log(prefix + "_packsizes", "", **dict) + def _parsepackmeta(metabuf): """parse datapack meta, bytes () -> dict diff --git a/treemanifest/__init__.py b/treemanifest/__init__.py --- a/treemanifest/__init__.py +++ b/treemanifest/__init__.py @@ -227,6 +227,7 @@ datastore = cstore.datapackstore(packpath) localdatastore = cstore.datapackstore(localpackpath) # TODO: can't use remotedatastore with cunionstore yet + # TODO make reportmetrics work with cstore mfl.datastore = cstore.uniondatapackstore([localdatastore, datastore]) else: datastore = datapackstore(repo.ui, packpath, usecdatapack=usecdatapack) @@ -259,6 +260,8 @@ localhistorystore, writestore=localhistorystore, ) + shallowutil.reportpackmetrics(repo.ui, 'treestore', mfl.datastore, + mfl.historystore) class treemanifestlog(manifest.manifestlog): def __init__(self, opener, treemanifest=False): @@ -1451,6 +1454,9 @@ def markledger(self, ledger): pass + def getmetrics(self): + return {} + def serverrepack(repo, incremental=False): packpath = repo.vfs.join('cache/packs/%s' % PACK_CATEGORY)