@akushner thought it'd be helpful to have a dedicated debug command to run if packfile unpleasantness is suspected, and I thought it'd be useful too.
Also added some missing metrics on remotefilelog stores.
Lint Skipped |
Unit Tests Skipped |
remotefilelog/basestore.py | ||
---|---|---|
46 | We may want a try/catch around all of these metric functions that call _getfiles. _getfiles could throw an exception if the directory isn't readable for some reason, and we don't want metric logging code to bring down the process. Or we add the try/catch to _getfiles or _listkeys and print a warning if the directory is inaccessible, then return an empty list. | |
47 | If we're just counting, we can use _listkeys() instead, so we don't have to resolve the filenames. | |
55 | If we're just getting the file size, we can use _listkeys to get the information necessary to compute the file path to get the size. | |
59 | Isn't getmetrics called at store creation time now? By your previous diff. If so, getnumfiles is expensive right? How do we ensure that we don't accidentally call getnumfiles every time we want to log metrics. | |
remotefilelog/debugcommands.py | ||
360 | I'd write out the full name debugpackstatus. st isn't a super common acronym in the code base. |
Ready for another look. Please check my C.
remotefilelog/basestore.py | ||
---|---|---|
59 | From our IRL chat, added verbose=False. |
Needs a test
remotefilelog/basestore.py | ||
---|---|---|
58–59 | os.path.join(self._getrootpath(), fnhashhex[:2], fnhashhex[2:]) would be the more platform agnostic way of doing this. |
I fleshed this out a bit since the last change, adding getmetrics() to datapackstore and printing the paths for remotefilelog stores, too.
remotefilelog/debugcommands.py | ||
---|---|---|
201 | Not really related, but I noticed that debugdatapack aborted too early if one of the paths had any errors. I could split this out if you think it's warranted. |
remotefilelog/debugcommands.py | ||
---|---|---|
376 | It's based on Ruby/Underscore.js's name for the same function. Then again, who ever said Ruby was a paragon of naming? |
remotefilelog/debugcommands.py | ||
---|---|---|
376 | Yes. Whether 0 or [] should be False is a separate topic (I personally prefer the Ruby/Lua way - only nil and false are False). It seems you're using it to filter paths, which is fine I guess? | |
384–386 | Maybe more common in Python: [p for s in store.stores if s for p in getpaths(s)] It's much shorter. Although I personally do not enjoy list comprehensive that much. |
remotefilelog/debugcommands.py | ||
---|---|---|
376 | Yeah, it doesn't matter today but I was looking ahead if we were to generalize this function / move it into a util. Then I'd expect it to behave the Ruby way too (or at least have a different name, like filterfalsy). | |
384–386 | Interesting, I didn't know you could chain them. I'll take the shorter way. |
Path | Packages | |||
---|---|---|---|---|
M | cstore/py-datapackstore.h (4 lines) | |||
M | remotefilelog/__init__.py (4 lines) | |||
M | remotefilelog/basepack.py (2 lines) | |||
M | remotefilelog/basestore.py (47 lines) | |||
M | remotefilelog/contentstore.py (7 lines) | |||
M | remotefilelog/debugcommands.py (33 lines) | |||
M | remotefilelog/metadatastore.py (7 lines) | |||
M | treemanifest/__init__.py (2 lines) |
} | } | ||||
} | } | ||||
/*
 * markforrefresh() -- ask the underlying C++ union store to re-scan its
 * pack files on the next access.  Takes no arguments, returns None.
 */
static PyObject *uniondatapackstore_markforrefresh(py_uniondatapackstore *self) {
  self->uniondatapackstore->markForRefresh();
  Py_RETURN_NONE;
}
static PyObject *uniondatapackstore_getmetrics(py_uniondatapackstore *self) { | static PyObject *uniondatapackstore_getmetrics(py_uniondatapackstore *self, PyObject* kw) { | ||||
return PyDict_New(); | return PyDict_New(); | ||||
} | } | ||||
// --------- UnionDatapackStore Declaration --------- | // --------- UnionDatapackStore Declaration --------- | ||||
static PyMethodDef uniondatapackstore_methods[] = { | static PyMethodDef uniondatapackstore_methods[] = { | ||||
{"get", (PyCFunction)uniondatapackstore_get, METH_VARARGS, ""}, | {"get", (PyCFunction)uniondatapackstore_get, METH_VARARGS, ""}, | ||||
{"getdeltachain", (PyCFunction)uniondatapackstore_getdeltachain, METH_VARARGS, ""}, | {"getdeltachain", (PyCFunction)uniondatapackstore_getdeltachain, METH_VARARGS, ""}, | ||||
{"getmissing", (PyCFunction)uniondatapackstore_getmissing, METH_O, ""}, | {"getmissing", (PyCFunction)uniondatapackstore_getmissing, METH_O, ""}, | ||||
{"markforrefresh", (PyCFunction)uniondatapackstore_markforrefresh, METH_NOARGS, ""}, | {"markforrefresh", (PyCFunction)uniondatapackstore_markforrefresh, METH_NOARGS, ""}, | ||||
{"getmetrics", (PyCFunction)uniondatapackstore_getmetrics, METH_NOARGS, ""}, | {"getmetrics", (PyCFunction)uniondatapackstore_getmetrics, METH_KEYWORDS, ""}, | ||||
{NULL, NULL} | {NULL, NULL} | ||||
}; | }; | ||||
static PyTypeObject uniondatapackstoreType = { | static PyTypeObject uniondatapackstoreType = { | ||||
PyObject_HEAD_INIT(NULL) | PyObject_HEAD_INIT(NULL) | ||||
0, /* ob_size */ | 0, /* ob_size */ | ||||
"cstore.uniondatapackstore", /* tp_name */ | "cstore.uniondatapackstore", /* tp_name */ | ||||
sizeof(py_uniondatapackstore), /* tp_basicsize */ | sizeof(py_uniondatapackstore), /* tp_basicsize */ |
def debugdatapack(ui, *paths, **opts):
    """Thin wrapper forwarding to the debugcommands implementation."""
    return debugcommands.debugdatapack(ui, *paths, **opts)
@command('debughistorypack', [
], _('hg debughistorypack <path>'), norepo=True)
def debughistorypack(ui, path, **opts):
    """Dump the contents of a history pack file (see debugcommands)."""
    return debugcommands.debughistorypack(ui, path)
@command('debugpackstatus', [], _('hg debugpackstatus'))
def debugpackstatus(ui, repo, **opts):
    """Print a summary of the repo's pack and loose-file storage.

    BUG FIX: mercurial command functions receive (ui, repo); the
    original declared (repo, ui) and forwarded the arguments swapped,
    while debugcommands.debugpackstatus expects (ui, repo).  The help
    string is also wrapped in _() for consistency with the sibling
    command registrations in this file.
    """
    return debugcommands.debugpackstatus(ui, repo)
@command('debugkeepset', [
], _('hg debugkeepset'))
def debugkeepset(ui, repo, **opts):
    """Compute the repack keepset (used to measure its computation time)."""
    def keyfn(fname, fnode):
        # Cache key for a (filename, filenode) pair in this repo.
        return fileserverclient.getcachekey(repo.name, fname, hex(fnode))
    repackmod.keepset(repo, keyfn)
""" | """ | ||||
totalsize = 0 | totalsize = 0 | ||||
count = 0 | count = 0 | ||||
for __, __, size in self._getavailablepackfiles(): | for __, __, size in self._getavailablepackfiles(): | ||||
totalsize += size | totalsize += size | ||||
count += 1 | count += 1 | ||||
return totalsize, count | return totalsize, count | ||||
def getmetrics(self, verbose=False):
    """Return a dict describing this store's pack usage.

    verbose is accepted for interface compatibility with the other
    stores' getmetrics() implementations; it is not used here.
    """
    totalsize, npacks = self.gettotalsizeandcount()
    return {'numpacks': npacks, 'totalpacksize': totalsize}
def getpack(self, path): | def getpack(self, path): |
"""Returns the metadata dict for given node.""" | """Returns the metadata dict for given node.""" | ||||
for store in self.stores: | for store in self.stores: | ||||
try: | try: | ||||
return store.getmeta(name, node) | return store.getmeta(name, node) | ||||
except KeyError: | except KeyError: | ||||
pass | pass | ||||
raise KeyError((name, hex(node))) | raise KeyError((name, hex(node))) | ||||
def getmetrics(self, verbose=False):
    """Aggregate getmetrics() across every sub-store into a single dict."""
    return shallowutil.sumdicts(
        *(store.getmetrics(verbose=verbose) for store in self.stores))
def _getpartialchain(self, name, node): | def _getpartialchain(self, name, node): | ||||
"""Returns a partial delta chain for the given name/node pair. | """Returns a partial delta chain for the given name/node pair. | ||||
A partial chain is a chain that may not be terminated in a full-text. | A partial chain is a chain that may not be terminated in a full-text. | ||||
""" | """ | ||||
for store in self.stores: | for store in self.stores: | ||||
raise RuntimeError("cannot add to a remote store") | raise RuntimeError("cannot add to a remote store") | ||||
def getmissing(self, keys):
    """Remote store: every requested key is reported as missing."""
    return keys
def markledger(self, ledger, options=None):
    """Remote stores contribute nothing to the repack ledger."""
    return None
def getmetrics(self, verbose=False):
    """Remote stores expose no local metrics."""
    return {}
class manifestrevlogstore(object): | class manifestrevlogstore(object): | ||||
def __init__(self, repo):
    """Wrap the repo's manifest revlogs behind the store interface."""
    self._store = repo.store
    self._svfs = repo.svfs
    self._revlogs = {}
    # NOTE(review): presumably used for linkrev lookups — confirm.
    self._cl = revlog.revlog(self._svfs, '00changelog.i')
    self._repackstartlinkrev = 0
# debugcommands.py - debug logic for remotefilelog | # debugcommands.py - debug logic for remotefilelog | ||||
# | # | ||||
# Copyright 2013 Facebook, Inc. | # Copyright 2013 Facebook, Inc. | ||||
# | # | ||||
# This software may be used and distributed according to the terms of the | # This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | # GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
from mercurial import error, filelog, revlog | from mercurial import error, filelog, revlog, util | ||||
from mercurial.node import bin, hex, nullid, short | from mercurial.node import bin, hex, nullid, short | ||||
from mercurial.i18n import _ | from mercurial.i18n import _ | ||||
from . import ( | from . import ( | ||||
constants, | constants, | ||||
datapack, | datapack, | ||||
fileserverclient, | fileserverclient, | ||||
historypack, | historypack, | ||||
shallowrepo, | shallowrepo, | ||||
copyfrom = raw[(start + 80):divider] | copyfrom = raw[(start + 80):divider] | ||||
mapping[currentnode] = (p1, p2, linknode, copyfrom) | mapping[currentnode] = (p1, p2, linknode, copyfrom) | ||||
start = divider + 1 | start = divider + 1 | ||||
return size, firstnode, mapping | return size, firstnode, mapping | ||||
def debugdatapack(ui, *paths, **opts): | def debugdatapack(ui, *paths, **opts): | ||||
for path in paths: | for path in paths: | ||||
Not really related, but I noticed that debugdatapack aborted too early if one of the paths had any errors. I could split this out if you think it's warranted. phillco: Not really related, but I noticed that `debugdatapack` aborted too early if one of the paths… | |||||
quark: No need to split. | |||||
if '.data' in path: | if '.data' in path: | ||||
path = path[:path.index('.data')] | path = path[:path.index('.data')] | ||||
ui.write("%s:\n" % path) | ui.write("%s:\n" % path) | ||||
dpack = datapack.datapack(path) | dpack = datapack.datapack(path) | ||||
node = opts.get('node') | node = opts.get('node') | ||||
if node: | if node: | ||||
deltachain = dpack.getdeltachain('', bin(node)) | deltachain = dpack.getdeltachain('', bin(node)) | ||||
dumpdeltachain(ui, deltachain, **opts) | dumpdeltachain(ui, deltachain, **opts) | ||||
"P1 Node".ljust(14), | "P1 Node".ljust(14), | ||||
"P2 Node".ljust(14), | "P2 Node".ljust(14), | ||||
"Link Node".ljust(14), | "Link Node".ljust(14), | ||||
"Copy From")) | "Copy From")) | ||||
lastfilename = filename | lastfilename = filename | ||||
ui.write("%s %s %s %s %s\n" % (short(node), short(p1node), | ui.write("%s %s %s %s %s\n" % (short(node), short(p1node), | ||||
short(p2node), short(linknode), copyfrom)) | short(p2node), short(linknode), copyfrom)) | ||||
def debugpackstatus(ui, repo):
    """Print a human-readable summary of pack/loose-file storage state.

    Covers the local and shared tree stores and the file content and
    history stores, showing pack counts, sizes, and store paths.
    """
    def formatmetrics(metrics, paths=None):
        # Renamed from `format` to avoid shadowing the builtin.
        # Builds a summary string from a getmetrics() dict.
        parts = []
        if 'numpacks' in metrics:
            parts.append("%d packs consuming %s" % (
                metrics['numpacks'],
                util.bytecount(metrics['totalpacksize'])))
        if 'numloosefiles' in metrics:
            parts.append("%d loose files consuming %s" % (
                metrics['numloosefiles'],
                util.bytecount(metrics['totalloosesize'])))
        if paths:
            parts.append("in %s" % ",".join(paths))
        return ", ".join(parts)

    mfl = repo.manifestlog
    ui.write(("Local Tree Store: %s\n" % formatmetrics(
        shallowutil.sumdicts(*[s.getmetrics(verbose=True)
                               for s in mfl.localdatastores]),
        # BUG FIX: original iterated `p` but referenced `s.path`.
        [s.path for s in mfl.localdatastores]
    )))
    ui.write(("Shared Tree Store: %s\n" % formatmetrics(
        shallowutil.sumdicts(*[s.getmetrics(verbose=True)
                               for s in mfl.shareddatastores]),
        # BUG FIX: original listed localdatastores paths in the shared
        # section (and had the same loop-variable mismatch).
        [s.path for s in mfl.shareddatastores]
    )))
    # Per review: user-facing labels are "File Data Store" (contents)
    # and "File History Store" (metadata/ancestry); "content"/"metadata"
    # are the old internal terms.
    ui.write(("File Data Store: %s\n" %
              formatmetrics(repo.contentstore.getmetrics(verbose=True))))
    ui.write(("File History Store: %s\n" %
              formatmetrics(repo.metadatastore.getmetrics(verbose=True))))
def debugwaitonrepack(repo):
    """Block until any in-progress repack releases the repack lock."""
    with repo._lock(repo.svfs, "repacklock", True, None, None,
                    _('repacking %s') % repo.origroot):
        pass
def debugwaitonprefetch(repo):
    """Block until any in-progress prefetch releases the prefetch lock."""
    with repo._lock(repo.svfs, "prefetchlock", True, None, None,
                    _('prefetching in %s') % repo.origroot):
        pass
I'd use the words "File Data Store" and "File History Store" in the user facing code. content and metadata were old terms and should probably be replaced throughout the code. durham: I'd use the words "File Data Store" and "File History Store" in the user facing code. content… |
if missing: | if missing: | ||||
missing = store.getmissing(missing) | missing = store.getmissing(missing) | ||||
return missing | return missing | ||||
def markledger(self, ledger, options=None):
    """Forward the ledger marking to every sub-store."""
    for substore in self.stores:
        substore.markledger(ledger, options)
def getmetrics(self, verbose=False):
    """Aggregate getmetrics() across every sub-store into a single dict."""
    return shallowutil.sumdicts(
        *(store.getmetrics(verbose=verbose) for store in self.stores))
class remotefilelogmetadatastore(basestore.basestore): | class remotefilelogmetadatastore(basestore.basestore): | ||||
def getancestors(self, name, node, known=None): | def getancestors(self, name, node, known=None): | ||||
"""Returns as many ancestors as we're aware of. | """Returns as many ancestors as we're aware of. | ||||
return value: { | return value: { | ||||
node: (p1, p2, linknode, copyfrom), | node: (p1, p2, linknode, copyfrom), | ||||
def add(self, name, node, data):
    """Remote stores are read-only; adding always raises."""
    raise RuntimeError("cannot add to a remote store")
def getmissing(self, keys):
    """Remote store: every requested key is reported as missing."""
    return keys
def markledger(self, ledger, options=None):
    """Remote stores contribute nothing to the repack ledger."""
    return None
def getmetrics(self, verbose=False):
    """Remote stores expose no local metrics."""
    return {}
raise RuntimeError("cannot add to a remote store") | raise RuntimeError("cannot add to a remote store") | ||||
def getmissing(self, keys):
    """Remote store: every requested key is reported as missing."""
    return keys
def markledger(self, ledger, options=None):
    """Remote stores contribute nothing to the repack ledger."""
    return None
def getmetrics(self, verbose=False):
    """Remote stores expose no local metrics."""
    return {}
def serverrepack(repo, incremental=False, options=None): | def serverrepack(repo, incremental=False, options=None): | ||||
packpath = repo.vfs.join('cache/packs/%s' % PACK_CATEGORY) | packpath = repo.vfs.join('cache/packs/%s' % PACK_CATEGORY) | ||||
revlogstore = manifestrevlogstore(repo) | revlogstore = manifestrevlogstore(repo) | ||||
try: | try: |
We may want a try/catch around all of these metric functions that call _getfiles. _getfiles could throw an exception if the directory isn't readable for some reason, and we don't want metric logging code to bring down the process. Or we add the try/catch to _getfiles or _listkeys and print a warning if the directory is inaccessible, then return an empty list.