When calling prefetch in remotefilelog, also prefetch lfs
files.
We are using the same hook mechanism that remotefilelog is already using
for LFS by having remotefilelog call into LFS.
( )
quark |
Restricted Project |
When calling prefetch in remotefilelog, also prefetch lfs
files.
We are using the same hook mechanism that remotefilelog is already using
for LFS by having remotefilelog call into LFS.
run tests on test-lfs-remotefilelog-prefetch.t
Automatic diff as part of commit; lint not applicable. |
Automatic diff as part of commit; unit tests not applicable. |
Nice feature! Could you move the fileserverclient wrapper-related methods from lfs to remotefilelog?
Currently, lfs does not couple with any remotefilelog internals (ex. fileserverclient) intentionally as an attempt to make future upstreaming work easier. The filelog wrapper code works both for remotefilelog and hg filelog.
lgtm but @quark's request is probably reasonable.
hgext3rd/lfs/wrapper.py | ||
---|---|---|
258 | Generally, "hash" is used to refer to the 40-character version, and "node" refers to the 20-character one. We usually don't import the node module directly because it prevents us from using 'node' as a variable, so we just import bin/hex directly. |
Path | Packages | |||
---|---|---|---|---|
M | hgext3rd/lfs/wrapper.py (5 lines) | |||
M | remotefilelog/__init__.py (1 line) | |||
M | remotefilelog/fileserverclient.py (24 lines) | |||
M | tests/library.sh (24 lines) | |||
A | M | tests/test-remotefilelog-lfs-prefetch.t (99 lines) |
def _lfsloaded(loaded=False):
    """afterloaded hook: wire the lfs extension into remotefilelog.

    If lfs is enabled, wrap the remotefilelog filelog class so lfs
    pointers are handled, and hand the lfs module to fileserverclient
    so prefetch can also fetch lfs blobs.
    """
    try:
        lfsmod = extensions.find('lfs')
    except KeyError:
        # lfs extension not enabled; nothing to hook up.
        return
    if not lfsmod:
        return
    lfsmod.wrapfilelog(remotefilelog.remotefilelog)
    fileserverclient._lfsmod = lfsmod
extensions.afterloaded('lfs', _lfsloaded) | extensions.afterloaded('lfs', _lfsloaded) | ||||
# debugdata needs remotefilelog.len to work | # debugdata needs remotefilelog.len to work | ||||
extensions.wrapcommand(commands.table, 'debugdata', debugdatashallow) | extensions.wrapcommand(commands.table, 'debugdata', debugdatashallow) | ||||
def cloneshallow(orig, ui, repo, *args, **opts): | def cloneshallow(orig, ui, repo, *args, **opts): | ||||
if opts.get('shallow'): | if opts.get('shallow'): | ||||
repos = [] | repos = [] |
import hashlib, os, time, io, struct | import hashlib, os, time, io, struct | ||||
import itertools | import itertools | ||||
from mercurial.i18n import _ | from mercurial.i18n import _ | ||||
from mercurial.node import hex, bin, nullid | from mercurial.node import hex, bin, nullid | ||||
from mercurial import ( | from mercurial import ( | ||||
error, | error, | ||||
httppeer, | httppeer, | ||||
revlog, | |||||
sshpeer, | sshpeer, | ||||
util, | util, | ||||
util, | |||||
wireproto, | wireproto, | ||||
) | ) | ||||
from . import ( | from . import ( | ||||
connectionpool, | connectionpool, | ||||
constants, | constants, | ||||
shallowutil, | shallowutil, | ||||
wirepack, | wirepack, | ||||
) | ) | ||||
from .contentstore import unioncontentstore | from .contentstore import unioncontentstore | ||||
from .metadatastore import unionmetadatastore | from .metadatastore import unionmetadatastore | ||||
from .lz4wrapper import lz4decompress | from .lz4wrapper import lz4decompress | ||||
# Statistics for debugging
fetchcost = 0
fetches = 0
fetched = 0
fetchmisses = 0

# The lfs extension module when it is enabled; populated by
# remotefilelog's 'afterloaded' hook (_lfsloaded) so that prefetch can
# also download lfs blobs.  Stays None when lfs is not loaded.
_lfsmod = None

# Topic label used for download progress output.
_downloading = _('downloading')
def getcachekey(reponame, file, id):
    """Return the shared-cache path for revision ``id`` of ``file``.

    The path is sharded by the sha1 of the file name (first two hex
    chars as a directory level) under the repo name.
    """
    digest = hashlib.sha1(file).hexdigest()
    shard, rest = digest[:2], digest[2:]
    return os.path.join(reponame, shard, rest, id)
def getlocalkey(file, id): | def getlocalkey(file, id): | ||||
pathhash = hashlib.sha1(file).hexdigest() | pathhash = hashlib.sha1(file).hexdigest() | ||||
missingids = [(file, hex(id)) for file, id in missingids] | missingids = [(file, hex(id)) for file, id in missingids] | ||||
fetched += len(missingids) | fetched += len(missingids) | ||||
start = time.time() | start = time.time() | ||||
missingids = self.request(missingids) | missingids = self.request(missingids) | ||||
if missingids: | if missingids: | ||||
raise error.Abort(_("unable to download %d files") % | raise error.Abort(_("unable to download %d files") % | ||||
len(missingids)) | len(missingids)) | ||||
fetchcost += time.time() - start | fetchcost += time.time() - start | ||||
self._lfsprefetch(fileids) | |||||
def _lfsprefetch(self, fileids):
    # After the filelog revisions in ``fileids`` (list of (path, hexnode)
    # pairs) have been prefetched, also batch-download any lfs blobs they
    # point to, so a later checkout does not fetch them one at a time.
    # No-op unless the lfs extension is loaded (_lfsmod is set by the
    # afterloaded hook) and this repo has an lfs local blobstore.
    if not _lfsmod or not hasattr(self.repo.svfs, 'lfslocalblobstore'):
        return
    # Respect the lfs download policy for this repo (may be disabled).
    if not _lfsmod.wrapper.candownload(self.repo):
        return
    pointers = []
    store = self.repo.svfs.lfslocalblobstore
    for file, id in fileids:
        # ``id`` is a 40-char hex node; filelog APIs take binary nodes.
        nodehash = bin(id)
        rlog = self.repo.file(file)
        # REVIDX_EXTSTORED marks revisions whose raw text is an lfs
        # pointer instead of the real content.
        if rlog.flags(nodehash) & revlog.REVIDX_EXTSTORED:
            text = rlog.revision(nodehash, raw=True)
            p = _lfsmod.pointer.deserialize(text)
            oid = p.oid()
            # Only request blobs missing from the local store.
            if not store.has(oid):
                pointers.append(p)
    if len(pointers) > 0:
        # Single batched request for every missing blob.
        self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
        # NOTE(review): assert is stripped under -O; presumably readbatch
        # raises on failure, so this is a sanity check only.
        assert all(store.has(p.oid()) for p in pointers)
def logstacktrace(self):
    """Record the current Python call stack in the repo's blackbox-style
    log under the 'remotefilelog' event, to help diagnose code paths
    that trigger excessive remote fetching."""
    import traceback
    stack = ''.join(traceback.format_stack())
    self.ui.log('remotefilelog',
                'excess remotefilelog fetching:\n%s',
                stack)
[remotefilelog] | [remotefilelog] | ||||
reponame=master | reponame=master | ||||
datapackversion=1 | datapackversion=1 | ||||
[phases] | [phases] | ||||
publish=False | publish=False | ||||
EOF | EOF | ||||
} | } | ||||
hgcloneshallowlfs() {
  # Shallow-clone a repo with the lfs extension enabled.
  #   $1 = source repo, $2 = destination, $3 = lfs blob store url;
  #   remaining arguments are passed through to 'hg clone'.
  # Fix: the original declared 'local name' (never used) while leaving
  # orig/dest/lfsdir as globals, and left $orig/$dest/$@ unquoted,
  # which breaks on paths containing spaces.
  local orig dest lfsdir
  orig=$1
  shift
  dest=$1
  shift
  lfsdir=$1
  shift
  hg clone --shallow --config "extensions.lfs=" --config "lfs.url=$lfsdir" --config remotefilelog.reponame=master "$orig" "$dest" "$@"
  cat >> "$dest/.hg/hgrc" <<EOF
[extensions]
lfs=
[lfs]
url=$lfsdir
[remotefilelog]
reponame=master
datapackversion=1
[phases]
publish=False
EOF
}
hginit() {
  # Create a plain (non-shallow) repository named $1; any extra
  # arguments are forwarded to 'hg init'.
  # Fix: quote "$name" and use "$@" so names/flags containing spaces
  # are not word-split.
  local name
  name=$1
  shift
  hg init "$name" "$@"
}
clearcache() { | clearcache() { |
$ PYTHONPATH=$TESTDIR/..:$PYTHONPATH | |||||
$ export PYTHONPATH | |||||
$ LFSPATH=$TESTTMP/lfs | |||||
$ export LFSPATH | |||||
$ mkdir $LFSPATH | |||||
$ . "$TESTDIR/library.sh" | |||||
$ hginit master | |||||
$ cd master | |||||
$ cat >> $HGRCPATH <<EOF | |||||
> [extensions] | |||||
> lfs=$TESTDIR/../hgext3rd/lfs | |||||
> [lfs] | |||||
> url=file://$LFSPATH | |||||
> EOF | |||||
$ cat >> .hg/hgrc <<EOF | |||||
> [remotefilelog] | |||||
> server=True | |||||
> EOF | |||||
$ echo x > x | |||||
$ echo z > z | |||||
$ hg commit -qAm x | |||||
$ echo x2 > x | |||||
$ echo y > y | |||||
$ hg commit -qAm y | |||||
$ echo large > large | |||||
$ hg --config 'lfs.threshold=1' commit -qAm y | |||||
$ hg bookmark foo | |||||
$ hg debuglfsupload -r tip | |||||
$ cd .. | |||||
# prefetch a revision | |||||
$ hgcloneshallowlfs ssh://user@dummy/master shallow file://$LFSPATH --noupdate | |||||
streaming all changes | |||||
2 files to transfer, 774 bytes of data | |||||
transferred 774 bytes in * seconds (*/sec) (glob) | |||||
searching for changes | |||||
no changes found | |||||
$ cd shallow | |||||
$ hg prefetch -r 0 | |||||
2 files fetched over 1 fetches - (2 misses, 0.00% hit ratio) over *s (glob) | |||||
$ hg cat -r 0 x | |||||
x | |||||
# prefetch a range of revisions | |||||
$ clearcache | |||||
$ hg prefetch -r 0::1 | |||||
4 files fetched over 1 fetches - (4 misses, 0.00% hit ratio) over *s (glob) | |||||
$ hg cat -r 0 x | |||||
x | |||||
$ hg cat -r 1 x | |||||
x2 | |||||
# prefetch certain files | |||||
$ clearcache | |||||
$ hg prefetch -r 1 x | |||||
1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob) | |||||
$ hg cat -r 1 x | |||||
x2 | |||||
$ hg cat -r 1 y | |||||
y | |||||
1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob) | |||||
# prefetch large file | |||||
$ hg prefetch -r 2 | |||||
2 files fetched over 1 fetches - (2 misses, 0.00% hit ratio) over *s (glob) | |||||
# prefetch on pull when configured | |||||
$ printf "[remotefilelog]\npullprefetch=bookmark()\n" >> .hg/hgrc | |||||
$ hg strip tip | |||||
saved backup bundle to $TESTTMP/shallow/.hg/strip-backup/730e2b7b175c-acada81e-backup.hg (glob) | |||||
$ clearcache | |||||
$ hg pull | |||||
pulling from ssh://user@dummy/master | |||||
searching for changes | |||||
adding changesets | |||||
adding manifests | |||||
adding file changes | |||||
added 1 changesets with 0 changes to 0 files | |||||
updating bookmark foo | |||||
(run 'hg update' to get a working copy) | |||||
prefetching file contents | |||||
4 files fetched over 1 fetches - (4 misses, 0.00% hit ratio) over *s (glob) | |||||
$ hg up tip | |||||
4 files updated, 0 files merged, 0 files removed, 0 files unresolved |
Generally, "hash" is used to refer to the 40-character version, and "node" refers to the 20-character one. We usually don't import the node module directly because it prevents us from using 'node' as a variable, so we just import bin/hex directly.