# Name of the file (under .hg/) that records the hg heads the git-hg map was
# last synced against, and of the git-hg map file itself.
hgheadsfile = 'git-synced-hgheads'
gitmapfile = 'git-mapfile'
gitmetafiles = set(
    [gitmapfile, 'git-named-branches', 'git-tags', 'git-remote-refs'])


def _getcommonheads(repo):
    '''return the binary hg heads recorded in the synced-heads file

    An empty list is returned when the file cannot be obtained. Blank lines
    are ignored so that a stray newline never turns into an empty node id.
    '''
    f = _getfile(repo, hgheadsfile)
    if not f:
        return []

    # NOTE(review): _getfile is expected to hand back an open file object;
    # close it explicitly instead of leaking the handle.
    try:
        lines = f.readlines()
    finally:
        f.close()

    stripped = (line.strip() for line in lines)
    return [bin(hexnode) for hexnode in stripped if hexnode]


def _isheadmissing(repo, heads):
    '''return True if repo.known() reports any of ``heads`` as unknown'''
    return not all(repo.known(heads))


def _getmissinglines(mapfile, missinghashes):
    '''return the set of map file lines whose hg hash is in missinghashes

    ``mapfile`` is iterated line by line; every entry is expected to look
    like ``<gitsha> <hgsha>``. Blank or malformed lines are skipped. Every
    returned line is newline terminated. The scan stops as soon as all
    requested hashes were found.

    Raises error.Abort if at least one requested hash has no map entry.
    '''
    missinglines = set()

    # Avoid expensive lookup through the map file if there is no missing hash.
    if not missinghashes:
        return missinglines

    hashestofind = set(missinghashes)
    for line in mapfile:
        # partition() never raises: lines without a separator (blank lines,
        # truncated entries) simply carry no mapping and are skipped.
        fields = line.strip().partition(' ')
        if not fields[1]:
            continue

        hgsha = fields[2]
        if hgsha not in hashestofind:
            continue

        # The receiving side concatenates lines verbatim, so the last line
        # of a file without a trailing newline must be terminated here.
        if not line.endswith('\n'):
            line += '\n'
        missinglines.add(line)

        # Return the missing lines if we found all of them.
        hashestofind.discard(hgsha)
        if not hashestofind:
            return missinglines

    # Not every file-like object carries a name; fall back to the map name.
    name = getattr(mapfile, 'name', gitmapfile)
    raise error.Abort(_('gitmeta: missing hashes in file %s') % name)


class _githgmappayload(object):
    '''payload of the git-hg map bundle2 part, serializable as JSON

    needfullsync -- whether the receiver should treat ``missinglines`` as the
                    complete map rather than a delta
    newheads     -- collection of hex hg heads the sender is at
    missinglines -- collection of map file lines the receiver lacks
    '''

    def __init__(self, needfullsync, newheads, missinglines):
        self.needfullsync = needfullsync
        self.newheads = newheads
        self.missinglines = missinglines

    def _todict(self):
        # Sets have no stable iteration order; sort so that the same payload
        # always serializes to the same bytes.
        return {
            'needfullsync': self.needfullsync,
            'newheads': sorted(self.newheads),
            'missinglines': sorted(self.missinglines),
        }

    def tojson(self):
        '''serialize the payload to a JSON string'''
        return json.dumps(self._todict())

    @classmethod
    def _fromdict(cls, d):
        return cls(
            d['needfullsync'], set(d['newheads']), set(d['missinglines']))

    @classmethod
    def fromjson(cls, jsonstr):
        '''build a payload from a string produced by tojson()'''
        return cls._fromdict(json.loads(jsonstr))
@exchange.getbundle2partsgenerator('b2x:fb:gitmeta:githgmap')
def _getbundlegithgmappart(bundler, repo, source, bundlecaps=None, **kwargs):
    '''send missing git to hg map data via bundle2

    A part is only added when the requester advertises the ``fb_gitmeta``
    bundle capability, the ``gitlookup.onlymapdelta`` config is enabled and
    the git-hg map file can be obtained. The part carries a JSON payload
    with the current repo heads, the map lines for the commits missing
    relative to the heads passed as ``common``, and a flag telling whether
    this amounts to a full sync.

    Raises error.Abort (from _getmissinglines) when a missing commit has no
    entry in the map file.
    '''
    # bundlecaps defaults to None; treat that like "capability not
    # advertised" instead of failing on the membership test.
    if not bundlecaps or 'fb_gitmeta' not in bundlecaps:
        return

    # Do nothing if the config indicates serving the complete git-hg map
    # file. _getbundlegitmetapart will handle serving the complete file in
    # this case.
    if not repo.ui.configbool('gitlookup', 'onlymapdelta', False):
        return

    mapfile = _getfile(repo, gitmapfile)
    if not mapfile:
        return

    try:
        # A missing or None 'common' means the requester has nothing synced
        # yet, which is handled as a full sync below.
        commonheads = kwargs.get('common') or []

        # If there are missing heads, we will sync everything.
        if _isheadmissing(repo, commonheads):
            commonheads = []

        needfullsync = not commonheads

        heads = repo.heads()
        newheads = set(hex(head) for head in heads)

        missingcommits = repo.changelog.findmissing(commonheads, heads)
        missinghashes = set(hex(commit) for commit in missingcommits)
        missinglines = _getmissinglines(mapfile, missinghashes)
    finally:
        # NOTE(review): _getfile is expected to hand back an open file
        # object; close it explicitly instead of leaking the handle.
        mapfile.close()

    payload = _githgmappayload(needfullsync, newheads, missinglines)
    part = bundle2.bundlepart(
        'b2x:fb:gitmeta:githgmap',
        [('filename', gitmapfile)],
        data=payload.tojson())
    bundler.addpart(part)


@bundle2.parthandler('b2x:fb:gitmeta:githgmap', ('filename',))
@bundle2.parthandler('fb:gitmeta:githgmap', ('filename',))
def bundle2getgithgmap(op, part):
    '''apply a received git-hg map payload to the local repo

    The part data is decoded as a _githgmappayload. When it carries no map
    lines nothing is written. On a full sync the received lines replace the
    local map; otherwise they are merged into the lines of the existing map
    file. Afterwards the map file and the synced-heads file are written via
    _writefile, both in sorted order so the result is deterministic.

    Raises error.Abort when a delta arrives but the local map file cannot be
    read.
    '''
    params = dict(part.mandatoryparams)
    if not _validatepartparams(op, params):
        return

    filename = params['filename']
    with op.repo.wlock():
        data = _githgmappayload.fromjson(part.read())
        missinglines = data.missinglines

        # No need to update anything if already in sync.
        if not missinglines:
            return

        if data.needfullsync:
            newlines = missinglines
        else:
            mapfile = _getfile(op.repo, filename)
            if not mapfile:
                raise error.Abort(
                    _('gitmeta: could not read from .hg/%s') % filename)

            try:
                # Lines are joined verbatim below, so terminate a last line
                # that lacks its newline; otherwise it would fuse with
                # whichever line ends up after it.
                currentlines = set(
                    line if line.endswith('\n') else line + '\n'
                    for line in mapfile.readlines())
            finally:
                mapfile.close()

            # A delta should only contain lines we do not have yet.
            if currentlines & missinglines:
                msg = 'warning: gitmeta: unexpected lines in .hg/%s\n'
                op.repo.ui.warn(_(msg) % filename)

            currentlines.update(missinglines)
            newlines = currentlines

        _writefile(op, filename, ''.join(sorted(newlines)))
        _writefile(op, hgheadsfile, '\n'.join(sorted(data.newheads)))
$ hg clone ssh://user@dummy/repo1 repo3 -q $ cd repo3 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from ssh://user@dummy/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 4 files (223 bytes) + + $ sort .hg/git-synced-hgheads + fc5f87aa174b7d4016abf3e908fd63cc99774540 +#else $ hg gitgetmeta -v getting git metadata from ssh://user@dummy/repo1 writing .hg/git-mapfile writing .hg/git-remote-refs writing .hg/git-tags wrote 3 files (183 bytes) +#endif $ sort .hg/git-mapfile ffffffffffffffffffffffffffffffffffffffff fc5f87aa174b7d4016abf3e908fd63cc99774540 @@ -69,6 +94,22 @@ $ cat .hg/git-tags ffffffffffffffffffffffffffffffffffffffff 0.1 +Redundant sync just to see that the hg-git map file is not synced with +onlymapdelta being True +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from ssh://user@dummy/repo1 + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 2 files (101 bytes) + + $ cat .hg/git-remote-refs + ffffffffffffffffffffffffffffffffffffffff default/master + + $ cat .hg/git-tags + ffffffffffffffffffffffffffffffffffffffff 0.1 +#endif + Make changes upstream and check that they get reflected in clones ----------------------------------------------------------------- @@ -81,12 +122,25 @@ Check local repo syncing $ cd ../repo2 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 4 files (305 bytes) + + $ sort .hg/git-synced-hgheads + d4a59f7c570a8794e6ec20865090e7b848395b92 +#else $ hg gitgetmeta -v getting git metadata from $TESTTMP/repo1 writing .hg/git-mapfile writing .hg/git-remote-refs writing .hg/git-tags wrote 3 files (265 bytes) +#endif $ sort .hg/git-mapfile 1111111111111111111111111111111111111111 d4a59f7c570a8794e6ec20865090e7b848395b92 @@ -115,6 +169,20 @@ Check remote repo syncing 
$ cd ../repo3 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from ssh://user@dummy/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (529 bytes) + + $ sort .hg/git-synced-hgheads + 8ea31c3efb6d2edb6d9fe608c29034e7e7ed5f91 + c411819f7fd6036d50b17a28d3edb7aa9121985a +#else $ hg gitgetmeta -v getting git metadata from ssh://user@dummy/repo1 writing .hg/git-mapfile @@ -122,6 +190,7 @@ writing .hg/git-remote-refs writing .hg/git-tags wrote 4 files (448 bytes) +#endif $ sort .hg/git-mapfile 1111111111111111111111111111111111111111 d4a59f7c570a8794e6ec20865090e7b848395b92 @@ -148,6 +217,19 @@ Check local repo syncing $ cd ../repo2 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (406 bytes) + + $ sort .hg/git-synced-hgheads + c411819f7fd6036d50b17a28d3edb7aa9121985a +#else $ hg gitgetmeta -v getting git metadata from $TESTTMP/repo1 writing .hg/git-mapfile @@ -155,6 +237,7 @@ writing .hg/git-remote-refs writing .hg/git-tags wrote 4 files (366 bytes) +#endif $ sort .hg/git-mapfile 1111111111111111111111111111111111111111 d4a59f7c570a8794e6ec20865090e7b848395b92 @@ -185,6 +268,19 @@ Check remote repo syncing $ cd ../repo3 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from ssh://user@dummy/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (324 bytes) + + $ sort .hg/git-synced-hgheads + 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 +#else $ hg gitgetmeta -v getting git metadata from ssh://user@dummy/repo1 writing .hg/git-mapfile @@ -192,6 +288,7 @@ writing .hg/git-remote-refs writing .hg/git-tags wrote 4 files (284 
bytes) +#endif $ sort .hg/git-mapfile 2222222222222222222222222222222222222222 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 @@ -208,6 +305,19 @@ Check local repo syncing $ cd ../repo2 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (324 bytes) + + $ sort .hg/git-synced-hgheads + 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 +#else $ hg gitgetmeta -v getting git metadata from $TESTTMP/repo1 writing .hg/git-mapfile @@ -215,6 +325,7 @@ writing .hg/git-remote-refs writing .hg/git-tags wrote 4 files (284 bytes) +#endif $ sort .hg/git-mapfile 2222222222222222222222222222222222222222 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 @@ -242,6 +353,20 @@ Check local repo syncing $ cd ../repo2 +#if onlymapdelta.true + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (447 bytes) + + $ sort .hg/git-synced-hgheads + 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 + 627ddeb6657d60a21b87c725b5c4e60d91b75f19 +#else $ hg gitgetmeta -v getting git metadata from $TESTTMP/repo1 writing .hg/git-mapfile @@ -249,6 +374,7 @@ writing .hg/git-remote-refs writing .hg/git-tags wrote 4 files (366 bytes) +#endif $ sort .hg/git-mapfile 2222222222222222222222222222222222222222 3bfa460515b210d1e6f7e21bde166ef5c5f0d9b6 @@ -264,6 +390,54 @@ $ cat .hg/git-tags ffffffffffffffffffffffffffffffffffffffff 0.1 +Check corrupted git-hg map data +------------------------------- + +This is only valid when we are serving missing map data because when we serve +the complete map, we just simply serve the file without any validations on the +map data. +#if onlymapdelta.true + $ cd ../repo1 + $ touch a + $ hg ci -Aqm "adding a" + $ hg log -r . 
--template "{node}\n" + 1e2e1480acd77a0155ee53e30aab1bb4a08f9f22 + +Not updating the map file intentionally to simulate missing map data. Instead, +we try to sync changes and check that the syncing fails. + $ cd ../repo2 + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + abort: gitmeta: missing hashes in file $TESTTMP/repo1/.hg/git-mapfile + [255] + +Now adding the map entries to both the repos to simulate corruption on the +client side + $ cd ../repo1 + $ echo "4444444444444444444444444444444444444444 1e2e1480acd77a0155ee53e30aab1bb4a08f9f22" >> .hg/git-mapfile + $ cd ../repo2 + $ echo "4444444444444444444444444444444444444444 1e2e1480acd77a0155ee53e30aab1bb4a08f9f22" >> .hg/git-mapfile + $ hg gitgetmeta -v + getting git metadata from $TESTTMP/repo1 + warning: gitmeta: unexpected lines in .hg/git-mapfile + writing .hg/git-mapfile + writing .hg/git-synced-hgheads + writing .hg/git-named-branches + writing .hg/git-remote-refs + writing .hg/git-tags + wrote 5 files (529 bytes) + +Strip the last commit and restore map entries to have same state as +onlymapdelta.false + + $ grep -v "1e2e1480acd77a0155ee53e30aab1bb4a08f9f22" .hg/git-mapfile > tempfile + $ mv tempfile .hg/git-mapfile + $ cd ../repo1 + $ hg strip -q "tip" + $ grep -v "1e2e1480acd77a0155ee53e30aab1bb4a08f9f22" .hg/git-mapfile > tempfile + $ mv tempfile .hg/git-mapfile +#endif + Check that our revset and template mappings work ------------------------------------------------