diff --git a/hgext/git/__init__.py b/hgext/git/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/__init__.py
@@ -0,0 +1,224 @@
+"""Grant Mercurial the ability to operate on Git repositories. (EXPERIMENTAL)
+
+This is currently super experimental. It probably will consume your
+firstborn a la Rumpelstiltskin, etc.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+from mercurial.i18n import _
+
+from mercurial import (
+    commands,
+    debugcommands,
+    error,
+    extensions,
+    hg,
+    localrepo,
+    node as nodemod,
+    store,
+)
+from mercurial.interfaces import (
+    repository,
+    util as interfaceutil,
+)
+
+from . import (
+    dirstate,
+    gitlog,
+    index,
+)
+
+import pygit2
+
+# TODO: extract an interface for this in core
+class gitstore(object): # store.basicstore):
+    def __init__(self, path, vfstype):
+        self.vfs = vfstype(path)
+        self.path = self.vfs.base
+        self.createmode = store._calcmode(self.vfs)
+        # above lines should go away in favor of:
+        # super(gitstore, self).__init__(path, vfstype)
+
+        self.git = pygit2.Repository(os.path.normpath(
+            os.path.join(path, '..', '.git')))
+        self._db = index.get_index(self.git)
+
+    def join(self, f):
+        """Fake store.join method for git repositories.
+
+        For the most part, store.join is used for @storecache
+        decorators to invalidate caches when various files
+        change. We'll map the ones we care about, and ignore the rest.
+        """
+        if f in ('00changelog.i', '00manifest.i'):
+            # This is close enough: in order for the changelog cache
+            # to be invalidated, HEAD will have to change.
+            return os.path.join(self.path, 'HEAD')
+        elif f == 'lock':
+            # TODO: we probably want to map this to a git lock, I
+            # suspect index.lock. We should figure out what the
+            # most-alike file is in git-land. For now we're risking
+            # bad concurrency errors if another git client is used.
+            return os.path.join(self.path, 'hgit-bogus-lock')
+        elif f in ('obsstore', 'phaseroots', 'narrowspec', 'bookmarks'):
+            return os.path.join(self.path, '..', '.hg', f)
+        raise NotImplementedError('Need to pick file for %s.' % f)
+
+    def changelog(self, trypending):
+        # TODO we don't have a plan for trypending in hg's git support yet
+        return gitlog.changelog(self.git, self._db)
+
+    def manifestlog(self, repo, storenarrowmatch):
+        # TODO handle storenarrowmatch and figure out if we need the repo arg
+        return gitlog.manifestlog(self.git, self._db)
+
+    def invalidatecaches(self):
+        pass
+
+    def write(self, tr=None):
+        # normally this handles things like fncache writes, which we don't have
+        pass
+
+def _makestore(orig, requirements, storebasepath, vfstype):
+    if (os.path.exists(os.path.join(storebasepath, 'this-is-git'))
+        and os.path.exists(os.path.join(storebasepath, '..', '.git'))):
+        return gitstore(storebasepath, vfstype)
+    return orig(requirements, storebasepath, vfstype)
+
+class gitfilestorage(object):
+    def file(self, path):
+        if path[0:1] == b'/':
+            path = path[1:]
+        return gitlog.filelog(self.store.git, self.store._db, path)
+
+def _makefilestorage(orig, requirements, features, **kwargs):
+    store = kwargs['store']
+    if isinstance(store, gitstore):
+        return gitfilestorage
+    return orig(requirements, features, **kwargs)
+
+def _setupdothg(ui, path):
+    """Create or refresh the .hg marker directory in a git checkout."""
+    dothg = os.path.join(path, '.hg')
+    if os.path.exists(dothg):
+        ui.warn(_('git repo already initialized for hg\n'))
+    else:
+        os.mkdir(dothg)
+        # TODO is it ok to extend .git/info/exclude like this?
+        with open(os.path.join(path, '.git',
+                               'info', 'exclude'), 'ab') as exclude:
+            exclude.write(b'\n.hg\n')
+    # Binary mode throughout: the payloads written here are bytes.
+    with open(os.path.join(dothg, 'this-is-git'), 'wb') as f:
+        pass
+    with open(os.path.join(dothg, 'requirements'), 'wb') as f:
+        f.write(b'git\n')
+
+_BMS_PREFIX = 'refs/heads/'
+
+class gitbmstore(object):
+    """Bookmark-store facade backed by the refs under refs/heads/."""
+    def __init__(self, gitrepo):
+        self.gitrepo = gitrepo
+
+    def __contains__(self, name):
+        return (_BMS_PREFIX + name) in self.gitrepo.references
+
+    def __iter__(self):
+        for r in self.gitrepo.listall_references():
+            if r.startswith(_BMS_PREFIX):
+                yield r[len(_BMS_PREFIX):]
+
+    def __getitem__(self, k):
+        return self.gitrepo.references[_BMS_PREFIX + k].peel().id.raw
+
+    def get(self, k, default=None):
+        try:
+            if k in self:
+                return self[k]
+            return default
+        except pygit2.InvalidSpecError:
+            return default
+
+    @property
+    def active(self):
+        h = self.gitrepo.references['HEAD']
+        if not isinstance(h.target,
+                          str) or not h.target.startswith(_BMS_PREFIX):
+            return None
+        return h.target[len(_BMS_PREFIX):]
+
+    @active.setter
+    def active(self, mark):
+        raise NotImplementedError
+
+    def names(self, node):
+        r = []
+        for ref in self.gitrepo.listall_references():
+            if not ref.startswith(_BMS_PREFIX):
+                continue
+            if self.gitrepo.references[ref].peel().id.raw != node:
+                continue
+            r.append(ref[len(_BMS_PREFIX):])
+        return r
+
+    # Cleanup opportunity: this is *identical* to core's bookmarks store.
+    def expandname(self, bname):
+        if bname == '.':
+            if self.active:
+                return self.active
+            raise error.RepoLookupError(_("no active bookmark"))
+        return bname
+
+    def applychanges(self, repo, tr, changes):
+        """Apply a list of changes to bookmarks
+        """
+        # TODO: this should respect transactions, but that's going to
+        # require enlarging the gitbmstore to know how to do in-memory
+        # temporary writes and read those back prior to transaction
+        # finalization.
+        for name, node in changes:
+            if node is None:
+                self.gitrepo.references.delete(_BMS_PREFIX+name)
+            else:
+                self.gitrepo.references.create(
+                    _BMS_PREFIX+name, nodemod.hex(node), force=True)
+
+def init(orig, ui, dest='.', **opts):
+    if opts.get('git', False):
+        inited = False
+        path = os.path.abspath(dest)
+        # TODO: walk up looking for the git repo
+        gr = pygit2.Repository(os.path.join(path, '.git'))
+        _setupdothg(ui, path)
+        return 0 # debugcommands.debugrebuilddirstate(
+        # ui, hg.repository(ui, path), rev='.')
+    return orig(ui, dest=dest, **opts)
+
+def reposetup(ui, repo):
+    if isinstance(repo.store, gitstore):
+        orig = repo.__class__
+
+        class gitlocalrepo(orig):
+
+            def _makedirstate(self):
+                # TODO narrow support here
+                return dirstate.gitdirstate(
+                    self.ui, self.vfs.base, self.store.git)
+
+            @property
+            def _bookmarks(self):
+                return gitbmstore(self.store.git)
+
+        repo.__class__ = gitlocalrepo
+    return repo
+
+def extsetup(ui):
+    extensions.wrapfunction(localrepo, 'makestore', _makestore)
+    extensions.wrapfunction(localrepo, 'makefilestorage', _makefilestorage)
+    # Inject --git flag for `hg init`
+    entry = extensions.wrapcommand(commands.table, 'init', init)
+    entry[1].extend([('', 'git', None, 'setup up a git repository instead of hg')])
diff --git a/hgext/git/dirstate.py b/hgext/git/dirstate.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/dirstate.py
@@ -0,0 +1,254 @@
+from __future__ import absolute_import
+
+import contextlib
+import errno
+import os
+import stat
+
+from mercurial import (
+    dirstate,
+    error,
+    extensions,
+    match as matchmod,
+    node as nodemod,
+    scmutil,
+    util,
+)
+from mercurial.interfaces import (
+    dirstate as intdirstate,
+    util as interfaceutil,
+)
+from mercurial.i18n import _
+
+import pygit2
+
+
+def readpatternfile(orig, filepath, warn, sourceinfo=False):
+    if not ('info/exclude' in filepath or filepath.endswith('.gitignore')):
+        return orig(filepath, warn, sourceinfo=sourceinfo)  # keep caller's flag
+    result = []
+    warnings = []
+    with open(filepath, 'rb') as fp:
+        for l in fp:
+            l = l.strip()
+            if not l or l.startswith('#'):
+                continue
+            if l.startswith('!'):
+                # on reflection, I think /foo is just glob:
+                warnings.append('unsupported ignore pattern %s' % l)
+                continue
+            if l.startswith('/'):
+                result.append('glob:' + l[1:])
+            else:
+                result.append('relglob:' + l)
+    return result, warnings
+extensions.wrapfunction(matchmod, 'readpatternfile', readpatternfile)
+
+
+_STATUS_MAP = {
+    pygit2.GIT_STATUS_CONFLICTED: 'm',
+    pygit2.GIT_STATUS_CURRENT: 'n',
+    pygit2.GIT_STATUS_IGNORED: '?',
+    pygit2.GIT_STATUS_INDEX_DELETED: 'r',
+    pygit2.GIT_STATUS_INDEX_MODIFIED: 'n',
+    pygit2.GIT_STATUS_INDEX_NEW: 'a',
+    pygit2.GIT_STATUS_INDEX_RENAMED: 'a',
+    pygit2.GIT_STATUS_INDEX_TYPECHANGE: 'n',
+    pygit2.GIT_STATUS_WT_DELETED: 'r',
+    pygit2.GIT_STATUS_WT_MODIFIED: 'n',
+    pygit2.GIT_STATUS_WT_NEW: '?',
+    pygit2.GIT_STATUS_WT_RENAMED: 'a',
+    pygit2.GIT_STATUS_WT_TYPECHANGE: 'n',
+    pygit2.GIT_STATUS_WT_UNREADABLE: '?',
+}
+
+
+@interfaceutil.implementer(intdirstate.idirstate)
+class gitdirstate(object):
+
+    def __init__(self, ui, root, gitrepo):
+        self._ui = ui
+        self._root = os.path.dirname(root)
+        self.git = gitrepo
+
+    def p1(self):
+        return self.git.head.peel().id.raw
+
+    def p2(self):
+        # TODO: MERGE_HEAD? something like that, right?
+        return nodemod.nullid
+
+    def setparents(self, p1, p2=nodemod.nullid):
+        assert p2 == nodemod.nullid, 'TODO merging support'
+        self.git.head.set_target(nodemod.hex(p1))
+
+    @util.propertycache
+    def identity(self):
+        return util.filestat.frompath(  # must return: the decorator caches it
+            os.path.join(self._root, '.git', 'index'))
+
+    def branch(self):
+        return b'default'
+
+    def parents(self):
+        # TODO how on earth do we find p2 if a merge is in flight?
+        return self.p1(), nodemod.nullid
+
+    def __iter__(self):
+        # TODO is this going to give us unicodes on py3?
+        return (f.path for f in self.git.index)
+
+    def items(self):
+        for ie in self.git.index:
+            yield ie.path, None # value should be a dirstatetuple
+
+    # py2,3 compat forward
+    iteritems = items
+
+    def __getitem__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+        except KeyError:
+            return '?'
+        return _STATUS_MAP[gs]
+
+    def __contains__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+            return _STATUS_MAP[gs] != '?'
+        except KeyError:
+            return False
+
+    def status(self, match, subrepos, ignored, clean, unknown):
+        # TODO handling of clean files - can we get that from git.status()?
+        modified, added, removed, deleted, unknown, ignored, clean = (
+            [], [], [], [], [], [], [])
+        gstatus = self.git.status()
+        for path, status in gstatus.items():
+            if status == pygit2.GIT_STATUS_IGNORED:
+                if path.endswith('/'):
+                    continue
+                ignored.append(path)
+            elif status in (pygit2.GIT_STATUS_WT_MODIFIED,
+                            pygit2.GIT_STATUS_INDEX_MODIFIED,
+                            pygit2.GIT_STATUS_WT_MODIFIED|pygit2.GIT_STATUS_INDEX_MODIFIED):
+                modified.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_NEW:
+                added.append(path)
+            elif status == pygit2.GIT_STATUS_WT_NEW:
+                unknown.append(path)
+            elif status == pygit2.GIT_STATUS_WT_DELETED:
+                deleted.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_DELETED:
+                removed.append(path)
+            else:
+                raise error.Abort('unhandled case: status for %r is %r' % (
+                    path, status))
+
+        # TODO are we really always sure of status here?
+        return False, scmutil.status(
+            modified, added, removed, deleted, unknown, ignored, clean)
+
+    def flagfunc(self, buildfallback):
+        # TODO we can do better
+        return buildfallback()
+
+    def getcwd(self):
+        # TODO is this a good way to do this?
+        return os.path.dirname(os.path.dirname(self.git.path))
+
+    def normalize(self, path):
+        assert util.normcase(path) == path, 'TODO handling of case folding'
+        return path
+
+    @property
+    def _checklink(self):
+        return util.checklink(os.path.dirname(self.git.path))
+
+    def copies(self):
+        # TODO support copies?
+        return {}
+
+    # # TODO what the heck is this
+    _filecache = set()
+
+    def pendingparentchange(self):
+        # TODO: we need to implement the context manager bits and
+        # correctly stage/revert index edits.
+        return False
+
+    def write(self, tr):
+
+        if tr:
+
+            def writeinner(category):
+                self.git.index.write()
+
+            tr.addpending('gitdirstate', writeinner)
+        else:
+            self.git.index.write()
+
+    def pathto(self, f, cwd=None):
+        if cwd is None:
+            cwd = self.getcwd()
+        # TODO core dirstate does something about slashes here
+        r = util.pathto(self._root, cwd, f)
+        return r
+
+    def matches(self, match):
+        return [x.path for x in self.git.index if match(x.path)]
+
+    def normal(self, f, parentfiledata=None):
+        """Mark a file normal and clean."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def normallookup(self, f):
+        """Mark a file normal, but possibly dirty."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def walk(self, match, subrepos, unknown, ignored, full=True):
+        # TODO: we need to use .status() and not iterate the index,
+        # because the index doesn't force a re-walk and so `hg add` of
+        # a new file without an intervening call to status will
+        # silently do nothing.
+        r = {}
+        cwd = self.getcwd()
+        for path, status in self.git.status().items():
+            if path.startswith('.hg/'):
+                continue
+            if not match(path):
+                continue
+            # TODO construct the stat info from the status object?
+            try:
+                s = os.stat(os.path.join(cwd, path))
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                continue
+            r[path] = s
+        return r
+
+    def savebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def restorebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def add(self, f):
+        self.git.index.add(f)
+
+    def drop(self, f):
+        self.git.index.remove(f)
+
+    def copied(self, path):
+        # TODO: track copies?
+        return None
+
+    @contextlib.contextmanager
+    def parentchange(self):
+        # TODO: track this maybe?
+        yield
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitlog.py
@@ -0,0 +1,455 @@
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+
+from mercurial import (
+    ancestor,
+    changelog as hgchangelog,
+    dagop,
+    error,
+    manifest,
+    match as matchmod,
+    node as nodemod,
+    pycompat,
+    revlog,
+)
+from mercurial.interfaces import (
+    repository,
+    util as interfaceutil,
+)
+from mercurial.utils import (
+    stringutil,
+)
+from . import (
+    index,
+)
+
+import pygit2
+
+class baselog(object): # revlog.revlog):
+    """Common implementations between changelog and manifestlog."""
+    def __init__(self, gr, db):
+        self.gitrepo = gr
+        self._db = db
+
+    def __len__(self):
+        return int(self._db.execute(
+            'SELECT COUNT(*) FROM changelog').fetchone()[0])
+
+    def rev(self, n):
+        if n == nodemod.nullid:
+            return -1
+        t = self._db.execute(
+            'SELECT rev FROM changelog WHERE node = ?',
+            (nodemod.hex(n),)).fetchone()
+        if t is None:
+            raise error.LookupError(n, '00changelog.i', _('no node'))
+        return t[0]
+
+    def node(self, r):
+        if r == nodemod.nullrev:
+            return nodemod.nullid
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE rev = ?',
+            (r,)).fetchone()
+        if t is None:
+            raise error.LookupError(r, '00changelog.i', _('no node'))
+        return nodemod.bin(t[0])
+
+    def hasnode(self, n):
+        """Return True if binary node `n` is in the changelog table."""
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE node = ?',
+            (nodemod.hex(n),)).fetchone()  # the table stores hex, like rev()
+        return t is not None
+
+# TODO: an interface for the changelog type?
+class changelog(baselog):
+
+    def __contains__(self, rev):
+        try:
+            self.node(rev)
+            return True
+        except error.LookupError:
+            return False
+
+    @property
+    def filteredrevs(self):
+        # TODO: we should probably add a refs/hg/ namespace for hidden
+        # heads etc, but that's an idea for later.
+        return set()
+
+    @property
+    def nodemap(self):
+        r = {
+            nodemod.bin(v[0]): v[1] for v in
+            self._db.execute('SELECT node, rev FROM changelog')}
+        r[nodemod.nullid] = nodemod.nullrev
+        return r
+
+    def tip(self):
+        """Return the binary node of the highest rev, or nullid if empty."""
+        t = self._db.execute(
+            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1').fetchone()
+        if t:
+            return nodemod.bin(t[0])  # stored as hex; nodes are binary here
+        return nodemod.nullid
+
+    def revs(self, start=0, stop=None):
+        """Iterate rev numbers from `start` to `stop` inclusive, ascending."""
+        if stop is None:
+            stop = self.rev(self.tip())  # bound must be a rev, not a node
+        t = self._db.execute(
+            'SELECT rev FROM changelog '
+            'WHERE rev >= ? AND rev <= ? '
+            'ORDER BY REV ASC',
+            (start, stop))
+        return (int(r[0]) for r in t)
+
+    def _partialmatch(self, id):
+        if nodemod.wdirhex.startswith(id):
+            raise error.WdirUnsupported
+        candidates = [nodemod.bin(x[0]) for x in self._db.execute(
+            'SELECT node FROM changelog WHERE node LIKE ?', (id + '%', ))]
+        if nodemod.nullhex.startswith(id):
+            candidates.append(nodemod.nullid)
+        if len(candidates) > 1:
+            raise error.AmbiguousPrefixLookupError(
+                id, '00changelog.i', _('ambiguous identifier'))
+        if candidates:
+            return candidates[0]
+        return None
+
+    def flags(self, rev):
+        return 0
+
+    def shortest(self, node, minlength=1):
+        """Return the shortest hex prefix of `node` matching exactly one row."""
+        nodehex = nodemod.hex(node)
+        for attempt in pycompat.xrange(minlength, len(nodehex)+1):
+            candidate = nodehex[:attempt]
+            matches = int(self._db.execute(
+                'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
+                (candidate + '%',)).fetchone()[0])  # test the prefix itself
+            if matches == 1:
+                return candidate
+        return nodehex
+
+    def headrevs(self, revs=None):
+        realheads = [int(x[0]) for x in
+                     self._db.execute(
+                         'SELECT rev FROM changelog '
+                         'INNER JOIN heads ON changelog.node = heads.node')]
+        if revs:
+            return sorted([r for r in revs if r in realheads])
+        return sorted(realheads)
+
+    def changelogrevision(self, nodeorrev):
+        # Ensure we have a node id
+        if isinstance(nodeorrev, int):
+            n = self.node(nodeorrev)
+        else:
+            n = nodeorrev
+        # handle looking up nullid
+        if n == nodemod.nullid:
+            return hgchangelog._changelogrevision(extra={})
+        hn = nodemod.hex(n)
+        # We've got a real commit!
+        files = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode != ?',
+            (hn, nodemod.nullhex))]
+        filesremoved = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode = ?',
+            (hn, nodemod.nullhex))]
+        c = self.gitrepo[hn]
+        return hgchangelog._changelogrevision(
+            manifest=n, # pretend manifest the same as the commit node
+            user='%s <%s>' % (c.author.name.encode('utf8'),
+                              c.author.email.encode('utf8')),
+            # TODO: a fuzzy memory from hg-git hacking says this should be -offset
+            date=(c.author.time, c.author.offset),
+            files=files,
+            # TODO filesadded in the index
+            filesremoved=filesremoved,
+            description=c.message.encode('utf8'),
+            # TODO do we want to handle extra? how?
+            extra={b'branch': b'default'},
+        )
+
+    def ancestors(self, revs, stoprev=0, inclusive=False):
+        """Return a lazy iterable of the ancestors of `revs`."""
+        revs = list(revs)
+        tiprev = self.rev(self.tip())  # compare revs with a rev, not a node
+        for r in revs:
+            if r > tiprev:
+                raise IndexError('Invalid rev %r' % r)
+        return ancestor.lazyancestors(
+            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def descendants(self, revs):
+        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
+
+    def reachableroots(self, minroot, heads, roots, includepath=False):
+        return dagop._reachablerootspure(self.parentrevs,
+                                         minroot, roots, heads, includepath)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestor(self, a, b):
+        a, b = self.rev(a), self.rev(b)
+        return self.isancestorrev(a, b)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestorrev(self, a, b):
+        if a == nodemod.nullrev:
+            return True
+        elif a == b:
+            return True
+        elif a > b:
+            return False
+        return bool(self.reachableroots(a, [b], [a], includepath=False))
+
+    def parentrevs(self, rev):
+        """Return (p1rev, p2rev) for `rev`; nullrev marks a missing parent."""
+        n = self.node(rev)
+        hn = nodemod.hex(n)
+        c = self.gitrepo[hn]
+        p1 = p2 = nodemod.nullrev
+        if c.parents:
+            p1 = self.rev(c.parents[0].id.raw)
+            if len(c.parents) > 2:
+                raise error.Abort('TODO octopus merge handling')
+            if len(c.parents) == 2:
+                p2 = self.rev(c.parents[1].id.raw)  # second parent, not p1
+        return p1, p2
+
+    # Private method is used at least by the tags code.
+    _uncheckedparentrevs = parentrevs
+
+    def commonancestorsheads(self, a, b):
+        # TODO the revlog version of this has a C path, so we probably
+        # need to optimize this...
+        a, b = self.rev(a), self.rev(b)
+        return [self.node(n) for n in
+                ancestor.commonancestorsheads(self.parentrevs, a, b)]
+
+    def branchinfo(self, rev):
+        """Git doesn't do named branches, so just put everything on default."""
+        return b'default', False
+
+    def delayupdate(self, tr):
+        # TODO: I think we can elide this because we're just dropping
+        # an object in the git repo?
+        pass
+
+    def add(self, manifest, files, desc, transaction, p1, p2,
+            user, date=None, extra=None, p1copies=None, p2copies=None,
+            filesadded=None, filesremoved=None):
+        parents = []
+        hp1, hp2 = nodemod.hex(p1), nodemod.hex(p2)
+        if p1 != nodemod.nullid:
+            parents.append(hp1)
+        if p2 and p2 != nodemod.nullid:
+            parents.append(hp2)
+        assert date is not None
+        timestamp, tz = date
+        sig = pygit2.Signature(stringutil.person(user), stringutil.email(user),
+                               timestamp, tz)
+        oid = self.gitrepo.create_commit(
+            None, sig, sig, desc,
+            nodemod.hex(manifest), parents)
+        # Set up an internal reference to force the commit into the
+        # changelog. Hypothetically, we could even use this refs/hg/
+        # namespace to allow for anonymous heads on git repos, which
+        # would be neat.
+        self.gitrepo.references.create(
+            'refs/hg/internal/latest-commit', oid, force=True)
+        # Reindex now to pick up changes
+        index._index_repo(self.gitrepo, self._db, lambda x, y: None)
+        return oid.raw
+
+# TODO: Make a split between mutable and immutable manifest types here.
+class gittreemanifest(object):
+    def __init__(self, gt, builderfn, gitrepo=None):
+        self._builderfn = builderfn
+        self._tree, self.gitrepo = gt, gitrepo  # gitrepo: read by _walkonetree
+        self._builder = None
+
+    def __contains__(self, k):
+        if self._builder:
+            return self._builder.get(k) is not None
+        return k in self._tree
+
+    def __getitem__(self, k):
+        if self._builder:
+            match = self._builder.get(k)
+            if match is None:
+                raise error.LookupError(  # (name, index, message), as in baselog
+                    k, self._tree.id.hex, _('file not found in tree'))
+            return match
+        try:
+            return self._tree[k].id.raw
+        except ValueError:
+            raise error.LookupError(  # (name, index, message), as in baselog
+                k, self._tree.id.hex, _('file not found in tree'))
+
+    def __setitem__(self, k, v):
+        if self._builder is None:
+            self._builder = self._builderfn()
+        self._builder.insert(k, nodemod.hex(v), pygit2.GIT_FILEMODE_BLOB)
+
+    def setflag(self, p, flag):
+        oid = self._builder.get(p).id
+        if not flag:
+            self._builder.insert(p, oid, pygit2.GIT_FILEMODE_BLOB)
+        elif flag == 'x':
+            self._builder.insert(p, oid, pygit2.GIT_FILEMODE_BLOB_EXECUTABLE)
+        elif flag == 'l':
+            self._builder.insert(p, oid, pygit2.GIT_FILEMODE_LINK)
+        else:
+            raise ValueError('Illegal flag value %r on path %r' % (flag, p))
+
+    def flags(self, k):
+        # TODO flags handling
+        return ''
+
+    def _walkonetree(self, tree, match, subdir):
+        for te in tree:
+            # TODO: can we prune dir walks with the matcher?
+            realname = subdir + te.name
+            if te.type == r'tree':
+                for inner in self._walkonetree(
+                        self.gitrepo[te.id], match, realname + '/'):
+                    yield inner
+            if not match(realname):
+                continue
+            yield realname
+
+    def walk(self, match):
+        return self._walkonetree(self._tree, match, '')
+
+    def get(self, fname, default=None):
+        if fname in self:
+            return self[fname]
+        return default
+
+@interfaceutil.implementer(repository.imanifestrevisionstored)
+class gittreemanifestctx(object):
+    def __init__(self, repo, gittree):
+        self._repo = repo
+        self._tree = gittree
+        self._builder = None
+
+    def _getbuilder(self):
+        if self._builder is None:
+            self._builder = self._repo.TreeBuilder(self._tree)
+        return self._builder
+
+    def read(self):
+        return gittreemanifest(self._tree, self._getbuilder, self._repo)
+
+    def find(self, path):
+        return self.read()[path]  # the lookup result was being discarded
+
+    def copy(self):
+        return gittreemanifestctx(self._repo, self._tree)
+
+    def write(self, transaction, link, p1, p2, added, removed, match=None):
+        # We're not (for now, anyway) going to audit filenames, so we
+        # can ignore added and removed.
+
+        # TODO what does this match argument get used for? hopefully
+        # just narrow?
+        assert not match or isinstance(match, matchmod.alwaysmatcher)
+        return self._getbuilder().write().raw
+
+class manifestlog(baselog):
+
+    def __getitem__(self, node):
+        return self.get('', node)
+
+    def get(self, relpath, node):
+        if node == nodemod.nullid:
+            return manifest.memtreemanifestctx(self, relpath)
+        commit = self.gitrepo[nodemod.hex(node)]
+        t = commit.tree
+        if relpath:
+            parts = relpath.split('/')
+            for p in parts:
+                te = t[p]
+                t = self.gitrepo[te.id]
+        return gittreemanifestctx(self.gitrepo, t)
+
+@interfaceutil.implementer(repository.ifilestorage)
+class filelog(baselog):
+    def __init__(self, gr, db, path):
+        super(filelog, self).__init__(gr, db)
+        self.path = path
+
+    def read(self, node):
+        return self.gitrepo[nodemod.hex(node)].data
+
+    def lookup(self, node):
+        if len(node) not in (20, 40):
+            node = int(node)
+        if isinstance(node, int):
+            assert False, 'todo revnums for nodes'
+        if len(node) == 40:
+            hnode = node
+            node = nodemod.bin(node)
+        else:
+            hnode = nodemod.hex(node)
+        if hnode in self.gitrepo:
+            return node
+        raise error.LookupError(node, self.path, _('no match found'))
+
+    def cmp(self, node, text):
+        """Returns True if text is different than content at `node`."""
+        return self.read(node) != text
+
+    def add(self, text, meta, transaction, link, p1=None, p2=None):
+        assert not meta # Should we even try to handle this?
+        return self.gitrepo.create_blob(text).raw
+
+    def __iter__(self):
+        for clrev in self._db.execute('''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
+        ''', (self.path, nodemod.nullhex)):
+            yield clrev[0]
+
+    def linkrev(self, fr):
+        return fr
+
+    def rev(self, node):
+        return int(self._db.execute('''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''', (
+            self.path, nodemod.hex(node))).fetchone()[0])
+
+    def node(self, rev):
+        return nodemod.bin(self._db.execute(
+'''SELECT filenode FROM changedfiles
+INNER JOIN changelog ON changelog.node = changedfiles.node
+WHERE changelog.rev = ? AND filename = ?
+''', (rev, self.path)).fetchone()[0])
+
+    def parents(self, node):
+        ps = []
+        for p in self._db.execute(
+'''SELECT p1filenode, p2filenode FROM changedfiles
+WHERE filenode = ? AND filename = ?
+''', (nodemod.hex(node), self.path)).fetchone():
+            if p is not None:
+                ps.append(nodemod.bin(p))
+            else:
+                ps.append(nodemod.nullid)
+        return ps
+
+    def renamed(self, node):
+        # TODO: renames/copies
+        return False
diff --git a/hgext/git/index.py b/hgext/git/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/index.py
@@ -0,0 +1,231 @@
+from __future__ import absolute_import
+
+import os
+import sqlite3
+
+from mercurial.i18n import _
+
+from mercurial import (
+    encoding,
+    error,
+    node as nodemod,
+)
+
+import pygit2
+
+_CURRENT_SCHEMA_VERSION = 1
+_SCHEMA = """
+CREATE TABLE refs (
+  -- node and name are unique together. There may be more than one name for
+  -- a given node, and there may be no name at all for a given node (in the
+  -- case of an anonymous hg head).
+  node TEXT NOT NULL,
+  name TEXT
+);
+
+-- The topological heads of the changelog, which hg depends on.
+CREATE TABLE heads (
+  node TEXT NOT NULL
+);
+
+-- A total ordering of the changelog
+CREATE TABLE changelog (
+  rev INTEGER NOT NULL PRIMARY KEY,
+  node TEXT NOT NULL,
+  p1 TEXT,
+  p2 TEXT
+);
+
+CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
+CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
+
+-- Changed files for each commit, which lets us dynamically build
+-- filelogs.
+CREATE TABLE changedfiles ( + node TEXT NOT NULL, + filename TEXT NOT NULL, + -- 40 zeroes for deletions + filenode TEXT NOT NULL, +-- to handle filelog parentage: + p1node TEXT, + p1filenode TEXT, + p2node TEXT, + p2filenode TEXT +); + +CREATE INDEX changedfiles_nodes_idx + ON changedfiles(node); + +PRAGMA user_version=%d +""" % _CURRENT_SCHEMA_VERSION + +def _createdb(path): + # print('open db', path) + # import traceback + # traceback.print_stack() + db = sqlite3.connect(encoding.strfromlocal(path)) + db.text_factory = bytes + + res = db.execute(r'PRAGMA user_version').fetchone()[0] + + # New database. + if res == 0: + for statement in _SCHEMA.split(';'): + db.execute(statement.strip()) + + db.commit() + + elif res == _CURRENT_SCHEMA_VERSION: + pass + + else: + raise error.Abort(_('sqlite database has unrecognized version')) + + db.execute(r'PRAGMA journal_mode=WAL') + + return db + +_OUR_ORDER = (pygit2.GIT_SORT_TOPOLOGICAL | + pygit2.GIT_SORT_TIME | + pygit2.GIT_SORT_REVERSE) + +_DIFF_FLAGS = 1 << 21 # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2 + +def _find_nearest_ancestor_introducing_node( + db, gitrepo, file_path, walk_start, filenode): + """Find the nearest ancestor that introduces a file node. + + Args: + db: a handle to our sqlite database. + gitrepo: A pygit2.Repository instance. + file_path: the path of a file in the repo + walk_start: a pygit2.Oid that is a commit where we should start walking + for our nearest ancestor. + + Returns: + A hexlified SHA that is the commit ID of the next-nearest parent. + """ + parent_options = {row[0] for row in db.execute( + 'SELECT node FROM changedfiles ' + 'WHERE filename = ? AND filenode = ?', + (file_path, filenode))} + inner_walker = gitrepo.walk(walk_start, _OUR_ORDER) + for w in inner_walker: + if w.id.hex in parent_options: + return w.id.hex + +def _index_repo(gitrepo, db, progress_cb): + # Identify all references so we can tell the walker to visit all of them. 
+ all_refs = gitrepo.listall_references() + walker = None + possible_heads = set() + for pos, ref in enumerate(all_refs): + progress_cb('refs', pos) + if not ( + ref.startswith('refs/heads/') # local branch + or ref.startswith('refs/tags/') # tag + or ref.startswith('refs/remotes/') # remote branch + or ref.startswith('refs/hg/') # from this extension + ): + continue + try: + start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT) + except ValueError: + # No commit to be found, so we don't care for hg's purposes. + continue + possible_heads.add(start.id.hex) + if walker is None: + walker = gitrepo.walk(start.id, _OUR_ORDER) + else: + walker.push(start.id) + # Empty out the existing changelog. Even for large-ish histories + # we can do the top-level "walk all the commits" dance very + # quickly as long as we don't need to figure out the changed files + # list. + db.execute('DELETE FROM changelog') + progress_cb('refs', None) + # This walker is sure to visit all the revisions in history, but + # only once. + for pos, commit in enumerate(walker): + progress_cb('commits', pos) + r = commit.id.raw + p1 = p2 = nodemod.nullhex + if len(commit.parents) > 2: + raise error.ProgrammingError( + ("git support can't handle octopus merges, " + "found a commit with %d parents :(") % len(commit.parents)) + if commit.parents: + p1 = commit.parents[0].id.hex + if len(commit.parents) == 2: + p2 = commit.parents[1].id.hex + db.execute( + 'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)', + (pos, commit.id.hex, p1, p2)) + + num_changedfiles = db.execute( + "SELECT COUNT(*) from changedfiles WHERE node = ?", + (commit.id.hex,)).fetchone()[0] + if not num_changedfiles: + files = {} + # I *think* we only need to check p1 for changed files + # (and therefore linkrevs), because any node that would + # actually have this commit as a linkrev would be + # completely new in this rev. 
+ p1 = commit.parents[0].id.hex if commit.parents else None + if p1 is not None: + patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS) + else: + patchgen = commit.tree.diff_to_tree( + swap=True, flags=_DIFF_FLAGS) + new_files = (p.delta.new_file for p in patchgen) + files = {nf.path: nf.id.hex for nf in new_files + if nf.id.raw != nodemod.nullid} + for p, n in files.items(): + # List of previous node, commit whose ancestry we + # should search. + parents = [] + for parent in commit.parents: + t = parent.tree + for comp in p.split('/'): + try: + t = gitrepo[t[comp].id] + except KeyError: + break + else: + introducer = _find_nearest_ancestor_introducing_node( + db, gitrepo, p, parent.id, t.id.hex) + parents.append((introducer, t.id.hex)) + p1node = p1fnode = p2node = p2fnode = None + if parents: + p1node, p1fnode = parents[0] + if len(parents) == 2: + p2node, p2fnode = parents[1] + if len(parents) > 2: + raise error.ProgrammingError( + "git support can't handle octopus merges") + db.execute( + 'INSERT INTO changedfiles (' + 'node, filename, filenode, p1node, p1filenode, p2node, ' + 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', + (commit.id.hex, p, n, p1node, p1fnode, p2node, p2fnode)) + db.execute('DELETE FROM heads') + for h in possible_heads: + haschild = db.execute( + 'SELECT COUNT(*) FROM changelog WHERE p1 = ? 
OR p2 = ?', + (h, h)).fetchone()[0] + if not haschild: + db.execute('INSERT INTO heads (node) VALUES(?)', (h,)) + + db.commit() + progress_cb('commits', None) +
+def get_index(gitrepo):
+    """Return the sqlite index database for ``gitrepo``, freshly indexed.
+
+    The database file is ``git-commits.sqlite`` inside the ``.hg/cache``
+    directory under the parent of ``gitrepo.path``; that directory is
+    created when it is missing. Every call runs a full ``_index_repo``
+    pass with a no-op progress callback before the handle is returned.
+
+    Raises OSError if the cache directory cannot be created.
+    """
+    cachepath = os.path.join(gitrepo.path, '..', '.hg', 'cache')
+    try:
+        os.makedirs(cachepath)
+    except OSError:
+        # Create unconditionally instead of checking first: a separate
+        # exists() test races with other processes opening the same
+        # repo. An error is only fatal if the directory is truly absent.
+        if not os.path.isdir(cachepath):
+            raise
+    dbpath = os.path.join(cachepath, 'git-commits.sqlite')
+    db = _createdb(dbpath)
+    # TODO check against gitrepo heads before doing a full index
+    # TODO thread a ui.progress call into this layer
+    _index_repo(gitrepo, db, lambda x, y: None)
+    return db
diff --git a/mercurial/transaction.py b/mercurial/transaction.py --- a/mercurial/transaction.py +++ b/mercurial/transaction.py @@ -473,8 +473,11 @@ self._generatefiles(group=gengroupprefinalize) categories = sorted(self._finalizecallback) for cat in categories: - self._finalizecallback[cat](self) - # Prevent double usage and help clear cycles. + try: + self._finalizecallback[cat](self) + except TypeError as e: + raise TypeError('%r: %r (%r)' % (cat, self._finalizecallback[cat], e)) + # Prevent double usage and help clear cycles. 
self._finalizecallback = None self._generatefiles(group=gengrouppostfinalize) diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -1087,6 +1087,7 @@ 'hgext', 'hgext.convert', 'hgext.fsmonitor', 'hgext.fastannotate', 'hgext.fsmonitor.pywatchman', + 'hgext.git', 'hgext.highlight', 'hgext.infinitepush', 'hgext.largefiles', 'hgext.lfs', 'hgext.narrow', diff --git a/tests/test-check-interfaces.py b/tests/test-check-interfaces.py --- a/tests/test-check-interfaces.py +++ b/tests/test-check-interfaces.py @@ -44,6 +44,11 @@ wireprotov2server, ) +from hgext.git import ( + dirstate as gitdirstate, + gitlog, +) + testdir = os.path.dirname(__file__) rootdir = pycompat.fsencode(os.path.normpath(os.path.join(testdir, '..'))) @@ -193,6 +198,10 @@ ziverify.verifyClass(intdirstate.idirstate, dirstate.dirstate) + # git interop implementations + ziverify.verifyClass(intdirstate.idirstate, gitdirstate.gitdirstate) + ziverify.verifyClass(repository.ifilestorage, gitlog.filelog) + vfs = vfsmod.vfs(b'.') fl = filelog.filelog(vfs, b'dummy.i') checkzobject(fl, allowextra=True) diff --git a/tests/test-git-interop.t b/tests/test-git-interop.t new file mode 100644 --- /dev/null +++ b/tests/test-git-interop.t @@ -0,0 +1,190 @@ +This test requires pygit2: + > python -c 'import pygit2' || exit 80 + +Setup: + > GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME + > GIT_AUTHOR_EMAIL='test@example.org'; export GIT_AUTHOR_EMAIL + > GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE + > GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME + > GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL + > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE + + > count=10 + > gitcommit() { + > GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000"; + > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE" + > git commit "$@" >/dev/null 2>/dev/null || echo "git commit error" + > count=`expr $count + 1` + > } + + > echo "[extensions]" >> $HGRCPATH + > echo "git=" >> 
$HGRCPATH + +Make a new repo with git: + $ mkdir foo + $ cd foo + $ git init + Initialized empty Git repository in $TESTTMP/foo/.git/ +Ignore the .hg directory within git: + $ echo .hg >> .git/info/exclude + $ echo alpha > alpha + $ git add alpha + $ gitcommit -am 'Add alpha' + $ echo beta > beta + $ git add beta + $ gitcommit -am 'Add beta' + $ echo gamma > gamma + $ git status + On branch master + Untracked files: + (use "git add <file>..." to include in what will be committed) + + gamma + + nothing added to commit but untracked files present (use "git add" to track) + +Without creating the .hg, hg status fails: + $ hg status + abort: no repository found in '$TESTTMP/foo' (.hg not found)! + [255] +But if you run hg init --git, it works: + $ hg init --git + $ hg id + 3d9be8deba43 master + $ hg status + ? gamma +Log works too: + $ hg log + changeset: 1:3d9be8deba43 + bookmark: master + user: test + date: Mon Jan 01 00:00:11 2007 +0000 + summary: Add beta + + changeset: 0:c5864c9d16fb + user: test + date: Mon Jan 01 00:00:10 2007 +0000 + summary: Add alpha + + + +and bookmarks: + $ hg bookmarks + * master 1:3d9be8deba43 + +diff even works transparently in both systems: + $ echo blah >> alpha + $ git diff + diff --git a/alpha b/alpha + index 4a58007..faed1b7 100644 + --- a/alpha + +++ b/alpha + @@ -1 +1,2 @@ + alpha + +blah + $ hg diff --git + diff --git a/alpha b/alpha + --- a/alpha + +++ b/alpha + @@ -1,1 +1,2 @@ + alpha + +blah + +Remove a file, it shows as such: + $ rm alpha + $ hg status + ! alpha + ? gamma + +Revert works: + $ hg revert alpha --traceback + $ hg status + ? gamma + $ git status + On branch master + Untracked files: + (use "git add <file>..." 
to include in what will be committed) + + gamma + + nothing added to commit but untracked files present (use "git add" to track) + +Add shows sanely in both: + $ hg add gamma + $ hg status + A gamma + $ hg files + alpha + beta + gamma + $ git ls-files + alpha + beta + gamma + $ git status + On branch master + Changes to be committed: + (use "git reset HEAD <file>..." to unstage) + + new file: gamma + + +forget does what it should as well: + $ hg forget gamma + $ hg status + ? gamma + $ git status + On branch master + Untracked files: + (use "git add <file>..." to include in what will be committed) + + gamma + + nothing added to commit but untracked files present (use "git add" to track) + +hg log FILE + + $ echo a >> alpha + $ hg ci -m 'more alpha' + $ echo b >> beta + $ hg ci -m 'more beta' + $ echo a >> alpha + $ hg ci -m 'even more alpha' + $ hg log -G alpha + @ changeset: 4:bd975ddde71c + : bookmark: master + : user: test + : date: Thu Jan 01 00:00:00 1970 +0000 + : summary: even more alpha + : + o changeset: 2:77f597222800 + : user: test + : date: Thu Jan 01 00:00:00 1970 +0000 + : summary: more alpha + : + o changeset: 0:c5864c9d16fb + user: test + date: Mon Jan 01 00:00:10 2007 +0000 + summary: Add alpha + + $ hg log -G beta + o changeset: 3:b40d4fed5e27 + : user: test + : date: Thu Jan 01 00:00:00 1970 +0000 + : summary: more beta + : + o changeset: 1:3d9be8deba43 + | user: test + ~ date: Mon Jan 01 00:00:11 2007 +0000 + summary: Add beta + + +hg annotate + + $ hg annotate alpha + 0: alpha + 2: a + 4: a + $ hg annotate beta + 1: beta + 3: b