diff --git a/hgext/fastexport.py b/hgext/fastexport.py new file mode 100644 --- /dev/null +++ b/hgext/fastexport.py @@ -0,0 +1,196 @@ +# Copyright 2019 Joerg Sonnenberger +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. +'''export repositories as git fast-import stream''' +from __future__ import absolute_import +import re + +from mercurial.i18n import _ +from mercurial.node import ( + nullrev, +) +from mercurial.utils import ( + stringutil, +) +from mercurial import ( + cmdutil, + commands, + destutil, + error, + formatter, + graphmod, + logcmdutil, + phases, + pycompat, + registrar, + revset, + revsetlang, + scmutil, +) +from .convert import convcmd + +# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for +# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should +# be specifying the version(s) of Mercurial they are tested with, or +# leave the attribute unspecified. 
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

# Characters that must not appear in the person / e-mail part of the
# "committer" line written by convert_to_git_user().
GIT_PERSON_PROHIBITED = re.compile('[<>\n"]')
GIT_EMAIL_PROHIBITED = re.compile('[<> \n]')


def _hexnode(node):
    """Return the hexadecimal spelling of the binary node id ``node``."""
    # str.encode('hex') only exists on Python 2; hexlify() gives the same
    # result there and also works on Python 3 bytes.
    import binascii
    return binascii.hexlify(node)


def convert_to_git_user(authormap, user, rev):
    """Return ``user`` formatted for a fast-import "committer" line.

    ``user`` is first looked up in ``authormap``; a user without an entry is
    used unchanged.  The result is split with stringutil.person() and
    stringutil.email() and returned as ``"person" <email>``, or ``<email>``
    when the person part is empty.

    ``rev`` is only used to identify the revision in the error message.

    Raises error.Abort when the person or the e-mail part contains a
    character listed in GIT_PERSON_PROHIBITED / GIT_EMAIL_PROHIBITED.
    """
    mapped_user = authormap.get(user, user)
    user_person = stringutil.person(mapped_user)
    user_email = stringutil.email(mapped_user)
    # search(), not match(): match() is anchored at the start of the string
    # and would only notice a prohibited character in the first position.
    if (GIT_EMAIL_PROHIBITED.search(user_email) or
        GIT_PERSON_PROHIBITED.search(user_person)):
        raise error.Abort(_('Unable to parse user into person and '
                            'email for revision %s') % rev)
    if user_person:
        return '"%s" <%s>' % (user_person, user_email)
    else:
        return '<%s>' % user_email


def convert_to_git_date(date):
    """Format ``date``, a ``(timestamp, utcoff)`` pair, as ``<ts> <sign>HHMM``.

    ``utcoff`` is an offset in seconds.  A negative offset is written with
    a ``+`` sign, any other offset with a ``-`` sign.

    Raises error.Abort when ``utcoff`` is not a whole number of minutes,
    because the HHMM form cannot express it.
    """
    timestamp, utcoff = date
    # NOTE(review): a zero offset is written as "-0000"; confirm that this
    # is intended rather than "+0000".
    tzsign = "+" if utcoff < 0 else "-"
    if utcoff % 60 != 0:
        raise error.Abort(_('UTC offset in %s is not an integer '
                            'number of minutes') % (date,))
    utcoff = abs(utcoff) // 60
    tzh = utcoff // 60
    tzmin = utcoff % 60
    return '%d %s%02d%02d' % (int(timestamp), tzsign, tzh, tzmin)


def convert_to_git_ref(branch):
    """Return the ``refs/heads/`` ref name used for ``branch``."""
    # XXX filter/map depending on git restrictions
    return 'refs/heads/%s' % branch


def write_data(buf, data, skip_newline):
    """Append a fast-import ``data`` command carrying ``data`` to ``buf``.

    ``buf`` is a list of output chunks.  A newline is added after the
    payload, except when ``skip_newline`` is true and the payload already
    ends with one.
    """
    buf.append('data %d\n' % len(data))
    buf.append(data)
    if not skip_newline or data[-1:] != '\n':
        buf.append('\n')


def export_commit(ui, repo, rev, marks, authormap):
    """Write revision ``rev`` of ``repo`` to ``ui`` as fast-import commands.

    ``marks`` maps the hex ids of already exported changesets and file
    revisions to their mark numbers; it is updated in place with every
    blob and commit written here.  ``authormap`` is passed on to
    convert_to_git_user().

    Nothing is written, and a warning is printed, when the revision is
    already in ``marks`` or when one of its parents is not.
    """
    ctx = repo[rev]
    revid = ctx.hex()
    if revid in marks:
        ui.warn(_('warning: revision %s already exported, skipped\n') % revid)
        return
    parents = [p for p in ctx.parents() if p.rev() != nullrev]
    for p in parents:
        if p.hex() not in marks:
            ui.warn(_('warning: parent %s of %s has not been exported, '
                      'skipped\n') % (p, revid))
            return

    # Write a blob for every file revision touched by this changeset that
    # has no mark yet.
    for fname in ctx.files():
        if fname not in ctx:
            # not present in this revision, so there is no content to store
            continue
        filectx = ctx.filectx(fname)
        filerev = _hexnode(filectx.filenode())
        if filerev not in marks:
            mark = len(marks) + 1
            marks[filerev] = mark
            data = filectx.data()
            buf = ['blob\n', 'mark :%d\n' % mark]
            write_data(buf, data, False)
            ui.write(*buf, keepprogressbar=True)
            del buf

    # The changeset gets its own mark so that later commits can name it in
    # their "from" / "merge" lines.
    mark = len(marks) + 1
    marks[revid] = mark
    user = convert_to_git_user(authormap, ctx.user(), revid)
    date = convert_to_git_date(ctx.date())
    ref = convert_to_git_ref(ctx.branch())
    description = ctx.description()
    buf = ['commit %s\n' % ref,
           'mark :%d\n' % mark,
           'committer %s %s\n' % (user, date),
          ]
    write_data(buf, description, True)
    if parents:
        buf.append('from :%d\n' % marks[parents[0].hex()])
    if len(parents) == 2:
        buf.append('merge :%d\n' % marks[parents[1].hex()])
        # For a merge, list every file that differs from the first parent.
        p0ctx = repo[parents[0]]
        files = ctx.manifest().diff(p0ctx.manifest())
    else:
        files = ctx.repo().changelog.readfiles(ctx.node())
    filebuf = []
    for fname in files:
        if fname not in ctx:
            filebuf.append((fname, 'D %s\n' % fname))
        else:
            filectx = ctx.filectx(fname)
            filerev = _hexnode(filectx.filenode())
            fileperm = "755" if filectx.isexec() else "644"
            changed = 'M %s :%d %s\n' % (fileperm, marks[filerev], fname)
            filebuf.append((fname, changed))
    # Emit the file commands ordered by file name.
    filebuf.sort()
    buf.extend(changed for (fname, changed) in filebuf)
    del filebuf
    buf.append('\n')
    ui.write(*buf, keepprogressbar=True)
    del buf


# A line of a marks file: exactly one 40-digit lower-case hex id.
isrev = re.compile('^[0-9a-f]{40}$')


@command('fastexport', [
    ('r', 'rev', [],
     _('revisions to export'), _('REV')),
    ('i', 'import-marks', '',
     _('old marker file to read'), _('FILE')),
    ('e', 'export-marks', '',
     _('new marker file to write'), _('FILE')),
    ('A', 'authormap', '',
     _('remap usernames using this file'), _('FILE')),
    ], _('[OPTION]... [REV]...'),
    helpcategory=command.CATEGORY_IMPORT_EXPORT)
def fastexport(ui, repo, *revs, **opts):
    """export repository as git fast-import stream

    The revisions given as arguments and with -r/--rev are exported in
    sorted order; when none are given, all revisions are exported.

    With --import-marks, every id listed in the file is treated as already
    exported. With --export-marks, the ids of everything that has a mark
    after the export are written to the file, one per line, in mark order.
    With --authormap, user names are remapped using the author map file,
    which is read with the convert extension's reader.

    Aborts when no revision matches or when the file given with
    --import-marks contains an invalid or duplicate line.
    """
    opts = pycompat.byteskwargs(opts)

    revs += tuple(opts.get('rev', []))
    revs = scmutil.revrange(repo, revs or [':'])
    if not revs:
        raise error.Abort(_('no revisions matched'))
    authorfile = opts.get('authormap')
    if authorfile:
        authormap = convcmd.readauthormap(ui, authorfile)
    else:
        authormap = {}

    import_marks = opts.get('import_marks')
    marks = {}
    if import_marks:
        # Binary mode: the ids are compared with node hashes as raw byte
        # strings, and no platform newline translation gets in the way.
        with open(import_marks, 'rb') as import_marks_file:
            for line in import_marks_file:
                line = line.strip()
                if not isrev.match(line) or line in marks:
                    raise error.Abort(_('Corrupted marker file'))
                # the mark number is the 1-based position in the file
                marks[line] = len(marks) + 1

    revs.sort()
    with ui.makeprogress(_('exporting'), unit=_('revisions'),
                         total=len(revs)) as progress:
        for rev in revs:
            export_commit(ui, repo, rev, marks, authormap)
            progress.increment()

    export_marks = opts.get('export_marks')
    if export_marks:
        with open(export_marks, 'wb') as export_marks_file:
            # Invert the id -> mark mapping so that line N holds mark N.
            output_marks = [None] * len(marks)
            for k, v in marks.items():
                output_marks[v - 1] = k
            for k in output_marks:
                export_marks_file.write(k + '\n')

# NOTE: the source line that ended the Python module also carried the start
# of the tests/test-fastexport.t hunk of this patch; it is kept verbatim below.
# diff --git a/tests/test-fastexport.t b/tests/test-fastexport.t new file mode 100644 --- /dev/null +++ b/tests/test-fastexport.t @@ -0,0 +1,709 @@ + $ cat >> $HGRCPATH << EOF + > [extensions] + > fastexport= + > EOF + + $ hg init + + $ hg debugbuilddag -mon '+2:tbase @name1 +3:thead1 fastexport.blob + $ cat fastexport.blob + blob + mark :1 + data 65 + 0 r0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :2 + data 3 + r0 + + commit refs/heads/default + mark :3 + committer "debugbuilddag" 0 -0000 + data 2 + r0 + M 644 :1 mf + M 644 :2 nf0 + M 644 :2 of + + blob + mark :4 + data 68 + 0 r0 + 1 + 2 r1 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20
+ 21 + 22 + 23 + + blob + mark :5 + data 3 + r1 + + blob + mark :6 + data 3 + r1 + + commit refs/heads/default + mark :7 + committer "debugbuilddag" 1 -0000 + data 2 + r1 + from :3 + M 644 :4 mf + M 644 :5 nf1 + M 644 :6 of + + blob + mark :8 + data 71 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :9 + data 3 + r2 + + blob + mark :10 + data 3 + r2 + + commit refs/heads/name1 + mark :11 + committer "debugbuilddag" 2 -0000 + data 2 + r2 + from :7 + M 644 :8 mf + M 644 :9 nf2 + M 644 :10 of + + blob + mark :12 + data 74 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 r3 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :13 + data 3 + r3 + + blob + mark :14 + data 3 + r3 + + commit refs/heads/name1 + mark :15 + committer "debugbuilddag" 3 -0000 + data 2 + r3 + from :11 + M 644 :12 mf + M 644 :13 nf3 + M 644 :14 of + + blob + mark :16 + data 77 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 r3 + 7 + 8 r4 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :17 + data 3 + r4 + + blob + mark :18 + data 3 + r4 + + commit refs/heads/name1 + mark :19 + committer "debugbuilddag" 4 -0000 + data 2 + r4 + from :15 + M 644 :16 mf + M 644 :17 nf4 + M 644 :18 of + + blob + mark :20 + data 71 + 0 r0 + 1 + 2 r1 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 r5 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :21 + data 3 + r5 + + blob + mark :22 + data 3 + r5 + + commit refs/heads/name2 + mark :23 + committer "debugbuilddag" 5 -0000 + data 2 + r5 + from :7 + M 644 :20 mf + M 644 :21 nf5 + M 644 :22 of + + blob + mark :24 + data 74 + 0 r0 + 1 + 2 r1 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :25 + data 3 + r6 + + blob + mark :26 + data 3 + r6 + + commit refs/heads/name2 + mark :27 + committer "debugbuilddag" 6 -0000 + data 2 + r6 + from :23 
+ M 644 :24 mf + M 644 :25 nf6 + M 644 :26 of + + blob + mark :28 + data 77 + 0 r0 + 1 + 2 r1 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 r7 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :29 + data 3 + r7 + + blob + mark :30 + data 3 + r7 + + commit refs/heads/name2 + mark :31 + committer "debugbuilddag" 7 -0000 + data 2 + r7 + from :27 + M 644 :28 mf + M 644 :29 nf7 + M 644 :30 of + + blob + mark :32 + data 80 + 0 r0 + 1 + 2 r1 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 r7 + 15 + 16 r8 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + + blob + mark :33 + data 3 + r8 + + blob + mark :34 + data 3 + r8 + + commit refs/heads/name2 + mark :35 + committer "debugbuilddag" 8 -0000 + data 2 + r8 + from :31 + M 644 :32 mf + M 644 :33 nf8 + M 644 :34 of + + blob + mark :36 + data 92 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 r3 + 7 + 8 r4 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 r7 + 15 + 16 r8 + 17 + 18 r9 + 19 + 20 + 21 + 22 + 23 + + blob + mark :37 + data 3 + r9 + + blob + mark :38 + data 3 + r9 + + commit refs/heads/both + mark :39 + committer "debugbuilddag" 9 -0000 + data 2 + r9 + from :35 + merge :19 + M 644 :36 mf + M 644 :9 nf2 + M 644 :13 nf3 + M 644 :17 nf4 + M 644 :37 nf9 + M 644 :38 of + + blob + mark :40 + data 96 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 r3 + 7 + 8 r4 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 r7 + 15 + 16 r8 + 17 + 18 r9 + 19 + 20 r10 + 21 + 22 + 23 + + blob + mark :41 + data 4 + r10 + + blob + mark :42 + data 4 + r10 + + commit refs/heads/both + mark :43 + committer "debugbuilddag" 10 -0000 + data 3 + r10 + from :39 + M 644 :40 mf + M 644 :41 nf10 + M 644 :42 of + + blob + mark :44 + data 100 + 0 r0 + 1 + 2 r1 + 3 + 4 r2 + 5 + 6 r3 + 7 + 8 r4 + 9 + 10 r5 + 11 + 12 r6 + 13 + 14 r7 + 15 + 16 r8 + 17 + 18 r9 + 19 + 20 r10 + 21 + 22 r11 + 23 + + blob + mark :45 + data 4 + r11 + + blob + mark :46 + data 4 + r11 + + commit refs/heads/both + mark :47 + committer "debugbuilddag" 11 -0000 + data 3 + r11 + from :43 + M 644 :44 mf + M 644 :45 
nf11 + M 644 :46 of + + commit refs/heads/both + mark :48 + committer "debugbuilddag" 12 -0000 + data 3 + r12 + from :43 + D nf10 + + commit refs/heads/both + mark :49 + committer "test" 13 -0000 + data 13 + debugbuilddag + from :47 + merge :48 + D nf10 + diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -364,6 +364,7 @@ eol automatically manage newlines in repository files extdiff command to allow external programs to compare revisions factotum http authentication with factotum + fastexport export repositories as git fast-import stream githelp try mapping git commands to Mercurial commands gpg commands to sign and verify changesets hgk browse the repository in a graphical way