diff --git a/hgext/fastannotate/__init__.py b/hgext/fastannotate/__init__.py new file mode 100644 --- /dev/null +++ b/hgext/fastannotate/__init__.py @@ -0,0 +1,185 @@ +# Copyright 2016-present Facebook. All Rights Reserved. +# +# fastannotate: faster annotate implementation using linelog +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. +"""yet another annotate implementation that might be faster (EXPERIMENTAL) + +The fastannotate extension provides a 'fastannotate' command that makes +use of the linelog data structure as a cache layer and is expected to +be faster than the vanilla 'annotate' if the cache is present. + +In most cases, fastannotate requires a setup that mainbranch is some pointer +that always moves forward, to be most efficient. + +Using fastannotate together with linkrevcache would speed up building the +annotate cache greatly. Run "debugbuildlinkrevcache" before +"debugbuildannotatecache". + +:: + + [fastannotate] + # specify the main branch head. the internal linelog will only contain + # the linear (ignoring p2) "mainbranch". since linelog cannot move + # backwards without a rebuild, this should be something that always moves + # forward, usually it is "master" or "@". + mainbranch = master + + # fastannotate supports different modes to expose its feature. + # a list of combination: + # - fastannotate: expose the feature via the "fastannotate" command which + # deals with everything in a most efficient way, and provides extra + # features like --deleted etc. + # - fctx: replace fctx.annotate implementation. note: + # a. it is less efficient than the "fastannotate" command + # b. it will make it practically impossible to access the old (disk + # side-effect free) annotate implementation + # c. it implies "hgweb". + # - hgweb: replace hgweb's annotate implementation. conflict with "fctx". 
+ # (default: fastannotate) + modes = fastannotate + + # default format when no format flags are used (default: number) + defaultformat = changeset, user, date + + # serve the annotate cache via wire protocol (default: False) + # tip: the .hg/fastannotate directory is portable - can be rsynced + server = True + + # build annotate cache on demand for every client request (default: True) + # disabling it could make server response faster, useful when there is a + # cronjob building the cache. + serverbuildondemand = True + + # update local annotate cache from remote on demand + # (default: True for remotefilelog repo, False otherwise) + client = True + + # path to use when connecting to the remote server (default: default) + remotepath = default + + # share sshpeer with remotefilelog. this would allow fastannotate to peek + # into remotefilelog internals, and steal its sshpeer, or in the reversed + # direction: donate its sshpeer to remotefilelog. disable this if + # fastannotate and remotefilelog should not share a sshpeer when their + # endpoints are different and incompatible. (default: True) + clientsharepeer = True + + # minimal length of the history of a file required to fetch linelog from + # the server. (default: 10) + clientfetchthreshold = 10 + + # use flock instead of the file existence lock + # flock may not work well on some network filesystems, but they avoid + # creating and deleting files frequently, which is faster when updating + # the annotate cache in batch. if you have issues with this option, set it + # to False. (default: True if flock is supported, False otherwise) + useflock = True + + # for "fctx" mode, always follow renames regardless of command line option. + # this is a BC with the original command but will reduce the space needed + # for annotate cache, and is useful for client-server setup since the + # server will only provide annotate cache with default options (i.e. with + # follow). does not affect "fastannotate" mode. 
(default: True) + forcefollow = True + + # for "fctx" mode, always treat file as text files, to skip the "isbinary" + # check. this is consistent with the "fastannotate" command and could help + # to avoid a file fetch if remotefilelog is used. (default: True) + forcetext = True + + # use unfiltered repo for better performance. + unfilteredrepo = True + + # sacrifice correctness in some corner cases for performance. it does not + # affect the correctness of the annotate cache being built. the option + # is experimental and may disappear in the future (default: False) + perfhack = True +""" + +from __future__ import absolute_import + +from mercurial.i18n import _ +from mercurial import ( + error as hgerror, + localrepo, + registrar, + util, +) + +from . import ( + commands, + context, + protocol, +) + +# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for +# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should +# be specifying the version(s) of Mercurial they are tested with, or +# leave the attribute unspecified. 
+testedwith = 'ships-with-hg-core' + +cmdtable = commands.cmdtable + +configtable = {} +configitem = registrar.configitem(configtable) + +configitem('fastannotate', 'modes', default=['fastannotate']) +configitem('fastannotate', 'server', default=False) +configitem('fastannotate', 'useflock', default=True) +configitem('fastannotate', 'client') +configitem('fastannotate', 'unfilteredrepo', default=True) +configitem('fastannotate', 'defaultformat', default=['number']) +configitem('fastannotate', 'perfhack', default=False) +configitem('fastannotate', 'mainbranch') +configitem('fastannotate', 'forcetext', default=True) +configitem('fastannotate', 'forcefollow', default=True) +configitem('fastannotate', 'clientfetchthreshold', default=10) +configitem('fastannotate', 'clientsharepeer', default=True) +configitem('fastannotate', 'serverbuildondemand', default=True) +configitem('fastannotate', 'remotepath', default='default') + +def _flockavailable(): + try: + import fcntl + fcntl.flock + except StandardError: + return False + else: + return True + +def uisetup(ui): + modes = set(ui.configlist('fastannotate', 'modes')) + if 'fctx' in modes: + modes.discard('hgweb') + for name in modes: + if name == 'fastannotate': + commands.registercommand() + elif name == 'hgweb': + from . import support + support.replacehgwebannotate() + elif name == 'fctx': + from . 
import support + support.replacefctxannotate() + support.replaceremotefctxannotate() + commands.wrapdefault() + else: + raise hgerror.Abort(_('fastannotate: invalid mode: %s') % name) + + if ui.configbool('fastannotate', 'server'): + protocol.serveruisetup(ui) + + if ui.configbool('fastannotate', 'useflock', _flockavailable()): + context.pathhelper.lock = context.pathhelper._lockflock + + # fastannotate has its own locking, without depending on repo lock + localrepo.localrepository._wlockfreeprefix.add('fastannotate/') + +def reposetup(ui, repo): + client = ui.configbool('fastannotate', 'client', default=None) + if client is None: + if util.safehasattr(repo, 'requirements'): + client = 'remotefilelog' in repo.requirements + if client: + protocol.clientreposetup(ui, repo) diff --git a/hgext/fastannotate/commands.py b/hgext/fastannotate/commands.py new file mode 100644 --- /dev/null +++ b/hgext/fastannotate/commands.py @@ -0,0 +1,281 @@ +# Copyright 2016-present Facebook. All Rights Reserved. +# +# commands: fastannotate commands +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +import os + +from mercurial.i18n import _ +from mercurial import ( + commands, + error, + extensions, + patch, + pycompat, + registrar, + scmutil, + util, +) + +from . 
import ( + context as facontext, + error as faerror, + formatter as faformatter, +) + +cmdtable = {} +command = registrar.command(cmdtable) + +def _matchpaths(repo, rev, pats, opts, aopts=facontext.defaultopts): + """generate paths matching given patterns""" + perfhack = repo.ui.configbool('fastannotate', 'perfhack') + + # disable perfhack if: + # a) any walkopt is used + # b) if we treat pats as plain file names, some of them do not have + # corresponding linelog files + if perfhack: + # cwd relative to reporoot + reporoot = os.path.dirname(repo.path) + reldir = os.path.relpath(pycompat.getcwd(), reporoot) + if reldir == '.': + reldir = '' + if any(opts.get(o[1]) for o in commands.walkopts): # a) + perfhack = False + else: # b) + relpats = [os.path.relpath(p, reporoot) if os.path.isabs(p) else p + for p in pats] + # disable perfhack on '..' since it allows escaping from the repo + if any(('..' in f or + not os.path.isfile( + facontext.pathhelper(repo, f, aopts).linelogpath)) + for f in relpats): + perfhack = False + + # perfhack: emit paths directly without checking with manifest + # this can be incorrect if the rev does not have the file. 
+ if perfhack: + for p in relpats: + yield os.path.join(reldir, p) + else: + def bad(x, y): + raise error.Abort("%s: %s" % (x, y)) + ctx = scmutil.revsingle(repo, rev) + m = scmutil.match(ctx, pats, opts, badfn=bad) + for p in ctx.walk(m): + yield p + +fastannotatecommandargs = { + 'options': [ + ('r', 'rev', '.', _('annotate the specified revision'), _('REV')), + ('u', 'user', None, _('list the author (long with -v)')), + ('f', 'file', None, _('list the filename')), + ('d', 'date', None, _('list the date (short with -q)')), + ('n', 'number', None, _('list the revision number (default)')), + ('c', 'changeset', None, _('list the changeset')), + ('l', 'line-number', None, _('show line number at the first ' + 'appearance')), + ('e', 'deleted', None, _('show deleted lines (slow) (EXPERIMENTAL)')), + ('', 'no-content', None, _('do not show file content (EXPERIMENTAL)')), + ('', 'no-follow', None, _("don't follow copies and renames")), + ('', 'linear', None, _('enforce linear history, ignore second parent ' + 'of merges (EXPERIMENTAL)')), + ('', 'long-hash', None, _('show long changeset hash (EXPERIMENTAL)')), + ('', 'rebuild', None, _('rebuild cache even if it exists ' + '(EXPERIMENTAL)')), + ] + commands.diffwsopts + commands.walkopts + commands.formatteropts, + 'synopsis': _('[-r REV] [-f] [-a] [-u] [-d] [-n] [-c] [-l] FILE...'), + 'inferrepo': True, +} + +def fastannotate(ui, repo, *pats, **opts): + """show changeset information by line for each file + + List changes in files, showing the revision id responsible for each line. + + This command is useful for discovering when a change was made and by whom. + + By default this command prints revision numbers. If you include --file, + --user, or --date, the revision number is suppressed unless you also + include --number. The default format can also be customized by setting + fastannotate.defaultformat. + + Returns 0 on success. + + .. 
container:: verbose + + This command uses an implementation different from the vanilla annotate + command, which may produce slightly different (while still reasonable) + outputs for some cases. + + Unlike the vanilla annotate, fastannotate follows rename regardless of + the existence of --file. + + For the best performance when running on a full repo, use -c, -l, + avoid -u, -d, -n. Use --linear and --no-content to make it even faster. + + For the best performance when running on a shallow (remotefilelog) + repo, avoid --linear, --no-follow, or any diff options, as the server + won't be able to populate annotate cache when non-default options + affecting results are used. + """ + if not pats: + raise error.Abort(_('at least one filename or pattern is required')) + + # performance hack: filtered repo can be slow. unfilter by default. + if ui.configbool('fastannotate', 'unfilteredrepo'): + repo = repo.unfiltered() + + rev = opts.get('rev', '.') + rebuild = opts.get('rebuild', False) + + diffopts = patch.difffeatureopts(ui, opts, section='annotate', + whitespace=True) + aopts = facontext.annotateopts( + diffopts=diffopts, + followmerge=not opts.get('linear', False), + followrename=not opts.get('no_follow', False)) + + if not any(opts.get(s) + for s in ['user', 'date', 'file', 'number', 'changeset']): + # default 'number' for compatibility. but fastannotate is more + # efficient with "changeset", "line-number" and "no-content". 
+ for name in ui.configlist('fastannotate', 'defaultformat', ['number']): + opts[name] = True + + ui.pager('fastannotate') + template = opts.get('template') + if template == 'json': + formatter = faformatter.jsonformatter(ui, repo, opts) + else: + formatter = faformatter.defaultformatter(ui, repo, opts) + showdeleted = opts.get('deleted', False) + showlines = not bool(opts.get('no_content')) + showpath = opts.get('file', False) + + # find the head of the main (master) branch + master = ui.config('fastannotate', 'mainbranch') or rev + + # paths will be used for prefetching and the real annotating + paths = list(_matchpaths(repo, rev, pats, opts, aopts)) + + # for client, prefetch from the server + if util.safehasattr(repo, 'prefetchfastannotate'): + repo.prefetchfastannotate(paths) + + for path in paths: + result = lines = existinglines = None + while True: + try: + with facontext.annotatecontext(repo, path, aopts, rebuild) as a: + result = a.annotate(rev, master=master, showpath=showpath, + showlines=(showlines and + not showdeleted)) + if showdeleted: + existinglines = set((l[0], l[1]) for l in result) + result = a.annotatealllines( + rev, showpath=showpath, showlines=showlines) + break + except (faerror.CannotReuseError, faerror.CorruptedFileError): + # happens if master moves backwards, or the file was deleted + # and readded, or renamed to an existing name, or corrupted. 
+ if rebuild: # give up since we have tried rebuild already + raise + else: # try a second time rebuilding the cache (slow) + rebuild = True + continue + + if showlines: + result, lines = result + + formatter.write(result, lines, existinglines=existinglines) + formatter.end() + +_newopts = set([]) +_knownopts = set([opt[1].replace('-', '_') for opt in + (fastannotatecommandargs['options'] + commands.globalopts)]) + +def _annotatewrapper(orig, ui, repo, *pats, **opts): + """used by wrapdefault""" + # we need this hack until the obsstore has 0.0 seconds perf impact + if ui.configbool('fastannotate', 'unfilteredrepo'): + repo = repo.unfiltered() + + # treat the file as text (skip the isbinary check) + if ui.configbool('fastannotate', 'forcetext'): + opts['text'] = True + + # check if we need to do prefetch (client-side) + rev = opts.get('rev') + if util.safehasattr(repo, 'prefetchfastannotate') and rev is not None: + paths = list(_matchpaths(repo, rev, pats, opts)) + repo.prefetchfastannotate(paths) + + return orig(ui, repo, *pats, **opts) + +def registercommand(): + """register the fastannotate command""" + name = '^fastannotate|fastblame|fa' + command(name, **fastannotatecommandargs)(fastannotate) + +def wrapdefault(): + """wrap the default annotate command, to be aware of the protocol""" + extensions.wrapcommand(commands.table, 'annotate', _annotatewrapper) + +@command('debugbuildannotatecache', + [('r', 'rev', '', _('build up to the specific revision'), _('REV')) + ] + commands.walkopts, + _('[-r REV] FILE...')) +def debugbuildannotatecache(ui, repo, *pats, **opts): + """incrementally build fastannotate cache up to REV for specified files + + If REV is not specified, use the config 'fastannotate.mainbranch'. + + If fastannotate.client is True, download the annotate cache from the + server. Otherwise, build the annotate cache locally. + + The annotate cache will be built using the default diff and follow + options and lives in '.hg/fastannotate/default'. 
+ """ + rev = opts.get('REV') or ui.config('fastannotate', 'mainbranch') + if not rev: + raise error.Abort(_('you need to provide a revision'), + hint=_('set fastannotate.mainbranch or use --rev')) + if ui.configbool('fastannotate', 'unfilteredrepo'): + repo = repo.unfiltered() + ctx = scmutil.revsingle(repo, rev) + m = scmutil.match(ctx, pats, opts) + paths = list(ctx.walk(m)) + if util.safehasattr(repo, 'prefetchfastannotate'): + # client + if opts.get('REV'): + raise error.Abort(_('--rev cannot be used for client')) + repo.prefetchfastannotate(paths) + else: + # server, or full repo + for i, path in enumerate(paths): + ui.progress(_('building'), i, total=len(paths)) + with facontext.annotatecontext(repo, path) as actx: + try: + if actx.isuptodate(rev): + continue + actx.annotate(rev, rev) + except (faerror.CannotReuseError, faerror.CorruptedFileError): + # the cache is broken (could happen with renaming so the + # file history gets invalidated). rebuild and try again. + ui.debug('fastannotate: %s: rebuilding broken cache\n' + % path) + actx.rebuild() + try: + actx.annotate(rev, rev) + except Exception as ex: + # possibly a bug, but should not stop us from building + # cache for other files. + ui.warn(_('fastannotate: %s: failed to ' + 'build cache: %r\n') % (path, ex)) + # clear the progress bar + ui.write() diff --git a/hgext/fastannotate/context.py b/hgext/fastannotate/context.py new file mode 100644 --- /dev/null +++ b/hgext/fastannotate/context.py @@ -0,0 +1,823 @@ +# Copyright 2016-present Facebook. All Rights Reserved. +# +# context: context needed to annotate a file +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. 
+ +from __future__ import absolute_import + +import collections +import contextlib +import hashlib +import os + +from mercurial.i18n import _ +from mercurial import ( + error, + linelog as linelogmod, + lock as lockmod, + mdiff, + node, + pycompat, + scmutil, + util, +) + +from . import ( + error as faerror, + revmap as revmapmod, +) + +# given path, get filelog, cached +@util.lrucachefunc +def _getflog(repo, path): + return repo.file(path) + +# extracted from mercurial.context.basefilectx.annotate +def _parents(f, follow=True): + # Cut _descendantrev here to mitigate the penalty of lazy linkrev + # adjustment. Otherwise, p._adjustlinkrev() would walk changelog + # from the topmost introrev (= srcrev) down to p.linkrev() if it + # isn't an ancestor of the srcrev. + f._changeid + pl = f.parents() + + # Don't return renamed parents if we aren't following. + if not follow: + pl = [p for p in pl if p.path() == f.path()] + + # renamed filectx won't have a filelog yet, so set it + # from the cache to save time + for p in pl: + if not '_filelog' in p.__dict__: + p._filelog = _getflog(f._repo, p.path()) + + return pl + +# extracted from mercurial.context.basefilectx.annotate. slightly modified +# so it takes a fctx instead of a pair of text and fctx. +def _decorate(fctx): + text = fctx.data() + linecount = text.count('\n') + if text and not text.endswith('\n'): + linecount += 1 + return ([(fctx, i) for i in pycompat.xrange(linecount)], text) + +# extracted from mercurial.context.basefilectx.annotate. slightly modified +# so it takes an extra "blocks" parameter calculated elsewhere, instead of +# calculating diff here. +def _pair(parent, child, blocks): + for (a1, a2, b1, b2), t in blocks: + # Changed blocks ('!') or blocks made only of blank lines ('~') + # belong to the child. 
+ if t == '=': + child[0][b1:b2] = parent[0][a1:a2] + return child + +# like scmutil.revsingle, but with lru cache, so their states (like manifests) +# could be reused +_revsingle = util.lrucachefunc(scmutil.revsingle) + +def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None): + """(repo, str, str) -> fctx + + get the filectx object from repo, rev, path, in an efficient way. + + if resolverev is True, "rev" is a revision specified by the revset + language, otherwise "rev" is a nodeid, or a revision number that can + be consumed by repo.__getitem__. + + if adjustctx is not None, the returned fctx will point to a changeset + that introduces the change (last modified the file). if adjustctx + is 'linkrev', trust the linkrev and do not adjust it. this is noticeably + faster for big repos but is incorrect for some cases. + """ + if resolverev and not isinstance(rev, int) and rev is not None: + ctx = _revsingle(repo, rev) + else: + ctx = repo[rev] + + # If we don't need to adjust the linkrev, create the filectx using the + # changectx instead of using ctx[path]. This means it already has the + # changectx information, so blame -u will be able to look directly at the + # commitctx object instead of having to resolve it by going through the + # manifest. In a lazy-manifest world this can prevent us from downloading a + # lot of data. + if adjustctx is None: + # ctx.rev() is None means it's the working copy, which is a special + # case. 
+ if ctx.rev() is None: + fctx = ctx[path] + else: + fctx = repo.filectx(path, changeid=ctx.rev()) + else: + fctx = ctx[path] + if adjustctx == 'linkrev': + introrev = fctx.linkrev() + else: + introrev = fctx.introrev() + if introrev != ctx.rev(): + fctx._changeid = introrev + fctx._changectx = repo[introrev] + return fctx + +# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock +def encodedir(path): + return (path + .replace('.hg/', '.hg.hg/') + .replace('.l/', '.l.hg/') + .replace('.m/', '.m.hg/') + .replace('.lock/', '.lock.hg/')) + +def hashdiffopts(diffopts): + diffoptstr = str(sorted( + (k, getattr(diffopts, k)) + for k in mdiff.diffopts.defaults.iterkeys() + )) + return hashlib.sha1(diffoptstr).hexdigest()[:6] + +_defaultdiffopthash = hashdiffopts(mdiff.defaultopts) + +class annotateopts(object): + """like mercurial.mdiff.diffopts, but is for annotate + + followrename: follow renames, like "hg annotate -f" + followmerge: follow p2 of a merge changeset, otherwise p2 is ignored + """ + + defaults = { + 'diffopts': None, + 'followrename': True, + 'followmerge': True, + } + + def __init__(self, **opts): + for k, v in self.defaults.iteritems(): + setattr(self, k, opts.get(k, v)) + + @util.propertycache + def shortstr(self): + """represent opts in a short string, suitable for a directory name""" + result = '' + if not self.followrename: + result += 'r0' + if not self.followmerge: + result += 'm0' + if self.diffopts is not None: + assert isinstance(self.diffopts, mdiff.diffopts) + diffopthash = hashdiffopts(self.diffopts) + if diffopthash != _defaultdiffopthash: + result += 'i' + diffopthash + return result or 'default' + +defaultopts = annotateopts() + +class _annotatecontext(object): + """do not use this class directly as it does not use lock to protect + writes. use "with annotatecontext(...)" instead. 
+ """ + + def __init__(self, repo, path, linelogpath, revmappath, opts): + self.repo = repo + self.ui = repo.ui + self.path = path + self.opts = opts + self.linelogpath = linelogpath + self.revmappath = revmappath + self._linelog = None + self._revmap = None + self._node2path = {} # {str: str} + + @property + def linelog(self): + if self._linelog is None: + if os.path.exists(self.linelogpath): + with open(self.linelogpath, 'rb') as f: + try: + self._linelog = linelogmod.linelog.fromdata(f.read()) + except linelogmod.LineLogError: + self._linelog = linelogmod.linelog() + else: + self._linelog = linelogmod.linelog() + return self._linelog + + @property + def revmap(self): + if self._revmap is None: + self._revmap = revmapmod.revmap(self.revmappath) + return self._revmap + + def close(self): + if self._revmap is not None: + self._revmap.flush() + self._revmap = None + if self._linelog is not None: + with open(self.linelogpath, 'wb') as f: + f.write(self._linelog.encode()) + self._linelog = None + + __del__ = close + + def rebuild(self): + """delete linelog and revmap, useful for rebuilding""" + self.close() + self._node2path.clear() + _unlinkpaths([self.revmappath, self.linelogpath]) + + @property + def lastnode(self): + """return last node in revmap, or None if revmap is empty""" + if self._revmap is None: + # fast path, read revmap without loading its full content + return revmapmod.getlastnode(self.revmappath) + else: + return self._revmap.rev2hsh(self._revmap.maxrev) + + def isuptodate(self, master, strict=True): + """return True if the revmap / linelog is up-to-date, or the file + does not exist in the master revision. False otherwise. + + it tries to be fast and could return false negatives, because of the + use of linkrev instead of introrev. + + useful for both server and client to decide whether to update + fastannotate cache or not. + + if strict is True, even if fctx exists in the revmap, but is not the + last node, isuptodate will return False. 
it's good for performance - no + expensive check was done. + + if strict is False, if fctx exists in the revmap, this function may + return True. this is useful for the client to skip downloading the + cache if the client's master is behind the server's. + """ + lastnode = self.lastnode + try: + f = self._resolvefctx(master, resolverev=True) + # choose linkrev instead of introrev as the check is meant to be + # *fast*. + linknode = self.repo.changelog.node(f.linkrev()) + if not strict and lastnode and linknode != lastnode: + # check if f.node() is in the revmap. note: this loads the + # revmap and can be slow. + return self.revmap.hsh2rev(linknode) is not None + # avoid resolving old manifest, or slow adjustlinkrev to be fast, + # false negatives are acceptable in this case. + return linknode == lastnode + except LookupError: + # master does not have the file, or the revmap is ahead + return True + + def annotate(self, rev, master=None, showpath=False, showlines=False): + """incrementally update the cache so it includes revisions in the main + branch till 'master'. and run annotate on 'rev', which may or may not be + included in the main branch. + + if master is None, do not update linelog. + + the first value returned is the annotate result, it is [(node, linenum)] + by default. [(node, linenum, path)] if showpath is True. + + if showlines is True, a second value will be returned, it is a list of + corresponding line contents. + """ + + # the fast path test requires commit hash, convert rev number to hash, + # so it may hit the fast path. note: in the "fctx" mode, the "annotate" + # command could give us a revision number even if the user passes a + # commit hash. 
+ if isinstance(rev, int): + rev = node.hex(self.repo.changelog.node(rev)) + + # fast path: if rev is in the main branch already + directly, revfctx = self.canannotatedirectly(rev) + if directly: + if self.ui.debugflag: + self.ui.debug('fastannotate: %s: using fast path ' + '(resolved fctx: %s)\n' + % (self.path, util.safehasattr(revfctx, 'node'))) + return self.annotatedirectly(revfctx, showpath, showlines) + + # resolve master + masterfctx = None + if master: + try: + masterfctx = self._resolvefctx(master, resolverev=True, + adjustctx=True) + except LookupError: # master does not have the file + pass + else: + if masterfctx in self.revmap: # no need to update linelog + masterfctx = None + + # ... - @ <- rev (can be an arbitrary changeset, + # / not necessarily a descendant + # master -> o of master) + # | + # a merge -> o 'o': new changesets in the main branch + # |\ '#': revisions in the main branch that + # o * exist in linelog / revmap + # | . '*': changesets in side branches, or + # last master -> # . descendants of master + # | . + # # * joint: '#', and is a parent of a '*' + # |/ + # a joint -> # ^^^^ --- side branches + # | + # ^ --- main branch (in linelog) + + # these DFSes are similar to the traditional annotate algorithm. + # we cannot really reuse the code for perf reason. + + # 1st DFS calculates merges, joint points, and needed. + # "needed" is a simple reference counting dict to free items in + # "hist", reducing its memory usage otherwise could be huge. 
+ initvisit = [revfctx] + if masterfctx: + if masterfctx.rev() is None: + raise error.Abort(_('cannot update linelog to wdir()'), + hint=_('set fastannotate.mainbranch')) + initvisit.append(masterfctx) + visit = initvisit[:] + pcache = {} + needed = {revfctx: 1} + hist = {} # {fctx: ([(llrev or fctx, linenum)], text)} + while visit: + f = visit.pop() + if f in pcache or f in hist: + continue + if f in self.revmap: # in the old main branch, it's a joint + llrev = self.revmap.hsh2rev(f.node()) + self.linelog.annotate(llrev) + result = self.linelog.annotateresult + hist[f] = (result, f.data()) + continue + pl = self._parentfunc(f) + pcache[f] = pl + for p in pl: + needed[p] = needed.get(p, 0) + 1 + if p not in pcache: + visit.append(p) + + # 2nd (simple) DFS calculates new changesets in the main branch + # ('o' nodes in # the above graph), so we know when to update linelog. + newmainbranch = set() + f = masterfctx + while f and f not in self.revmap: + newmainbranch.add(f) + pl = pcache[f] + if pl: + f = pl[0] + else: + f = None + break + + # f, if present, is the position where the last build stopped at, and + # should be the "master" last time. check to see if we can continue + # building the linelog incrementally. 
(we cannot if diverged) + if masterfctx is not None: + self._checklastmasterhead(f) + + if self.ui.debugflag: + if newmainbranch: + self.ui.debug('fastannotate: %s: %d new changesets in the main' + ' branch\n' % (self.path, len(newmainbranch))) + elif not hist: # no joints, no updates + self.ui.debug('fastannotate: %s: linelog cannot help in ' + 'annotating this revision\n' % self.path) + + # prepare annotateresult so we can update linelog incrementally + self.linelog.annotate(self.linelog.maxrev) + + # 3rd DFS does the actual annotate + visit = initvisit[:] + progress = 0 + while visit: + f = visit[-1] + if f in hist: + visit.pop() + continue + + ready = True + pl = pcache[f] + for p in pl: + if p not in hist: + ready = False + visit.append(p) + if not ready: + continue + + visit.pop() + blocks = None # mdiff blocks, used for appending linelog + ismainbranch = (f in newmainbranch) + # curr is the same as the traditional annotate algorithm, + # if we only care about linear history (do not follow merge), + # then curr is not actually used. 
+ assert f not in hist + curr = _decorate(f) + for i, p in enumerate(pl): + bs = list(self._diffblocks(hist[p][1], curr[1])) + if i == 0 and ismainbranch: + blocks = bs + curr = _pair(hist[p], curr, bs) + if needed[p] == 1: + del hist[p] + del needed[p] + else: + needed[p] -= 1 + + hist[f] = curr + del pcache[f] + + if ismainbranch: # need to write to linelog + if not self.ui.quiet: + progress += 1 + self.ui.progress(_('building cache'), progress, + total=len(newmainbranch)) + bannotated = None + if len(pl) == 2 and self.opts.followmerge: # merge + bannotated = curr[0] + if blocks is None: # no parents, add an empty one + blocks = list(self._diffblocks('', curr[1])) + self._appendrev(f, blocks, bannotated) + elif showpath: # not append linelog, but we need to record path + self._node2path[f.node()] = f.path() + + if progress: # clean progress bar + self.ui.write() + + result = [ + ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l) + for fr, l in hist[revfctx][0]] # [(node, linenumber)] + return self._refineannotateresult(result, revfctx, showpath, showlines) + + def canannotatedirectly(self, rev): + """(str) -> bool, fctx or node. + return (True, f) if we can annotate without updating the linelog, pass + f to annotatedirectly. + return (False, f) if we need extra calculation. f is the fctx resolved + from rev. + """ + result = True + f = None + if not isinstance(rev, int) and rev is not None: + hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev) + if hsh is not None and (hsh, self.path) in self.revmap: + f = hsh + if f is None: + adjustctx = 'linkrev' if self._perfhack else True + f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True) + result = f in self.revmap + if not result and self._perfhack: + # redo the resolution without perfhack - as we are going to + # do write operations, we need a correct fctx. 
+ f = self._resolvefctx(rev, adjustctx=True, resolverev=True) + return result, f + + def annotatealllines(self, rev, showpath=False, showlines=False): + """(rev : str) -> [(node : str, linenum : int, path : str)] + + the result has the same format with annotate, but include all (including + deleted) lines up to rev. call this after calling annotate(rev, ...) for + better performance and accuracy. + """ + revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True) + + # find a chain from rev to anything in the mainbranch + if revfctx not in self.revmap: + chain = [revfctx] + a = '' + while True: + f = chain[-1] + pl = self._parentfunc(f) + if not pl: + break + if pl[0] in self.revmap: + a = pl[0].data() + break + chain.append(pl[0]) + + # both self.linelog and self.revmap is backed by filesystem. now + # we want to modify them but do not want to write changes back to + # files. so we create in-memory objects and copy them. it's like + # a "fork". + linelog = linelogmod.linelog() + linelog.copyfrom(self.linelog) + linelog.annotate(linelog.maxrev) + revmap = revmapmod.revmap() + revmap.copyfrom(self.revmap) + + for f in reversed(chain): + b = f.data() + blocks = list(self._diffblocks(a, b)) + self._doappendrev(linelog, revmap, f, blocks) + a = b + else: + # fastpath: use existing linelog, revmap as we don't write to them + linelog = self.linelog + revmap = self.revmap + + lines = linelog.getalllines() + hsh = revfctx.node() + llrev = revmap.hsh2rev(hsh) + result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev] + # cannot use _refineannotateresult since we need custom logic for + # resolving line contents + if showpath: + result = self._addpathtoresult(result, revmap) + if showlines: + linecontents = self._resolvelines(result, revmap, linelog) + result = (result, linecontents) + return result + + def _resolvelines(self, annotateresult, revmap, linelog): + """(annotateresult) -> [line]. designed for annotatealllines. 
+ this is probably the most inefficient code in the whole fastannotate + directory. but we have made a decision that the linelog does not + store line contents. so getting them requires random accesses to + the revlog data, since they can be many, it can be very slow. + """ + # [llrev] + revs = [revmap.hsh2rev(l[0]) for l in annotateresult] + result = [None] * len(annotateresult) + # {(rev, linenum): [lineindex]} + key2idxs = collections.defaultdict(list) + for i in pycompat.xrange(len(result)): + key2idxs[(revs[i], annotateresult[i][1])].append(i) + while key2idxs: + # find an unresolved line and its linelog rev to annotate + hsh = None + try: + for (rev, _linenum), idxs in key2idxs.iteritems(): + if revmap.rev2flag(rev) & revmapmod.sidebranchflag: + continue + hsh = annotateresult[idxs[0]][0] + break + except StopIteration: # no more unresolved lines + return result + if hsh is None: + # the remaining key2idxs are not in main branch, resolving them + # using the hard way... + revlines = {} + for (rev, linenum), idxs in key2idxs.iteritems(): + if rev not in revlines: + hsh = annotateresult[idxs[0]][0] + if self.ui.debugflag: + self.ui.debug('fastannotate: reading %s line #%d ' + 'to resolve lines %r\n' + % (node.short(hsh), linenum, idxs)) + fctx = self._resolvefctx(hsh, revmap.rev2path(rev)) + lines = mdiff.splitnewlines(fctx.data()) + revlines[rev] = lines + for idx in idxs: + result[idx] = revlines[rev][linenum] + assert all(x is not None for x in result) + return result + + # run the annotate and the lines should match to the file content + self.ui.debug('fastannotate: annotate %s to resolve lines\n' + % node.short(hsh)) + linelog.annotate(rev) + fctx = self._resolvefctx(hsh, revmap.rev2path(rev)) + annotated = linelog.annotateresult + lines = mdiff.splitnewlines(fctx.data()) + if len(lines) != len(annotated): + raise faerror.CorruptedFileError('unexpected annotated lines') + # resolve lines from the annotate result + for i, line in enumerate(lines): + k = 
def annotatedirectly(self, f, showpath, showlines):
    """like annotate, but when we know that f is in linelog.

    f can be either a 20-byte binary node or a fctx. this is for perf - in
    the best case, the user provides a node and we don't need to read the
    filelog or construct any filecontext.

    raises faerror.CorruptedFileError if the node is unknown to the revmap
    or is only recorded as a side branch revision.
    """
    # nodes are binary hashes (canannotatedirectly builds them with
    # bytes()/node.bin), so test for bytes. on Python 2 bytes is str, so
    # this is unchanged there; on Python 3 a str test would send every
    # node down the fctx branch and fail on f.node().
    if isinstance(f, bytes):
        hsh = f
    else:
        hsh = f.node()
    llrev = self.revmap.hsh2rev(hsh)
    # hsh2rev returns None for unknown hashes and rev 0 is reserved, so a
    # falsy value always means "not recorded"
    if not llrev:
        raise faerror.CorruptedFileError('%s is not in revmap'
                                         % node.hex(hsh))
    if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
        raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
                                         % node.hex(hsh))
    self.linelog.annotate(llrev)
    result = [(self.revmap.rev2hsh(r), l)
              for r, l in self.linelog.annotateresult]
    return self._refineannotateresult(result, f, showpath, showlines)
+ """ + if showpath: + result = self._addpathtoresult(result) + if showlines: + if isinstance(f, str): # f: node or fctx + llrev = self.revmap.hsh2rev(f) + fctx = self._resolvefctx(f, self.revmap.rev2path(llrev)) + else: + fctx = f + lines = mdiff.splitnewlines(fctx.data()) + if len(lines) != len(result): # linelog is probably corrupted + raise faerror.CorruptedFileError() + result = (result, lines) + return result + + def _appendrev(self, fctx, blocks, bannotated=None): + self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated) + + def _diffblocks(self, a, b): + return mdiff.allblocks(a, b, self.opts.diffopts) + + @staticmethod + def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None): + """append a revision to linelog and revmap""" + + def getllrev(f): + """(fctx) -> int""" + # f should not be a linelog revision + if isinstance(f, int): + raise error.ProgrammingError('f should not be an int') + # f is a fctx, allocate linelog rev on demand + hsh = f.node() + rev = revmap.hsh2rev(hsh) + if rev is None: + rev = revmap.append(hsh, sidebranch=True, path=f.path()) + return rev + + # append sidebranch revisions to revmap + siderevs = [] + siderevmap = {} # node: int + if bannotated is not None: + for (a1, a2, b1, b2), op in blocks: + if op != '=': + # f could be either linelong rev, or fctx. 
+ siderevs += [f for f, l in bannotated[b1:b2] + if not isinstance(f, int)] + siderevs = set(siderevs) + if fctx in siderevs: # mainnode must be appended seperately + siderevs.remove(fctx) + for f in siderevs: + siderevmap[f] = getllrev(f) + + # the changeset in the main branch, could be a merge + llrev = revmap.append(fctx.node(), path=fctx.path()) + siderevmap[fctx] = llrev + + for (a1, a2, b1, b2), op in reversed(blocks): + if op == '=': + continue + if bannotated is None: + linelog.replacelines(llrev, a1, a2, b1, b2) + else: + blines = [((r if isinstance(r, int) else siderevmap[r]), l) + for r, l in bannotated[b1:b2]] + linelog.replacelines_vec(llrev, a1, a2, blines) + + def _addpathtoresult(self, annotateresult, revmap=None): + """(revmap, [(node, linenum)]) -> [(node, linenum, path)]""" + if revmap is None: + revmap = self.revmap + + def _getpath(nodeid): + path = self._node2path.get(nodeid) + if path is None: + path = revmap.rev2path(revmap.hsh2rev(nodeid)) + self._node2path[nodeid] = path + return path + + return [(n, l, _getpath(n)) for n, l in annotateresult] + + def _checklastmasterhead(self, fctx): + """check if fctx is the master's head last time, raise if not""" + if fctx is None: + llrev = 0 + else: + llrev = self.revmap.hsh2rev(fctx.node()) + if not llrev: + raise faerror.CannotReuseError() + if self.linelog.maxrev != llrev: + raise faerror.CannotReuseError() + + @util.propertycache + def _parentfunc(self): + """-> (fctx) -> [fctx]""" + followrename = self.opts.followrename + followmerge = self.opts.followmerge + def parents(f): + pl = _parents(f, follow=followrename) + if not followmerge: + pl = pl[:1] + return pl + return parents + + @util.propertycache + def _perfhack(self): + return self.ui.configbool('fastannotate', 'perfhack') + + def _resolvefctx(self, rev, path=None, **kwds): + return resolvefctx(self.repo, rev, (path or self.path), **kwds) + +def _unlinkpaths(paths): + """silent, best-effort unlink""" + for path in paths: + try: + 
class pathhelper(object):
    """helper for getting paths for lockfile, linelog and revmap

    all paths live under the repo vfs, in
    fastannotate/<opts.shortstr>/<encoded path>, so caches built with
    different options do not collide.
    """

    def __init__(self, repo, path, opts=defaultopts):
        # different options use different directories
        self._vfspath = os.path.join('fastannotate',
                                     opts.shortstr, encodedir(path))
        self._repo = repo

    @property
    def dirname(self):
        """directory holding the cache files for this path"""
        return os.path.dirname(self._repo.vfs.join(self._vfspath))

    @property
    def linelogpath(self):
        """full path of the linelog file ('.l' suffix)"""
        return self._repo.vfs.join(self._vfspath + '.l')

    def lock(self):
        """return a lockmod.lock on the '.lock' file for this path"""
        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')

    @contextlib.contextmanager
    def _lockflock(self):
        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
        creating temporary symlinks.

        the linelog file itself is used as the lock file and is created if
        missing. the descriptor is closed on every exit path, including
        when taking or releasing the lock fails.
        """
        import fcntl
        lockpath = self.linelogpath
        util.makedirs(os.path.dirname(lockpath))
        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
        try:
            # flock can fail (the extension docs note it may misbehave on
            # network filesystems); keep it inside the try so the fd is
            # not leaked in that case
            fcntl.flock(lockfd, fcntl.LOCK_EX)
            try:
                yield
            finally:
                fcntl.flock(lockfd, fcntl.LOCK_UN)
        finally:
            os.close(lockfd)

    @property
    def revmappath(self):
        """full path of the revmap file ('.m' suffix)"""
        return self._repo.vfs.join(self._vfspath + '.m')
def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
    """build an annotatecontext for the file a fctx points to

    convenient when used in fctx.annotate. the repo and path are taken
    from the fctx; "follow" is overridden to True when the
    fastannotate.forcefollow config (default: True) is set.
    """
    repo = fctx._repo
    path = fctx._path
    forced = repo.ui.configbool('fastannotate', 'forcefollow', True)
    followrename = True if forced else follow
    return annotatecontext(
        repo, path,
        annotateopts(diffopts=diffopts, followrename=followrename),
        rebuild)
# imitating mercurial.commands.annotate, not using the vanilla formatter since
# the data structures are a bit different, and we have some fast paths.
class defaultformatter(object):
    """the default formatter that does leftpad and support some common flags

    self.funcmap is a list of (getter, separator, fieldname, encoder), one
    entry per enabled column, in output order. it may be empty when no
    column option is enabled.
    """

    def __init__(self, ui, repo, opts):
        self.ui = ui
        self.opts = opts

        if ui.quiet:
            datefunc = dateutil.shortdate
        else:
            datefunc = dateutil.datestr
        datefunc = util.cachefunc(datefunc)
        getctx = util.cachefunc(lambda x: repo[x[0]])
        hexfunc = self._hexfunc

        # special handling working copy "changeset" and "rev" functions
        if self.opts.get('rev') == 'wdir()':
            orig = hexfunc
            hexfunc = lambda x: None if x is None else orig(x)
            wnode = hexfunc(repo[None].p1().node()) + '+'
            wrev = str(repo[None].p1().rev())
            wrevpad = ''
            if not opts.get('changeset'): # only show + if changeset is hidden
                wrev += '+'
                wrevpad = ' '
            revenc = lambda x: wrev if x is None else str(x) + wrevpad
            csetenc = lambda x: wnode if x is None else str(x) + ' '
        else:
            revenc = csetenc = str

        # opt name, separator, raw value (for json/plain), encoder (for plain)
        opmap = [('user', ' ', lambda x: getctx(x).user(), ui.shortuser),
                 ('number', ' ', lambda x: getctx(x).rev(), revenc),
                 ('changeset', ' ', lambda x: hexfunc(x[0]), csetenc),
                 ('date', ' ', lambda x: getctx(x).date(), datefunc),
                 ('file', ' ', lambda x: x[2], str),
                 ('line_number', ':', lambda x: x[1] + 1, str)]
        fieldnamemap = {'number': 'rev', 'changeset': 'node'}
        funcmap = [(get, sep, fieldnamemap.get(op, op), enc)
                   for op, sep, get, enc in opmap
                   if opts.get(op)]
        # no separator for first column. guard against the case where no
        # column is enabled at all, which used to raise IndexError here.
        if funcmap:
            funcmap[0] = list(funcmap[0])
            funcmap[0][1] = ''
        self.funcmap = funcmap

    def write(self, annotatedresult, lines=None, existinglines=None):
        """(annotateresult, [str], set([rev, linenum])) -> None. write output.
        annotateresult can be [(node, linenum, path)], or [(node, linenum)]

        when existinglines is given, rows whose (node, linenum) is not in
        it are written with a '-' prefix and the 'diff.deleted' label.
        """
        pieces = [] # [[str]]
        maxwidths = [] # [int]

        # calculate padding
        for f, sep, name, enc in self.funcmap:
            l = [enc(f(x)) for x in annotatedresult]
            pieces.append(l)
            if name in ['node', 'date']: # node and date has fixed size
                l = l[:1]
            # a real list, not map(): on Python 3 a map object is always
            # truthy, so max() would be called on an empty sequence when
            # there is nothing to annotate
            widths = [encoding.colwidth(s) for s in set(l)]
            maxwidth = (max(widths) if widths else 0)
            maxwidths.append(maxwidth)

        # buffered output
        result = ''
        for i in pycompat.xrange(len(annotatedresult)):
            for j, p in enumerate(pieces):
                sep = self.funcmap[j][1]
                padding = ' ' * (maxwidths[j] - len(p[i]))
                result += sep + padding + p[i]
            if lines:
                if existinglines is None:
                    result += ': ' + lines[i]
                else: # extra formatting showing whether a line exists
                    key = (annotatedresult[i][0], annotatedresult[i][1])
                    if key in existinglines:
                        result += ': ' + lines[i]
                    else:
                        result += ': ' + self.ui.label('-' + lines[i],
                                                       'diff.deleted')

            # endswith() instead of result[-1]: the buffer can still be
            # empty here (no columns and no line contents)
            if not result.endswith('\n'):
                result += '\n'

        self.ui.write(result)

    @util.propertycache
    def _hexfunc(self):
        """node -> str. full hex with --debug or long_hash, short otherwise"""
        if self.ui.debugflag or self.opts.get('long_hash'):
            return node.hex
        else:
            return node.short

    def end(self):
        """nothing to finalize for plain output"""
        pass
templatefilters.json(vs[i], paranoid=False), + seps[j])) + result += ' }%s' % ('' if i == lasti else ',') + if lasti >= 0: + self.needcomma = True + + self.ui.write(result) + + def _writecomma(self): + if self.needcomma: + self.ui.write(',') + self.needcomma = False + + @util.propertycache + def _hexfunc(self): + return node.hex + + def end(self): + self.ui.write('\n]\n') diff --git a/hgext/fastannotate/protocol.py b/hgext/fastannotate/protocol.py new file mode 100644 --- /dev/null +++ b/hgext/fastannotate/protocol.py @@ -0,0 +1,250 @@ +# Copyright 2016-present Facebook. All Rights Reserved. +# +# protocol: logic for a server providing fastannotate support +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. +from __future__ import absolute_import + +import contextlib +import os + +from mercurial.i18n import _ +from mercurial import ( + error, + extensions, + hg, + localrepo, + scmutil, + wireprotov1peer, + wireprotov1server, +) +from . import context + +# common + +def _getmaster(ui): + """get the mainbranch, and enforce it is set""" + master = ui.config('fastannotate', 'mainbranch') + if not master: + raise error.Abort(_('fastannotate.mainbranch is required ' + 'for both the client and the server')) + return master + +# server-side + +def _capabilities(orig, repo, proto): + result = orig(repo, proto) + result.append('getannotate') + return result + +def _getannotate(repo, proto, path, lastnode): + # output: + # FILE := vfspath + '\0' + str(size) + '\0' + content + # OUTPUT := '' | FILE + OUTPUT + result = '' + buildondemand = repo.ui.configbool('fastannotate', 'serverbuildondemand', + True) + with context.annotatecontext(repo, path) as actx: + if buildondemand: + # update before responding to the client + master = _getmaster(repo.ui) + try: + if not actx.isuptodate(master): + actx.annotate(master, master) + except Exception: + # non-fast-forward move or corrupted. 
rebuild automically. + actx.rebuild() + try: + actx.annotate(master, master) + except Exception: + actx.rebuild() # delete files + finally: + # although the "with" context will also do a close/flush, we + # need to do it early so we can send the correct respond to + # client. + actx.close() + # send back the full content of revmap and linelog, in the future we + # may want to do some rsync-like fancy updating. + # the lastnode check is not necessary if the client and the server + # agree where the main branch is. + if actx.lastnode != lastnode: + for p in [actx.revmappath, actx.linelogpath]: + if not os.path.exists(p): + continue + content = '' + with open(p, 'rb') as f: + content = f.read() + vfsbaselen = len(repo.vfs.base + '/') + relpath = p[vfsbaselen:] + result += '%s\0%s\0%s' % (relpath, len(content), content) + return result + +def _registerwireprotocommand(): + if 'getannotate' in wireprotov1server.commands: + return + wireprotov1server.wireprotocommand( + 'getannotate', 'path lastnode')(_getannotate) + +def serveruisetup(ui): + _registerwireprotocommand() + extensions.wrapfunction(wireprotov1server, '_capabilities', _capabilities) + +# client-side + +def _parseresponse(payload): + result = {} + i = 0 + l = len(payload) - 1 + state = 0 # 0: vfspath, 1: size + vfspath = size = '' + while i < l: + ch = payload[i] + if ch == '\0': + if state == 1: + result[vfspath] = buffer(payload, i + 1, int(size)) + i += int(size) + state = 0 + vfspath = size = '' + elif state == 0: + state = 1 + else: + if state == 1: + size += ch + elif state == 0: + vfspath += ch + i += 1 + return result + +def peersetup(ui, peer): + class fastannotatepeer(peer.__class__): + @wireprotov1peer.batchable + def getannotate(self, path, lastnode=None): + if not self.capable('getannotate'): + ui.warn(_('remote peer cannot provide annotate cache\n')) + yield None, None + else: + args = {'path': path, 'lastnode': lastnode or ''} + f = wireprotov1peer.future() + yield args, f + yield 
@contextlib.contextmanager
def annotatepeer(repo):
    """context manager yielding a peer to send getannotate requests to

    when fastannotate.clientsharepeer is set (default: True) and the repo
    has a "fileservice" attribute, the peer is borrowed from
    repo.connectionpool and handed back on exit. otherwise a new peer to
    fastannotate.remotepath (default: "default") is opened and closed on
    exit.
    """
    ui = repo.ui

    # fileservice belongs to remotefilelog
    fileservice = getattr(repo, 'fileservice', None)
    sharepeer = ui.configbool('fastannotate', 'clientsharepeer', True)

    # conn stays None unless the peer is borrowed from the pool
    conn = None
    if sharepeer and fileservice:
        ui.debug('fastannotate: using remotefilelog connection pool\n')
        conn = repo.connectionpool.get(repo.fallbackpath)
        peer = conn.peer
    else:
        remotepath = ui.expandpath(
            ui.config('fastannotate', 'remotepath', 'default'))
        peer = hg.peer(ui, {}, remotepath)

    try:
        # Note: fastannotate requests should never trigger a remotefilelog
        # "getfiles" request, because "getfiles" puts the stream into a state
        # that does not exit. See "clientfetch": it does "getannotate" before
        # any hg stuff that could potentially trigger a "getfiles".
        yield peer
    finally:
        if conn is not None:
            conn.__exit__(None, None, None)
        else:
            # not every peer type has both methods; call what exists
            for method in ['close', 'cleanup']:
                getattr(peer, method, lambda: None)()
(len(content), path)) + repo.vfs.makedirs(os.path.dirname(path)) + with repo.vfs(path, 'wb') as f: + f.write(content) + +def _filterfetchpaths(repo, paths): + """return a subset of paths whose history is long and need to fetch linelog + from the server. works with remotefilelog and non-remotefilelog repos. + """ + threshold = repo.ui.configint('fastannotate', 'clientfetchthreshold', 10) + if threshold <= 0: + return paths + + master = repo.ui.config('fastannotate', 'mainbranch') or 'default' + + if 'remotefilelog' in repo.requirements: + ctx = scmutil.revsingle(repo, master) + f = lambda path: len(ctx[path].ancestormap()) + else: + f = lambda path: len(repo.file(path)) + + result = [] + for path in paths: + try: + if f(path) >= threshold: + result.append(path) + except Exception: # file not found etc. + result.append(path) + + return result + +def localreposetup(ui, repo): + class fastannotaterepo(repo.__class__): + def prefetchfastannotate(self, paths, peer=None): + master = _getmaster(self.ui) + needupdatepaths = [] + lastnodemap = {} + try: + for path in _filterfetchpaths(self, paths): + with context.annotatecontext(self, path) as actx: + if not actx.isuptodate(master, strict=False): + needupdatepaths.append(path) + lastnodemap[path] = actx.lastnode + if needupdatepaths: + clientfetch(self, needupdatepaths, lastnodemap, peer) + except Exception as ex: + # could be directory not writable or so, not fatal + self.ui.debug('fastannotate: prefetch failed: %r\n' % ex) + repo.__class__ = fastannotaterepo + +def clientreposetup(ui, repo): + _registerwireprotocommand() + if isinstance(repo, localrepo.localrepository): + localreposetup(ui, repo) + if peersetup not in hg.wirepeersetupfuncs: + hg.wirepeersetupfuncs.append(peersetup) diff --git a/hgext/fastannotate/revmap.py b/hgext/fastannotate/revmap.py new file mode 100644 --- /dev/null +++ b/hgext/fastannotate/revmap.py @@ -0,0 +1,254 @@ +# Copyright 2016-present Facebook. All Rights Reserved. 
# the revmap file format is straightforward:
#
#    8 bytes: header
#    1 byte : flag for linelog revision 1
#    ? bytes: (optional) '\0'-terminated path string
#             only exists if (flag & renameflag) != 0
#   20 bytes: hg hash for linelog revision 1
#    1 byte : flag for linelog revision 2
#    ? bytes: (optional) '\0'-terminated path string
#   20 bytes: hg hash for linelog revision 2
#   ....
#
# the implementation is deliberately simple: __init__ loads the whole
# revmap, with no laziness. a benchmark showed that loading 10000 revisions
# takes about 0.015 seconds, which is enough for our use-case. if this
# implementation becomes a bottleneck, we can change it to lazily read the
# file from the end.

# whether the changeset is in the side branch. i.e. not in the linear main
# branch but only got referenced by lines in merge changesets.
sidebranchflag = 1

# whether the changeset changes the file path (ie. is a rename)
renameflag = 2

# len(mercurial.node.nullid)
_hshlen = 20

class revmap(object):
    """trivial hg bin hash - linelog rev bidirectional map

    also stores a flag (uint8) for each revision, and track renames.

    hashes and paths are byte strings: the file is read and written in
    binary mode.
    """

    HEADER = b'REVMAP1\0'

    def __init__(self, path=None):
        """create or load the revmap, optionally associate to a file

        if path is None, the revmap is entirely in-memory. the caller is
        responsible for locking. concurrent writes to the same file are
        unsafe. the caller needs to make sure one file is associated to at
        most one revmap object at a time."""
        self.path = path
        # rev 0 is reserved, real rev starts from 1
        self._rev2hsh = [None]
        self._rev2flag = [None]
        self._hsh2rev = {}
        # since rename does not happen frequently, do not store path for every
        # revision. self._renamerevs can be used for bisecting.
        self._renamerevs = [0]
        self._renamepaths = [b'']
        # highest rev already written to the file, -1: nothing written yet
        self._lastmaxrev = -1
        if path:
            if os.path.exists(path):
                self._load()
            else:
                # write the header so "append" can do incremental updates
                self.flush()

    def copyfrom(self, rhs):
        """copy the map data from another revmap. do not affect self.path"""
        self._rev2hsh = rhs._rev2hsh[:]
        self._rev2flag = rhs._rev2flag[:]
        self._hsh2rev = rhs._hsh2rev.copy()
        self._renamerevs = rhs._renamerevs[:]
        self._renamepaths = rhs._renamepaths[:]
        # force the next flush to rewrite the whole file
        self._lastmaxrev = -1

    @property
    def maxrev(self):
        """return max linelog revision number"""
        return len(self._rev2hsh) - 1

    def append(self, hsh, sidebranch=False, path=None, flush=False):
        """add a binary hg hash and return the mapped linelog revision.
        if flush is True, incrementally update the file.

        a path different from the most recently recorded one marks the
        revision as a rename. raises CorruptedFileError if hsh is already
        mapped, ProgrammingError if hsh has the wrong length.
        """
        if hsh in self._hsh2rev:
            raise error.CorruptedFileError('%r is in revmap already' % hex(hsh))
        if len(hsh) != _hshlen:
            raise hgerror.ProgrammingError('hsh must be %d-char long' % _hshlen)
        idx = len(self._rev2hsh)
        flag = 0
        if sidebranch:
            flag |= sidebranchflag
        if path is not None and path != self._renamepaths[-1]:
            flag |= renameflag
            self._renamerevs.append(idx)
            self._renamepaths.append(path)
        self._rev2hsh.append(hsh)
        self._rev2flag.append(flag)
        self._hsh2rev[hsh] = idx
        if flush:
            self.flush()
        return idx

    def rev2hsh(self, rev):
        """convert linelog revision to hg hash. return None if not found."""
        if rev > self.maxrev or rev < 0:
            return None
        return self._rev2hsh[rev]

    def rev2flag(self, rev):
        """get the flag (uint8) for a given linelog revision.
        return None if revision does not exist.
        """
        if rev > self.maxrev or rev < 0:
            return None
        return self._rev2flag[rev]

    def rev2path(self, rev):
        """get the path for a given linelog revision.
        return None if revision does not exist.
        """
        if rev > self.maxrev or rev < 0:
            return None
        # the path in effect is the one of the last rename at or before rev
        idx = bisect.bisect_right(self._renamerevs, rev) - 1
        return self._renamepaths[idx]

    def hsh2rev(self, hsh):
        """convert hg hash to linelog revision. return None if not found."""
        return self._hsh2rev.get(hsh)

    def clear(self, flush=False):
        """make the map empty. if flush is True, write to disk"""
        # rev 0 is reserved, real rev starts from 1
        self._rev2hsh = [None]
        self._rev2flag = [None]
        self._hsh2rev = {}
        # the rename tables must be reset too. otherwise rev2path keeps
        # answering from the old history, and append() compares new paths
        # against a stale "last path", so the rename flag (and thus the
        # path written to disk) can be lost for the first revisions.
        self._renamerevs = [0]
        self._renamepaths = [b'']
        self._lastmaxrev = -1
        if flush:
            self.flush()

    def flush(self):
        """write the state down to the file"""
        if not self.path:
            return
        if self._lastmaxrev == -1: # write the entire file
            with open(self.path, 'wb') as f:
                f.write(self.HEADER)
                for i in pycompat.xrange(1, len(self._rev2hsh)):
                    self._writerev(i, f)
        else: # append incrementally
            with open(self.path, 'ab') as f:
                for i in pycompat.xrange(self._lastmaxrev + 1,
                                         len(self._rev2hsh)):
                    self._writerev(i, f)
        self._lastmaxrev = self.maxrev

    def _load(self):
        """load state from file

        raises CorruptedFileError on a bad header or a truncated record.
        """
        if not self.path:
            return
        # use local variables in a loop. CPython uses LOAD_FAST for them,
        # which is faster than both LOAD_CONST and LOAD_GLOBAL.
        flaglen = 1
        hshlen = _hshlen
        with open(self.path, 'rb') as f:
            if f.read(len(self.HEADER)) != self.HEADER:
                raise error.CorruptedFileError()
            self.clear(flush=False)
            while True:
                buf = f.read(flaglen)
                if not buf:
                    break
                flag = ord(buf)
                rev = len(self._rev2hsh)
                if flag & renameflag:
                    path = self._readcstr(f)
                    self._renamerevs.append(rev)
                    self._renamepaths.append(path)
                hsh = f.read(hshlen)
                if len(hsh) != hshlen:
                    raise error.CorruptedFileError()
                self._hsh2rev[hsh] = rev
                self._rev2flag.append(flag)
                self._rev2hsh.append(hsh)
        self._lastmaxrev = self.maxrev

    def _writerev(self, rev, f):
        """append a revision data to file"""
        flag = self._rev2flag[rev]
        hsh = self._rev2hsh[rev]
        f.write(struct.pack('B', flag))
        if flag & renameflag:
            path = self.rev2path(rev)
            if path is None:
                raise error.CorruptedFileError('cannot find path for %s' % rev)
            # f is opened in binary mode, so the terminator is a byte
            f.write(path + b'\0')
        f.write(hsh)

    @staticmethod
    def _readcstr(f):
        """read a C-language-like '\0'-terminated string"""
        # f yields bytes; compare against a bytes NUL so the terminator is
        # also recognized on Python 3
        buf = b''
        while True:
            ch = f.read(1)
            if not ch: # unexpected eof
                raise error.CorruptedFileError()
            if ch == b'\0':
                break
            buf += ch
        return buf

    def __contains__(self, f):
        """(fctx or (node, path)) -> bool.
        test if (node, path) is in the map, and is not in a side branch.
        f can be either a tuple of (node, path), or a fctx.
        """
        if isinstance(f, tuple): # f: (node, path)
            hsh, path = f
        else: # f: fctx
            hsh, path = f.node(), f.path()
        rev = self.hsh2rev(hsh)
        if rev is None:
            return False
        if path is not None and path != self.rev2path(rev):
            return False
        return (self.rev2flag(rev) & sidebranchflag) == 0
class _lazyfctx(object):
    """stand-in for a fctx that answers node() and path() from the values
    it was created with, and only resolves the real fctx when some other
    attribute is requested"""

    def __init__(self, repo, node, path):
        self._node, self._path, self._repo = node, path, repo

    def node(self):
        return self._node

    def path(self):
        return self._path

    @util.propertycache
    def _fctx(self):
        # the real fctx, resolved at most once per instance
        resolved = context.resolvefctx(self._repo, self._node, self._path)
        return resolved

    def __getattr__(self, name):
        # only reached for attributes this class does not define itself
        real = self._fctx
        return getattr(real, name)
def _doannotate(fctx, follow=True, diffopts=None):
    """like the vanilla fctx.annotate, but do it via fastannotate, and make
    the output format compatible with the vanilla fctx.annotate.
    may raise Exception, and always return line numbers.

    if the first attempt fails, the cache is rebuilt and the annotate is
    retried once; a failure of the retry propagates to the caller.
    an empty file yields an empty list.
    """
    master = _getmaster(fctx)
    annotated = contents = None

    with context.fctxannotatecontext(fctx, follow, diffopts) as ac:
        try:
            annotated, contents = ac.annotate(fctx.rev(), master=master,
                                              showpath=True, showlines=True)
        except Exception:
            ac.rebuild() # try rebuild once
            fctx._repo.ui.debug('fastannotate: %s: rebuilding broken cache\n'
                                % fctx._path)
            annotated, contents = ac.annotate(fctx.rev(), master=master,
                                              showpath=True, showlines=True)

    # only "never assigned" is an error. an empty file legitimately
    # produces two empty lists, which a truthiness check would reject (and
    # a bare assert would be skipped entirely under -O).
    if annotated is None or contents is None:
        raise AssertionError('fastannotate: annotate produced no result')
    return _convertoutputs(fctx._repo, annotated, contents)
diffopts) as ac: + skipset = revmap.revmap(ac.revmappath) + return orig(self, follow, skiprevs=skiprevs, diffopts=diffopts, + prefetchskip=skipset) + +def replacehgwebannotate(): + extensions.wrapfunction(hgweb.webutil, 'annotate', _hgwebannotate) + +def replacefctxannotate(): + extensions.wrapfunction(hgcontext.basefilectx, 'annotate', _fctxannotate) + +def replaceremotefctxannotate(): + try: + r = extensions.find('remotefilelog') + except KeyError: + return + else: + extensions.wrapfunction(r.remotefilectx.remotefilectx, 'annotate', + _remotefctxannotate) diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -818,6 +818,7 @@ 'mercurial.thirdparty.zope.interface', 'mercurial.utils', 'hgext', 'hgext.convert', 'hgext.fsmonitor', + 'hgext.fastannotate', 'hgext.fsmonitor.pywatchman', 'hgext.infinitepush', 'hgext.highlight', diff --git a/tests/test-fastannotate-corrupt.t b/tests/test-fastannotate-corrupt.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-corrupt.t @@ -0,0 +1,83 @@ + $ cat >> $HGRCPATH << EOF + > [extensions] + > fastannotate= + > EOF + + $ hg init repo + $ cd repo + $ for i in 0 1 2 3 4; do + > echo $i >> a + > echo $i >> b + > hg commit -A -m $i a b + > done + +use the "debugbuildannotatecache" command to build annotate cache at rev 0 + + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=0 + fastannotate: a: 1 new changesets in the main branch + fastannotate: b: 1 new changesets in the main branch + +"debugbuildannotatecache" should work with broken cache (and other files would +be built without being affected). note: linelog being broken is only noticed +when we try to append to it. + + $ echo 'CORRUPT!' >> .hg/fastannotate/default/a.m + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=1 + fastannotate: a: rebuilding broken cache + fastannotate: a: 2 new changesets in the main branch + fastannotate: b: 1 new changesets in the main branch + + $ echo 'CANNOT REUSE!' 
> .hg/fastannotate/default/a.l + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=2 + fastannotate: a: rebuilding broken cache + fastannotate: a: 3 new changesets in the main branch + fastannotate: b: 1 new changesets in the main branch + + $ rm .hg/fastannotate/default/a.m + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=3 + fastannotate: a: rebuilding broken cache + fastannotate: a: 4 new changesets in the main branch + fastannotate: b: 1 new changesets in the main branch + + $ rm .hg/fastannotate/default/a.l + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=3 + $ hg debugbuildannotatecache --debug --config fastannotate.mainbranch=4 + fastannotate: a: rebuilding broken cache + fastannotate: a: 5 new changesets in the main branch + fastannotate: b: 1 new changesets in the main branch + +"fastannotate" should deal with file corruption as well + + $ rm -rf .hg/fastannotate + $ hg fastannotate --debug -r 0 a + fastannotate: a: 1 new changesets in the main branch + 0: 0 + + $ echo 'CORRUPT!' >> .hg/fastannotate/default/a.m + $ hg fastannotate --debug -r 0 a + fastannotate: a: cache broken and deleted + fastannotate: a: 1 new changesets in the main branch + 0: 0 + + $ echo 'CORRUPT!' 
> .hg/fastannotate/default/a.l + $ hg fastannotate --debug -r 1 a + fastannotate: a: cache broken and deleted + fastannotate: a: 2 new changesets in the main branch + 0: 0 + 1: 1 + + $ rm .hg/fastannotate/default/a.l + $ hg fastannotate --debug -r 1 a + fastannotate: a: using fast path (resolved fctx: True) + fastannotate: a: cache broken and deleted + fastannotate: a: 2 new changesets in the main branch + 0: 0 + 1: 1 + + $ rm .hg/fastannotate/default/a.m + $ hg fastannotate --debug -r 2 a + fastannotate: a: cache broken and deleted + fastannotate: a: 3 new changesets in the main branch + 0: 0 + 1: 1 + 2: 2 diff --git a/tests/test-fastannotate-diffopts.t b/tests/test-fastannotate-diffopts.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-diffopts.t @@ -0,0 +1,33 @@ + $ cat >> $HGRCPATH << EOF + > [extensions] + > fastannotate= + > EOF + + $ hg init repo + $ cd repo + +changes to whitespaces + + $ cat >> a << EOF + > 1 + > + > + > 2 + > EOF + $ hg commit -qAm '1' + $ cat > a << EOF + > 1 + > + > 2 + > + > + > 3 + > EOF + $ hg commit -m 2 + $ hg fastannotate -wB a + 0: 1 + 0: + 1: 2 + 0: + 1: + 1: 3 diff --git a/tests/test-fastannotate-hg.t b/tests/test-fastannotate-hg.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-hg.t @@ -0,0 +1,777 @@ +(this file is backported from core hg tests/test-annotate.t) + + $ cat >> $HGRCPATH << EOF + > [diff] + > git=1 + > [extensions] + > fastannotate= + > [fastannotate] + > modes=fctx + > forcefollow=False + > mainbranch=. 
+ > EOF + + $ HGMERGE=true; export HGMERGE + +init + + $ hg init repo + $ cd repo + +commit + + $ echo 'a' > a + $ hg ci -A -m test -u nobody -d '1 0' + adding a + +annotate -c + + $ hg annotate -c a + 8435f90966e4: a + +annotate -cl + + $ hg annotate -cl a + 8435f90966e4:1: a + +annotate -d + + $ hg annotate -d a + Thu Jan 01 00:00:01 1970 +0000: a + +annotate -n + + $ hg annotate -n a + 0: a + +annotate -nl + + $ hg annotate -nl a + 0:1: a + +annotate -u + + $ hg annotate -u a + nobody: a + +annotate -cdnu + + $ hg annotate -cdnu a + nobody 0 8435f90966e4 Thu Jan 01 00:00:01 1970 +0000: a + +annotate -cdnul + + $ hg annotate -cdnul a + nobody 0 8435f90966e4 Thu Jan 01 00:00:01 1970 +0000:1: a + +annotate (JSON) + + $ hg annotate -Tjson a + [ + { + "abspath": "a", + "lines": [{"line": "a\n", "rev": 0}], + "path": "a" + } + ] + + $ hg annotate -Tjson -cdfnul a + [ + { + "abspath": "a", + "lines": [{"date": [1.0, 0], "file": "a", "line": "a\n", "line_number": 1, "node": "8435f90966e442695d2ded29fdade2bac5ad8065", "rev": 0, "user": "nobody"}], + "path": "a" + } + ] + + $ cat <>a + > a + > a + > EOF + $ hg ci -ma1 -d '1 0' + $ hg cp a b + $ hg ci -mb -d '1 0' + $ cat <> b + > b4 + > b5 + > b6 + > EOF + $ hg ci -mb2 -d '2 0' + +annotate -n b + + $ hg annotate -n b + 0: a + 1: a + 1: a + 3: b4 + 3: b5 + 3: b6 + +annotate --no-follow b + + $ hg annotate --no-follow b + 2: a + 2: a + 2: a + 3: b4 + 3: b5 + 3: b6 + +annotate -nl b + + $ hg annotate -nl b + 0:1: a + 1:2: a + 1:3: a + 3:4: b4 + 3:5: b5 + 3:6: b6 + +annotate -nf b + + $ hg annotate -nf b + 0 a: a + 1 a: a + 1 a: a + 3 b: b4 + 3 b: b5 + 3 b: b6 + +annotate -nlf b + + $ hg annotate -nlf b + 0 a:1: a + 1 a:2: a + 1 a:3: a + 3 b:4: b4 + 3 b:5: b5 + 3 b:6: b6 + + $ hg up -C 2 + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ cat <> b + > b4 + > c + > b5 + > EOF + $ hg ci -mb2.1 -d '2 0' + created new head + $ hg merge + merging b + 0 files updated, 1 files merged, 0 files removed, 0 files 
unresolved + (branch merge, don't forget to commit) + $ hg ci -mmergeb -d '3 0' + +annotate after merge +(note: the first one falls back to the vanilla annotate which does not use linelog) + + $ hg annotate -nf b --debug + fastannotate: b: rebuilding broken cache + fastannotate: b: 5 new changesets in the main branch + 0 a: a + 1 a: a + 1 a: a + 3 b: b4 + 4 b: c + 3 b: b5 + +(difference explained below) + + $ hg annotate -nf b --debug + fastannotate: b: using fast path (resolved fctx: False) + 0 a: a + 1 a: a + 1 a: a + 4 b: b4 + 4 b: c + 4 b: b5 + +annotate after merge with -l +(fastannotate differs from annotate) + + $ hg log -Gp -T '{rev}:{node}' -r '2..5' + @ 5:64afcdf8e29e063c635be123d8d2fb160af00f7e + |\ + | o 4:5fbdc1152d97597717021ad9e063061b200f146bdiff --git a/b b/b + | | --- a/b + | | +++ b/b + | | @@ -1,3 +1,6 @@ + | | a + | | a + | | a + | | +b4 + | | +c + | | +b5 + | | + o | 3:37ec9f5c3d1f99572d7075971cb4876e2139b52fdiff --git a/b b/b + |/ --- a/b + | +++ b/b + | @@ -1,3 +1,6 @@ + | a + | a + | a + | +b4 + | +b5 + | +b6 + | + o 2:3086dbafde1ce745abfc8d2d367847280aabae9ddiff --git a/a b/b + | copy from a + ~ copy to b + + +(in this case, "b4", "b5" could be considered introduced by either rev 3, or rev 4. 
+ and that causes the rev number difference) + + $ hg annotate -nlf b --config fastannotate.modes= + 0 a:1: a + 1 a:2: a + 1 a:3: a + 3 b:4: b4 + 4 b:5: c + 3 b:5: b5 + + $ hg annotate -nlf b + 0 a:1: a + 1 a:2: a + 1 a:3: a + 4 b:4: b4 + 4 b:5: c + 4 b:6: b5 + + $ hg up -C 1 + 0 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg cp a b + $ cat < b + > a + > z + > a + > EOF + $ hg ci -mc -d '3 0' + created new head + $ hg merge + merging b + 0 files updated, 1 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ cat <> b + > b4 + > c + > b5 + > EOF + $ echo d >> b + $ hg ci -mmerge2 -d '4 0' + +annotate after rename merge + + $ hg annotate -nf b + 0 a: a + 6 b: z + 1 a: a + 3 b: b4 + 4 b: c + 3 b: b5 + 7 b: d + +annotate after rename merge with -l +(fastannotate differs from annotate) + + $ hg log -Gp -T '{rev}:{node}' -r '0+1+6+7' + @ 7:6284bb6c38fef984a929862a53bbc71ce9eafa81diff --git a/b b/b + |\ --- a/b + | : +++ b/b + | : @@ -1,3 +1,7 @@ + | : a + | : z + | : a + | : +b4 + | : +c + | : +b5 + | : +d + | : + o : 6:b80e3e32f75a6a67cd4ac85496a11511e9112816diff --git a/a b/b + :/ copy from a + : copy to b + : --- a/a + : +++ b/b + : @@ -1,3 +1,3 @@ + : -a (?) + : a + : +z + : a + : -a (?) + : + o 1:762f04898e6684ff713415f7b8a8d53d33f96c92diff --git a/a b/a + | --- a/a + | +++ b/a + | @@ -1,1 +1,3 @@ + | a + | +a + | +a + | + o 0:8435f90966e442695d2ded29fdade2bac5ad8065diff --git a/a b/a + new file mode 100644 + --- /dev/null + +++ b/a + @@ -0,0 +1,1 @@ + +a + + +(note on question marks: + the upstream bdiff change (96f2f50d923f+3633403888ae+8c0c75aa3ff4+5c4e2636c1a9 + +38ed54888617) alters the output so deletion is not always at the end of the + output. 
for example: + | a | b | old | new | # old: e1d6aa0e4c3a, new: 8836f13e3c5b + |-------------------| + | a | a | a | -a | + | a | z | +z | a | + | a | a | a | +z | + | | | -a | a | + |-------------------| + | a | a | a | + | a | a | a | + | a | | -a | + this leads to more question marks below) + +(rev 1 adds two "a"s and rev 6 deletes one "a". + the "a" that rev 6 deletes could be either the first or the second "a" of those two "a"s added by rev 1. + and that causes the line number difference) + + $ hg annotate -nlf b --config fastannotate.modes= + 0 a:1: a + 6 b:2: z + 1 a:3: a + 3 b:4: b4 + 4 b:5: c + 3 b:5: b5 + 7 b:7: d + + $ hg annotate -nlf b + 0 a:1: a (?) + 1 a:2: a (?) + 6 b:2: z + 1 a:2: a (?) + 1 a:3: a (?) + 3 b:4: b4 + 4 b:5: c + 3 b:5: b5 + 7 b:7: d + +Issue2807: alignment of line numbers with -l +(fastannotate differs from annotate, same reason as above) + + $ echo more >> b + $ hg ci -mmore -d '5 0' + $ echo more >> b + $ hg ci -mmore -d '6 0' + $ echo more >> b + $ hg ci -mmore -d '7 0' + $ hg annotate -nlf b + 0 a: 1: a (?) + 1 a: 2: a (?) + 6 b: 2: z + 1 a: 2: a (?) + 1 a: 3: a (?) 
+ 3 b: 4: b4 + 4 b: 5: c + 3 b: 5: b5 + 7 b: 7: d + 8 b: 8: more + 9 b: 9: more + 10 b:10: more + +linkrev vs rev + + $ hg annotate -r tip -n a + 0: a + 1: a + 1: a + +linkrev vs rev with -l + + $ hg annotate -r tip -nl a + 0:1: a + 1:2: a + 1:3: a + +Issue589: "undelete" sequence leads to crash + +annotate was crashing when trying to --follow something + +like A -> B -> A + +generate ABA rename configuration + + $ echo foo > foo + $ hg add foo + $ hg ci -m addfoo + $ hg rename foo bar + $ hg ci -m renamefoo + $ hg rename bar foo + $ hg ci -m renamebar + +annotate after ABA with follow + + $ hg annotate --follow foo + foo: foo + +missing file + + $ hg ann nosuchfile + abort: nosuchfile: no such file in rev e9e6b4fa872f + [255] + +annotate file without '\n' on last line + + $ printf "" > c + $ hg ci -A -m test -u nobody -d '1 0' + adding c + $ hg annotate c + $ printf "a\nb" > c + $ hg ci -m test + $ hg annotate c + [0-9]+: a (re) + [0-9]+: b (re) + +Issue3841: check annotation of the file of which filelog includes +merging between the revision and its ancestor + +to reproduce the situation with recent Mercurial, this script uses (1) +"hg debugsetparents" to merge without ancestor check by "hg merge", +and (2) the extension to allow filelog merging between the revision +and its ancestor by overriding "repo._filecommit". 
+ + $ cat > ../legacyrepo.py < from mercurial import node, error + > def reposetup(ui, repo): + > class legacyrepo(repo.__class__): + > def _filecommit(self, fctx, manifest1, manifest2, + > linkrev, tr, changelist): + > fname = fctx.path() + > text = fctx.data() + > flog = self.file(fname) + > fparent1 = manifest1.get(fname, node.nullid) + > fparent2 = manifest2.get(fname, node.nullid) + > meta = {} + > copy = fctx.renamed() + > if copy and copy[0] != fname: + > raise error.Abort('copying is not supported') + > if fparent2 != node.nullid: + > changelist.append(fname) + > return flog.add(text, meta, tr, linkrev, + > fparent1, fparent2) + > raise error.Abort('only merging is supported') + > repo.__class__ = legacyrepo + > EOF + + $ cat > baz < 1 + > 2 + > 3 + > 4 + > 5 + > EOF + $ hg add baz + $ hg commit -m "baz:0" + + $ cat > baz < 1 baz:1 + > 2 + > 3 + > 4 + > 5 + > EOF + $ hg commit -m "baz:1" + + $ cat > baz < 1 baz:1 + > 2 baz:2 + > 3 + > 4 + > 5 + > EOF + $ hg debugsetparents 17 17 + $ hg --config extensions.legacyrepo=../legacyrepo.py commit -m "baz:2" + $ hg debugindexdot .hg/store/data/baz.i + digraph G { + -1 -> 0 + 0 -> 1 + 1 -> 2 + 1 -> 2 + } + $ hg annotate baz + 17: 1 baz:1 + 18: 2 baz:2 + 16: 3 + 16: 4 + 16: 5 + + $ cat > baz < 1 baz:1 + > 2 baz:2 + > 3 baz:3 + > 4 + > 5 + > EOF + $ hg commit -m "baz:3" + + $ cat > baz < 1 baz:1 + > 2 baz:2 + > 3 baz:3 + > 4 baz:4 + > 5 + > EOF + $ hg debugsetparents 19 18 + $ hg --config extensions.legacyrepo=../legacyrepo.py commit -m "baz:4" + $ hg debugindexdot .hg/store/data/baz.i + digraph G { + -1 -> 0 + 0 -> 1 + 1 -> 2 + 1 -> 2 + 2 -> 3 + 3 -> 4 + 2 -> 4 + } + $ hg annotate baz + 17: 1 baz:1 + 18: 2 baz:2 + 19: 3 baz:3 + 20: 4 baz:4 + 16: 5 + +annotate clean file + + $ hg annotate -ncr "wdir()" foo + 11 472b18db256d : foo + +annotate modified file + + $ echo foofoo >> foo + $ hg annotate -r "wdir()" foo + 11 : foo + 20+: foofoo + + $ hg annotate -cr "wdir()" foo + 472b18db256d : foo + b6bedd5477e7+: foofoo + + 
$ hg annotate -ncr "wdir()" foo + 11 472b18db256d : foo + 20 b6bedd5477e7+: foofoo + + $ hg annotate --debug -ncr "wdir()" foo + 11 472b18db256d1e8282064eab4bfdaf48cbfe83cd : foo + 20 b6bedd5477e797f25e568a6402d4697f3f895a72+: foofoo + + $ hg annotate -udr "wdir()" foo + test Thu Jan 01 00:00:00 1970 +0000: foo + test [A-Za-z0-9:+ ]+: foofoo (re) + + $ hg annotate -ncr "wdir()" -Tjson foo + [ + { + "abspath": "foo", + "lines": [{"line": "foo\n", "node": "472b18db256d1e8282064eab4bfdaf48cbfe83cd", "rev": 11}, {"line": "foofoo\n", "node": null, "rev": null}], + "path": "foo" + } + ] + +annotate added file + + $ echo bar > bar + $ hg add bar + $ hg annotate -ncr "wdir()" bar + 20 b6bedd5477e7+: bar + +annotate renamed file + + $ hg rename foo renamefoo2 + $ hg annotate -ncr "wdir()" renamefoo2 + 11 472b18db256d : foo + 20 b6bedd5477e7+: foofoo + +annotate missing file + + $ rm baz +#if windows + $ hg annotate -ncr "wdir()" baz + abort: $TESTTMP\repo\baz: The system cannot find the file specified + [255] +#else + $ hg annotate -ncr "wdir()" baz + abort: $ENOENT$: $TESTTMP/repo/baz + [255] +#endif + +annotate removed file + + $ hg rm baz +#if windows + $ hg annotate -ncr "wdir()" baz + abort: $TESTTMP\repo\baz: The system cannot find the file specified + [255] +#else + $ hg annotate -ncr "wdir()" baz + abort: $ENOENT$: $TESTTMP/repo/baz + [255] +#endif + +Test annotate with whitespace options + + $ cd .. 
+ $ hg init repo-ws + $ cd repo-ws + $ cat > a < aa + > + > b b + > EOF + $ hg ci -Am "adda" + adding a + $ sed 's/EOL$//g' > a < a a + > + > EOL + > b b + > EOF + $ hg ci -m "changea" + +Annotate with no option + + $ hg annotate a + 1: a a + 0: + 1: + 1: b b + +Annotate with --ignore-space-change + + $ hg annotate --ignore-space-change a + 1: a a + 1: + 0: + 0: b b + +Annotate with --ignore-all-space + + $ hg annotate --ignore-all-space a + 0: a a + 0: + 1: + 0: b b + +Annotate with --ignore-blank-lines (similar to no options case) + + $ hg annotate --ignore-blank-lines a + 1: a a + 0: + 1: + 1: b b + + $ cd .. + +Annotate with linkrev pointing to another branch +------------------------------------------------ + +create history with a filerev whose linkrev points to another branch + + $ hg init branchedlinkrev + $ cd branchedlinkrev + $ echo A > a + $ hg commit -Am 'contentA' + adding a + $ echo B >> a + $ hg commit -m 'contentB' + $ hg up --rev 'desc(contentA)' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo unrelated > unrelated + $ hg commit -Am 'unrelated' + adding unrelated + created new head + $ hg graft -r 'desc(contentB)' + grafting 1:fd27c222e3e6 "contentB" + $ echo C >> a + $ hg commit -m 'contentC' + $ echo W >> a + $ hg log -G + @ changeset: 4:072f1e8df249 + | tag: tip + | user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | summary: contentC + | + o changeset: 3:ff38df03cc4b + | user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | summary: contentB + | + o changeset: 2:62aaf3f6fc06 + | parent: 0:f0932f74827e + | user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | summary: unrelated + | + | o changeset: 1:fd27c222e3e6 + |/ user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | summary: contentB + | + o changeset: 0:f0932f74827e + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: contentA + + +Annotate should list ancestor of starting revision only + + $ hg annotate a + 0: A + 3: B + 4: C + + $ hg 
annotate a -r 'wdir()' + 0 : A + 3 : B + 4 : C + 4+: W + +Even when the starting revision is the linkrev-shadowed one: + + $ hg annotate a -r 3 + 0: A + 3: B + + $ cd .. + +Issue5360: Deleted chunk in p1 of a merge changeset + + $ hg init repo-5360 + $ cd repo-5360 + $ echo 1 > a + $ hg commit -A a -m 1 + $ echo 2 >> a + $ hg commit -m 2 + $ echo a > a + $ hg commit -m a + $ hg update '.^' -q + $ echo 3 >> a + $ hg commit -m 3 -q + $ hg merge 2 -q + $ cat > a << EOF + > b + > 1 + > 2 + > 3 + > a + > EOF + $ hg resolve --mark -q + $ hg commit -m m + $ hg annotate a + 4: b + 0: 1 + 1: 2 + 3: 3 + 2: a + + $ cd .. diff --git a/tests/test-fastannotate-perfhack.t b/tests/test-fastannotate-perfhack.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-perfhack.t @@ -0,0 +1,182 @@ + $ cat >> $HGRCPATH << EOF + > [extensions] + > fastannotate= + > [fastannotate] + > perfhack=1 + > EOF + + $ HGMERGE=true; export HGMERGE + + $ hg init repo + $ cd repo + +a simple merge case + + $ echo 1 > a + $ hg commit -qAm 'append 1' + $ echo 2 >> a + $ hg commit -m 'append 2' + $ echo 3 >> a + $ hg commit -m 'append 3' + $ hg up 1 -q + $ cat > a << EOF + > 0 + > 1 + > 2 + > EOF + $ hg commit -qm 'insert 0' + $ hg merge 2 -q + $ echo 4 >> a + $ hg commit -m merge + $ hg log -G -T '{rev}: {desc}' + @ 4: merge + |\ + | o 3: insert 0 + | | + o | 2: append 3 + |/ + o 1: append 2 + | + o 0: append 1 + + $ hg fastannotate a + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + $ hg fastannotate -r 0 a + 0: 1 + $ hg fastannotate -r 1 a + 0: 1 + 1: 2 + $ hg fastannotate -udnclf a + test 3 d641cb51f61e Thu Jan 01 00:00:00 1970 +0000 a:1: 0 + test 0 4994017376d3 Thu Jan 01 00:00:00 1970 +0000 a:1: 1 + test 1 e940cb6d9a06 Thu Jan 01 00:00:00 1970 +0000 a:2: 2 + test 2 26162a884ba6 Thu Jan 01 00:00:00 1970 +0000 a:3: 3 + test 4 3ad7bcd2815f Thu Jan 01 00:00:00 1970 +0000 a:5: 4 + $ hg fastannotate --linear a + 3: 0 + 0: 1 + 1: 2 + 4: 3 + 4: 4 + +incrementally updating + + $ hg fastannotate -r 0 a --debug 
+ fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + $ hg fastannotate -r 0 a --debug --rebuild + fastannotate: a: 1 new changesets in the main branch + 0: 1 + $ hg fastannotate -r 1 a --debug + fastannotate: a: 1 new changesets in the main branch + 0: 1 + 1: 2 + $ hg fastannotate -r 3 a --debug + fastannotate: a: 1 new changesets in the main branch + 3: 0 + 0: 1 + 1: 2 + $ hg fastannotate -r 4 a --debug + fastannotate: a: 1 new changesets in the main branch + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + $ hg fastannotate -r 1 a --debug + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + 1: 2 + +rebuild happens automatically if unable to update + + $ hg fastannotate -r 2 a --debug + fastannotate: a: cache broken and deleted + fastannotate: a: 3 new changesets in the main branch + 0: 1 + 1: 2 + 2: 3 + +config option "fastannotate.mainbranch" + + $ hg fastannotate -r 1 --rebuild --config fastannotate.mainbranch=tip a --debug + fastannotate: a: 4 new changesets in the main branch + 0: 1 + 1: 2 + $ hg fastannotate -r 4 a --debug + fastannotate: a: using fast path (resolved fctx: True) + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + +rename + + $ hg mv a b + $ cat > b << EOF + > 0 + > 11 + > 3 + > 44 + > EOF + $ hg commit -m b -q + $ hg fastannotate -ncf --long-hash b + 3 d641cb51f61e331c44654104301f8154d7865c89 a: 0 + 5 d44dade239915bc82b91e4556b1257323f8e5824 b: 11 + 2 26162a884ba60e8c87bf4e0d6bb8efcc6f711a4e a: 3 + 5 d44dade239915bc82b91e4556b1257323f8e5824 b: 44 + $ hg fastannotate -r 26162a884ba60e8c87bf4e0d6bb8efcc6f711a4e a + 0: 1 + 1: 2 + 2: 3 + +fastannotate --deleted + + $ hg fastannotate --deleted -nf b + 3 a: 0 + 5 b: 11 + 0 a: -1 + 1 a: -2 + 2 a: 3 + 5 b: 44 + 4 a: -4 + $ hg fastannotate --deleted -r 3 -nf a + 3 a: 0 + 0 a: 1 + 1 a: 2 + +file and directories with ".l", ".m" suffixes + + $ cd .. 
+ $ hg init repo2 + $ cd repo2 + + $ mkdir a.l b.m c.lock a.l.hg b.hg + $ for i in a b c d d.l d.m a.l/a b.m/a c.lock/a a.l.hg/a b.hg/a; do + > echo $i > $i + > done + $ hg add . -q + $ hg commit -m init + $ hg fastannotate a.l/a b.m/a c.lock/a a.l.hg/a b.hg/a d.l d.m a b c d + 0: a + 0: a.l.hg/a + 0: a.l/a + 0: b + 0: b.hg/a + 0: b.m/a + 0: c + 0: c.lock/a + 0: d + 0: d.l + 0: d.m + +empty file + + $ touch empty + $ hg commit -A empty -m empty + $ hg fastannotate empty diff --git a/tests/test-fastannotate-protocol.t b/tests/test-fastannotate-protocol.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-protocol.t @@ -0,0 +1,216 @@ + $ cat >> $HGRCPATH << EOF + > [ui] + > ssh = $PYTHON "$TESTDIR/dummyssh" + > [extensions] + > fastannotate= + > [fastannotate] + > mainbranch=@ + > EOF + + $ HGMERGE=true; export HGMERGE + +setup the server repo + + $ hg init repo-server + $ cd repo-server + $ cat >> .hg/hgrc << EOF + > [fastannotate] + > server=1 + > EOF + $ for i in 1 2 3 4; do + > echo $i >> a + > hg commit -A -m $i a + > done + $ [ -d .hg/fastannotate ] + [1] + $ hg bookmark @ + $ cd .. + +setup the local repo + + $ hg clone 'ssh://user@dummy/repo-server' repo-local -q + $ cd repo-local + $ cat >> .hg/hgrc << EOF + > [fastannotate] + > client=1 + > clientfetchthreshold=0 + > EOF + $ [ -d .hg/fastannotate ] + [1] + $ hg fastannotate a --debug + running * (glob) + sending hello command + sending between command + remote: * (glob) (?) + remote: capabilities: * (glob) + remote: * (glob) (?) + sending protocaps command + fastannotate: requesting 1 files + sending getannotate command + fastannotate: writing 112 bytes to fastannotate/default/a.l (?) + fastannotate: server returned + fastannotate: writing 112 bytes to fastannotate/default/a.l (?) 
+ fastannotate: writing 94 bytes to fastannotate/default/a.m + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + 1: 2 + 2: 3 + 3: 4 + +the cache could be reused and no download is necessary + + $ hg fastannotate a --debug + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + 1: 2 + 2: 3 + 3: 4 + +if the client agrees where the head of the master branch is, no re-download +happens even if the client has more commits + + $ echo 5 >> a + $ hg commit -m 5 + $ hg bookmark -r 3 @ -f + $ hg fastannotate a --debug + 0: 1 + 1: 2 + 2: 3 + 3: 4 + 4: 5 + +if the client has a different "@" (head of the master branch) and "@" is ahead +of the server, the server can detect things are unchanged and does not return +full contents (note that there is no "writing ... to fastannotate"), but the +client can also build things up on its own (causing divergence) + + $ hg bookmark -r 4 @ -f + $ hg fastannotate a --debug + running * (glob) + sending hello command + sending between command + remote: * (glob) (?) + remote: capabilities: * (glob) + remote: * (glob) (?) + sending protocaps command + fastannotate: requesting 1 files + sending getannotate command + fastannotate: server returned + fastannotate: a: 1 new changesets in the main branch + 0: 1 + 1: 2 + 2: 3 + 3: 4 + 4: 5 + +if the client has a different "@" which is behind the server. no download is +necessary + + $ hg fastannotate a --debug --config fastannotate.mainbranch=2 + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + 1: 2 + 2: 3 + 3: 4 + 4: 5 + +define fastannotate on-disk paths + + $ p1=.hg/fastannotate/default + $ p2=../repo-server/.hg/fastannotate/default + +revert bookmark change so the client is behind the server + + $ hg bookmark -r 2 @ -f + +in the "fctx" mode with the "annotate" command, the client also downloads the +cache. but not in the (default) "fastannotate" mode. 
+ + $ rm $p1/a.l $p1/a.m + $ hg annotate a --debug | grep 'fastannotate: writing' + [1] + $ hg annotate a --config fastannotate.modes=fctx --debug | grep 'fastannotate: writing' | sort + fastannotate: writing 112 bytes to fastannotate/default/a.l + fastannotate: writing 94 bytes to fastannotate/default/a.m + +the fastannotate cache (built server-side, downloaded client-side) in two repos +have the same content (because the client downloads from the server) + + $ diff $p1/a.l $p2/a.l + $ diff $p1/a.m $p2/a.m + +in the "fctx" mode, the client could also build the cache locally + + $ hg annotate a --config fastannotate.modes=fctx --debug --config fastannotate.mainbranch=4 | grep fastannotate + fastannotate: requesting 1 files + fastannotate: server returned + fastannotate: a: 1 new changesets in the main branch + +the server would rebuild broken cache automatically + + $ cp $p2/a.m $p2/a.m.bak + $ echo BROKEN1 > $p1/a.m + $ echo BROKEN2 > $p2/a.m + $ hg fastannotate a --debug | grep 'fastannotate: writing' | sort + fastannotate: writing 112 bytes to fastannotate/default/a.l + fastannotate: writing 94 bytes to fastannotate/default/a.m + $ diff $p1/a.m $p2/a.m + $ diff $p2/a.m $p2/a.m.bak + +use the "debugbuildannotatecache" command to build annotate cache + + $ rm -rf $p1 $p2 + $ hg --cwd ../repo-server debugbuildannotatecache a --debug + fastannotate: a: 4 new changesets in the main branch + $ hg --cwd ../repo-local debugbuildannotatecache a --debug + running * (glob) + sending hello command + sending between command + remote: * (glob) (?) + remote: capabilities: * (glob) + remote: * (glob) (?) 
+ sending protocaps command + fastannotate: requesting 1 files + sending getannotate command + fastannotate: server returned + fastannotate: writing * (glob) + fastannotate: writing * (glob) + $ diff $p1/a.l $p2/a.l + $ diff $p1/a.m $p2/a.m + +with the clientfetchthreshold config option, the client can build up the cache +without downloading from the server + + $ rm -rf $p1 + $ hg fastannotate a --debug --config fastannotate.clientfetchthreshold=10 + fastannotate: a: 3 new changesets in the main branch + 0: 1 + 1: 2 + 2: 3 + 3: 4 + 4: 5 + +if the fastannotate directory is not writable, the fctx mode still works + + $ rm -rf $p1 + $ touch $p1 + $ hg annotate a --debug --traceback --config fastannotate.modes=fctx + fastannotate: a: cache broken and deleted + fastannotate: prefetch failed: * (glob) + fastannotate: a: cache broken and deleted + fastannotate: falling back to the vanilla annotate: * (glob) + 0: 1 + 1: 2 + 2: 3 + 3: 4 + 4: 5 + +with serverbuildondemand=False, the server will not build anything + + $ cat >> ../repo-server/.hg/hgrc < [fastannotate] + > serverbuildondemand=False + > EOF + $ rm -rf $p1 $p2 + $ hg fastannotate a --debug | grep 'fastannotate: writing' + [1] diff --git a/tests/test-fastannotate-renames.t b/tests/test-fastannotate-renames.t new file mode 100644 --- /dev/null +++ b/tests/test-fastannotate-renames.t @@ -0,0 +1,168 @@ + $ cat >> $HGRCPATH << EOF + > [extensions] + > fastannotate= + > [fastannotate] + > mainbranch=main + > EOF + + $ hg init repo + $ cd repo + +add or rename files on top of the master branch + + $ echo a1 > a + $ echo b1 > b + $ hg commit -qAm 1 + $ hg bookmark -i main + $ hg fastannotate --debug -nf b + fastannotate: b: 1 new changesets in the main branch + 0 b: b1 + $ hg fastannotate --debug -nf a + fastannotate: a: 1 new changesets in the main branch + 0 a: a1 + $ echo a2 >> a + $ cat > b << EOF + > b0 + > b1 + > EOF + $ hg mv a t + $ hg mv b a + $ hg mv t b + $ hg commit -m 'swap names' + +existing linelogs are 
not helpful with such renames in side branches + + $ hg fastannotate --debug -nf a + fastannotate: a: linelog cannot help in annotating this revision + 1 a: b0 + 0 b: b1 + $ hg fastannotate --debug -nf b + fastannotate: b: linelog cannot help in annotating this revision + 0 a: a1 + 1 b: a2 + +move main branch forward, rebuild should happen + + $ hg bookmark -i main -r . -q + $ hg fastannotate --debug -nf b + fastannotate: b: cache broken and deleted + fastannotate: b: 2 new changesets in the main branch + 0 a: a1 + 1 b: a2 + $ hg fastannotate --debug -nf b + fastannotate: b: using fast path (resolved fctx: True) + 0 a: a1 + 1 b: a2 + +for rev 0, the existing linelog is still useful for a, but not for b + + $ hg fastannotate --debug -nf a -r 0 + fastannotate: a: using fast path (resolved fctx: True) + 0 a: a1 + $ hg fastannotate --debug -nf b -r 0 + fastannotate: b: linelog cannot help in annotating this revision + 0 b: b1 + +a rebuild can also be triggered if "the main branch last time" mismatches + + $ echo a3 >> a + $ hg commit -m a3 + $ cat >> b << EOF + > b3 + > b4 + > EOF + $ hg commit -m b4 + $ hg bookmark -i main -q + $ hg fastannotate --debug -nf a + fastannotate: a: cache broken and deleted + fastannotate: a: 3 new changesets in the main branch + 1 a: b0 + 0 b: b1 + 2 a: a3 + $ hg fastannotate --debug -nf a + fastannotate: a: using fast path (resolved fctx: True) + 1 a: b0 + 0 b: b1 + 2 a: a3 + +linelog can be updated without being helpful + + $ hg mv a t + $ hg mv b a + $ hg mv t b + $ hg commit -m 'swap names again' + $ hg fastannotate --debug -nf b + fastannotate: b: 1 new changesets in the main branch + 1 a: b0 + 0 b: b1 + 2 a: a3 + $ hg fastannotate --debug -nf b + fastannotate: b: linelog cannot help in annotating this revision + 1 a: b0 + 0 b: b1 + 2 a: a3 + +move main branch forward again, rebuilds are one-time + + $ hg bookmark -i main -q + $ hg fastannotate --debug -nf a + fastannotate: a: cache broken and deleted + fastannotate: a: 4 new 
changesets in the main branch + 0 a: a1 + 1 b: a2 + 3 b: b3 + 3 b: b4 + $ hg fastannotate --debug -nf b + fastannotate: b: cache broken and deleted + fastannotate: b: 4 new changesets in the main branch + 1 a: b0 + 0 b: b1 + 2 a: a3 + $ hg fastannotate --debug -nf a + fastannotate: a: using fast path (resolved fctx: True) + 0 a: a1 + 1 b: a2 + 3 b: b3 + 3 b: b4 + $ hg fastannotate --debug -nf b + fastannotate: b: using fast path (resolved fctx: True) + 1 a: b0 + 0 b: b1 + 2 a: a3 + +list changeset hashes to improve readability + + $ hg log -T '{rev}:{node}\n' + 4:980e1ab8c516350172928fba95b49ede3b643dca + 3:14e123fedad9f491f5dde0beca2a767625a0a93a + 2:96495c41e4c12218766f78cdf244e768d7718b0f + 1:35c2b781234c994896aba36bd3245d3104e023df + 0:653e95416ebb5dbcc25bbc7f75568c9e01f7bd2f + +annotate a revision not in the linelog. linelog cannot be used, but does not get rebuilt either + + $ hg fastannotate --debug -nf a -r 96495c41e4c12218766f78cdf244e768d7718b0f + fastannotate: a: linelog cannot help in annotating this revision + 1 a: b0 + 0 b: b1 + 2 a: a3 + $ hg fastannotate --debug -nf a -r 2 + fastannotate: a: linelog cannot help in annotating this revision + 1 a: b0 + 0 b: b1 + 2 a: a3 + $ hg fastannotate --debug -nf a -r . + fastannotate: a: using fast path (resolved fctx: True) + 0 a: a1 + 1 b: a2 + 3 b: b3 + 3 b: b4 + +annotate an ancient revision where the path matches. 
linelog can be used + + $ hg fastannotate --debug -nf a -r 0 + fastannotate: a: using fast path (resolved fctx: True) + 0 a: a1 + $ hg fastannotate --debug -nf a -r 653e95416ebb5dbcc25bbc7f75568c9e01f7bd2f + fastannotate: a: using fast path (resolved fctx: False) + 0 a: a1
diff --git a/tests/test-fastannotate-revmap.py b/tests/test-fastannotate-revmap.py
new file mode 100644
--- /dev/null
+++ b/tests/test-fastannotate-revmap.py
@@ -0,0 +1,218 @@
+"""tests for the revmap used by fastannotate
+
+covers appending, flushing and re-loading revisions, corrupted file
+detection, copyfrom(), membership tests and getlastnode().
+"""
+from __future__ import absolute_import, print_function
+
+import os
+import tempfile
+
+from mercurial import util
+from hgext.fastannotate import error, revmap
+
+def genhsh(i):
+    """return a fake 20-byte hash whose first byte is chr(i)"""
+    return chr(i) + b'\0' * 19
+
+def gettemppath():
+    """return the path of a temporary file that has already been removed"""
+    fd, path = tempfile.mkstemp()
+    # close the descriptor before unlinking: removing a file that is still
+    # open fails on some platforms (e.g. Windows)
+    os.close(fd)
+    os.unlink(path)
+    return path
+
+def ensure(condition):
+    """raise RuntimeError unless condition is true"""
+    if not condition:
+        raise RuntimeError('Unexpected')
+
+def testbasicreadwrite():
+    """append revisions, look them up, then flush and re-load the revmap"""
+    path = gettemppath()
+
+    rm = revmap.revmap(path)
+    ensure(rm.maxrev == 0)
+    for i in xrange(5):
+        ensure(rm.rev2hsh(i) is None)
+    ensure(rm.hsh2rev(b'\0' * 20) is None)
+
+    paths = ['', 'a', None, 'b', 'b', 'c', 'c', None, 'a', 'b', 'a', 'a']
+    for i in xrange(1, 5):
+        ensure(rm.append(genhsh(i), sidebranch=(i & 1), path=paths[i]) == i)
+
+    ensure(rm.maxrev == 4)
+    for i in xrange(1, 5):
+        ensure(rm.hsh2rev(genhsh(i)) == i)
+        ensure(rm.rev2hsh(i) == genhsh(i))
+
+    # re-load and verify
+    rm.flush()
+    rm = revmap.revmap(path)
+    ensure(rm.maxrev == 4)
+    for i in xrange(1, 5):
+        ensure(rm.hsh2rev(genhsh(i)) == i)
+        ensure(rm.rev2hsh(i) == genhsh(i))
+        ensure(bool(rm.rev2flag(i) & revmap.sidebranchflag) == bool(i & 1))
+
+    # append without calling flush() explicitly
+    for i in xrange(5, 12):
+        ensure(rm.append(genhsh(i), sidebranch=(i & 1), path=paths[i],
+                         flush=True) == i)
+
+    # re-load and verify
+    rm = revmap.revmap(path)
+    ensure(rm.maxrev == 11)
+    for i in xrange(1, 12):
+        ensure(rm.hsh2rev(genhsh(i)) == i)
+        ensure(rm.rev2hsh(i) == genhsh(i))
+        # the expected path falls back to the previous entry when paths[i]
+        # is None. the parentheses matter: "==" binds tighter than "or"
+        ensure(rm.rev2path(i) == (paths[i] or paths[i - 1]))
+        ensure(bool(rm.rev2flag(i) & revmap.sidebranchflag) == bool(i & 1))
+
+    os.unlink(path)
+
+    # missing keys
+    ensure(rm.rev2hsh(12) is None)
+    ensure(rm.rev2hsh(0) is None)
+    ensure(rm.rev2hsh(-1) is None)
+    ensure(rm.rev2flag(12) is None)
+    ensure(rm.rev2path(12) is None)
+    ensure(rm.hsh2rev(b'\1' * 20) is None)
+
+    # illformed hash (not 20 bytes)
+    try:
+        rm.append(b'\0')
+    except Exception:
+        pass
+    else:
+        # fail outside of "try": the RuntimeError raised by ensure() would
+        # otherwise be swallowed by the broad "except" above
+        ensure(False)
+
+def testcorruptformat():
+    """revmap refuses to load files with corrupted content"""
+    path = gettemppath()
+
+    # incorrect header
+    with open(path, 'wb') as f:
+        f.write(b'NOT A VALID HEADER')
+    try:
+        revmap.revmap(path)
+    except error.CorruptedFileError:
+        pass
+    else:
+        ensure(False)
+
+    # rewrite the file
+    os.unlink(path)
+    rm = revmap.revmap(path)
+    rm.append(genhsh(0), flush=True)
+
+    rm = revmap.revmap(path)
+    ensure(rm.maxrev == 1)
+
+    # corrupt the file by appending a byte
+    size = os.stat(path).st_size
+    with open(path, 'ab') as f:
+        f.write(b'\xff')
+    try:
+        revmap.revmap(path)
+    except error.CorruptedFileError:
+        pass
+    else:
+        ensure(False)
+
+    # corrupt the file by removing the last byte. use "r+b": opening with
+    # "w" would discard the whole content before truncate() runs
+    ensure(size > 0)
+    with open(path, 'r+b') as f:
+        f.truncate(size - 1)
+    try:
+        revmap.revmap(path)
+    except error.CorruptedFileError:
+        pass
+    else:
+        ensure(False)
+
+    os.unlink(path)
+
+def testcopyfrom():
+    """a revmap filled by copyfrom() is flushed to an identical file"""
+    path = gettemppath()
+    rm = revmap.revmap(path)
+    for i in xrange(1, 10):
+        ensure(rm.append(genhsh(i), sidebranch=(i & 1), path=str(i // 3)) == i)
+    rm.flush()
+
+    # copy rm to rm2
+    rm2 = revmap.revmap()
+    rm2.copyfrom(rm)
+    path2 = gettemppath()
+    rm2.path = path2
+    rm2.flush()
+
+    # two files should be the same
+    ensure(len(set(util.readfile(p) for p in [path, path2])) == 1)
+
+    os.unlink(path)
+    os.unlink(path2)
+
+class fakefctx(object):
+    """minimal stand-in for a file context: provides node() and path()"""
+    def __init__(self, node, path=None):
+        self._node = node
+        self._path = path
+
+    def node(self):
+        return self._node
+
+    def path(self):
+        return self._path
+
+def testcontains():
+    """membership tests exclude sidebranch revisions and mismatched paths"""
+    path = gettemppath()
+
+    rm = revmap.revmap(path)
+    for i in xrange(1, 5):
+        ensure(rm.append(genhsh(i), sidebranch=(i & 1)) == i)
+
+    for i in xrange(1, 5):
+        ensure(((genhsh(i), None) in rm) == ((i & 1) == 0))
+        ensure((fakefctx(genhsh(i)) in rm) == ((i & 1) == 0))
+    for i in xrange(5, 10):
+        ensure(fakefctx(genhsh(i)) not in rm)
+        ensure((genhsh(i), None) not in rm)
+
+    # "contains" checks paths
+    rm = revmap.revmap()
+    for i in xrange(1, 5):
+        ensure(rm.append(genhsh(i), path=str(i // 2)) == i)
+    for i in xrange(1, 5):
+        ensure(fakefctx(genhsh(i), path=str(i // 2)) in rm)
+        ensure(fakefctx(genhsh(i), path='a') not in rm)
+
+def testlastnode():
+    """getlastnode() returns the hash of the last flushed revision"""
+    path = gettemppath()
+    ensure(revmap.getlastnode(path) is None)
+    rm = revmap.revmap(path)
+    ensure(revmap.getlastnode(path) is None)
+    for i in xrange(1, 10):
+        hsh = genhsh(i)
+        rm.append(hsh, path=str(i // 2), flush=True)
+        ensure(revmap.getlastnode(path) == hsh)
+        rm2 = revmap.revmap(path)
+        ensure(rm2.rev2hsh(rm2.maxrev) == hsh)
+
+    os.unlink(path)
+
+testbasicreadwrite()
+testcorruptformat()
+testcopyfrom()
+testcontains()
+testlastnode()
diff --git a/tests/test-fastannotate.t b/tests/test-fastannotate.t
new file mode 100644
--- /dev/null
+++ b/tests/test-fastannotate.t
@@ -0,0 +1,263 @@
+ $ cat >> $HGRCPATH << EOF + > [extensions] + > fastannotate= + > EOF + + $ HGMERGE=true; export HGMERGE + + $ hg init repo + $ cd repo + +a simple merge case + + $ echo 1 > a + $ hg commit -qAm 'append 1' + $ echo 2 >> a + $ hg commit -m 'append 2' + $ echo 3 >> a + $ hg commit -m 'append 3' + $ hg up 1 -q + $ cat > a << EOF + > 0 + > 1 + > 2 + > EOF + $ hg commit -qm 'insert 0' + $ hg merge 2 -q + $ echo 4 >> a + $ hg commit -m merge + $ hg log -G -T '{rev}: {desc}' + @ 4: merge + |\ + | o 3: insert 0 + | | + o | 2: append 3 + |/ + o 1: append 2 + | + o 0: append 1 + + $ hg fastannotate a + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + $ hg fastannotate -r 0 a + 0: 1 + $ hg fastannotate -r 1 a + 0: 1 + 1: 2 + $ hg fastannotate -udnclf a + test 3 d641cb51f61e Thu Jan 01 00:00:00 1970 +0000 a:1: 0 + test 0 4994017376d3 Thu Jan 01 00:00:00 1970 +0000 a:1: 1 + test
1 e940cb6d9a06 Thu Jan 01 00:00:00 1970 +0000 a:2: 2 + test 2 26162a884ba6 Thu Jan 01 00:00:00 1970 +0000 a:3: 3 + test 4 3ad7bcd2815f Thu Jan 01 00:00:00 1970 +0000 a:5: 4 + $ hg fastannotate --linear a + 3: 0 + 0: 1 + 1: 2 + 4: 3 + 4: 4 + +incrementally updating + + $ hg fastannotate -r 0 a --debug + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + $ hg fastannotate -r 0 a --debug --rebuild + fastannotate: a: 1 new changesets in the main branch + 0: 1 + $ hg fastannotate -r 1 a --debug + fastannotate: a: 1 new changesets in the main branch + 0: 1 + 1: 2 + $ hg fastannotate -r 3 a --debug + fastannotate: a: 1 new changesets in the main branch + 3: 0 + 0: 1 + 1: 2 + $ hg fastannotate -r 4 a --debug + fastannotate: a: 1 new changesets in the main branch + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + $ hg fastannotate -r 1 a --debug + fastannotate: a: using fast path (resolved fctx: True) + 0: 1 + 1: 2 + +rebuild happens automatically if unable to update + + $ hg fastannotate -r 2 a --debug + fastannotate: a: cache broken and deleted + fastannotate: a: 3 new changesets in the main branch + 0: 1 + 1: 2 + 2: 3 + +config option "fastannotate.mainbranch" + + $ hg fastannotate -r 1 --rebuild --config fastannotate.mainbranch=tip a --debug + fastannotate: a: 4 new changesets in the main branch + 0: 1 + 1: 2 + $ hg fastannotate -r 4 a --debug + fastannotate: a: using fast path (resolved fctx: True) + 3: 0 + 0: 1 + 1: 2 + 2: 3 + 4: 4 + +config option "fastannotate.modes" + + $ hg annotate -r 1 --debug a + 0: 1 + 1: 2 + $ hg annotate --config fastannotate.modes=fctx -r 1 --debug a + fastannotate: a: using fast path (resolved fctx: False) + 0: 1 + 1: 2 + $ hg fastannotate --config fastannotate.modes=fctx -h -q + hg: unknown command 'fastannotate' + (did you mean *) (glob) + [255] + +rename + + $ hg mv a b + $ cat > b << EOF + > 0 + > 11 + > 3 + > 44 + > EOF + $ hg commit -m b -q + $ hg fastannotate -ncf --long-hash b + 3 d641cb51f61e331c44654104301f8154d7865c89 a: 0 + 5 
d44dade239915bc82b91e4556b1257323f8e5824 b: 11 + 2 26162a884ba60e8c87bf4e0d6bb8efcc6f711a4e a: 3 + 5 d44dade239915bc82b91e4556b1257323f8e5824 b: 44 + $ hg fastannotate -r 26162a884ba60e8c87bf4e0d6bb8efcc6f711a4e a + 0: 1 + 1: 2 + 2: 3 + +fastannotate --deleted + + $ hg fastannotate --deleted -nf b + 3 a: 0 + 5 b: 11 + 0 a: -1 + 1 a: -2 + 2 a: 3 + 5 b: 44 + 4 a: -4 + $ hg fastannotate --deleted -r 3 -nf a + 3 a: 0 + 0 a: 1 + 1 a: 2 + +file and directories with ".l", ".m" suffixes + + $ cd .. + $ hg init repo2 + $ cd repo2 + + $ mkdir a.l b.m c.lock a.l.hg b.hg + $ for i in a b c d d.l d.m a.l/a b.m/a c.lock/a a.l.hg/a b.hg/a; do + > echo $i > $i + > done + $ hg add . -q + $ hg commit -m init + $ hg fastannotate a.l/a b.m/a c.lock/a a.l.hg/a b.hg/a d.l d.m a b c d + 0: a + 0: a.l.hg/a + 0: a.l/a + 0: b + 0: b.hg/a + 0: b.m/a + 0: c + 0: c.lock/a + 0: d + 0: d.l + 0: d.m + +empty file + + $ touch empty + $ hg commit -A empty -m empty + $ hg fastannotate empty + +json format + + $ hg fastannotate -Tjson -cludn b a empty + [ + { + "date": [0.0, 0], + "line": "a\n", + "line_number": 1, + "node": "1fd620b16252aecb54c6aa530dff5ed6e6ec3d21", + "rev": 0, + "user": "test" + }, + { + "date": [0.0, 0], + "line": "b\n", + "line_number": 1, + "node": "1fd620b16252aecb54c6aa530dff5ed6e6ec3d21", + "rev": 0, + "user": "test" + } + ] + + $ hg fastannotate -Tjson -cludn empty + [ + ] + $ hg fastannotate -Tjson --no-content -n a + [ + { + "rev": 0 + } + ] + +working copy + + $ echo a >> a + $ hg fastannotate -r 'wdir()' a + abort: cannot update linelog to wdir() + (set fastannotate.mainbranch) + [255] + $ cat >> $HGRCPATH << EOF + > [fastannotate] + > mainbranch = . 
+ > EOF + $ hg fastannotate -r 'wdir()' a + 0 : a + 1+: a + $ hg fastannotate -cludn -r 'wdir()' a + test 0 1fd620b16252 Thu Jan 01 00:00:00 1970 +0000:1: a + test 1 720582f5bdb6+ *:2: a (glob) + $ hg fastannotate -cludn -r 'wdir()' -Tjson a + [ + { + "date": [0.0, 0], + "line": "a\n", + "line_number": 1, + "node": "1fd620b16252aecb54c6aa530dff5ed6e6ec3d21", + "rev": 0, + "user": "test" + }, + { + "date": [*, 0], (glob) + "line": "a\n", + "line_number": 2, + "node": null, + "rev": null, + "user": "test" + } + ]