diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py --- a/mercurial/hgweb/hgwebdir_mod.py +++ b/mercurial/hgweb/hgwebdir_mod.py @@ -452,13 +452,10 @@ for virtualrepo in _virtualdirs(): real = repos.get(virtualrepo) if real: - wsgireq.env['REPO_NAME'] = virtualrepo - # We have to re-parse because of updated environment - # variable. - # TODO this is kind of hacky and we should have a better - # way of doing this than with REPO_NAME side-effects. + # Re-parse the WSGI environment to take into account our + # repository path component. wsgireq.req = requestmod.parserequestfromenv( - wsgireq.env, wsgireq.req.bodyfh) + wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo) try: # ensure caller gets private copy of ui repo = hg.repository(self.ui.copy(), real) diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py +++ b/mercurial/hgweb/request.py @@ -155,11 +155,16 @@ # Request body input stream. bodyfh = attr.ib() -def parserequestfromenv(env, bodyfh): +def parserequestfromenv(env, bodyfh, reponame=None): """Parse URL components from environment variables. WSGI defines request attributes via environment variables. This function parses the environment variables into a data structure. + + If ``reponame`` is defined, the leading path components matching that + string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``. + This simulates the world view of a WSGI application that processes + requests from the base URL of a repo. """ # PEP-0333 defines the WSGI spec and is a useful reference for this code. @@ -215,28 +220,34 @@ fullurl += '?' + env['QUERY_STRING'] advertisedfullurl += '?' + env['QUERY_STRING'] - # When dispatching requests, we look at the URL components (PATH_INFO - # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir - # has the concept of "virtual" repositories. This is defined via REPO_NAME. - # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app - # root. We also exclude its path components from PATH_INFO when resolving - # the dispatch path. + # If ``reponame`` is defined, that must be a prefix on PATH_INFO + # that represents the repository being dispatched to. When computing + # the dispatch info, we ignore these leading path components. apppath = env.get('SCRIPT_NAME', '') - if env.get('REPO_NAME'): - if not apppath.endswith('/'): - apppath += '/' + if reponame: + repoprefix = '/' + reponame.strip('/') - apppath += env.get('REPO_NAME') + if not env.get('PATH_INFO'): + raise error.ProgrammingError('reponame requires PATH_INFO') + + if not env['PATH_INFO'].startswith(repoprefix): + raise error.ProgrammingError('PATH_INFO does not begin with repo ' + 'name: %s (%s)' % (env['PATH_INFO'], + reponame)) - if 'PATH_INFO' in env: - dispatchparts = env['PATH_INFO'].strip('/').split('/') + dispatchpath = env['PATH_INFO'][len(repoprefix):] - # Strip out repo parts. - repoparts = env.get('REPO_NAME', '').split('/') - if dispatchparts[:len(repoparts)] == repoparts: - dispatchparts = dispatchparts[len(repoparts):] + if dispatchpath and not dispatchpath.startswith('/'): + raise error.ProgrammingError('reponame prefix of PATH_INFO does ' + 'not end at path delimiter: %s (%s)' % + (env['PATH_INFO'], reponame)) + + apppath = apppath.rstrip('/') + repoprefix + dispatchparts = dispatchpath.strip('/').split('/') + elif env.get('PATH_INFO', '').strip('/'): + dispatchparts = env['PATH_INFO'].strip('/').split('/') else: dispatchparts = [] @@ -283,7 +294,7 @@ apppath=apppath, dispatchparts=dispatchparts, dispatchpath=dispatchpath, havepathinfo='PATH_INFO' in env, - reponame=env.get('REPO_NAME'), + reponame=reponame, querystring=querystring, qsparams=qsparams, headers=headers, diff --git a/tests/test-wsgirequest.py b/tests/test-wsgirequest.py --- a/tests/test-wsgirequest.py +++ b/tests/test-wsgirequest.py @@ -5,6 +5,9 @@ from mercurial.hgweb import ( request as requestmod, ) +from mercurial import ( + error, +) DEFAULT_ENV = { r'REQUEST_METHOD': r'GET', @@ -20,11 +23,11 @@ r'wsgi.run_once': False, } -def parse(env, bodyfh=None, extra=None): +def parse(env, bodyfh=None, reponame=None, extra=None): env = dict(env) env.update(extra or {}) - return requestmod.parserequestfromenv(env, bodyfh) + return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame) class ParseRequestTests(unittest.TestCase): def testdefault(self): @@ -203,24 +206,26 @@ self.assertTrue(r.havepathinfo) def testreponame(self): - """REPO_NAME path components get stripped from URL.""" - r = parse(DEFAULT_ENV, extra={ - r'REPO_NAME': r'repo', - r'PATH_INFO': r'/path1/path2' - }) + """repository path components get stripped from URL.""" + + with self.assertRaisesRegexp(error.ProgrammingError, + b'reponame requires PATH_INFO'): + parse(DEFAULT_ENV, reponame=b'repo') - self.assertEqual(r.url, b'http://testserver/path1/path2') - self.assertEqual(r.baseurl, b'http://testserver') - self.assertEqual(r.advertisedurl, b'http://testserver/path1/path2') - self.assertEqual(r.advertisedbaseurl, b'http://testserver') - self.assertEqual(r.apppath, b'/repo') - self.assertEqual(r.dispatchparts, [b'path1', b'path2']) - self.assertEqual(r.dispatchpath, b'path1/path2') - self.assertTrue(r.havepathinfo) - self.assertEqual(r.reponame, b'repo') + with self.assertRaisesRegexp(error.ProgrammingError, + b'PATH_INFO does not begin with repo ' + b'name'): + parse(DEFAULT_ENV, reponame=b'repo', extra={ + r'PATH_INFO': r'/pathinfo', + }) - r = parse(DEFAULT_ENV, extra={ - r'REPO_NAME': r'repo', + with self.assertRaisesRegexp(error.ProgrammingError, + b'reponame prefix of PATH_INFO'): + parse(DEFAULT_ENV, reponame=b'repo', extra={ + r'PATH_INFO': r'/repoextra/path', + }) + + r = parse(DEFAULT_ENV, reponame=b'repo', extra={ r'PATH_INFO': r'/repo/path1/path2', }) @@ -234,8 +239,7 @@ self.assertTrue(r.havepathinfo) self.assertEqual(r.reponame, b'repo') - r = parse(DEFAULT_ENV, extra={ - r'REPO_NAME': r'prefix/repo', + r = parse(DEFAULT_ENV, reponame=b'prefix/repo', extra={ r'PATH_INFO': r'/prefix/repo/path1/path2', })