diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -305,6 +305,7 @@
 
     def _runwsgi(self, wsgireq, repo):
         req = wsgireq.req
+        res = wsgireq.res
         rctx = requestcontext(self, repo)
 
         # This state is global across all threads.
@@ -317,11 +318,12 @@
             wsgireq.headers = [h for h in wsgireq.headers
                                if h[0] != 'Content-Security-Policy']
             wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
+            res.headers['Content-Security-Policy'] = rctx.csp
 
-        handled, res = wireprotoserver.handlewsgirequest(
-            rctx, wsgireq, req, self.check_perm)
+        handled = wireprotoserver.handlewsgirequest(
+            rctx, wsgireq, req, res, self.check_perm)
         if handled:
-            return res
+            return res.sendresponse()
 
         if req.havepathinfo:
             query = req.dispatchpath
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -23,6 +23,7 @@
     attr,
 )
 from .. import (
+    error,
     pycompat,
     util,
 )
@@ -201,6 +202,128 @@
                          headers=headers,
                          bodyfh=bodyfh)
 
+class wsgiresponse(object):
+    """Represents a response to a WSGI request.
+
+    A response consists of a status line, headers, and a body.
+
+    Consumers must populate the ``status`` and ``headers`` fields and
+    make a call to a ``setbody*()`` method before the response can be
+    issued.
+
+    When it is time to start sending the response over the wire,
+    ``sendresponse()`` is called. It handles emitting the header portion
+    of the response message. It then yields chunks of body data to be
+    written to the peer. Typically, the WSGI application itself calls
+    and returns the value from ``sendresponse()``.
+    """
+
+    def __init__(self, req, startresponse):
+        """Create an empty response tied to a specific request.
+
+        ``req`` is a ``parsedrequest``. ``startresponse`` is the
+        ``start_response`` function passed to the WSGI application.
+        """
+        self._req = req
+        self._startresponse = startresponse
+
+        self.status = None
+        self.headers = wsgiheaders.Headers([])
+
+        self._bodybytes = None
+        self._bodygen = None
+        self._started = False
+
+    def setbodybytes(self, b):
+        """Define the response body as static bytes."""
+        if self._bodybytes is not None or self._bodygen is not None:
+            raise error.ProgrammingError('cannot define body multiple times')
+
+        self._bodybytes = b
+        self.headers['Content-Length'] = '%d' % len(b)
+
+    def setbodygen(self, gen):
+        """Define the response body as a generator of bytes."""
+        if self._bodybytes is not None or self._bodygen is not None:
+            raise error.ProgrammingError('cannot define body multiple times')
+
+        self._bodygen = gen
+
+    def sendresponse(self):
+        """Send the generated response to the client.
+
+        Before this is called, ``status`` must be set and one of
+        ``setbodybytes()`` or ``setbodygen()`` must be called.
+
+        Calling this method multiple times is not allowed.
+        """
+        if self._started:
+            raise error.ProgrammingError('sendresponse() called multiple times')
+
+        self._started = True
+
+        if not self.status:
+            raise error.ProgrammingError('status line not defined')
+
+        if self._bodybytes is None and self._bodygen is None:
+            raise error.ProgrammingError('response body not defined')
+
+        # Various HTTP clients (notably httplib) won't read the HTTP response
+        # until the HTTP request has been sent in full. If servers (us) send a
+        # response before the HTTP request has been fully sent, the connection
+        # may deadlock because neither end is reading.
+        #
+        # We work around this by "draining" the request data before
+        # sending any response in some conditions.
+        drain = False
+        close = False
+
+        # If the client sent Expect: 100-continue, we assume it is smart enough
+        # to deal with the server sending a response before reading the request.
+        # (httplib doesn't do this.)
+        if self._req.headers.get('Expect', '').lower() == '100-continue':
+            pass
+        # Only tend to request methods that have bodies. Strictly speaking,
+        # we should sniff for a body. But this is fine for our existing
+        # WSGI applications.
+        elif self._req.method not in ('POST', 'PUT'):
+            pass
+        else:
+            # If we don't know how much data to read, there's no guarantee
+            # that we can drain the request responsibly. The WSGI
+            # specification only says that servers *should* ensure the
+            # input stream doesn't overrun the actual request. So there's
+            # no guarantee that reading until EOF won't corrupt the stream
+            # state.
+            if not isinstance(self._req.bodyfh, util.cappedreader):
+                close = True
+            else:
+                # We /could/ only drain certain HTTP response codes. But 200 and
+                # non-200 wire protocol responses both require draining. Since
+                # we have a capped reader in place for all situations where we
+                # drain, it is safe to read from that stream. We'll either do
+                # a drain or no-op if we're already at EOF.
+                drain = True
+
+        if close:
+            self.headers['Connection'] = 'Close'
+
+        if drain:
+            assert isinstance(self._req.bodyfh, util.cappedreader)
+            while True:
+                chunk = self._req.bodyfh.read(32768)
+                if not chunk:
+                    break
+
+        self._startresponse(pycompat.sysstr(self.status), self.headers.items())
+        if self._bodybytes:
+            yield self._bodybytes
+        elif self._bodygen is not None:
+            for chunk in self._bodygen:
+                yield chunk
+        elif self._bodybytes is None:
+            raise error.ProgrammingError('do not know how to send body')
+
 class wsgirequest(object):
     """Higher-level API for a WSGI request.
 
@@ -228,6 +351,7 @@
         self.env = wsgienv
         self.req = parserequestfromenv(wsgienv, inp)
         self.form = self.req.querystringdict
+        self.res = wsgiresponse(self.req, start_response)
         self._start_response = start_response
         self.server_write = None
         self.headers = []
diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -149,7 +149,7 @@
 def iscmd(cmd):
     return cmd in wireproto.commands
 
-def handlewsgirequest(rctx, wsgireq, req, checkperm):
+def handlewsgirequest(rctx, wsgireq, req, res, checkperm):
     """Possibly process a wire protocol request.
 
     If the current request is a wire protocol request, the request is
@@ -157,10 +157,10 @@
 
     ``wsgireq`` is a ``wsgirequest`` instance.
     ``req`` is a ``parsedrequest`` instance.
+    ``res`` is a ``wsgiresponse`` instance.
 
-    Returns a 2-tuple of (bool, response) where the 1st element indicates
-    whether the request was handled and the 2nd element is a return
-    value for a WSGI application (often a generator of bytes).
+    Returns a bool indicating if the request was serviced. If set, ``res``
+    has been populated and the caller should return ``res.sendresponse()``.
     """
     # Avoid cycle involving hg module.
     from .hgweb import common as hgwebcommon
@@ -171,7 +171,7 @@
     # string parameter. If it isn't present, this isn't a wire protocol
     # request.
     if 'cmd' not in req.querystringdict:
-        return False, None
+        return False
 
     cmd = req.querystringdict['cmd'][0]
 
@@ -183,18 +183,19 @@
     # known wire protocol commands and it is less confusing for machine
     # clients.
     if not iscmd(cmd):
-        return False, None
+        return False
 
     # The "cmd" query string argument is only valid on the root path of the
     # repo. e.g. ``/?cmd=foo``, ``/repo?cmd=foo``. URL paths within the repo
     # like ``/blah?cmd=foo`` are not allowed. So don't recognize the request
     # in this case. We send an HTTP 404 for backwards compatibility reasons.
     if req.dispatchpath:
-        res = _handlehttperror(
-            hgwebcommon.ErrorResponse(hgwebcommon.HTTP_NOT_FOUND), wsgireq,
-            req)
-
-        return True, res
+        res.status = hgwebcommon.statusmessage(404)
+        res.headers['Content-Type'] = HGTYPE
+        # TODO This is not a good response to issue for this request. This
+        # is mostly for BC for now.
+        res.setbodybytes('0\n%s\n' % b'Not Found')
+        return True
 
     proto = httpv1protocolhandler(wsgireq, req, repo.ui,
                                   lambda perm: checkperm(rctx, wsgireq, perm))
@@ -204,11 +205,16 @@
     # exception here. So consider refactoring into a exception type that
     # is associated with the wire protocol.
     try:
-        res = _callhttp(repo, wsgireq, req, proto, cmd)
+        _callhttp(repo, wsgireq, req, res, proto, cmd)
     except hgwebcommon.ErrorResponse as e:
-        res = _handlehttperror(e, wsgireq, req)
+        for k, v in e.headers:
+            res.headers[k] = v
+        res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
+        # TODO This response body assumes the failed command was
+        # "unbundle." That assumption is not always valid.
+        res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
 
-    return True, res
+    return True
 
 def _httpresponsetype(ui, req, prefer_uncompressed):
     """Determine the appropriate response type and compression settings.
@@ -250,7 +256,10 @@
     opts = {'level': ui.configint('server', 'zliblevel')}
     return HGTYPE, util.compengines['zlib'], opts
 
-def _callhttp(repo, wsgireq, req, proto, cmd):
+def _callhttp(repo, wsgireq, req, res, proto, cmd):
+    # Avoid cycle involving hg module.
+    from .hgweb import common as hgwebcommon
+
     def genversion2(gen, engine, engineopts):
         # application/mercurial-0.2 always sends a payload header
         # identifying the compression engine.
@@ -262,26 +271,35 @@
         for chunk in gen:
             yield chunk
 
+    def setresponse(code, contenttype, bodybytes=None, bodygen=None):
+        if code == HTTP_OK:
+            res.status = '200 Script output follows'
+        else:
+            res.status = hgwebcommon.statusmessage(code)
+
+        res.headers['Content-Type'] = contenttype
+
+        if bodybytes is not None:
+            res.setbodybytes(bodybytes)
+        if bodygen is not None:
+            res.setbodygen(bodygen)
+
     if not wireproto.commands.commandavailable(cmd, proto):
-        wsgireq.respond(HTTP_OK, HGERRTYPE,
-                        body=_('requested wire protocol command is not '
-                               'available over HTTP'))
-        return []
+        setresponse(HTTP_OK, HGERRTYPE,
+                    _('requested wire protocol command is not available over '
+                      'HTTP'))
+        return
 
     proto.checkperm(wireproto.commands[cmd].permission)
 
     rsp = wireproto.dispatch(repo, proto, cmd)
 
     if isinstance(rsp, bytes):
-        wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
-        return []
+        setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
     elif isinstance(rsp, wireprototypes.bytesresponse):
-        wsgireq.respond(HTTP_OK, HGTYPE, body=rsp.data)
-        return []
+        setresponse(HTTP_OK, HGTYPE, bodybytes=rsp.data)
     elif isinstance(rsp, wireprototypes.streamreslegacy):
-        gen = rsp.gen
-        wsgireq.respond(HTTP_OK, HGTYPE)
-        return gen
+        setresponse(HTTP_OK, HGTYPE, bodygen=rsp.gen)
     elif isinstance(rsp, wireprototypes.streamres):
         gen = rsp.gen
 
@@ -294,30 +312,17 @@
         if mediatype == HGTYPE2:
             gen = genversion2(gen, engine, engineopts)
 
-        wsgireq.respond(HTTP_OK, mediatype)
-        return gen
+        setresponse(HTTP_OK, mediatype, bodygen=gen)
     elif isinstance(rsp, wireprototypes.pushres):
         rsp = '%d\n%s' % (rsp.res, rsp.output)
-        wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
-        return []
+        setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
     elif isinstance(rsp, wireprototypes.pusherr):
         rsp = '0\n%s\n' % rsp.res
-        wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
-        return []
+        setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
     elif isinstance(rsp, wireprototypes.ooberror):
-        rsp = rsp.message
-        wsgireq.respond(HTTP_OK, HGERRTYPE, body=rsp)
-        return []
-    raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
-
-def _handlehttperror(e, wsgireq, req):
-    """Called when an ErrorResponse is raised during HTTP request processing."""
-
-    # TODO This response body assumes the failed command was
-    # "unbundle." That assumption is not always valid.
-    wsgireq.respond(e, HGTYPE, body='0\n%s\n' % pycompat.bytestr(e))
-
-    return ''
+        setresponse(HTTP_OK, HGERRTYPE, bodybytes=rsp.message)
+    else:
+        raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
 
 def _sshv1respondbytes(fout, value):
     """Send a bytes response for protocol version 1."""