diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -78,6 +78,7 @@ url as urlmod, util, vfs as vfsmod, + wireprotoframing, wireprotoserver, ) from .utils import dateutil @@ -2711,6 +2712,12 @@ The content of the file defined as the value to this argument will be transferred verbatim as the HTTP request body. + ``frame <type> <flags> <payload>`` + Send a unified protocol frame as part of the request body. + + All frames will be collected and sent as the body to the HTTP + request. + close ----- @@ -2750,6 +2757,28 @@ --------- ``read()`` N bytes from the server's stderr pipe, if available. + + Specifying Unified Frame-Based Protocol Frames + ---------------------------------------------- + + It is possible to emit a *Unified Frame-Based Protocol* frame by using special + syntax. + + A frame is composed of a type, flags, and payload. These can be parsed + from a string of the form ``<type> <flags> <payload>``. That is, 3 + space-delimited strings. + + ``payload`` is the simplest: it is evaluated as a Python byte string + literal. + + ``type`` can be an integer value for the frame type or the string name + of the type. The strings are defined in ``wireprotoframing.py``. e.g. + ``command-name``. + + ``flags`` is a ``|`` delimited list of flag components. Each component + (and there can be just one) can be an integer or a flag name for the + specified frame type. Values are resolved to integers and then bitwise + OR'd together. 
""" opts = pycompat.byteskwargs(opts) @@ -2953,6 +2982,7 @@ method, httppath = request[1:] headers = {} body = None + frames = [] for line in lines: line = line.lstrip() m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line) @@ -2963,11 +2993,20 @@ if line.startswith(b'BODYFILE '): with open(line.split(b' ', 1), 'rb') as fh: body = fh.read() + elif line.startswith(b'frame '): + frame = wireprotoframing.makeframefromhumanstring( + line[len(b'frame '):]) + + frames.append(frame) else: raise error.Abort(_('unknown argument to httprequest: %s') % line) url = path + httppath + + if frames: + body = b''.join(bytes(f) for f in frames) + req = urlmod.urlreq.request(pycompat.strurl(url), body, headers) # urllib.Request insists on using has_data() as a proxy for diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt --- a/mercurial/help/internals/wireprotocol.txt +++ b/mercurial/help/internals/wireprotocol.txt @@ -187,12 +187,15 @@ Requests to unknown commands or URLS result in an HTTP 404. TODO formally define response type, how error is communicated, etc. -HTTP request and response bodies use the *TBD Protocol* for media exchange. +HTTP request and response bodies use the *Unified Frame-Based Protocol* +(defined below) for media exchange. The entirety of the HTTP message +body is 0 or more frames as defined by this protocol. Clients and servers MUST advertise the ``TBD`` media type via the ``Content-Type`` request and response headers. In addition, clients MUST advertise this media type value in their ``Accept`` request header in all requests. +TODO finalize the media type. For now, it is defined in wireprotoserver.py. Servers receiving requests without an ``Accept`` header SHOULD respond with an HTTP 406. 
@@ -429,7 +432,7 @@ SSH Version 2 Transport ----------------------- -**Experimental** +**Experimental and under development** Version 2 of the SSH transport behaves identically to version 1 of the SSH transport with the exception of handshake semantics. See above for how @@ -451,6 +454,164 @@ Following capabilities advertisement, the peers communicate using version 1 of the SSH transport. +Unified Frame-Based Protocol +============================ + +**Experimental and under development** + +The *Unified Frame-Based Protocol* is a communications protocol between +Mercurial peers. The protocol aims to be mostly transport agnostic +(works similarly on HTTP, SSH, etc). + +To operate the protocol, a bi-directional, half-duplex pipe supporting +ordered sends and receives is required. That is, each peer has one pipe +for sending data and another for receiving. + +The protocol is request-response based: the client issues requests to +the server, which issues replies to those requests. Server-initiated +messaging is not supported. + +All data is read and written in atomic units called *frames*. These +are conceptually similar to TCP packets. Higher-level functionality +is built on the exchange and processing of frames. + +Frames begin with a 4 octet header followed by a variable length +payload:: + + +-----------------------------------------------+ + | Length (24) | + +-----------+-----------------------------------+ + | Type (4) | + +-----------+ + | Flags (4) | + +===========+===================================================| + | Frame Payload (0...) ... + +---------------------------------------------------------------+ + +The length of the frame payload is expressed as an unsigned 24 bit +little endian integer. Values larger than 65535 MUST NOT be used unless +given permission by the server as part of the negotiated capabilities +during the handshake. The frame header is not part of the advertised +frame length. 
+ +The 4-bit ``Type`` field denotes the type of message being sent. + +The 4-bit ``Flags`` field defines special, per-type attributes for +the frame. + +The sections below define the frame types and their behavior. + +Command Request (``0x01``) +-------------------------- + +This frame contains a request to run a command. + +The name of the command to run constitutes the entirety of the frame +payload. + +This frame type MUST ONLY be sent from clients to servers: it is illegal +for a server to send this frame to a client. + +The following flag values are defined for this type: + +0x01 + End of command data. When set, the client will not send any command + arguments or additional command data. When set, the command has been + fully issued and the server has the full context to process the command. + The next frame issued by the client is not part of this command. +0x02 + Command argument frames expected. When set, the client will send + *Command Argument* frames containing command argument data. +0x04 + Command data frames expected. When set, the client will send + *Command Data* frames containing a raw stream of data for this + command. + +The ``0x01`` flag is mutually exclusive with both the ``0x02`` and ``0x04`` +flags. + +Command Argument (``0x02``) +--------------------------- + +This frame contains a named argument for a command. + +The frame type MUST ONLY be sent from clients to servers: it is illegal +for a server to send this frame to a client. + +The payload consists of: + +* A 16-bit little endian integer denoting the length of the + argument name. +* A 16-bit little endian integer denoting the length of the + argument value. +* N bytes of ASCII data containing the argument name. +* N bytes of binary data containing the argument value. + +The payload MUST hold the entirety of the 32-bit header and the +argument name. The argument value MAY span multiple frames. If this +occurs, the appropriate frame flag should be set to indicate this. 
+ +The following flag values are defined for this type: + +0x01 + Argument data continuation. When set, the data for this argument did + not fit in a single frame and the next frame will contain additional + argument data. + +0x02 + End of arguments data. When set, the client will not send any more + command arguments for the command this frame is associated with. + The next frame issued by the client will be command data or + belong to a separate request. + +Command Data (``0x03``) +----------------------- + +This frame contains raw data for a command. + +Most commands can be executed by specifying arguments. However, +arguments have an upper bound to their length. For commands that +accept data that is beyond this length or whose length isn't known +when the command is initially sent, they will need to stream +arbitrary data to the server. This frame type facilitates the sending +of this data. + +The payload of this frame type consists of a stream of raw data to be +consumed by the command handler on the server. The format of the data +is command specific. + +The following flag values are defined for this type: + +0x01 + Command data continuation. When set, the data for this command + continues into a subsequent frame. + +0x02 + End of data. When set, command data has been fully sent to the + server. The command has been fully issued and no new data for this + command will be sent. The next frame will belong to a new command. + +Issuing Commands +---------------- + +A client can request that a remote run a command by sending it +frames defining that command. This logical stream is composed of +1 ``Command Request`` frame, 0 or more ``Command Argument`` frames, +and 0 or more ``Command Data`` frames. + +Argument frames are the recommended mechanism for transferring fixed +sets of parameters to a command. Data frames are appropriate for +transferring variable data. 
A similar comparison would be to HTTP: +argument frames are headers and the message body is data frames. + +It is recommended for servers to delay the dispatch of a command +until all argument frames for that command have been received. Servers +MAY impose limits on the maximum argument size. +TODO define failure mechanism. + +Servers MAY dispatch to commands immediately once argument data +is available or delay until command data is received in full. + Capabilities ============ diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py new file mode 100644 --- /dev/null +++ b/mercurial/wireprotoframing.py @@ -0,0 +1,156 @@ +# wireprotoframing.py - unified framing protocol for wire protocol +# +# Copyright 2018 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +# This file contains functionality to support the unified frame-based wire +# protocol. For details about the protocol, see +# `hg help internals.wireprotocol`. + +from __future__ import absolute_import + +import struct + +from . 
import ( + util, +) + +FRAME_HEADER_SIZE = 4 +DEFAULT_MAX_FRAME_SIZE = 32768 + +FRAME_TYPE_COMMAND_NAME = 0x01 +FRAME_TYPE_COMMAND_ARGUMENT = 0x02 +FRAME_TYPE_COMMAND_DATA = 0x03 + +FRAME_TYPES = { + b'command-name': FRAME_TYPE_COMMAND_NAME, + b'command-argument': FRAME_TYPE_COMMAND_ARGUMENT, + b'command-data': FRAME_TYPE_COMMAND_DATA, +} + +FLAG_COMMAND_NAME_EOS = 0x01 +FLAG_COMMAND_NAME_HAVE_ARGS = 0x02 +FLAG_COMMAND_NAME_HAVE_DATA = 0x04 + +FLAGS_COMMAND = { + b'eos': FLAG_COMMAND_NAME_EOS, + b'have-args': FLAG_COMMAND_NAME_HAVE_ARGS, + b'have-data': FLAG_COMMAND_NAME_HAVE_DATA, +} + +FLAG_COMMAND_ARGUMENT_CONTINUATION = 0x01 +FLAG_COMMAND_ARGUMENT_EOA = 0x02 + +FLAGS_COMMAND_ARGUMENT = { + b'continuation': FLAG_COMMAND_ARGUMENT_CONTINUATION, + b'eoa': FLAG_COMMAND_ARGUMENT_EOA, +} + +FLAG_COMMAND_DATA_CONTINUATION = 0x01 +FLAG_COMMAND_DATA_EOS = 0x02 + +FLAGS_COMMAND_DATA = { + b'continuation': FLAG_COMMAND_DATA_CONTINUATION, + b'eos': FLAG_COMMAND_DATA_EOS, +} + +# Maps frame types to their available flags. +FRAME_TYPE_FLAGS = { + FRAME_TYPE_COMMAND_NAME: FLAGS_COMMAND, + FRAME_TYPE_COMMAND_ARGUMENT: FLAGS_COMMAND_ARGUMENT, + FRAME_TYPE_COMMAND_DATA: FLAGS_COMMAND_DATA, +} + +ARGUMENT_FRAME_HEADER = struct.Struct(r'<HH') + +def makeframe(frametype, frameflags, payload): + """Assemble a frame into a byte array.""" + # TODO assert size of payload. + frame = bytearray(FRAME_HEADER_SIZE + len(payload)) + + l = struct.pack(r'<I', len(payload)) + frame[0:3] = l[0:3] + frame[3] = (frametype << 4) | frameflags + frame[4:] = payload + + return frame + +def makeframefromhumanstring(s): + """Given a string of the form: <type> <flags> <payload>, creates a frame. + + This can be used by user-facing applications and tests for creating + frames easily without having to type out a bunch of constants. + + Frame type and flags can be specified by integer or named constant. + Flags can be delimited by `|` to bitwise OR them together. 
+ """ + frametype, frameflags, payload = s.split(b' ', 2) + + if frametype in FRAME_TYPES: + frametype = FRAME_TYPES[frametype] + else: + frametype = int(frametype) + + finalflags = 0 + validflags = FRAME_TYPE_FLAGS[frametype] + for flag in frameflags.split(b'|'): + if flag in validflags: + finalflags |= validflags[flag] + else: + finalflags |= int(flag) + + payload = util.unescapestr(payload) + + return makeframe(frametype, finalflags, payload) + +def createcommandframes(cmd, args, datafh=None): + """Create frames necessary to transmit a request to run a command. + + This is a generator of bytearrays. Each item represents a frame + ready to be sent over the wire to a peer. + """ + flags = 0 + if args: + flags |= FLAG_COMMAND_NAME_HAVE_ARGS + if datafh: + flags |= FLAG_COMMAND_NAME_HAVE_DATA + + if not flags: + flags |= FLAG_COMMAND_NAME_EOS + + yield makeframe(FRAME_TYPE_COMMAND_NAME, flags, cmd) + + for i, k in enumerate(sorted(args)): + v = args[k] + last = i == len(args) - 1 + + # TODO handle splitting of argument values across frames. 
+ payload = bytearray(ARGUMENT_FRAME_HEADER.size + len(k) + len(v)) + offset = 0 + ARGUMENT_FRAME_HEADER.pack_into(payload, offset, len(k), len(v)) + offset += ARGUMENT_FRAME_HEADER.size + payload[offset:offset + len(k)] = k + offset += len(k) + payload[offset:offset + len(v)] = v + + flags = FLAG_COMMAND_ARGUMENT_EOA if last else 0 + yield makeframe(FRAME_TYPE_COMMAND_ARGUMENT, flags, payload) + + if datafh: + while True: + data = datafh.read(DEFAULT_MAX_FRAME_SIZE) + + done = False + if len(data) == DEFAULT_MAX_FRAME_SIZE: + flags = FLAG_COMMAND_DATA_CONTINUATION + else: + flags = FLAG_COMMAND_DATA_EOS + assert datafh.read(1) == b'' + done = True + + yield makeframe(FRAME_TYPE_COMMAND_DATA, flags, data) + + if done: + break diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py --- a/mercurial/wireprotoserver.py +++ b/mercurial/wireprotoserver.py @@ -32,7 +32,7 @@ HGTYPE = 'application/mercurial-0.1' HGTYPE2 = 'application/mercurial-0.2' HGERRTYPE = 'application/hg-error' -HTTPV2TYPE = 'application/mercurial-tbd' +FRAMINGTYPE = b'application/mercurial-exp-framing-0001' HTTPV2 = wireprototypes.HTTPV2 SSHV1 = wireprototypes.SSHV1 @@ -336,21 +336,21 @@ res.setbodybytes(_('invalid wire protocol command: %s') % command) return - if req.headers.get(b'Accept') != HTTPV2TYPE: + if req.headers.get(b'Accept') != FRAMINGTYPE: res.status = b'406 Not Acceptable' res.headers[b'Content-Type'] = b'text/plain' res.setbodybytes(_('client MUST specify Accept header with value: %s\n') - % HTTPV2TYPE) + % FRAMINGTYPE) return if (b'Content-Type' in req.headers - and req.headers[b'Content-Type'] != HTTPV2TYPE): + and req.headers[b'Content-Type'] != FRAMINGTYPE): res.status = b'415 Unsupported Media Type' # TODO we should send a response with appropriate media type, # since client does Accept it. 
res.headers[b'Content-Type'] = b'text/plain' res.setbodybytes(_('client MUST send Content-Type header with ' - 'value: %s\n') % HTTPV2TYPE) + 'value: %s\n') % FRAMINGTYPE) return # We don't do anything meaningful yet. diff --git a/tests/test-http-api-httpv2.t b/tests/test-http-api-httpv2.t --- a/tests/test-http-api-httpv2.t +++ b/tests/test-http-api-httpv2.t @@ -1,5 +1,5 @@ $ HTTPV2=exp-http-v2-0001 - $ MEDIATYPE=application/mercurial-tbd + $ MEDIATYPE=application/mercurial-exp-framing-0001 $ send() { > hg --verbose debugwireproto --peer raw http://$LOCALIP:$HGPORT/ @@ -120,9 +120,9 @@ s> Server: testing stub value\r\n s> Date: $HTTP_DATE$\r\n s> Content-Type: text/plain\r\n - s> Content-Length: 72\r\n + s> Content-Length: 85\r\n s> \r\n - s> client MUST specify Accept header with value: application/mercurial-tbd\n + s> client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n Bad Accept header results in 406 @@ -143,9 +143,9 @@ s> Server: testing stub value\r\n s> Date: $HTTP_DATE$\r\n s> Content-Type: text/plain\r\n - s> Content-Length: 72\r\n + s> Content-Length: 85\r\n s> \r\n - s> client MUST specify Accept header with value: application/mercurial-tbd\n + s> client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n Bad Content-Type header results in 415 @@ -158,7 +158,7 @@ using raw connection to peer s> POST /api/exp-http-v2-0001/ro/customreadonly HTTP/1.1\r\n s> Accept-Encoding: identity\r\n - s> accept: application/mercurial-tbd\r\n + s> accept: application/mercurial-exp-framing-0001\r\n s> content-type: badmedia\r\n s> user-agent: test\r\n s> host: $LOCALIP:$HGPORT\r\n (glob) @@ -168,9 +168,9 @@ s> Server: testing stub value\r\n s> Date: $HTTP_DATE$\r\n s> Content-Type: text/plain\r\n - s> Content-Length: 75\r\n + s> Content-Length: 88\r\n s> \r\n - s> client MUST send Content-Type header with value: application/mercurial-tbd\n + s> client MUST send Content-Type header with value: 
application/mercurial-exp-framing-0001\n Request to read-only command works out of the box @@ -179,15 +179,18 @@ > accept: $MEDIATYPE > content-type: $MEDIATYPE > user-agent: test + > frame command-name eos customreadonly > EOF using raw connection to peer s> POST /api/exp-http-v2-0001/ro/customreadonly HTTP/1.1\r\n s> Accept-Encoding: identity\r\n - s> accept: application/mercurial-tbd\r\n - s> content-type: application/mercurial-tbd\r\n + s> accept: application/mercurial-exp-framing-0001\r\n + s> content-type: application/mercurial-exp-framing-0001\r\n s> user-agent: test\r\n + s> content-length: 18\r\n s> host: $LOCALIP:$HGPORT\r\n (glob) s> \r\n + s> \x0e\x00\x00\x11customreadonly s> makefile('rb', None) s> HTTP/1.1 200 OK\r\n s> Server: testing stub value\r\n @@ -283,15 +286,18 @@ > user-agent: test > accept: $MEDIATYPE > content-type: $MEDIATYPE + > frame command-name eos customreadonly > EOF using raw connection to peer s> POST /api/exp-http-v2-0001/rw/customreadonly HTTP/1.1\r\n s> Accept-Encoding: identity\r\n - s> accept: application/mercurial-tbd\r\n - s> content-type: application/mercurial-tbd\r\n + s> accept: application/mercurial-exp-framing-0001\r\n + s> content-type: application/mercurial-exp-framing-0001\r\n s> user-agent: test\r\n + s> content-length: 18\r\n s> host: $LOCALIP:$HGPORT\r\n (glob) s> \r\n + s> \x0e\x00\x00\x11customreadonly s> makefile('rb', None) s> HTTP/1.1 200 OK\r\n s> Server: testing stub value\r\n @@ -311,7 +317,7 @@ using raw connection to peer s> POST /api/exp-http-v2-0001/rw/badcommand HTTP/1.1\r\n s> Accept-Encoding: identity\r\n - s> accept: application/mercurial-tbd\r\n + s> accept: application/mercurial-exp-framing-0001\r\n s> user-agent: test\r\n s> host: $LOCALIP:$HGPORT\r\n (glob) s> \r\n