This is easier than rolling our own encoding format.
As a bonus, several artificial limits went away (the maximum number of
arguments and labels, and the maximum length of formatting, argument,
and label strings) because we are no longer using fixed length fields
to hold sizes.
hg-reviewers |
This is easier than rolling our own encoding format.
As a bonus, several artificial limits went away (the maximum number of
arguments and labels, and the maximum length of formatting, argument,
and label strings) because we are no longer using fixed length fields
to hold sizes.
Automatic diff as part of commit; lint not applicable. |
Automatic diff as part of commit; unit tests not applicable. |
Change | Path | Packages |
---|---|---|
M | mercurial/help/internals/wireprotocol.txt (28 lines) | |
M | mercurial/wireprotoframing.py (46 lines) | |
M | tests/test-wireproto-serverreactor.py (58 lines) | |
are ``%s`` and ``%%``. ``%s`` will be replaced by whatever the string | are ``%s`` and ``%%``. ``%s`` will be replaced by whatever the string | ||||
at that position resolves to. ``%%`` will be replaced by ``%``. All | at that position resolves to. ``%%`` will be replaced by ``%``. All | ||||
other 2-byte sequences beginning with ``%`` represent a literal | other 2-byte sequences beginning with ``%`` represent a literal | ||||
``%`` followed by that character. However, future versions of the | ``%`` followed by that character. However, future versions of the | ||||
wire protocol reserve the right to allow clients to opt in to receiving | wire protocol reserve the right to allow clients to opt in to receiving | ||||
formatting strings with additional formatters, hence why ``%%`` is | formatting strings with additional formatters, hence why ``%%`` is | ||||
required to represent the literal ``%``. | required to represent the literal ``%``. | ||||
The raw frame consists of a series of data structures representing | The frame payload consists of a CBOR array of CBOR maps. Each map | ||||
textual atoms to print. Each atom begins with a struct defining the | defines an *atom* of text data to print. Each *atom* has the following | ||||
size of the data that follows: | bytestring keys: | ||||
* A 16-bit little endian unsigned integer denoting the length of the | msg | ||||
formatting string. | (bytestring) The formatting string. Content MUST be ASCII. | ||||
* An 8-bit unsigned integer denoting the number of label strings | args (optional) | ||||
that follow. | Array of bytestrings defining arguments to the formatting string. | ||||
* An 8-bit unsigned integer denoting the number of formatting string | labels (optional) | ||||
arguments strings that follow. | Array of bytestrings defining labels to apply to this atom. | ||||
* An array of 8-bit unsigned integers denoting the lengths of | |||||
*labels* data. | |||||
* An array of 16-bit unsigned integers denoting the lengths of | |||||
formatting strings. | |||||
* The formatting string, encoded as UTF-8. | |||||
* 0 or more ASCII strings defining labels to apply to this atom. | |||||
* 0 or more UTF-8 strings that will be used as arguments to the | |||||
formatting string. | |||||
TODO use ASCII for formatting string. | |||||
All data to be printed MUST be encoded into a single frame: this frame | All data to be printed MUST be encoded into a single frame: this frame | ||||
does not support spanning data across multiple frames. | does not support spanning data across multiple frames. | ||||
All textual data encoded in these frames is assumed to be line delimited. | All textual data encoded in these frames is assumed to be line delimited. | ||||
The last atom in the frame SHOULD end with a newline (``\n``). If it | The last atom in the frame SHOULD end with a newline (``\n``). If it | ||||
doesn't, clients MAY add a newline to facilitate immediate printing. | doesn't, clients MAY add a newline to facilitate immediate printing. | ||||
if application: | if application: | ||||
flags |= FLAG_ERROR_RESPONSE_APPLICATION | flags |= FLAG_ERROR_RESPONSE_APPLICATION | ||||
yield stream.makeframe(requestid=requestid, | yield stream.makeframe(requestid=requestid, | ||||
typeid=FRAME_TYPE_ERROR_RESPONSE, | typeid=FRAME_TYPE_ERROR_RESPONSE, | ||||
flags=flags, | flags=flags, | ||||
payload=msg) | payload=msg) | ||||
def createtextoutputframe(stream, requestid, atoms): | def createtextoutputframe(stream, requestid, atoms, | ||||
maxframesize=DEFAULT_MAX_FRAME_SIZE): | |||||
"""Create a text output frame to render text to people. | """Create a text output frame to render text to people. | ||||
``atoms`` is a 3-tuple of (formatting string, args, labels). | ``atoms`` is a 3-tuple of (formatting string, args, labels). | ||||
The formatting string contains ``%s`` tokens to be replaced by the | The formatting string contains ``%s`` tokens to be replaced by the | ||||
corresponding indexed entry in ``args``. ``labels`` is an iterable of | corresponding indexed entry in ``args``. ``labels`` is an iterable of | ||||
formatters to be applied at rendering time. In terms of the ``ui`` | formatters to be applied at rendering time. In terms of the ``ui`` | ||||
class, each atom corresponds to a ``ui.write()``. | class, each atom corresponds to a ``ui.write()``. | ||||
""" | """ | ||||
bytesleft = DEFAULT_MAX_FRAME_SIZE | atomdicts = [] | ||||
atomchunks = [] | |||||
for (formatting, args, labels) in atoms: | for (formatting, args, labels) in atoms: | ||||
if len(args) > 255: | |||||
raise ValueError('cannot use more than 255 formatting arguments') | |||||
if len(labels) > 255: | |||||
raise ValueError('cannot use more than 255 labels') | |||||
# TODO look for localstr, other types here? | # TODO look for localstr, other types here? | ||||
if not isinstance(formatting, bytes): | if not isinstance(formatting, bytes): | ||||
raise ValueError('must use bytes formatting strings') | raise ValueError('must use bytes formatting strings') | ||||
for arg in args: | for arg in args: | ||||
if not isinstance(arg, bytes): | if not isinstance(arg, bytes): | ||||
raise ValueError('must use bytes for arguments') | raise ValueError('must use bytes for arguments') | ||||
for label in labels: | for label in labels: | ||||
if not isinstance(label, bytes): | if not isinstance(label, bytes): | ||||
raise ValueError('must use bytes for labels') | raise ValueError('must use bytes for labels') | ||||
# Formatting string must be UTF-8. | # Formatting string must be ASCII. | ||||
formatting = formatting.decode(r'utf-8', r'replace').encode(r'utf-8') | formatting = formatting.decode(r'ascii', r'replace').encode(r'ascii') | ||||
# Arguments must be UTF-8. | # Arguments must be UTF-8. | ||||
args = [a.decode(r'utf-8', r'replace').encode(r'utf-8') for a in args] | args = [a.decode(r'utf-8', r'replace').encode(r'utf-8') for a in args] | ||||
# Labels must be ASCII. | # Labels must be ASCII. | ||||
labels = [l.decode(r'ascii', r'strict').encode(r'ascii') | labels = [l.decode(r'ascii', r'strict').encode(r'ascii') | ||||
for l in labels] | for l in labels] | ||||
if len(formatting) > 65535: | atom = {b'msg': formatting} | ||||
raise ValueError('formatting string cannot be longer than 64k') | if args: | ||||
atom[b'args'] = args | |||||
if any(len(a) > 65535 for a in args): | if labels: | ||||
raise ValueError('argument string cannot be longer than 64k') | atom[b'labels'] = labels | ||||
if any(len(l) > 255 for l in labels): | atomdicts.append(atom) | ||||
raise ValueError('label string cannot be longer than 255 bytes') | |||||
chunks = [ | payload = cbor.dumps(atomdicts, canonical=True) | ||||
struct.pack(r'<H', len(formatting)), | |||||
struct.pack(r'<BB', len(labels), len(args)), | |||||
struct.pack(r'<' + r'B' * len(labels), *map(len, labels)), | |||||
struct.pack(r'<' + r'H' * len(args), *map(len, args)), | |||||
] | |||||
chunks.append(formatting) | |||||
chunks.extend(labels) | |||||
chunks.extend(args) | |||||
atom = b''.join(chunks) | |||||
atomchunks.append(atom) | |||||
bytesleft -= len(atom) | |||||
if bytesleft < 0: | if len(payload) > maxframesize: | ||||
raise ValueError('cannot encode data in a single frame') | raise ValueError('cannot encode data in a single frame') | ||||
yield stream.makeframe(requestid=requestid, | yield stream.makeframe(requestid=requestid, | ||||
typeid=FRAME_TYPE_TEXT_OUTPUT, | typeid=FRAME_TYPE_TEXT_OUTPUT, | ||||
flags=0, | flags=0, | ||||
payload=b''.join(atomchunks)) | payload=payload) | ||||
class stream(object): | class stream(object): | ||||
"""Represents a logical unidirectional series of frames.""" | """Represents a logical unidirectional series of frames.""" | ||||
def __init__(self, streamid, active=False): | def __init__(self, streamid, active=False): | ||||
self.streamid = streamid | self.streamid = streamid | ||||
self._active = False | self._active = False | ||||
self.assertEqual(frames, [ | self.assertEqual(frames, [ | ||||
ffs(b'1 1 stream-begin command-request new|have-data ' | ffs(b'1 1 stream-begin command-request new|have-data ' | ||||
b"cbor:{b'name': b'command', b'args': {b'key1': b'key1value', " | b"cbor:{b'name': b'command', b'args': {b'key1': b'key1value', " | ||||
b"b'key2': b'key2value', b'key3': b'key3value'}}"), | b"b'key2': b'key2value', b'key3': b'key3value'}}"), | ||||
ffs(b'1 1 0 command-data eos %s' % data.getvalue()), | ffs(b'1 1 0 command-data eos %s' % data.getvalue()), | ||||
]) | ]) | ||||
def testtextoutputexcessiveargs(self): | |||||
"""At most 255 formatting arguments are allowed.""" | |||||
with self.assertRaisesRegexp(ValueError, | |||||
'cannot use more than 255 formatting'): | |||||
args = [b'x' for i in range(256)] | |||||
list(framing.createtextoutputframe(None, 1, | |||||
[(b'bleh', args, [])])) | |||||
def testtextoutputexcessivelabels(self): | |||||
"""At most 255 labels are allowed.""" | |||||
with self.assertRaisesRegexp(ValueError, | |||||
'cannot use more than 255 labels'): | |||||
labels = [b'l' for i in range(256)] | |||||
list(framing.createtextoutputframe(None, 1, | |||||
[(b'bleh', [], labels)])) | |||||
def testtextoutputformattingstringtype(self): | def testtextoutputformattingstringtype(self): | ||||
"""Formatting string must be bytes.""" | """Formatting string must be bytes.""" | ||||
with self.assertRaisesRegexp(ValueError, 'must use bytes formatting '): | with self.assertRaisesRegexp(ValueError, 'must use bytes formatting '): | ||||
list(framing.createtextoutputframe(None, 1, [ | list(framing.createtextoutputframe(None, 1, [ | ||||
(b'foo'.decode('ascii'), [], [])])) | (b'foo'.decode('ascii'), [], [])])) | ||||
def testtextoutputargumentbytes(self): | def testtextoutputargumentbytes(self): | ||||
with self.assertRaisesRegexp(ValueError, 'must use bytes for argument'): | with self.assertRaisesRegexp(ValueError, 'must use bytes for argument'): | ||||
list(framing.createtextoutputframe(None, 1, [ | list(framing.createtextoutputframe(None, 1, [ | ||||
(b'foo', [b'foo'.decode('ascii')], [])])) | (b'foo', [b'foo'.decode('ascii')], [])])) | ||||
def testtextoutputlabelbytes(self): | def testtextoutputlabelbytes(self): | ||||
with self.assertRaisesRegexp(ValueError, 'must use bytes for labels'): | with self.assertRaisesRegexp(ValueError, 'must use bytes for labels'): | ||||
list(framing.createtextoutputframe(None, 1, [ | list(framing.createtextoutputframe(None, 1, [ | ||||
(b'foo', [], [b'foo'.decode('ascii')])])) | (b'foo', [], [b'foo'.decode('ascii')])])) | ||||
def testtextoutputtoolongformatstring(self): | |||||
with self.assertRaisesRegexp(ValueError, | |||||
'formatting string cannot be longer than'): | |||||
list(framing.createtextoutputframe(None, 1, [ | |||||
(b'x' * 65536, [], [])])) | |||||
def testtextoutputtoolongargumentstring(self): | |||||
with self.assertRaisesRegexp(ValueError, | |||||
'argument string cannot be longer than'): | |||||
list(framing.createtextoutputframe(None, 1, [ | |||||
(b'bleh', [b'x' * 65536], [])])) | |||||
def testtextoutputtoolonglabelstring(self): | |||||
with self.assertRaisesRegexp(ValueError, | |||||
'label string cannot be longer than'): | |||||
list(framing.createtextoutputframe(None, 1, [ | |||||
(b'bleh', [], [b'x' * 65536])])) | |||||
def testtextoutput1simpleatom(self): | def testtextoutput1simpleatom(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo', [], [])])) | (b'foo', [], [])])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 \x03\x00\x00\x00foo'), | ffs(b'1 1 stream-begin text-output 0 ' | ||||
b"cbor:[{b'msg': b'foo'}]"), | |||||
]) | ]) | ||||
def testtextoutput2simpleatoms(self): | def testtextoutput2simpleatoms(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo', [], []), | (b'foo', [], []), | ||||
(b'bar', [], []), | (b'bar', [], []), | ||||
])) | ])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 ' | ffs(b'1 1 stream-begin text-output 0 ' | ||||
br'\x03\x00\x00\x00foo\x03\x00\x00\x00bar'), | b"cbor:[{b'msg': b'foo'}, {b'msg': b'bar'}]") | ||||
]) | ]) | ||||
def testtextoutput1arg(self): | def testtextoutput1arg(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo %s', [b'val1'], []), | (b'foo %s', [b'val1'], []), | ||||
])) | ])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 ' | ffs(b'1 1 stream-begin text-output 0 ' | ||||
br'\x06\x00\x00\x01\x04\x00foo %sval1'), | b"cbor:[{b'msg': b'foo %s', b'args': [b'val1']}]") | ||||
]) | ]) | ||||
def testtextoutput2arg(self): | def testtextoutput2arg(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo %s %s', [b'val', b'value'], []), | (b'foo %s %s', [b'val', b'value'], []), | ||||
])) | ])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 ' | ffs(b'1 1 stream-begin text-output 0 ' | ||||
br'\x09\x00\x00\x02\x03\x00\x05\x00foo %s %svalvalue'), | b"cbor:[{b'msg': b'foo %s %s', b'args': [b'val', b'value']}]") | ||||
]) | ]) | ||||
def testtextoutput1label(self): | def testtextoutput1label(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo', [], [b'label']), | (b'foo', [], [b'label']), | ||||
])) | ])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 ' | ffs(b'1 1 stream-begin text-output 0 ' | ||||
br'\x03\x00\x01\x00\x05foolabel'), | b"cbor:[{b'msg': b'foo', b'labels': [b'label']}]") | ||||
]) | ]) | ||||
def testargandlabel(self): | def testargandlabel(self): | ||||
stream = framing.stream(1) | stream = framing.stream(1) | ||||
val = list(framing.createtextoutputframe(stream, 1, [ | val = list(framing.createtextoutputframe(stream, 1, [ | ||||
(b'foo %s', [b'arg'], [b'label']), | (b'foo %s', [b'arg'], [b'label']), | ||||
])) | ])) | ||||
self.assertEqual(val, [ | self.assertEqual(val, [ | ||||
ffs(br'1 1 stream-begin text-output 0 ' | ffs(b'1 1 stream-begin text-output 0 ' | ||||
br'\x06\x00\x01\x01\x05\x03\x00foo %slabelarg'), | b"cbor:[{b'msg': b'foo %s', b'args': [b'arg'], " | ||||
b"b'labels': [b'label']}]") | |||||
]) | ]) | ||||
class ServerReactorTests(unittest.TestCase): | class ServerReactorTests(unittest.TestCase): | ||||
def _sendsingleframe(self, reactor, f): | def _sendsingleframe(self, reactor, f): | ||||
results = list(sendframes(reactor, [f])) | results = list(sendframes(reactor, [f])) | ||||
self.assertEqual(len(results), 1) | self.assertEqual(len(results), 1) | ||||
return results[0] | return results[0] |