diff --git a/mercurial/thirdparty/cbor/LICENSE b/mercurial/thirdparty/cbor/LICENSE new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/LICENSE @@ -0,0 +1,13 @@ +Copyright 2014-2015 Brian Olson + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/mercurial/thirdparty/cbor/Makefile b/mercurial/thirdparty/cbor/Makefile new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/Makefile @@ -0,0 +1,4 @@ +test: + PYTHONPATH=. python tests/cbor/test_cbor.py + +check: test diff --git a/mercurial/thirdparty/cbor/README.md b/mercurial/thirdparty/cbor/README.md new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/README.md @@ -0,0 +1,33 @@ +Concise Binary Object Representation (CBOR) is a superset of JSON's data model that's faster and more compact. + +* http://tools.ietf.org/html/rfc7049 +* http://cbor.io/ + +This repository contains implementations for Python and Go. + +## Python ## +This Python implementation provides loads()/dumps() like the json standard library. + +Compared to Python 2.7.5's standard library implementation of json: + +``` +#! + +serialized 50000 objects into 1163097 cbor bytes in 0.05 seconds (1036613.48/s) and 1767001 json bytes in 0.22 seconds (224772.48/s) +compress to 999179 bytes cbor.gz and 1124500 bytes json.gz +load 50000 objects from cbor in 0.07 secs (763708.80/sec) and json in 0.32 (155348.97/sec) +``` + +There is also a pure-Python implementation which runs at about 1/3 the speed of json's C-accelerated implementation. + +Tested in Python 2.7.5, 2.7.6, 3.3.3, and 3.4.0. + +Available on PyPI: + +pip install cbor + +## Go ## + +import cbor "bitbucket.org/bodhisnarkva/cbor/go" + +The Go implementation is newer. It attempts to do serialization to/from struct types using reflection, but doesn't yet handle 100% of such cases correctly. It _should_ do everything fine serializing `map[string]interface{}` and `[]interface{}` and other basic types. It passes the test of decoding 100% of the CBOR common appendix test strings.
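The loads()/dumps() and load()/dump() functions described above mirror the json module's API; a minimal usage sketch follows (the sample object and the io.BytesIO buffer are illustrative assumptions, not taken from the package's own documentation):

```
import io
import cbor

# Any JSON-like value (dict, list, int, float, str/bytes, bool, None)
# round-trips through the CBOR encoding.
obj = {'name': 'example', 'values': [1, 2.5, True, None]}

blob = cbor.dumps(obj)        # bytes holding the CBOR encoding
decoded = cbor.loads(blob)    # back to Python objects
assert decoded == obj

# load()/dump() work on file-like objects, like json.load()/json.dump().
buf = io.BytesIO()
cbor.dump(obj, buf)
buf.seek(0)
assert cbor.load(buf) == obj
```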
\ No newline at end of file diff --git a/mercurial/thirdparty/cbor/c/cbor.h b/mercurial/thirdparty/cbor/c/cbor.h new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/c/cbor.h @@ -0,0 +1,76 @@ +#ifndef CBOR_H +#define CBOR_H + +#define CBOR_TYPE_MASK 0xE0 /* top 3 bits */ +#define CBOR_INFO_BITS 0x1F /* low 5 bits */ + +#define CBOR_UINT 0x00 +#define CBOR_NEGINT 0x20 +#define CBOR_BYTES 0x40 +#define CBOR_TEXT 0x60 +#define CBOR_ARRAY 0x80 +#define CBOR_MAP 0xA0 +#define CBOR_TAG 0xC0 +#define CBOR_7 0xE0 /* float and other types */ + +#define CBOR_ADDITIONAL_INFORMATION 0x1F + +/* read the "additional information" of a tag byte which is often a + * small literal integer describing the length in bytes of the data + * item */ +#define IS_SMALL_LITERAL(n) (((n) & 0x1f) < 24) +#define SMALL_LITERAL(n) ((n) & 0x1f) + + +#define CBOR_UINT8_FOLLOWS 24 // 0x18 +#define CBOR_UINT16_FOLLOWS 25 // 0x19 +#define CBOR_UINT32_FOLLOWS 26 // 0x1A +#define CBOR_UINT64_FOLLOWS 27 // 0x1B +#define CBOR_VAR_FOLLOWS 31 // 0x1F + +#define CBOR_UINT8 (CBOR_UINT | CBOR_UINT8_FOLLOWS) +#define CBOR_UINT16 (CBOR_UINT | CBOR_UINT16_FOLLOWS) +#define CBOR_UINT32 (CBOR_UINT | CBOR_UINT32_FOLLOWS) +#define CBOR_UINT64 (CBOR_UINT | CBOR_UINT64_FOLLOWS) + +#define CBOR_NEGINT8 (CBOR_NEGINT | CBOR_UINT8_FOLLOWS) +#define CBOR_NEGINT16 (CBOR_NEGINT | CBOR_UINT16_FOLLOWS) +#define CBOR_NEGINT32 (CBOR_NEGINT | CBOR_UINT32_FOLLOWS) +#define CBOR_NEGINT64 (CBOR_NEGINT | CBOR_UINT64_FOLLOWS) + + +#define CBOR_BREAK 0xFF + +#define CBOR_FALSE (CBOR_7 | 20) +#define CBOR_TRUE (CBOR_7 | 21) +#define CBOR_NULL (CBOR_7 | 22) +#define CBOR_UNDEFINED (CBOR_7 | 23) + +#define CBOR_FLOAT16 (CBOR_7 | 25) +#define CBOR_FLOAT32 (CBOR_7 | 26) +#define CBOR_FLOAT64 (CBOR_7 | 27) + + +#define CBOR_TAG_DATE_STRING (0) /* RFC3339 */ +#define CBOR_TAG_DATE_ARRAY (1) /* any number type follows, seconds since 1970-01-01T00:00:00 UTC */ +#define CBOR_TAG_BIGNUM (2) /* big endian byte string follows */ +#define CBOR_TAG_NEGBIGNUM (3) /* big endian byte string follows */ +#define CBOR_TAG_DECIMAL (4) /* [ 10^x exponent, number ] */ +#define CBOR_TAG_BIGFLOAT (5) /* [ 2^x exponent, number ] */ +//#define CBOR_TAG_BASE64URL (21) +//#define CBOR_TAG_BASE64 (22) +#define CBOR_TAG_BASE16 (23) +#define CBOR_TAG_CBOR (24) /* following byte string is embedded CBOR data */ + +#define CBOR_TAG_URI 32 +//#define CBOR_TAG_BASE64URL 33 +//#define CBOR_TAG_BASE64 34 +#define CBOR_TAG_REGEX 35 +#define CBOR_TAG_MIME 36 /* following text is MIME message, headers, separators and all */ +#define CBOR_TAG_CBOR_FILEHEADER 55799 /* can open a file with 0xd9d9f7 */ + + +/* Content-Type: application/cbor */ + + +#endif /* CBOR_H */ diff --git a/mercurial/thirdparty/cbor/c/cbormodule.c b/mercurial/thirdparty/cbor/c/cbormodule.c new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/c/cbormodule.c @@ -0,0 +1,1491 @@ +#include "Python.h" + +#include "cbor.h" + +#include +#include + +//#include +#include + + +#ifndef DEBUG_LOGGING +// causes things to be written to stderr +#define DEBUG_LOGGING 0 +//#define DEBUG_LOGGING 1 +#endif + + +#ifdef Py_InitModule +// Python 2.7 + +#define HAS_FILE_READER 1 +#define IS_PY3 0 + +#else + +#define HAS_FILE_READER 0 +#define IS_PY3 1 + +#endif + +typedef struct { + unsigned int sort_keys; +} EncodeOptions; + +// Hey Look! It's a polymorphic object structure in C! 
+ +// read(, len): read len bytes and return in buffer, or NULL on error +// read1(, uint8_t*): read one byte and return 0 on success +// return_buffer(, *): release result of read(, len) +// delete(): destructor. free thiz and contents. +#define READER_FUNCTIONS \ + void* (*read)(void* self, Py_ssize_t len); \ + int (*read1)(void* self, uint8_t* oneByte); \ + void (*return_buffer)(void* self, void* buffer); \ + void (*delete)(void* self); + +#define SET_READER_FUNCTIONS(thiz, clazz) (thiz)->read = clazz##_read;\ + (thiz)->read1 = clazz##_read1;\ + (thiz)->return_buffer = clazz##_return_buffer;\ + (thiz)->delete = clazz##_delete; + +typedef struct _Reader { + READER_FUNCTIONS; +} Reader; + +static Reader* NewBufferReader(PyObject* ob); +static Reader* NewObjectReader(PyObject* ob); +#if HAS_FILE_READER +static Reader* NewFileReader(PyObject* ob); +#endif + + +static PyObject* loads_tag(Reader* rin, uint64_t aux); +static int loads_kv(PyObject* out, Reader* rin); + +typedef struct VarBufferPart { + void* start; + uint64_t len; + struct VarBufferPart* next; +} VarBufferPart; + + +static int logprintf(const char* fmt, ...) { + va_list ap; + int ret; + va_start(ap, fmt); +#if DEBUG_LOGGING + ret = vfprintf(stderr, fmt, ap); +#else + ret = 0; +#endif + va_end(ap); + return ret; +} + +// TODO: portably work this out at compile time +static int _is_big_endian = 0; + +static int is_big_endian(void) { + uint32_t val = 1234; + _is_big_endian = val == htonl(val); + //logprintf("is_big_endian=%d\n", _is_big_endian); + return _is_big_endian; +} + + +PyObject* decodeFloat16(Reader* rin) { + // float16 parsing adapted from example code in spec + uint8_t hibyte, lobyte;// = raw[pos]; + int err; + int exp; + int mant; + double val; + + err = rin->read1(rin, &hibyte); + if (err) { logprintf("fail in float16[0]\n"); return NULL; } + err = rin->read1(rin, &lobyte); + if (err) { logprintf("fail in float16[1]\n"); return NULL; } + + exp = (hibyte >> 2) & 0x1f; + mant = ((hibyte & 0x3) << 8) | lobyte; + if (exp == 0) { + val = ldexp(mant, -24); + } else if (exp != 31) { + val = ldexp(mant + 1024, exp - 25); + } else { + val = mant == 0 ? INFINITY : NAN; + } + if (hibyte & 0x80) { + val = -val; + } + return PyFloat_FromDouble(val); +} +PyObject* decodeFloat32(Reader* rin) { + float val; + uint8_t* raw = rin->read(rin, 4); + if (!raw) { logprintf("fail in float32\n"); return NULL; } + if (_is_big_endian) { + // easy! 
+ val = *((float*)raw); + } else { + uint8_t* dest = (uint8_t*)(&val); + dest[3] = raw[0]; + dest[2] = raw[1]; + dest[1] = raw[2]; + dest[0] = raw[3]; + } + rin->return_buffer(rin, raw); + return PyFloat_FromDouble(val); +} +PyObject* decodeFloat64(Reader* rin) { + int si; + uint64_t aux = 0; + uint8_t* raw = rin->read(rin, 8); + if (!raw) { logprintf("fail in float64\n"); return NULL; } + for (si = 0; si < 8; si++) { + aux = aux << 8; + aux |= raw[si]; + } + rin->return_buffer(rin, raw); + return PyFloat_FromDouble(*((double*)(&aux))); +} + +// parse following int value into *auxP +// return 0 on success, -1 on fail +static int handle_info_bits(Reader* rin, uint8_t cbor_info, uint64_t* auxP) { + uint64_t aux; + + if (cbor_info <= 23) { + // literal value <=23 + aux = cbor_info; + } else if (cbor_info == CBOR_UINT8_FOLLOWS) { + uint8_t taux; + if (rin->read1(rin, &taux)) { logprintf("fail in uint8\n"); return -1; } + aux = taux; + } else if (cbor_info == CBOR_UINT16_FOLLOWS) { + uint8_t hibyte, lobyte; + if (rin->read1(rin, &hibyte)) { logprintf("fail in uint16[0]\n"); return -1; } + if (rin->read1(rin, &lobyte)) { logprintf("fail in uint16[1]\n"); return -1; } + aux = (hibyte << 8) | lobyte; + } else if (cbor_info == CBOR_UINT32_FOLLOWS) { + uint8_t* raw = (uint8_t*)rin->read(rin, 4); + if (!raw) { logprintf("fail in uint32[1]\n"); return -1; } + aux = + (((uint64_t)raw[0]) << 24) | + (((uint64_t)raw[1]) << 16) | + (((uint64_t)raw[2]) << 8) | + ((uint64_t)raw[3]); + rin->return_buffer(rin, raw); + } else if (cbor_info == CBOR_UINT64_FOLLOWS) { + int si; + uint8_t* raw = (uint8_t*)rin->read(rin, 8); + if (!raw) { logprintf("fail in uint64[1]\n"); return -1; } + aux = 0; + for (si = 0; si < 8; si++) { + aux = aux << 8; + aux |= raw[si]; + } + rin->return_buffer(rin, raw); + } else { + aux = 0; + } + *auxP = aux; + return 0; +} + +static PyObject* inner_loads_c(Reader* rin, uint8_t c); + +static PyObject* inner_loads(Reader* rin) { + uint8_t c; + int err; + + err = rin->read1(rin, &c); + if (err) { logprintf("fail in loads tag\n"); return NULL; } + return inner_loads_c(rin, c); +} + +PyObject* inner_loads_c(Reader* rin, uint8_t c) { + uint8_t cbor_type; + uint8_t cbor_info; + uint64_t aux; + + cbor_type = c & CBOR_TYPE_MASK; + cbor_info = c & CBOR_INFO_BITS; + +#if 0 + if (pos > len) { + PyErr_SetString(PyExc_ValueError, "misparse, token went longer than buffer"); + return NULL; + } + + pos += 1; +#endif + + if (cbor_type == CBOR_7) { + if (cbor_info == CBOR_UINT16_FOLLOWS) { // float16 + return decodeFloat16(rin); + } else if (cbor_info == CBOR_UINT32_FOLLOWS) { // float32 + return decodeFloat32(rin); + } else if (cbor_info == CBOR_UINT64_FOLLOWS) { // float64 + return decodeFloat64(rin); + } + // not a float, fall through to other CBOR_7 interpretations + } + if (handle_info_bits(rin, cbor_info, &aux)) { logprintf("info bits failed\n"); return NULL; } + + PyObject* out = NULL; + switch (cbor_type) { + case CBOR_UINT: + out = PyLong_FromUnsignedLongLong(aux); + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding UINT"); + } + return out; + case CBOR_NEGINT: + if (aux > 0x7fffffffffffffff) { + PyObject* bignum = PyLong_FromUnsignedLongLong(aux); + PyObject* minusOne = PyLong_FromLong(-1); + out = PyNumber_Subtract(minusOne, bignum); + Py_DECREF(minusOne); + Py_DECREF(bignum); + } else { + out = PyLong_FromLongLong((long long)(((long long)-1) - aux)); + } + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding NEGINT"); + } + return out; 
+ case CBOR_BYTES: + if (cbor_info == CBOR_VAR_FOLLOWS) { + size_t total = 0; + VarBufferPart* parts = NULL; + VarBufferPart* parts_tail = NULL; + uint8_t sc; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; } + while (sc != CBOR_BREAK) { + uint8_t scbor_type = sc & CBOR_TYPE_MASK; + uint8_t scbor_info = sc & CBOR_INFO_BITS; + uint64_t saux; + void* blob; + + if (scbor_type != CBOR_BYTES) { + PyErr_Format(PyExc_ValueError, "expected subordinate BYTES block under VAR BYTES, but got %x", scbor_type); + return NULL; + } + if(handle_info_bits(rin, scbor_info, &saux)) { logprintf("var bytes sub infobits failed\n"); return NULL; } + blob = rin->read(rin, saux); + if (!blob) { logprintf("var bytes sub bytes read failed\n"); return NULL; } + if (parts_tail == NULL) { + parts = parts_tail = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux); + } else { + parts_tail->next = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux); + parts_tail = parts_tail->next; + } + parts_tail->start = (void*)(parts_tail + 1); + memcpy(parts_tail->start, blob, saux); + rin->return_buffer(rin, blob); + parts_tail->len = saux; + parts_tail->next = NULL; + total += saux; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; } + } + // Done + { + uint8_t* allbytes = (uint8_t*)PyMem_Malloc(total); + uintptr_t op = 0; + while (parts != NULL) { + VarBufferPart* next; + memcpy(allbytes + op, parts->start, parts->len); + op += parts->len; + next = parts->next; + PyMem_Free(parts); + parts = next; + } + out = PyBytes_FromStringAndSize((char*)allbytes, total); + PyMem_Free(allbytes); + } + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR BYTES"); + } + } else { + void* raw; + if (aux == 0) { + static void* empty_string = ""; + raw = empty_string; + } else { + raw = rin->read(rin, aux); + if (!raw) { logprintf("bytes read failed\n"); return NULL; } + } + out = PyBytes_FromStringAndSize(raw, (Py_ssize_t)aux); + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding BYTES"); + } + if (aux != 0) { + rin->return_buffer(rin, raw); + } + } + return out; + case CBOR_TEXT: + if (cbor_info == CBOR_VAR_FOLLOWS) { + PyObject* parts = PyList_New(0); + PyObject* joiner = PyUnicode_FromString(""); + uint8_t sc; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; } + while (sc != CBOR_BREAK) { + PyObject* subitem = inner_loads_c(rin, sc); + if (subitem == NULL) { logprintf("fail in var text subitem\n"); return NULL; } + PyList_Append(parts, subitem); + Py_DECREF(subitem); + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; } + } + // Done + out = PyUnicode_Join(joiner, parts); + Py_DECREF(joiner); + Py_DECREF(parts); + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR TEXT"); + } + } else { + void* raw; + if (aux == 0) { + static void* empty_string = ""; + raw = empty_string; + } else { + raw = rin->read(rin, aux); + if (!raw) { logprintf("read text failed\n"); return NULL; } + } + out = PyUnicode_FromStringAndSize((char*)raw, (Py_ssize_t)aux); + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding TEXT"); + } + if (aux != 0) { + rin->return_buffer(rin, raw); + } + } + return out; + case CBOR_ARRAY: + if (cbor_info == CBOR_VAR_FOLLOWS) { + uint8_t sc; + out = PyList_New(0); + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; } + while (sc != 
CBOR_BREAK) { + PyObject* subitem = inner_loads_c(rin, sc); + if (subitem == NULL) { logprintf("fail in var array subitem\n"); return NULL; } + PyList_Append(out, subitem); + Py_DECREF(subitem); + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; } + } + // Done + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR ARRAY"); + } + } else { + unsigned int i; + out = PyList_New((Py_ssize_t)aux); + for (i = 0; i < aux; i++) { + PyObject* subitem = inner_loads(rin); + if (subitem == NULL) { logprintf("array subitem[%d] (of %d) failed\n", i, aux); return NULL; } + PyList_SetItem(out, (Py_ssize_t)i, subitem); + // PyList_SetItem became the owner of the reference count of subitem, we don't need to DECREF it + } + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding ARRAY"); + } + } + return out; + case CBOR_MAP: + out = PyDict_New(); + if (cbor_info == CBOR_VAR_FOLLOWS) { + uint8_t sc; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; } + while (sc != CBOR_BREAK) { + PyObject* key = inner_loads_c(rin, sc); + PyObject* value; + if (key == NULL) { logprintf("var map key fail\n"); return NULL; } + value = inner_loads(rin); + if (value == NULL) { logprintf("var map val vail\n"); return NULL; } + PyDict_SetItem(out, key, value); + Py_DECREF(key); + Py_DECREF(value); + + if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; } + } + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR MAP"); + } + } else { + unsigned int i; + for (i = 0; i < aux; i++) { + if (loads_kv(out, rin) != 0) { + logprintf("map kv[%d] failed\n", i); + return NULL; + } + } + if (out == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unknown error decoding MAP"); + } + } + return out; + case CBOR_TAG: + return loads_tag(rin, aux); + case CBOR_7: + if (aux == 20) { + out = Py_False; + Py_INCREF(out); + } else if (aux == 21) { + out = Py_True; + Py_INCREF(out); + } else if (aux == 22) { + out = Py_None; + Py_INCREF(out); + } else if (aux == 23) { + // js `undefined`, closest is py None + out = Py_None; + Py_INCREF(out); + } + if (out == NULL) { + PyErr_Format(PyExc_ValueError, "unknown section 7 marker %02x, aux=%llu", c, aux); + } + return out; + default: + PyErr_Format(PyExc_RuntimeError, "unknown cbor marker %02x", c); + return NULL; + } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunreachable-code" + PyErr_SetString(PyExc_RuntimeError, "cbor library internal error moof!"); + return NULL; +#pragma GCC diagnostic pop +} + +static int loads_kv(PyObject* out, Reader* rin) { + PyObject* key = inner_loads(rin); + PyObject* value; + if (key == NULL) { logprintf("map key fail\n"); return -1; } + value = inner_loads(rin); + if (value == NULL) { logprintf("map val fail\n"); return -1; } + PyDict_SetItem(out, key, value); + Py_DECREF(key); + Py_DECREF(value); + return 0; +} + +static PyObject* loads_bignum(Reader* rin, uint8_t c) { + PyObject* out = NULL; + + uint8_t bytes_info = c & CBOR_INFO_BITS; + if (bytes_info < 24) { + int i; + PyObject* eight = PyLong_FromLong(8); + out = PyLong_FromLong(0); + for (i = 0; i < bytes_info; i++) { + // TODO: is this leaking like crazy? 
+ PyObject* curbyte; + PyObject* tout = PyNumber_Lshift(out, eight); + Py_DECREF(out); + out = tout; + uint8_t cb; + if (rin->read1(rin, &cb)) { + logprintf("r1 fail in bignum %d/%d\n", i, bytes_info); + Py_DECREF(eight); + Py_DECREF(out); + return NULL; + } + curbyte = PyLong_FromLong(cb); + tout = PyNumber_Or(out, curbyte); + Py_DECREF(curbyte); + Py_DECREF(out); + out = tout; + } + Py_DECREF(eight); + return out; + } else { + PyErr_Format(PyExc_NotImplementedError, "TODO: TAG BIGNUM for bigger bignum bytes_info=%d, len(ull)=%lu\n", bytes_info, sizeof(unsigned long long)); + return NULL; + } +} + + +// returns a PyObject for cbor.cbor.Tag +// Returned PyObject* is a BORROWED reference from the module dict +static PyObject* getCborTagClass(void) { + PyObject* cbor_module = PyImport_ImportModule("cbor.cbor"); + PyObject* moddict = PyModule_GetDict(cbor_module); + PyObject* tag_class = PyDict_GetItemString(moddict, "Tag"); + // moddict and tag_class are 'borrowed reference' + Py_DECREF(cbor_module); + + return tag_class; +} + + +static PyObject* loads_tag(Reader* rin, uint64_t aux) { + PyObject* out = NULL; + // return an object CBORTag(tagnum, nextob) + if (aux == CBOR_TAG_BIGNUM) { + // If the next object is bytes, interpret it here without making a PyObject for it. + uint8_t sc; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in bignum tag\n"); return NULL; } + if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) { + return loads_bignum(rin, sc); + } else { + PyErr_Format(PyExc_ValueError, "TAG BIGNUM not followed by bytes but %02x", sc); + return NULL; + } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunreachable-code" + PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG BIGNUM %02x ...\n", sc); + return NULL; +#pragma GCC diagnostic pop + } else if (aux == CBOR_TAG_NEGBIGNUM) { + // If the next object is bytes, interpret it here without making a PyObject for it. 
+ uint8_t sc; + if (rin->read1(rin, &sc)) { logprintf("r1 fail in negbignum tag\n"); return NULL; } + if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) { + out = loads_bignum(rin, sc); + if (out == NULL) { logprintf("loads_bignum fail inside TAG_NEGBIGNUM\n"); return NULL; } + PyObject* minusOne = PyLong_FromLong(-1); + PyObject* tout = PyNumber_Subtract(minusOne, out); + Py_DECREF(minusOne); + Py_DECREF(out); + out = tout; + return out; + } else { + PyErr_Format(PyExc_ValueError, "TAG NEGBIGNUM not followed by bytes but %02x", sc); + return NULL; + } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunreachable-code" + PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG NEGBIGNUM %02x ...\n", sc); + return NULL; +#pragma GCC diagnostic pop + } + out = inner_loads(rin); + if (out == NULL) { return NULL; } + { + PyObject* tag_class = getCborTagClass(); + PyObject* args = Py_BuildValue("(K,O)", aux, out); + PyObject* tout = PyObject_CallObject(tag_class, args); + Py_DECREF(args); + Py_DECREF(out); + // tag_class was just a borrowed reference + out = tout; + } + return out; +} + + +static PyObject* +cbor_loads(PyObject* noself, PyObject* args) { + PyObject* ob; + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + + if (ob == Py_None) { + PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads"); + return NULL; + } + + { + PyObject* out = NULL; + Reader* r = NewBufferReader(ob); + if (!r) { + return NULL; + } + out = inner_loads(r); + r->delete(r); + return out; + } +} + + +#if HAS_FILE_READER + +typedef struct _FileReader { + READER_FUNCTIONS; + FILE* fin; + void* dst; + Py_ssize_t dst_size; + Py_ssize_t read_count; +} FileReader; + +// read from a python builtin file which contains a C FILE* +static void* FileReader_read(void* self, Py_ssize_t len) { + FileReader* thiz = (FileReader*)self; + Py_ssize_t rtotal = 0; + uintptr_t opos; + //logprintf("file read %d\n", len); + if (len > thiz->dst_size) { + thiz->dst = PyMem_Realloc(thiz->dst, len); + thiz->dst_size = len; + } else if ((thiz->dst_size > (128 * 1024)) && (len < 4096)) { + PyMem_Free(thiz->dst); + thiz->dst = PyMem_Malloc(len); + thiz->dst_size = len; + } + opos = (uintptr_t)(thiz->dst); + while (1) { + size_t rlen = fread((void*)opos, 1, len, thiz->fin); + if (rlen == 0) { + // file isn't going to give any more + PyErr_Format(PyExc_ValueError, "only got %zd bytes with %zd stil to read from file", rtotal, len); + PyMem_Free(thiz->dst); + thiz->dst = NULL; + thiz->dst_size = 0; + return NULL; + } + thiz->read_count += rlen; + rtotal += rlen; + opos += rlen; + len -= rlen; + if (rtotal >= len) { + if (thiz->dst == NULL) { + PyErr_SetString(PyExc_RuntimeError, "known error in file reader, NULL dst"); + return NULL; + } + return thiz->dst; + } + } +} +static int FileReader_read1(void* self, uint8_t* oneByte) { + FileReader* thiz = (FileReader*)self; + size_t didread = fread((void*)oneByte, 1, 1, thiz->fin); + if (didread == 0) { + logprintf("failed to read 1 from file\n"); + PyErr_SetString(PyExc_ValueError, "got nothing reading 1 from file"); + return -1; + } + thiz->read_count++; + return 0; +} +static void FileReader_return_buffer(void* self, void* buffer) { + // Nothing to do, we hold onto the buffer and maybe reuse it for next read +} +static void 
FileReader_delete(void* self) { + FileReader* thiz = (FileReader*)self; + if (thiz->dst) { + PyMem_Free(thiz->dst); + } + PyMem_Free(thiz); +} +static Reader* NewFileReader(PyObject* ob) { + FileReader* fr = (FileReader*)PyMem_Malloc(sizeof(FileReader)); + if (fr == NULL) { + PyErr_SetString(PyExc_MemoryError, "failed to allocate FileReader"); + return NULL; + } + fr->fin = PyFile_AsFile(ob); + if (fr->fin == NULL) { + PyErr_SetString(PyExc_RuntimeError, "PyFile_AsFile NULL"); + PyMem_Free(fr); + return NULL; + } + fr->dst = NULL; + fr->dst_size = 0; + fr->read_count = 0; + SET_READER_FUNCTIONS(fr, FileReader); + return (Reader*)fr; +} + +#endif /* Python 2.7 FileReader */ + + +typedef struct _ObjectReader { + READER_FUNCTIONS; + PyObject* ob; + + // We got one object with all the bytes neccessary, and need to + // DECREF it later. + PyObject* retval; + void* bytes; + + // OR, we got several objects, we DECREFed them as we went, and + // need to Free() this buffer at the end. + void* dst; + + Py_ssize_t read_count; + int exception_is_external; +} ObjectReader; + +// read from a python file-like object which has a .read(n) method +static void* ObjectReader_read(void* context, Py_ssize_t len) { + ObjectReader* thiz = (ObjectReader*)context; + Py_ssize_t rtotal = 0; + uintptr_t opos = 0; + //logprintf("ob read %d\n", len); + assert(!thiz->dst); + assert(!thiz->bytes); + while (rtotal < len) { + PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "n", len - rtotal, NULL); + Py_ssize_t rlen; + if (retval == NULL) { + thiz->exception_is_external = 1; + logprintf("exception in object.read()\n"); + return NULL; + } + if (!PyBytes_Check(retval)) { + logprintf("object.read() is not bytes\n"); + PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n"); + Py_DECREF(retval); + return NULL; + } + rlen = PyBytes_Size(retval); + thiz->read_count += rlen; + if (rlen > len - rtotal) { + logprintf("object.read() is too much!\n"); + PyErr_Format(PyExc_ValueError, "ob.read() returned %ld bytes but only wanted %lu\n", rlen, len - rtotal); + Py_DECREF(retval); + return NULL; + } + if (rlen == len) { + // best case! All in one call to read() + // We _keep_ a reference to retval until later. 
+ thiz->retval = retval; + thiz->bytes = PyBytes_AsString(retval); + assert(thiz->bytes); + thiz->dst = NULL; + opos = 0; + return thiz->bytes; + } + if (thiz->dst == NULL) { + thiz->dst = PyMem_Malloc(len); + opos = (uintptr_t)thiz->dst; + } + // else, not enough all in one go + memcpy((void*)opos, PyBytes_AsString(retval), rlen); + Py_DECREF(retval); + opos += rlen; + rtotal += rlen; + } + assert(thiz->dst); + return thiz->dst; +} +static int ObjectReader_read1(void* self, uint8_t* oneByte) { + ObjectReader* thiz = (ObjectReader*)self; + PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "i", 1, NULL); + Py_ssize_t rlen; + if (retval == NULL) { + thiz->exception_is_external = 1; + //logprintf("call ob read(1) failed\n"); + return -1; + } + if (!PyBytes_Check(retval)) { + PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n"); + return -1; + } + rlen = PyBytes_Size(retval); + thiz->read_count += rlen; + if (rlen > 1) { + PyErr_Format(PyExc_ValueError, "TODO: raise exception: WAT ob.read() returned %ld bytes but only wanted 1\n", rlen); + return -1; + } + if (rlen == 1) { + *oneByte = PyBytes_AsString(retval)[0]; + Py_DECREF(retval); + return 0; + } + PyErr_SetString(PyExc_ValueError, "got nothing reading 1"); + return -1; +} +static void ObjectReader_return_buffer(void* context, void* buffer) { + ObjectReader* thiz = (ObjectReader*)context; + if (buffer == thiz->bytes) { + Py_DECREF(thiz->retval); + thiz->retval = NULL; + thiz->bytes = NULL; + } else if (buffer == thiz->dst) { + PyMem_Free(thiz->dst); + thiz->dst = NULL; + } else { + logprintf("TODO: raise exception, could not release buffer %p, wanted dst=%p or bytes=%p\n", buffer, thiz->dst, thiz->bytes); + } +} +static void ObjectReader_delete(void* context) { + ObjectReader* thiz = (ObjectReader*)context; + if (thiz->retval != NULL) { + Py_DECREF(thiz->retval); + } + if (thiz->dst != NULL) { + PyMem_Free(thiz->dst); + } + PyMem_Free(thiz); +} +static Reader* NewObjectReader(PyObject* ob) { + ObjectReader* r = (ObjectReader*)PyMem_Malloc(sizeof(ObjectReader)); + r->ob = ob; + r->retval = NULL; + r->bytes = NULL; + r->dst = NULL; + r->read_count = 0; + r->exception_is_external = 0; + SET_READER_FUNCTIONS(r, ObjectReader); + return (Reader*)r; +} + +typedef struct _BufferReader { + READER_FUNCTIONS; + uint8_t* raw; + Py_ssize_t len; + uintptr_t pos; +} BufferReader; + +// read from a buffer, aka loads() +static void* BufferReader_read(void* context, Py_ssize_t len) { + BufferReader* thiz = (BufferReader*)context; + //logprintf("br %p %d (%d)\n", thiz, len, thiz->len); + if (len <= thiz->len) { + void* out = (void*)thiz->pos; + thiz->pos += len; + thiz->len -= len; + assert(out); + return out; + } + PyErr_Format(PyExc_ValueError, "buffer read for %zd but only have %zd\n", len, thiz->len); + return NULL; +} +static int BufferReader_read1(void* self, uint8_t* oneByte) { + BufferReader* thiz = (BufferReader*)self; + //logprintf("br %p _1_ (%d)\n", thiz, thiz->len); + if (thiz->len <= 0) { + PyErr_SetString(PyExc_LookupError, "buffer exhausted"); + return -1; + } + *oneByte = *((uint8_t*)thiz->pos); + thiz->pos += 1; + thiz->len -= 1; + return 0; +} +static void BufferReader_return_buffer(void* context, void* buffer) { + // nothing to do +} +static void BufferReader_delete(void* context) { + BufferReader* thiz = (BufferReader*)context; + PyMem_Free(thiz); +} +static Reader* NewBufferReader(PyObject* ob) { + BufferReader* r = (BufferReader*)PyMem_Malloc(sizeof(BufferReader)); + SET_READER_FUNCTIONS(r, 
BufferReader); + if (PyByteArray_Check(ob)) { + r->raw = (uint8_t*)PyByteArray_AsString(ob); + r->len = PyByteArray_Size(ob); + } else if (PyBytes_Check(ob)) { + r->raw = (uint8_t*)PyBytes_AsString(ob); + r->len = PyBytes_Size(ob); + } else { + PyErr_SetString(PyExc_ValueError, "input of unknown type not bytes or bytearray"); + return NULL; + } + r->pos = (uintptr_t)r->raw; + if (r->len == 0) { + PyErr_SetString(PyExc_ValueError, "got zero length string in loads"); + return NULL; + } + if (r->raw == NULL) { + PyErr_SetString(PyExc_ValueError, "got NULL buffer for string"); + return NULL; + } + //logprintf("NBR(%llu, %ld)\n", r->pos, r->len); + return (Reader*)r; +} + + +static PyObject* +cbor_load(PyObject* noself, PyObject* args) { + PyObject* ob; + Reader* reader; + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + + if (ob == Py_None) { + PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads"); + return NULL; + } + PyObject* retval; +#if HAS_FILE_READER + if (PyFile_Check(ob)) { + reader = NewFileReader(ob); + if (reader == NULL) { return NULL; } + retval = inner_loads(reader); + if ((retval == NULL) && + (((FileReader*)reader)->read_count == 0) && + (feof(((FileReader*)reader)->fin) != 0)) { + // never got anything, started at EOF + PyErr_Clear(); + PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF"); + } + reader->delete(reader); + } else +#endif + { + reader = NewObjectReader(ob); + retval = inner_loads(reader); + if ((retval == NULL) && + (!((ObjectReader*)reader)->exception_is_external) && + ((ObjectReader*)reader)->read_count == 0) { + // never got anything, assume EOF + PyErr_Clear(); + PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF"); + } + reader->delete(reader); + } + return retval; +} + + +static void tag_u64_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = *posp; + if (out != NULL) { + out[pos] = cbor_type | CBOR_UINT64_FOLLOWS; + out[pos+1] = (aux >> 56) & 0x0ff; + out[pos+2] = (aux >> 48) & 0x0ff; + out[pos+3] = (aux >> 40) & 0x0ff; + out[pos+4] = (aux >> 32) & 0x0ff; + out[pos+5] = (aux >> 24) & 0x0ff; + out[pos+6] = (aux >> 16) & 0x0ff; + out[pos+7] = (aux >> 8) & 0x0ff; + out[pos+8] = aux & 0x0ff; + } + pos += 9; + *posp = pos; +} + + +static void tag_aux_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = *posp; + if (aux <= 23) { + // tiny literal + if (out != NULL) { + out[pos] = cbor_type | aux; + } + pos += 1; + } else if (aux <= 0x0ff) { + // one byte value + if (out != NULL) { + out[pos] = cbor_type | CBOR_UINT8_FOLLOWS; + out[pos+1] = aux; + } + pos += 2; + } else if (aux <= 0x0ffff) { + // two byte value + if (out != NULL) { + out[pos] = cbor_type | CBOR_UINT16_FOLLOWS; + out[pos+1] = (aux >> 8) & 0x0ff; + out[pos+2] = aux & 0x0ff; + } + pos += 3; + } else if (aux <= 0x0ffffffffL) { + // four byte value + if (out != NULL) { + out[pos] = cbor_type | CBOR_UINT32_FOLLOWS; + out[pos+1] = (aux >> 24) & 0x0ff; + out[pos+2] = (aux >> 16) & 0x0ff; + out[pos+3] = (aux >> 8) & 0x0ff; + out[pos+4] = aux & 0x0ff; + } + pos += 5; + } else { + // eight byte value + tag_u64_out(cbor_type, aux, out, posp); + return; + } + *posp = pos; + return; +} + +static int inner_dumps(EncodeOptions *optp, PyObject* ob, 
uint8_t* out, uintptr_t* posp); + +static int dumps_dict(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = *posp; + Py_ssize_t dictlen = PyDict_Size(ob); + PyObject* key; + PyObject* val; + int err; + + tag_aux_out(CBOR_MAP, dictlen, out, &pos); + + if (optp->sort_keys) { + Py_ssize_t index = 0; + PyObject* keylist = PyDict_Keys(ob); + PyList_Sort(keylist); + + //fprintf(stderr, "sortking keys\n"); + for (index = 0; index < PyList_Size(keylist); index++) { + key = PyList_GetItem(keylist, index); // Borrowed ref + val = PyDict_GetItem(ob, key); // Borrowed ref + err = inner_dumps(optp, key, out, &pos); + if (err != 0) { return err; } + err = inner_dumps(optp, val, out, &pos); + if (err != 0) { return err; } + } + Py_DECREF(keylist); + } else { + Py_ssize_t dictiter = 0; + //fprintf(stderr, "unsorted keys\n"); + while (PyDict_Next(ob, &dictiter, &key, &val)) { + err = inner_dumps(optp, key, out, &pos); + if (err != 0) { return err; } + err = inner_dumps(optp, val, out, &pos); + if (err != 0) { return err; } + } + } + + *posp = pos; + return 0; +} + + +static void dumps_bignum(EncodeOptions *optp, uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = (posp != NULL) ? *posp : 0; + PyObject* eight = PyLong_FromLong(8); + PyObject* bytemask = NULL; + PyObject* nval = NULL; + uint8_t* revbytes = NULL; + int revbytepos = 0; + int val_is_orig = 1; + if (out != NULL) { + bytemask = PyLong_FromLongLong(0x0ff); + revbytes = PyMem_Malloc(23); + } + while (PyObject_IsTrue(val) && (revbytepos < 23)) { + if (revbytes != NULL) { + PyObject* tbyte = PyNumber_And(val, bytemask); + revbytes[revbytepos] = PyLong_AsLong(tbyte); + Py_DECREF(tbyte); + } + revbytepos++; + nval = PyNumber_InPlaceRshift(val, eight); + if (val_is_orig) { + val_is_orig = 0; + } else { + Py_DECREF(val); + } + val = nval; + } + if (revbytes != NULL) { + out[pos] = CBOR_TAG | tag; + pos++; + out[pos] = CBOR_BYTES | revbytepos; + pos++; + revbytepos--; + while (revbytepos >= 0) { + out[pos] = revbytes[revbytepos]; + pos++; + revbytepos--; + } + PyMem_Free(revbytes); + Py_DECREF(bytemask); + } else { + pos += 2 + revbytepos; + } + if (!val_is_orig) { + Py_DECREF(val); + } + Py_DECREF(eight); + *posp = pos; +} + +static int dumps_tag(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = (posp != NULL) ? 
*posp : 0; + int err = 0; + + + PyObject* tag_num; + PyObject* tag_value; + err = 0; + + tag_num = PyObject_GetAttrString(ob, "tag"); + if (tag_num != NULL) { + tag_value = PyObject_GetAttrString(ob, "value"); + if (tag_value != NULL) { +#ifdef Py_INTOBJECT_H + if (PyInt_Check(tag_num)) { + long val = PyInt_AsLong(tag_num); + if (val >= 0) { + tag_aux_out(CBOR_TAG, val, out, &pos); + err = inner_dumps(optp, tag_value, out, &pos); + } else { + PyErr_Format(PyExc_ValueError, "tag cannot be a negative int: %ld", val); + err = -1; + } + } else +#endif + if (PyLong_Check(tag_num)) { + int overflow = -1; + long long val = PyLong_AsLongLongAndOverflow(tag_num, &overflow); + if (overflow == 0) { + if (val >= 0) { + tag_aux_out(CBOR_TAG, val, out, &pos); + err = inner_dumps(optp, tag_value, out, &pos); + } else { + PyErr_Format(PyExc_ValueError, "tag cannot be a negative long: %lld", val); + err = -1; + } + } else { + PyErr_SetString(PyExc_ValueError, "tag number too large"); + err = -1; + } + } + Py_DECREF(tag_value); + } else { + PyErr_SetString(PyExc_ValueError, "broken Tag object has .tag but not .value"); + err = -1; + } + Py_DECREF(tag_num); + } else { + PyErr_SetString(PyExc_ValueError, "broken Tag object with no .tag"); + err = -1; + } + if (err != 0) { return err; } + + *posp = pos; + return err; +} + + +// With out=NULL it just counts the length. +// return err, 0=OK +static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = (posp != NULL) ? *posp : 0; + + if (ob == Py_None) { + if (out != NULL) { + out[pos] = CBOR_NULL; + } + pos += 1; + } else if (PyBool_Check(ob)) { + if (out != NULL) { + if (PyObject_IsTrue(ob)) { + out[pos] = CBOR_TRUE; + } else { + out[pos] = CBOR_FALSE; + } + } + pos += 1; + } else if (PyDict_Check(ob)) { + int err = dumps_dict(optp, ob, out, &pos); + if (err != 0) { return err; } + } else if (PyList_Check(ob)) { + Py_ssize_t i; + Py_ssize_t listlen = PyList_Size(ob); + tag_aux_out(CBOR_ARRAY, listlen, out, &pos); + for (i = 0; i < listlen; i++) { + int err = inner_dumps(optp, PyList_GetItem(ob, i), out, &pos); + if (err != 0) { return err; } + } + } else if (PyTuple_Check(ob)) { + Py_ssize_t i; + Py_ssize_t listlen = PyTuple_Size(ob); + tag_aux_out(CBOR_ARRAY, listlen, out, &pos); + for (i = 0; i < listlen; i++) { + int err = inner_dumps(optp, PyTuple_GetItem(ob, i), out, &pos); + if (err != 0) { return err; } + } + // TODO: accept other enumerables and emit a variable length array +#ifdef Py_INTOBJECT_H + // PyInt exists in Python 2 but not 3 + } else if (PyInt_Check(ob)) { + long val = PyInt_AsLong(ob); + if (val >= 0) { + tag_aux_out(CBOR_UINT, val, out, &pos); + } else { + tag_aux_out(CBOR_NEGINT, -1 - val, out, &pos); + } +#endif + } else if (PyLong_Check(ob)) { + int overflow = 0; + long long val = PyLong_AsLongLongAndOverflow(ob, &overflow); + if (overflow == 0) { + if (val >= 0) { + tag_aux_out(CBOR_UINT, val, out, &pos); + } else { + tag_aux_out(CBOR_NEGINT, -1L - val, out, &pos); + } + } else { + if (overflow < 0) { + // BIG NEGINT + PyObject* minusone = PyLong_FromLongLong(-1L); + PyObject* val = PyNumber_Subtract(minusone, ob); + Py_DECREF(minusone); + dumps_bignum(optp, CBOR_TAG_NEGBIGNUM, val, out, &pos); + Py_DECREF(val); + } else { + // BIG INT + dumps_bignum(optp, CBOR_TAG_BIGNUM, ob, out, &pos); + } + } + } else if (PyFloat_Check(ob)) { + double val = PyFloat_AsDouble(ob); + tag_u64_out(CBOR_7, *((uint64_t*)(&val)), out, &pos); + } else if (PyBytes_Check(ob)) { + Py_ssize_t len = 
PyBytes_Size(ob); + tag_aux_out(CBOR_BYTES, len, out, &pos); + if (out != NULL) { + memcpy(out + pos, PyBytes_AsString(ob), len); + } + pos += len; + } else if (PyUnicode_Check(ob)) { + PyObject* utf8 = PyUnicode_AsUTF8String(ob); + Py_ssize_t len = PyBytes_Size(utf8); + tag_aux_out(CBOR_TEXT, len, out, &pos); + if (out != NULL) { + memcpy(out + pos, PyBytes_AsString(utf8), len); + } + pos += len; + Py_DECREF(utf8); + } else { + int handled = 0; + { + PyObject* tag_class = getCborTagClass(); + if (PyObject_IsInstance(ob, tag_class)) { + int err = dumps_tag(optp, ob, out, &pos); + if (err != 0) { return err; } + handled = 1; + } + // tag_class was just a borrowed reference + } + + // TODO: other special object serializations here + + if (!handled) { +#if IS_PY3 + PyErr_Format(PyExc_ValueError, "cannot serialize unknown object: %R", ob); +#else + PyObject* badtype = PyObject_Type(ob); + PyObject* badtypename = PyObject_Str(badtype); + PyErr_Format(PyExc_ValueError, "cannot serialize unknown object of type %s", PyString_AsString(badtypename)); + Py_DECREF(badtypename); + Py_DECREF(badtype); +#endif + return -1; + } + } + if (posp != NULL) { + *posp = pos; + } + return 0; +} + +static int _dumps_kwargs(EncodeOptions *optp, PyObject* kwargs) { + if (kwargs == NULL) { + } else if (!PyDict_Check(kwargs)) { + PyErr_Format(PyExc_ValueError, "kwargs not dict: %R\n", kwargs); + return 0; + } else { + PyObject* sort_keys = PyDict_GetItemString(kwargs, "sort_keys"); // Borrowed ref + if (sort_keys != NULL) { + optp->sort_keys = PyObject_IsTrue(sort_keys); + //fprintf(stderr, "sort_keys=%d\n", optp->sort_keys); + } + } + return 1; +} + +static PyObject* +cbor_dumps(PyObject* noself, PyObject* args, PyObject* kwargs) { + + PyObject* ob; + EncodeOptions opts = {0}; + EncodeOptions *optp = &opts; + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + if (ob == NULL) { + return NULL; + } + + if (!_dumps_kwargs(optp, kwargs)) { + return NULL; + } + + { + Py_ssize_t outlen = 0; + uintptr_t pos = 0; + void* out = NULL; + PyObject* obout = NULL; + int err; + + // first pass just to count length + err = inner_dumps(optp, ob, NULL, &pos); + if (err != 0) { + return NULL; + } + + outlen = pos; + + out = PyMem_Malloc(outlen); + if (out == NULL) { + PyErr_NoMemory(); + return NULL; + } + + err = inner_dumps(optp, ob, out, NULL); + if (err != 0) { + PyMem_Free(out); + return NULL; + } + + // TODO: I wish there was a way to do this without this copy. 
+ obout = PyBytes_FromStringAndSize(out, outlen); + PyMem_Free(out); + return obout; + } +} + +static PyObject* +cbor_dump(PyObject* noself, PyObject* args, PyObject *kwargs) { + // args should be (obj, fp) + PyObject* ob; + PyObject* fp; + EncodeOptions opts = {0}; + EncodeOptions *optp = &opts; + + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + fp = PyList_GetItem(args, 1); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + fp = PyTuple_GetItem(args, 1); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + if ((ob == NULL) || (fp == NULL)) { + return NULL; + } + + if (!_dumps_kwargs(optp, kwargs)) { + return NULL; + } + + { + // TODO: make this smarter, right now it is justt fp.write(dumps(ob)) + Py_ssize_t outlen = 0; + uintptr_t pos = 0; + void* out = NULL; + int err; + + // first pass just to count length + err = inner_dumps(optp, ob, NULL, &pos); + if (err != 0) { + return NULL; + } + + outlen = pos; + + out = PyMem_Malloc(outlen); + if (out == NULL) { + PyErr_NoMemory(); + return NULL; + } + + err = inner_dumps(optp, ob, out, NULL); + if (err != 0) { + PyMem_Free(out); + return NULL; + } + +#if HAS_FILE_READER + if (PyFile_Check(fp)) { + FILE* fout = PyFile_AsFile(fp); + fwrite(out, 1, outlen, fout); + } else +#endif + { + PyObject* ret; + PyObject* obout = NULL; +#if IS_PY3 + PyObject* writeStr = PyUnicode_FromString("write"); +#else + PyObject* writeStr = PyString_FromString("write"); +#endif + obout = PyBytes_FromStringAndSize(out, outlen); + //logprintf("write %zd bytes to %p.write() as %p\n", outlen, fp, obout); + ret = PyObject_CallMethodObjArgs(fp, writeStr, obout, NULL); + Py_DECREF(writeStr); + Py_DECREF(obout); + if (ret != NULL) { + Py_DECREF(ret); + } else { + // exception in fp.write() + PyMem_Free(out); + return NULL; + } + //logprintf("wrote %zd bytes to %p.write() as %p\n", outlen, fp, obout); + } + PyMem_Free(out); + } + + Py_RETURN_NONE; +} + + +static PyMethodDef CborMethods[] = { + {"loads", cbor_loads, METH_VARARGS, + "parse cbor from data buffer to objects"}, + {"dumps", (PyCFunction)cbor_dumps, METH_VARARGS|METH_KEYWORDS, + "serialize python object to bytes"}, + {"load", cbor_load, METH_VARARGS, + "Parse cbor from data buffer to objects.\n" + "Takes a file-like object capable of .read(N)\n"}, + {"dump", (PyCFunction)cbor_dump, METH_VARARGS|METH_KEYWORDS, + "Serialize python object to bytes.\n" + "dump(obj, fp)\n" + "obj: object to output; fp: file-like object to .write() to\n"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +#ifdef Py_InitModule +// Python 2.7 +PyMODINIT_FUNC +init_cbor(void) +{ + (void) Py_InitModule("cbor._cbor", CborMethods); +} +#else +// Python 3 +PyMODINIT_FUNC +PyInit__cbor(void) +{ + static PyModuleDef modef = { + PyModuleDef_HEAD_INIT, + }; + //modef.m_base = PyModuleDef_HEAD_INIT; + modef.m_name = "cbor._cbor"; + modef.m_doc = NULL; + modef.m_size = 0; + modef.m_methods = CborMethods; +#ifdef Py_mod_exec + modef.m_slots = NULL; // Py >= 3.5 +#else + modef.m_reload = NULL; // Py < 3.5 +#endif + modef.m_traverse = NULL; + modef.m_clear = NULL; + modef.m_free = NULL; + return PyModule_Create(&modef); +} +#endif + diff --git a/mercurial/thirdparty/cbor/cbor/VERSION.py b/mercurial/thirdparty/cbor/cbor/VERSION.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/VERSION.py @@ -0,0 +1 @@ +'1.0.0' diff --git a/mercurial/thirdparty/cbor/cbor/__init__.py 
b/mercurial/thirdparty/cbor/cbor/__init__.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/__init__.py @@ -0,0 +1,19 @@ +#!python + +try: + # try C library _cbor.so + from ._cbor import loads, dumps, load, dump +except: + # fall back to 100% python implementation + from .cbor import loads, dumps, load, dump + +from .cbor import Tag +from .tagmap import TagMapper, ClassTag, UnknownTagException +from .VERSION import __doc__ as __version__ + +__all__ = [ + 'loads', 'dumps', 'load', 'dump', + 'Tag', + 'TagMapper', 'ClassTag', 'UnknownTagException', + '__version__', +] diff --git a/mercurial/thirdparty/cbor/cbor/cbor.py b/mercurial/thirdparty/cbor/cbor/cbor.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/cbor.py @@ -0,0 +1,510 @@ +#!python +# -*- Python -*- + +import datetime +import re +import struct +import sys + +_IS_PY3 = sys.version_info[0] >= 3 + +if _IS_PY3: + from io import BytesIO as StringIO +else: + try: + from cStringIO import StringIO + except: + from StringIO import StringIO + + +CBOR_TYPE_MASK = 0xE0 # top 3 bits +CBOR_INFO_BITS = 0x1F # low 5 bits + + +CBOR_UINT = 0x00 +CBOR_NEGINT = 0x20 +CBOR_BYTES = 0x40 +CBOR_TEXT = 0x60 +CBOR_ARRAY = 0x80 +CBOR_MAP = 0xA0 +CBOR_TAG = 0xC0 +CBOR_7 = 0xE0 # float and other types + +CBOR_UINT8_FOLLOWS = 24 # 0x18 +CBOR_UINT16_FOLLOWS = 25 # 0x19 +CBOR_UINT32_FOLLOWS = 26 # 0x1a +CBOR_UINT64_FOLLOWS = 27 # 0x1b +CBOR_VAR_FOLLOWS = 31 # 0x1f + +CBOR_BREAK = 0xFF + +CBOR_FALSE = (CBOR_7 | 20) +CBOR_TRUE = (CBOR_7 | 21) +CBOR_NULL = (CBOR_7 | 22) +CBOR_UNDEFINED = (CBOR_7 | 23) # js 'undefined' value + +CBOR_FLOAT16 = (CBOR_7 | 25) +CBOR_FLOAT32 = (CBOR_7 | 26) +CBOR_FLOAT64 = (CBOR_7 | 27) + +CBOR_TAG_DATE_STRING = 0 # RFC3339 +CBOR_TAG_DATE_ARRAY = 1 # any number type follows, seconds since 1970-01-01T00:00:00 UTC +CBOR_TAG_BIGNUM = 2 # big endian byte string follows +CBOR_TAG_NEGBIGNUM = 3 # big endian byte string follows +CBOR_TAG_DECIMAL = 4 # [ 10^x exponent, number ] +CBOR_TAG_BIGFLOAT = 5 # [ 2^x exponent, number ] +CBOR_TAG_BASE64URL = 21 +CBOR_TAG_BASE64 = 22 +CBOR_TAG_BASE16 = 23 +CBOR_TAG_CBOR = 24 # following byte string is embedded CBOR data + +CBOR_TAG_URI = 32 +CBOR_TAG_BASE64URL = 33 +CBOR_TAG_BASE64 = 34 +CBOR_TAG_REGEX = 35 +CBOR_TAG_MIME = 36 # following text is MIME message, headers, separators and all +CBOR_TAG_CBOR_FILEHEADER = 55799 # can open a file with 0xd9d9f7 + +_CBOR_TAG_BIGNUM_BYTES = struct.pack('B', CBOR_TAG | CBOR_TAG_BIGNUM) + + +def dumps_int(val): + "return bytes representing int val in CBOR" + if val >= 0: + # CBOR_UINT is 0, so I'm lazy/efficient about not OR-ing it in. 
+ if val <= 23: + return struct.pack('B', val) + if val <= 0x0ff: + return struct.pack('BB', CBOR_UINT8_FOLLOWS, val) + if val <= 0x0ffff: + return struct.pack('!BH', CBOR_UINT16_FOLLOWS, val) + if val <= 0x0ffffffff: + return struct.pack('!BI', CBOR_UINT32_FOLLOWS, val) + if val <= 0x0ffffffffffffffff: + return struct.pack('!BQ', CBOR_UINT64_FOLLOWS, val) + outb = _dumps_bignum_to_bytearray(val) + return _CBOR_TAG_BIGNUM_BYTES + _encode_type_num(CBOR_BYTES, len(outb)) + outb + val = -1 - val + return _encode_type_num(CBOR_NEGINT, val) + + +if _IS_PY3: + def _dumps_bignum_to_bytearray(val): + out = [] + while val > 0: + out.insert(0, val & 0x0ff) + val = val >> 8 + return bytes(out) +else: + def _dumps_bignum_to_bytearray(val): + out = [] + while val > 0: + out.insert(0, chr(val & 0x0ff)) + val = val >> 8 + return b''.join(out) + + +def dumps_float(val): + return struct.pack("!Bd", CBOR_FLOAT64, val) + + +_CBOR_TAG_NEGBIGNUM_BYTES = struct.pack('B', CBOR_TAG | CBOR_TAG_NEGBIGNUM) + + +def _encode_type_num(cbor_type, val): + """For some CBOR primary type [0..7] and an auxiliary unsigned number, return CBOR encoded bytes""" + assert val >= 0 + if val <= 23: + return struct.pack('B', cbor_type | val) + if val <= 0x0ff: + return struct.pack('BB', cbor_type | CBOR_UINT8_FOLLOWS, val) + if val <= 0x0ffff: + return struct.pack('!BH', cbor_type | CBOR_UINT16_FOLLOWS, val) + if val <= 0x0ffffffff: + return struct.pack('!BI', cbor_type | CBOR_UINT32_FOLLOWS, val) + if (((cbor_type == CBOR_NEGINT) and (val <= 0x07fffffffffffffff)) or + ((cbor_type != CBOR_NEGINT) and (val <= 0x0ffffffffffffffff))): + return struct.pack('!BQ', cbor_type | CBOR_UINT64_FOLLOWS, val) + if cbor_type != CBOR_NEGINT: + raise Exception("value too big for CBOR unsigned number: {0!r}".format(val)) + outb = _dumps_bignum_to_bytearray(val) + return _CBOR_TAG_NEGBIGNUM_BYTES + _encode_type_num(CBOR_BYTES, len(outb)) + outb + + +if _IS_PY3: + def _is_unicode(val): + return isinstance(val, str) +else: + def _is_unicode(val): + return isinstance(val, unicode) + + +def dumps_string(val, is_text=None, is_bytes=None): + if _is_unicode(val): + val = val.encode('utf8') + is_text = True + is_bytes = False + if (is_bytes) or not (is_text == True): + return _encode_type_num(CBOR_BYTES, len(val)) + val + return _encode_type_num(CBOR_TEXT, len(val)) + val + + +def dumps_array(arr, sort_keys=False): + head = _encode_type_num(CBOR_ARRAY, len(arr)) + parts = [dumps(x, sort_keys=sort_keys) for x in arr] + return head + b''.join(parts) + + +if _IS_PY3: + def dumps_dict(d, sort_keys=False): + head = _encode_type_num(CBOR_MAP, len(d)) + parts = [head] + if sort_keys: + for k in sorted(d.keys()): + v = d[k] + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + else: + for k,v in d.items(): + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + return b''.join(parts) +else: + def dumps_dict(d, sort_keys=False): + head = _encode_type_num(CBOR_MAP, len(d)) + parts = [head] + if sort_keys: + for k in sorted(d.iterkeys()): + v = d[k] + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + else: + for k,v in d.iteritems(): + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + return b''.join(parts) + + +def dumps_bool(b): + if b: + return struct.pack('B', CBOR_TRUE) + return struct.pack('B', CBOR_FALSE) + + +def dumps_tag(t, sort_keys=False): + return _encode_type_num(CBOR_TAG, t.tag) + dumps(t.value, 
sort_keys=sort_keys) + + +if _IS_PY3: + def _is_stringish(x): + return isinstance(x, (str, bytes)) + def _is_intish(x): + return isinstance(x, int) +else: + def _is_stringish(x): + return isinstance(x, (str, basestring, bytes, unicode)) + def _is_intish(x): + return isinstance(x, (int, long)) + + +def dumps(ob, sort_keys=False): + if ob is None: + return struct.pack('B', CBOR_NULL) + if isinstance(ob, bool): + return dumps_bool(ob) + if _is_stringish(ob): + return dumps_string(ob) + if isinstance(ob, (list, tuple)): + return dumps_array(ob, sort_keys=sort_keys) + # TODO: accept other enumerables and emit a variable length array + if isinstance(ob, dict): + return dumps_dict(ob, sort_keys=sort_keys) + if isinstance(ob, float): + return dumps_float(ob) + if _is_intish(ob): + return dumps_int(ob) + if isinstance(ob, Tag): + return dumps_tag(ob, sort_keys=sort_keys) + raise Exception("don't know how to cbor serialize object of type %s", type(ob)) + + +# same basic signature as json.dump, but with no options (yet) +def dump(obj, fp, sort_keys=False): + """ + obj: Python object to serialize + fp: file-like object capable of .write(bytes) + """ + # this is kinda lame, but probably not inefficient for non-huge objects + # TODO: .write() to fp as we go as each inner object is serialized + blob = dumps(obj, sort_keys=sort_keys) + fp.write(blob) + + +class Tag(object): + def __init__(self, tag=None, value=None): + self.tag = tag + self.value = value + + def __repr__(self): + return "Tag({0!r}, {1!r})".format(self.tag, self.value) + + def __eq__(self, other): + if not isinstance(other, Tag): + return False + return (self.tag == other.tag) and (self.value == other.value) + + +def loads(data): + """ + Parse CBOR bytes and return Python objects. + """ + if data is None: + raise ValueError("got None for buffer to decode in loads") + fp = StringIO(data) + return _loads(fp)[0] + + +def load(fp): + """ + Parse and return object from fp, a file-like object supporting .read(n) + """ + return _loads(fp)[0] + + +_MAX_DEPTH = 100 + + +def _tag_aux(fp, tb): + bytes_read = 1 + tag = tb & CBOR_TYPE_MASK + tag_aux = tb & CBOR_INFO_BITS + if tag_aux <= 23: + aux = tag_aux + elif tag_aux == CBOR_UINT8_FOLLOWS: + data = fp.read(1) + aux = struct.unpack_from("!B", data, 0)[0] + bytes_read += 1 + elif tag_aux == CBOR_UINT16_FOLLOWS: + data = fp.read(2) + aux = struct.unpack_from("!H", data, 0)[0] + bytes_read += 2 + elif tag_aux == CBOR_UINT32_FOLLOWS: + data = fp.read(4) + aux = struct.unpack_from("!I", data, 0)[0] + bytes_read += 4 + elif tag_aux == CBOR_UINT64_FOLLOWS: + data = fp.read(8) + aux = struct.unpack_from("!Q", data, 0)[0] + bytes_read += 8 + else: + assert tag_aux == CBOR_VAR_FOLLOWS, "bogus tag {0:02x}".format(tb) + aux = None + + return tag, tag_aux, aux, bytes_read + + +def _read_byte(fp): + tb = fp.read(1) + if len(tb) == 0: + # I guess not all file-like objects do this + raise EOFError() + return ord(tb) + + +def _loads_var_array(fp, limit, depth, returntags, bytes_read): + ob = [] + tb = _read_byte(fp) + while tb != CBOR_BREAK: + (subob, sub_len) = _loads_tb(fp, tb, limit, depth, returntags) + bytes_read += 1 + sub_len + ob.append(subob) + tb = _read_byte(fp) + return (ob, bytes_read + 1) + + +def _loads_var_map(fp, limit, depth, returntags, bytes_read): + ob = {} + tb = _read_byte(fp) + while tb != CBOR_BREAK: + (subk, sub_len) = _loads_tb(fp, tb, limit, depth, returntags) + bytes_read += 1 + sub_len + (subv, sub_len) = _loads(fp, limit, depth, returntags) + bytes_read += sub_len + ob[subk] = subv + 
tb = _read_byte(fp) + return (ob, bytes_read + 1) + + +if _IS_PY3: + def _loads_array(fp, limit, depth, returntags, aux, bytes_read): + ob = [] + for i in range(aux): + subob, subpos = _loads(fp) + bytes_read += subpos + ob.append(subob) + return ob, bytes_read + def _loads_map(fp, limit, depth, returntags, aux, bytes_read): + ob = {} + for i in range(aux): + subk, subpos = _loads(fp) + bytes_read += subpos + subv, subpos = _loads(fp) + bytes_read += subpos + ob[subk] = subv + return ob, bytes_read +else: + def _loads_array(fp, limit, depth, returntags, aux, bytes_read): + ob = [] + for i in xrange(aux): + subob, subpos = _loads(fp) + bytes_read += subpos + ob.append(subob) + return ob, bytes_read + def _loads_map(fp, limit, depth, returntags, aux, bytes_read): + ob = {} + for i in xrange(aux): + subk, subpos = _loads(fp) + bytes_read += subpos + subv, subpos = _loads(fp) + bytes_read += subpos + ob[subk] = subv + return ob, bytes_read + + +def _loads(fp, limit=None, depth=0, returntags=False): + "return (object, bytes read)" + if depth > _MAX_DEPTH: + raise Exception("hit CBOR loads recursion depth limit") + + tb = _read_byte(fp) + + return _loads_tb(fp, tb, limit, depth, returntags) + +def _loads_tb(fp, tb, limit=None, depth=0, returntags=False): + # Some special cases of CBOR_7 best handled by special struct.unpack logic here + if tb == CBOR_FLOAT16: + data = fp.read(2) + hibyte, lowbyte = struct.unpack_from("BB", data, 0) + exp = (hibyte >> 2) & 0x1F + mant = ((hibyte & 0x03) << 8) | lowbyte + if exp == 0: + val = mant * (2.0 ** -24) + elif exp == 31: + if mant == 0: + val = float('Inf') + else: + val = float('NaN') + else: + val = (mant + 1024.0) * (2 ** (exp - 25)) + if hibyte & 0x80: + val = -1.0 * val + return (val, 3) + elif tb == CBOR_FLOAT32: + data = fp.read(4) + pf = struct.unpack_from("!f", data, 0) + return (pf[0], 5) + elif tb == CBOR_FLOAT64: + data = fp.read(8) + pf = struct.unpack_from("!d", data, 0) + return (pf[0], 9) + + tag, tag_aux, aux, bytes_read = _tag_aux(fp, tb) + + if tag == CBOR_UINT: + return (aux, bytes_read) + elif tag == CBOR_NEGINT: + return (-1 - aux, bytes_read) + elif tag == CBOR_BYTES: + ob, subpos = loads_bytes(fp, aux) + return (ob, bytes_read + subpos) + elif tag == CBOR_TEXT: + raw, subpos = loads_bytes(fp, aux, btag=CBOR_TEXT) + ob = raw.decode('utf8') + return (ob, bytes_read + subpos) + elif tag == CBOR_ARRAY: + if aux is None: + return _loads_var_array(fp, limit, depth, returntags, bytes_read) + return _loads_array(fp, limit, depth, returntags, aux, bytes_read) + elif tag == CBOR_MAP: + if aux is None: + return _loads_var_map(fp, limit, depth, returntags, bytes_read) + return _loads_map(fp, limit, depth, returntags, aux, bytes_read) + elif tag == CBOR_TAG: + ob, subpos = _loads(fp) + bytes_read += subpos + if returntags: + # Don't interpret the tag, return it and the tagged object. + ob = Tag(aux, ob) + else: + # attempt to interpet the tag and the value into a Python object. 
+            ob = tagify(ob, aux)
+        return ob, bytes_read
+    elif tag == CBOR_7:
+        if tb == CBOR_TRUE:
+            return (True, bytes_read)
+        if tb == CBOR_FALSE:
+            return (False, bytes_read)
+        if tb == CBOR_NULL:
+            return (None, bytes_read)
+        if tb == CBOR_UNDEFINED:
+            return (None, bytes_read)
+        raise ValueError("unknown cbor tag 7 byte: {:02x}".format(tb))
+
+
+def loads_bytes(fp, aux, btag=CBOR_BYTES):
+    # TODO: limit to some maximum number of chunks and some maximum total bytes
+    if aux is not None:
+        # simple case
+        ob = fp.read(aux)
+        return (ob, aux)
+    # read chunks of bytes
+    chunklist = []
+    total_bytes_read = 0
+    while True:
+        tb = fp.read(1)[0]
+        if not _IS_PY3:
+            tb = ord(tb)
+        if tb == CBOR_BREAK:
+            total_bytes_read += 1
+            break
+        tag, tag_aux, aux, bytes_read = _tag_aux(fp, tb)
+        assert tag == btag, 'variable length value contains unexpected component'
+        ob = fp.read(aux)
+        chunklist.append(ob)
+        total_bytes_read += bytes_read + aux
+    return (b''.join(chunklist), total_bytes_read)
+
+
+if _IS_PY3:
+    def _bytes_to_biguint(bs):
+        out = 0
+        for ch in bs:
+            out = out << 8
+            out = out | ch
+        return out
+else:
+    def _bytes_to_biguint(bs):
+        out = 0
+        for ch in bs:
+            out = out << 8
+            out = out | ord(ch)
+        return out
+
+
+def tagify(ob, aux):
+    # TODO: make this extensible?
+    # cbor.register_tag_handler(tagnumber, tag_handler)
+    # where tag_handler takes (tagnumber, tagged_object)
+    if aux == CBOR_TAG_DATE_STRING:
+        # TODO: parse RFC3339 date string
+        pass
+    if aux == CBOR_TAG_DATE_ARRAY:
+        return datetime.datetime.utcfromtimestamp(ob)
+    if aux == CBOR_TAG_BIGNUM:
+        return _bytes_to_biguint(ob)
+    if aux == CBOR_TAG_NEGBIGNUM:
+        return -1 - _bytes_to_biguint(ob)
+    if aux == CBOR_TAG_REGEX:
+        # Is this actually a good idea? Should we just return the tag and the raw value to the user somehow?
+        return re.compile(ob)
+    return Tag(aux, ob)
diff --git a/mercurial/thirdparty/cbor/cbor/tagmap.py b/mercurial/thirdparty/cbor/cbor/tagmap.py
new file mode 100644
--- /dev/null
+++ b/mercurial/thirdparty/cbor/cbor/tagmap.py
@@ -0,0 +1,142 @@
+try:
+    # try C library _cbor.so
+    from ._cbor import loads, dumps, load, dump
+except:
+    # fall back to 100% python implementation
+    from .cbor import loads, dumps, load, dump
+
+from .cbor import Tag, CBOR_TAG_CBOR, _IS_PY3
+
+
+class ClassTag(object):
+    '''
+    For some CBOR tag_number, encode/decode Python class_type.
+    class_type mainly used for isinstance(foo, class_type)
+    Call encode_function() taking a Python instance and returning CBOR primitive types.
+    Call decode_function() on CBOR primitive types and return an instance of the Python class_type (a factory function).
+    '''
+    def __init__(self, tag_number, class_type, encode_function, decode_function):
+        self.tag_number = tag_number
+        self.class_type = class_type
+        self.encode_function = encode_function
+        self.decode_function = decode_function
+
+
+# TODO: This would be more efficient if it moved into cbor.py and
+# cbormodule.c, happening inline so that there is only one traversal
+# of the objects. But that would require two implementations. When
+# this API has been used more and can be considered settled I should
+# do that. -- Brian Olson 20140917_172229
+class TagMapper(object):
+    '''
+    Translate Python objects and CBOR tagged data.
+    Use the CBOR TAG system to note that some data is of a certain class.
+    Dump while translating Python objects into a CBOR compatible representation.
+    Load and translate CBOR primitives back into Python objects.
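+    Typical use: tm = TagMapper([ClassTag(...), ...]); tm.dumps(obj) / tm.loads(blob).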
+    '''
+    def __init__(self, class_tags=None, raise_on_unknown_tag=False):
+        '''
+        class_tags: list of ClassTag objects
+        '''
+        self.class_tags = class_tags
+        self.raise_on_unknown_tag = raise_on_unknown_tag
+
+    def encode(self, obj):
+        for ct in self.class_tags:
+            if (ct.class_type is None) or (ct.encode_function is None):
+                continue
+            if isinstance(obj, ct.class_type):
+                return Tag(ct.tag_number, ct.encode_function(obj))
+        if isinstance(obj, (list, tuple)):
+            return [self.encode(x) for x in obj]
+        if isinstance(obj, dict):
+            # assume key is a primitive
+            # can't do this in Python 2.6:
+            #return {k:self.encode(v) for k,v in obj.iteritems()}
+            out = {}
+            if _IS_PY3:
+                items = obj.items()
+            else:
+                items = obj.iteritems()
+            for k,v in items:
+                out[k] = self.encode(v)
+            return out
+        # fall through, let underlying cbor.dump decide if it can encode object
+        return obj
+
+    def decode(self, obj):
+        if isinstance(obj, Tag):
+            for ct in self.class_tags:
+                if ct.tag_number == obj.tag:
+                    return ct.decode_function(obj.value)
+            # unknown Tag
+            if self.raise_on_unknown_tag:
+                raise UnknownTagException(str(obj.tag))
+            # otherwise, pass it through
+            return obj
+        if isinstance(obj, list):
+            # update in place. cbor only decodes to list, not tuple
+            for i,v in enumerate(obj):
+                obj[i] = self.decode(v)
+            return obj
+        if isinstance(obj, dict):
+            # update in place
+            if _IS_PY3:
+                items = obj.items()
+            else:
+                items = obj.iteritems()
+            for k,v in items:
+                # assume key is a primitive
+                obj[k] = self.decode(v)
+            return obj
+        # non-recursive object (num,bool,blob,string)
+        return obj
+
+    def dump(self, obj, fp):
+        dump(self.encode(obj), fp)
+
+    def dumps(self, obj):
+        return dumps(self.encode(obj))
+
+    def load(self, fp):
+        return self.decode(load(fp))
+
+    def loads(self, blob):
+        return self.decode(loads(blob))
+
+
+class WrappedCBOR(ClassTag):
+    """Handles Tag 24, where a byte array is sub encoded CBOR.
+    Unpacks sub encoded object on finding such a tag.
+    Does not convert anything into such a tag.
+ + Usage: +>>> import cbor +>>> import cbor.tagmap +>>> tm=cbor.TagMapper([cbor.tagmap.WrappedCBOR()]) +>>> x = cbor.dumps(cbor.Tag(24, cbor.dumps({"a":[1,2,3]}))) +>>> x +'\xd8\x18G\xa1Aa\x83\x01\x02\x03' +>>> tm.loads(x) +{'a': [1L, 2L, 3L]} +>>> cbor.loads(x) +Tag(24L, '\xa1Aa\x83\x01\x02\x03') +""" + def __init__(self): + super(WrappedCBOR, self).__init__(CBOR_TAG_CBOR, None, None, loads) + + @staticmethod + def wrap(ob): + return Tag(CBOR_TAG_CBOR, dumps(ob)) + + @staticmethod + def dump(ob, fp): + return dump(Tag(CBOR_TAG_CBOR, dumps(ob)), fp) + + @staticmethod + def dumps(ob): + return dumps(Tag(CBOR_TAG_CBOR, dumps(ob))) + + +class UnknownTagException(BaseException): + pass diff --git a/mercurial/thirdparty/cbor/cbor/tests/__init__.py b/mercurial/thirdparty/cbor/cbor/tests/__init__.py new file mode 100644 diff --git a/mercurial/thirdparty/cbor/cbor/tests/test_cbor.py b/mercurial/thirdparty/cbor/cbor/tests/test_cbor.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/tests/test_cbor.py @@ -0,0 +1,442 @@ +#!python +# -*- coding: utf-8 -*- + +import base64 +import datetime +import json +import logging +import random +import sys +import time +import unittest +import zlib + + +logger = logging.getLogger(__name__) + + +from cbor.cbor import dumps as pydumps +from cbor.cbor import loads as pyloads +from cbor.cbor import dump as pydump +from cbor.cbor import load as pyload +from cbor.cbor import Tag +try: + from cbor._cbor import dumps as cdumps + from cbor._cbor import loads as cloads + from cbor._cbor import dump as cdump + from cbor._cbor import load as cload +except ImportError: + # still test what we can without C fast mode + logger.warn('testing without C accelerated CBOR', exc_info=True) + cdumps, cloads, cdump, cload = None, None, None, None + + +_IS_PY3 = sys.version_info[0] >= 3 + + +if _IS_PY3: + _range = range + from io import BytesIO as StringIO +else: + _range = xrange + from cStringIO import StringIO + + +class TestRoot(object): + @classmethod + def loads(cls, *args): + return cls._ld[0](*args) + @classmethod + def dumps(cls, *args, **kwargs): + return cls._ld[1](*args, **kwargs) + @classmethod + def speediterations(cls): + return cls._ld[2] + @classmethod + def load(cls, *args): + return cls._ld[3](*args) + @classmethod + def dump(cls, *args, **kwargs): + return cls._ld[4](*args, **kwargs) + @classmethod + def testable(cls): + ok = (cls._ld[0] is not None) and (cls._ld[1] is not None) and (cls._ld[3] is not None) and (cls._ld[4] is not None) + if not ok: + logger.warn('non-testable case %s skipped', cls.__name__) + return ok + +# Can't set class level function pointers, because then they expect a +# (cls) first argument. So, toss them in a list to hide them. 
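+# Each _ld list below is [loads, dumps, speed_iterations, load, dump]; the four
+# cases cover every pairing of the pure-Python and C implementations.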
+class TestPyPy(TestRoot): + _ld = [pyloads, pydumps, 1000, pyload, pydump] + +class TestPyC(TestRoot): + _ld = [pyloads, cdumps, 2000, pyload, cdump] + +class TestCPy(TestRoot): + _ld = [cloads, pydumps, 2000, cload, pydump] + +class TestCC(TestRoot): + _ld = [cloads, cdumps, 150000, cload, cdump] + + +if _IS_PY3: + def _join_jsers(jsers): + return (''.join(jsers)).encode('utf8') + def hexstr(bs): + return ' '.join(map(lambda x: '{0:02x}'.format(x), bs)) +else: + def _join_jsers(jsers): + return b''.join(jsers) + def hexstr(bs): + return ' '.join(map(lambda x: '{0:02x}'.format(ord(x)), bs)) + + +class XTestCBOR(object): + def _oso(self, ob): + ser = self.dumps(ob) + try: + o2 = self.loads(ser) + assert ob == o2, '%r != %r from %s' % (ob, o2, base64.b16encode(ser)) + except Exception as e: + sys.stderr.write('failure on buf len={0} {1!r} ob={2!r} {3!r}; {4}\n'.format(len(ser), hexstr(ser), ob, ser, e)) + raise + + def _osos(self, ob): + obs = self.dumps(ob) + o2 = self.loads(obs) + o2s = self.dumps(o2) + assert obs == o2s + + def _oso_bytearray(self, ob): + ser = self.dumps(ob) + try: + o2 = self.loads(bytearray(ser)) + assert ob == o2, '%r != %r from %s' % (ob, o2, base64.b16encode(ser)) + except Exception as e: + sys.stderr.write('failure on buf len={0} {1!r} ob={2!r} {3!r}; {4}\n'.format(len(ser), hexstr(ser), ob, ser, e)) + raise + + test_objects = [ + 1, + 0, + True, + False, + None, + -1, + -1.5, + 1.5, + 1000, + -1000, + 1000000000, + 2376030000, + -1000000000, + 1000000000000000, + -1000000000000000, + [], + [1,2,3], + {}, + b'aoeu1234\x00\xff', + u'åöéûのかめ亀', + b'', + u'', + Tag(1234, 'aoeu'), + ] + + def test_basic(self): + if not self.testable(): return + for ob in self.test_objects: + self._oso(ob) + + def test_basic_bytearray(self): + if not self.testable(): return + xoso = self._oso + self._oso = self._oso_bytearray + try: + self.test_basic() + finally: + self._oso = xoso + + def test_random_ints(self): + if not self.testable(): return + icount = self.speediterations() + for i in _range(icount): + v = random.randint(-4294967295, 0xffffffff) + self._oso(v) + oldv = [] + for i in _range(int(icount / 10)): + v = random.randint(-1000000000000000000000, 1000000000000000000000) + self._oso(v) + oldv.append(v) + + def test_randobs(self): + if not self.testable(): return + icount = self.speediterations() + for i in _range(icount): + ob = _randob() + self._oso(ob) + + def test_tuple(self): + if not self.testable(): return + l = [1,2,3] + t = tuple(l) + ser = self.dumps(t) + o2 = self.loads(ser) + assert l == o2 + + def test_speed_vs_json(self): + if not self.testable(): return + # It should be noted that the python standard library has a C implementation of key parts of json encoding and decoding + icount = self.speediterations() + obs = [_randob_notag() for x in _range(icount)] + st = time.time() + bsers = [self.dumps(o) for o in obs] + nt = time.time() + cbor_ser_time = nt - st + jsers = [json.dumps(o) for o in obs] + jt = time.time() + json_ser_time = jt - nt + cbor_byte_count = sum(map(len, bsers)) + json_byte_count = sum(map(len, jsers)) + sys.stderr.write( + 'serialized {nobs} objects into {cb} cbor bytes in {ct:.2f} seconds ({cops:.2f}/s, {cbps:.1f}B/s) and {jb} json bytes in {jt:.2f} seconds ({jops:.2f}/s, {jbps:.1f}B/s)\n'.format( + nobs=len(obs), + cb=cbor_byte_count, + ct=cbor_ser_time, + cops=len(obs) / cbor_ser_time, + cbps=cbor_byte_count / cbor_ser_time, + jb=json_byte_count, + jt=json_ser_time, + jops=len(obs) / json_ser_time, + jbps=json_byte_count / json_ser_time)) + 
bsersz = zlib.compress(b''.join(bsers)) + jsersz = zlib.compress(_join_jsers(jsers)) + sys.stderr.write('compress to {0} bytes cbor.gz and {1} bytes json.gz\n'.format( + len(bsersz), len(jsersz))) + + st = time.time() + bo2 = [self.loads(b) for b in bsers] + bt = time.time() + cbor_load_time = bt - st + jo2 = [json.loads(b) for b in jsers] + jt = time.time() + json_load_time = jt - bt + sys.stderr.write('load {nobs} objects from cbor in {ct:.2f} secs ({cops:.2f}/sec, {cbps:.1f}B/s) and json in {jt:.2f} ({jops:.2f}/sec, {jbps:.1f}B/s)\n'.format( + nobs=len(obs), + ct=cbor_load_time, + cops=len(obs) / cbor_load_time, + cbps=cbor_byte_count / cbor_load_time, + jt=json_load_time, + jops=len(obs) / json_load_time, + jbps=json_byte_count / json_load_time + )) + + def test_loads_none(self): + if not self.testable(): return + try: + ob = self.loads(None) + assert False, "expected ValueError when passing in None" + except ValueError: + pass + + def test_concat(self): + "Test that we can concatenate output and retrieve the objects back out." + if not self.testable(): return + self._oso(self.test_objects) + fob = StringIO() + + for ob in self.test_objects: + self.dump(ob, fob) + fob.seek(0) + obs2 = [] + try: + while True: + obs2.append(self.load(fob)) + except EOFError: + pass + assert obs2 == self.test_objects + + # TODO: find more bad strings with which to fuzz CBOR + def test_badread(self): + if not self.testable(): return + try: + ob = self.loads(b'\xff') + assert False, 'badread should have failed' + except ValueError as ve: + #logger.info('error', exc_info=True) + pass + except Exception as ex: + logger.info('unexpected error!', exc_info=True) + assert False, 'unexpected error' + str(ex) + + def test_datetime(self): + if not self.testable(): return + # right now we're just testing that it's possible to dumps() + # Tag(0,...) because there was a bug around that. 
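+        # Tag 0 carries an RFC 3339 date string; tagify() currently passes it through as a Tag rather than parsing it.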
+ xb = self.dumps(Tag(0, datetime.datetime(1984,1,24,23,22,21).isoformat())) + + def test_sortkeys(self): + if not self.testable(): return + obytes = [] + xbytes = [] + for n in _range(2, 27): + ob = {u'{:02x}'.format(x):x for x in _range(n)} + obytes.append(self.dumps(ob, sort_keys=True)) + xbytes.append(self.dumps(ob, sort_keys=False)) + allOGood = True + someXMiss = False + for i, g in enumerate(_GOLDEN_SORTED_KEYS_BYTES): + if g != obytes[i]: + logger.error('bad sorted result, wanted %r got %r', g, obytes[i]) + allOGood = False + if g != xbytes[i]: + someXMiss = True + + assert allOGood + assert someXMiss + + +_GOLDEN_SORTED_KEYS_BYTES = [ +b'\xa2b00\x00b01\x01', +b'\xa3b00\x00b01\x01b02\x02', +b'\xa4b00\x00b01\x01b02\x02b03\x03', +b'\xa5b00\x00b01\x01b02\x02b03\x03b04\x04', +b'\xa6b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05', +b'\xa7b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06', +b'\xa8b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07', +b'\xa9b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08', +b'\xaab00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\t', +b'\xabb00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\n', +b'\xacb00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0b', +b'\xadb00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0c', +b'\xaeb00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\r', +b'\xafb00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0e', +b'\xb0b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0f', +b'\xb1b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10', +b'\xb2b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11', +b'\xb3b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12', +b'\xb4b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13', +b'\xb5b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14', +b'\xb6b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14b15\x15', +b'\xb7b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14b15\x15b16\x16', +b'\xb8\x18b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14b15\x15b16\x16b17\x17', +b'\xb8\x19b00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14b15\x15b16\x16b17\x17b18\x18\x18', +b'\xb8\x1ab00\x00b01\x01b02\x02b03\x03b04\x04b05\x05b06\x06b07\x07b08\x08b09\tb0a\nb0b\x0bb0c\x0cb0d\rb0e\x0eb0f\x0fb10\x10b11\x11b12\x12b13\x13b14\x14b15\x15b16\x16b17\x17b18\x18\x18b19\x18\x19', +] + +def gen_sorted_bytes(): + for n in _range(2, 27): + sys.stdout.write(repr(cbor.dumps({u'{:02x}'.format(x):x for x in _range(n)}, sort_keys=True)) + ',\n') + +def gen_unsorted_bytes(): + for n in _range(2, 27): + 
sys.stdout.write(repr(cbor.dumps({u'{:02x}'.format(x):x for x in _range(n)}, sort_keys=False)) + ',\n') + + +class TestCBORPyPy(unittest.TestCase, XTestCBOR, TestPyPy): + pass + +class TestCBORCPy(unittest.TestCase, XTestCBOR, TestCPy): + pass + +class TestCBORPyC(unittest.TestCase, XTestCBOR, TestPyC): + pass + +class TestCBORCC(unittest.TestCase, XTestCBOR, TestCC): + pass + + +def _randob(): + return _randob_x(_randob_probabilities, _randob_probsum, _randob) + +def _randob_notag(): + return _randob_x(_randob_probabilities_notag, _randob_notag_probsum, _randob_notag) + +def _randArray(randob=_randob): + return [randob() for x in _range(random.randint(0,5))] + +_chars = [chr(x) for x in _range(ord(' '), ord('~'))] + +def _randStringOrBytes(randob=_randob): + tstr = ''.join([random.choice(_chars) for x in _range(random.randint(1,10))]) + if random.randint(0,1) == 1: + if _IS_PY3: + # default str is unicode + # sometimes squash to bytes + return tstr.encode('utf8') + else: + # default str is bytes + # sometimes promote to unicode string + return tstr.decode('utf8') + return tstr + +def _randString(randob=_randob): + return ''.join([random.choice(_chars) for x in _range(random.randint(1,10))]) + +def _randDict(randob=_randob): + ob = {} + for x in _range(random.randint(0,5)): + ob[_randString()] = randob() + return ob + + +def _randTag(randob=_randob): + t = Tag() + # Tags 0..36 are know standard things we might implement special + # decoding for. This number will grow over time, and this test + # need to be adjusted to only assign unclaimed tags for Tag<->Tag + # encode-decode testing. + t.tag = random.randint(37, 1000000) + t.value = randob() + return t + +def _randInt(randob=_randob): + return random.randint(-4294967295, 4294967295) + +def _randBignum(randob=_randob): + return random.randint(-1000000000000000000000, 1000000000000000000000) + +def _randFloat(randob=_randob): + return random.random() + +_CONSTANTS = (True, False, None) +def _randConst(randob=_randob): + return random.choice(_CONSTANTS) + +_randob_probabilities = [ + (0.1, _randDict), + (0.1, _randTag), + (0.2, _randArray), + (0.3, _randStringOrBytes), + (0.3, _randInt), + (0.2, _randBignum), + (0.2, _randFloat), + (0.2, _randConst), +] + +_randob_probsum = sum([x[0] for x in _randob_probabilities]) + +_randob_probabilities_notag = [ + (0.1, _randDict), + (0.2, _randArray), + (0.3, _randString), + (0.3, _randInt), + (0.2, _randBignum), + (0.2, _randFloat), + (0.2, _randConst), +] + +_randob_notag_probsum = sum([x[0] for x in _randob_probabilities_notag]) + +def _randob_x(probs=_randob_probabilities, probsum=_randob_probsum, randob=_randob): + pos = random.uniform(0, probsum) + for p, op in probs: + if pos < p: + return op(randob) + pos -= p + return None + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + unittest.main() diff --git a/mercurial/thirdparty/cbor/cbor/tests/test_objects.py b/mercurial/thirdparty/cbor/cbor/tests/test_objects.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/tests/test_objects.py @@ -0,0 +1,82 @@ +import base64 +import sys +import unittest + + +from cbor.tagmap import ClassTag, TagMapper, Tag, UnknownTagException + +#try: +from cbor.tests.test_cbor import TestPyPy, hexstr +#except ImportError: +# from .test_cbor import TestPyPy, hexstr + + +class SomeType(object): + "target type for translator tests" + def __init__(self, a, b): + self.a = a + self.b = b + + @staticmethod + def to_cbor(ob): + assert isinstance(ob, SomeType) + return (ob.a, ob.b) + + 
@staticmethod + def from_cbor(data): + return SomeType(*data) + + def __eq__(self, other): + # why isn't this just the default implementation in the object class? + return isinstance(other, type(self)) and (self.__dict__ == other.__dict__) + + +class UnknownType(object): + pass + + +known_tags = [ + ClassTag(4325, SomeType, SomeType.to_cbor, SomeType.from_cbor) +] + + +class TestObjects(unittest.TestCase): + def setUp(self): + self.tx = TagMapper(known_tags) + + def _oso(self, ob): + ser = self.tx.dumps(ob) + try: + o2 = self.tx.loads(ser) + assert ob == o2, '%r != %r from %s' % (ob, o2, base64.b16encode(ser)) + except Exception as e: + sys.stderr.write('failure on buf len={0} {1!r} ob={2!r} {3!r}; {4}\n'.format(len(ser), hexstr(ser), ob, ser, e)) + raise + + def test_basic(self): + self._oso(SomeType(1,2)) + + def test_unk_fail(self): + ok = False + try: + self.tx.dumps(UnknownType()) + except: + ok = True + assert ok + + def test_tag_passthrough(self): + self.tx.raise_on_unknown_tag = False + self._oso(Tag(1234, 'aoeu')) + + def test_unk_tag_fail(self): + ok = False + self.tx.raise_on_unknown_tag = True + try: + self._oso(Tag(1234, 'aoeu')) + except UnknownTagException as ute: + ok = True + ok = False + + +if __name__ == '__main__': + unittest.main() diff --git a/mercurial/thirdparty/cbor/cbor/tests/test_usage.py b/mercurial/thirdparty/cbor/cbor/tests/test_usage.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/tests/test_usage.py @@ -0,0 +1,241 @@ +#!python +from __future__ import absolute_import +from __future__ import division # / => float +import gc +import logging +import os +import resource +import sys +import tempfile +import unittest + +from cbor.tests.test_cbor import _randob + + +logger = logging.getLogger(__name__) + + +try: + from cbor._cbor import dumps as cdumps + from cbor._cbor import loads as cloads + from cbor._cbor import dump as cdump + from cbor._cbor import load as cload +except ImportError: + # still test what we can without C fast mode + logger.warn('testing without C accelerated CBOR', exc_info=True) + cdumps, cloads, cdump, cload = None, None, None, None + + + +_TEST_COUNT = 100000 +_TEST_OUTER = 5 + + +_IS_PY3 = sys.version_info[0] >= 3 + + +if _IS_PY3: + _range = range + from io import BytesIO as StringIO +else: + _range = xrange + from cStringIO import StringIO + + +class TestUsage(unittest.TestCase): + def test_dumps_usage(self): + ''' + repeatedly serialize, check that usage doesn't go up + ''' + if cdumps is None: + logger.warn('no C dumps(), skipping test_dumps_usage') + return + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + ob = _randob() + blob = cdumps(ob) + # and silently drop the result. I hope the garbage collector works! 
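+            # sample rusage once per outer pass; the assertions below fail if max RSS grew by more than 5%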
+ t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + def test_loads_usage(self): + ''' + repeatedly serialize, check that usage doesn't go up + ''' + if (cdumps is None) or (cloads is None): + logger.warn('no C fast CBOR, skipping test_loads_usage') + return + ## Just a string passes! + #ob = 'sntaoheusnatoheusnaotehuasnoetuhaosentuhaoesnth' + ## Just an array passes! + #ob = [1,2,3,4,5,6,7,8,9,12,12,13] + ## Just a dict passes! + #ob = {'a':'b', 'c':'d', 'e':'f', 'g':'h'} + # dict of dict is doom! + #ob = {'a':{'b':'c', 'd':'e', 'f':'g'}, 'x':'p'} + ob = {'aoeu':[1,2,3,4],'foo':'bar','pants':{'foo':0xb44, 'pi':3.14}, 'flubber': [{'x':'y', 'z':[None, 2, []]}, 2, 'hello']} + blob = cdumps(ob) + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + dob = cloads(blob) + # and silently drop the result. I hope the garbage collector works! + t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + def test_tempfile(self): + '''repeatedly seralize to temp file, then repeatedly deserialize from + it, checking usage all along the way. 
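+        (Requires the C extension; the test is skipped when cdump is unavailable.)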
+ ''' + if cdump is None: + logger.warn('no C dump(), skipping test_tempfile') + return + with tempfile.NamedTemporaryFile() as ntf: + # first, write a bunch to temp file + with open(ntf.name, 'wb') as fout: + sys.stderr.write('write {!r} {}\n'.format(ntf.name, fout)) + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + ob = _randob() + cdump(ob, fout) + t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + sys.stderr.write('{!r} is {} bytes\n'.format(ntf.name, os.path.getsize(ntf.name))) + + # now, read a bunch back from temp file. + with open(ntf.name, 'rb') as fin: + sys.stderr.write('read {!r} {}\n'.format(ntf.name, fin)) + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + dob = cload(fin) + # and silently drop the result. I hope the garbage collector works! + gc.collect() + t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + def test_stringio_usage(self): + '''serialize data to StringIO, read it back''' + if cdump is None: + logger.warn('no C dump(), skipping test_tempfile') + return + + # warmup the rusage, allocate everything! 
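+        # the measured passes below rewrite the same StringIO and reuse these objects, so RSS should stay flat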
+ fout = StringIO() + sys.stderr.write('write 1 to StringIO\n') + oblist = [] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + ob = _randob() + oblist.append(ob) + cdump(ob, fout) + + # position at start to overwrite, but leave allocated + fout.seek(0) + + sys.stderr.write('write 2 to StringIO\n') + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + pos = 0 + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + ob = oblist[pos] + pos += 1 + cdump(ob, fout) + gc.collect() + t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + sys.stderr.write('StringIO is {} bytes\n'.format(fout.tell())) + fout.seek(0) + + fin = fout + sys.stderr.write('read StringIO\n') + start_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history = [start_usage] + for o in _range(_TEST_OUTER): + for i in _range(_TEST_COUNT): + dob = cload(fin) + # and silently drop the result. I hope the garbage collector works! + gc.collect() + t_usage = resource.getrusage(resource.RUSAGE_SELF) + usage_history.append(t_usage) + end_usage = usage_history[-1] + dmaxrss = end_usage.ru_maxrss - start_usage.ru_maxrss + didrss = end_usage.ru_idrss - start_usage.ru_idrss + dmaxrsspct = ((end_usage.ru_maxrss != 0) and (dmaxrss / end_usage.ru_maxrss)) or 0 + didrsspct = ((end_usage.ru_idrss != 0) and (didrss / end_usage.ru_idrss)) or 0 + + sys.stderr.write('maxrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_maxrss, end_usage.ru_maxrss, dmaxrss, dmaxrsspct * 100.0)) + sys.stderr.write('idrss: {} - {}, d={} ({:.2f}%)\n'.format(start_usage.ru_idrss, end_usage.ru_idrss, didrss, didrsspct * 100.0)) + + assert (dmaxrsspct) < 0.05, [x.ru_maxrss for x in usage_history] + assert (didrsspct) < 0.05, [x.ru_idrss for x in usage_history] + + + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + unittest.main() diff --git a/mercurial/thirdparty/cbor/cbor/tests/test_vectors.py b/mercurial/thirdparty/cbor/cbor/tests/test_vectors.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/cbor/tests/test_vectors.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python + +""" +Test CBOR implementation against common "test vectors" set from +https://github.com/cbor/test-vectors/ +""" + +import base64 +import json +import logging +import math +import os +import sys +import unittest + + +_IS_PY3 = sys.version_info[0] >= 3 + + +logger = logging.getLogger(__name__) + + +#from cbor.cbor import dumps as pydumps +from cbor.cbor import loads as pyloads +try: + #from cbor._cbor import dumps as cdumps + from cbor._cbor import loads as cloads +except ImportError: + # still test what we can without C fast mode + logger.warn('testing without C accelerated CBOR', exc_info=True) + #cdumps, cloads = None, None + cloads = None +from cbor import Tag + + 
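+# Each record in appendix_a.json gives the CBOR item as base64 ("cbor"), optionally
+# its hex form ("hex"), and either the expected decoded value ("decoded") or a
+# CBOR diagnostic-notation string ("diagnostic").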
+# Accomodate several test vectors that have diagnostic descriptors but not JSON +_DIAGNOSTIC_TESTS = { + 'Infinity': lambda x: x == float('Inf'), + '-Infinity': lambda x: x == float('-Inf'), + 'NaN': math.isnan, + 'undefined': lambda x: x is None, + + # TODO: parse into datetime.datetime() + '0("2013-03-21T20:04:00Z")': lambda x: isinstance(x, Tag) and (x.tag == 0) and (x.value == '2013-03-21T20:04:00Z'), + + "h''": lambda x: x == b'', + "(_ h'0102', h'030405')": lambda x: x == b'\x01\x02\x03\x04\x05', + '{1: 2, 3: 4}': lambda x: x == {1: 2, 3: 4}, + "h'01020304'": lambda x: x == b'\x01\x02\x03\x04', +} + + +# We expect these to raise exception because they encode reserved/unused codes in the spec. +# ['hex'] values of tests we expect to raise +_EXPECT_EXCEPTION = set(['f0', 'f818', 'f8ff']) + + +def _check(row, decoded): + cbdata = base64.b64decode(row['cbor']) + if cloads is not None: + cb = cloads(cbdata) + if cb != decoded: + anyerr = True + sys.stderr.write('expected {0!r} got {1!r} c failed to decode cbor {2}\n'.format(decoded, cb, base64.b16encode(cbdata))) + + cb = pyloads(cbdata) + if cb != decoded: + anyerr = True + sys.stderr.write('expected {0!r} got {1!r} py failed to decode cbor {2}\n'.format(decoded, cb, base64.b16encode(cbdata))) + + +def _check_foo(row, checkf): + cbdata = base64.b64decode(row['cbor']) + if cloads is not None: + cb = cloads(cbdata) + if not checkf(cb): + anyerr = True + sys.stderr.write('expected {0!r} got {1!r} c failed to decode cbor {2}\n'.format(decoded, cb, base64.b16encode(cbdata))) + + cb = pyloads(cbdata) + if not checkf(cb): + anyerr = True + sys.stderr.write('expected {0!r} got {1!r} py failed to decode cbor {2}\n'.format(decoded, cb, base64.b16encode(cbdata))) + + +class TestVectors(unittest.TestCase): + def test_vectors(self): + here = os.path.dirname(__file__) + jf = os.path.abspath(os.path.join(here, '../../../test-vectors/appendix_a.json')) + if not os.path.exists(jf): + logging.warning('cannot find test-vectors/appendix_a.json, tried: %r', jf) + return + + if _IS_PY3: + testfile = open(jf, 'r') + tv = json.load(testfile) + else: + testfile = open(jf, 'rb') + tv = json.load(testfile) + anyerr = False + for row in tv: + rhex = row.get('hex') + if 'decoded' in row: + decoded = row['decoded'] + _check(row, decoded) + continue + elif 'diagnostic' in row: + diag = row['diagnostic'] + checkf = _DIAGNOSTIC_TESTS.get(diag) + if checkf is not None: + _check_foo(row, checkf) + continue + + # variously verbose log of what we're not testing: + cbdata = base64.b64decode(row['cbor']) + try: + pd = pyloads(cbdata) + except: + if rhex and (rhex in _EXPECT_EXCEPTION): + pass + else: + logging.error('failed to py load hex=%s diag=%r', rhex, row.get('diagnostic'), exc_info=True) + pd = '' + cd = None + if cloads is not None: + try: + cd = cloads(cbdata) + except: + if rhex and (rhex in _EXPECT_EXCEPTION): + pass + else: + logging.error('failed to c load hex=%s diag=%r', rhex, row.get('diagnostic'), exc_info=True) + cd = '' + logging.warning('skipping hex=%s diag=%r py=%s c=%s', rhex, row.get('diagnostic'), pd, cd) + testfile.close() + + assert not anyerr + + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + unittest.main() diff --git a/mercurial/thirdparty/cbor/setup.py b/mercurial/thirdparty/cbor/setup.py new file mode 100644 --- /dev/null +++ b/mercurial/thirdparty/cbor/setup.py @@ -0,0 +1,129 @@ +#! 
/usr/bin/env python +# Copyright 2014 Brian Olson +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Thanks! +# to Mic Bowman for a bunch of work and impetus on dumps(,sort_keys=) + +from distutils.command.build_ext import build_ext +from distutils.errors import (CCompilerError, DistutilsExecError, + DistutilsPlatformError) +import sys + +from setuptools import setup, Extension + + +build_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) +if sys.platform == 'win32' and sys.version_info > (2, 6): + # 2.6's distutils.msvc9compiler can raise an IOError when failing to + # find the compiler + build_errors += (IOError,) + + +class BuildError(Exception): + """Raised if compiling extensions failed.""" + + +class optional_build_ext(build_ext): + """build_ext implementation with optional C speedups.""" + + def run(self): + try: + build_ext.run(self) + except DistutilsPlatformError: + raise BuildError() + + def build_extension(self, ext): + try: + build_ext.build_extension(self, ext) + except build_errors as be: + raise BuildError(be) + except ValueError as ve: + # this can happen on Windows 64 bit, see Python issue 7511 + if "'path'" in str(sys.exc_info()[1]): # works with Python 2 and 3 + raise BuildError(ve) + raise + + +VERSION = eval(open('cbor/VERSION.py','rb').read()) + + +setup_options = dict( + name='cbor', + version=VERSION, + description='RFC 7049 - Concise Binary Object Representation', + long_description=""" +An implementation of RFC 7049 - Concise Binary Object Representation (CBOR). + +CBOR is comparable to JSON, has a superset of JSON's ability, but serializes to a binary format which is smaller and faster to generate and parse. + +The two primary functions are cbor.loads() and cbor.dumps(). + +This library includes a C implementation which runs 3-5 times faster than the Python standard library's C-accelerated implementanion of JSON. This is also includes a 100% Python implementation. +""", + author='Brian Olson', + author_email='bolson@bolson.org', + url='https://bitbucket.org/bodhisnarkva/cbor', + packages=['cbor'], + package_dir={'cbor':'cbor'}, + ext_modules=[ + Extension( + 'cbor._cbor', + include_dirs=['c/'], + sources=['c/cbormodule.c'], + headers=['c/cbor.h'], + ) + ], + license='Apache', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: C', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + cmdclass={'build_ext': optional_build_ext}, +) + + +def main(): + """ Perform setup with optional C speedups. + + Optional extension compilation stolen from markupsafe, which again stole + it from simplejson. Creds to Bob Ippolito for the original code. 
+ """ + is_jython = 'java' in sys.platform + is_pypy = hasattr(sys, 'pypy_translation_info') + + if is_jython or is_pypy: + del setup_options['ext_modules'] + + try: + setup(**setup_options) + except BuildError as be: + sys.stderr.write(''' +BUILD ERROR: + %s +RETRYING WITHOUT C EXTENSIONS +''' % (be,)) + del setup_options['ext_modules'] + setup(**setup_options) + + +if __name__ == '__main__': + main() diff --git a/mercurial/thirdparty/cbor/utest.sh b/mercurial/thirdparty/cbor/utest.sh new file mode 100755 --- /dev/null +++ b/mercurial/thirdparty/cbor/utest.sh @@ -0,0 +1,11 @@ +#!/bin/sh -x + +python -m cbor.tests.test_cbor +python -m cbor.tests.test_objects +python -m cbor.tests.test_usage +python -m cbor.tests.test_vectors + +#python cbor/tests/test_cbor.py +#python cbor/tests/test_objects.py +#python cbor/tests/test_usage.py +#python cbor/tests/test_vectors.py