diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -638,7 +638,7 @@ PyObject *encodedir(PyObject *self, PyObject *args); PyObject *pathencode(PyObject *self, PyObject *args); PyObject *lowerencode(PyObject *self, PyObject *args); -PyObject *parse_index2(PyObject *self, PyObject *args); +PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs); static PyMethodDef methods[] = { {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"}, @@ -646,7 +646,8 @@ "create a set containing non-normal and other parent entries of given " "dirstate\n"}, {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, - {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"}, + {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS, + "parse a revlog index\n"}, {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"}, {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"}, {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"}, diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -98,6 +98,7 @@ int ntlookups; /* # lookups */ int ntmisses; /* # lookups that miss the cache */ int inlined; + long hdrsize; /* size of index headers. Differs in v1 vs. 
v2 format */ }; static Py_ssize_t index_length(const indexObject *self) @@ -113,14 +114,19 @@ static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#"); +static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#"); +static const char *const v2_tuple_format = PY23("Kiiiiiis#KiKi", "Kiiiiiiy#KiKi"); #else -static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#"); +static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#"); +static const char *const v2_tuple_format = PY23("kiiiiiis#kiki", "kiiiiiiy#kiki"); #endif /* A RevlogNG v1 index entry is 64 bytes long. */ static const long v1_hdrsize = 64; +/* A Revlogv2 index entry is 96 bytes long. */ +static const long v2_hdrsize = 96; + static void raise_revlog_error(void) { PyObject *mod = NULL, *dict = NULL, *errclass = NULL; @@ -157,7 +163,7 @@ static const char *index_deref(indexObject *self, Py_ssize_t pos) { if (pos >= self->length) - return self->added + (pos - self->length) * v1_hdrsize; + return self->added + (pos - self->length) * self->hdrsize; if (self->inlined && pos > 0) { if (self->offsets == NULL) { @@ -174,7 +180,7 @@ return self->offsets[pos]; } - return (const char *)(self->buf.buf) + pos * v1_hdrsize; + return (const char *)(self->buf.buf) + pos * self->hdrsize; } /* @@ -280,8 +286,9 @@ */ static PyObject *index_get(indexObject *self, Py_ssize_t pos) { - uint64_t offset_flags; - int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + uint64_t offset_flags, sidedata_offset, unified_revlog_id; + int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, + sidedata_comp_len, rank; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -320,9 +327,22 @@ parent_2 = getbe32(data + 28); c_node_id = data + 32; - return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, - base_rev, link_rev, parent_1, 
parent_2, c_node_id, - self->nodelen); + if (self->hdrsize == v1_hdrsize) { + return Py_BuildValue(v1_tuple_format, offset_flags, comp_len, uncomp_len, + base_rev, link_rev, parent_1, parent_2, c_node_id, + self->nodelen); + } else { + unified_revlog_id = getbe64(data + 64); + rank = getbe32(data + 72); + sidedata_offset = getbe64(data + 76); + sidedata_comp_len = getbe32(data + 84); + + return Py_BuildValue(v2_tuple_format, offset_flags, comp_len, + uncomp_len, base_rev, link_rev, parent_1, + parent_2, c_node_id, self->nodelen, + unified_revlog_id, rank, + sidedata_offset, sidedata_comp_len); + } } /* @@ -373,18 +393,31 @@ static PyObject *index_append(indexObject *self, PyObject *obj) { - uint64_t offset_flags; + uint64_t offset_flags, unified_revlog_id, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; Py_ssize_t c_node_id_len; + int rank, sidedata_comp_len; /* "i" format targets must be int */ const char *c_node_id; char *data; - if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, - &uncomp_len, &base_rev, &link_rev, &parent_1, - &parent_2, &c_node_id, &c_node_id_len)) { - PyErr_SetString(PyExc_TypeError, "8-tuple required"); - return NULL; + if (self->hdrsize == v1_hdrsize) { + if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags, &comp_len, + &uncomp_len, &base_rev, &link_rev, &parent_1, + &parent_2, &c_node_id, &c_node_id_len)) { + PyErr_SetString(PyExc_TypeError, "8-tuple required"); + return NULL; + } + } else { + if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags, &comp_len, + &uncomp_len, &base_rev, &link_rev, &parent_1, + &parent_2, &c_node_id, &c_node_id_len, + &unified_revlog_id, &rank, &sidedata_offset, + &sidedata_comp_len)) { + PyErr_SetString(PyExc_TypeError, "12-tuple required"); + return NULL; + } } + if (c_node_id_len != self->nodelen) { PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; @@ -394,14 +427,14 @@ size_t new_added_length = self->added_length ? 
self->added_length * 2 : 4096; void *new_added = - PyMem_Realloc(self->added, new_added_length * v1_hdrsize); + PyMem_Realloc(self->added, new_added_length * self->hdrsize); if (!new_added) return PyErr_NoMemory(); self->added = new_added; self->added_length = new_added_length; } rev = self->length + self->new_length; - data = self->added + v1_hdrsize * self->new_length++; + data = self->added + self->hdrsize * self->new_length++; putbe32(offset_flags >> 32, data); putbe32(offset_flags & 0xffffffffU, data + 4); putbe32(comp_len, data + 8); @@ -411,7 +443,17 @@ putbe32(parent_1, data + 24); putbe32(parent_2, data + 28); memcpy(data + 32, c_node_id, c_node_id_len); + /* Padding since SHA-1 is only 20 bytes for now */ memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len); + if (self->hdrsize != v1_hdrsize) { + putbe64(unified_revlog_id, data + 64); + putbe32(rank, data + 72); + putbe64(sidedata_offset, data + 76); + putbe32(sidedata_comp_len, data + 84); + /* Padding for 96 bytes alignment */ + memset(data + 88, 0, self->hdrsize - 88); + } + if (self->ntinitialized) nt_insert(&self->nt, c_node_id, rev); @@ -2563,14 +2605,17 @@ const char *data = (const char *)self->buf.buf; Py_ssize_t pos = 0; Py_ssize_t end = self->buf.len; - long incr = v1_hdrsize; + long incr = self->hdrsize; Py_ssize_t len = 0; - while (pos + v1_hdrsize <= end && pos >= 0) { - uint32_t comp_len; + while (pos + self->hdrsize <= end && pos >= 0) { + uint32_t comp_len, sidedata_comp_len = 0; /* 3rd element of header is length of compressed inline data */ comp_len = getbe32(data + pos + 8); - incr = v1_hdrsize + comp_len; + if (self->hdrsize == v2_hdrsize) { + sidedata_comp_len = getbe32(data + pos + 84); + } + incr = self->hdrsize + comp_len + sidedata_comp_len; if (offsets) offsets[len] = data + pos; len++; @@ -2586,11 +2631,13 @@ return len; } -static int index_init(indexObject *self, PyObject *args) +static int index_init(indexObject *self, PyObject *args, PyObject *kwargs) { - PyObject 
*data_obj, *inlined_obj; + PyObject *data_obj, *inlined_obj, *revlogv2; Py_ssize_t size; + static char *kwlist[] = {"data", "inlined", "revlogv2", NULL}; + /* Initialize before argument-checking to avoid index_dealloc() crash. */ self->added = NULL; @@ -2606,7 +2653,9 @@ self->nodelen = 20; self->nullentry = NULL; - if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) + revlogv2 = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist, + &data_obj, &inlined_obj, &revlogv2)) return -1; if (!PyObject_CheckBuffer(data_obj)) { PyErr_SetString(PyExc_TypeError, @@ -2618,8 +2667,22 @@ return -1; } - self->nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, - -1, -1, -1, -1, nullid, self->nodelen); + if (revlogv2 && PyObject_IsTrue(revlogv2)) { + self->hdrsize = v2_hdrsize; + } else { + self->hdrsize = v1_hdrsize; + } + + if (self->hdrsize == v1_hdrsize) { + self->nullentry = + Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1, + -1, -1, -1, nullid, self->nodelen); + } else { + self->nullentry = + Py_BuildValue(PY23("iiiiiiis#iiii", "iiiiiiiy#iiii"), 0, 0, 0, + -1, -1, -1, -1, nullid, self->nodelen, 0, 0, 0, 0); + } + if (!self->nullentry) return -1; PyObject_GC_UnTrack(self->nullentry); @@ -2641,11 +2704,11 @@ goto bail; self->length = len; } else { - if (size % v1_hdrsize) { + if (size % self->hdrsize) { PyErr_SetString(PyExc_ValueError, "corrupt index file"); goto bail; } - self->length = size / v1_hdrsize; + self->length = size / self->hdrsize; } return 0; @@ -2797,16 +2860,16 @@ }; /* - * returns a tuple of the form (index, index, cache) with elements as + * returns a tuple of the form (index, cache) with elements as * follows: * - * index: an index object that lazily parses RevlogNG records + * index: an index object that lazily parses Revlog (v1 or v2) records * cache: if data is inlined, a tuple (0, index_file_content), else None * index_file_content could be a string, or a buffer * * added complications are for 
backwards compatibility */ -PyObject *parse_index2(PyObject *self, PyObject *args) +PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *cache = NULL; indexObject *idx; @@ -2816,7 +2879,7 @@ if (idx == NULL) goto bail; - ret = index_init(idx, args); + ret = index_init(idx, args, kwargs); if (ret == -1) goto bail; diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -69,7 +69,6 @@ templatefilters, util, ) -from .pure import parsers as pureparsers from .interfaces import ( repository, util as interfaceutil,