diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c --- a/mercurial/cext/manifest.c +++ b/mercurial/cext/manifest.c @@ -28,6 +28,7 @@ typedef struct { PyObject_HEAD PyObject *pydata; + Py_ssize_t nodelen; line *lines; int numlines; /* number of line entries */ int livelines; /* number of non-deleted lines */ @@ -49,12 +50,11 @@ } /* get the node value of a single line */ -static PyObject *nodeof(line *l, char *flag) +static PyObject *nodeof(Py_ssize_t nodelen, line *l, char *flag) { char *s = l->start; Py_ssize_t llen = pathlen(l); Py_ssize_t hlen = l->len - llen - 2; - Py_ssize_t hlen_raw; PyObject *hash; if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */ PyErr_SetString(PyExc_ValueError, "manifest line too short"); @@ -73,36 +73,29 @@ break; } - switch (hlen) { - case 40: /* sha1 */ - hlen_raw = 20; - break; - case 64: /* new hash */ - hlen_raw = 32; - break; - default: + if (hlen != 2 * nodelen) { PyErr_SetString(PyExc_ValueError, "invalid node length in manifest"); return NULL; } - hash = unhexlify(s + llen + 1, hlen_raw * 2); + hash = unhexlify(s + llen + 1, nodelen * 2); if (!hash) { return NULL; } if (l->hash_suffix != '\0') { char newhash[33]; - memcpy(newhash, PyBytes_AsString(hash), hlen_raw); + memcpy(newhash, PyBytes_AsString(hash), nodelen); Py_DECREF(hash); - newhash[hlen_raw] = l->hash_suffix; - hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1); + newhash[nodelen] = l->hash_suffix; + hash = PyBytes_FromStringAndSize(newhash, nodelen + 1); } return hash; } /* get the node hash and flags of a line as a tuple */ -static PyObject *hashflags(line *l) +static PyObject *hashflags(Py_ssize_t nodelen, line *l) { char flag; - PyObject *hash = nodeof(l, &flag); + PyObject *hash = nodeof(nodelen, l, &flag); PyObject *flags; PyObject *tup; @@ -190,17 +183,23 @@ static int lazymanifest_init(lazymanifest *self, PyObject *args) { char *data; - Py_ssize_t len; + Py_ssize_t nodelen, len; int err, ret; PyObject *pydata; lazymanifest_init_early(self); - if (!PyArg_ParseTuple(args, "S", &pydata)) { + if (!PyArg_ParseTuple(args, "nS", &nodelen, &pydata)) { return -1; } - err = PyBytes_AsStringAndSize(pydata, &data, &len); + if (nodelen != 20 && nodelen != 32) { + /* See fixed buffer in nodeof */ + PyErr_Format(PyExc_ValueError, "Unsupported node length"); + return -1; + } + self->nodelen = nodelen; + self->dirty = false; - self->dirty = false; + err = PyBytes_AsStringAndSize(pydata, &data, &len); if (err == -1) return -1; self->pydata = pydata; @@ -291,17 +290,18 @@ static PyObject *lmiter_iterentriesnext(PyObject *o) { + lmIter *self = (lmIter *)o; Py_ssize_t pl; line *l; char flag; PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; - l = lmiter_nextline((lmIter *)o); + l = lmiter_nextline(self); if (!l) { goto done; } pl = pathlen(l); path = PyBytes_FromStringAndSize(l->start, pl); - hash = nodeof(l, &flag); + hash = nodeof(self->m->nodelen, l, &flag); if (!path || !hash) { goto done; } @@ -471,7 +471,7 @@ PyErr_Format(PyExc_KeyError, "No such manifest entry."); return NULL; } - return hashflags(hit); + return hashflags(self->nodelen, hit); } static int lazymanifest_delitem(lazymanifest *self, PyObject *key) @@ -568,13 +568,13 @@ pyhash = PyTuple_GetItem(value, 0); if (!PyBytes_Check(pyhash)) { PyErr_Format(PyExc_TypeError, - "node must be a 20 or 32 bytes string"); + "node must be a %zi bytes string", self->nodelen); return -1; } hlen = PyBytes_Size(pyhash); - if (hlen != 20 && hlen != 32) { + if (hlen != self->nodelen) { PyErr_Format(PyExc_TypeError, - "node must be a 20 or 32 bytes string"); + "node must be a %zi bytes string", self->nodelen); return -1; } hash = PyBytes_AsString(pyhash); @@ -739,6 +739,7 @@ goto nomem; } lazymanifest_init_early(copy); + copy->nodelen = self->nodelen; copy->numlines = self->numlines; copy->livelines = self->livelines; copy->dirty = false; @@ -777,6 +778,7 @@ goto nomem; } lazymanifest_init_early(copy); + copy->nodelen = self->nodelen; copy->dirty = true; copy->lines = malloc(self->maxlines * sizeof(line)); if (!copy->lines) { @@ -872,7 +874,7 @@ if (!key) goto nomem; if (result < 0) { - PyObject *l = hashflags(left); + PyObject *l = hashflags(self->nodelen, left); if (!l) { goto nomem; } @@ -885,7 +887,7 @@ Py_DECREF(outer); sneedle++; } else if (result > 0) { - PyObject *r = hashflags(right); + PyObject *r = hashflags(self->nodelen, right); if (!r) { goto nomem; } @@ -902,12 +904,12 @@ if (left->len != right->len || memcmp(left->start, right->start, left->len) || left->hash_suffix != right->hash_suffix) { - PyObject *l = hashflags(left); + PyObject *l = hashflags(self->nodelen, left); PyObject *r; if (!l) { goto nomem; } - r = hashflags(right); + r = hashflags(self->nodelen, right); if (!r) { Py_DECREF(l); goto nomem; diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -668,7 +668,7 @@ void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 17; +static const int version = 18; static void module_init(PyObject *mod) { diff --git a/mercurial/cext/parsers.pyi b/mercurial/cext/parsers.pyi --- a/mercurial/cext/parsers.pyi +++ b/mercurial/cext/parsers.pyi @@ -29,7 +29,7 @@ # From manifest.c class lazymanifest: - def __init__(self, data: bytes): ... + def __init__(self, nodelen: int, data: bytes): ... def __iter__(self) -> Iterator[bytes]: ... def __len__(self) -> int: ... diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -42,7 +42,7 @@ FASTDELTA_TEXTDIFF_THRESHOLD = 1000 -def _parse(data): +def _parse(nodelen, data): # This method does a little bit of excessive-looking # precondition checking. This is so that the behavior of this # class exactly matches its C counterpart to try and help @@ -63,7 +63,7 @@ nl -= 1 else: flags = b'' - if nl not in (40, 64): + if nl != 2 * nodelen: raise ValueError(b'Invalid manifest line') yield f, bin(n), flags @@ -131,7 +131,7 @@ else: hlen = nlpos - zeropos - 1 flags = b'' - if hlen not in (40, 64): + if hlen != 2 * self.lm._nodelen: raise error.StorageError(b'Invalid manifest line') hashval = unhexlify( data, self.lm.extrainfo[self.pos], zeropos + 1, hlen @@ -176,12 +176,14 @@ def __init__( self, + nodelen, data, positions=None, extrainfo=None, extradata=None, hasremovals=False, ): + self._nodelen = nodelen if positions is None: self.positions = self.findlines(data) self.extrainfo = [0] * len(self.positions) @@ -288,7 +290,7 @@ hlen -= 1 else: flags = b'' - if hlen not in (40, 64): + if hlen != 2 * self._nodelen: raise error.StorageError(b'Invalid manifest line') hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen) return (hashval, flags) @@ -344,6 +346,7 @@ def copy(self): # XXX call _compact like in C? return _lazymanifest( + self._nodelen, self.data, self.positions, self.extrainfo, @@ -454,7 +457,7 @@ def filtercopy(self, filterfn): # XXX should be optimized - c = _lazymanifest(b'') + c = _lazymanifest(self._nodelen, b'') for f, n, fl in self.iterentries(): if filterfn(f): c[f] = n, fl @@ -469,8 +472,9 @@ @interfaceutil.implementer(repository.imanifestdict) class manifestdict(object): - def __init__(self, data=b''): - self._lm = _lazymanifest(data) + def __init__(self, nodelen, data=b''): + self._nodelen = nodelen + self._lm = _lazymanifest(nodelen, data) def __getitem__(self, key): return self._lm[key][0] @@ -578,14 +582,14 @@ return self.copy() if self._filesfastpath(match): - m = manifestdict() + m = manifestdict(self._nodelen) lm = self._lm for fn in match.files(): if fn in lm: m._lm[fn] = lm[fn] return m - m = manifestdict() + m = manifestdict(self._nodelen) m._lm = self._lm.filtercopy(match) return m @@ -628,7 +632,7 @@ return b'' def copy(self): - c = manifestdict() + c = manifestdict(self._nodelen) c._lm = self._lm.copy() return c @@ -795,6 +799,7 @@ self._dir = dir self.nodeconstants = nodeconstants self._node = self.nodeconstants.nullid + self._nodelen = self.nodeconstants.nodelen self._loadfunc = _noop self._copyfunc = _noop self._dirty = False @@ -1322,7 +1327,7 @@ def parse(self, text, readsubtree): selflazy = self._lazydirs - for f, n, fl in _parse(text): + for f, n, fl in _parse(self._nodelen, text): if fl == b't': f = f + b'/' # False below means "doesn't need to be copied" and can use the @@ -2019,7 +2024,7 @@ class memmanifestctx(object): def __init__(self, manifestlog): self._manifestlog = manifestlog - self._manifestdict = manifestdict() + self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen) def _storage(self): return self._manifestlog.getstorage(b'') @@ -2081,8 +2086,9 @@ def read(self): if self._data is None: - if self._node == self._manifestlog.nodeconstants.nullid: - self._data = manifestdict() + nc = self._manifestlog.nodeconstants + if self._node == nc.nullid: + self._data = manifestdict(nc.nodelen) else: store = self._storage() if self._node in store.fulltextcache: @@ -2091,7 +2097,7 @@ text = store.revision(self._node) arraytext = bytearray(text) store.fulltextcache[self._node] = arraytext - self._data = manifestdict(text) + self._data = manifestdict(nc.nodelen, text) return self._data def readfast(self, shallow=False): @@ -2118,7 +2124,7 @@ store = self._storage() r = store.rev(self._node) d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r)) - return manifestdict(d) + return manifestdict(store.nodeconstants.nodelen, d) def find(self, key): return self.read().find(key) @@ -2244,7 +2250,7 @@ if shallow: r = store.rev(self._node) d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r)) - return manifestdict(d) + return manifestdict(store.nodeconstants.nodelen, d) else: # Need to perform a slow delta r0 = store.deltaparent(store.rev(self._node)) @@ -2273,7 +2279,9 @@ return self.readdelta(shallow=shallow) if shallow: - return manifestdict(store.revision(self._node)) + return manifestdict( + store.nodeconstants.nodelen, store.revision(self._node) + ) else: return self.read() diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -80,7 +80,7 @@ ('cext', 'bdiff'): 3, ('cext', 'mpatch'): 1, ('cext', 'osutil'): 4, - ('cext', 'parsers'): 17, + ('cext', 'parsers'): 18, } # map import request to other package or module diff --git a/tests/test-manifest.py b/tests/test-manifest.py --- a/tests/test-manifest.py +++ b/tests/test-manifest.py @@ -81,12 +81,12 @@ raise NotImplementedError('parsemanifest not implemented by test case') def testEmptyManifest(self): - m = self.parsemanifest(EMTPY_MANIFEST) + m = self.parsemanifest(20, EMTPY_MANIFEST) self.assertEqual(0, len(m)) self.assertEqual([], list(m)) def testManifest(self): - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m)) self.assertEqual(BIN_HASH_2, m[b'bar/baz/qux.py']) self.assertEqual(b'l', m.flags(b'bar/baz/qux.py')) @@ -95,20 +95,16 @@ with self.assertRaises(KeyError): m[b'wat'] - def testManifestLongHashes(self): - m = self.parsemanifest(b'a\0' + b'f' * 64 + b'\n') - self.assertEqual(binascii.unhexlify(b'f' * 64), m[b'a']) - def testSetItem(self): want = BIN_HASH_1 - m = self.parsemanifest(EMTPY_MANIFEST) + m = self.parsemanifest(20, EMTPY_MANIFEST) m[b'a'] = want self.assertIn(b'a', m) self.assertEqual(want, m[b'a']) self.assertEqual(b'a\0' + HASH_1 + b'\n', m.text()) - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) m[b'a'] = want self.assertEqual(want, m[b'a']) self.assertEqual(b'a\0' + HASH_1 + b'\n' + A_SHORT_MANIFEST, m.text()) @@ -116,14 +112,14 @@ def testSetFlag(self): want = b'x' - m = self.parsemanifest(EMTPY_MANIFEST) + m = self.parsemanifest(20, EMTPY_MANIFEST) # first add a file; a file-less flag makes no sense m[b'a'] = BIN_HASH_1 m.setflag(b'a', want) self.assertEqual(want, m.flags(b'a')) self.assertEqual(b'a\0' + HASH_1 + want + b'\n', m.text()) - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) # first add a file; a file-less flag makes no sense m[b'a'] = BIN_HASH_1 m.setflag(b'a', want) @@ -133,7 +129,7 @@ ) def testCopy(self): - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) m[b'a'] = BIN_HASH_1 m2 = m.copy() del m @@ -142,7 +138,7 @@ def testCompaction(self): unhex = binascii.unhexlify h1, h2 = unhex(HASH_1), unhex(HASH_2) - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) m[b'alpha'] = h1 m[b'beta'] = h2 del m[b'foo'] @@ -164,7 +160,7 @@ m[b'foo'] def testMatchException(self): - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*']) def filt(path): @@ -177,7 +173,7 @@ m._matches(match) def testRemoveItem(self): - m = self.parsemanifest(A_SHORT_MANIFEST) + m = self.parsemanifest(20, A_SHORT_MANIFEST) del m[b'foo'] with self.assertRaises(KeyError): m[b'foo'] @@ -193,9 +189,9 @@ addl = b'z-only-in-left\0' + HASH_1 + b'\n' addr = b'z-only-in-right\0' + HASH_2 + b'x\n' left = self.parsemanifest( - A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + b'x') + addl + 20, A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + b'x') + addl ) - right = self.parsemanifest(A_SHORT_MANIFEST + addr) + right = self.parsemanifest(20, A_SHORT_MANIFEST + addr) want = { b'foo': ((BIN_HASH_3, b'x'), (BIN_HASH_1, b'')), b'z-only-in-left': ((BIN_HASH_1, b''), MISSING), @@ -208,14 +204,18 @@ b'foo': (MISSING, (BIN_HASH_3, b'x')), b'z-only-in-left': (MISSING, (BIN_HASH_1, b'')), } - self.assertEqual(want, self.parsemanifest(EMTPY_MANIFEST).diff(left)) + self.assertEqual( + want, self.parsemanifest(20, EMTPY_MANIFEST).diff(left) + ) want = { b'bar/baz/qux.py': ((BIN_HASH_2, b'l'), MISSING), b'foo': ((BIN_HASH_3, b'x'), MISSING), b'z-only-in-left': ((BIN_HASH_1, b''), MISSING), } - self.assertEqual(want, left.diff(self.parsemanifest(EMTPY_MANIFEST))) + self.assertEqual( + want, left.diff(self.parsemanifest(20, EMTPY_MANIFEST)) + ) copy = right.copy() del copy[b'z-only-in-right'] del right[b'foo'] @@ -225,7 +225,7 @@ } self.assertEqual(want, right.diff(copy)) - short = self.parsemanifest(A_SHORT_MANIFEST) + short = self.parsemanifest(20, A_SHORT_MANIFEST) pruned = short.copy() del pruned[b'foo'] want = { @@ -247,27 +247,27 @@ l + b'\n' for l in reversed(A_SHORT_MANIFEST.split(b'\n')) if l ) try: - self.parsemanifest(backwards) + self.parsemanifest(20, backwards) self.fail('Should have raised ValueError') except ValueError as v: self.assertIn('Manifest lines not in sorted order.', str(v)) def testNoTerminalNewline(self): try: - self.parsemanifest(A_SHORT_MANIFEST + b'wat') + self.parsemanifest(20, A_SHORT_MANIFEST + b'wat') self.fail('Should have raised ValueError') except ValueError as v: self.assertIn('Manifest did not end in a newline.', str(v)) def testNoNewLineAtAll(self): try: - self.parsemanifest(b'wat') + self.parsemanifest(20, b'wat') self.fail('Should have raised ValueError') except ValueError as v: self.assertIn('Manifest did not end in a newline.', str(v)) def testHugeManifest(self): - m = self.parsemanifest(A_HUGE_MANIFEST) + m = self.parsemanifest(20, A_HUGE_MANIFEST) self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m)) self.assertEqual(len(m), len(list(m))) @@ -275,7 +275,7 @@ """Tests matches() for a few specific files to make sure that both the set of files as well as their flags and nodeids are correct in the resulting manifest.""" - m = self.parsemanifest(A_HUGE_MANIFEST) + m = self.parsemanifest(20, A_HUGE_MANIFEST) match = matchmod.exact([b'file1', b'file200', b'file300']) m2 = m._matches(match) @@ -291,7 +291,7 @@ """Tests matches() for a small set of specific files, including one nonexistent file to make sure in only matches against existing files. """ - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.exact( [b'a/b/c/bar.txt', b'a/b/d/qux.py', b'readme.txt', b'nonexistent'] @@ -305,7 +305,7 @@ def testMatchesNonexistentDirectory(self): """Tests matches() for a relpath match on a directory that doesn't actually exist.""" - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.match( util.localpath(b'/repo'), b'', [b'a/f'], default=b'relpath' @@ -316,7 +316,7 @@ def testMatchesExactLarge(self): """Tests matches() for files matching a large list of exact files.""" - m = self.parsemanifest(A_HUGE_MANIFEST) + m = self.parsemanifest(20, A_HUGE_MANIFEST) flist = m.keys()[80:300] match = matchmod.exact(flist) @@ -326,7 +326,7 @@ def testMatchesFull(self): '''Tests matches() for what should be a full match.''' - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.match(util.localpath(b'/repo'), b'', [b'']) m2 = m._matches(match) @@ -336,7 +336,7 @@ def testMatchesDirectory(self): """Tests matches() on a relpath match on a directory, which should match against all files within said directory.""" - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.match( util.localpath(b'/repo'), b'', [b'a/b'], default=b'relpath' @@ -362,7 +362,7 @@ """Tests matches() on an exact match on a directory, which should result in an empty manifest because you can't perform an exact match against a directory.""" - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.exact([b'a/b']) m2 = m._matches(match) @@ -372,7 +372,7 @@ def testMatchesCwd(self): """Tests matches() on a relpath match with the current directory ('.') when not in the root directory.""" - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.match( util.localpath(b'/repo'), b'a/b', [b'.'], default=b'relpath' @@ -397,7 +397,7 @@ def testMatchesWithPattern(self): """Tests matches() for files matching a pattern that reside deeper than the specified directory.""" - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) match = matchmod.match(util.localpath(b'/repo'), b'', [b'a/b/*/*.txt']) m2 = m._matches(match) @@ -408,8 +408,12 @@ class testmanifestdict(unittest.TestCase, basemanifesttests): - def parsemanifest(self, text): - return manifestmod.manifestdict(text) + def parsemanifest(self, nodelen, text): + return manifestmod.manifestdict(nodelen, text) + + def testManifestLongHashes(self): + m = self.parsemanifest(32, b'a\0' + b'f' * 64 + b'\n') + self.assertEqual(binascii.unhexlify(b'f' * 64), m[b'a']) def testObviouslyBogusManifest(self): # This is a 163k manifest that came from oss-fuzz. It was a @@ -433,15 +437,15 @@ b'\xac\xbe' ) with self.assertRaises(ValueError): - self.parsemanifest(data) + self.parsemanifest(20, data) class testtreemanifest(unittest.TestCase, basemanifesttests): - def parsemanifest(self, text): + def parsemanifest(self, nodelen, text): return manifestmod.treemanifest(sha1nodeconstants, b'', text) def testWalkSubtrees(self): - m = self.parsemanifest(A_DEEPER_MANIFEST) + m = self.parsemanifest(20, A_DEEPER_MANIFEST) dirs = [s._dir for s in m.walksubtrees()] self.assertEqual(