diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c --- a/mercurial/cext/manifest.c +++ b/mercurial/cext/manifest.c @@ -49,23 +49,35 @@ } /* get the node value of a single line */ -static PyObject *nodeof(line *l) +static PyObject *nodeof(line *l, char *flag) { char *s = l->start; Py_ssize_t llen = pathlen(l); Py_ssize_t hlen = l->len - llen - 2; - Py_ssize_t hlen_raw = 20; + Py_ssize_t hlen_raw; PyObject *hash; if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */ PyErr_SetString(PyExc_ValueError, "manifest line too short"); return NULL; } + /* Detect flags after the hash first. */ + switch (s[llen + hlen]) { + case 'l': + case 't': + case 'x': + *flag = s[llen + hlen]; + --hlen; + break; + default: + *flag = '\0'; + break; + } + switch (hlen) { case 40: /* sha1 */ - case 41: /* sha1 with cruft for a merge */ + hlen_raw = 20; break; case 64: /* new hash */ - case 65: /* new hash with cruft for a merge */ hlen_raw = 32; break; default: @@ -89,9 +101,8 @@ /* get the node hash and flags of a line as a tuple */ static PyObject *hashflags(line *l) { - char *s = l->start; - Py_ssize_t plen = pathlen(l); - PyObject *hash = nodeof(l); + char flag; + PyObject *hash = nodeof(l, &flag); ssize_t hlen; Py_ssize_t hplen, flen; PyObject *flags; @@ -99,14 +110,7 @@ if (!hash) return NULL; - /* hash is either 20 or 21 bytes for an old hash, so we use a - ternary here to get the "real" hexlified sha length. */ - hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64; - /* 1 for null byte, 1 for newline */ - hplen = plen + hlen + 2; - flen = l->len - hplen; - - flags = PyBytes_FromStringAndSize(s + hplen - 1, flen); + flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0); if (!flags) { Py_DECREF(hash); return NULL; @@ -291,6 +295,7 @@ { Py_ssize_t pl; line *l; + char flag; Py_ssize_t consumed; PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; l = lmiter_nextline((lmIter *)o); @@ -299,13 +304,11 @@ } pl = pathlen(l); path = PyBytes_FromStringAndSize(l->start, pl); - hash = nodeof(l); + hash = nodeof(l, &flag); if (!path || !hash) { goto done; } - consumed = pl + 41; - flags = PyBytes_FromStringAndSize(l->start + consumed, - l->len - consumed - 1); + flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0); if (!flags) { goto done; } @@ -568,19 +571,13 @@ pyhash = PyTuple_GetItem(value, 0); if (!PyBytes_Check(pyhash)) { PyErr_Format(PyExc_TypeError, - "node must be a 20-byte string"); + "node must be a 20 or 32 bytes string"); return -1; } hlen = PyBytes_Size(pyhash); - /* Some parts of the codebase try and set 21 or 22 - * byte "hash" values in order to perturb things for - * status. We have to preserve at least the 21st - * byte. Sigh. If there's a 22nd byte, we drop it on - * the floor, which works fine. - */ - if (hlen != 20 && hlen != 21 && hlen != 22) { + if (hlen != 20 && hlen != 32) { PyErr_Format(PyExc_TypeError, - "node must be a 20-byte string"); + "node must be a 20 or 32 bytes string"); return -1; } hash = PyBytes_AsString(pyhash); @@ -588,28 +585,39 @@ pyflags = PyTuple_GetItem(value, 1); if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) { PyErr_Format(PyExc_TypeError, - "flags must a 0 or 1 byte string"); + "flags must a 0 or 1 bytes string"); return -1; } if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) { return -1; } + if (flen == 1) { + switch (*flags) { + case 'l': + case 't': + case 'x': + break; + default: + PyErr_Format(PyExc_TypeError, "invalid manifest flag"); + return -1; + } + } /* one null byte and one newline */ - dlen = plen + 41 + flen + 1; + dlen = plen + hlen * 2 + 1 + flen + 1; dest = malloc(dlen); if (!dest) { PyErr_NoMemory(); return -1; } memcpy(dest, path, plen + 1); - for (i = 0; i < 20; i++) { + for (i = 0; i < hlen; i++) { /* Cast to unsigned, so it will not get sign-extended when promoted * to int (as is done when passing to a variadic function) */ sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]); } - memcpy(dest + plen + 41, flags, flen); - dest[plen + 41 + flen] = '\n'; + memcpy(dest + plen + 2 * hlen + 1, flags, flen); + dest[plen + 2 * hlen + 1 + flen] = '\n'; new.start = dest; new.len = dlen; new.hash_suffix = '\0'; diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -121,8 +121,20 @@ self.pos += 1 return data zeropos = data.find(b'\x00', pos) - hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40) - flags = self.lm._getflags(data, self.pos, zeropos) + nlpos = data.find(b'\n', pos) + if zeropos == -1 or nlpos == -1 or nlpos < zeropos: + raise error.StorageError(b'Invalid manifest line') + flags = data[nlpos - 1 : nlpos] + if flags in _manifestflags: + hlen = nlpos - zeropos - 2 + else: + hlen = nlpos - zeropos - 1 + flags = b'' + if hlen not in (40, 64): + raise error.StorageError(b'Invalid manifest line') + hashval = unhexlify( + data, self.lm.extrainfo[self.pos], zeropos + 1, hlen + ) self.pos += 1 return (data[pos:zeropos], hashval, flags) @@ -140,6 +152,9 @@ return (a > b) - (a < b) +_manifestflags = {b'', b'l', b't', b'x'} + + class _lazymanifest(object): """A pure python manifest backed by a byte string. It is supplimented with internal lists as it is modified, until it is compacted back to a pure byte @@ -251,15 +266,6 @@ def __contains__(self, key): return self.bsearch(key) != -1 - def _getflags(self, data, needle, pos): - start = pos + 41 - end = data.find(b"\n", start) - if end == -1: - end = len(data) - 1 - if start == end: - return b'' - return self.data[start:end] - def __getitem__(self, key): if not isinstance(key, bytes): raise TypeError(b"getitem: manifest keys must be a bytes.") @@ -273,13 +279,17 @@ nlpos = data.find(b'\n', zeropos) assert 0 <= needle <= len(self.positions) assert len(self.extrainfo) == len(self.positions) + if zeropos == -1 or nlpos == -1 or nlpos < zeropos: + raise error.StorageError(b'Invalid manifest line') hlen = nlpos - zeropos - 1 - # Hashes sometimes have an extra byte tucked on the end, so - # detect that. - if hlen % 2: + flags = data[nlpos - 1 : nlpos] + if flags in _manifestflags: hlen -= 1 + else: + flags = b'' + if hlen not in (40, 64): + raise error.StorageError(b'Invalid manifest line') hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen) - flags = self._getflags(data, needle, zeropos) return (hashval, flags) def __delitem__(self, key): @@ -408,9 +418,7 @@ def _pack(self, d): n = d[1] - if len(n) == 21 or len(n) == 33: - n = n[:-1] - assert len(n) == 20 or len(n) == 32 + assert len(n) in (20, 32) return d[0] + b'\x00' + hex(n) + d[2] + b'\n' def text(self): @@ -609,6 +617,8 @@ return self._lm.diff(m2._lm, clean) def setflag(self, key, flag): + if flag not in _manifestflags: + raise TypeError(b"Invalid manifest flag set.") self._lm[key] = self[key], flag def get(self, key, default=None): @@ -1049,11 +1059,10 @@ self._dirs[dir].__setitem__(subpath, n) else: # manifest nodes are either 20 bytes or 32 bytes, - # depending on the hash in use. An extra byte is - # occasionally used by hg, but won't ever be - # persisted. Trim to 21 or 33 bytes as appropriate. - trim = 21 if len(n) < 25 else 33 - self._files[f] = n[:trim] # to match manifestdict's behavior + # depending on the hash in use. Assert this as historically + # sometimes extra bytes were added. + assert len(n) in (20, 32) + self._files[f] = n self._dirty = True def _load(self): @@ -1066,6 +1075,8 @@ def setflag(self, f, flags): """Set the flags (symlink, executable) for path f.""" + if flags not in _manifestflags: + raise TypeError(b"Invalid manifest flag set.") self._load() dir, subpath = _splittopdir(f) if dir: diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -725,8 +725,7 @@ b'prompt changed/deleted', ) elif n1 == addednodeid: - # This extra 'a' is added by working copy manifest to mark - # the file as locally added. We should forget it instead of + # This file was locally added. We should forget it instead of # deleting it. actions[f] = ( mergestatemod.ACTION_FORGET, diff --git a/tests/test-manifest.py b/tests/test-manifest.py --- a/tests/test-manifest.py +++ b/tests/test-manifest.py @@ -156,39 +156,6 @@ with self.assertRaises(KeyError): m[b'foo'] - def testSetGetNodeSuffix(self): - clean = self.parsemanifest(A_SHORT_MANIFEST) - m = self.parsemanifest(A_SHORT_MANIFEST) - h = m[b'foo'] - f = m.flags(b'foo') - want = h + b'a' - # Merge code wants to set 21-byte fake hashes at times - m[b'foo'] = want - self.assertEqual(want, m[b'foo']) - self.assertEqual( - [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', BIN_HASH_1 + b'a')], - list(m.items()), - ) - # Sometimes it even tries a 22-byte fake hash, but we can - # return 21 and it'll work out - m[b'foo'] = want + b'+' - self.assertEqual(want, m[b'foo']) - # make sure the suffix survives a copy - match = matchmod.match(util.localpath(b'/repo'), b'', [b're:foo']) - m2 = m._matches(match) - self.assertEqual(want, m2[b'foo']) - self.assertEqual(1, len(m2)) - m2 = m.copy() - self.assertEqual(want, m2[b'foo']) - # suffix with iteration - self.assertEqual( - [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', want)], list(m.items()) - ) - - # shows up in diff - self.assertEqual({b'foo': ((want, f), (h, b''))}, m.diff(clean)) - self.assertEqual({b'foo': ((h, b''), (want, f))}, clean.diff(m)) - def testMatchException(self): m = self.parsemanifest(A_SHORT_MANIFEST) match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*'])