diff --git a/mercurial/config.py b/mercurial/config.py --- a/mercurial/config.py +++ b/mercurial/config.py @@ -258,93 +258,3 @@ self.parse( path, fp.read(), sections=sections, remap=remap, include=include ) - - -def parselist(value): - """parse a configuration value as a list of comma/space separated strings - - >>> parselist(b'this,is "a small" ,test') - ['this', 'is', 'a small', 'test'] - """ - - def _parse_plain(parts, s, offset): - whitespace = False - while offset < len(s) and ( - s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' - ): - whitespace = True - offset += 1 - if offset >= len(s): - return None, parts, offset - if whitespace: - parts.append(b'') - if s[offset : offset + 1] == b'"' and not parts[-1]: - return _parse_quote, parts, offset + 1 - elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': - parts[-1] = parts[-1][:-1] + s[offset : offset + 1] - return _parse_plain, parts, offset + 1 - parts[-1] += s[offset : offset + 1] - return _parse_plain, parts, offset + 1 - - def _parse_quote(parts, s, offset): - if offset < len(s) and s[offset : offset + 1] == b'"': # "" - parts.append(b'') - offset += 1 - while offset < len(s) and ( - s[offset : offset + 1].isspace() - or s[offset : offset + 1] == b',' - ): - offset += 1 - return _parse_plain, parts, offset - - while offset < len(s) and s[offset : offset + 1] != b'"': - if ( - s[offset : offset + 1] == b'\\' - and offset + 1 < len(s) - and s[offset + 1 : offset + 2] == b'"' - ): - offset += 1 - parts[-1] += b'"' - else: - parts[-1] += s[offset : offset + 1] - offset += 1 - - if offset >= len(s): - real_parts = _configlist(parts[-1]) - if not real_parts: - parts[-1] = b'"' - else: - real_parts[0] = b'"' + real_parts[0] - parts = parts[:-1] - parts.extend(real_parts) - return None, parts, offset - - offset += 1 - while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: - offset += 1 - - if offset < len(s): - if offset + 1 == len(s) and s[offset : offset + 1] == b'"': - 
parts[-1] += b'"' - offset += 1 - else: - parts.append(b'') - else: - return None, parts, offset - - return _parse_plain, parts, offset - - def _configlist(s): - s = s.rstrip(b' ,') - if not s: - return [] - parser, parts, offset = _parse_plain, [b''], 0 - while parser: - parser, parts, offset = parser(parts, s, offset) - return parts - - if value is not None and isinstance(value, bytes): - result = _configlist(value.lstrip(b' ,\n')) - else: - result = value - return result or [] diff --git a/mercurial/ui.py b/mercurial/ui.py --- a/mercurial/ui.py +++ b/mercurial/ui.py @@ -887,10 +887,10 @@ """ # default is not always a list v = self.configwith( - config.parselist, section, name, default, b'list', untrusted + stringutil.parselist, section, name, default, b'list', untrusted ) if isinstance(v, bytes): - return config.parselist(v) + return stringutil.parselist(v) elif v is None: return [] return v diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py --- a/mercurial/utils/stringutil.py +++ b/mercurial/utils/stringutil.py @@ -868,6 +868,96 @@ return _booleans.get(s.lower(), None) +def parselist(value): + """parse a configuration value as a list of comma/space separated strings + + >>> parselist(b'this,is "a small" ,test') + ['this', 'is', 'a small', 'test'] + """ + + def _parse_plain(parts, s, offset): + whitespace = False + while offset < len(s) and ( + s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' + ): + whitespace = True + offset += 1 + if offset >= len(s): + return None, parts, offset + if whitespace: + parts.append(b'') + if s[offset : offset + 1] == b'"' and not parts[-1]: + return _parse_quote, parts, offset + 1 + elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': + parts[-1] = parts[-1][:-1] + s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + parts[-1] += s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + + def _parse_quote(parts, s, offset): + if offset < len(s) and s[offset 
: offset + 1] == b'"': # "" + parts.append(b'') + offset += 1 + while offset < len(s) and ( + s[offset : offset + 1].isspace() + or s[offset : offset + 1] == b',' + ): + offset += 1 + return _parse_plain, parts, offset + + while offset < len(s) and s[offset : offset + 1] != b'"': + if ( + s[offset : offset + 1] == b'\\' + and offset + 1 < len(s) + and s[offset + 1 : offset + 2] == b'"' + ): + offset += 1 + parts[-1] += b'"' + else: + parts[-1] += s[offset : offset + 1] + offset += 1 + + if offset >= len(s): + real_parts = _configlist(parts[-1]) + if not real_parts: + parts[-1] = b'"' + else: + real_parts[0] = b'"' + real_parts[0] + parts = parts[:-1] + parts.extend(real_parts) + return None, parts, offset + + offset += 1 + while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: + offset += 1 + + if offset < len(s): + if offset + 1 == len(s) and s[offset : offset + 1] == b'"': + parts[-1] += b'"' + offset += 1 + else: + parts.append(b'') + else: + return None, parts, offset + + return _parse_plain, parts, offset + + def _configlist(s): + s = s.rstrip(b' ,') + if not s: + return [] + parser, parts, offset = _parse_plain, [b''], 0 + while parser: + parser, parts, offset = parser(parts, s, offset) + return parts + + if value is not None and isinstance(value, bytes): + result = _configlist(value.lstrip(b' ,\n')) + else: + result = value + return result or [] + + def evalpythonliteral(s): """Evaluate a string containing a Python literal expression""" # We could backport our tokenizer hack to rewrite '' to u'' if we want diff --git a/rust/hg-core/src/config/config.rs b/rust/hg-core/src/config/config.rs --- a/rust/hg-core/src/config/config.rs +++ b/rust/hg-core/src/config/config.rs @@ -361,10 +361,11 @@ /// /// This is appropriate for new configuration keys. The value syntax is /// **not** the same as most existing list-valued config, which has Python - /// parsing implemented in `parselist()` in `mercurial/config.py`. 
- /// Faithfully porting that parsing algorithm to Rust (including behavior - /// that are arguably bugs) turned out to be non-trivial and hasn’t been - /// completed as of this writing. + /// parsing implemented in `parselist()` in + /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing + /// algorithm to Rust (including behaviors that are arguably bugs) + /// turned out to be non-trivial and hasn’t been completed as of this + /// writing. /// /// Instead, the "simple" syntax is: split on comma, then trim leading and /// trailing whitespace of each component. Quotes or backslashes are not diff --git a/tests/test-doctest.py b/tests/test-doctest.py --- a/tests/test-doctest.py +++ b/tests/test-doctest.py @@ -131,7 +131,6 @@ ('mercurial.changelog', '{}'), ('mercurial.cmdutil', '{}'), ('mercurial.color', '{}'), - ('mercurial.config', '{}'), ('mercurial.dagparser', "{'optionflags': 4}"), ('mercurial.encoding', '{}'), ('mercurial.fancyopts', '{}'),