diff --git a/hgext/narrow/narrowbundle2.py b/hgext/narrow/narrowbundle2.py --- a/hgext/narrow/narrowbundle2.py +++ b/hgext/narrow/narrowbundle2.py @@ -304,8 +304,8 @@ getbundleargs[b'narrow'] = b'boolean' getbundleargs[b'depth'] = b'plain' - getbundleargs[b'oldincludepats'] = b'csv' - getbundleargs[b'oldexcludepats'] = b'csv' + getbundleargs[b'oldincludepats'] = b'qcsv' + getbundleargs[b'oldexcludepats'] = b'qcsv' getbundleargs[b'known'] = b'csv' # Extend changegroup serving to handle requests from narrow clients. diff --git a/hgext/narrow/narrowwirepeer.py b/hgext/narrow/narrowwirepeer.py --- a/hgext/narrow/narrowwirepeer.py +++ b/hgext/narrow/narrowwirepeer.py @@ -79,14 +79,10 @@ preferuncompressed = False try: - def splitpaths(data): - # work around ''.split(',') => [''] - return data.split(b',') if data else [] - - oldincludes = splitpaths(oldincludes) - newincludes = splitpaths(newincludes) - oldexcludes = splitpaths(oldexcludes) - newexcludes = splitpaths(newexcludes) + oldincludes = wireprototypes.decode_qcsv(oldincludes) + newincludes = wireprototypes.decode_qcsv(newincludes) + oldexcludes = wireprototypes.decode_qcsv(oldexcludes) + newexcludes = wireprototypes.decode_qcsv(newexcludes) # validate the patterns narrowspec.validatepatterns(set(oldincludes)) narrowspec.validatepatterns(set(newincludes)) @@ -143,7 +139,7 @@ kwargs[ch] = wireprototypes.encodelist(kwargs[ch]) for ch in ('oldincludes', 'newincludes', 'oldexcludes', 'newexcludes'): - kwargs[ch] = b','.join(kwargs[ch]) + kwargs[ch] = wireprototypes.encode_qcsv(kwargs[ch]) kwargs['ellipses'] = b'%i' % bool(kwargs['ellipses']) f = remote._callcompressable(b'narrow_widen', **kwargs) diff --git a/mercurial/wireprototypes.py b/mercurial/wireprototypes.py --- a/mercurial/wireprototypes.py +++ b/mercurial/wireprototypes.py @@ -161,6 +161,7 @@ # :nodes: list of binary nodes, transmitted as space-separated hex nodes # :csv: list of values, transmitted as comma-separated values # :scsv: set of values, transmitted as comma-separated values +# :qcsv: list of values, transmitted as quote-escaped comma-separated values # :plain: string with no transformation needed. GETBUNDLE_ARGUMENTS = { b'heads': b'nodes', @@ -173,11 +174,195 @@ b'cg': b'boolean', b'cbattempted': b'boolean', b'stream': b'boolean', - b'includepats': b'csv', - b'excludepats': b'csv', + b'includepats': b'qcsv', + b'excludepats': b'qcsv', } +def encode_qcsv(paths): + r'''escape and join a value of type 'qcsv', producing a bytes object + + This produces an RFC 4180-like encoding, with the primary difference being + that we allow and produce items that have an embedded " character without + forcing the item to be completely surrounded by " characters. That is, + b'a"b' serializes as b'a"b', not b'"a""b"'. We also do not treat \r or \n + special in any way. + + If an item starts with a " character or has a , character in it, the entire + item is escaped. + + >>> from mercurial.pycompat import sysstr + >>> def check(paths): + ... return sysstr(encode_qcsv(paths)) + >>> check([b'a', b'b', b'c']) + 'a,b,c' + >>> check([b'a"b', b'c']) + 'a"b,c' + >>> check([b'a,b', b'c']) + '"a,b",c' + >>> check([b'a,b,c']) + '"a,b,c"' + >>> check([b'a"b,"']) + '"a""b,"""' + >>> check([b'a"cb', b'"']) + 'a"cb,""""' + >>> check([b'"']) + '""""' + >>> check([b'', b'']) + ',' + >>> check([b'', b'', b'', b'']) + ',,,' + >>> check([b',']) + '","' + >>> check([b'",,",""']) + '""",,"","""""' + >>> check([]) + '' + >>> check([b'']) + '' + ''' + + def maybequote(p): + if p.startswith(b'"') or b',' in p: + return b'"%s"' % p.replace(b'"', b'""') + return p + + return b','.join([maybequote(p) for p in paths]) + + +def _commasplit(s): + '''Split a csv value, doing RFC 4180-like unescaping. + + If an item in the csv list is escaped, the whole item will be enclosed in " + characters. Embedded " characters in these strings are represented by two + sequential " characters. + + If the item does not start with a " character, it is returned as-is + (including any embedded " characters). If the item starts with a " character + but does not end in one, we will remove the " and the final character of the + string, behavior in this situation is not guaranteed to be stable. + + >>> from mercurial.pycompat import sysstr + >>> def check(s): + ... return list([sysstr(x) for x in _commasplit(s)]) + >>> check(b'a,b,c') + ['a', 'b', 'c'] + >>> check(b'a"b,c') + ['a"b', 'c'] + >>> check(b'"a,b",c') + ['a,b', 'c'] + >>> check(b'"a,b,c"') + ['a,b,c'] + >>> check(b'"a""b,"""') + ['a"b,"'] + >>> check(b'""') + [''] + >>> check(b'""""') + ['"'] + >>> check(b'') + [''] + >>> check(b',') + ['', ''] + >>> check(b',,,') + ['', '', '', ''] + >>> check(b'","') + [','] + >>> check(b'""",,"","""""') + ['",,",""'] + >>> # These are invalid, so just make sure it doesn't crash/hang, the actual + >>> # value is essentially irrelevant. + >>> check(b'"') + [''] + >>> check(b',",') + ['', ''] + >>> # Note the missing 'c'. + >>> check(b'"abc') + ['ab'] + ''' + + dbg = False + startpos = 0 + while True: # loop over all comma-separated items + quoted = False + if s[startpos : startpos + 1] == b'"': + quoted = True + startpos += 1 + searchpos = startpos + while True: # loop over all embedded commas + commapos = s.find(b',', searchpos) + if commapos < 0: + if quoted: + yield s[startpos:-1].replace(b'""', b'"') + else: + yield s[startpos:] + return + else: + if quoted: + if s[startpos:commapos].count(b'"') % 2 == 0: + searchpos = commapos + 1 + continue # find another comma, this one is quoted + yield s[startpos : commapos - 1].replace(b'""', b'"') + + elif commapos == startpos: + yield '' + else: + yield s[startpos:commapos] + startpos = commapos + 1 + break # Stop searching for embedded commas, go to next item + + +def decode_qcsv(s): + r'''decode an value of type 'qcsv', producing a list + + If `s` is an empty string, decodes to an empty list. + + >>> from mercurial.pycompat import sysstr + >>> def check(s): + ... return list([sysstr(x) for x in decode_qcsv(s)]) + >>> check(b'a,b,c') + ['a', 'b', 'c'] + >>> check(b'a"b,c') + ['a"b', 'c'] + >>> check(b'"a,b",c') + ['a,b', 'c'] + >>> check(b'"a,b,c"') + ['a,b,c'] + >>> check(b'"a""b,"""') + ['a"b,"'] + >>> check(b'""') + [''] + >>> check(b'""""') + ['"'] + >>> check(b'') + [] + >>> check(b',') + ['', ''] + >>> check(b',,,') + ['', '', '', ''] + >>> check(b'","') + [','] + >>> check(b'""",,"","""""') + ['",,",""'] + >>> # These are invalid, so just make sure it doesn't crash/hang, the actual + >>> # value is essentially irrelevant. + >>> check(b'"') + [''] + >>> check(b',",') + ['', ''] + >>> # Note the missing 'c'. + >>> check(b'"abc') + ['ab'] + ''' + if not s: + return [] + + if s[0:1] != b'"' and b',"' not in s: + # fast path + return s.split(b',') + + return list(_commasplit(s)) + + class baseprotocolhandler(interfaceutil.Interface): """Abstract base class for wire protocol handlers. diff --git a/mercurial/wireprotov1peer.py b/mercurial/wireprotov1peer.py --- a/mercurial/wireprotov1peer.py +++ b/mercurial/wireprotov1peer.py @@ -462,6 +462,8 @@ value = b','.join(value) elif keytype == b'scsv': value = b','.join(sorted(value)) + elif keytype == b'qcsv': + value = wireprototypes.encode_qcsv(value) elif keytype == b'boolean': value = b'%i' % bool(value) elif keytype != b'plain': diff --git a/mercurial/wireprotov1server.py b/mercurial/wireprotov1server.py --- a/mercurial/wireprotov1server.py +++ b/mercurial/wireprotov1server.py @@ -448,6 +448,8 @@ opts[k] = list(v.split(b',')) elif keytype == b'scsv': opts[k] = set(v.split(b',')) + elif keytype == b'qcsv': + opts[k] = wireprototypes.decode_qcsv(v) elif keytype == b'boolean': # Client should serialize False as '0', which is a non-empty string # so it evaluates as a True bool. diff --git a/relnotes/next b/relnotes/next --- a/relnotes/next +++ b/relnotes/next @@ -29,6 +29,10 @@ * `hg debugmergestate` output is now templated, which may be useful e.g. for IDEs that want to help the user resolve merge conflicts. + * The experimental `narrow` extension will now be able to have include or + exclude patterns that have a comma in the name when both client and server + are updated. + == New Experimental Features == diff --git a/tests/test-doctest.py b/tests/test-doctest.py --- a/tests/test-doctest.py +++ b/tests/test-doctest.py @@ -137,6 +137,7 @@ ('mercurial.revlogutils.deltas', '{}'), ('mercurial.revset', '{}'), ('mercurial.revsetlang', '{}'), + ('mercurial.wireprototypes', '{}'), ('mercurial.simplemerge', '{}'), ('mercurial.smartset', '{}'), ('mercurial.store', '{}'), diff --git a/tests/test-narrow-exchange.t b/tests/test-narrow-exchange.t --- a/tests/test-narrow-exchange.t +++ b/tests/test-narrow-exchange.t @@ -223,3 +223,62 @@ remote: rollback completed (lfs-on !) remote: abort: data/inside2/f.i@f59b4e021835: no match found! (lfs-on !) abort: stream ended unexpectedly (got 0 bytes, expected 4) (lfs-on !) + +Test paths with commas in them + $ cd $TESTTMP + $ hg init commas-master + $ cd commas-master + $ mkdir a,b + $ mkdir a,b/c,d + $ mkdir a,b/e,f + $ mkdir g + $ echo abcd > a,b/c,d/abcd + $ echo abef > a,b/e,f/abef + $ echo ghi > g/h,i + $ hg ci -qAm r0 + $ echo abcd2 >> a,b/c,d/abcd + $ echo abef2 >> a,b/e,f/abef + $ echo ghi2 >> g/h,i + $ hg ci -qm r1 + $ cd .. + +Test that we can pull and push with a file that has a comma in the name, even +though the commas don't appear in the narrowspec file (since they're just +filenames) + $ hg clone --narrow ssh://user@dummy/commas-master commas-in-file \ + > --include g -qr 0 + $ cd commas-in-file + $ hg pull -q + $ echo ghi3 >> g/h,i + $ hg ci -qm 'modify g/h,i' + $ hg push -qf + $ cd .. + +Test commas in the --include, plus pull+push + $ hg clone --narrow ssh://user@dummy/commas-master commas-in-dir \ + > --include a,b --exclude a,b/c,d -qr 0 + $ cd commas-in-dir + $ hg pull -q + $ echo abef3 >> a,b/e,f/abef + $ hg ci -qm 'modify a,b/e,f' + $ hg push -qf + +Test that --{add,remove}{include,exclude} work with commas in the directory +names. + $ hg tracked + I path:a,b + X path:a,b/c,d + $ hg tracked --removeexclude a,b/c,d --addinclude a,b/e,f -q + $ hg tracked + I path:a,b + I path:a,b/e,f + $ hg files + a,b/c,d/abcd + a,b/e,f/abef + $ hg tracked --removeinclude a,b/e,f --addexclude a,b/c,d -q + $ hg tracked + I path:a,b + X path:a,b/c,d + $ hg files + a,b/e,f/abef + $ cd ..