diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py --- a/mercurial/pycompat.py +++ b/mercurial/pycompat.py @@ -11,21 +11,26 @@ from __future__ import absolute_import import builtins +import codecs import concurrent.futures as futures +import functools import getopt import http.client as httplib import http.cookiejar as cookielib import inspect +import io import json import os import pickle import queue import shlex import socketserver +import struct import sys import tempfile import xmlrpc.client as xmlrpclib + ispy3 = sys.version_info[0] >= 3 ispypy = '__pypy__' in sys.builtin_module_names TYPE_CHECKING = False @@ -82,401 +87,340 @@ return _rapply(f, xs) -if ispy3: - import builtins - import codecs - import functools - import io - import struct - - if os.name == r'nt' and sys.version_info >= (3, 6): - # MBCS (or ANSI) filesystem encoding must be used as before. - # Otherwise non-ASCII filenames in existing repositories would be - # corrupted. - # This must be set once prior to any fsencode/fsdecode calls. - sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr +if os.name == r'nt' and sys.version_info >= (3, 6): + # MBCS (or ANSI) filesystem encoding must be used as before. + # Otherwise non-ASCII filenames in existing repositories would be + # corrupted. + # This must be set once prior to any fsencode/fsdecode calls. + sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr - fsencode = os.fsencode - fsdecode = os.fsdecode - oscurdir = os.curdir.encode('ascii') - oslinesep = os.linesep.encode('ascii') - osname = os.name.encode('ascii') - ospathsep = os.pathsep.encode('ascii') - ospardir = os.pardir.encode('ascii') - ossep = os.sep.encode('ascii') - osaltsep = os.altsep - if osaltsep: - osaltsep = osaltsep.encode('ascii') - osdevnull = os.devnull.encode('ascii') +fsencode = os.fsencode +fsdecode = os.fsdecode +oscurdir = os.curdir.encode('ascii') +oslinesep = os.linesep.encode('ascii') +osname = os.name.encode('ascii') +ospathsep = os.pathsep.encode('ascii') +ospardir = os.pardir.encode('ascii') +ossep = os.sep.encode('ascii') +osaltsep = os.altsep +if osaltsep: + osaltsep = osaltsep.encode('ascii') +osdevnull = os.devnull.encode('ascii') - sysplatform = sys.platform.encode('ascii') - sysexecutable = sys.executable - if sysexecutable: - sysexecutable = os.fsencode(sysexecutable) - bytesio = io.BytesIO - # TODO deprecate stringio name, as it is a lie on Python 3. - stringio = bytesio +sysplatform = sys.platform.encode('ascii') +sysexecutable = sys.executable +if sysexecutable: + sysexecutable = os.fsencode(sysexecutable) +bytesio = io.BytesIO +# TODO deprecate stringio name, as it is a lie on Python 3. +stringio = bytesio - def maplist(*args): - return list(map(*args)) + +def maplist(*args): + return list(map(*args)) - def rangelist(*args): - return list(range(*args)) + +def rangelist(*args): + return list(range(*args)) - def ziplist(*args): - return list(zip(*args)) + +def ziplist(*args): + return list(zip(*args)) + - rawinput = input - getargspec = inspect.getfullargspec +rawinput = input +getargspec = inspect.getfullargspec - long = int +long = int - if getattr(sys, 'argv', None) is not None: - # On POSIX, the char** argv array is converted to Python str using - # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which - # isn't directly callable from Python code. In practice, os.fsencode() - # can be used instead (this is recommended by Python's documentation - # for sys.argv). - # - # On Windows, the wchar_t **argv is passed into the interpreter as-is. - # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But - # there's an additional wrinkle. What we really want to access is the - # ANSI codepage representation of the arguments, as this is what - # `int main()` would receive if Python 3 didn't define `int wmain()` - # (this is how Python 2 worked). To get that, we encode with the mbcs - # encoding, which will pass CP_ACP to the underlying Windows API to - # produce bytes. - if os.name == r'nt': - sysargv = [a.encode("mbcs", "ignore") for a in sys.argv] - else: - sysargv = [fsencode(a) for a in sys.argv] +if getattr(sys, 'argv', None) is not None: + # On POSIX, the char** argv array is converted to Python str using + # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which + # isn't directly callable from Python code. In practice, os.fsencode() + # can be used instead (this is recommended by Python's documentation + # for sys.argv). + # + # On Windows, the wchar_t **argv is passed into the interpreter as-is. + # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But + # there's an additional wrinkle. What we really want to access is the + # ANSI codepage representation of the arguments, as this is what + # `int main()` would receive if Python 3 didn't define `int wmain()` + # (this is how Python 2 worked). To get that, we encode with the mbcs + # encoding, which will pass CP_ACP to the underlying Windows API to + # produce bytes. + if os.name == r'nt': + sysargv = [a.encode("mbcs", "ignore") for a in sys.argv] + else: + sysargv = [fsencode(a) for a in sys.argv] - bytechr = struct.Struct('>B').pack - byterepr = b'%r'.__mod__ - - class bytestr(bytes): - """A bytes which mostly acts as a Python 2 str +bytechr = struct.Struct('>B').pack +byterepr = b'%r'.__mod__ - >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1) - ('', 'foo', 'ascii', '1') - >>> s = bytestr(b'foo') - >>> assert s is bytestr(s) - __bytes__() should be called if provided: +class bytestr(bytes): + """A bytes which mostly acts as a Python 2 str - >>> class bytesable(object): - ... def __bytes__(self): - ... return b'bytes' - >>> bytestr(bytesable()) - 'bytes' + >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1) + ('', 'foo', 'ascii', '1') + >>> s = bytestr(b'foo') + >>> assert s is bytestr(s) + + __bytes__() should be called if provided: - There's no implicit conversion from non-ascii str as its encoding is - unknown: + >>> class bytesable(object): + ... def __bytes__(self): + ... return b'bytes' + >>> bytestr(bytesable()) + 'bytes' - >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - UnicodeEncodeError: ... - - Comparison between bytestr and bytes should work: + There's no implicit conversion from non-ascii str as its encoding is + unknown: - >>> assert bytestr(b'foo') == b'foo' - >>> assert b'foo' == bytestr(b'foo') - >>> assert b'f' in bytestr(b'foo') - >>> assert bytestr(b'f') in b'foo' + >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + UnicodeEncodeError: ... - Sliced elements should be bytes, not integer: + Comparison between bytestr and bytes should work: - >>> s[1], s[:2] - (b'o', b'fo') - >>> list(s), list(reversed(s)) - ([b'f', b'o', b'o'], [b'o', b'o', b'f']) - - As bytestr type isn't propagated across operations, you need to cast - bytes to bytestr explicitly: + >>> assert bytestr(b'foo') == b'foo' + >>> assert b'foo' == bytestr(b'foo') + >>> assert b'f' in bytestr(b'foo') + >>> assert bytestr(b'f') in b'foo' - >>> s = bytestr(b'foo').upper() - >>> t = bytestr(s) - >>> s[0], t[0] - (70, b'F') + Sliced elements should be bytes, not integer: - Be careful to not pass a bytestr object to a function which expects - bytearray-like behavior. + >>> s[1], s[:2] + (b'o', b'fo') + >>> list(s), list(reversed(s)) + ([b'f', b'o', b'o'], [b'o', b'o', b'f']) - >>> t = bytes(t) # cast to bytes - >>> assert type(t) is bytes - """ + As bytestr type isn't propagated across operations, you need to cast + bytes to bytestr explicitly: - # Trick pytype into not demanding Iterable[int] be passed to __new__(), - # since the appropriate bytes format is done internally. - # - # https://github.com/google/pytype/issues/500 - if TYPE_CHECKING: + >>> s = bytestr(b'foo').upper() + >>> t = bytestr(s) + >>> s[0], t[0] + (70, b'F') - def __init__(self, s=b''): - pass + Be careful to not pass a bytestr object to a function which expects + bytearray-like behavior. + + >>> t = bytes(t) # cast to bytes + >>> assert type(t) is bytes + """ - def __new__(cls, s=b''): - if isinstance(s, bytestr): - return s - if not isinstance( - s, (bytes, bytearray) - ) and not hasattr( # hasattr-py3-only - s, u'__bytes__' - ): - s = str(s).encode('ascii') - return bytes.__new__(cls, s) + # Trick pytype into not demanding Iterable[int] be passed to __new__(), + # since the appropriate bytes format is done internally. + # + # https://github.com/google/pytype/issues/500 + if TYPE_CHECKING: - def __getitem__(self, key): - s = bytes.__getitem__(self, key) - if not isinstance(s, bytes): - s = bytechr(s) + def __init__(self, s=b''): + pass + + def __new__(cls, s=b''): + if isinstance(s, bytestr): return s - - def __iter__(self): - return iterbytestr(bytes.__iter__(self)) - - def __repr__(self): - return bytes.__repr__(self)[1:] # drop b'' + if not isinstance( + s, (bytes, bytearray) + ) and not hasattr( # hasattr-py3-only + s, u'__bytes__' + ): + s = str(s).encode('ascii') + return bytes.__new__(cls, s) - def iterbytestr(s): - """Iterate bytes as if it were a str object of Python 2""" - return map(bytechr, s) - - def maybebytestr(s): - """Promote bytes to bytestr""" - if isinstance(s, bytes): - return bytestr(s) + def __getitem__(self, key): + s = bytes.__getitem__(self, key) + if not isinstance(s, bytes): + s = bytechr(s) return s - def sysbytes(s): - """Convert an internal str (e.g. keyword, __doc__) back to bytes + def __iter__(self): + return iterbytestr(bytes.__iter__(self)) + + def __repr__(self): + return bytes.__repr__(self)[1:] # drop b'' + - This never raises UnicodeEncodeError, but only ASCII characters - can be round-trip by sysstr(sysbytes(s)). - """ - if isinstance(s, bytes): - return s - return s.encode('utf-8') +def iterbytestr(s): + """Iterate bytes as if it were a str object of Python 2""" + return map(bytechr, s) + - def sysstr(s): - """Return a keyword str to be passed to Python functions such as - getattr() and str.encode() +def maybebytestr(s): + """Promote bytes to bytestr""" + if isinstance(s, bytes): + return bytestr(s) + return s + - This never raises UnicodeDecodeError. Non-ascii characters are - considered invalid and mapped to arbitrary but unique code points - such that 'sysstr(a) != sysstr(b)' for all 'a != b'. - """ - if isinstance(s, builtins.str): - return s - return s.decode('latin-1') +def sysbytes(s): + """Convert an internal str (e.g. keyword, __doc__) back to bytes + + This never raises UnicodeEncodeError, but only ASCII characters + can be round-trip by sysstr(sysbytes(s)). + """ + if isinstance(s, bytes): + return s + return s.encode('utf-8') + - def strurl(url): - """Converts a bytes url back to str""" - if isinstance(url, bytes): - return url.decode('ascii') - return url +def sysstr(s): + """Return a keyword str to be passed to Python functions such as + getattr() and str.encode() + + This never raises UnicodeDecodeError. Non-ascii characters are + considered invalid and mapped to arbitrary but unique code points + such that 'sysstr(a) != sysstr(b)' for all 'a != b'. + """ + if isinstance(s, builtins.str): + return s + return s.decode('latin-1') + - def bytesurl(url): - """Converts a str url to bytes by encoding in ascii""" - if isinstance(url, str): - return url.encode('ascii') - return url +def strurl(url): + """Converts a bytes url back to str""" + if isinstance(url, bytes): + return url.decode('ascii') + return url + - def raisewithtb(exc, tb): - """Raise exception with the given traceback""" - raise exc.with_traceback(tb) +def bytesurl(url): + """Converts a str url to bytes by encoding in ascii""" + if isinstance(url, str): + return url.encode('ascii') + return url - def getdoc(obj): - """Get docstring as bytes; may be None so gettext() won't confuse it - with _('')""" - doc = getattr(obj, '__doc__', None) - if doc is None: - return doc - return sysbytes(doc) + +def raisewithtb(exc, tb): + """Raise exception with the given traceback""" + raise exc.with_traceback(tb) + - def _wrapattrfunc(f): - @functools.wraps(f) - def w(object, name, *args): - return f(object, sysstr(name), *args) +def getdoc(obj): + """Get docstring as bytes; may be None so gettext() won't confuse it + with _('')""" + doc = getattr(obj, '__doc__', None) + if doc is None: + return doc + return sysbytes(doc) - return w + +def _wrapattrfunc(f): + @functools.wraps(f) + def w(object, name, *args): + return f(object, sysstr(name), *args) - # these wrappers are automagically imported by hgloader - delattr = _wrapattrfunc(builtins.delattr) - getattr = _wrapattrfunc(builtins.getattr) - hasattr = _wrapattrfunc(builtins.hasattr) - setattr = _wrapattrfunc(builtins.setattr) - xrange = builtins.range - unicode = str + return w + - def open(name, mode=b'r', buffering=-1, encoding=None): - return builtins.open(name, sysstr(mode), buffering, encoding) +# these wrappers are automagically imported by hgloader +delattr = _wrapattrfunc(builtins.delattr) +getattr = _wrapattrfunc(builtins.getattr) +hasattr = _wrapattrfunc(builtins.hasattr) +setattr = _wrapattrfunc(builtins.setattr) +xrange = builtins.range +unicode = str - safehasattr = _wrapattrfunc(builtins.hasattr) + +def open(name, mode=b'r', buffering=-1, encoding=None): + return builtins.open(name, sysstr(mode), buffering, encoding) + - def _getoptbwrapper(orig, args, shortlist, namelist): - """ - Takes bytes arguments, converts them to unicode, pass them to - getopt.getopt(), convert the returned values back to bytes and then - return them for Python 3 compatibility as getopt.getopt() don't accepts - bytes on Python 3. - """ - args = [a.decode('latin-1') for a in args] - shortlist = shortlist.decode('latin-1') - namelist = [a.decode('latin-1') for a in namelist] - opts, args = orig(args, shortlist, namelist) - opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts] - args = [a.encode('latin-1') for a in args] - return opts, args +safehasattr = _wrapattrfunc(builtins.hasattr) + + +def _getoptbwrapper(orig, args, shortlist, namelist): + """ + Takes bytes arguments, converts them to unicode, pass them to + getopt.getopt(), convert the returned values back to bytes and then + return them for Python 3 compatibility as getopt.getopt() don't accepts + bytes on Python 3. + """ + args = [a.decode('latin-1') for a in args] + shortlist = shortlist.decode('latin-1') + namelist = [a.decode('latin-1') for a in namelist] + opts, args = orig(args, shortlist, namelist) + opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts] + args = [a.encode('latin-1') for a in args] + return opts, args + - def strkwargs(dic): - """ - Converts the keys of a python dictonary to str i.e. unicodes so that - they can be passed as keyword arguments as dictionaries with bytes keys - can't be passed as keyword arguments to functions on Python 3. - """ - dic = {k.decode('latin-1'): v for k, v in dic.items()} - return dic +def strkwargs(dic): + """ + Converts the keys of a python dictonary to str i.e. unicodes so that + they can be passed as keyword arguments as dictionaries with bytes keys + can't be passed as keyword arguments to functions on Python 3. + """ + dic = {k.decode('latin-1'): v for k, v in dic.items()} + return dic - def byteskwargs(dic): - """ - Converts keys of python dictionaries to bytes as they were converted to - str to pass that dictonary as a keyword argument on Python 3. - """ - dic = {k.encode('latin-1'): v for k, v in dic.items()} - return dic - # TODO: handle shlex.shlex(). - def shlexsplit(s, comments=False, posix=True): - """ - Takes bytes argument, convert it to str i.e. unicodes, pass that into - shlex.split(), convert the returned value to bytes and return that for - Python 3 compatibility as shelx.split() don't accept bytes on Python 3. - """ - ret = shlex.split(s.decode('latin-1'), comments, posix) - return [a.encode('latin-1') for a in ret] +def byteskwargs(dic): + """ + Converts keys of python dictionaries to bytes as they were converted to + str to pass that dictonary as a keyword argument on Python 3. + """ + dic = {k.encode('latin-1'): v for k, v in dic.items()} + return dic + - iteritems = lambda x: x.items() - itervalues = lambda x: x.values() +# TODO: handle shlex.shlex(). +def shlexsplit(s, comments=False, posix=True): + """ + Takes bytes argument, convert it to str i.e. unicodes, pass that into + shlex.split(), convert the returned value to bytes and return that for + Python 3 compatibility as shelx.split() don't accept bytes on Python 3. + """ + ret = shlex.split(s.decode('latin-1'), comments, posix) + return [a.encode('latin-1') for a in ret] - # Python 3.5's json.load and json.loads require str. We polyfill its - # code for detecting encoding from bytes. - if sys.version_info[0:2] < (3, 6): - def _detect_encoding(b): - bstartswith = b.startswith - if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): - return 'utf-32' - if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): - return 'utf-16' - if bstartswith(codecs.BOM_UTF8): - return 'utf-8-sig' +iteritems = lambda x: x.items() +itervalues = lambda x: x.values() + +# Python 3.5's json.load and json.loads require str. We polyfill its +# code for detecting encoding from bytes. +if sys.version_info[0:2] < (3, 6): + + def _detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' - if len(b) >= 4: - if not b[0]: - # 00 00 -- -- - utf-32-be - # 00 XX -- -- - utf-16-be - return 'utf-16-be' if b[1] else 'utf-32-be' - if not b[1]: - # XX 00 00 00 - utf-32-le - # XX 00 00 XX - utf-16-le - # XX 00 XX -- - utf-16-le - return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' - elif len(b) == 2: - if not b[0]: - # 00 XX - utf-16-be - return 'utf-16-be' - if not b[1]: - # XX 00 - utf-16-le - return 'utf-16-le' - # default - return 'utf-8' + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' - def json_loads(s, *args, **kwargs): - if isinstance(s, (bytes, bytearray)): - s = s.decode(_detect_encoding(s), 'surrogatepass') + def json_loads(s, *args, **kwargs): + if isinstance(s, (bytes, bytearray)): + s = s.decode(_detect_encoding(s), 'surrogatepass') - return json.loads(s, *args, **kwargs) + return json.loads(s, *args, **kwargs) - else: - json_loads = json.loads else: - import cStringIO - - xrange = xrange - unicode = unicode - bytechr = chr - byterepr = repr - bytestr = str - iterbytestr = iter - maybebytestr = identity - sysbytes = identity - sysstr = identity - strurl = identity - bytesurl = identity - open = open - delattr = delattr - getattr = getattr - hasattr = hasattr - setattr = setattr - - # this can't be parsed on Python 3 - exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n') - - def fsencode(filename): - """ - Partial backport from os.py in Python 3, which only accepts bytes. - In Python 2, our paths should only ever be bytes, a unicode path - indicates a bug. - """ - if isinstance(filename, str): - return filename - else: - raise TypeError("expect str, not %s" % type(filename).__name__) - - # In Python 2, fsdecode() has a very chance to receive bytes. So it's - # better not to touch Python 2 part as it's already working fine. - fsdecode = identity - - def getdoc(obj): - return getattr(obj, '__doc__', None) - - _notset = object() - - def safehasattr(thing, attr): - return getattr(thing, attr, _notset) is not _notset - - def _getoptbwrapper(orig, args, shortlist, namelist): - return orig(args, shortlist, namelist) - - strkwargs = identity - byteskwargs = identity - - oscurdir = os.curdir - oslinesep = os.linesep - osname = os.name - ospathsep = os.pathsep - ospardir = os.pardir - ossep = os.sep - osaltsep = os.altsep - osdevnull = os.devnull - long = long - if getattr(sys, 'argv', None) is not None: - sysargv = sys.argv - sysplatform = sys.platform - sysexecutable = sys.executable - shlexsplit = shlex.split - bytesio = cStringIO.StringIO - stringio = bytesio - maplist = map - rangelist = range - ziplist = zip - rawinput = raw_input - getargspec = inspect.getargspec - iteritems = lambda x: x.iteritems() - itervalues = lambda x: x.itervalues() json_loads = json.loads isjython = sysplatform.startswith(b'java')