Details
Details
- Reviewers: Alphare
- Group Reviewers: hg-reviewers
- Commits: rHGfa2b1a46d92e: encoding: remove Python 2 support code
Diff Detail
Diff Detail
- Repository: rHG Mercurial
- Branch: default
- Lint: No Linters Available
- Unit: No Unit Test Coverage
( )
| Alphare |
| hg-reviewers |
| No Linters Available |
| No Unit Test Coverage |
| | Path | Packages |
|---|---|---|
| M | mercurial/encoding.py (88 lines) | |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| 94199021b779 | 6839c9985319 | Gregory Szorc | encoding: remove Python 2 support code | Mar 3 2022, 10:58 AM |
| isasciistr = charencode.isasciistr | isasciistr = charencode.isasciistr | ||||
| asciilower = charencode.asciilower | asciilower = charencode.asciilower | ||||
| asciiupper = charencode.asciiupper | asciiupper = charencode.asciiupper | ||||
| _jsonescapeu8fast = charencode.jsonescapeu8fast | _jsonescapeu8fast = charencode.jsonescapeu8fast | ||||
| _sysstr = pycompat.sysstr | _sysstr = pycompat.sysstr | ||||
| if pycompat.ispy3: | |||||
| unichr = chr | unichr = chr | ||||
| # These unicode characters are ignored by HFS+ (Apple Technote 1150, | # These unicode characters are ignored by HFS+ (Apple Technote 1150, | ||||
| # "Unicode Subtleties"), so we need to ignore them in some places for | # "Unicode Subtleties"), so we need to ignore them in some places for | ||||
| # sanity. | # sanity. | ||||
| _ignore = [ | _ignore = [ | ||||
| unichr(int(x, 16)).encode("utf-8") | unichr(int(x, 16)).encode("utf-8") | ||||
| for x in b"200c 200d 200e 200f 202a 202b 202c 202d 202e " | for x in b"200c 200d 200e 200f 202a 202b 202c 202d 202e " | ||||
| b"206a 206b 206c 206d 206e 206f feff".split() | b"206a 206b 206c 206d 206e 206f feff".split() | ||||
| if b"\xe2" in s or b"\xef" in s: | if b"\xe2" in s or b"\xef" in s: | ||||
| for c in _ignore: | for c in _ignore: | ||||
| s = s.replace(c, b'') | s = s.replace(c, b'') | ||||
| return s | return s | ||||
| # encoding.environ is provided read-only, which may not be used to modify | # encoding.environ is provided read-only, which may not be used to modify | ||||
| # the process environment | # the process environment | ||||
| _nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ | _nativeenviron = os.supports_bytes_environ | ||||
| if not pycompat.ispy3: | if _nativeenviron: | ||||
| environ = os.environ # re-exports | |||||
| elif _nativeenviron: | |||||
| environ = os.environb # re-exports | environ = os.environb # re-exports | ||||
| else: | else: | ||||
| # preferred encoding isn't known yet; use utf-8 to avoid unicode error | # preferred encoding isn't known yet; use utf-8 to avoid unicode error | ||||
| # and recreate it once encoding is settled | # and recreate it once encoding is settled | ||||
| environ = { | environ = { | ||||
| k.encode('utf-8'): v.encode('utf-8') | k.encode('utf-8'): v.encode('utf-8') | ||||
| for k, v in os.environ.items() # re-exports | for k, v in os.environ.items() # re-exports | ||||
| } | } | ||||
| _encodingrewrites = { | _encodingrewrites = { | ||||
| b'646': b'ascii', | b'646': b'ascii', | ||||
| b'ANSI_X3.4-1968': b'ascii', | b'ANSI_X3.4-1968': b'ascii', | ||||
| } | } | ||||
| # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. | # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. | ||||
| # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. | # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. | ||||
| # https://bugs.python.org/issue13216 | # https://bugs.python.org/issue13216 | ||||
| if pycompat.iswindows and not pycompat.ispy3: | if pycompat.iswindows: | ||||
| _encodingrewrites[b'cp65001'] = b'utf-8' | _encodingrewrites[b'cp65001'] = b'utf-8' | ||||
| try: | try: | ||||
| encoding = environ.get(b"HGENCODING") | encoding = environ.get(b"HGENCODING") | ||||
| if not encoding: | if not encoding: | ||||
| encoding = locale.getpreferredencoding().encode('ascii') or b'ascii' | encoding = locale.getpreferredencoding().encode('ascii') or b'ascii' | ||||
| encoding = _encodingrewrites.get(encoding, encoding) | encoding = _encodingrewrites.get(encoding, encoding) | ||||
| except locale.Error: | except locale.Error: | ||||
| return unifromlocal(bytesfunc(obj)) | return unifromlocal(bytesfunc(obj)) | ||||
| return unifunc | return unifunc | ||||
| # converter functions between native str and byte string. use these if the | # converter functions between native str and byte string. use these if the | ||||
| # character encoding is not aware (e.g. exception message) or is known to | # character encoding is not aware (e.g. exception message) or is known to | ||||
| # be locale dependent (e.g. date formatting.) | # be locale dependent (e.g. date formatting.) | ||||
| if pycompat.ispy3: | |||||
| strtolocal = unitolocal | strtolocal = unitolocal | ||||
| strfromlocal = unifromlocal | strfromlocal = unifromlocal | ||||
| strmethod = unimethod | strmethod = unimethod | ||||
| else: | |||||
| def strtolocal(s): | |||||
| # type: (str) -> bytes | |||||
| return s # pytype: disable=bad-return-type | |||||
| def strfromlocal(s): | |||||
| # type: (bytes) -> str | |||||
| return s # pytype: disable=bad-return-type | |||||
| strmethod = pycompat.identity | |||||
| def lower(s): | def lower(s): | ||||
| # type: (bytes) -> bytes | # type: (bytes) -> bytes | ||||
| """best-effort encoding-aware case-folding of local string s""" | """best-effort encoding-aware case-folding of local string s""" | ||||
| try: | try: | ||||
| return asciilower(s) | return asciilower(s) | ||||
| except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
| raise error.Abort( | raise error.Abort( | ||||
| pycompat.bytestr(k), hint=b"please check your locale settings" | pycompat.bytestr(k), hint=b"please check your locale settings" | ||||
| ) | ) | ||||
| if not _nativeenviron: | if not _nativeenviron: | ||||
| # now encoding and helper functions are available, recreate the environ | # now encoding and helper functions are available, recreate the environ | ||||
| # dict to be exported to other modules | # dict to be exported to other modules | ||||
| if pycompat.iswindows and pycompat.ispy3: | if pycompat.iswindows: | ||||
| class WindowsEnviron(dict): | class WindowsEnviron(dict): | ||||
| """`os.environ` normalizes environment variables to uppercase on windows""" | """`os.environ` normalizes environment variables to uppercase on windows""" | ||||
| def get(self, key, default=None): | def get(self, key, default=None): | ||||
| return super().get(upper(key), default) | return super().get(upper(key), default) | ||||
| environ = WindowsEnviron() | environ = WindowsEnviron() | ||||
| for k, v in os.environ.items(): # re-exports | for k, v in os.environ.items(): # re-exports | ||||
| environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) | environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) | ||||
| DRIVE_RE = re.compile(b'^[a-z]:') | DRIVE_RE = re.compile(b'^[a-z]:') | ||||
| if pycompat.ispy3: | |||||
| # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which | # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which | ||||
| # returns bytes. | # returns bytes. | ||||
| if pycompat.iswindows: | if pycompat.iswindows: | ||||
| # Python 3 on Windows issues a DeprecationWarning about using the bytes | # Python 3 on Windows issues a DeprecationWarning about using the bytes | ||||
| # API when os.getcwdb() is called. | # API when os.getcwdb() is called. | ||||
| # | # | ||||
| # Additionally, py3.8+ uppercases the drive letter when calling | # Additionally, py3.8+ uppercases the drive letter when calling | ||||
| # os.path.realpath(), which is used on ``repo.root``. Since those | # os.path.realpath(), which is used on ``repo.root``. Since those | ||||
| # strings are compared in various places as simple strings, also call | # strings are compared in various places as simple strings, also call | ||||
| # realpath here. See https://bugs.python.org/issue40368 | # realpath here. See https://bugs.python.org/issue40368 | ||||
| # | # | ||||
| # However this is not reliable, so lets explicitly make this drive | # However this is not reliable, so lets explicitly make this drive | ||||
| # letter upper case. | # letter upper case. | ||||
| # | # | ||||
| # note: we should consider dropping realpath here since it seems to | # note: we should consider dropping realpath here since it seems to | ||||
| # change the semantic of `getcwd`. | # change the semantic of `getcwd`. | ||||
| def getcwd(): | def getcwd(): | ||||
| cwd = os.getcwd() # re-exports | cwd = os.getcwd() # re-exports | ||||
| cwd = os.path.realpath(cwd) | cwd = os.path.realpath(cwd) | ||||
| cwd = strtolocal(cwd) | cwd = strtolocal(cwd) | ||||
| if DRIVE_RE.match(cwd): | if DRIVE_RE.match(cwd): | ||||
| cwd = cwd[0:1].upper() + cwd[1:] | cwd = cwd[0:1].upper() + cwd[1:] | ||||
| return cwd | return cwd | ||||
| else: | else: | ||||
| getcwd = os.getcwdb # re-exports | getcwd = os.getcwdb # re-exports | ||||
| else: | |||||
| getcwd = os.getcwd # re-exports | |||||
| # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. | # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. | ||||
| _wide = _sysstr( | _wide = _sysstr( | ||||
| environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" | environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" | ||||
| and b"WFA" | and b"WFA" | ||||
| or b"WF" | or b"WF" | ||||
| ) | ) | ||||
| return _jsonescapeu8fast(u8chars, paranoid) | return _jsonescapeu8fast(u8chars, paranoid) | ||||
| except ValueError: | except ValueError: | ||||
| pass | pass | ||||
| return charencodepure.jsonescapeu8fallback(u8chars, paranoid) | return charencodepure.jsonescapeu8fallback(u8chars, paranoid) | ||||
| # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 | # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 | ||||
| # bytes are mapped to that range. | # bytes are mapped to that range. | ||||
| if pycompat.ispy3: | |||||
| _utf8strict = r'surrogatepass' | _utf8strict = r'surrogatepass' | ||||
| else: | |||||
| _utf8strict = r'strict' | |||||
| _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | ||||
| def getutf8char(s, pos): | def getutf8char(s, pos): | ||||
| # type: (bytes, int) -> bytes | # type: (bytes, int) -> bytes | ||||
| """get the next full utf-8 character in the given string, starting at pos | """get the next full utf-8 character in the given string, starting at pos | ||||