This commit rewrites the line-consuming code to use next() on a generator
in order to appease Python 2.
With this commit, we are now able to emit some tokens on Python 2.7 using
the Python 3.7 tokenizer. But there are still bugs...
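For background (not part of the patch itself): Python 2 iterators expose a .next() method while Python 3 iterators expose .__next__(), so taking chain(...).__next__ as the upstream 3.7 code does raises AttributeError on 2.7, whereas advancing the iterator with the builtin next() works on both. A minimal sketch of the difference, with made-up data:

import itertools

chained = itertools.chain([b'line 1\n'], [b'line 2\n'])

# Python 3 only -- on Python 2.7 the bound method is spelled .next(), so
# this attribute lookup raises AttributeError:
#     readline = chained.__next__

# Portable on 2.7 and 3.x -- the builtin dispatches to whichever method
# the iterator actually provides:
print(next(chained))   # first line
print(next(chained))   # second line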
Reviewers: baymax, hg-reviewers
Lint: Skipped
Unit Tests: Skipped
There seems to have been no activity on this Diff for the past 3 months.
By policy, we are automatically moving it out of the need-review state.
Please move it back to need-review without hesitation if this diff should still be discussed.
:baymax:need-review-idle:
| Path | Packages |
|---|---|
| M hgdemandimport/py3tokenize.py (24 lines) | |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| | | Gregory Szorc | | Oct 12 2018, 3:47 PM |
 #
 # * Removed main() and related functionality.
 # * Removed generate_tokens().
 # * Removed open().
 # * Removed module docstring.
 # * Adjusted for relative imports.
 # * absolute_import added.
 # * Removed re.ASCII.
+# * Various backports to work on Python 2.7.
 from __future__ import absolute_import
 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
 from codecs import lookup, BOM_UTF8
...
 tabsize = 8
 class TokenError(Exception): pass
 class StopTokenizing(Exception): pass
-class Untokenizer:
+class Untokenizer(object):
     def __init__(self):
         self.tokens = []
         self.prev_row = 1
         self.prev_col = 0
         self.encoding = None
     def add_whitespace(self, start):
...
     and the line on which the token was found. The line passed is the
     logical line; continuation lines are included.
     The first token sequence will always be an ENCODING token
     which tells you which encoding was used to decode the bytes stream.
     """
     # This import is here to avoid problems when the itertools module is not
     # built yet and tokenize is imported.
-    from itertools import chain, repeat
+    from itertools import repeat
     encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
+    def lines():
+        for line in consumed:
+            yield line
+        while True:
+            try:
+                yield readline()
+            except StopIteration:
+                break
+        yield repeat(b'')
+    return _tokenize(lines(), encoding)
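For reference, the replay pattern that the new lines() generator introduces can be exercised on its own. This is only a sketch with made-up names (replay_then_read, and a file-like readline that signals EOF with b''), not code from the patch:

import io

def replay_then_read(consumed, readline):
    # Stand-in for lines() above: first re-emit the lines that encoding
    # detection already consumed, then keep pulling fresh lines from the
    # readline callable until it reports end of input.
    for line in consumed:
        yield line
    while True:
        line = readline()
        if not line:          # a file-like readline() signals EOF with b''
            break
        yield line

buf = io.BytesIO(b'# coding: utf-8\nx = 1\n')
consumed = [buf.readline()]   # pretend detect_encoding() consumed this line
print(list(replay_then_read(consumed, buf.readline)))
# The coding line is replayed first, then the remaining source line.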
 def _tokenize(readline, encoding):
     lnum = parenlev = continued = 0
     numchars = '0123456789'
     contstr, needcont = '', 0
     contline = None
     indents = [0]
     if encoding is not None:
         if encoding == "utf-8-sig":
             # BOM will already have been stripped.
             encoding = "utf-8"
         yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
     last_line = b''
     line = b''
     while True:                                # loop over lines in stream
         try:
             # We capture the value of the line variable here because
             # readline uses the empty string '' to signal end of input,
             # hence `line` itself will always be overwritten at the end
             # of this loop.
             last_line = line
-            line = readline()
+            line = next(readline)
         except StopIteration:
             line = b''
         if encoding is not None:
             line = line.decode(encoding)
         lnum += 1
         pos, max = 0, len(line)
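The line = next(readline) hunk relies on a convention worth spelling out: now that the line source is a generator, exhaustion surfaces as StopIteration rather than as an empty string, and the existing except clause converts that back into b''. A minimal, self-contained sketch of that convention (hypothetical names, not code from the patch):

def line_source(chunks):
    # A generator standing in for the lines() callable handed to _tokenize().
    for chunk in chunks:
        yield chunk

readline = line_source([b'x = 1\n', b'y = 2\n'])

try:
    while True:
        line = next(readline)   # portable on Python 2.7 and 3.x
        print(line)
except StopIteration:
    line = b''                  # mirror _tokenize()'s end-of-input fallback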