This commit rewrites the line-consuming code to use next() on a generator
in order to appease Python 2.
With this commit, we are now able to emit some tokens on Python 2.7 using
the Python 3.7 tokenizer. But there are still bugs...
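For background (not part of the patch itself): Python 2 iterators expose a .next() method while Python 3 iterators expose .__next__(), so taking chain(...).__next__ as the upstream 3.7 code does raises AttributeError on 2.7, whereas advancing the iterator with the builtin next() works on both. A minimal sketch of the difference, with made-up data:

import itertools

chained = itertools.chain([b'line 1\n'], [b'line 2\n'])

# Python 3 only -- on Python 2.7 the bound method is spelled .next(), so
# this attribute lookup raises AttributeError:
#     readline = chained.__next__

# Portable on 2.7 and 3.x -- the builtin dispatches to whichever method
# the iterator actually provides:
print(next(chained))   # first line
print(next(chained))   # second line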
Reviewers: baymax, hg-reviewers
Lint: Skipped
Unit Tests: Skipped
There seems to have been no activity on this Diff for the past 3 months.
By policy, we are automatically moving it out of the need-review state.
Please move it back to need-review without hesitation if this diff should still be discussed.
:baymax:need-review-idle:
| Path | Packages |
|---|---|
| M hgdemandimport/py3tokenize.py (24 lines) | |
| Commit | Parents | Author | Summary | Date |
|---|---|---|---|---|
| | | Gregory Szorc | | Oct 12 2018, 3:47 PM |
 #
 # * Removed main() and related functionality.
 # * Removed generate_tokens().
 # * Removed open().
 # * Removed module docstring.
 # * Adjusted for relative imports.
 # * absolute_import added.
 # * Removed re.ASCII.
+# * Various backports to work on Python 2.7.
 from __future__ import absolute_import
 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
 from codecs import lookup, BOM_UTF8
...
 tabsize = 8
 class TokenError(Exception): pass
 class StopTokenizing(Exception): pass
-class Untokenizer:
+class Untokenizer(object):
     def __init__(self):
         self.tokens = []
         self.prev_row = 1
         self.prev_col = 0
         self.encoding = None
     def add_whitespace(self, start):
...
     and the line on which the token was found. The line passed is the
     logical line; continuation lines are included.
     The first token sequence will always be an ENCODING token
     which tells you which encoding was used to decode the bytes stream.
     """
     # This import is here to avoid problems when the itertools module is not
     # built yet and tokenize is imported.
-    from itertools import chain, repeat
+    from itertools import repeat
     encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
+    def lines():
+        for line in consumed:
+            yield line
+        while True:
+            try:
+                yield readline()
+            except StopIteration:
+                break
+        yield repeat(b'')
+    return _tokenize(lines(), encoding)
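For reference, the replay pattern that the new lines() generator introduces can be exercised on its own. This is only a sketch with made-up names (replay_then_read, and a file-like readline that signals EOF with b''), not code from the patch:

import io

def replay_then_read(consumed, readline):
    # Stand-in for lines() above: first re-emit the lines that encoding
    # detection already consumed, then keep pulling fresh lines from the
    # readline callable until it reports end of input.
    for line in consumed:
        yield line
    while True:
        line = readline()
        if not line:          # a file-like readline() signals EOF with b''
            break
        yield line

buf = io.BytesIO(b'# coding: utf-8\nx = 1\n')
consumed = [buf.readline()]   # pretend detect_encoding() consumed this line
print(list(replay_then_read(consumed, buf.readline)))
# The coding line is replayed first, then the remaining source line.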
 def _tokenize(readline, encoding):
     lnum = parenlev = continued = 0
     numchars = '0123456789'
     contstr, needcont = '', 0
     contline = None
     indents = [0]
     if encoding is not None:
         if encoding == "utf-8-sig":
             # BOM will already have been stripped.
             encoding = "utf-8"
         yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
     last_line = b''
     line = b''
     while True:                                # loop over lines in stream
         try:
             # We capture the value of the line variable here because
             # readline uses the empty string '' to signal end of input,
             # hence `line` itself will always be overwritten at the end
             # of this loop.
             last_line = line
-            line = readline()
+            line = next(readline)
         except StopIteration:
             line = b''
         if encoding is not None:
             line = line.decode(encoding)
         lnum += 1
         pos, max = 0, len(line)
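The line = next(readline) hunk relies on a convention worth spelling out: now that the line source is a generator, exhaustion surfaces as StopIteration rather than as an empty string, and the existing except clause converts that back into b''. A minimal, self-contained sketch of that convention (hypothetical names, not code from the patch):

def line_source(chunks):
    # A generator standing in for the lines() callable handed to _tokenize().
    for chunk in chunks:
        yield chunk

readline = line_source([b'x = 1\n', b'y = 2\n'])

try:
    while True:
        line = next(readline)   # portable on Python 2.7 and 3.x
        print(line)
except StopIteration:
    line = b''                  # mirror _tokenize()'s end-of-input fallback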