1 """Better tokenizing for coverage.py.""" |
1 """Better tokenizing for coverage.py.""" |
2 |
2 |
3 import keyword, re, token, tokenize |
3 import codecs, keyword, re, sys, token, tokenize |
4 from .backward import StringIO # pylint: disable-msg=W0622 |
4 from .backward import set # pylint: disable=W0622 |
|
5 from .parser import generate_tokens |
|
6 |
5 |
7 |
def phys_tokens(toks):
7 """Return all physical tokens, even line continuations. |
9 """Return all physical tokens, even line continuations. |
8 |
10 |
9 tokenize.generate_tokens() doesn't return a token for the backslash that |
11 tokenize.generate_tokens() doesn't return a token for the backslash that |
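    # An illustrative case (hypothetical source, not from the original
    # file): in
    #     a = 1 + \
    #         2
    # tokenize.generate_tokens() emits no token for the trailing backslash,
    # so phys_tokens re-injects one to keep physical lines reconstructible.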
    last_line = None
    last_lineno = -1
    last_ttype = None
    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
        if last_lineno != elineno:
            if last_line and last_line.endswith("\\\n"):
                # We are at the beginning of a new line, and the last line
                # ended with a backslash. We probably have to inject a
                # backslash token into the stream. Unfortunately, there's more
                # to figure out. This code::
                #
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """
    ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
    line = []
    col = 0
    source = source.expandtabs(8).replace('\r\n', '\n')
    tokgen = generate_tokens(source)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                col = ecol

    if line:
        yield line
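
# A rough usage sketch, not part of the original module (the token class
# abbreviations shown are illustrative): each yielded line is a list of
# (class, text) pairs, e.g. for the source "if x:\n" something like
#     [('key', 'if'), ('ws', ' '), ('nam', 'x'), ('op', ':')]
# and concatenating the text parts of each pair rebuilds the source line,
# as the docstring above describes.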

def source_encoding(source):
    """Determine the encoding for `source` (a string), according to PEP 263.

    Returns a string, the name of the encoding.

    """
    # Note: this function should never be called on Python 3, since py3 has
    # built-in tools to do this.
    assert sys.version_info < (3, 0)

    # This is mostly code adapted from Py3.2's tokenize module.

    cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

    # Do this so the detect_encode code we copied will work.
    readline = iter(source.splitlines(True)).next

    def _get_normal_name(orig_enc):
        """Imitates get_normal_name in tokenizer.c."""
        # Only care about the first 12 characters.
        enc = orig_enc[:12].lower().replace("_", "-")
        if re.match(r"^utf-8($|-)", enc):
            return "utf-8"
        if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
            return "iso-8859-1"
        return orig_enc
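
    # For instance (illustrative values, not from the original source):
    #   _get_normal_name("UTF_8")   -> "utf-8"
    #   _get_normal_name("Latin-1") -> "iso-8859-1"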
|
    # From detect_encode():
    # It detects the encoding from the presence of a utf-8 bom or an encoding
    # cookie as specified in pep-0263. If both a bom and a cookie are present,
    # but disagree, a SyntaxError will be raised. If the encoding cookie is an
    # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    # 'utf-8-sig' is returned.

    # If no encoding is specified, then the default will be returned. The
    # default varied with version.

    if sys.version_info <= (2, 4):
        default = 'iso-8859-1'
    else:
        default = 'ascii'

    bom_found = False
    encoding = None
|
    def read_or_stop():
        """Get the next source line, or ''."""
        try:
            return readline()
        except StopIteration:
            return ''
|
    def find_cookie(line):
        """Find an encoding cookie in `line`."""
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None

        matches = cookie_re.findall(line_string)
        if not matches:
            return None
        encoding = _get_normal_name(matches[0])
        try:
            codec = codecs.lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            # codecs in 2.3 were raw tuples of functions, assume the best.
            codec_name = getattr(codec, 'name', encoding)
            if codec_name != 'utf-8':
                # This behaviour mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding
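
    # find_cookie recognizes the common PEP 263 spellings, for example
    # (illustrative lines, not from the original source):
    #   # -*- coding: utf-8 -*-
    #   # vim: set fileencoding=utf-8 :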
|
    first = read_or_stop()
    if first.startswith(codecs.BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default

    encoding = find_cookie(first)
    if encoding:
        return encoding

    second = read_or_stop()
    if not second:
        return default

    encoding = find_cookie(second)
    if encoding:
        return encoding

    return default
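

if __name__ == '__main__':
    # A minimal demonstration sketch, not part of the original module; it
    # assumes the package's relative imports can be resolved (Python 2 only).
    # A PEP 263 cookie on the first line is detected and returned:
    print source_encoding("# -*- coding: iso8859-15 -*-\npass\n")  # iso8859-15
    # A utf-8 BOM with no cookie falls back to the 'utf-8-sig' default:
    print source_encoding(codecs.BOM_UTF8 + "pass\n")              # utf-8-sig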