     ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
     line = []
     col = 0

-    source = source.expandtabs(8).replace('\r\n', '\n')
+    # The \f is because of http://bugs.python.org/issue19035
+    source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
     tokgen = generate_tokens(source)

     for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
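The hunk above normalizes the source before tokenizing it: tabs are expanded to 8-column stops, CRLF line endings become LF, and (new in this change) form feeds become spaces to work around the tokenize behavior referenced in the linked bug report. A minimal sketch of what that chain does to a small, made-up input:

    # Sketch of the normalization chain above on a hypothetical input:
    # tabs -> 8-column stops, CRLF -> LF, form feed -> space.
    raw = "if x:\r\n\tprint(x)\f\r\n"
    cleaned = raw.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
    print(repr(cleaned))    # 'if x:\n        print(x) \n'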
 # Create our generate_tokens cache as a callable replacement function.
 generate_tokens = CachedTokenizer().generate_tokens


-COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)

 @contract(source='bytes')
 def _source_encoding_py2(source):
     """Determine the encoding for `source`, according to PEP 263.
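COOKIE_RE is the PEP 263 coding-declaration matcher used by the functions below. The change swaps \s for [ \t]: \s also matches newlines, so under re.MULTILINE the looser pattern can run across physical lines, while [ \t] confines the match to a single line. A small sketch using the new pattern on a hypothetical two-line header:

    import re

    # Same pattern as the new COOKIE_RE above; the source text is made up.
    COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)

    src = "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nprint('hi')\n"
    match = COOKIE_RE.search(src)
    print(match.group(1))   # utf-8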
 def compile_unicode(source, filename, mode):
     """Just like the `compile` builtin, but works on any Unicode string.

     Python 2's compile() builtin has a stupid restriction: if the source string
     is Unicode, then it may not have a encoding declaration in it. Why not?
-    Who knows!
+    Who knows! It also decodes to utf8, and then tries to interpret those utf8
+    bytes according to the encoding declaration. Why? Who knows!

-    This function catches that exception, neuters the coding declaration, and
-    compiles it anyway.
+    This function neuters the coding declaration, and compiles it.

     """
-    try:
-        code = compile(source, filename, mode)
-    except SyntaxError as synerr:
-        if "coding declaration in unicode string" not in synerr.args[0].lower():
-            raise
-        source = neuter_encoding_declaration(source)
-        code = compile(source, filename, mode)
-
+    source = neuter_encoding_declaration(source)
+    if env.PY2 and isinstance(filename, unicode):
+        filename = filename.encode(sys.getfilesystemencoding(), "replace")
+    code = compile(source, filename, mode)
     return code
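The rewrite above drops the compile-then-catch approach: instead of waiting for the "coding declaration in unicode string" SyntaxError and retrying, the new code always neuters the declaration first, and on Python 2 also encodes a Unicode filename with the filesystem encoding so the compile() builtin will accept it. A usage sketch, assuming the function is importable from coverage.phystokens (the sample source is made up):

    from coverage.phystokens import compile_unicode   # assumed import path

    source = u'# -*- coding: utf-8 -*-\nGREETING = u"hello"\n'
    code = compile_unicode(source, "<sample>", "exec")
    namespace = {}
    exec(code, namespace)
    print(namespace["GREETING"])    # hello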
 @contract(source='unicode', returns='unicode')
 def neuter_encoding_declaration(source):
-    """Return `source`, with any encoding declaration neutered.
-
-    This function will only ever be called on `source` that has an encoding
-    declaration, so some edge cases can be ignored.
-
-    """
-    source = COOKIE_RE.sub("# (deleted declaration)", source)
+    """Return `source`, with any encoding declaration neutered."""
+    source = COOKIE_RE.sub("# (deleted declaration)", source, count=2)
     return source
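Two things change here: the docstring no longer assumes the function is only called on source that actually has a declaration, and count=2 caps the substitution at the first two matches, which is all that can matter since PEP 263 only honours a declaration on the first or second line of the file. A sketch using the new definition on a made-up source:

    import re

    COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)

    def neuter_encoding_declaration(source):
        """Return `source`, with any encoding declaration neutered."""
        source = COOKIE_RE.sub("# (deleted declaration)", source, count=2)
        return source

    src = "#!/usr/bin/env python\n# -*- coding: latin-1 -*-\nx = 1\n"
    print(neuter_encoding_declaration(src))
    # #!/usr/bin/env python
    # # (deleted declaration) -*-
    # x = 1
    # (the trailing "-*-" survives because the pattern stops after the encoding name)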