DebugClients/Python3/coverage/phystokens.py

changeset 5051
3586ebd9fac8
parent 4489
d0d6e4ad31bd
diff -r 04e5dfbd3f3d -r 3586ebd9fac8 DebugClients/Python3/coverage/phystokens.py
--- a/DebugClients/Python3/coverage/phystokens.py	Sat Jul 23 13:33:54 2016 +0200
+++ b/DebugClients/Python3/coverage/phystokens.py	Sun Jul 24 12:01:01 2016 +0200
@@ -6,6 +6,7 @@
 import codecs
 import keyword
 import re
+import sys
 import token
 import tokenize
 
@@ -91,8 +92,7 @@
     line = []
     col = 0
 
-    # The \f is because of http://bugs.python.org/issue19035
-    source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
+    source = source.expandtabs(8).replace('\r\n', '\n')
     tokgen = generate_tokens(source)
 
     for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
@@ -152,7 +152,7 @@
 generate_tokens = CachedTokenizer().generate_tokens
 
 
-COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)
 
 @contract(source='bytes')
 def _source_encoding_py2(source):
@@ -274,30 +274,21 @@
 
     Python 2's compile() builtin has a stupid restriction: if the source string
     is Unicode, then it may not have a encoding declaration in it.  Why not?
-    Who knows!
+    Who knows!  It also decodes to utf8, and then tries to interpret those utf8
+    bytes according to the encoding declaration.  Why? Who knows!
 
-    This function catches that exception, neuters the coding declaration, and
-    compiles it anyway.
+    This function neuters the coding declaration, and compiles it.
 
     """
-    try:
-        code = compile(source, filename, mode)
-    except SyntaxError as synerr:
-        if "coding declaration in unicode string" not in synerr.args[0].lower():
-            raise
-        source = neuter_encoding_declaration(source)
-        code = compile(source, filename, mode)
-
+    source = neuter_encoding_declaration(source)
+    if env.PY2 and isinstance(filename, unicode):
+        filename = filename.encode(sys.getfilesystemencoding(), "replace")
+    code = compile(source, filename, mode)
     return code
 
 
 @contract(source='unicode', returns='unicode')
 def neuter_encoding_declaration(source):
-    """Return `source`, with any encoding declaration neutered.
-
-    This function will only ever be called on `source` that has an encoding
-    declaration, so some edge cases can be ignored.
-
-    """
-    source = COOKIE_RE.sub("# (deleted declaration)", source)
+    """Return `source`, with any encoding declaration neutered."""
+    source = COOKIE_RE.sub("# (deleted declaration)", source, count=2)
     return source

eric ide

mercurial