diff -r 04e5dfbd3f3d -r 3586ebd9fac8 DebugClients/Python3/coverage/phystokens.py --- a/DebugClients/Python3/coverage/phystokens.py Sat Jul 23 13:33:54 2016 +0200 +++ b/DebugClients/Python3/coverage/phystokens.py Sun Jul 24 12:01:01 2016 +0200 @@ -6,6 +6,7 @@ import codecs import keyword import re +import sys import token import tokenize @@ -91,8 +92,7 @@ line = [] col = 0 - # The \f is because of http://bugs.python.org/issue19035 - source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ') + source = source.expandtabs(8).replace('\r\n', '\n') tokgen = generate_tokens(source) for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): @@ -152,7 +152,7 @@ generate_tokens = CachedTokenizer().generate_tokens -COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) +COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE) @contract(source='bytes') def _source_encoding_py2(source): @@ -274,30 +274,21 @@ Python 2's compile() builtin has a stupid restriction: if the source string is Unicode, then it may not have a encoding declaration in it. Why not? - Who knows! + Who knows! It also decodes to utf8, and then tries to interpret those utf8 + bytes according to the encoding declaration. Why? Who knows! - This function catches that exception, neuters the coding declaration, and - compiles it anyway. + This function neuters the coding declaration, and compiles it. """ - try: - code = compile(source, filename, mode) - except SyntaxError as synerr: - if "coding declaration in unicode string" not in synerr.args[0].lower(): - raise - source = neuter_encoding_declaration(source) - code = compile(source, filename, mode) - + source = neuter_encoding_declaration(source) + if env.PY2 and isinstance(filename, unicode): + filename = filename.encode(sys.getfilesystemencoding(), "replace") + code = compile(source, filename, mode) return code @contract(source='unicode', returns='unicode') def neuter_encoding_declaration(source): - """Return `source`, with any encoding declaration neutered. - - This function will only ever be called on `source` that has an encoding - declaration, so some edge cases can be ignored. - - """ - source = COOKIE_RE.sub("# (deleted declaration)", source) + """Return `source`, with any encoding declaration neutered.""" + source = COOKIE_RE.sub("# (deleted declaration)", source, count=2) return source