DebugClients/Python3/coverage/phystokens.py

changeset 5051
3586ebd9fac8
parent 4489
d0d6e4ad31bd
equal deleted inserted replaced
5047:04e5dfbd3f3d 5051:3586ebd9fac8
4 """Better tokenizing for coverage.py.""" 4 """Better tokenizing for coverage.py."""
5 5
6 import codecs 6 import codecs
7 import keyword 7 import keyword
8 import re 8 import re
9 import sys
9 import token 10 import token
10 import tokenize 11 import tokenize
11 12
12 from coverage import env 13 from coverage import env
13 from coverage.backward import iternext 14 from coverage.backward import iternext
89 90
90 ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]) 91 ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
91 line = [] 92 line = []
92 col = 0 93 col = 0
93 94
94 # The \f is because of http://bugs.python.org/issue19035 95 source = source.expandtabs(8).replace('\r\n', '\n')
95 source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
96 tokgen = generate_tokens(source) 96 tokgen = generate_tokens(source)
97 97
98 for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): 98 for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
99 mark_start = True 99 mark_start = True
100 for part in re.split('(\n)', ttext): 100 for part in re.split('(\n)', ttext):
150 150
151 # Create our generate_tokens cache as a callable replacement function. 151 # Create our generate_tokens cache as a callable replacement function.
152 generate_tokens = CachedTokenizer().generate_tokens 152 generate_tokens = CachedTokenizer().generate_tokens
153 153
154 154
155 COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) 155 COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)
156 156
157 @contract(source='bytes') 157 @contract(source='bytes')
158 def _source_encoding_py2(source): 158 def _source_encoding_py2(source):
159 """Determine the encoding for `source`, according to PEP 263. 159 """Determine the encoding for `source`, according to PEP 263.
160 160
272 def compile_unicode(source, filename, mode): 272 def compile_unicode(source, filename, mode):
273 """Just like the `compile` builtin, but works on any Unicode string. 273 """Just like the `compile` builtin, but works on any Unicode string.
274 274
275 Python 2's compile() builtin has a stupid restriction: if the source string 275 Python 2's compile() builtin has a stupid restriction: if the source string
276 is Unicode, then it may not have a encoding declaration in it. Why not? 276 is Unicode, then it may not have a encoding declaration in it. Why not?
277 Who knows! 277 Who knows! It also decodes to utf8, and then tries to interpret those utf8
278 278 bytes according to the encoding declaration. Why? Who knows!
279 This function catches that exception, neuters the coding declaration, and 279
280 compiles it anyway. 280 This function neuters the coding declaration, and compiles it.
281 281
282 """ 282 """
283 try: 283 source = neuter_encoding_declaration(source)
284 code = compile(source, filename, mode) 284 if env.PY2 and isinstance(filename, unicode):
285 except SyntaxError as synerr: 285 filename = filename.encode(sys.getfilesystemencoding(), "replace")
286 if "coding declaration in unicode string" not in synerr.args[0].lower(): 286 code = compile(source, filename, mode)
287 raise
288 source = neuter_encoding_declaration(source)
289 code = compile(source, filename, mode)
290
291 return code 287 return code
292 288
293 289
294 @contract(source='unicode', returns='unicode') 290 @contract(source='unicode', returns='unicode')
295 def neuter_encoding_declaration(source): 291 def neuter_encoding_declaration(source):
296 """Return `source`, with any encoding declaration neutered. 292 """Return `source`, with any encoding declaration neutered."""
297 293 source = COOKIE_RE.sub("# (deleted declaration)", source, count=2)
298 This function will only ever be called on `source` that has an encoding
299 declaration, so some edge cases can be ignored.
300
301 """
302 source = COOKIE_RE.sub("# (deleted declaration)", source)
303 return source 294 return source

eric ide

mercurial