Plugins/CheckerPlugins/CodeStyleChecker/pep8.py

changeset 3616
081b69e00f3e
parent 3484
645c12de6b0c
child 3617
2f859c5dcfb4
equal deleted inserted replaced
3613:47e29cc0f29c 3616:081b69e00f3e
1 #!/usr/bin/env python
1 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
2 3
3 #
4 # pep8.py - Check Python source code formatting, according to PEP 8 4 # pep8.py - Check Python source code formatting, according to PEP 8
5 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net> 5 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
6 # Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com> 6 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
7 # 7 #
8 # Permission is hereby granted, free of charge, to any person 8 # Permission is hereby granted, free of charge, to any person
9 # obtaining a copy of this software and associated documentation files 9 # obtaining a copy of this software and associated documentation files
10 # (the "Software"), to deal in the Software without restriction, 10 # (the "Software"), to deal in the Software without restriction,
11 # including without limitation the rights to use, copy, modify, merge, 11 # including without limitation the rights to use, copy, modify, merge,
23 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 # SOFTWARE. 26 # SOFTWARE.
27 27
28 """ 28 r"""
29 Check Python source code formatting, according to PEP 8: 29 Check Python source code formatting, according to PEP 8.
30 http://www.python.org/dev/peps/pep-0008/
31 30
32 For usage and a list of options, try this: 31 For usage and a list of options, try this:
33 $ python pep8.py -h 32 $ python pep8.py -h
34 33
35 This program and its regression test suite live here: 34 This program and its regression test suite live here:
45 500 line length 44 500 line length
46 600 deprecation 45 600 deprecation
47 700 statements 46 700 statements
48 900 syntax error 47 900 syntax error
49 """ 48 """
49 from __future__ import with_statement
50 50
51 # 51 #
52 # This is a modified version to make the original pep8.py better suitable 52 # This is a modified version to make the original pep8.py better suitable
53 # for being called from within the eric5 IDE. The modifications are as 53 # for being called from within the eric5 IDE. The modifications are as
54 # follows: 54 # follows:
57 # - added code for eric5 integration 57 # - added code for eric5 integration
58 # 58 #
59 # Copyright (c) 2011 - 2014 Detlev Offenbach <detlev@die-offenbachs.de> 59 # Copyright (c) 2011 - 2014 Detlev Offenbach <detlev@die-offenbachs.de>
60 # 60 #
61 61
62 __version__ = '1.4.6' 62 __version__ = '1.5.6'
63 63
64 import os 64 import os
65 import sys 65 import sys
66 import re 66 import re
67 import time 67 import time
99 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%']) 99 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
100 WS_NEEDED_OPERATORS = frozenset([ 100 WS_NEEDED_OPERATORS = frozenset([
101 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>', 101 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
102 '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=']) 102 '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
103 WHITESPACE = frozenset(' \t') 103 WHITESPACE = frozenset(' \t')
104 SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE, 104 NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
105 tokenize.INDENT, tokenize.DEDENT]) 105 SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
106 # ERRORTOKEN is triggered by backticks in Python 3
107 SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
106 BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines'] 108 BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
107 109
108 INDENT_REGEX = re.compile(r'([ \t]*)') 110 INDENT_REGEX = re.compile(r'([ \t]*)')
109 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') 111 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
110 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+') 112 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
111 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') 113 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
112 DOCSTRING_REGEX = re.compile(r'u?r?["\']') 114 DOCSTRING_REGEX = re.compile(r'u?r?["\']')
113 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') 115 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
114 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)') 116 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
115 COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)') 117 COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
118 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')
116 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type' 119 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
117 r'|\s*\(\s*([^)]*[^ )])\s*\))') 120 r'|\s*\(\s*([^)]*[^ )])\s*\))')
118 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS)) 121 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
119 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)') 122 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
120 LAMBDA_REGEX = re.compile(r'\blambda\b') 123 LAMBDA_REGEX = re.compile(r'\blambda\b')
129 # Plugins (check functions) for physical lines 132 # Plugins (check functions) for physical lines
130 ############################################################################## 133 ##############################################################################
131 134
132 135
133 def tabs_or_spaces(physical_line, indent_char): 136 def tabs_or_spaces(physical_line, indent_char):
134 r""" 137 r"""Never mix tabs and spaces.
135 Never mix tabs and spaces.
136 138
137 The most popular way of indenting Python is with spaces only. The 139 The most popular way of indenting Python is with spaces only. The
138 second-most popular way is with tabs only. Code indented with a mixture 140 second-most popular way is with tabs only. Code indented with a mixture
139 of tabs and spaces should be converted to using spaces exclusively. When 141 of tabs and spaces should be converted to using spaces exclusively. When
140 invoking the Python command line interpreter with the -t option, it issues 142 invoking the Python command line interpreter with the -t option, it issues
145 E101: if a == 0:\n a = 1\n\tb = 1 147 E101: if a == 0:\n a = 1\n\tb = 1
146 """ 148 """
147 indent = INDENT_REGEX.match(physical_line).group(1) 149 indent = INDENT_REGEX.match(physical_line).group(1)
148 for offset, char in enumerate(indent): 150 for offset, char in enumerate(indent):
149 if char != indent_char: 151 if char != indent_char:
150 return offset, "E101" 152 return offset, "E101 indentation contains mixed spaces and tabs"
151 153
152 154
153 def tabs_obsolete(physical_line): 155 def tabs_obsolete(physical_line):
154 r""" 156 r"""For new projects, spaces-only are strongly recommended over tabs.
155 For new projects, spaces-only are strongly recommended over tabs. Most
156 editors have features that make this easy to do.
157 157
158 Okay: if True:\n return 158 Okay: if True:\n return
159 W191: if True:\n\treturn 159 W191: if True:\n\treturn
160 """ 160 """
161 indent = INDENT_REGEX.match(physical_line).group(1) 161 indent = INDENT_REGEX.match(physical_line).group(1)
162 if '\t' in indent: 162 if '\t' in indent:
163 return indent.index('\t'), "W191" 163 return indent.index('\t'), "W191 indentation contains tabs"
164 164
165 165
166 def trailing_whitespace(physical_line): 166 def trailing_whitespace(physical_line):
167 r""" 167 r"""Trailing whitespace is superfluous.
168 JCR: Trailing whitespace is superfluous.
169 FBM: Except when it occurs as part of a blank line (i.e. the line is
170 nothing but whitespace). According to Python docs[1] a line with only
171 whitespace is considered a blank line, and is to be ignored. However,
172 matching a blank line to its indentation level avoids mistakenly
173 terminating a multi-line statement (e.g. class declaration) when
174 pasting code into the standard Python interpreter.
175
176 [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines
177 168
178 The warning returned varies on whether the line itself is blank, for easier 169 The warning returned varies on whether the line itself is blank, for easier
179 filtering for those who want to indent their blank lines. 170 filtering for those who want to indent their blank lines.
180 171
181 Okay: spam(1)\n# 172 Okay: spam(1)\n#
186 physical_line = physical_line.rstrip('\r') # chr(13), carriage return 177 physical_line = physical_line.rstrip('\r') # chr(13), carriage return
187 physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L 178 physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
188 stripped = physical_line.rstrip(' \t\v') 179 stripped = physical_line.rstrip(' \t\v')
189 if physical_line != stripped: 180 if physical_line != stripped:
190 if stripped: 181 if stripped:
191 return len(stripped), "W291" 182 return len(stripped), "W291 trailing whitespace"
192 else: 183 else:
193 return 0, "W293" 184 return 0, "W293 blank line contains whitespace"
194 185
195 186
196 def trailing_blank_lines(physical_line, lines, line_number): 187 def trailing_blank_lines(physical_line, lines, line_number, total_lines):
197 r""" 188 r"""Trailing blank lines are superfluous.
198 JCR: Trailing blank lines are superfluous.
199 189
200 Okay: spam(1) 190 Okay: spam(1)
201 W391: spam(1)\n 191 W391: spam(1)\n
202 """ 192
203 if not physical_line.rstrip() and line_number == len(lines): 193 However the last line should end with a new line (warning W292).
204 return 0, "W391" 194 """
205 195 if line_number == total_lines:
206 196 stripped_last_line = physical_line.rstrip()
207 def missing_newline(physical_line): 197 if not stripped_last_line:
208 """ 198 return 0, "W391 blank line at end of file"
209 JCR: The last line should have a newline. 199 if stripped_last_line == physical_line:
210 200 return len(physical_line), "W292 no newline at end of file"
211 Reports warning W292. 201
212 """ 202
213 if physical_line.rstrip() == physical_line: 203 def maximum_line_length(physical_line, max_line_length, multiline):
214 return len(physical_line), "W292" 204 r"""Limit all lines to a maximum of 79 characters.
215
216
217 def maximum_line_length(physical_line, max_line_length):
218 """
219 Limit all lines to a maximum of 79 characters.
220 205
221 There are still many devices around that are limited to 80 character 206 There are still many devices around that are limited to 80 character
222 lines; plus, limiting windows to 80 characters makes it possible to have 207 lines; plus, limiting windows to 80 characters makes it possible to have
223 several windows side-by-side. The default wrapping on such devices looks 208 several windows side-by-side. The default wrapping on such devices looks
224 ugly. Therefore, please limit all lines to a maximum of 79 characters. 209 ugly. Therefore, please limit all lines to a maximum of 79 characters.
228 Reports error E501. 213 Reports error E501.
229 """ 214 """
230 line = physical_line.rstrip() 215 line = physical_line.rstrip()
231 length = len(line) 216 length = len(line)
232 if length > max_line_length and not noqa(line): 217 if length > max_line_length and not noqa(line):
218 # Special case for long URLs in multi-line docstrings or comments,
219 # but still report the error when the 72 first chars are whitespaces.
220 chunks = line.split()
221 if ((len(chunks) == 1 and multiline) or
222 (len(chunks) == 2 and chunks[0] == '#')) and \
223 len(line) - len(chunks[-1]) < max_line_length - 7:
224 return
233 if hasattr(line, 'decode'): # Python 2 225 if hasattr(line, 'decode'): # Python 2
234 # The line could contain multi-byte characters 226 # The line could contain multi-byte characters
235 try: 227 try:
236 length = len(line.decode('utf-8')) 228 length = len(line.decode('utf-8'))
237 except UnicodeError: 229 except UnicodeError:
238 pass 230 pass
239 if length > max_line_length: 231 if length > max_line_length:
240 return max_line_length, "E501", length, max_line_length 232 return (max_line_length, "E501 line too long ", length, max_line_length)
241 233
242 234
243 ############################################################################## 235 ##############################################################################
244 # Plugins (check functions) for logical lines 236 # Plugins (check functions) for logical lines
245 ############################################################################## 237 ##############################################################################
246 238
247 239
248 def blank_lines(logical_line, blank_lines, indent_level, line_number, 240 def blank_lines(logical_line, blank_lines, indent_level, line_number,
249 previous_logical, previous_indent_level): 241 blank_before, previous_logical, previous_indent_level):
250 r""" 242 r"""Separate top-level function and class definitions with two blank lines.
251 Separate top-level function and class definitions with two blank lines.
252 243
253 Method definitions inside a class are separated by a single blank line. 244 Method definitions inside a class are separated by a single blank line.
254 245
255 Extra blank lines may be used (sparingly) to separate groups of related 246 Extra blank lines may be used (sparingly) to separate groups of related
256 functions. Blank lines may be omitted between a bunch of related 247 functions. Blank lines may be omitted between a bunch of related
269 """ 260 """
270 if line_number < 3 and not previous_logical: 261 if line_number < 3 and not previous_logical:
271 return # Don't expect blank lines before the first line 262 return # Don't expect blank lines before the first line
272 if previous_logical.startswith('@'): 263 if previous_logical.startswith('@'):
273 if blank_lines: 264 if blank_lines:
274 yield 0, "E304" 265 yield 0, "E304 blank lines found after function decorator"
275 elif blank_lines > 2 or (indent_level and blank_lines == 2): 266 elif blank_lines > 2 or (indent_level and blank_lines == 2):
276 yield 0, "E303", blank_lines 267 yield 0, "E303 too many blank lines (%d)", blank_lines
277 elif logical_line.startswith(('def ', 'class ', '@')): 268 elif logical_line.startswith(('def ', 'class ', '@')):
278 if indent_level: 269 if indent_level:
279 if not (blank_lines or previous_indent_level < indent_level or 270 if not (blank_before or previous_indent_level < indent_level or
280 DOCSTRING_REGEX.match(previous_logical)): 271 DOCSTRING_REGEX.match(previous_logical)):
281 yield 0, "E301" 272 yield 0, "E301 expected 1 blank line, found 0"
282 elif blank_lines != 2: 273 elif blank_before != 2:
283 yield 0, "E302", blank_lines 274 yield 0, "E302 expected 2 blank lines, found %d", blank_before
284 275
285 276
286 def extraneous_whitespace(logical_line): 277 def extraneous_whitespace(logical_line):
287 """ 278 r"""Avoid extraneous whitespace.
288 Avoid extraneous whitespace in the following situations: 279
289 280 Avoid extraneous whitespace in these situations:
290 - Immediately inside parentheses, brackets or braces. 281 - Immediately inside parentheses, brackets or braces.
291
292 - Immediately before a comma, semicolon, or colon. 282 - Immediately before a comma, semicolon, or colon.
293 283
294 Okay: spam(ham[1], {eggs: 2}) 284 Okay: spam(ham[1], {eggs: 2})
295 E201: spam( ham[1], {eggs: 2}) 285 E201: spam( ham[1], {eggs: 2})
296 E201: spam(ham[ 1], {eggs: 2}) 286 E201: spam(ham[ 1], {eggs: 2})
308 text = match.group() 298 text = match.group()
309 char = text.strip() 299 char = text.strip()
310 found = match.start() 300 found = match.start()
311 if text == char + ' ': 301 if text == char + ' ':
312 # assert char in '([{' 302 # assert char in '([{'
313 yield found + 1, "E201", char 303 yield found + 1, "E201 whitespace after '%s'", char
314 elif line[found - 1] != ',': 304 elif line[found - 1] != ',':
315 code = ('E202' if char in '}])' else 'E203') # if char in ',;:' 305 code = ('E202' if char in '}])' else 'E203') # if char in ',;:'
316 yield found, code, char 306 yield found, "%s whitespace before '%s'" % (code, char), char
317 307
318 308
319 def whitespace_around_keywords(logical_line): 309 def whitespace_around_keywords(logical_line):
320 r""" 310 r"""Avoid extraneous whitespace around keywords.
321 Avoid extraneous whitespace around keywords.
322 311
323 Okay: True and False 312 Okay: True and False
324 E271: True and False 313 E271: True and False
325 E272: True and False 314 E272: True and False
326 E273: True and\tFalse 315 E273: True and\tFalse
328 """ 317 """
329 for match in KEYWORD_REGEX.finditer(logical_line): 318 for match in KEYWORD_REGEX.finditer(logical_line):
330 before, after = match.groups() 319 before, after = match.groups()
331 320
332 if '\t' in before: 321 if '\t' in before:
333 yield match.start(1), "E274" 322 yield match.start(1), "E274 tab before keyword"
334 elif len(before) > 1: 323 elif len(before) > 1:
335 yield match.start(1), "E272" 324 yield match.start(1), "E272 multiple spaces before keyword"
336 325
337 if '\t' in after: 326 if '\t' in after:
338 yield match.start(2), "E273" 327 yield match.start(2), "E273 tab after keyword"
339 elif len(after) > 1: 328 elif len(after) > 1:
340 yield match.start(2), "E271" 329 yield match.start(2), "E271 multiple spaces after keyword"
341 330
342 331
343 def missing_whitespace(logical_line): 332 def missing_whitespace(logical_line):
344 """ 333 r"""Each comma, semicolon or colon should be followed by whitespace.
345 JCR: Each comma, semicolon or colon should be followed by whitespace.
346 334
347 Okay: [a, b] 335 Okay: [a, b]
348 Okay: (3,) 336 Okay: (3,)
349 Okay: a[1:4] 337 Okay: a[1:4]
350 Okay: a[:4] 338 Okay: a[:4]
362 if char == ':' and before.count('[') > before.count(']') and \ 350 if char == ':' and before.count('[') > before.count(']') and \
363 before.rfind('{') < before.rfind('['): 351 before.rfind('{') < before.rfind('['):
364 continue # Slice syntax, no space required 352 continue # Slice syntax, no space required
365 if char == ',' and line[index + 1] == ')': 353 if char == ',' and line[index + 1] == ')':
366 continue # Allow tuple with only one element: (3,) 354 continue # Allow tuple with only one element: (3,)
367 yield index, "E231", char 355 yield index, "E231 missing whitespace after '%s'", char
368 356
369 357
370 def indentation(logical_line, previous_logical, indent_char, 358 def indentation(logical_line, previous_logical, indent_char,
371 indent_level, previous_indent_level): 359 indent_level, previous_indent_level):
372 r""" 360 r"""Use 4 spaces per indentation level.
373 Use 4 spaces per indentation level.
374 361
375 For really old code that you don't want to mess up, you can continue to 362 For really old code that you don't want to mess up, you can continue to
376 use 8-space tabs. 363 use 8-space tabs.
377 364
378 Okay: a = 1 365 Okay: a = 1
384 371
385 Okay: a = 1\nb = 2 372 Okay: a = 1\nb = 2
386 E113: a = 1\n b = 2 373 E113: a = 1\n b = 2
387 """ 374 """
388 if indent_char == ' ' and indent_level % 4: 375 if indent_char == ' ' and indent_level % 4:
389 yield 0, "E111" 376 yield 0, "E111 indentation is not a multiple of four"
390 indent_expect = previous_logical.endswith(':') 377 indent_expect = previous_logical.endswith(':')
391 if indent_expect and indent_level <= previous_indent_level: 378 if indent_expect and indent_level <= previous_indent_level:
392 yield 0, "E112" 379 yield 0, "E112 expected an indented block"
393 if indent_level > previous_indent_level and not indent_expect: 380 if indent_level > previous_indent_level and not indent_expect:
394 yield 0, "E113" 381 yield 0, "E113 unexpected indentation"
395 382
396 383
397 def continued_indentation(logical_line, tokens, indent_level, hang_closing, 384 def continued_indentation(logical_line, tokens, indent_level, hang_closing,
398 noqa, verbose): 385 indent_char, noqa, verbose):
399 r""" 386 r"""Continuation lines indentation.
400 Continuation lines should align wrapped elements either vertically using 387
401 Python's implicit line joining inside parentheses, brackets and braces, or 388 Continuation lines should align wrapped elements either vertically
402 using a hanging indent. 389 using Python's implicit line joining inside parentheses, brackets
403 390 and braces, or using a hanging indent.
404 When using a hanging indent the following considerations should be applied: 391
405 392 When using a hanging indent these considerations should be applied:
406 - there should be no arguments on the first line, and 393 - there should be no arguments on the first line, and
407
408 - further indentation should be used to clearly distinguish itself as a 394 - further indentation should be used to clearly distinguish itself as a
409 continuation line. 395 continuation line.
410 396
411 Okay: a = (\n) 397 Okay: a = (\n)
412 E123: a = (\n ) 398 E123: a = (\n )
414 Okay: a = (\n 42) 400 Okay: a = (\n 42)
415 E121: a = (\n 42) 401 E121: a = (\n 42)
416 E122: a = (\n42) 402 E122: a = (\n42)
417 E123: a = (\n 42\n ) 403 E123: a = (\n 42\n )
418 E124: a = (24,\n 42\n) 404 E124: a = (24,\n 42\n)
419 E125: if (a or\n b):\n pass 405 E125: if (\n b):\n pass
420 E126: a = (\n 42) 406 E126: a = (\n 42)
421 E127: a = (24,\n 42) 407 E127: a = (24,\n 42)
422 E128: a = (24,\n 42) 408 E128: a = (24,\n 42)
409 E129: if (a or\n b):\n pass
410 E131: a = (\n 42\n 24)
423 """ 411 """
424 first_row = tokens[0][2][0] 412 first_row = tokens[0][2][0]
425 nrows = 1 + tokens[-1][2][0] - first_row 413 nrows = 1 + tokens[-1][2][0] - first_row
426 if noqa or nrows == 1: 414 if noqa or nrows == 1:
427 return 415 return
431 # indents on the final continuation line; in turn, some other 419 # indents on the final continuation line; in turn, some other
432 # indents are allowed to have an extra 4 spaces. 420 # indents are allowed to have an extra 4 spaces.
433 indent_next = logical_line.endswith(':') 421 indent_next = logical_line.endswith(':')
434 422
435 row = depth = 0 423 row = depth = 0
424 valid_hangs = (4,) if indent_char != '\t' else (4, 8)
436 # remember how many brackets were opened on each line 425 # remember how many brackets were opened on each line
437 parens = [0] * nrows 426 parens = [0] * nrows
438 # relative indents of physical lines 427 # relative indents of physical lines
439 rel_indent = [0] * nrows 428 rel_indent = [0] * nrows
429 # for each depth, collect a list of opening rows
430 open_rows = [[0]]
431 # for each depth, memorize the hanging indentation
432 hangs = [None]
440 # visual indents 433 # visual indents
441 indent_chances = {} 434 indent_chances = {}
442 last_indent = tokens[0][2] 435 last_indent = tokens[0][2]
436 visual_indent = None
437 # for each depth, memorize the visual indent column
443 indent = [last_indent[1]] 438 indent = [last_indent[1]]
444 if verbose >= 3: 439 if verbose >= 3:
445 print(">>> " + tokens[0][4].rstrip()) 440 print(">>> " + tokens[0][4].rstrip())
446 441
447 for token_type, text, start, end, line in tokens: 442 for token_type, text, start, end, line in tokens:
448 443
449 last_token_multiline = (start[0] != end[0]) 444 last_token_multiline = (start[0] != end[0])
450 newline = row < start[0] - first_row 445 newline = row < start[0] - first_row
451 if newline: 446 if newline:
452 row = start[0] - first_row 447 row = start[0] - first_row
453 newline = (not last_token_multiline and 448 newline = not last_token_multiline and token_type not in NEWLINE
454 token_type not in (tokenize.NL, tokenize.NEWLINE))
455 449
456 if newline: 450 if newline:
457 # this is the beginning of a continuation line. 451 # this is the beginning of a continuation line.
458 last_indent = start 452 last_indent = start
459 if verbose >= 3: 453 if verbose >= 3:
460 print("... " + line.rstrip()) 454 print("... " + line.rstrip())
461 455
462 # record the initial indent. 456 # record the initial indent.
463 rel_indent[row] = expand_indent(line) - indent_level 457 rel_indent[row] = expand_indent(line) - indent_level
464 458
465 if depth: 459 # identify closing bracket
466 # a bracket expression in a continuation line.
467 # find the line that it was opened on
468 for open_row in range(row - 1, -1, -1):
469 if parens[open_row]:
470 break
471 else:
472 # an unbracketed continuation line (ie, backslash)
473 open_row = 0
474 hang = rel_indent[row] - rel_indent[open_row]
475 close_bracket = (token_type == tokenize.OP and text in ']})') 460 close_bracket = (token_type == tokenize.OP and text in ']})')
461
462 # is the indent relative to an opening bracket line?
463 for open_row in reversed(open_rows[depth]):
464 hang = rel_indent[row] - rel_indent[open_row]
465 hanging_indent = hang in valid_hangs
466 if hanging_indent:
467 break
468 if hangs[depth]:
469 hanging_indent = (hang == hangs[depth])
470 # is there any chance of visual indent?
476 visual_indent = (not close_bracket and hang > 0 and 471 visual_indent = (not close_bracket and hang > 0 and
477 indent_chances.get(start[1])) 472 indent_chances.get(start[1]))
478 473
479 if close_bracket and indent[depth]: 474 if close_bracket and indent[depth]:
480 # closing bracket for visual indent 475 # closing bracket for visual indent
481 if start[1] != indent[depth]: 476 if start[1] != indent[depth]:
482 yield start, "E124" 477 yield (start, "E124 closing bracket does not match "
478 "visual indentation")
483 elif close_bracket and not hang: 479 elif close_bracket and not hang:
484 # closing bracket matches indentation of opening bracket's line 480 # closing bracket matches indentation of opening bracket's line
485 if hang_closing: 481 if hang_closing:
486 yield start, "E133" 482 yield start, "E133 closing bracket is missing indentation"
483 elif indent[depth] and start[1] < indent[depth]:
484 if visual_indent is not True:
485 # visual indent is broken
486 yield (start, "E128 continuation line "
487 "under-indented for visual indent")
488 elif hanging_indent or (indent_next and rel_indent[row] == 8):
489 # hanging indent is verified
490 if close_bracket and not hang_closing:
491 yield (start, "E123 closing bracket does not match "
492 "indentation of opening bracket's line")
493 hangs[depth] = hang
487 elif visual_indent is True: 494 elif visual_indent is True:
488 # visual indent is verified 495 # visual indent is verified
489 if not indent[depth]: 496 indent[depth] = start[1]
490 indent[depth] = start[1]
491 elif visual_indent in (text, str): 497 elif visual_indent in (text, str):
492 # ignore token lined up with matching one from a previous line 498 # ignore token lined up with matching one from a previous line
493 pass 499 pass
494 elif indent[depth] and start[1] < indent[depth]:
495 # visual indent is broken
496 yield start, "E128"
497 elif hang == 4 or (indent_next and rel_indent[row] == 8):
498 # hanging indent is verified
499 if close_bracket and not hang_closing:
500 yield (start, "E123")
501 else: 500 else:
502 # indent is broken 501 # indent is broken
503 if hang <= 0: 502 if hang <= 0:
504 error = "E122" 503 error = "E122", "missing indentation or outdented"
505 elif indent[depth]: 504 elif indent[depth]:
506 error = "E127" 505 error = "E127", "over-indented for visual indent"
507 elif hang % 4: 506 elif not close_bracket and hangs[depth]:
508 error = "E121" 507 error = "E131", "unaligned for hanging indent"
509 else: 508 else:
510 error = "E126" 509 hangs[depth] = hang
511 yield start, error 510 if hang > 4:
511 error = "E126", "over-indented for hanging indent"
512 else:
513 error = "E121", "under-indented for hanging indent"
514 yield start, "%s continuation line %s" % error
512 515
513 # look for visual indenting 516 # look for visual indenting
514 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) 517 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
515 and not indent[depth]): 518 and not indent[depth]):
516 indent[depth] = start[1] 519 indent[depth] = start[1]
522 text in ('u', 'ur', 'b', 'br')): 525 text in ('u', 'ur', 'b', 'br')):
523 indent_chances[start[1]] = str 526 indent_chances[start[1]] = str
524 # special case for the "if" statement because len("if (") == 4 527 # special case for the "if" statement because len("if (") == 4
525 elif not indent_chances and not row and not depth and text == 'if': 528 elif not indent_chances and not row and not depth and text == 'if':
526 indent_chances[end[1] + 1] = True 529 indent_chances[end[1] + 1] = True
530 elif text == ':' and line[end[1]:].isspace():
531 open_rows[depth].append(row)
527 532
528 # keep track of bracket depth 533 # keep track of bracket depth
529 if token_type == tokenize.OP: 534 if token_type == tokenize.OP:
530 if text in '([{': 535 if text in '([{':
531 depth += 1 536 depth += 1
532 indent.append(0) 537 indent.append(0)
538 hangs.append(None)
539 if len(open_rows) == depth:
540 open_rows.append([])
541 open_rows[depth].append(row)
533 parens[row] += 1 542 parens[row] += 1
534 if verbose >= 4: 543 if verbose >= 4:
535 print("bracket depth %s seen, col %s, visual min = %s" % 544 print("bracket depth %s seen, col %s, visual min = %s" %
536 (depth, start[1], indent[depth])) 545 (depth, start[1], indent[depth]))
537 elif text in ')]}' and depth > 0: 546 elif text in ')]}' and depth > 0:
538 # parent indents should not be more than this one 547 # parent indents should not be more than this one
539 prev_indent = indent.pop() or last_indent[1] 548 prev_indent = indent.pop() or last_indent[1]
549 hangs.pop()
540 for d in range(depth): 550 for d in range(depth):
541 if indent[d] > prev_indent: 551 if indent[d] > prev_indent:
542 indent[d] = 0 552 indent[d] = 0
543 for ind in list(indent_chances): 553 for ind in list(indent_chances):
544 if ind >= prev_indent: 554 if ind >= prev_indent:
545 del indent_chances[ind] 555 del indent_chances[ind]
556 del open_rows[depth + 1:]
546 depth -= 1 557 depth -= 1
547 if depth: 558 if depth:
548 indent_chances[indent[depth]] = True 559 indent_chances[indent[depth]] = True
549 for idx in range(row, -1, -1): 560 for idx in range(row, -1, -1):
550 if parens[idx]: 561 if parens[idx]:
551 parens[idx] -= 1 562 parens[idx] -= 1
552 rel_indent[row] = rel_indent[idx]
553 break 563 break
554 assert len(indent) == depth + 1 564 assert len(indent) == depth + 1
555 if start[1] not in indent_chances: 565 if start[1] not in indent_chances:
556 # allow to line up tokens 566 # allow to line up tokens
557 indent_chances[start[1]] = text 567 indent_chances[start[1]] = text
558 568
569 if last_token_multiline:
570 rel_indent[end[0] - first_row] = rel_indent[row]
571
559 if indent_next and expand_indent(line) == indent_level + 4: 572 if indent_next and expand_indent(line) == indent_level + 4:
560 yield last_indent, "E125" 573 pos = (start[0], indent[0] + 4)
574 if visual_indent:
575 code = "E129 visually indented line"
576 else:
577 code = "E125 continuation line"
578 yield pos, "%s with same indent as next logical line" % code
561 579
562 580
563 def whitespace_before_parameters(logical_line, tokens): 581 def whitespace_before_parameters(logical_line, tokens):
564 """ 582 r"""Avoid extraneous whitespace.
583
565 Avoid extraneous whitespace in the following situations: 584 Avoid extraneous whitespace in the following situations:
566 585 - before the open parenthesis that starts the argument list of a
567 - Immediately before the open parenthesis that starts the argument 586 function call.
568 list of a function call. 587 - before the open parenthesis that starts an indexing or slicing.
569
570 - Immediately before the open parenthesis that starts an indexing or
571 slicing.
572 588
573 Okay: spam(1) 589 Okay: spam(1)
574 E211: spam (1) 590 E211: spam (1)
575 591
576 Okay: dict['key'] = list[index] 592 Okay: dict['key'] = list[index]
586 (prev_type == tokenize.NAME or prev_text in '}])') and 602 (prev_type == tokenize.NAME or prev_text in '}])') and
587 # Syntax "class A (B):" is allowed, but avoid it 603 # Syntax "class A (B):" is allowed, but avoid it
588 (index < 2 or tokens[index - 2][1] != 'class') and 604 (index < 2 or tokens[index - 2][1] != 'class') and
589 # Allow "return (a.foo for a in range(5))" 605 # Allow "return (a.foo for a in range(5))"
590 not keyword.iskeyword(prev_text)): 606 not keyword.iskeyword(prev_text)):
591 yield prev_end, "E211", text 607 yield prev_end, "E211 whitespace before '%s'", text
592 prev_type = token_type 608 prev_type = token_type
593 prev_text = text 609 prev_text = text
594 prev_end = end 610 prev_end = end
595 611
596 612
597 def whitespace_around_operator(logical_line): 613 def whitespace_around_operator(logical_line):
598 """ 614 r"""Avoid extraneous whitespace around an operator.
599 Avoid extraneous whitespace in the following situations:
600
601 - More than one space around an assignment (or other) operator to
602 align it with another.
603 615
604 Okay: a = 12 + 3 616 Okay: a = 12 + 3
605 E221: a = 4 + 5 617 E221: a = 4 + 5
606 E222: a = 4 + 5 618 E222: a = 4 + 5
607 E223: a = 4\t+ 5 619 E223: a = 4\t+ 5
609 """ 621 """
610 for match in OPERATOR_REGEX.finditer(logical_line): 622 for match in OPERATOR_REGEX.finditer(logical_line):
611 before, after = match.groups() 623 before, after = match.groups()
612 624
613 if '\t' in before: 625 if '\t' in before:
614 yield match.start(1), "E223" 626 yield match.start(1), "E223 tab before operator"
615 elif len(before) > 1: 627 elif len(before) > 1:
616 yield match.start(1), "E221" 628 yield match.start(1), "E221 multiple spaces before operator"
617 629
618 if '\t' in after: 630 if '\t' in after:
619 yield match.start(2), "E224" 631 yield match.start(2), "E224 tab after operator"
620 elif len(after) > 1: 632 elif len(after) > 1:
621 yield match.start(2), "E222" 633 yield match.start(2), "E222 multiple spaces after operator"
622 634
623 635
624 def missing_whitespace_around_operator(logical_line, tokens): 636 def missing_whitespace_around_operator(logical_line, tokens):
625 r""" 637 r"""Surround operators with a single space on either side.
638
626 - Always surround these binary operators with a single space on 639 - Always surround these binary operators with a single space on
627 either side: assignment (=), augmented assignment (+=, -= etc.), 640 either side: assignment (=), augmented assignment (+=, -= etc.),
628 comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), 641 comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
629 Booleans (and, or, not). 642 Booleans (and, or, not).
630 643
631 - Use spaces around arithmetic operators. 644 - If operators with different priorities are used, consider adding
645 whitespace around the operators with the lowest priorities.
632 646
633 Okay: i = i + 1 647 Okay: i = i + 1
634 Okay: submitted += 1 648 Okay: submitted += 1
635 Okay: x = x * 2 - 1 649 Okay: x = x * 2 - 1
636 Okay: hypot2 = x * x + y * y 650 Okay: hypot2 = x * x + y * y
650 parens = 0 664 parens = 0
651 need_space = False 665 need_space = False
652 prev_type = tokenize.OP 666 prev_type = tokenize.OP
653 prev_text = prev_end = None 667 prev_text = prev_end = None
654 for token_type, text, start, end, line in tokens: 668 for token_type, text, start, end, line in tokens:
655 if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): 669 if token_type in SKIP_COMMENTS:
656 # ERRORTOKEN is triggered by backticks in Python 3
657 continue 670 continue
658 if text in ('(', 'lambda'): 671 if text in ('(', 'lambda'):
659 parens += 1 672 parens += 1
660 elif text == ')': 673 elif text == ')':
661 parens -= 1 674 parens -= 1
662 if need_space: 675 if need_space:
663 if start != prev_end: 676 if start != prev_end:
664 # Found a (probably) needed space 677 # Found a (probably) needed space
665 if need_space is not True and not need_space[1]: 678 if need_space is not True and not need_space[1]:
666 yield need_space[0], "E225" 679 yield (need_space[0],
680 "E225 missing whitespace around operator")
667 need_space = False 681 need_space = False
668 elif text == '>' and prev_text in ('<', '-'): 682 elif text == '>' and prev_text in ('<', '-'):
669 # Tolerate the "<>" operator, even if running Python 3 683 # Tolerate the "<>" operator, even if running Python 3
670 # Deal with Python 3's annotated return value "->" 684 # Deal with Python 3's annotated return value "->"
671 pass 685 pass
672 else: 686 else:
673 if need_space is True or need_space[1]: 687 if need_space is True or need_space[1]:
674 # A needed trailing space was not found 688 # A needed trailing space was not found
675 yield prev_end, "E225" 689 yield prev_end, "E225 missing whitespace around operator"
676 else: 690 else:
677 code = 'E226' 691 code, optype = 'E226', 'arithmetic'
678 if prev_text == '%': 692 if prev_text == '%':
679 code = 'E228' 693 code, optype = 'E228', 'modulo'
680 elif prev_text not in ARITHMETIC_OP: 694 elif prev_text not in ARITHMETIC_OP:
681 code = 'E227' 695 code, optype = 'E227', 'bitwise or shift'
682 yield need_space[0], code 696 yield (need_space[0], "%s missing whitespace "
697 "around %s operator" % (code, optype))
683 need_space = False 698 need_space = False
684 elif token_type == tokenize.OP and prev_end is not None: 699 elif token_type == tokenize.OP and prev_end is not None:
685 if text == '=' and parens: 700 if text == '=' and parens:
686 # Allow keyword args or defaults: foo(bar=None). 701 # Allow keyword args or defaults: foo(bar=None).
687 pass 702 pass
689 need_space = True 704 need_space = True
690 elif text in UNARY_OPERATORS: 705 elif text in UNARY_OPERATORS:
691 # Check if the operator is being used as a binary operator 706 # Check if the operator is being used as a binary operator
692 # Allow unary operators: -123, -x, +1. 707 # Allow unary operators: -123, -x, +1.
693 # Allow argument unpacking: foo(*args, **kwargs). 708 # Allow argument unpacking: foo(*args, **kwargs).
694 if prev_type == tokenize.OP: 709 if (prev_text in '}])' if prev_type == tokenize.OP
695 binary_usage = (prev_text in '}])') 710 else prev_text not in KEYWORDS):
696 elif prev_type == tokenize.NAME:
697 binary_usage = (prev_text not in KEYWORDS)
698 else:
699 binary_usage = (prev_type not in SKIP_TOKENS)
700
701 if binary_usage:
702 need_space = None 711 need_space = None
703 elif text in WS_OPTIONAL_OPERATORS: 712 elif text in WS_OPTIONAL_OPERATORS:
704 need_space = None 713 need_space = None
705 714
706 if need_space is None: 715 if need_space is None:
707 # Surrounding space is optional, but ensure that 716 # Surrounding space is optional, but ensure that
708 # trailing space matches opening space 717 # trailing space matches opening space
709 need_space = (prev_end, start != prev_end) 718 need_space = (prev_end, start != prev_end)
710 elif need_space and start == prev_end: 719 elif need_space and start == prev_end:
711 # A needed opening space was not found 720 # A needed opening space was not found
712 yield prev_end, "E225" 721 yield prev_end, "E225 missing whitespace around operator"
713 need_space = False 722 need_space = False
714 prev_type = token_type 723 prev_type = token_type
715 prev_text = text 724 prev_text = text
716 prev_end = end 725 prev_end = end
717 726
718 727
719 def whitespace_around_comma(logical_line): 728 def whitespace_around_comma(logical_line):
720 """ 729 r"""Avoid extraneous whitespace after a comma or a colon.
721 Avoid extraneous whitespace in the following situations: 730
722
723 - More than one space around an assignment (or other) operator to
724 align it with another.
725
726 JCR: This should also be applied around comma etc.
727 Note: these checks are disabled by default 731 Note: these checks are disabled by default
728 732
729 Okay: a = (1, 2) 733 Okay: a = (1, 2)
730 E241: a = (1, 2) 734 E241: a = (1, 2)
731 E242: a = (1,\t2) 735 E242: a = (1,\t2)
732 """ 736 """
733 line = logical_line 737 line = logical_line
734 for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line): 738 for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
735 found = m.start() + 1 739 found = m.start() + 1
736 if '\t' in m.group(): 740 if '\t' in m.group():
737 yield found, "E242", m.group()[0] 741 yield found, "E242 tab after '%s'", m.group()[0]
738 else: 742 else:
739 yield found, "E241", m.group()[0] 743 yield found, "E241 multiple spaces after '%s'", m.group()[0]
740 744
741 745
742 def whitespace_around_named_parameter_equals(logical_line, tokens): 746 def whitespace_around_named_parameter_equals(logical_line, tokens):
743 """ 747 r"""Don't use spaces around the '=' sign in function arguments.
748
744 Don't use spaces around the '=' sign when used to indicate a 749 Don't use spaces around the '=' sign when used to indicate a
745 keyword argument or a default parameter value. 750 keyword argument or a default parameter value.
746 751
747 Okay: def complex(real, imag=0.0): 752 Okay: def complex(real, imag=0.0):
748 Okay: return magic(r=real, i=imag) 753 Okay: return magic(r=real, i=imag)
755 E251: return magic(r = real, i = imag) 760 E251: return magic(r = real, i = imag)
756 """ 761 """
757 parens = 0 762 parens = 0
758 no_space = False 763 no_space = False
759 prev_end = None 764 prev_end = None
760 message = "E251" 765 message = "E251 unexpected spaces around keyword / parameter equals"
761 for token_type, text, start, end, line in tokens: 766 for token_type, text, start, end, line in tokens:
767 if token_type == tokenize.NL:
768 continue
762 if no_space: 769 if no_space:
763 no_space = False 770 no_space = False
764 if start != prev_end: 771 if start != prev_end:
765 yield prev_end, message 772 yield (prev_end, message)
766 elif token_type == tokenize.OP: 773 elif token_type == tokenize.OP:
767 if text == '(': 774 if text == '(':
768 parens += 1 775 parens += 1
769 elif text == ')': 776 elif text == ')':
770 parens -= 1 777 parens -= 1
771 elif parens and text == '=': 778 elif parens and text == '=':
772 no_space = True 779 no_space = True
773 if start != prev_end: 780 if start != prev_end:
774 yield prev_end, message 781 yield (prev_end, message)
775 prev_end = end 782 prev_end = end
776 783
777 784
778 def whitespace_before_inline_comment(logical_line, tokens): 785 def whitespace_before_comment(logical_line, tokens):
779 """ 786 r"""Separate inline comments by at least two spaces.
780 Separate inline comments by at least two spaces.
781 787
782 An inline comment is a comment on the same line as a statement. Inline 788 An inline comment is a comment on the same line as a statement. Inline
783 comments should be separated by at least two spaces from the statement. 789 comments should be separated by at least two spaces from the statement.
784 They should start with a # and a single space. 790 They should start with a # and a single space.
785 791
792 Each line of a block comment starts with a # and a single space
793 (unless it is indented text inside the comment).
794
786 Okay: x = x + 1 # Increment x 795 Okay: x = x + 1 # Increment x
787 Okay: x = x + 1 # Increment x 796 Okay: x = x + 1 # Increment x
797 Okay: # Block comment
788 E261: x = x + 1 # Increment x 798 E261: x = x + 1 # Increment x
789 E262: x = x + 1 #Increment x 799 E262: x = x + 1 #Increment x
790 E262: x = x + 1 # Increment x 800 E262: x = x + 1 # Increment x
801 E265: #Block comment
791 """ 802 """
792 prev_end = (0, 0) 803 prev_end = (0, 0)
793 for token_type, text, start, end, line in tokens: 804 for token_type, text, start, end, line in tokens:
794 if token_type == tokenize.COMMENT: 805 if token_type == tokenize.COMMENT:
795 if not line[:start[1]].strip(): 806 inline_comment = line[:start[1]].strip()
796 continue 807 if inline_comment:
797 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: 808 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
798 yield prev_end, "E261" 809 yield (prev_end,
810 "E261 at least two spaces before inline comment")
799 symbol, sp, comment = text.partition(' ') 811 symbol, sp, comment = text.partition(' ')
800 if symbol not in ('#', '#:') or comment[:1].isspace(): 812 bad_prefix = symbol not in ('#', '#:')
801 yield start, "E262" 813 if inline_comment:
814 if bad_prefix or comment[:1].isspace():
815 yield start, "E262 inline comment should start with '# '"
816 elif bad_prefix:
817 if text.rstrip('#') and (start[0] > 1 or symbol[1] != '!'):
818 yield start, "E265 block comment should start with '# '"
802 elif token_type != tokenize.NL: 819 elif token_type != tokenize.NL:
803 prev_end = end 820 prev_end = end
804 821
805 822
806 def imports_on_separate_lines(logical_line): 823 def imports_on_separate_lines(logical_line):
807 r""" 824 r"""Imports should usually be on separate lines.
808 Imports should usually be on separate lines.
809 825
810 Okay: import os\nimport sys 826 Okay: import os\nimport sys
811 E401: import sys, os 827 E401: import sys, os
812 828
813 Okay: from subprocess import Popen, PIPE 829 Okay: from subprocess import Popen, PIPE
818 """ 834 """
819 line = logical_line 835 line = logical_line
820 if line.startswith('import '): 836 if line.startswith('import '):
821 found = line.find(',') 837 found = line.find(',')
822 if -1 < found and ';' not in line[:found]: 838 if -1 < found and ';' not in line[:found]:
823 yield found, "E401" 839 yield found, "E401 multiple imports on one line"
824 840
825 841
826 def compound_statements(logical_line): 842 def compound_statements(logical_line):
827 r""" 843 r"""Compound statements (on the same line) are generally discouraged.
828 Compound statements (multiple statements on the same line) are
829 generally discouraged.
830 844
831 While sometimes it's okay to put an if/for/while with a small body 845 While sometimes it's okay to put an if/for/while with a small body
832 on the same line, never do this for multi-clause statements. Also 846 on the same line, never do this for multi-clause statements.
833 avoid folding such long lines! 847 Also avoid folding such long lines!
834 848
835 Okay: if foo == 'blah':\n do_blah_thing() 849 Okay: if foo == 'blah':\n do_blah_thing()
836 Okay: do_one() 850 Okay: do_one()
837 Okay: do_two() 851 Okay: do_two()
838 Okay: do_three() 852 Okay: do_three()
856 before = line[:found] 870 before = line[:found]
857 if (before.count('{') <= before.count('}') and # {'a': 1} (dict) 871 if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
858 before.count('[') <= before.count(']') and # [1:2] (slice) 872 before.count('[') <= before.count(']') and # [1:2] (slice)
859 before.count('(') <= before.count(')') and # (Python 3 annotation) 873 before.count('(') <= before.count(')') and # (Python 3 annotation)
860 not LAMBDA_REGEX.search(before)): # lambda x: x 874 not LAMBDA_REGEX.search(before)): # lambda x: x
861 yield found, "E701" 875 yield found, "E701 multiple statements on one line (colon)"
862 found = line.find(':', found + 1) 876 found = line.find(':', found + 1)
863 found = line.find(';') 877 found = line.find(';')
864 while -1 < found: 878 while -1 < found:
865 if found < last_char: 879 if found < last_char:
866 yield found, "E702" 880 yield found, "E702 multiple statements on one line (semicolon)"
867 else: 881 else:
868 yield found, "E703" 882 yield found, "E703 statement ends with a semicolon"
869 found = line.find(';', found + 1) 883 found = line.find(';', found + 1)
870 884
871 885
872 def explicit_line_join(logical_line, tokens): 886 def explicit_line_join(logical_line, tokens):
873 r""" 887 r"""Avoid explicit line join between brackets.
874 Avoid explicit line join between brackets.
875 888
876 The preferred way of wrapping long lines is by using Python's implied line 889 The preferred way of wrapping long lines is by using Python's implied line
877 continuation inside parentheses, brackets and braces. Long lines can be 890 continuation inside parentheses, brackets and braces. Long lines can be
878 broken over multiple lines by wrapping expressions in parentheses. These 891 broken over multiple lines by wrapping expressions in parentheses. These
879 should be used in preference to using a backslash for line continuation. 892 should be used in preference to using a backslash for line continuation.
887 """ 900 """
888 prev_start = prev_end = parens = 0 901 prev_start = prev_end = parens = 0
889 backslash = None 902 backslash = None
890 for token_type, text, start, end, line in tokens: 903 for token_type, text, start, end, line in tokens:
891 if start[0] != prev_start and parens and backslash: 904 if start[0] != prev_start and parens and backslash:
892 yield backslash, "E502" 905 yield backslash, "E502 the backslash is redundant between brackets"
893 if end[0] != prev_end: 906 if end[0] != prev_end:
894 if line.rstrip('\r\n').endswith('\\'): 907 if line.rstrip('\r\n').endswith('\\'):
895 backslash = (end[0], len(line.splitlines()[-1]) - 1) 908 backslash = (end[0], len(line.splitlines()[-1]) - 1)
896 else: 909 else:
897 backslash = None 910 backslash = None
904 elif text in ')]}': 917 elif text in ')]}':
905 parens -= 1 918 parens -= 1
906 919
907 920
908 def comparison_to_singleton(logical_line, noqa): 921 def comparison_to_singleton(logical_line, noqa):
909 """ 922 r"""Comparison to singletons should use "is" or "is not".
923
910 Comparisons to singletons like None should always be done 924 Comparisons to singletons like None should always be done
911 with "is" or "is not", never the equality operators. 925 with "is" or "is not", never the equality operators.
912 926
913 Okay: if arg is not None: 927 Okay: if arg is not None:
914 E711: if arg != None: 928 E711: if arg != None:
929 else: 943 else:
930 code = 'E712' 944 code = 'E712'
931 nonzero = ((singleton == 'True' and same) or 945 nonzero = ((singleton == 'True' and same) or
932 (singleton == 'False' and not same)) 946 (singleton == 'False' and not same))
933 msg += " or 'if %scond:'" % ('' if nonzero else 'not ') 947 msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
934 yield match.start(1), code, singleton, msg 948 yield (match.start(1), "%s comparison to %s should be %s" %
949 (code, singleton, msg), singleton, msg)
950
951
952 def comparison_negative(logical_line):
953 r"""Negative comparison should be done using "not in" and "is not".
954
955 Okay: if x not in y:\n pass
956 Okay: assert (X in Y or X is Z)
957 Okay: if not (X in Y):\n pass
958 Okay: zz = x is not y
959 E713: Z = not X in Y
960 E713: if not X.B in Y:\n pass
961 E714: if not X is Y:\n pass
962 E714: Z = not X.B is Y
963 """
964 match = COMPARE_NEGATIVE_REGEX.search(logical_line)
965 if match:
966 pos = match.start(1)
967 if match.group(2) == 'in':
968 yield pos, "E713 test for membership should be 'not in'"
969 else:
970 yield pos, "E714 test for object identity should be 'is not'"
935 971
936 972
937 def comparison_type(logical_line): 973 def comparison_type(logical_line):
938 """ 974 r"""Object type comparisons should always use isinstance().
939 Object type comparisons should always use isinstance() instead of 975
940 comparing types directly. 976 Do not compare types directly.
941 977
942 Okay: if isinstance(obj, int): 978 Okay: if isinstance(obj, int):
943 E721: if type(obj) is type(1): 979 E721: if type(obj) is type(1):
944 980
945 When checking if an object is a string, keep in mind that it might be a 981 When checking if an object is a string, keep in mind that it might be a
952 match = COMPARE_TYPE_REGEX.search(logical_line) 988 match = COMPARE_TYPE_REGEX.search(logical_line)
953 if match: 989 if match:
954 inst = match.group(1) 990 inst = match.group(1)
955 if inst and isidentifier(inst) and inst not in SINGLETONS: 991 if inst and isidentifier(inst) and inst not in SINGLETONS:
956 return # Allow comparison for types which are not obvious 992 return # Allow comparison for types which are not obvious
957 yield match.start(), "E721" 993 yield match.start(), "E721 do not compare types, use 'isinstance()'"
958 994
959 995
960 def python_3000_has_key(logical_line): 996 def python_3000_has_key(logical_line, noqa):
961 r""" 997 r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.
962 The {}.has_key() method is removed in the Python 3.
963 Use the 'in' operation instead.
964 998
965 Okay: if "alph" in d:\n print d["alph"] 999 Okay: if "alph" in d:\n print d["alph"]
966 W601: assert d.has_key('alph') 1000 W601: assert d.has_key('alph')
967 """ 1001 """
968 pos = logical_line.find('.has_key(') 1002 pos = logical_line.find('.has_key(')
969 if pos > -1: 1003 if pos > -1 and not noqa:
970 yield pos, "W601" 1004 yield pos, "W601 .has_key() is deprecated, use 'in'"
971 1005
972 1006
973 def python_3000_raise_comma(logical_line): 1007 def python_3000_raise_comma(logical_line):
974 """ 1008 r"""When raising an exception, use "raise ValueError('message')".
975 When raising an exception, use "raise ValueError('message')" 1009
976 instead of the older form "raise ValueError, 'message'". 1010 The older form is removed in Python 3.
977
978 The paren-using form is preferred because when the exception arguments
979 are long or include string formatting, you don't need to use line
980 continuation characters thanks to the containing parentheses. The older
981 form is removed in Python 3.
982 1011
983 Okay: raise DummyError("Message") 1012 Okay: raise DummyError("Message")
984 W602: raise DummyError, "Message" 1013 W602: raise DummyError, "Message"
985 """ 1014 """
986 match = RAISE_COMMA_REGEX.match(logical_line) 1015 match = RAISE_COMMA_REGEX.match(logical_line)
987 if match and not RERAISE_COMMA_REGEX.match(logical_line): 1016 if match and not RERAISE_COMMA_REGEX.match(logical_line):
988 yield match.end() - 1, "W602" 1017 yield match.end() - 1, "W602 deprecated form of raising exception"
989 1018
990 1019
991 def python_3000_not_equal(logical_line): 1020 def python_3000_not_equal(logical_line):
992 """ 1021 r"""New code should always use != instead of <>.
993 != can also be written <>, but this is an obsolete usage kept for 1022
994 backwards compatibility only. New code should always use !=.
995 The older syntax is removed in Python 3. 1023 The older syntax is removed in Python 3.
996 1024
997 Okay: if a != 'no': 1025 Okay: if a != 'no':
998 W603: if a <> 'no': 1026 W603: if a <> 'no':
999 """ 1027 """
1000 pos = logical_line.find('<>') 1028 pos = logical_line.find('<>')
1001 if pos > -1: 1029 if pos > -1:
1002 yield pos, "W603" 1030 yield pos, "W603 '<>' is deprecated, use '!='"
1003 1031
1004 1032
1005 def python_3000_backticks(logical_line): 1033 def python_3000_backticks(logical_line):
1006 """ 1034 r"""Backticks are removed in Python 3: use repr() instead.
1007 Backticks are removed in Python 3.
1008 Use repr() instead.
1009 1035
1010 Okay: val = repr(1 + 2) 1036 Okay: val = repr(1 + 2)
1011 W604: val = `1 + 2` 1037 W604: val = `1 + 2`
1012 """ 1038 """
1013 pos = logical_line.find('`') 1039 pos = logical_line.find('`')
1014 if pos > -1: 1040 if pos > -1:
1015 yield pos, "W604" 1041 yield pos, "W604 backticks are deprecated, use 'repr()'"
1016 1042
1017 1043
1018 ############################################################################## 1044 ##############################################################################
1019 # Helper functions 1045 # Helper functions
1020 ############################################################################## 1046 ##############################################################################
1021 1047
1022 1048
1023 if '' == ''.encode(): 1049 if '' == ''.encode():
1024 # Python 2: implicit encoding. 1050 # Python 2: implicit encoding.
1025 def readlines(filename): 1051 def readlines(filename):
1026 f = open(filename) 1052 """Read the source code."""
1027 try: 1053 with open(filename) as f:
1028 return f.readlines() 1054 return f.readlines()
1029 finally:
1030 f.close()
1031 isidentifier = re.compile(r'[a-zA-Z_]\w*').match 1055 isidentifier = re.compile(r'[a-zA-Z_]\w*').match
1032 stdin_get_value = sys.stdin.read 1056 stdin_get_value = sys.stdin.read
1033 else: 1057 else:
1034 # Python 3 1058 # Python 3
1035 def readlines(filename): # __IGNORE_WARNING__ 1059 def readlines(filename):
1036 f = open(filename, 'rb') 1060 """Read the source code."""
1037 try: 1061 try:
1038 coding, lines = tokenize.detect_encoding(f.readline) 1062 with open(filename, 'rb') as f:
1039 f = TextIOWrapper(f, coding, line_buffering=True) 1063 (coding, lines) = tokenize.detect_encoding(f.readline)
1040 return [l.decode(coding) for l in lines] + f.readlines() 1064 f = TextIOWrapper(f, coding, line_buffering=True)
1065 return [l.decode(coding) for l in lines] + f.readlines()
1041 except (LookupError, SyntaxError, UnicodeError): 1066 except (LookupError, SyntaxError, UnicodeError):
1042 f.close() 1067 # Fall back if file encoding is improperly declared
1043 # Fall back if files are improperly declared 1068 with open(filename, encoding='latin-1') as f:
1044 f = open(filename, encoding='latin-1') 1069 return f.readlines()
1045 return f.readlines()
1046 finally:
1047 f.close()
1048 isidentifier = str.isidentifier 1070 isidentifier = str.isidentifier
1049 1071
1050 def stdin_get_value(): 1072 def stdin_get_value():
1051 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() 1073 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1052 readlines.__doc__ = " Read the source code."
1053 noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search 1074 noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1054 1075
1055 1076
1056 def expand_indent(line): 1077 def expand_indent(line):
1057 r""" 1078 r"""Return the amount of indentation.
1058 Return the amount of indentation. 1079
1059 Tabs are expanded to the next multiple of 8. 1080 Tabs are expanded to the next multiple of 8.
1060 1081
1061 >>> expand_indent(' ') 1082 >>> expand_indent(' ')
1062 4 1083 4
1063 >>> expand_indent('\t') 1084 >>> expand_indent('\t')
1064 8
1065 >>> expand_indent(' \t')
1066 8 1085 8
1067 >>> expand_indent(' \t') 1086 >>> expand_indent(' \t')
1068 8 1087 8
1069 >>> expand_indent(' \t') 1088 >>> expand_indent(' \t')
1070 16 1089 16
1081 break 1100 break
1082 return result 1101 return result
1083 1102
1084 1103
1085 def mute_string(text): 1104 def mute_string(text):
1086 """ 1105 """Replace contents with 'xxx' to prevent syntax matching.
1087 Replace contents with 'xxx' to prevent syntax matching.
1088 1106
1089 >>> mute_string('"abc"') 1107 >>> mute_string('"abc"')
1090 '"xxx"' 1108 '"xxx"'
1091 >>> mute_string("'''abc'''") 1109 >>> mute_string("'''abc'''")
1092 "'''xxx'''" 1110 "'''xxx'''"
1114 if line[:1] != '-': 1132 if line[:1] != '-':
1115 nrows -= 1 1133 nrows -= 1
1116 continue 1134 continue
1117 if line[:3] == '@@ ': 1135 if line[:3] == '@@ ':
1118 hunk_match = HUNK_REGEX.match(line) 1136 hunk_match = HUNK_REGEX.match(line)
1119 row, nrows = [int(g or '1') for g in hunk_match.groups()] 1137 (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
1120 rv[path].update(range(row, row + nrows)) 1138 rv[path].update(range(row, row + nrows))
1121 elif line[:3] == '+++': 1139 elif line[:3] == '+++':
1122 path = line[4:].split('\t', 1)[0] 1140 path = line[4:].split('\t', 1)[0]
1123 if path[:2] == 'b/': 1141 if path[:2] == 'b/':
1124 path = path[2:] 1142 path = path[2:]
1126 return dict([(os.path.join(parent, path), rows) 1144 return dict([(os.path.join(parent, path), rows)
1127 for (path, rows) in rv.items() 1145 for (path, rows) in rv.items()
1128 if rows and filename_match(path, patterns)]) 1146 if rows and filename_match(path, patterns)])
1129 1147
1130 1148
1149 def normalize_paths(value, parent=os.curdir):
1150 """Parse a comma-separated list of paths.
1151
1152 Return a list of absolute paths.
1153 """
1154 if not value or isinstance(value, list):
1155 return value
1156 paths = []
1157 for path in value.split(','):
1158 if '/' in path:
1159 path = os.path.abspath(os.path.join(parent, path))
1160 paths.append(path.rstrip('/'))
1161 return paths
1162
1163
1131 def filename_match(filename, patterns, default=True): 1164 def filename_match(filename, patterns, default=True):
1132 """ 1165 """Check if patterns contains a pattern that matches filename.
1133 Check if patterns contains a pattern that matches filename. 1166
1134 If patterns is unspecified, this always returns True. 1167 If patterns is unspecified, this always returns True.
1135 """ 1168 """
1136 if not patterns: 1169 if not patterns:
1137 return default 1170 return default
1138 return any(fnmatch(filename, pattern) for pattern in patterns) 1171 return any(fnmatch(filename, pattern) for pattern in patterns)
1139 1172
1140 1173
1174 if COMMENT_WITH_NL:
1175 def _is_eol_token(token):
1176 return (token[0] in NEWLINE or
1177 (token[0] == tokenize.COMMENT and token[1] == token[4]))
1178 else:
1179 def _is_eol_token(token):
1180 return token[0] in NEWLINE
1181
1182
1141 ############################################################################## 1183 ##############################################################################
1142 # Framework to run all checks 1184 # Framework to run all checks
1143 ############################################################################## 1185 ##############################################################################
1144 1186
1145 1187
1146 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} 1188 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1147 1189
1148 1190
1149 def register_check(check, codes=None): 1191 def register_check(check, codes=None):
1150 """ 1192 """Register a new check object."""
1151 Register a new check object.
1152 """
1153 def _add_check(check, kind, codes, args): 1193 def _add_check(check, kind, codes, args):
1154 if check in _checks[kind]: 1194 if check in _checks[kind]:
1155 _checks[kind][check][0].extend(codes or []) 1195 _checks[kind][check][0].extend(codes or [])
1156 else: 1196 else:
1157 _checks[kind][check] = (codes or [''], args) 1197 _checks[kind][check] = (codes or [''], args)
1165 if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']: 1205 if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:
1166 _add_check(check, 'tree', codes, None) 1206 _add_check(check, 'tree', codes, None)
1167 1207
1168 1208
1169 def init_checks_registry(): 1209 def init_checks_registry():
1170 """ 1210 """Register all globally visible functions.
1171 Register all globally visible functions where the first argument name 1211
1172 is 'physical_line' or 'logical_line'. 1212 The first argument name is either 'physical_line' or 'logical_line'.
1173 """ 1213 """
1174 mod = inspect.getmodule(register_check) 1214 mod = inspect.getmodule(register_check)
1175 for (name, function) in inspect.getmembers(mod, inspect.isfunction): 1215 for (name, function) in inspect.getmembers(mod, inspect.isfunction):
1176 register_check(function) 1216 register_check(function)
1177 init_checks_registry() 1217 init_checks_registry()
1178 1218
1179 1219
1180 class Checker(object): 1220 class Checker(object):
1181 """ 1221 """Load a Python source file, tokenize it, check coding style."""
1182 Load a Python source file, tokenize it, check coding style.
1183 """
1184 1222
1185 def __init__(self, filename=None, lines=None, 1223 def __init__(self, filename=None, lines=None,
1186 options=None, report=None, **kwargs): 1224 options=None, report=None, **kwargs):
1187 if options is None: 1225 if options is None:
1188 options = StyleGuide(kwargs).options 1226 options = StyleGuide(kwargs).options
1191 self._io_error = None 1229 self._io_error = None
1192 self._physical_checks = options.physical_checks 1230 self._physical_checks = options.physical_checks
1193 self._logical_checks = options.logical_checks 1231 self._logical_checks = options.logical_checks
1194 self._ast_checks = options.ast_checks 1232 self._ast_checks = options.ast_checks
1195 self.max_line_length = options.max_line_length 1233 self.max_line_length = options.max_line_length
1234 self.multiline = False # in a multiline string?
1196 self.hang_closing = options.hang_closing 1235 self.hang_closing = options.hang_closing
1197 self.verbose = options.verbose 1236 self.verbose = options.verbose
1198 self.filename = filename 1237 self.filename = filename
1199 if filename is None: 1238 if filename is None:
1200 self.filename = 'stdin' 1239 self.filename = 'stdin'
1204 self.lines = stdin_get_value().splitlines(True) 1243 self.lines = stdin_get_value().splitlines(True)
1205 elif lines is None: 1244 elif lines is None:
1206 try: 1245 try:
1207 self.lines = readlines(filename) 1246 self.lines = readlines(filename)
1208 except IOError: 1247 except IOError:
1209 exc_type, exc = sys.exc_info()[:2] 1248 (exc_type, exc) = sys.exc_info()[:2]
1210 self._io_error = '%s: %s' % (exc_type.__name__, exc) 1249 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1211 self.lines = [] 1250 self.lines = []
1212 else: 1251 else:
1213 self.lines = lines 1252 self.lines = lines
1214 if self.lines: 1253 if self.lines:
1224 1263
1225 # added for eric5 integration 1264 # added for eric5 integration
1226 self.options = options 1265 self.options = options
1227 1266
1228 def report_invalid_syntax(self): 1267 def report_invalid_syntax(self):
1229 exc_type, exc = sys.exc_info()[:2] 1268 """Check if the syntax is valid."""
1269 (exc_type, exc) = sys.exc_info()[:2]
1230 if len(exc.args) > 1: 1270 if len(exc.args) > 1:
1231 offset = exc.args[1] 1271 offset = exc.args[1]
1232 if len(offset) > 2: 1272 if len(offset) > 2:
1233 offset = offset[1:3] 1273 offset = offset[1:3]
1234 else: 1274 else:
1235 offset = (1, 0) 1275 offset = (1, 0)
1236 self.report_error_args(offset[0], offset[1] or 0, 1276 self.report_error_args(offset[0], offset[1] or 0,
1237 'E901', self.report_invalid_syntax, 1277 'E901', self.report_invalid_syntax,
1238 exc_type.__name__, exc.args[0]) 1278 exc_type.__name__, exc.args[0])
1239 report_invalid_syntax.__doc__ = " Check if the syntax is valid."
1240 1279
1241 def readline(self): 1280 def readline(self):
1242 """ 1281 """Get the next line from the input buffer."""
1243 Get the next line from the input buffer. 1282 if self.line_number >= self.total_lines:
1244 """ 1283 return ''
1284 line = self.lines[self.line_number]
1245 self.line_number += 1 1285 self.line_number += 1
1246 if self.line_number > len(self.lines): 1286 if self.indent_char is None and line[:1] in WHITESPACE:
1247 return '' 1287 self.indent_char = line[0]
1248 return self.lines[self.line_number - 1]
1249
1250 def readline_check_physical(self):
1251 """
1252 Check and return the next physical line. This method can be
1253 used to feed tokenize.generate_tokens.
1254 """
1255 line = self.readline()
1256 if line:
1257 self.check_physical(line)
1258 return line 1288 return line
1259 1289
1260 def run_check(self, check, argument_names): 1290 def run_check(self, check, argument_names):
1261 """ 1291 """Run a check plugin."""
1262 Run a check plugin.
1263 """
1264 arguments = [] 1292 arguments = []
1265 for name in argument_names: 1293 for name in argument_names:
1266 arguments.append(getattr(self, name)) 1294 arguments.append(getattr(self, name))
1267 return check(*arguments) 1295 return check(*arguments)
1268 1296
1269 def check_physical(self, line): 1297 def check_physical(self, line):
1270 """ 1298 """Run all physical checks on a raw input line."""
1271 Run all physical checks on a raw input line.
1272 """
1273 self.physical_line = line 1299 self.physical_line = line
1274 if self.indent_char is None and line[:1] in WHITESPACE:
1275 self.indent_char = line[0]
1276 for name, check, argument_names in self._physical_checks: 1300 for name, check, argument_names in self._physical_checks:
1277 result = self.run_check(check, argument_names) 1301 result = self.run_check(check, argument_names)
1278 if result is not None: 1302 if result is not None:
1279 offset, code = result[:2] 1303 (offset, text) = result[:2]
1280 args = result[2:] 1304 args = result[2:]
1281 self.report_error_args( 1305 self.report_error_args(
1282 self.line_number, offset, code, check, *args) 1306 self.line_number, offset, text, check, *args)
1307 if text[:4] == 'E101':
1308 self.indent_char = line[0]
1283 1309
1284 def build_tokens_line(self): 1310 def build_tokens_line(self):
1285 """ 1311 """Build a logical line from tokens."""
1286 Build a logical line from tokens.
1287 """
1288 self.mapping = []
1289 logical = [] 1312 logical = []
1290 comments = [] 1313 comments = []
1291 length = 0 1314 length = 0
1292 previous = None 1315 prev_row = prev_col = mapping = None
1293 for token in self.tokens: 1316 for token_type, text, start, end, line in self.tokens:
1294 token_type, text = token[0:2] 1317 if token_type in SKIP_TOKENS:
1318 continue
1319 if not mapping:
1320 mapping = [(0, start)]
1295 if token_type == tokenize.COMMENT: 1321 if token_type == tokenize.COMMENT:
1296 comments.append(text) 1322 comments.append(text)
1297 continue 1323 continue
1298 if token_type in SKIP_TOKENS:
1299 continue
1300 if token_type == tokenize.STRING: 1324 if token_type == tokenize.STRING:
1301 text = mute_string(text) 1325 text = mute_string(text)
1302 if previous: 1326 if prev_row:
1303 end_row, end = previous[3] 1327 (start_row, start_col) = start
1304 start_row, start = token[2] 1328 if prev_row != start_row: # different row
1305 if end_row != start_row: # different row 1329 prev_text = self.lines[prev_row - 1][prev_col - 1]
1306 prev_text = self.lines[end_row - 1][end - 1]
1307 if prev_text == ',' or (prev_text not in '{[(' 1330 if prev_text == ',' or (prev_text not in '{[('
1308 and text not in '}])'): 1331 and text not in '}])'):
1309 logical.append(' ') 1332 text = ' ' + text
1310 length += 1 1333 elif prev_col != start_col: # different column
1311 elif end != start: # different column 1334 text = line[prev_col:start_col] + text
1312 fill = self.lines[end_row - 1][end:start]
1313 logical.append(fill)
1314 length += len(fill)
1315 self.mapping.append((length, token))
1316 logical.append(text) 1335 logical.append(text)
1317 length += len(text) 1336 length += len(text)
1318 previous = token 1337 mapping.append((length, end))
1338 (prev_row, prev_col) = end
1319 self.logical_line = ''.join(logical) 1339 self.logical_line = ''.join(logical)
1320 self.noqa = comments and noqa(''.join(comments)) 1340 self.noqa = comments and noqa(''.join(comments))
1321 # With Python 2, if the line ends with '\r\r\n' the assertion fails 1341 return mapping
1322 # assert self.logical_line.strip() == self.logical_line
1323 1342
1324 def check_logical(self): 1343 def check_logical(self):
1325 """ 1344 """Build a line from tokens and run all logical checks on it."""
1326 Build a line from tokens and run all logical checks on it.
1327 """
1328 self.build_tokens_line()
1329 self.report.increment_logical_line() 1345 self.report.increment_logical_line()
1330 first_line = self.lines[self.mapping[0][1][2][0] - 1] 1346 mapping = self.build_tokens_line()
1331 indent = first_line[:self.mapping[0][1][2][1]] 1347 (start_row, start_col) = mapping[0][1]
1332 self.previous_indent_level = self.indent_level 1348 start_line = self.lines[start_row - 1]
1333 self.indent_level = expand_indent(indent) 1349 self.indent_level = expand_indent(start_line[:start_col])
1350 if self.blank_before < self.blank_lines:
1351 self.blank_before = self.blank_lines
1334 if self.verbose >= 2: 1352 if self.verbose >= 2:
1335 print(self.logical_line[:80].rstrip()) 1353 print(self.logical_line[:80].rstrip())
1336 for name, check, argument_names in self._logical_checks: 1354 for name, check, argument_names in self._logical_checks:
1337 if self.verbose >= 4: 1355 if self.verbose >= 4:
1338 print(' ' + name) 1356 print(' ' + name)
1339 for result in self.run_check(check, argument_names): 1357 for result in self.run_check(check, argument_names):
1340 offset, code = result[:2] 1358 offset, text = result[:2]
1341 args = result[2:] 1359 args = result[2:]
1342 if isinstance(offset, tuple): 1360 if not isinstance(offset, tuple):
1343 orig_number, orig_offset = offset 1361 for token_offset, pos in mapping:
1344 else: 1362 if offset <= token_offset:
1345 for token_offset, token in self.mapping: 1363 break
1346 if offset >= token_offset: 1364 offset = (pos[0], pos[1] + offset - token_offset)
1347 orig_number = token[2][0]
1348 orig_offset = (token[2][1] + offset - token_offset)
1349 self.report_error_args( 1365 self.report_error_args(
1350 orig_number, orig_offset, code, check, *args) 1366 offset[0], offset[1], text, check, *args)
1351 self.previous_logical = self.logical_line 1367 if self.logical_line:
1368 self.previous_indent_level = self.indent_level
1369 self.previous_logical = self.logical_line
1370 self.blank_lines = 0
1371 self.tokens = []
1352 1372
1353 def check_ast(self): 1373 def check_ast(self):
1374 """Build the file's AST and run all AST checks."""
1354 try: 1375 try:
1355 tree = compile(''.join(self.lines), '', 'exec', ast.PyCF_ONLY_AST) 1376 tree = compile(''.join(self.lines), '', 'exec', ast.PyCF_ONLY_AST)
1356 except (SyntaxError, TypeError): 1377 except (SyntaxError, TypeError):
1357 return self.report_invalid_syntax() 1378 return self.report_invalid_syntax()
1358 for name, cls, _ in self._ast_checks: 1379 for name, cls, __ in self._ast_checks:
1359 # extended API for eric5 integration 1380 # extended API for eric5 integration
1360 checker = cls(tree, self.filename, self.options) 1381 checker = cls(tree, self.filename, self.options)
1361 for args in checker.run(): 1382 for args in checker.run():
1362 lineno = args[0] 1383 lineno = args[0]
1363 if not noqa(self.lines[lineno - 1]): 1384 if not self.lines or not noqa(self.lines[lineno - 1]):
1364 self.report_error_args(lineno, *args[1:]) 1385 self.report_error_args(lineno, *args[1:])
1365 1386
1366 def generate_tokens(self): 1387 def generate_tokens(self):
1388 """Tokenize the file, run physical line checks and yield tokens."""
1367 if self._io_error: 1389 if self._io_error:
1368 self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) 1390 self.report_error_args(1, 0, 'E902', self._io_error, readlines)
1369 tokengen = tokenize.generate_tokens(self.readline_check_physical) 1391 tokengen = tokenize.generate_tokens(self.readline)
1370 try: 1392 try:
1371 for token in tokengen: 1393 for token in tokengen:
1394 self.maybe_check_physical(token)
1372 yield token 1395 yield token
1373 except (SyntaxError, tokenize.TokenError): 1396 except (SyntaxError, tokenize.TokenError):
1374 self.report_invalid_syntax() 1397 self.report_invalid_syntax()
1375 1398
1399 def maybe_check_physical(self, token):
1400 """If appropriate (based on token), check current physical line(s)."""
1401 # Called after every token, but act only on end of line.
1402 if _is_eol_token(token):
1403 # Obviously, a newline token ends a single physical line.
1404 self.check_physical(token[4])
1405 elif token[0] == tokenize.STRING and '\n' in token[1]:
1406 # Less obviously, a string that contains newlines is a
1407 # multiline string, either triple-quoted or with internal
1408 # newlines backslash-escaped. Check every physical line in the
1409 # string *except* for the last one: its newline is outside of
1410 # the multiline string, so we consider it a regular physical
1411 # line, and will check it like any other physical line.
1412 #
1413 # Subtleties:
1414 # - we don't *completely* ignore the last line; if it contains
1415 # the magical "# noqa" comment, we disable all physical
1416 # checks for the entire multiline string
1417 # - have to wind self.line_number back because initially it
1418 # points to the last line of the string, and we want
1419 # check_physical() to give accurate feedback
1420 if noqa(token[4]):
1421 return
1422 self.multiline = True
1423 self.line_number = token[2][0]
1424 for line in token[1].split('\n')[:-1]:
1425 self.check_physical(line + '\n')
1426 self.line_number += 1
1427 self.multiline = False
1428
1376 def check_all(self, expected=None, line_offset=0): 1429 def check_all(self, expected=None, line_offset=0):
1377 """ 1430 """Run all checks on the input file."""
1378 Run all checks on the input file.
1379 """
1380 self.report.init_file(self.filename, self.lines, expected, line_offset) 1431 self.report.init_file(self.filename, self.lines, expected, line_offset)
1432 self.total_lines = len(self.lines)
1381 if self._ast_checks: 1433 if self._ast_checks:
1382 self.check_ast() 1434 self.check_ast()
1383 self.line_number = 0 1435 self.line_number = 0
1384 self.indent_char = None 1436 self.indent_char = None
1385 self.indent_level = 0 1437 self.indent_level = self.previous_indent_level = 0
1386 self.previous_logical = '' 1438 self.previous_logical = ''
1387 self.tokens = [] 1439 self.tokens = []
1388 self.blank_lines = blank_lines_before_comment = 0 1440 self.blank_lines = self.blank_before = 0
1389 parens = 0 1441 parens = 0
1390 for token in self.generate_tokens(): 1442 for token in self.generate_tokens():
1391 self.tokens.append(token) 1443 self.tokens.append(token)
1392 token_type, text = token[0:2] 1444 token_type, text = token[0:2]
1393 if self.verbose >= 3: 1445 if self.verbose >= 3:
1401 if text in '([{': 1453 if text in '([{':
1402 parens += 1 1454 parens += 1
1403 elif text in '}])': 1455 elif text in '}])':
1404 parens -= 1 1456 parens -= 1
1405 elif not parens: 1457 elif not parens:
1406 if token_type == tokenize.NEWLINE: 1458 if token_type in NEWLINE:
1407 if self.blank_lines < blank_lines_before_comment: 1459 if token_type == tokenize.NEWLINE:
1408 self.blank_lines = blank_lines_before_comment 1460 self.check_logical()
1409 self.check_logical() 1461 self.blank_before = 0
1410 self.tokens = [] 1462 elif len(self.tokens) == 1:
1411 self.blank_lines = blank_lines_before_comment = 0
1412 elif token_type == tokenize.NL:
1413 if len(self.tokens) == 1:
1414 # The physical line contains only this token. 1463 # The physical line contains only this token.
1415 self.blank_lines += 1 1464 self.blank_lines += 1
1416 self.tokens = [] 1465 del self.tokens[0]
1417 elif token_type == tokenize.COMMENT and len(self.tokens) == 1: 1466 else:
1418 if blank_lines_before_comment < self.blank_lines: 1467 self.check_logical()
1419 blank_lines_before_comment = self.blank_lines 1468 elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
1420 self.blank_lines = 0 1469 if len(self.tokens) == 1:
1421 if COMMENT_WITH_NL:
1422 # The comment also ends a physical line 1470 # The comment also ends a physical line
1423 self.tokens = [] 1471 token = list(token)
1472 token[1] = text.rstrip('\r\n')
1473 token[3] = (token[2][0], token[2][1] + len(token[1]))
1474 self.tokens = [tuple(token)]
1475 self.check_logical()
1476 if len(self.tokens) > 1 and (token_type == tokenize.ENDMARKER and
1477 self.tokens[-2][0] not in SKIP_TOKENS):
1478 self.tokens.pop()
1479 self.check_physical(self.tokens[-1][4])
1480 self.check_logical()
1424 return self.report.get_file_results() 1481 return self.report.get_file_results()
1425 1482
1426 1483
1427 class BaseReport(object): 1484 class BaseReport(object):
1428 """Collect the results of the checks.""" 1485 """Collect the results of the checks."""
1486
1429 print_filename = False 1487 print_filename = False
1430 1488
1431 def __init__(self, options): 1489 def __init__(self, options):
1432 self._benchmark_keys = options.benchmark_keys 1490 self._benchmark_keys = options.benchmark_keys
1433 self._ignore_code = options.ignore_code 1491 self._ignore_code = options.ignore_code
1476 print(self.filename) 1534 print(self.filename)
1477 self.file_errors += 1 1535 self.file_errors += 1
1478 self.total_errors += 1 1536 self.total_errors += 1
1479 return code 1537 return code
1480 1538
1481 def error_args(self, line_number, offset, code, check, *args): 1539 def error_args(self, line_number, offset, text, check, *args):
1482 """Report an error, according to options.""" 1540 """Report an error, according to options."""
1541 code = text[:4]
1483 if self._ignore_code(code): 1542 if self._ignore_code(code):
1484 return 1543 return
1485 if code in self.counters: 1544 if code in self.counters:
1486 self.counters[code] += 1 1545 self.counters[code] += 1
1487 else: 1546 else:
1504 """Return the total count of errors and warnings.""" 1563 """Return the total count of errors and warnings."""
1505 return sum([self.counters[key] 1564 return sum([self.counters[key]
1506 for key in self.messages if key.startswith(prefix)]) 1565 for key in self.messages if key.startswith(prefix)])
1507 1566
1508 def get_statistics(self, prefix=''): 1567 def get_statistics(self, prefix=''):
1509 """ 1568 """Get statistics for message codes that start with the prefix.
1510 Get statistics for message codes that start with the prefix.
1511 1569
1512 prefix='' matches all errors and warnings 1570 prefix='' matches all errors and warnings
1513 prefix='E' matches all errors 1571 prefix='E' matches all errors
1514 prefix='W' matches all warnings 1572 prefix='W' matches all warnings
1515 prefix='E4' matches all errors that have to do with imports 1573 prefix='E4' matches all errors that have to do with imports
1585 if line_number > len(self.lines): 1643 if line_number > len(self.lines):
1586 line = '' 1644 line = ''
1587 else: 1645 else:
1588 line = self.lines[line_number - 1] 1646 line = self.lines[line_number - 1]
1589 print(line.rstrip()) 1647 print(line.rstrip())
1590 print(' ' * offset + '^') 1648 print(re.sub(r'\S', ' ', line[:offset]) + '^')
1591 if self._show_pep8 and doc: 1649 if self._show_pep8 and doc:
1592 print(doc.lstrip('\n').rstrip()) 1650 print(' ' + doc.strip())
1593 return self.file_errors 1651 return self.file_errors
1594 1652
1595 1653
1596 class DiffReport(StandardReport): 1654 class DiffReport(StandardReport):
1597 """Collect and print the results for the changed lines only.""" 1655 """Collect and print the results for the changed lines only."""
1613 # build options from the command line 1671 # build options from the command line
1614 self.checker_class = kwargs.pop('checker_class', Checker) 1672 self.checker_class = kwargs.pop('checker_class', Checker)
1615 parse_argv = kwargs.pop('parse_argv', False) 1673 parse_argv = kwargs.pop('parse_argv', False)
1616 config_file = kwargs.pop('config_file', None) 1674 config_file = kwargs.pop('config_file', None)
1617 parser = kwargs.pop('parser', None) 1675 parser = kwargs.pop('parser', None)
1676 # build options from dict
1677 options_dict = dict(*args, **kwargs)
1678 arglist = None if parse_argv else options_dict.get('paths', None)
1618 options, self.paths = process_options( 1679 options, self.paths = process_options(
1619 parse_argv=parse_argv, config_file=config_file, parser=parser) 1680 arglist, parse_argv, config_file, parser)
1620 if args or kwargs: 1681 if options_dict:
1621 # build options from dict
1622 options_dict = dict(*args, **kwargs)
1623 options.__dict__.update(options_dict) 1682 options.__dict__.update(options_dict)
1624 if 'paths' in options_dict: 1683 if 'paths' in options_dict:
1625 self.paths = options_dict['paths'] 1684 self.paths = options_dict['paths']
1626 1685
1627 self.runner = self.input_file 1686 self.runner = self.input_file
1628 self.options = options 1687 self.options = options
1629 1688
1630 if not options.reporter: 1689 if not options.reporter:
1631 options.reporter = BaseReport if options.quiet else StandardReport 1690 options.reporter = BaseReport if options.quiet else StandardReport
1632 1691
1633 for index, value in enumerate(options.exclude):
1634 options.exclude[index] = value.rstrip('/')
1635 options.select = tuple(options.select or ()) 1692 options.select = tuple(options.select or ())
1636 if not (options.select or options.ignore or 1693 if not (options.select or options.ignore or
1637 options.testsuite or options.doctest) and DEFAULT_IGNORE: 1694 options.testsuite or options.doctest) and DEFAULT_IGNORE:
1638 # The default choice: ignore controversial checks 1695 # The default choice: ignore controversial checks
1639 options.ignore = tuple(DEFAULT_IGNORE.split(',')) 1696 options.ignore = tuple(DEFAULT_IGNORE.split(','))
1699 if ((filename_match(filename, filepatterns) and 1756 if ((filename_match(filename, filepatterns) and
1700 not self.excluded(filename, root))): 1757 not self.excluded(filename, root))):
1701 runner(os.path.join(root, filename)) 1758 runner(os.path.join(root, filename))
1702 1759
1703 def excluded(self, filename, parent=None): 1760 def excluded(self, filename, parent=None):
1704 """ 1761 """Check if the file should be excluded.
1705 Check if options.exclude contains a pattern that matches filename. 1762
1763 Check if 'options.exclude' contains a pattern that matches filename.
1706 """ 1764 """
1707 if not self.options.exclude: 1765 if not self.options.exclude:
1708 return False 1766 return False
1709 basename = os.path.basename(filename) 1767 basename = os.path.basename(filename)
1710 if filename_match(basename, self.options.exclude): 1768 if filename_match(basename, self.options.exclude):
1711 return True 1769 return True
1712 if parent: 1770 if parent:
1713 filename = os.path.join(parent, filename) 1771 filename = os.path.join(parent, filename)
1772 filename = os.path.abspath(filename)
1714 return filename_match(filename, self.options.exclude) 1773 return filename_match(filename, self.options.exclude)
1715 1774
1716 def ignore_code(self, code): 1775 def ignore_code(self, code):
1717 """ 1776 """Check if the error code should be ignored.
1718 Check if the error code should be ignored.
1719 1777
1720 If 'options.select' contains a prefix of the error code, 1778 If 'options.select' contains a prefix of the error code,
1721 return False. Else, if 'options.ignore' contains a prefix of 1779 return False. Else, if 'options.ignore' contains a prefix of
1722 the error code, return True. 1780 the error code, return True.
1723 """ 1781 """
1782 if len(code) < 4 and any(s.startswith(code)
1783 for s in self.options.select):
1784 return False
1724 return (code.startswith(self.options.ignore) and 1785 return (code.startswith(self.options.ignore) and
1725 not code.startswith(self.options.select)) 1786 not code.startswith(self.options.select))
1726 1787
1727 def get_checks(self, argument_name): 1788 def get_checks(self, argument_name):
1728 """ 1789 """Get all the checks for this category.
1790
1729 Find all globally visible functions where the first argument name 1791 Find all globally visible functions where the first argument name
1730 starts with argument_name and which contain selected tests. 1792 starts with argument_name and which contain selected tests.
1731 """ 1793 """
1732 checks = [] 1794 checks = []
1733 for check, attrs in _checks[argument_name].items(): 1795 for check, attrs in _checks[argument_name].items():
1805 if user_conf and os.path.isfile(user_conf): 1867 if user_conf and os.path.isfile(user_conf):
1806 if options.verbose: 1868 if options.verbose:
1807 print('user configuration: %s' % user_conf) 1869 print('user configuration: %s' % user_conf)
1808 config.read(user_conf) 1870 config.read(user_conf)
1809 1871
1872 local_dir = os.curdir
1810 parent = tail = args and os.path.abspath(os.path.commonprefix(args)) 1873 parent = tail = args and os.path.abspath(os.path.commonprefix(args))
1811 while tail: 1874 while tail:
1812 if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]): 1875 if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
1876 local_dir = parent
1813 if options.verbose: 1877 if options.verbose:
1814 print('local configuration: in %s' % parent) 1878 print('local configuration: in %s' % parent)
1815 break 1879 break
1816 parent, tail = os.path.split(parent) 1880 (parent, tail) = os.path.split(parent)
1817 1881
1818 pep8_section = parser.prog 1882 pep8_section = parser.prog
1819 if config.has_section(pep8_section): 1883 if config.has_section(pep8_section):
1820 option_list = dict([(o.dest, o.type or o.action) 1884 option_list = dict([(o.dest, o.type or o.action)
1821 for o in parser.option_list]) 1885 for o in parser.option_list])
1822 1886
1823 # First, read the default values 1887 # First, read the default values
1824 new_options, _ = parser.parse_args([]) 1888 (new_options, __) = parser.parse_args([])
1825 1889
1826 # Second, parse the configuration 1890 # Second, parse the configuration
1827 for opt in config.options(pep8_section): 1891 for opt in config.options(pep8_section):
1828 if options.verbose > 1: 1892 if options.verbose > 1:
1829 print(" %s = %s" % (opt, config.get(pep8_section, opt))) 1893 print(" %s = %s" % (opt, config.get(pep8_section, opt)))
1835 opt_type = option_list[normalized_opt] 1899 opt_type = option_list[normalized_opt]
1836 if opt_type in ('int', 'count'): 1900 if opt_type in ('int', 'count'):
1837 value = config.getint(pep8_section, opt) 1901 value = config.getint(pep8_section, opt)
1838 elif opt_type == 'string': 1902 elif opt_type == 'string':
1839 value = config.get(pep8_section, opt) 1903 value = config.get(pep8_section, opt)
1904 if normalized_opt == 'exclude':
1905 value = normalize_paths(value, local_dir)
1840 else: 1906 else:
1841 assert opt_type in ('store_true', 'store_false') 1907 assert opt_type in ('store_true', 'store_false')
1842 value = config.getboolean(pep8_section, opt) 1908 value = config.getboolean(pep8_section, opt)
1843 setattr(new_options, normalized_opt, value) 1909 setattr(new_options, normalized_opt, value)
1844 1910
1845 # Third, overwrite with the command-line options 1911 # Third, overwrite with the command-line options
1846 options, _ = parser.parse_args(arglist, values=new_options) 1912 (options, __) = parser.parse_args(arglist, values=new_options)
1847 options.doctest = options.testsuite = False 1913 options.doctest = options.testsuite = False
1848 return options 1914 return options
1849 1915
1850 1916
1851 def process_options(arglist=None, parse_argv=False, config_file=None, 1917 def process_options(arglist=None, parse_argv=False, config_file=None,
1852 parser=None): 1918 parser=None):
1853 """Process options passed either via arglist or via command line args.""" 1919 """Process options passed either via arglist or via command line args."""
1854 if not arglist and not parse_argv:
1855 # Don't read the command line if the module is used as a library.
1856 arglist = []
1857 if not parser: 1920 if not parser:
1858 parser = get_parser() 1921 parser = get_parser()
1859 if not parser.has_option('--config'): 1922 if not parser.has_option('--config'):
1860 if config_file is True: 1923 if config_file is True:
1861 config_file = DEFAULT_CONFIG 1924 config_file = DEFAULT_CONFIG
1864 "tox.ini file or the setup.cfg file located in any parent folder " 1927 "tox.ini file or the setup.cfg file located in any parent folder "
1865 "of the path(s) being processed. Allowed options are: %s." % 1928 "of the path(s) being processed. Allowed options are: %s." %
1866 (parser.prog, ', '.join(parser.config_options)))) 1929 (parser.prog, ', '.join(parser.config_options))))
1867 group.add_option('--config', metavar='path', default=config_file, 1930 group.add_option('--config', metavar='path', default=config_file,
1868 help="user config file location (default: %default)") 1931 help="user config file location (default: %default)")
1869 options, args = parser.parse_args(arglist) 1932 # Don't read the command line if the module is used as a library.
1933 if not arglist and not parse_argv:
1934 arglist = []
1935 # If parse_argv is True and arglist is None, arguments are
1936 # parsed from the command line (sys.argv)
1937 (options, args) = parser.parse_args(arglist)
1870 options.reporter = None 1938 options.reporter = None
1871 1939
1872 if options.ensure_value('testsuite', False): 1940 if options.ensure_value('testsuite', False):
1873 args.append(options.testsuite) 1941 args.append(options.testsuite)
1874 elif not options.ensure_value('doctest', False): 1942 elif not options.ensure_value('doctest', False):
1880 parser.error('input not specified') 1948 parser.error('input not specified')
1881 options = read_config(options, args, arglist, parser) 1949 options = read_config(options, args, arglist, parser)
1882 options.reporter = parse_argv and options.quiet == 1 and FileReport 1950 options.reporter = parse_argv and options.quiet == 1 and FileReport
1883 1951
1884 options.filename = options.filename and options.filename.split(',') 1952 options.filename = options.filename and options.filename.split(',')
1885 options.exclude = options.exclude.split(',') 1953 options.exclude = normalize_paths(options.exclude)
1886 options.select = options.select and options.select.split(',') 1954 options.select = options.select and options.select.split(',')
1887 options.ignore = options.ignore and options.ignore.split(',') 1955 options.ignore = options.ignore and options.ignore.split(',')
1888 1956
1889 if options.diff: 1957 if options.diff:
1890 options.reporter = DiffReport 1958 options.reporter = DiffReport

eric ide

mercurial