|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # |
|
4 # pep8.py - Check Python source code formatting, according to PEP 8 |
|
5 # Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net> |
|
6 # |
|
7 # Permission is hereby granted, free of charge, to any person |
|
8 # obtaining a copy of this software and associated documentation files |
|
9 # (the "Software"), to deal in the Software without restriction, |
|
10 # including without limitation the rights to use, copy, modify, merge, |
|
11 # publish, distribute, sublicense, and/or sell copies of the Software, |
|
12 # and to permit persons to whom the Software is furnished to do so, |
|
13 # subject to the following conditions: |
|
14 # |
|
15 # The above copyright notice and this permission notice shall be |
|
16 # included in all copies or substantial portions of the Software. |
|
17 # |
|
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|
22 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
23 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|
24 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
25 # SOFTWARE. |
|
26 |
|
27 """ |
|
28 Check Python source code formatting, according to PEP 8: |
|
29 http://www.python.org/dev/peps/pep-0008/ |
|
30 |
|
31 For usage and a list of options, try this: |
|
32 $ python pep8.py -h |
|
33 |
|
34 This program and its regression test suite live here: |
|
35 http://github.com/jcrocholl/pep8 |
|
36 |
|
37 Groups of errors and warnings: |
|
38 E errors |
|
39 W warnings |
|
40 100 indentation |
|
41 200 whitespace |
|
42 300 blank lines |
|
43 400 imports |
|
44 500 line length |
|
45 600 deprecation |
|
46 700 statements |
|
47 |
|
48 You can add checks to this program by writing plugins. Each plugin is |
|
49 a simple function that is called for each line of source code, either |
|
50 physical or logical. |
|
51 |
|
52 Physical line: |
|
53 - Raw line of text from the input file. |
|
54 |
|
55 Logical line: |
|
56 - Multi-line statements converted to a single line. |
|
57 - Stripped left and right. |
|
58 - Contents of strings replaced with 'xxx' of same length. |
|
59 - Comments removed. |
|
60 |
|
61 The check function requests physical or logical lines by the name of |
|
62 the first argument: |
|
63 |
|
64 def maximum_line_length(physical_line) |
|
65 def extraneous_whitespace(logical_line) |
|
66 def blank_lines(logical_line, blank_lines, indent_level, line_number) |
|
67 |
|
68 The last example above demonstrates how check plugins can request |
|
69 additional information with extra arguments. All attributes of the |
|
70 Checker object are available. Some examples: |
|
71 |
|
72 lines: a list of the raw lines from the input file |
|
73 tokens: the tokens that contribute to this logical line |
|
74 line_number: line number in the input file |
|
75 blank_lines: blank lines before this one |
|
76 indent_char: first indentation character in this file (' ' or '\t') |
|
77 indent_level: indentation (with tabs expanded to multiples of 8) |
|
78 previous_indent_level: indentation on previous line |
|
79 previous_logical: previous logical line |
|
80 |
|
81 The docstring of each check function shall be the relevant part of |
|
82 text from PEP 8. It is printed if the user enables --show-pep8. |
|
83 Several docstrings contain examples directly from the PEP 8 document. |
|
84 |
|
85 Okay: spam(ham[1], {eggs: 2}) |
|
86 E201: spam( ham[1], {eggs: 2}) |
|
87 |
|
88 These examples are verified automatically when pep8.py is run with the |
|
89 --doctest option. You can add examples for your own check functions. |
|
90 The format is simple: "Okay" or error/warning code followed by colon |
|
91 and space, the rest of the line is example source code. If you put 'r' |
|
92 before the docstring, you can use \n for newline, \t for tab and \s |
|
93 for space. |
|
94 |
|
95 """ |
|
96 |
|
97 # |
|
# This is a modified version to make the original pep8 better suited
# for being called from within the eric5 IDE. The modifications are as
# follows:
|
101 # |
|
102 # - made messages translatable via Qt |
|
103 # |
|
104 # Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de> |
|
105 # |
|
106 |
|
107 __version__ = '0.6.1' |
|
108 |
|
109 import os |
|
110 import sys |
|
111 import re |
|
112 import time |
|
113 import inspect |
|
114 import keyword |
|
115 import tokenize |
|
116 from optparse import OptionParser |
|
117 from fnmatch import fnmatch |
|
118 try: |
|
119 frozenset |
|
120 except NameError: |
|
121 from sets import ImmutableSet as frozenset |
|
122 |
|
123 from PyQt4.QtCore import QCoreApplication, QT_TRANSLATE_NOOP |
|
124 |
|
# Default command line settings.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
DEFAULT_IGNORE = 'E24'
MAX_LINE_LENGTH = 79

# Pre-compiled patterns shared by the check functions.
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# Group 2 is either a tab or a run of two spaces: a single space around an
# operator is the recommended style and must not match.  The pattern is a raw
# string so that \w and \s are not treated as (invalid) string escapes.
WHITESPACE_AROUND_OPERATOR_REGEX = \
    re.compile(r'([^\w\s]*)\s*(\t|  )\s*([^\w\s]*)')
# The brackets inside the character sets are escaped to avoid the
# "possible nested set" FutureWarning of the re module.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;:]')
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
    re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')


WHITESPACE = ' \t'

# Operator tables used by the operator whitespace checks.
BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
    '%', '^', '&', '|', '=', '/', '//', '<', '>', '<<'])
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
                         tokenize.DEDENT, tokenize.NEWLINE])
# Names after which a unary operator may follow without a space before it.
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
                    frozenset(['False', 'None', 'True']))
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')

# Module level state; both start out unset.
options = None
args = None
|
156 |
|
157 |
|
158 ############################################################################## |
|
159 # Helper functions for translated and formatted messages |
|
160 ############################################################################## |
|
161 |
|
162 |
|
# Mapping of message code to its untranslated message text.
# Each text is wrapped in a literal QT_TRANSLATE_NOOP("pep8", ...) call so
# it is marked for translation in the "pep8" context; getMessage() looks the
# text up here, translates it and fills "{0}" style placeholders via
# str.format().
pep8_messages = {
    # 100: indentation
    "E101" : QT_TRANSLATE_NOOP("pep8",
        "indentation contains mixed spaces and tabs"),
    "E111" : QT_TRANSLATE_NOOP("pep8",
        "indentation is not a multiple of four"),
    "E112" : QT_TRANSLATE_NOOP("pep8",
        "expected an indented block"),
    "E113" : QT_TRANSLATE_NOOP("pep8",
        "unexpected indentation"),
    "W191" : QT_TRANSLATE_NOOP("pep8",
        "indentation contains tabs"),
    # 200: whitespace
    "E201" : QT_TRANSLATE_NOOP("pep8",
        "whitespace after '{0}'"),
    "E202" : QT_TRANSLATE_NOOP("pep8",
        "whitespace before '{0}'"),
    "E203" : QT_TRANSLATE_NOOP("pep8",
        "whitespace before '{0}'"),
    "E211" : QT_TRANSLATE_NOOP("pep8",
        "whitespace before '{0}'"),
    "E221" : QT_TRANSLATE_NOOP("pep8",
        "multiple spaces before operator"),
    "E222" : QT_TRANSLATE_NOOP("pep8",
        "multiple spaces after operator"),
    "E223" : QT_TRANSLATE_NOOP("pep8",
        "tab before operator"),
    "E224" : QT_TRANSLATE_NOOP("pep8",
        "tab after operator"),
    "E225" : QT_TRANSLATE_NOOP("pep8",
        "missing whitespace around operator"),
    "E231" : QT_TRANSLATE_NOOP("pep8",
        "missing whitespace after '{0}'"),
    "E241" : QT_TRANSLATE_NOOP("pep8",
        "multiple spaces after '{0}'"),
    "E242" : QT_TRANSLATE_NOOP("pep8",
        "tab after '{0}'"),
    "E251" : QT_TRANSLATE_NOOP("pep8",
        "no spaces around keyword / parameter equals"),
    "E261" : QT_TRANSLATE_NOOP("pep8",
        "at least two spaces before inline comment"),
    "E262" : QT_TRANSLATE_NOOP("pep8",
        "inline comment should start with '# '"),
    "W291" : QT_TRANSLATE_NOOP("pep8",
        "trailing whitespace"),
    "W292" : QT_TRANSLATE_NOOP("pep8",
        "no newline at end of file"),
    "W293" : QT_TRANSLATE_NOOP("pep8",
        "blank line contains whitespace"),
    # 300: blank lines
    "E301" : QT_TRANSLATE_NOOP("pep8",
        "expected 1 blank line, found 0"),
    "E302" : QT_TRANSLATE_NOOP("pep8",
        "expected 2 blank lines, found {0}"),
    "E303" : QT_TRANSLATE_NOOP("pep8",
        "too many blank lines ({0})"),
    "E304" : QT_TRANSLATE_NOOP("pep8",
        "blank lines found after function decorator"),
    "W391" : QT_TRANSLATE_NOOP("pep8",
        "blank line at end of file"),
    # 400: imports
    "E401" : QT_TRANSLATE_NOOP("pep8",
        "multiple imports on one line"),
    # 500: line length
    "E501" : QT_TRANSLATE_NOOP("pep8",
        "line too long ({0} characters)"),
    # 600: deprecation
    "W601" : QT_TRANSLATE_NOOP("pep8",
        ".has_key() is deprecated, use 'in'"),
    "W602" : QT_TRANSLATE_NOOP("pep8",
        "deprecated form of raising exception"),
    "W603" : QT_TRANSLATE_NOOP("pep8",
        "'<>' is deprecated, use '!='"),
    "W604" : QT_TRANSLATE_NOOP("pep8",
        "backticks are deprecated, use 'repr()'"),
    # 700: statements
    "E701" : QT_TRANSLATE_NOOP("pep8",
        "multiple statements on one line (colon)"),
    "E702" : QT_TRANSLATE_NOOP("pep8",
        "multiple statements on one line (semicolon)"),
}
|
237 |
|
# Sample placeholder arguments, keyed by message code, for those messages
# whose text contains a "{0}" placeholder.  Every code gets its own
# one-element list.
# NOTE(review): presumably used to render example messages -- confirm
# against the callers.
pep8_messages_sample_args = dict(
    (code, [sample]) for code, sample in (
        ("E201", "("),
        ("E202", ")"),
        ("E203", ":"),
        ("E211", "["),
        ("E231", ","),
        ("E241", ","),
        ("E242", ","),
        ("E302", 1),
        ("E303", 3),
        ("E501", 85),
    )
)
|
250 |
|
251 |
|
def getMessage(code, *args):
    """
    Function to build the translated, formatted text for a message code.

    The result always starts with the code itself followed by one space.
    Codes that are missing from pep8_messages yield a generic fallback text.

    @param code message code (string)
    @param args values for the placeholders of the message text (list)
    @return translated and formatted message (string)
    """
    if code not in pep8_messages:
        return code + " " + QCoreApplication.translate("pep8",
            "no message for this code defined")

    # translate first, then substitute the "{0}" style placeholders
    template = QCoreApplication.translate("pep8", pep8_messages[code])
    return code + " " + template.format(*args)
|
266 |
|
267 ############################################################################## |
|
268 # Plugins (check functions) for physical lines |
|
269 ############################################################################## |
|
270 |
|
271 |
|
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1

    @param physical_line raw line of text from the input file (string)
    @param indent_char indentation character of the file, ' ' or '\t' (string)
    @return tuple of offset of the first deviating indentation character
        and the message code E101, or None if the indentation is uniform
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    for offset, char in enumerate(indent):
        # report the first character that differs from the file's indent char
        if char != indent_char:
            return offset, "E101"
|
290 |
|
291 |
|
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.

    Okay: if True:\n    return
    W191: if True:\n\treturn

    @param physical_line raw line of text from the input file (string)
    @return tuple of offset of the first tab within the indentation and
        the message code W191, or None if the indentation has no tab
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    if '\t' in indent:
        return indent.index('\t'), "W191"
|
303 |
|
304 |
|
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.
    FBM: Except when it occurs as part of a blank line (i.e. the line is
         nothing but whitespace). According to Python docs[1] a line with only
         whitespace is considered a blank line, and is to be ignored. However,
         matching a blank line to its indentation level avoids mistakenly
         terminating a multi-line statement (e.g. class declaration) when
         pasting code into the standard Python interpreter.

         [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines

    The warning returned varies on whether the line itself is blank, for easier
    filtering for those who want to indent their blank lines.

    Okay: spam(1)
    W291: spam(1)\s
    W293: class Foo(object):\n    \n    bang = 12

    @param physical_line raw line of text from the input file (string)
    @return tuple of offset and message code (W291 at the end of the
        remaining text, W293 at offset 0 for a whitespace-only line), or
        None if there is no trailing whitespace
    """
    # Strip the line terminators one after the other, in this exact order:
    # chr(10) newline, chr(13) carriage return, chr(12) form feed (^L).
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    stripped = physical_line.rstrip()
    if physical_line != stripped:
        if stripped:
            return len(stripped), "W291"
        else:
            return 0, "W293"
|
333 |
|
334 |
|
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    @param physical_line raw line of text from the input file (string)
    @param lines all raw lines of the input file (list of string)
    @param line_number number of the given line within the file (integer)
    @return tuple of offset 0 and the message code W391 if the last line
        of the file is blank, None otherwise
    """
    is_blank = physical_line.strip() == ''
    if is_blank and line_number == len(lines):
        return 0, "W391"
    return None
|
344 |
|
345 |
|
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.

    @param physical_line raw line of text from the input file (string)
    @return tuple of the line length and the message code if the line has
        no trailing whitespace at all (hence no newline), None otherwise
    """
    if physical_line != physical_line.rstrip():
        # something (at least the newline) follows the visible text
        return None
    return len(physical_line), "W292"
|
352 |
|
353 |
|
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.

    @param physical_line raw line of text from the input file (string)
    @return tuple of MAX_LINE_LENGTH, the message code and the measured
        length if the line is too long, None otherwise
    """
    line = physical_line.rstrip()
    length = len(line)
    if length > MAX_LINE_LENGTH:
        try:
            # The line could contain multi-byte characters
            if not hasattr(line, 'decode'):   # Python 3
                line = line.encode('latin-1')
            length = len(line.decode('utf-8'))
        except UnicodeError:
            # Covers both directions: text that is not valid UTF-8
            # (UnicodeDecodeError) and, on Python 3, text with characters
            # outside latin-1 (UnicodeEncodeError).  Keep the plain length.
            pass
    if length > MAX_LINE_LENGTH:
        return MAX_LINE_LENGTH, "E501", length
|
377 |
|
378 |
|
379 ############################################################################## |
|
380 # Plugins (check functions) for logical lines |
|
381 ############################################################################## |
|
382 |
|
383 |
|
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level,
                blank_lines_before_comment):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass

    @param logical_line logical line to be checked (string)
    @param blank_lines number of blank lines before this line (integer)
    @param indent_level indentation of this line (integer)
    @param line_number line number in the input file (integer)
    @param previous_logical previous logical line (string)
    @param previous_indent_level indentation of the previous line (integer)
    @param blank_lines_before_comment number of blank lines before a
        preceding comment (integer)
    @return tuple of offset 0, message code and, for E302 and E303, the
        number of blank lines found; None if nothing is wrong
    """
    if line_number == 1:
        return  # Don't expect blank lines before the first line
    max_blank_lines = max(blank_lines, blank_lines_before_comment)
    if previous_logical.startswith('@'):
        # nothing may separate a decorator from what it decorates
        if max_blank_lines:
            return 0, "E304"
    elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
        return 0, "E303", max_blank_lines
    elif logical_line.startswith(('def ', 'class ', '@')):
        if indent_level:
            # Nested definition: a blank line is wanted unless it opens a
            # deeper block or directly follows a docstring.
            if not (max_blank_lines or previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)):
                return 0, "E301"
        elif max_blank_lines != 2:
            return 0, "E302", max_blank_lines
|
424 |
|
425 |
|
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x

    @param logical_line logical line to be checked (string)
    @return tuple of offset, message code and offending character for the
        first finding, None if there is none
    """
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text == char + ' ' and char in '([{':
            # space right after an opening bracket
            return found + 1, "E201", char
        if text != ' ' + char or logical_line[found - 1] == ',':
            # not the "space before" form, or the space follows a comma
            continue
        if char in '}])':
            return found, "E202", char
        if char in ',;:':
            return found, "E203", char
|
458 |
|
459 |
|
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)

    @param logical_line logical line to be checked (string)
    @return tuple of offset, message code and separator character for the
        first finding, None if there is none
    """
    # walk over all pairs of neighbouring characters
    neighbours = zip(logical_line, logical_line[1:])
    for index, (char, following) in enumerate(neighbours):
        if char not in ',;:' or following in WHITESPACE:
            continue
        if char == ':':
            before = logical_line[:index]
            if before.count('[') > before.count(']'):
                continue  # Slice syntax, no space required
        elif char == ',' and following == ')':
            continue  # Allow tuple with only one element: (3,)
        return index, "E231", char
|
483 |
|
484 |
|
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2

    @param logical_line logical line to be checked (string)
    @param previous_logical previous logical line (string)
    @param indent_char indentation character of the file, ' ' or '\t' (string)
    @param indent_level indentation of this line (integer)
    @param previous_indent_level indentation of the previous line (integer)
    @return tuple of offset 0 and message code (E111, E112 or E113), or
        None if the indentation is fine
    """
    # the multiple-of-four rule only applies to space indented files
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111"
    # a previous line ending in a colon opens a block
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        return 0, "E112"
    if indent_level > previous_indent_level and not indent_expect:
        return 0, "E113"
|
510 |
|
511 |
|
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]

    @param logical_line logical line to be checked (string)
    @param tokens tokens that contribute to this logical line (list of
        5-tuples as produced by the tokenize module)
    @return tuple of the end position of the preceding token, message code
        and the bracket found, None if there is no finding
    """
    first = tokens[0]
    prev_type, prev_text, prev_end = first[0], first[1], first[3]
    for index, token in enumerate(tokens[1:], 1):
        token_type, text, start, end, _line = token
        opens_bracket = token_type == tokenize.OP and text in '(['
        if opens_bracket and start != prev_end:
            follows_operand = (prev_type == tokenize.NAME or
                               prev_text in '}])')
            # Syntax "class A (B):" is allowed, but avoid it
            after_class = index >= 2 and tokens[index - 2][1] == 'class'
            # Allow "return (a.foo for a in range(5))"
            if (follows_operand and not after_class and
                    not keyword.iskeyword(prev_text)):
                return prev_end, "E211", text
        prev_type, prev_text, prev_end = token_type, text, end
|
546 |
|
547 |
|
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the offending whitespace and the message
        code, None if there is no finding
    """
    for match in WHITESPACE_AROUND_OPERATOR_REGEX.finditer(logical_line):
        before, whitespace, after = match.groups()
        tab = whitespace == '\t'
        offset = match.start(2)
        if before in OPERATORS:
            # the whitespace follows an operator
            return offset, ("E224" if tab else "E222")
        elif after in OPERATORS:
            # the whitespace precedes an operator
            return offset, ("E223" if tab else "E221")
|
569 |
|
570 |
|
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
    either side: assignment (=), augmented assignment (+=, -= etc.),
    comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
    Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    # parens: incremented for every '(' and 'lambda', decremented for ')';
    #         while non-zero a plain '=' is taken as keyword/default equals.
    # need_space: set once an operator that requires surrounding whitespace
    #         was seen; the token after it must not start at prev_end.
    parens = 0
    need_space = False
    # Starting with OP as previous type lets a leading unary operator pass.
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # The previous token was an operator needing whitespace after it.
            if start != prev_end:
                need_space = False
            elif text == '>' and prev_text == '<':
                # Tolerate the "<>" operator, even if running Python 3
                pass
            else:
                return prev_end, "E225"
        elif token_type == tokenize.OP and prev_end is not None:
            # prev_end is None only for the very first token of the line.
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP:
                    # after a closing bracket the operator is binary
                    if prev_text in '}])':
                        need_space = True
                elif prev_type == tokenize.NAME:
                    # after a name it is binary, unless that name is one of
                    # the keywords collected in E225NOT_KEYWORDS
                    if prev_text not in E225NOT_KEYWORDS:
                        need_space = True
                else:
                    need_space = True
            # Whitespace is also required in front of the operator.
            if need_space and start == prev_end:
                return prev_end, "E225"
        prev_type = token_type
        prev_text = text
        prev_end = end
|
643 |
|
644 |
|
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)

    @param logical_line logical line to be checked (string)
    @return tuple of the offset just behind the separator, message code and
        the separator character, None if there is no finding
    """
    line = logical_line
    # Separators are tried in this fixed order, not by position in the line.
    for separator in ',;:':
        # Two spaces, not one: a single space after a separator is the
        # recommended style and must stay unreported.
        found = line.find(separator + '  ')
        if found > -1:
            return found + 1, "E241", separator
        found = line.find(separator + '\t')
        if found > -1:
            return found + 1, "E242", separator
|
667 |
|
668 |
|
def whitespace_around_named_parameter_equals(logical_line):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the finding and the message code, None
        if there is no finding
    """
    depth = 0
    pattern = WHITESPACE_AROUND_NAMED_PARAMETER_REGEX
    for match in pattern.finditer(logical_line):
        piece = match.group()
        # Three character matches are the "space next to '='" alternatives
        # of the pattern; they only count inside parentheses.
        if depth and len(piece) == 3:
            return match.start(), "E251"
        if piece == '(':
            depth += 1
        elif piece == ')':
            depth -= 1
|
695 |
|
696 |
|
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x

    @param logical_line logical line to be checked (string)
    @param tokens tokens that contribute to this logical line (list of
        5-tuples as produced by the tokenize module)
    @return tuple of position and message code (E261 at the end of the
        preceding token, E262 at the start of the comment), None if there
        is no finding
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if token_type == tokenize.COMMENT:
            if not line[:start[1]].strip():
                # comment on a line of its own, not an inline comment
                continue
            if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                return (prev_end, "E261")
            # A proper comment starts with '# ' followed by text: reject
            # both more than one space ('#  ') and a missing space.
            if text.startswith('#  ') or not text.startswith('# '):
                return start, "E262"
        else:
            prev_end = end
|
725 |
|
726 |
|
def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the first comma and the message code
        E401 for a plain import statement naming several modules, None
        otherwise
    """
    if not logical_line.startswith('import '):
        # "from x import a, b" is fine
        return None
    if ',' in logical_line:
        return logical_line.index(','), "E401"
    return None
|
745 |
|
746 |
|
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the colon or semicolon and the message
        code (E701 or E702), None if there is no finding
    """
    line = logical_line
    # Only the first colon is inspected, and only if something follows it.
    found = line.find(':')
    if -1 < found < len(line) - 1:
        before = line[:found]
        if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
            before.count('[') <= before.count(']') and  # [1:2] (slice)
            not re.search(r'\blambda\b', before)):      # lambda x: x
            return found, "E701"
    found = line.find(';')
    if -1 < found:
        return found, "E702"
|
783 |
|
784 |
|
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the first '.has_key(' and the message
        code, None if the line does not contain it
    """
    needle = '.has_key('
    if needle in logical_line:
        return logical_line.index(needle), "W601"
    return None
|
796 |
|
797 |
|
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the comma and the message code, None if
        the line does not start with the old raise form
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if match is None:
        return None
    # group 1 of the pattern is the comma itself
    return match.start(1), "W602"
|
811 |
|
812 |
|
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only.  New code should always use !=.
    The older syntax is removed in Python 3000.

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the first '<>' and the message code,
        None if the line does not contain it
    """
    if '<>' in logical_line:
        return logical_line.index('<>'), "W603"
    return None
|
822 |
|
823 |
|
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3000.
    Use repr() instead.

    @param logical_line logical line to be checked (string)
    @return tuple of the offset of the first backtick and the message code,
        None if the line does not contain one
    """
    if '`' in logical_line:
        return logical_line.index('`'), "W604"
    return None
|
832 |
|
833 |
|
834 ############################################################################## |
|
835 # Helper functions |
|
836 ############################################################################## |
|
837 |
|
838 |
|
# ''.encode() returns a byte string; it only equals '' where str is the
# byte string type, i.e. on Python 2.
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """
        Function to read all lines of a file.

        @param filename name of the file to be read (string)
        @return lines of the file including line ends (list of string)
        """
        # the with statement closes the file even if reading fails
        with open(filename) as source:
            return source.readlines()
else:
    # Python 3: decode to latin-1.
    # This function is lazy, it does not read the encoding declaration.
    # XXX: use tokenize.detect_encoding()
    def readlines(filename):    # __IGNORE_WARNING__
        """
        Function to read all lines of a file decoded as latin-1.

        @param filename name of the file to be read (string)
        @return lines of the file including line ends (list of string)
        """
        # the with statement closes the file even if reading fails
        with open(filename, encoding='latin-1') as source:
            return source.readlines()
|
849 |
|
850 |
|
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    Only leading spaces and tabs are counted; counting stops at the first
    other character.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16

    @param line line of text to be measured (string)
    @return width of the leading indentation (integer)
    """
    result = 0
    for char in line:
        if char == '\t':
            # jump to the next tab stop
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
|
876 |
|
877 |
|
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    The quotes and any prefix in front of them are kept; every character
    between the quotes becomes an 'x', so the length does not change.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # Skip any string modifiers (e.g. u or r) in front of the opening quote.
    first = 1
    for quote in ('"', "'"):
        if text.endswith(quote):
            first += text.index(quote)
            break
    last = len(text) - 1
    # Triple quotes take two more characters on each side.
    if text[-3:] in ('"""', "'''"):
        first, last = first + 2, last - 2
    return ''.join((text[:first], 'x' * (last - first), text[last:]))
|
901 |
|
902 |
|
def message(text):
    """
    Print text on standard output.

    All report lines in this module go through this single function.
    """
    print(text)
|
908 |
|
909 |
|
##############################################################################
# Framework to run all checks
##############################################################################
|
913 |
|
914 |
|
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Return a sorted list of (name, function, argument_names) tuples.  A
    function whose docstring mentions error codes is only included when at
    least one of those codes is not ignored; a function without any code
    in its docstring is always included.
    """
    checks = []
    for name, function in globals().items():
        if not inspect.isfunction(function):
            continue
        # inspect.getargspec() was removed in Python 3.11; index 0 of
        # getfullargspec() is the same list of positional argument names.
        args = inspect.getfullargspec(function)[0]
        if not (args and args[0].startswith(argument_name)):
            continue
        codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
        if not codes or any(not ignore_code(code) for code in codes):
            checks.append((name, function, args))
    checks.sort()
    return checks
|
933 |
|
934 |
|
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.

    The per-run state (line_number, tokens, indent_level, blank_lines, ...)
    is initialised by check_all(), so the other methods are only usable
    once check_all() has started.
    """

    def __init__(self, filename, lines=None):
        """
        Remember the input and add its size to the physical line counter.

        filename -- path of the file to check; with None the input is
                    reported as 'stdin' and only the given lines are used
        lines    -- optional list of source lines; when omitted for a real
                    filename the file is read with readlines()
        """
        self.filename = filename
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif lines is None:
            self.lines = readlines(filename)
        else:
            self.lines = lines
        options.counters['physical lines'] += len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns an empty string once all lines have been consumed.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Every name in argument_names is looked up as an attribute of this
        checker and passed positionally to check.
        """
        arguments = [getattr(self, name) for name in argument_names]
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first indented line decides the indent character of the file.
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in options.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, code, *args = result
                self.report_error_args(self.line_number, offset, code, check,
                                       *args)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line (string contents muted, skipped tokens
        dropped) and self.mapping, a list of (offset in the logical line,
        token) pairs used to translate offsets back to the source.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:  # different row
                    prev_text = self.lines[end_line - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] += 1
        self.build_tokens_line()
        if not self.mapping:
            # Only skipped tokens were collected: there is no first token
            # to take the indentation from and nothing to check.
            return
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in options.logical_checks:
            if options.verbose >= 4:
                print(' ' + name)
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, code, *args = result
                if isinstance(offset, tuple):
                    # The check already reported a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Translate the logical offset back to the source: the
                    # last token starting at or before it wins.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error_args(original_number, original_offset,
                                       code, check, *args)
        self.previous_logical = self.logical_line

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.

        expected    -- codes that are counted but not reported
        line_offset -- value added to every reported line number

        Returns the number of errors reported for this input.
        """
        self.expected = expected or ()
        self.line_offset = line_offset
        self.line_number = 0
        self.file_errors = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.blank_lines_before_comment = 0
        self.tokens = []
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            if options.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]],
                       token[1]))
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                self.check_logical()
                self.blank_lines = 0
                self.blank_lines_before_comment = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                if len(self.tokens) <= 1:
                    # The physical line contains only this token.
                    self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                if source_line[:token_start].strip() == '':
                    self.blank_lines_before_comment = max(
                        self.blank_lines, self.blank_lines_before_comment)
                    self.blank_lines = 0
                if text.endswith('\n') and not parens:
                    # The comment also ends a physical line. This works around
                    # Python < 2.6 behaviour, which does not generate NL after
                    # a comment which is on a line by itself.
                    self.tokens = []
        return self.file_errors

    def report_error_args(self, line_number, offset, code, check, *args):
        """
        Report a finding given as an error code plus optional arguments.

        This is the entry point used by check_physical() and
        check_logical().  This default implementation joins the code and
        the stringified arguments with single spaces and hands the text to
        report_error(); subclasses may override it to build other messages.
        """
        text = ' '.join([code] + [str(arg) for arg in args])
        self.report_error(line_number, offset, text, check)

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        The first four characters of text are the error code; the text
        after the following separator is stored as the message for the
        statistics.
        """
        code = text[:4]
        if ignore_code(code):
            return
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        if code in options.counters:
            options.counters[code] += 1
        else:
            options.counters[code] = 1
            options.messages[code] = text[5:]
        if options.quiet or code in self.expected:
            # Don't care about expected errors or warnings
            return
        self.file_errors += 1
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, self.line_offset + line_number,
                     offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                # A check without a docstring has nothing to show.
                doc = check.__doc__
                if doc:
                    message(doc.lstrip('\n').rstrip())
|
1139 |
|
1140 |
|
def input_file(filename):
    """
    Check one Python source file with a fresh Checker.

    In verbose mode the file name is announced first.
    """
    if options.verbose:
        message('checking ' + filename)
    checker = Checker(filename)
    checker.check_all()
|
1148 |
|
1149 |
|
def input_dir(dirname, runner=None):
    """
    Check all Python source files in this directory and all subdirectories.

    dirname -- directory to walk; trailing slashes are ignored and nothing
               is done when the directory itself is excluded
    runner  -- callable invoked with the path of every matching file;
               defaults to input_file
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    if runner is None:
        runner = input_file
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] += 1
        # Prune excluded directories with a slice assignment: os.walk only
        # honours in-place changes, and removing entries while iterating
        # over the same list would skip the entry after each removed one.
        dirs[:] = sorted(subdir for subdir in dirs if not excluded(subdir))
        for filename in sorted(files):
            if filename_match(filename) and not excluded(filename):
                options.counters['files'] += 1
                runner(os.path.join(root, filename))
|
1172 |
|
1173 |
|
def excluded(filename):
    """
    Tell whether the base name of filename matches an exclude pattern.

    Returns True on a match with any pattern in options.exclude, and
    None (implicitly) otherwise.
    """
    base = os.path.basename(filename)
    if any(fnmatch(base, pattern) for pattern in options.exclude):
        return True
|
1183 |
|
1184 |
|
def filename_match(filename):
    """
    Tell whether filename matches one of the patterns in options.filename.

    Without any configured pattern every file name matches.  Returns True
    on a match and None (implicitly) otherwise.
    """
    patterns = options.filename
    if not patterns:
        return True
    if any(fnmatch(filename, pattern) for pattern in patterns):
        return True
|
1195 |
|
1196 |
|
def ignore_code(code):
    """
    Decide whether the given error code is to be ignored.

    A prefix match in options.select wins and returns False.  Otherwise a
    prefix match in options.ignore returns True.  Without any match the
    result is None (implicitly).
    """
    if any(code.startswith(prefix) for prefix in options.select):
        return False
    if any(code.startswith(prefix) for prefix in options.ignore):
        return True
|
1208 |
|
1209 |
|
def reset_counters():
    """
    Forget all per-code counters and messages.

    Entries of options.counters whose key is in BENCHMARK_KEYS are kept;
    options.messages is replaced by an empty dictionary.
    """
    stale = [key for key in options.counters if key not in BENCHMARK_KEYS]
    for key in stale:
        del options.counters[key]
    options.messages = {}
|
1215 |
|
1216 |
|
def get_error_statistics():
    """Return the statistics lines of all codes starting with 'E'."""
    return get_statistics(prefix="E")
|
1220 |
|
1221 |
|
def get_warning_statistics():
    """Return the statistics lines of all codes starting with 'W'."""
    return get_statistics(prefix="W")
|
1225 |
|
1226 |
|
def get_statistics(prefix=''):
    """
    Return one formatted line per message code starting with prefix.

    The lines are ordered by code and hold the count, the code and the
    stored message text.

    prefix='' selects all errors and warnings
    prefix='E' selects all errors
    prefix='W' selects all warnings
    prefix='E4' selects all errors that have to do with imports
    """
    return ['%-7s %s %s' %
            (options.counters[code], code, options.messages[code])
            for code in sorted(options.messages)
            if code.startswith(prefix)]
|
1244 |
|
1245 |
|
def get_count(prefix=''):
    """Return the summed count of all message codes starting with prefix."""
    return sum(options.counters[code]
               for code in options.messages
               if code.startswith(prefix))
|
1254 |
|
1255 |
|
def print_statistics(prefix=''):
    """Print the statistics lines of all codes starting with prefix."""
    for entry in get_statistics(prefix):
        print(entry)
|
1260 |
|
1261 |
|
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed -- run time in seconds.  For every key in BENCHMARK_KEYS the
               rate per second and the total are printed.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in BENCHMARK_KEYS:
        total = options.counters[key]
        # A coarse clock can report zero elapsed time for a very short
        # run; print a rate of 0 instead of dividing by zero.
        rate = total / elapsed if elapsed else 0
        print('%-7d %s per second (%d total)' % (rate, key, total))
|
1271 |
|
1272 |
|
def run_tests(filename):
    """
    Run every test case contained in one test file.

    A test file is a sequence of test cases. Each case is introduced by a
    declaration, a single line starting with '#:', which lists the codes
    of the expected failures separated by spaces, or 'Okay' when no
    failure is expected.
    Lines in front of the first declaration are expected to pass all
    checks. After an empty declaration the following lines are not
    checked, until the next declaration.

    Examples:

     * Only E224 and W701 are expected:         #: E224 W701
     * Following example is conform:            #: Okay
     * Don't check these lines:                 #:
    """
    source_lines = readlines(filename)
    # A trailing empty declaration flushes the last pending test case.
    source_lines.append('#:\n')
    line_offset = 0
    expected = ['Okay']
    testcase = []
    for index, line in enumerate(source_lines):
        if not line.startswith('#:'):
            # Ordinary line: part of the current case unless the case was
            # declared with an empty code list.
            if expected:
                testcase.append(line)
            continue
        if index > 0 and expected:
            label = '%s:%s:1' % (filename, line_offset + 1)
            expected = [code for code in expected if code != 'Okay']
            # Run the checker on the collected lines
            checker = Checker(filename, testcase)
            errors = checker.check_all(expected, line_offset)
            # Every expected code has to show up in the counters
            missing = [code for code in expected
                       if not options.counters.get(code)]
            for code in missing:
                message('%s: error %s not found' % (label, code))
            errors += len(missing)
            if options.verbose and not errors:
                message('%s: passed (%s)' % (label, ' '.join(expected)))
            # Start the next case with clean counters
            reset_counters()
        # The next case starts right behind this declaration line
        line_offset = index
        expected = line.split()[1:]
        del testcase[:]
|
1321 |
|
1322 |
|
def selftest():
    """
    Test all check functions with test cases in docstrings.

    Every docstring line matching SELFTEST_REGEX supplies an expected code
    (or 'Okay') and a source snippet in which the two-character sequences
    backslash-n, backslash-t and backslash-s stand for a line break, a tab
    and a space.  Failures are printed; the summary is only printed in
    verbose mode.  Note that options.quiet is left at 2 afterwards.
    """
    count_passed = 0
    count_failed = 0
    checks = options.physical_checks + options.logical_checks
    for name, check, argument_names in checks:
        # A check without a docstring simply has no test cases.
        for line in (check.__doc__ or '').splitlines():
            line = line.lstrip()
            match = SELFTEST_REGEX.match(line)
            if match is None:
                continue
            code, source = match.groups()
            checker = Checker(None)
            for part in source.split(r'\n'):
                part = part.replace(r'\t', '\t')
                part = part.replace(r'\s', ' ')
                checker.lines.append(part + '\n')
            options.quiet = 2
            checker.check_all()
            error = None
            if code == 'Okay':
                if len(options.counters) > len(BENCHMARK_KEYS):
                    codes = [key for key in options.counters.keys()
                             if key not in BENCHMARK_KEYS]
                    error = "incorrectly found %s" % ', '.join(codes)
            elif not options.counters.get(code):
                error = "failed to find %s" % code
            # Reset the counters
            reset_counters()
            if not error:
                count_passed += 1
            else:
                count_failed += 1
                if len(checker.lines) == 1:
                    print("pep8.py: %s: %s" %
                          (error, checker.lines[0].rstrip()))
                else:
                    print("pep8.py: %s:" % error)
                    for source_line in checker.lines:
                        print(source_line.rstrip())
    if options.verbose:
        print("%d passed and %d failed." % (count_passed, count_failed))
        if count_failed:
            print("Test failed.")
        else:
            print("Test passed.")
|
1371 |
|
1372 |
|
def process_options(arglist=None):
    """
    Parse the options and prepare the module-wide configuration.

    arglist -- list of arguments to parse; it is handed to the option
               parser unchanged, None included

    Stores the result in the globals options and args and returns both.
    Besides the parsed values, options receives the split pattern and
    code lists, the discovered checks and fresh counters and messages.
    """
    global options, args
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    add = parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', action='store_true',
        help="show all occurrences of the same error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE)
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns (default: "
             "*.py)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--benchmark', action='store_true',
        help="measure processing speed")
    add('--testsuite', metavar='dir',
        help="run regression tests from dir")
    add('--doctest', action='store_true',
        help="run doctest on myself")

    options, args = parser.parse_args(arglist)
    if options.testsuite:
        args.append(options.testsuite)
    if not (args or options.doctest):
        parser.error('input not specified')

    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    options.select = options.select.split(',') if options.select else []
    if options.ignore:
        options.ignore = options.ignore.split(',')
    elif options.select:
        # Ignore all checks which are not explicitly selected
        options.ignore = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        options.ignore = []
    else:
        # The default choice: ignore controversial checks
        options.ignore = DEFAULT_IGNORE.split(',')

    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)
    options.messages = {}
    return options, args
|
1445 |
|
1446 |
|
def _main():
    """
    Command line entry point: parse options, check the given paths.

    Exits with status 1 when any error or warning was counted; with
    --count the total is written to standard error first.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    runner = run_tests if options.testsuite else input_file
    start_time = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path, runner=runner)
            continue
        if excluded(path):
            continue
        options.counters['files'] += 1
        runner(path)
    elapsed = time.time() - start_time
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    count = get_count()
    if not count:
        return
    if options.count:
        sys.stderr.write(str(count) + '\n')
    sys.exit(1)
|
1477 |
|
1478 |
|
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    _main()