Plugins/CheckerPlugins/CodeStyleChecker/pycodestyle.py

changeset 6246
fe07a9f16f23
parent 6048
82ad8ec9548c
child 6263
4dd53711d869
equal deleted inserted replaced
6235:0e6a395ecfe8 6246:fe07a9f16f23
67 import re 67 import re
68 import sys 68 import sys
69 import time 69 import time
70 import tokenize 70 import tokenize
71 import warnings 71 import warnings
72 import bisect
73
74 try:
75 from functools import lru_cache
76 except ImportError:
77 def lru_cache(maxsize=128): # noqa as it's a fake implementation.
78 """Does not really need a real lru_cache, it's just optimization, so
79 let's just do nothing here. Python 3.2+ will just get better
80 performance, time to upgrade?
81 """
82 return lambda function: function
72 83
73 from fnmatch import fnmatch 84 from fnmatch import fnmatch
74 from optparse import OptionParser 85 from optparse import OptionParser
75 86
76 try: 87 try:
77 from configparser import RawConfigParser 88 from configparser import RawConfigParser
78 from io import TextIOWrapper 89 from io import TextIOWrapper
79 except ImportError: 90 except ImportError:
80 from ConfigParser import RawConfigParser # __IGNORE_WARNING__ 91 from ConfigParser import RawConfigParser # __IGNORE_WARNING__
81 92
82 __version__ = '2.3.1-eric' 93 __version__ = '2.4.0-eric'
83 94
84 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox' 95 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
85 DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503' 96 DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
86 try: 97 try:
87 if sys.platform == 'win32': 98 if sys.platform == 'win32':
88 USER_CONFIG = os.path.expanduser(r'~\.pycodestyle') 99 USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
89 else: 100 else:
90 USER_CONFIG = os.path.join( 101 USER_CONFIG = os.path.join(
95 USER_CONFIG = None 106 USER_CONFIG = None
96 107
97 PROJECT_CONFIG = ('setup.cfg', 'tox.ini') 108 PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
98 TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite') 109 TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
99 MAX_LINE_LENGTH = 79 110 MAX_LINE_LENGTH = 79
111 # Number of blank lines between various code parts.
112 BLANK_LINES_CONFIG = {
113 # Top level class and function.
114 'top_level': 2,
115 # Methods and nested class and function.
116 'method': 1,
117 }
100 REPORT_FORMAT = { 118 REPORT_FORMAT = {
101 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s', 119 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
102 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s', 120 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
103 } 121 }
104 122
105 PyCF_ONLY_AST = 1024 123 PyCF_ONLY_AST = 1024
106 SINGLETONS = frozenset(['False', 'None', 'True']) 124 SINGLETONS = frozenset(['False', 'None', 'True'])
107 KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS 125 KEYWORDS = frozenset(keyword.kwlist + ['print', 'async']) - SINGLETONS
108 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) 126 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
109 ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-']) 127 ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
110 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%']) 128 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
111 WS_NEEDED_OPERATORS = frozenset([ 129 WS_NEEDED_OPERATORS = frozenset([
112 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>', 130 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
121 INDENT_REGEX = re.compile(r'([ \t]*)') 139 INDENT_REGEX = re.compile(r'([ \t]*)')
122 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') 140 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
123 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$') 141 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
124 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') 142 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
125 DOCSTRING_REGEX = re.compile(r'u?r?["\']') 143 DOCSTRING_REGEX = re.compile(r'u?r?["\']')
126 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') 144 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;:]')
127 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)') 145 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
128 COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)' 146 COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
129 r'\s*(?(1)|(None|False|True))\b') 147 r'\s*(?(1)|(None|False|True))\b')
130 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s') 148 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
131 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type' 149 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
132 r'|\s*\(\s*([^)]*[^ )])\s*\))') 150 r'|\s*\(\s*([^)]*[^ )])\s*\))')
133 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS)) 151 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
134 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)') 152 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
135 LAMBDA_REGEX = re.compile(r'\blambda\b') 153 LAMBDA_REGEX = re.compile(r'\blambda\b')
136 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$') 154 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
137 STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)') 155 STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
138 STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)') 156 STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
139 STARTSWITH_INDENT_STATEMENT_REGEX = re.compile( 157 STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
140 r'^\s*({0})'.format('|'.join(s.replace(' ', '\s+') for s in ( 158 r'^\s*({0})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
141 'def', 'async def', 159 'def', 'async def',
142 'for', 'async for', 160 'for', 'async for',
143 'if', 'elif', 'else', 161 'if', 'elif', 'else',
144 'try', 'except', 'finally', 162 'try', 'except', 'finally',
145 'with', 'async with', 163 'with', 'async with',
152 # Work around Python < 2.6 behaviour, which does not generate NL after 170 # Work around Python < 2.6 behaviour, which does not generate NL after
153 # a comment which is on a line by itself. 171 # a comment which is on a line by itself.
154 COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n' 172 COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
155 173
156 174
175 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
176
177
178 def _get_parameters(function):
179 if sys.version_info >= (3, 3):
180 return [parameter.name
181 for parameter
182 in inspect.signature(function).parameters.values()
183 if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
184 else:
185 return inspect.getargspec(function)[0]
186
187 def register_check(check, codes=None):
188 """Register a new check object."""
189 def _add_check(check, kind, codes, args):
190 if check in _checks[kind]:
191 _checks[kind][check][0].extend(codes or [])
192 else:
193 _checks[kind][check] = (codes or [''], args)
194 if inspect.isfunction(check):
195 args = _get_parameters(check)
196 if args and args[0] in ('physical_line', 'logical_line'):
197 if codes is None:
198 codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
199 _add_check(check, args[0], codes, args)
200 elif inspect.isclass(check):
201 if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
202 _add_check(check, 'tree', codes, None)
203 return check
204
205
157 ############################################################################## 206 ##############################################################################
158 # Plugins (check functions) for physical lines 207 # Plugins (check functions) for physical lines
159 ############################################################################## 208 ##############################################################################
160 209
161 210 @register_check
162 def tabs_or_spaces(physical_line, indent_char): 211 def tabs_or_spaces(physical_line, indent_char):
163 r"""Never mix tabs and spaces. 212 r"""Never mix tabs and spaces.
164 213
165 The most popular way of indenting Python is with spaces only. The 214 The most popular way of indenting Python is with spaces only. The
166 second-most popular way is with tabs only. Code indented with a mixture 215 second-most popular way is with tabs only. Code indented with a mixture
176 for offset, char in enumerate(indent): 225 for offset, char in enumerate(indent):
177 if char != indent_char: 226 if char != indent_char:
178 return offset, "E101 indentation contains mixed spaces and tabs" 227 return offset, "E101 indentation contains mixed spaces and tabs"
179 228
180 229
230 @register_check
181 def tabs_obsolete(physical_line): 231 def tabs_obsolete(physical_line):
182 r"""For new projects, spaces-only are strongly recommended over tabs. 232 r"""For new projects, spaces-only are strongly recommended over tabs.
183 233
184 Okay: if True:\n return 234 Okay: if True:\n return
185 W191: if True:\n\treturn 235 W191: if True:\n\treturn
187 indent = INDENT_REGEX.match(physical_line).group(1) 237 indent = INDENT_REGEX.match(physical_line).group(1)
188 if '\t' in indent: 238 if '\t' in indent:
189 return indent.index('\t'), "W191 indentation contains tabs" 239 return indent.index('\t'), "W191 indentation contains tabs"
190 240
191 241
242 @register_check
192 def trailing_whitespace(physical_line): 243 def trailing_whitespace(physical_line):
193 r"""Trailing whitespace is superfluous. 244 r"""Trailing whitespace is superfluous.
194 245
195 The warning returned varies on whether the line itself is blank, for easier 246 The warning returned varies on whether the line itself is blank, for easier
196 filtering for those who want to indent their blank lines. 247 filtering for those who want to indent their blank lines.
208 return len(stripped), "W291 trailing whitespace" 259 return len(stripped), "W291 trailing whitespace"
209 else: 260 else:
210 return 0, "W293 blank line contains whitespace" 261 return 0, "W293 blank line contains whitespace"
211 262
212 263
264 @register_check
213 def trailing_blank_lines(physical_line, lines, line_number, total_lines): 265 def trailing_blank_lines(physical_line, lines, line_number, total_lines):
214 r"""Trailing blank lines are superfluous. 266 r"""Trailing blank lines are superfluous.
215 267
216 Okay: spam(1) 268 Okay: spam(1)
217 W391: spam(1)\n 269 W391: spam(1)\n
224 return 0, "W391 blank line at end of file" 276 return 0, "W391 blank line at end of file"
225 if stripped_last_line == physical_line: 277 if stripped_last_line == physical_line:
226 return len(physical_line), "W292 no newline at end of file" 278 return len(physical_line), "W292 no newline at end of file"
227 279
228 280
229 def maximum_line_length(physical_line, max_line_length, multiline, noqa): 281 @register_check
282 def maximum_line_length(physical_line, max_line_length, multiline,
283 line_number, noqa):
230 r"""Limit all lines to a maximum of 79 characters. 284 r"""Limit all lines to a maximum of 79 characters.
231 285
232 There are still many devices around that are limited to 80 character 286 There are still many devices around that are limited to 80 character
233 lines; plus, limiting windows to 80 characters makes it possible to have 287 lines; plus, limiting windows to 80 characters makes it possible to have
234 several windows side-by-side. The default wrapping on such devices looks 288 several windows side-by-side. The default wrapping on such devices looks
239 Reports error E501. 293 Reports error E501.
240 """ 294 """
241 line = physical_line.rstrip() 295 line = physical_line.rstrip()
242 length = len(line) 296 length = len(line)
243 if length > max_line_length and not noqa: 297 if length > max_line_length and not noqa:
298 # Special case: ignore long shebang lines.
299 if line_number == 1 and line.startswith('#!'):
300 return
244 # Special case for long URLs in multi-line docstrings or comments, 301 # Special case for long URLs in multi-line docstrings or comments,
245 # but still report the error when the 72 first chars are whitespaces. 302 # but still report the error when the 72 first chars are whitespaces.
246 chunks = line.split() 303 chunks = line.split()
247 if ((len(chunks) == 1 and multiline) or 304 if ((len(chunks) == 1 and multiline) or
248 (len(chunks) == 2 and chunks[0] == '#')) and \ 305 (len(chunks) == 2 and chunks[0] == '#')) and \
263 ############################################################################## 320 ##############################################################################
264 # Plugins (check functions) for logical lines 321 # Plugins (check functions) for logical lines
265 ############################################################################## 322 ##############################################################################
266 323
267 324
325 @register_check
268 def blank_lines(logical_line, blank_lines, indent_level, line_number, 326 def blank_lines(logical_line, blank_lines, indent_level, line_number,
269 blank_before, previous_logical, 327 blank_before, previous_logical,
270 previous_unindented_logical_line, previous_indent_level, 328 previous_unindented_logical_line, previous_indent_level,
271 lines): 329 lines):
272 r"""Separate top-level function and class definitions with two blank lines. 330 r"""Separate top-level function and class definitions with two blank lines.
292 E303: def a():\n\n\n\n pass 350 E303: def a():\n\n\n\n pass
293 E304: @decorator\n\ndef a():\n pass 351 E304: @decorator\n\ndef a():\n pass
294 E305: def a():\n pass\na() 352 E305: def a():\n pass\na()
295 E306: def a():\n def b():\n pass\n def c():\n pass 353 E306: def a():\n def b():\n pass\n def c():\n pass
296 """ 354 """
297 if line_number < 3 and not previous_logical: 355 top_level_lines = BLANK_LINES_CONFIG['top_level']
356 method_lines = BLANK_LINES_CONFIG['method']
357
358 if line_number < top_level_lines + 1 and not previous_logical:
298 return # Don't expect blank lines before the first line 359 return # Don't expect blank lines before the first line
299 if previous_logical.startswith('@'): 360 if previous_logical.startswith('@'):
300 if blank_lines: 361 if blank_lines:
301 yield 0, "E304 blank lines found after function decorator" 362 yield 0, "E304 blank lines found after function decorator"
302 elif blank_lines > 2 or (indent_level and blank_lines == 2): 363 elif (blank_lines > top_level_lines or
364 (indent_level and blank_lines == method_lines + 1)
365 ):
303 yield 0, "E303 too many blank lines (%d)", blank_lines 366 yield 0, "E303 too many blank lines (%d)", blank_lines
304 elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line): 367 elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
305 if indent_level: 368 if indent_level:
306 if not (blank_before or previous_indent_level < indent_level or 369 if not (blank_before == method_lines or
307 DOCSTRING_REGEX.match(previous_logical)): 370 previous_indent_level < indent_level or
371 DOCSTRING_REGEX.match(previous_logical)
372 ):
308 ancestor_level = indent_level 373 ancestor_level = indent_level
309 nested = False 374 nested = False
310 # Search backwards for a def ancestor or tree root (top level). 375 # Search backwards for a def ancestor or tree root (top level).
311 for line in lines[line_number - 2::-1]: 376 for line in lines[line_number - top_level_lines::-1]:
312 if line.strip() and expand_indent(line) < ancestor_level: 377 if line.strip() and expand_indent(line) < ancestor_level:
313 ancestor_level = expand_indent(line) 378 ancestor_level = expand_indent(line)
314 nested = line.lstrip().startswith('def ') 379 nested = line.lstrip().startswith('def ')
315 if nested or ancestor_level == 0: 380 if nested or ancestor_level == 0:
316 break 381 break
317 if nested: 382 if nested:
318 yield 0, "E306 expected 1 blank line before a " \ 383 yield 0, "E306 expected %s blank line before a " \
319 "nested definition, found 0" 384 "nested definition, found 0", method_lines
320 else: 385 else:
321 yield 0, "E301 expected 1 blank line, found 0" 386 yield (0, "E301 expected %s blank line, found 0",
322 elif blank_before != 2: 387 method_lines)
323 yield 0, "E302 expected 2 blank lines, found %d", blank_before 388 elif blank_before != top_level_lines:
324 elif (logical_line and not indent_level and blank_before != 2 and 389 yield (0, "E302 expected %s blank lines, found %d",
325 previous_unindented_logical_line.startswith(('def ', 'class '))): 390 top_level_lines, blank_before)
326 yield 0, "E305 expected 2 blank lines after " \ 391 elif (logical_line and
327 "class or function definition, found %d", blank_before 392 not indent_level and
328 393 blank_before != top_level_lines and
329 394 previous_unindented_logical_line.startswith(('def ', 'class '))
395 ):
396 yield (0, "E305 expected %s blank lines after " \
397 "class or function definition, found %d",
398 top_level_lines, blank_before)
399
400
401 @register_check
330 def extraneous_whitespace(logical_line): 402 def extraneous_whitespace(logical_line):
331 r"""Avoid extraneous whitespace. 403 r"""Avoid extraneous whitespace.
332 404
333 Avoid extraneous whitespace in these situations: 405 Avoid extraneous whitespace in these situations:
334 - Immediately inside parentheses, brackets or braces. 406 - Immediately inside parentheses, brackets or braces.
357 elif line[found - 1] != ',': 429 elif line[found - 1] != ',':
358 code = ('E202' if char in '}])' else 'E203') # if char in ',;:' 430 code = ('E202' if char in '}])' else 'E203') # if char in ',;:'
359 yield found, "%s whitespace before '%s'" % (code, char), char 431 yield found, "%s whitespace before '%s'" % (code, char), char
360 432
361 433
434 @register_check
362 def whitespace_around_keywords(logical_line): 435 def whitespace_around_keywords(logical_line):
363 r"""Avoid extraneous whitespace around keywords. 436 r"""Avoid extraneous whitespace around keywords.
364 437
365 Okay: True and False 438 Okay: True and False
366 E271: True and False 439 E271: True and False
380 yield match.start(2), "E273 tab after keyword" 453 yield match.start(2), "E273 tab after keyword"
381 elif len(after) > 1: 454 elif len(after) > 1:
382 yield match.start(2), "E271 multiple spaces after keyword" 455 yield match.start(2), "E271 multiple spaces after keyword"
383 456
384 457
458 @register_check
385 def missing_whitespace_after_import_keyword(logical_line): 459 def missing_whitespace_after_import_keyword(logical_line):
386 r"""Multiple imports in form from x import (a, b, c) should have space 460 r"""Multiple imports in form from x import (a, b, c) should have space
387 between import statement and parenthesised name list. 461 between import statement and parenthesised name list.
388 462
389 Okay: from foo import (bar, baz) 463 Okay: from foo import (bar, baz)
397 if -1 < found: 471 if -1 < found:
398 pos = found + len(indicator) - 1 472 pos = found + len(indicator) - 1
399 yield pos, "E275 missing whitespace after keyword" 473 yield pos, "E275 missing whitespace after keyword"
400 474
401 475
476 @register_check
402 def missing_whitespace(logical_line): 477 def missing_whitespace(logical_line):
403 r"""Each comma, semicolon or colon should be followed by whitespace. 478 r"""Each comma, semicolon or colon should be followed by whitespace.
404 479
405 Okay: [a, b] 480 Okay: [a, b]
406 Okay: (3,) 481 Okay: (3,)
423 if char == ',' and line[index + 1] == ')': 498 if char == ',' and line[index + 1] == ')':
424 continue # Allow tuple with only one element: (3,) 499 continue # Allow tuple with only one element: (3,)
425 yield index, "E231 missing whitespace after '%s'", char 500 yield index, "E231 missing whitespace after '%s'", char
426 501
427 502
503 @register_check
428 def indentation(logical_line, previous_logical, indent_char, 504 def indentation(logical_line, previous_logical, indent_char,
429 indent_level, previous_indent_level): 505 indent_level, previous_indent_level):
430 r"""Use 4 spaces per indentation level. 506 r"""Use 4 spaces per indentation level.
431 507
432 For really old code that you don't want to mess up, you can continue to 508 For really old code that you don't want to mess up, you can continue to
454 yield 0, tmpl % (2 + c, "expected an indented block") 530 yield 0, tmpl % (2 + c, "expected an indented block")
455 elif not indent_expect and indent_level > previous_indent_level: 531 elif not indent_expect and indent_level > previous_indent_level:
456 yield 0, tmpl % (3 + c, "unexpected indentation") 532 yield 0, tmpl % (3 + c, "unexpected indentation")
457 533
458 534
535 @register_check
459 def continued_indentation(logical_line, tokens, indent_level, hang_closing, 536 def continued_indentation(logical_line, tokens, indent_level, hang_closing,
460 indent_char, noqa, verbose): 537 indent_char, noqa, verbose):
461 r"""Continuation lines indentation. 538 r"""Continuation lines indentation.
462 539
463 Continuation lines should align wrapped elements either vertically 540 Continuation lines should align wrapped elements either vertically
653 else: 730 else:
654 code = "E125 continuation line" 731 code = "E125 continuation line"
655 yield pos, "%s with same indent as next logical line" % code 732 yield pos, "%s with same indent as next logical line" % code
656 733
657 734
735 @register_check
658 def whitespace_before_parameters(logical_line, tokens): 736 def whitespace_before_parameters(logical_line, tokens):
659 r"""Avoid extraneous whitespace. 737 r"""Avoid extraneous whitespace.
660 738
661 Avoid extraneous whitespace in the following situations: 739 Avoid extraneous whitespace in the following situations:
662 - before the open parenthesis that starts the argument list of a 740 - before the open parenthesis that starts the argument list of a
685 prev_type = token_type 763 prev_type = token_type
686 prev_text = text 764 prev_text = text
687 prev_end = end 765 prev_end = end
688 766
689 767
768 @register_check
690 def whitespace_around_operator(logical_line): 769 def whitespace_around_operator(logical_line):
691 r"""Avoid extraneous whitespace around an operator. 770 r"""Avoid extraneous whitespace around an operator.
692 771
693 Okay: a = 12 + 3 772 Okay: a = 12 + 3
694 E221: a = 4 + 5 773 E221: a = 4 + 5
708 yield match.start(2), "E224 tab after operator" 787 yield match.start(2), "E224 tab after operator"
709 elif len(after) > 1: 788 elif len(after) > 1:
710 yield match.start(2), "E222 multiple spaces after operator" 789 yield match.start(2), "E222 multiple spaces after operator"
711 790
712 791
792 @register_check
713 def missing_whitespace_around_operator(logical_line, tokens): 793 def missing_whitespace_around_operator(logical_line, tokens):
714 r"""Surround operators with a single space on either side. 794 r"""Surround operators with a single space on either side.
715 795
716 - Always surround these binary operators with a single space on 796 - Always surround these binary operators with a single space on
717 either side: assignment (=), augmented assignment (+=, -= etc.), 797 either side: assignment (=), augmented assignment (+=, -= etc.),
800 prev_type = token_type 880 prev_type = token_type
801 prev_text = text 881 prev_text = text
802 prev_end = end 882 prev_end = end
803 883
804 884
885 @register_check
805 def whitespace_around_comma(logical_line): 886 def whitespace_around_comma(logical_line):
806 r"""Avoid extraneous whitespace after a comma or a colon. 887 r"""Avoid extraneous whitespace after a comma or a colon.
807 888
808 Note: these checks are disabled by default 889 Note: these checks are disabled by default
809 890
818 yield found, "E242 tab after '%s'", m.group()[0] 899 yield found, "E242 tab after '%s'", m.group()[0]
819 else: 900 else:
820 yield found, "E241 multiple spaces after '%s'", m.group()[0] 901 yield found, "E241 multiple spaces after '%s'", m.group()[0]
821 902
822 903
904 @register_check
823 def whitespace_around_named_parameter_equals(logical_line, tokens): 905 def whitespace_around_named_parameter_equals(logical_line, tokens):
824 r"""Don't use spaces around the '=' sign in function arguments. 906 r"""Don't use spaces around the '=' sign in function arguments.
825 907
826 Don't use spaces around the '=' sign when used to indicate a 908 Don't use spaces around the '=' sign when used to indicate a
827 keyword argument or a default parameter value. 909 keyword argument or a default parameter value, except when using a type
910 annotation.
828 911
829 Okay: def complex(real, imag=0.0): 912 Okay: def complex(real, imag=0.0):
830 Okay: return magic(r=real, i=imag) 913 Okay: return magic(r=real, i=imag)
831 Okay: boolean(a == b) 914 Okay: boolean(a == b)
832 Okay: boolean(a != b) 915 Okay: boolean(a != b)
835 Okay: def foo(arg: int = 42): 918 Okay: def foo(arg: int = 42):
836 Okay: async def foo(arg: int = 42): 919 Okay: async def foo(arg: int = 42):
837 920
838 E251: def complex(real, imag = 0.0): 921 E251: def complex(real, imag = 0.0):
839 E251: return magic(r = real, i = imag) 922 E251: return magic(r = real, i = imag)
923 E252: def complex(real, image: float=0.0):
840 """ 924 """
841 parens = 0 925 parens = 0
842 no_space = False 926 no_space = False
927 require_space = False
843 prev_end = None 928 prev_end = None
844 annotated_func_arg = False 929 annotated_func_arg = False
845 in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line)) 930 in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))
931
846 message = "E251 unexpected spaces around keyword / parameter equals" 932 message = "E251 unexpected spaces around keyword / parameter equals"
933 missing_message = "E252 missing whitespace around parameter equals"
934
847 for token_type, text, start, end, line in tokens: 935 for token_type, text, start, end, line in tokens:
848 if token_type == tokenize.NL: 936 if token_type == tokenize.NL:
849 continue 937 continue
850 if no_space: 938 if no_space:
851 no_space = False 939 no_space = False
852 if start != prev_end: 940 if start != prev_end:
853 yield (prev_end, message) 941 yield (prev_end, message)
942 if require_space:
943 require_space = False
944 if start == prev_end:
945 yield (prev_end, missing_message)
854 if token_type == tokenize.OP: 946 if token_type == tokenize.OP:
855 if text in '([': 947 if text in '([':
856 parens += 1 948 parens += 1
857 elif text in ')]': 949 elif text in ')]':
858 parens -= 1 950 parens -= 1
859 elif in_def and text == ':' and parens == 1: 951 elif in_def and text == ':' and parens == 1:
860 annotated_func_arg = True 952 annotated_func_arg = True
861 elif parens and text == ',' and parens == 1: 953 elif parens and text == ',' and parens == 1:
862 annotated_func_arg = False 954 annotated_func_arg = False
863 elif parens and text == '=' and not annotated_func_arg: 955 elif parens and text == '=':
864 no_space = True 956 if not annotated_func_arg:
865 if start != prev_end: 957 no_space = True
866 yield (prev_end, message) 958 if start != prev_end:
959 yield (prev_end, message)
960 else:
961 require_space = True
962 if start == prev_end:
963 yield (prev_end, missing_message)
867 if not parens: 964 if not parens:
868 annotated_func_arg = False 965 annotated_func_arg = False
869 966
870 prev_end = end 967 prev_end = end
871 968
872 969
970 @register_check
873 def whitespace_before_comment(logical_line, tokens): 971 def whitespace_before_comment(logical_line, tokens):
874 r"""Separate inline comments by at least two spaces. 972 r"""Separate inline comments by at least two spaces.
875 973
876 An inline comment is a comment on the same line as a statement. Inline 974 An inline comment is a comment on the same line as a statement. Inline
877 comments should be separated by at least two spaces from the statement. 975 comments should be separated by at least two spaces from the statement.
909 yield start, "E266 too many leading '#' for block comment" 1007 yield start, "E266 too many leading '#' for block comment"
910 elif token_type != tokenize.NL: 1008 elif token_type != tokenize.NL:
911 prev_end = end 1009 prev_end = end
912 1010
913 1011
1012 @register_check
914 def imports_on_separate_lines(logical_line): 1013 def imports_on_separate_lines(logical_line):
915 r"""Place imports on separate lines. 1014 r"""Place imports on separate lines.
916 1015
917 Okay: import os\nimport sys 1016 Okay: import os\nimport sys
918 E401: import sys, os 1017 E401: import sys, os
928 found = line.find(',') 1027 found = line.find(',')
929 if -1 < found and ';' not in line[:found]: 1028 if -1 < found and ';' not in line[:found]:
930 yield found, "E401 multiple imports on one line" 1029 yield found, "E401 multiple imports on one line"
931 1030
932 1031
1032 @register_check
933 def module_imports_on_top_of_file( 1033 def module_imports_on_top_of_file(
934 logical_line, indent_level, checker_state, noqa): 1034 logical_line, indent_level, checker_state, noqa):
935 r"""Place imports at the top of the file. 1035 r"""Place imports at the top of the file.
936 1036
937 Always put imports at the top of the file, just after any module comments 1037 Always put imports at the top of the file, just after any module comments
985 checker_state['seen_docstring'] = True 1085 checker_state['seen_docstring'] = True
986 else: 1086 else:
987 checker_state['seen_non_imports'] = True 1087 checker_state['seen_non_imports'] = True
988 1088
989 1089
1090 @register_check
990 def compound_statements(logical_line): 1091 def compound_statements(logical_line):
991 r"""Compound statements (on the same line) are generally discouraged. 1092 r"""Compound statements (on the same line) are generally discouraged.
992 1093
993 While sometimes it's okay to put an if/for/while with a small body 1094 While sometimes it's okay to put an if/for/while with a small body
994 on the same line, never do this for multi-clause statements. 1095 on the same line, never do this for multi-clause statements.
1045 else: 1146 else:
1046 yield found, "E703 statement ends with a semicolon" 1147 yield found, "E703 statement ends with a semicolon"
1047 found = line.find(';', found + 1) 1148 found = line.find(';', found + 1)
1048 1149
1049 1150
1151 @register_check
1050 def explicit_line_join(logical_line, tokens): 1152 def explicit_line_join(logical_line, tokens):
1051 r"""Avoid explicit line join between brackets. 1153 r"""Avoid explicit line join between brackets.
1052 1154
1053 The preferred way of wrapping long lines is by using Python's implied line 1155 The preferred way of wrapping long lines is by using Python's implied line
1054 continuation inside parentheses, brackets and braces. Long lines can be 1156 continuation inside parentheses, brackets and braces. Long lines can be
1084 parens += 1 1186 parens += 1
1085 elif text in ')]}': 1187 elif text in ')]}':
1086 parens -= 1 1188 parens -= 1
1087 1189
1088 1190
1089 def break_around_binary_operator(logical_line, tokens): 1191 def _is_binary_operator(token_type, text):
1090 r""" 1192 is_op_token = token_type == tokenize.OP
1091 Avoid breaks before binary operators. 1193 is_conjunction = text in ['and', 'or']
1092 1194 # NOTE(sigmavirus24): Previously the not_a_symbol check was executed
1093 The preferred place to break around a binary operator is after the 1195 # conditionally. Since it is now *always* executed, text may be None.
1094 operator, not before it. 1196 # In that case we get a TypeError for `text not in str`.
1095 1197 not_a_symbol = text and text not in "()[]{},:.;@=%~"
1096 W503: (width == 0\n + height == 0) 1198 # The % character is strictly speaking a binary operator, but the
1097 W503: (width == 0\n and height == 0) 1199 # common usage seems to be to put it next to the format parameters,
1098 1200 # after a line break.
1099 Okay: (width == 0 +\n height == 0) 1201 return ((is_op_token or is_conjunction) and not_a_symbol)
1100 Okay: foo(\n -x) 1202
1101 Okay: foo(x\n []) 1203
1102 Okay: x = '''\n''' + '' 1204 def _break_around_binary_operators(tokens):
1103 Okay: foo(x,\n -y) 1205 """Private function to reduce duplication.
1104 Okay: foo(x, # comment\n -y) 1206
1105 Okay: var = (1 &\n ~2) 1207 This factors out the shared details between
1106 Okay: var = (1 /\n -2) 1208 :func:`break_before_binary_operator` and
1107 Okay: var = (1 +\n -1 +\n -2) 1209 :func:`break_after_binary_operator`.
1108 """ 1210 """
1109 def is_binary_operator(token_type, text):
1110 # The % character is strictly speaking a binary operator, but the
1111 # common usage seems to be to put it next to the format parameters,
1112 # after a line break.
1113 return ((token_type == tokenize.OP or text in ['and', 'or']) and
1114 text not in "()[]{},:.;@=%~")
1115
1116 line_break = False 1211 line_break = False
1117 unary_context = True 1212 unary_context = True
1118 # Previous non-newline token types and text 1213 # Previous non-newline token types and text
1119 previous_token_type = None 1214 previous_token_type = None
1120 previous_text = None 1215 previous_text = None
1122 if token_type == tokenize.COMMENT: 1217 if token_type == tokenize.COMMENT:
1123 continue 1218 continue
1124 if ('\n' in text or '\r' in text) and token_type != tokenize.STRING: 1219 if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
1125 line_break = True 1220 line_break = True
1126 else: 1221 else:
1127 if (is_binary_operator(token_type, text) and line_break and 1222 yield (token_type, text, previous_token_type, previous_text,
1128 not unary_context and 1223 line_break, unary_context, start)
1129 not is_binary_operator(previous_token_type,
1130 previous_text)):
1131 yield start, "W503 line break before binary operator"
1132 unary_context = text in '([{,;' 1224 unary_context = text in '([{,;'
1133 line_break = False 1225 line_break = False
1134 previous_token_type = token_type 1226 previous_token_type = token_type
1135 previous_text = text 1227 previous_text = text
1136 1228
1137 1229
1230 @register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n & ~2)
    W503: var = (1\n / -2)
    W503: var = (1\n + -1\n + -2)

    Okay: foo(\n -x)
    Okay: foo(x\n [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n -y)
    Okay: foo(x, # comment\n -y)
    """
    for (token_type, text, prev_type, prev_text,
         line_break, unary_context, start) in (
            _break_around_binary_operators(tokens)):
        # Only a token that follows a line break, outside a unary
        # context, is a candidate for W503.
        if unary_context or not line_break:
            continue
        if not _is_binary_operator(token_type, text):
            continue
        # Nothing is reported when the previous token is itself a
        # binary operator.
        if _is_binary_operator(prev_type, prev_text):
            continue
        yield start, "W503 line break before binary operator"
1258
1259
1260 @register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n ~2)

    Okay: foo(\n -x)
    Okay: foo(x\n [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n -y)
    Okay: foo(x, # comment\n -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n -2)
    Okay: var = (1 +\n -1 +\n -2)
    """
    for (token_type, text, prev_type, prev_text,
         line_break, unary_context, start) in (
            _break_around_binary_operators(tokens)):
        # Only a token that follows a line break, outside a unary
        # context, is a candidate for W504.
        if unary_context or not line_break:
            continue
        # The token before the break must be a binary operator ...
        if not _is_binary_operator(prev_type, prev_text):
            continue
        # ... and the token after the break must not be one.
        if _is_binary_operator(token_type, text):
            continue
        row, col = start[0], start[1]
        # The error is attributed to the row above the current token.
        yield (row - 1, col), "W504 line break after binary operator"
1292
1293
1294 @register_check
1138 def comparison_to_singleton(logical_line, noqa): 1295 def comparison_to_singleton(logical_line, noqa):
1139 r"""Comparison to singletons should use "is" or "is not". 1296 r"""Comparison to singletons should use "is" or "is not".
1140 1297
1141 Comparisons to singletons like None should always be done 1298 Comparisons to singletons like None should always be done
1142 with "is" or "is not", never the equality operators. 1299 with "is" or "is not", never the equality operators.
1167 msg += " or 'if %scond:'" % ('' if nonzero else 'not ') 1324 msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
1168 yield (match.start(2), ("%s comparison to %s should be %s" % 1325 yield (match.start(2), ("%s comparison to %s should be %s" %
1169 (code, singleton, msg)), singleton, msg) 1326 (code, singleton, msg)), singleton, msg)
1170 1327
1171 1328
1329 @register_check
1172 def comparison_negative(logical_line): 1330 def comparison_negative(logical_line):
1173 r"""Negative comparison should be done using "not in" and "is not". 1331 r"""Negative comparison should be done using "not in" and "is not".
1174 1332
1175 Okay: if x not in y:\n pass 1333 Okay: if x not in y:\n pass
1176 Okay: assert (X in Y or X is Z) 1334 Okay: assert (X in Y or X is Z)
1188 yield pos, "E713 test for membership should be 'not in'" 1346 yield pos, "E713 test for membership should be 'not in'"
1189 else: 1347 else:
1190 yield pos, "E714 test for object identity should be 'is not'" 1348 yield pos, "E714 test for object identity should be 'is not'"
1191 1349
1192 1350
1351 @register_check
1193 def comparison_type(logical_line, noqa): 1352 def comparison_type(logical_line, noqa):
1194 r"""Object type comparisons should always use isinstance(). 1353 r"""Object type comparisons should always use isinstance().
1195 1354
1196 Do not compare types directly. 1355 Do not compare types directly.
1197 1356
1211 if inst and isidentifier(inst) and inst not in SINGLETONS: 1370 if inst and isidentifier(inst) and inst not in SINGLETONS:
1212 return # Allow comparison for types which are not obvious 1371 return # Allow comparison for types which are not obvious
1213 yield match.start(), "E721 do not compare types, use 'isinstance()'" 1372 yield match.start(), "E721 do not compare types, use 'isinstance()'"
1214 1373
1215 1374
1375 @register_check
1216 def bare_except(logical_line, noqa): 1376 def bare_except(logical_line, noqa):
1217 r"""When catching exceptions, mention specific exceptions whenever possible. 1377 r"""When catching exceptions, mention specific exceptions when possible.
1218 1378
1219 Okay: except Exception: 1379 Okay: except Exception:
1220 Okay: except BaseException: 1380 Okay: except BaseException:
1221 E722: except: 1381 E722: except:
1222 """ 1382 """
1224 return 1384 return
1225 1385
1226 regex = re.compile(r"except\s*:") 1386 regex = re.compile(r"except\s*:")
1227 match = regex.match(logical_line) 1387 match = regex.match(logical_line)
1228 if match: 1388 if match:
1229 yield match.start(), "E722 do not use bare except'" 1389 yield match.start(), "E722 do not use bare 'except'"
1230 1390
1231 1391
1392 @register_check
1232 def ambiguous_identifier(logical_line, tokens): 1393 def ambiguous_identifier(logical_line, tokens):
1233 r"""Never use the characters 'l', 'O', or 'I' as variable names. 1394 r"""Never use the characters 'l', 'O', or 'I' as variable names.
1234 1395
1235 In some fonts, these characters are indistinguishable from the numerals 1396 In some fonts, these characters are indistinguishable from the numerals
1236 one and zero. When tempted to use 'l', use 'L' instead. 1397 one and zero. When tempted to use 'l', use 'L' instead.
1279 yield pos, "E741 ambiguous variable name '%s'", ident 1440 yield pos, "E741 ambiguous variable name '%s'", ident
1280 prev_text = text 1441 prev_text = text
1281 prev_start = start 1442 prev_start = start
1282 1443
1283 1444
1445 @register_check
1284 def python_3000_has_key(logical_line, noqa): 1446 def python_3000_has_key(logical_line, noqa):
1285 r"""The {}.has_key() method is removed in Python 3: use the 'in' operator. 1447 r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.
1286 1448
1287 Okay: if "alph" in d:\n print d["alph"] 1449 Okay: if "alph" in d:\n print d["alph"]
1288 W601: assert d.has_key('alph') 1450 W601: assert d.has_key('alph')
1290 pos = logical_line.find('.has_key(') 1452 pos = logical_line.find('.has_key(')
1291 if pos > -1 and not noqa: 1453 if pos > -1 and not noqa:
1292 yield pos, "W601 .has_key() is deprecated, use 'in'" 1454 yield pos, "W601 .has_key() is deprecated, use 'in'"
1293 1455
1294 1456
1457 @register_check
1295 def python_3000_raise_comma(logical_line): 1458 def python_3000_raise_comma(logical_line):
1296 r"""When raising an exception, use "raise ValueError('message')". 1459 r"""When raising an exception, use "raise ValueError('message')".
1297 1460
1298 The older form is removed in Python 3. 1461 The older form is removed in Python 3.
1299 1462
1303 match = RAISE_COMMA_REGEX.match(logical_line) 1466 match = RAISE_COMMA_REGEX.match(logical_line)
1304 if match and not RERAISE_COMMA_REGEX.match(logical_line): 1467 if match and not RERAISE_COMMA_REGEX.match(logical_line):
1305 yield match.end() - 1, "W602 deprecated form of raising exception" 1468 yield match.end() - 1, "W602 deprecated form of raising exception"
1306 1469
1307 1470
1471 @register_check
1308 def python_3000_not_equal(logical_line): 1472 def python_3000_not_equal(logical_line):
1309 r"""New code should always use != instead of <>. 1473 r"""New code should always use != instead of <>.
1310 1474
1311 The older syntax is removed in Python 3. 1475 The older syntax is removed in Python 3.
1312 1476
1316 pos = logical_line.find('<>') 1480 pos = logical_line.find('<>')
1317 if pos > -1: 1481 if pos > -1:
1318 yield pos, "W603 '<>' is deprecated, use '!='" 1482 yield pos, "W603 '<>' is deprecated, use '!='"
1319 1483
1320 1484
1485 @register_check
def python_3000_backticks(logical_line):
    r"""Use repr() instead of backticks in Python 3.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    if '`' in logical_line:
        # Only the first backtick of the logical line is reported.
        first_backtick = logical_line.index('`')
        yield first_backtick, "W604 backticks are deprecated, use 'repr()'"
1495
1496
1497 @register_check
def python_3000_invalid_escape_sequence(logical_line, tokens):
    r"""Invalid escape sequences are deprecated in Python 3.6.

    Okay: regex = r'\.png$'
    W605: regex = '\.png$'
    """
    # Each result is a 3-tuple: the (row, col) position of the offending
    # backslash, the message template and the escaped character that
    # fills the template's '%s'.
    #
    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
    valid = [
        '\n',
        '\\',
        '\'',
        '"',
        'a',
        'b',
        'f',
        'n',
        'r',
        't',
        'v',
        '0', '1', '2', '3', '4', '5', '6', '7',
        'x',

        # Escape sequences only recognized in string literals
        'N',
        'u',
        'U',
    ]

    for token_type, text, start, end, line in tokens:
        if token_type != tokenize.STRING:
            continue

        start_row, start_col = start
        quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
        # Extract string modifiers (e.g. u or r)
        quote_pos = text.index(quote)
        prefix = text[:quote_pos].lower()
        if 'r' in prefix:
            # Backslashes carry no escape meaning in raw literals.
            continue

        # Offset of the literal's body inside the token text.
        body_start = quote_pos + len(quote)
        string = text[body_start:-len(quote)]

        pos = string.find('\\')
        while pos >= 0:
            # Step onto the character that follows the backslash.
            pos += 1
            if string[pos] not in valid:
                # Translate the index inside the literal's body into an
                # absolute (row, col) position; a bare integer would be
                # interpreted as an offset into the logical line, which
                # it is not.
                row = start_row + string.count('\n', 0, pos)
                if row == start_row:
                    col = start_col + body_start + pos
                else:
                    col = pos - (1 + string.rfind('\n', 0, pos))
                yield (
                    (row, col - 1),  # column of the backslash itself
                    "W605 invalid escape sequence '\\%s'",
                    string[pos],
                )
            # Skip the escaped character so that '\\\\' is consumed as
            # one escape.
            pos = string.find('\\', pos + 1)
1546
1547
1548 @register_check
def python_3000_async_await_keywords(logical_line, tokens):
    """'async' and 'await' are reserved keywords starting with Python 3.7

    W606: async = 42
    W606: await = 42
    Okay: async def read_data(db):\n data = await db.fetch('SELECT ...')
    """
    # The Python tokenize library before Python 3.5 recognizes async/await as a
    # NAME token. Therefore, use a state machine to look for the possible
    # async/await constructs as defined by the Python grammar:
    # https://docs.python.org/3/reference/grammar.html
    #
    # Each reported result is a ((row, col), message) pair located at the
    # offending 'async' / 'await' name.
    message = ("W606 'async' and 'await' are reserved keywords starting with "
               "Python 3.7")

    # Either None (looking for a name) or a (kind, start) pair remembering
    # the pending 'async' / 'await' token.
    state = None
    for token_type, text, start, end, line in tokens:
        error = False

        if state is None:
            if token_type == tokenize.NAME:
                if text == 'async':
                    state = ('async_stmt', start)
                elif text == 'await':
                    state = ('await', start)
        elif state[0] == 'async_stmt':
            if token_type == tokenize.NAME and text in ('def', 'with', 'for'):
                # One of funcdef, with_stmt, or for_stmt. Return to looking
                # for async/await names.
                state = None
            else:
                error = True
        elif state[0] == 'await':
            if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
                # An await expression. Return to looking for async/await names.
                state = None
            elif token_type == tokenize.OP and text == '(':
                # A parenthesized operand, e.g. "await (yield)", is also
                # an await expression and must not be reported.
                state = None
            else:
                error = True

        if error:
            yield state[1], message
            state = None

    # Last token
    if state is not None:
        yield state[1], message
1330 1600
1331 1601
1332 ############################################################################## 1602 ##############################################################################
1333 # Helper functions 1603 # Helper functions
1334 ############################################################################## 1604 ##############################################################################
1359 1629
1360 def stdin_get_value(): 1630 def stdin_get_value():
1361 """Read the value from stdin.""" 1631 """Read the value from stdin."""
1362 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() 1632 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1363 1633
1364 noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search 1634 noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search)
1365 1635
1366 1636
1367 def expand_indent(line): 1637 def expand_indent(line):
1368 r"""Return the amount of indentation. 1638 r"""Return the amount of indentation.
1369 1639
1426 hunk_match = HUNK_REGEX.match(line) 1696 hunk_match = HUNK_REGEX.match(line)
1427 (row, nrows) = [int(g or '1') for g in hunk_match.groups()] 1697 (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
1428 rv[path].update(range(row, row + nrows)) 1698 rv[path].update(range(row, row + nrows))
1429 elif line[:3] == '+++': 1699 elif line[:3] == '+++':
1430 path = line[4:].split('\t', 1)[0] 1700 path = line[4:].split('\t', 1)[0]
1431 if path[:2] == 'b/': 1701 # Git diff will use (i)ndex, (w)ork tree, (c)ommit and (o)bject
1702 # instead of a/b/c/d as prefixes for patches
1703 if path[:2] in ('b/', 'w/', 'i/'):
1432 path = path[2:] 1704 path = path[2:]
1433 rv[path] = set() 1705 rv[path] = set()
1434 return dict([(os.path.join(parent, path), rows) 1706 return dict([(os.path.join(parent, path), rows)
1435 for (path, rows) in rv.items() 1707 for (path, rows) in rv.items()
1436 if rows and filename_match(path, patterns)]) 1708 if rows and filename_match(path, patterns)])
1482 token[1] == token[4]) 1754 token[1] == token[4])
1483 1755
1484 ############################################################################## 1756 ##############################################################################
1485 # Framework to run all checks 1757 # Framework to run all checks
1486 ############################################################################## 1758 ##############################################################################
1487
1488
1489 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1490
1491
1492 def _get_parameters(function):
1493 if sys.version_info >= (3, 3):
1494 return [parameter.name
1495 for parameter
1496 in inspect.signature(function).parameters.values()
1497 if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
1498 else:
1499 return inspect.getargspec(function)[0]
1500
1501
1502 def register_check(check, codes=None):
1503 """Register a new check object."""
1504 def _add_check(check, kind, codes, args):
1505 if check in _checks[kind]:
1506 _checks[kind][check][0].extend(codes or [])
1507 else:
1508 _checks[kind][check] = (codes or [''], args)
1509 if inspect.isfunction(check):
1510 args = _get_parameters(check)
1511 if args and args[0] in ('physical_line', 'logical_line'):
1512 if codes is None:
1513 codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
1514 _add_check(check, args[0], codes, args)
1515 elif inspect.isclass(check):
1516 if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
1517 _add_check(check, 'tree', codes, None)
1518
1519
1520 def init_checks_registry():
1521 """Register all globally visible functions.
1522
1523 The first argument name is either 'physical_line' or 'logical_line'.
1524 """
1525 mod = inspect.getmodule(register_check)
1526 for (name, function) in inspect.getmembers(mod, inspect.isfunction):
1527 register_check(function)
1528
1529
1530 init_checks_registry()
1531 1759
1532 1760
1533 class Checker(object): 1761 class Checker(object):
1534 """Load a Python source file, tokenize it, check coding style.""" 1762 """Load a Python source file, tokenize it, check coding style."""
1535 1763
1664 1892
1665 def check_logical(self): 1893 def check_logical(self):
1666 """Build a line from tokens and run all logical checks on it.""" 1894 """Build a line from tokens and run all logical checks on it."""
1667 self.report.increment_logical_line() 1895 self.report.increment_logical_line()
1668 mapping = self.build_tokens_line() 1896 mapping = self.build_tokens_line()
1669
1670 if not mapping: 1897 if not mapping:
1671 return 1898 return
1672 1899
1900 mapping_offsets = [offset for offset, _ in mapping]
1673 (start_row, start_col) = mapping[0][1] 1901 (start_row, start_col) = mapping[0][1]
1674 start_line = self.lines[start_row - 1] 1902 start_line = self.lines[start_row - 1]
1675 self.indent_level = expand_indent(start_line[:start_col]) 1903 self.indent_level = expand_indent(start_line[:start_col])
1676 if self.blank_before < self.blank_lines: 1904 if self.blank_before < self.blank_lines:
1677 self.blank_before = self.blank_lines 1905 self.blank_before = self.blank_lines
1683 self.init_checker_state(name, argument_names) 1911 self.init_checker_state(name, argument_names)
1684 for result in self.run_check(check, argument_names) or (): 1912 for result in self.run_check(check, argument_names) or ():
1685 offset, text = result[:2] 1913 offset, text = result[:2]
1686 args = result[2:] 1914 args = result[2:]
1687 if not isinstance(offset, tuple): 1915 if not isinstance(offset, tuple):
1688 for token_offset, pos in mapping: 1916 # As mappings are ordered, bisecting is a fast way
1689 if offset <= token_offset: 1917 # to find a given offset in them.
1690 break 1918 token_offset, pos = mapping[bisect.bisect_left(
1919 mapping_offsets, offset)]
1691 offset = (pos[0], pos[1] + offset - token_offset) 1920 offset = (pos[0], pos[1] + offset - token_offset)
1692 self.report_error_args( 1921 self.report_error_args(
1693 offset[0], offset[1], text, check, *args) 1922 offset[0], offset[1], text, check, *args)
1694 if self.logical_line: 1923 if self.logical_line:
1695 self.previous_indent_level = self.indent_level 1924 self.previous_indent_level = self.indent_level
1751 # check_physical() to give accurate feedback 1980 # check_physical() to give accurate feedback
1752 if noqa(token[4]): 1981 if noqa(token[4]):
1753 return 1982 return
1754 self.multiline = True 1983 self.multiline = True
1755 self.line_number = token[2][0] 1984 self.line_number = token[2][0]
1756 for line in token[1].split('\n')[:-1]: 1985 _, src, (_, offset), _, _ = token
1986 src = self.lines[self.line_number - 1][:offset] + src
1987 for line in src.split('\n')[:-1]:
1757 self.check_physical(line + '\n') 1988 self.check_physical(line + '\n')
1758 self.line_number += 1 1989 self.line_number += 1
1759 self.multiline = False 1990 self.multiline = False
1760 1991
1761 def check_all(self, expected=None, line_offset=0): 1992 def check_all(self, expected=None, line_offset=0):
2014 config_file = kwargs.pop('config_file', False) 2245 config_file = kwargs.pop('config_file', False)
2015 parser = kwargs.pop('parser', None) 2246 parser = kwargs.pop('parser', None)
2016 # build options from dict 2247 # build options from dict
2017 options_dict = dict(*args, **kwargs) 2248 options_dict = dict(*args, **kwargs)
2018 arglist = None if parse_argv else options_dict.get('paths', None) 2249 arglist = None if parse_argv else options_dict.get('paths', None)
2250 verbose = options_dict.get('verbose', None)
2019 options, self.paths = process_options( 2251 options, self.paths = process_options(
2020 arglist, parse_argv, config_file, parser) 2252 arglist, parse_argv, config_file, parser, verbose)
2021 if options_dict: 2253 if options_dict:
2022 options.__dict__.update(options_dict) 2254 options.__dict__.update(options_dict)
2023 if 'paths' in options_dict: 2255 if 'paths' in options_dict:
2024 self.paths = options_dict['paths'] 2256 self.paths = options_dict['paths']
2025 2257
2278 options.doctest = options.testsuite = False 2510 options.doctest = options.testsuite = False
2279 return options 2511 return options
2280 2512
2281 2513
2282 def process_options(arglist=None, parse_argv=False, config_file=None, 2514 def process_options(arglist=None, parse_argv=False, config_file=None,
2283 parser=None): 2515 parser=None, verbose=None):
2284 """Process options passed either via arglist or via command line args. 2516 """Process options passed either via arglist or via command line args.
2285 2517
2286 Passing in the ``config_file`` parameter allows other tools, such as flake8 2518 Passing in the ``config_file`` parameter allows other tools, such as flake8
2287 to specify their own options to be processed in pycodestyle. 2519 to specify their own options to be processed in pycodestyle.
2288 """ 2520 """
2302 # If parse_argv is True and arglist is None, arguments are 2534 # If parse_argv is True and arglist is None, arguments are
2303 # parsed from the command line (sys.argv) 2535 # parsed from the command line (sys.argv)
2304 (options, args) = parser.parse_args(arglist) 2536 (options, args) = parser.parse_args(arglist)
2305 options.reporter = None 2537 options.reporter = None
2306 2538
2539 # If verbosity was explicitly specified, override any `-v` CLI flag
2540 if verbose is not None:
2541 options.verbose = verbose
2542
2307 if options.ensure_value('testsuite', False): 2543 if options.ensure_value('testsuite', False):
2308 args.append(options.testsuite) 2544 args.append(options.testsuite)
2309 elif not options.ensure_value('doctest', False): 2545 elif not options.ensure_value('doctest', False):
2310 if parse_argv and not args: 2546 if parse_argv and not args:
2311 if options.diff or any(os.path.exists(name) 2547 if options.diff or any(os.path.exists(name)

eric ide

mercurial