Plugins/CheckerPlugins/CodeStyleChecker/pycodestyle.py

changeset 6732:1a4758379c45
parent 6645:ad476851d7e0
child 6733:519422827ace
--- a/Plugins/CheckerPlugins/CodeStyleChecker/pycodestyle.py	Mon Feb 11 19:31:23 2019 +0100
+++ b/Plugins/CheckerPlugins/CodeStyleChecker/pycodestyle.py	Mon Feb 11 22:22:12 2019 +0100
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# pycodestyle.py - Check Python source code formatting, according to PEP 8
+# pycodestyle.py - Check Python source code formatting, according to
+# PEP 8
 #
 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
@@ -58,7 +59,7 @@
 # - made messages translatable via Qt
 # - added code for eric6 integration
 #
-# Copyright (c) 2011 - 2018 Detlev Offenbach <detlev@die-offenbachs.de>
+# Copyright (c) 2011 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
 #
 
 import inspect
@@ -75,9 +76,9 @@
     from functools import lru_cache
 except ImportError:
     def lru_cache(maxsize=128):  # noqa as it's a fake implementation.
-        """Does not really need a real a lru_cache, it's just optimization, so
-        let's just do nothing here. Python 3.2+ will just get better
-        performances, time to upgrade?
+        """Does not really need a real a lru_cache, it's just
+        optimization, so let's just do nothing here. Python 3.2+ will
+        just get better performances, time to upgrade?
         """
         return lambda function: function
 
@@ -90,7 +91,7 @@
 except ImportError:
     from ConfigParser import RawConfigParser            # __IGNORE_WARNING__
 
-__version__ = '2.4.0-eric'
+__version__ = '2.5.0-eric'
 
 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
 DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
@@ -115,6 +116,7 @@
     # Methods and nested class and function.
     'method': 1,
 }
+MAX_DOC_LENGTH = 72
 REPORT_FORMAT = {
     'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
     'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
@@ -126,9 +128,12 @@
 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
 ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
+# Warn for -> function annotation operator in py3.5+ (issue 803)
+FUNCTION_RETURN_ANNOTATION_OP = ['->'] if sys.version_info >= (3, 5) else []
 WS_NEEDED_OPERATORS = frozenset([
     '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
-    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
+    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='] +
+    FUNCTION_RETURN_ANNOTATION_OP)
 WHITESPACE = frozenset(' \t')
 NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
 SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
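
Note: with '->' folded into WS_NEEDED_OPERATORS on Python 3.5+, a return
annotation written without surrounding spaces is reported like any other
operator that requires whitespace (E225). A minimal sketch of the effect,
inferred from the constant change above:

    def f(x)->int:  # E225: missing whitespace around operator
        return x

    def g(x) -> int:  # Okay
        return x
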
@@ -167,11 +172,6 @@
 )
 DUNDER_REGEX = re.compile(r'^__([^\s]+)__ = ')
 
-# Work around Python < 2.6 behaviour, which does not generate NL after
-# a comment which is on a line by itself.
-COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
-
-
 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
 
 
@@ -184,6 +184,7 @@
     else:
         return inspect.getargspec(function)[0]
 
+
 def register_check(check, codes=None):
     """Register a new check object."""
     def _add_check(check, kind, codes, args):
@@ -203,22 +204,23 @@
     return check
 
 
-##############################################################################
+########################################################################
 # Plugins (check functions) for physical lines
-##############################################################################
+########################################################################
 
 @register_check
 def tabs_or_spaces(physical_line, indent_char):
     r"""Never mix tabs and spaces.
 
     The most popular way of indenting Python is with spaces only.  The
-    second-most popular way is with tabs only.  Code indented with a mixture
-    of tabs and spaces should be converted to using spaces exclusively.  When
-    invoking the Python command line interpreter with the -t option, it issues
-    warnings about code that illegally mixes tabs and spaces.  When using -tt
-    these warnings become errors.  These options are highly recommended!
+    second-most popular way is with tabs only.  Code indented with a
+    mixture of tabs and spaces should be converted to using spaces
+    exclusively.  When invoking the Python command line interpreter with
+    the -t option, it issues warnings about code that illegally mixes
+    tabs and spaces.  When using -tt these warnings become errors.
+    These options are highly recommended!
 
     Okay: if a == 0:\n        a = 1\n        b = 1
     E101: if a == 0:\n        a = 1\n\tb = 1
     """
     indent = INDENT_REGEX.match(physical_line).group(1)
@@ -229,7 +231,7 @@
 
 @register_check
 def tabs_obsolete(physical_line):
-    r"""For new projects, spaces-only are strongly recommended over tabs.
+    r"""On new projects, spaces-only are strongly recommended over tabs.
 
     Okay: if True:\n    return
     W191: if True:\n\treturn
@@ -243,8 +245,8 @@
 def trailing_whitespace(physical_line):
     r"""Trailing whitespace is superfluous.
 
-    The warning returned varies on whether the line itself is blank, for easier
-    filtering for those who want to indent their blank lines.
+    The warning returned varies on whether the line itself is blank,
+    for easier filtering for those who want to indent their blank lines.
 
     Okay: spam(1)\n#
     W291: spam(1) \n#
@@ -272,10 +274,10 @@
     """
     if line_number == total_lines:
         stripped_last_line = physical_line.rstrip()
-        if not stripped_last_line:
+        if physical_line and not stripped_last_line:
             return 0, "W391 blank line at end of file"
         if stripped_last_line == physical_line:
-            return len(physical_line), "W292 no newline at end of file"
+            return len(lines[-1]), "W292 no newline at end of file"
 
 
 @register_check
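
Note: the two tweaks above change the reporting details only: W391 is no
longer raised when the final physical line is empty (e.g. an empty file),
and W292's offset is now computed from lines[-1]. A quick way to exercise
both codes, assuming the upstream pycodestyle package API (the eric copy
reports through report_error_args instead), with a hypothetical file name:

    import pycodestyle

    # 'example.py' is a hypothetical file missing its final newline
    style = pycodestyle.StyleGuide(select=['W292', 'W391'])
    error_count = style.input_file('example.py')  # prints findings, returns a count
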
@@ -284,11 +286,11 @@
     r"""Limit all lines to a maximum of 79 characters.
 
     There are still many devices around that are limited to 80 character
-    lines; plus, limiting windows to 80 characters makes it possible to have
-    several windows side-by-side.  The default wrapping on such devices looks
-    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
-    For flowing long blocks of text (docstrings or comments), limiting the
-    length to 72 characters is recommended.
+    lines; plus, limiting windows to 80 characters makes it possible to
+    have several windows side-by-side.  The default wrapping on such
+    devices looks ugly.  Therefore, please limit all lines to a maximum
+    of 79 characters. For flowing long blocks of text (docstrings or
+    comments), limiting the length to 72 characters is recommended.
 
     Reports error E501.
     """
@@ -298,8 +300,9 @@
         # Special case: ignore long shebang lines.
         if line_number == 1 and line.startswith('#!'):
             return
-        # Special case for long URLs in multi-line docstrings or comments,
-        # but still report the error when the 72 first chars are whitespaces.
+        # Special case for long URLs in multi-line docstrings or
+        # comments, but still report the error when the 72 first chars
+        # are whitespaces.
         chunks = line.split()
         if ((len(chunks) == 1 and multiline) or
             (len(chunks) == 2 and chunks[0] == '#')) and \
@@ -317,9 +320,9 @@
                     length, max_line_length)
 
 
-##############################################################################
+########################################################################
 # Plugins (check functions) for logical lines
-##############################################################################
+########################################################################
 
 
 @register_check
@@ -327,15 +330,18 @@
                 blank_before, previous_logical,
                 previous_unindented_logical_line, previous_indent_level,
                 lines):
-    r"""Separate top-level function and class definitions with two blank lines.
+    r"""Separate top-level function and class definitions with two blank
+    lines.
 
-    Method definitions inside a class are separated by a single blank line.
+    Method definitions inside a class are separated by a single blank
+    line.
 
-    Extra blank lines may be used (sparingly) to separate groups of related
-    functions.  Blank lines may be omitted between a bunch of related
-    one-liners (e.g. a set of dummy implementations).
+    Extra blank lines may be used (sparingly) to separate groups of
+    related functions.  Blank lines may be omitted between a bunch of
+    related one-liners (e.g. a set of dummy implementations).
 
-    Use blank lines in functions, sparingly, to indicate logical sections.
+    Use blank lines in functions, sparingly, to indicate logical
+    sections.
 
     Okay: def a():\n    pass\n\n\ndef b():\n    pass
     Okay: def a():\n    pass\n\n\nasync def b():\n    pass
@@ -374,6 +380,16 @@
             yield (0, "E303 too many blank lines (%d), expected %d",
                 blank_lines, top_level_lines)
     elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
+        # If this is a one-liner (i.e. the next line is not more
+        # indented), and the previous line is also not deeper
+        # (it would be better to check if the previous line is part
+        # of another def/class at the same level), don't require blank
+        # lines around this.
+        prev_line = lines[line_number - 2] if line_number >= 2 else ''
+        next_line = lines[line_number] if line_number < len(lines) else ''
+        if (expand_indent(prev_line) <= indent_level and
+                expand_indent(next_line) <= indent_level):
+            return
         if indent_level:
             if not (blank_before == method_lines or
                     previous_indent_level < indent_level or
@@ -381,7 +397,8 @@
                     ):
                 ancestor_level = indent_level
                 nested = False
-                # Search backwards for a def ancestor or tree root (top level).
+                # Search backwards for a def ancestor or tree root
+                # (top level).
                 for line in lines[line_number - top_level_lines::-1]:
                     if line.strip() and expand_indent(line) < ancestor_level:
                         ancestor_level = expand_indent(line)
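
Note: the early return added above exempts one-liner definitions from the
E30x blank-line rules whenever neither neighbouring line is more deeply
indented. A run of stub definitions such as the following should therefore
no longer need two blank lines between them (a sketch of the intended
behaviour, not an eric test case):

    def feed(): pass
    def water(): pass  # previously E302; now exempt as a one-liner
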
@@ -467,8 +484,8 @@
 
 @register_check
 def missing_whitespace_after_import_keyword(logical_line):
-    r"""Multiple imports in form from x import (a, b, c) should have space
-    between import statement and parenthesised name list.
+    r"""Multiple imports in form from x import (a, b, c) should have
+    space between import statement and parenthesised name list.
 
     Okay: from foo import (bar, baz)
     E275: from foo import(bar, baz)
@@ -515,8 +532,8 @@
                 indent_level, previous_indent_level):
     r"""Use 4 spaces per indentation level.
 
-    For really old code that you don't want to mess up, you can continue to
-    use 8-space tabs.
+    For really old code that you don't want to mess up, you can continue
+    to use 8-space tabs.
 
     Okay: a = 1
     Okay: if a == 0:\n    a = 1
@@ -541,6 +558,10 @@
     elif not indent_expect and indent_level > previous_indent_level:
         yield 0, tmpl % (3 + c, "unexpected indentation")
 
+    expected_indent_amount = 8 if indent_char == '\t' else 4
+    expected_indent_level = previous_indent_level + expected_indent_amount
+    if indent_expect and indent_level > expected_indent_level:
+        yield 0, tmpl % (7, 'over-indented')
+
 
 @register_check
 def continued_indentation(logical_line, tokens, indent_level, hang_closing,
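
Note: the added block introduces E117 (over-indented): after a line that
opens a block, indentation deeper than one level (4 spaces, or 8 when
indenting with tabs) is flagged. For illustration, assuming spaces:

    def over():
            return 1  # E117: over-indented (8 spaces where 4 are expected)

    def okay():
        return 1  # Okay: exactly one level deeper
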
@@ -553,8 +574,8 @@
 
     When using a hanging indent these considerations should be applied:
     - there should be no arguments on the first line, and
-    - further indentation should be used to clearly distinguish itself as a
-      continuation line.
+    - further indentation should be used to clearly distinguish itself
+      as a continuation line.
 
     Okay: a = (\n)
     E123: a = (\n    )
@@ -639,7 +660,8 @@
                     yield (start, "E124 closing bracket does not match "
                            "visual indentation")
             elif close_bracket and not hang:
-                # closing bracket matches indentation of opening bracket's line
+                # closing bracket matches indentation of opening
+                # bracket's line
                 if hang_closing:
                     yield start, "E133 closing bracket is missing indentation"
             elif indent[depth] and start[1] < indent[depth]:
@@ -657,7 +679,8 @@
                 # visual indent is verified
                 indent[depth] = start[1]
             elif visual_indent in (text, str):
-                # ignore token lined up with matching one from a previous line
+                # ignore token lined up with matching one from a
+                # previous line
                 pass
             else:
                 # indent is broken
@@ -870,7 +893,7 @@
             elif text in WS_NEEDED_OPERATORS:
                 need_space = True
             elif text in UNARY_OPERATORS:
-                # Check if the operator is being used as a binary operator
+                # Check if the operator is used as a binary operator
                 # Allow unary operators: -123, -x, +1.
                 # Allow argument unpacking: foo(*args, **kwargs).
                 if (prev_text in '}])' if prev_type == tokenize.OP
@@ -916,8 +939,8 @@
     r"""Don't use spaces around the '=' sign in function arguments.
 
     Don't use spaces around the '=' sign when used to indicate a
-    keyword argument or a default parameter value, except when using a type
-    annotation.
+    keyword argument or a default parameter value, except when
+    using a type annotation.
 
     Okay: def complex(real, imag=0.0):
     Okay: return magic(r=real, i=imag)
@@ -960,17 +983,17 @@
                 parens -= 1
             elif in_def and text == ':' and parens == 1:
                 annotated_func_arg = True
-            elif parens and text == ',' and parens == 1:
+            elif parens == 1 and text == ',':
                 annotated_func_arg = False
             elif parens and text == '=':
-                if not annotated_func_arg:
+                if annotated_func_arg and parens == 1:
+                    require_space = True
+                    if start == prev_end:
+                        yield (prev_end, missing_message)
+                else:
                     no_space = True
                     if start != prev_end:
                         yield (prev_end, message)
-                else:
-                    require_space = True
-                    if start == prev_end:
-                        yield (prev_end, missing_message)
             if not parens:
                 annotated_func_arg = False
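
Note: the reordered branches make the two cases explicit: an annotated
parameter's '=' must have surrounding spaces (E252, checked only at paren
depth 1), while a plain keyword/default '=' must not (E251). Roughly:

    def good(x, y=0, z: int = 1):  # Okay on both counts
        return x + y + z

    def bad(y =0, z: int=1):  # E251 at 'y =', E252 at 'int=1'
        return y + z
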
 
@@ -981,9 +1004,9 @@
 def whitespace_before_comment(logical_line, tokens):
     r"""Separate inline comments by at least two spaces.
 
-    An inline comment is a comment on the same line as a statement.  Inline
-    comments should be separated by at least two spaces from the statement.
-    They should start with a # and a single space.
+    An inline comment is a comment on the same line as a statement.
+    Inline comments should be separated by at least two spaces from the
+    statement. They should start with a # and a single space.
 
     Each line of a block comment starts with a # and a single space
     (unless it is indented text inside the comment).
@@ -1044,8 +1067,8 @@
         logical_line, indent_level, checker_state, noqa):
     r"""Place imports at the top of the file.
 
-    Always put imports at the top of the file, just after any module comments
-    and docstrings, and before module globals and constants.
+    Always put imports at the top of the file, just after any module
+    comments and docstrings, and before module globals and constants.
 
     Okay: import os
     Okay: # this is a comment\nimport os
@@ -1061,7 +1084,7 @@
     E402: a=1\nfrom sys import x
 
     Okay: if x:\n    import os
-    """
+    """  # noqa
     def is_string_literal(line):
         if line[0] in 'uUbB':
             line = line[1:]
@@ -1071,7 +1094,7 @@
 
     allowed_try_keywords = ('try', 'except', 'else', 'finally')
 
-    if indent_level:  # Allow imports in conditional statements or functions
+    if indent_level:  # Allow imports in conditional statement/function
         return
     if not logical_line:  # Allow empty lines or comments
         return
@@ -1084,11 +1107,12 @@
     elif re.match(DUNDER_REGEX, line):
         return
     elif any(line.startswith(kw) for kw in allowed_try_keywords):
-        # Allow try, except, else, finally keywords intermixed with imports in
-        # order to support conditional importing
+        # Allow try, except, else, finally keywords intermixed with
+        # imports in order to support conditional importing
         return
     elif is_string_literal(line):
-        # The first literal is a docstring, allow it. Otherwise, report error.
+        # The first literal is a docstring, allow it. Otherwise, report
+        # error.
         if checker_state.get('seen_docstring', False):
             checker_state['seen_non_imports'] = True
         else:
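
Note: taken together, these carve-outs let a module begin with a docstring,
dunder assignments, and try/except import fallbacks before E402 applies.
A compact illustration (module contents invented for the example):

    """Module docstring: allowed before imports."""
    __version__ = '1.0'  # dunder assignment: allowed

    try:  # conditional importing: allowed
        import simplejson as json
    except ImportError:
        import json

    CACHE = {}   # first non-import statement...
    import os    # ...so this import is reported as E402
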
@@ -1099,7 +1123,8 @@
 
 @register_check
 def compound_statements(logical_line):
-    r"""Compound statements (on the same line) are generally discouraged.
+    r"""Compound statements (on the same line) are generally
+    discouraged.
 
     While sometimes it's okay to put an if/for/while with a small body
     on the same line, never do this for multi-clause statements.
@@ -1130,7 +1155,7 @@
     last_char = len(line) - 1
     found = line.find(':')
     prev_found = 0
-    counts = dict((char, 0) for char in '{}[]()')
+    counts = {char: 0 for char in '{}[]()'}
     while -1 < found < last_char:
         update_counts(line[prev_found:found], counts)
         if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
@@ -1162,10 +1187,11 @@
 def explicit_line_join(logical_line, tokens):
     r"""Avoid explicit line join between brackets.
 
-    The preferred way of wrapping long lines is by using Python's implied line
-    continuation inside parentheses, brackets and braces.  Long lines can be
-    broken over multiple lines by wrapping expressions in parentheses.  These
-    should be used in preference to using a backslash for line continuation.
+    The preferred way of wrapping long lines is by using Python's
+    implied line continuation inside parentheses, brackets and braces.
+    Long lines can be broken over multiple lines by wrapping expressions
+    in parentheses.  These should be used in preference to using a
+    backslash for line continuation.
 
     E502: aaa = [123, \\n       123]
     E502: aaa = ("bbb " \\n       "ccc")
@@ -1202,8 +1228,8 @@
     is_op_token = token_type == tokenize.OP
     is_conjunction = text in ['and', 'or']
     # NOTE(sigmavirus24): Previously the not_a_symbol check was executed
-    # conditionally. Since it is now *always* executed, text may be None.
-    # In that case we get a TypeError for `text not in str`.
+    # conditionally. Since it is now *always* executed, text may be
+    # None. In that case we get a TypeError for `text not in str`.
     not_a_symbol = text and text not in "()[]{},:.;@=%~"
     # The % character is strictly speaking a binary operator, but the
     # common usage seems to be to put it next to the format parameters,
@@ -1290,6 +1316,7 @@
     Okay: var = (1 /\n       -2)
     Okay: var = (1 +\n       -1 +\n       -2)
     """
+    prev_start = None
     for context in _break_around_binary_operators(tokens):
         (token_type, text, previous_token_type, previous_text,
          line_break, unary_context, start) = context
@@ -1297,8 +1324,8 @@
                 line_break and
                 not unary_context and
                 not _is_binary_operator(token_type, text)):
-            error_pos = (start[0] - 1, start[1])
-            yield error_pos, "W504 line break after binary operator"
+            yield prev_start, "W504 line break after binary operator"
+        prev_start = start
 
 
 @register_check
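
Note: with prev_start tracked, W504 is now reported at the binary operator
itself (the last token before the break) rather than on the line above the
continuation. Both W503 and W504 remain in DEFAULT_IGNORE, so neither fires
unless explicitly selected:

    total = (1 +     # W504: line break after binary operator
             2)
    total = (1
             + 2)    # W503: line break before binary operator
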
@@ -1314,10 +1341,10 @@
     E712: if arg == True:
     E712: if False == arg:
 
-    Also, beware of writing if x when you really mean if x is not None --
-    e.g. when testing whether a variable or argument that defaults to None was
-    set to some other value.  The other value might have a type (such as a
-    container) that could be false in a boolean context!
+    Also, beware of writing if x when you really mean if x is not None
+    -- e.g. when testing whether a variable or argument that defaults to
+    None was set to some other value.  The other value might have a type
+    (such as a container) that could be false in a boolean context!
     """
     match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
     if match:
@@ -1367,9 +1394,9 @@
     Okay: if isinstance(obj, int):
     E721: if type(obj) is type(1):
 
-    When checking if an object is a string, keep in mind that it might be a
-    unicode string too! In Python 2.3, str and unicode have a common base
-    class, basestring, so you can do:
+    When checking if an object is a string, keep in mind that it might
+    be a unicode string too! In Python 2.3, str and unicode have a
+    common base class, basestring, so you can do:
 
     Okay: if isinstance(obj, basestring):
     Okay: if type(a1) is type(b1):
@@ -1384,7 +1411,8 @@
 
 @register_check
 def bare_except(logical_line, noqa):
-    r"""When catching exceptions, mention specific exceptions when possible.
+    r"""When catching exceptions, mention specific exceptions when
+    possible.
 
     Okay: except Exception:
     Okay: except BaseException:
@@ -1403,8 +1431,8 @@
 def ambiguous_identifier(logical_line, tokens):
     r"""Never use the characters 'l', 'O', or 'I' as variable names.
 
-    In some fonts, these characters are indistinguishable from the numerals
-    one and zero. When tempted to use 'l', use 'L' instead.
+    In some fonts, these characters are indistinguishable from the
+    numerals one and zero. When tempted to use 'l', use 'L' instead.
 
     Okay: L = 0
     Okay: o = 123
@@ -1413,9 +1441,9 @@
     E741: O = 123
     E741: I = 42
 
-    Variables can be bound in several other contexts, including class and
-    function definitions, 'global' and 'nonlocal' statements, exception
-    handlers, and 'with' statements.
+    Variables can be bound in several other contexts, including class
+    and function definitions, 'global' and 'nonlocal' statements,
+    exception handlers, and 'with' statements.
 
     Okay: except AttributeError as o:
     Okay: with lock as L:
@@ -1435,7 +1463,7 @@
             if prev_text in idents_to_avoid:
                 ident = prev_text
                 pos = prev_start
-        # identifiers bound to a value with 'as', 'global', or 'nonlocal'
+        # identifiers bound to values with 'as', 'global', or 'nonlocal'
         if prev_text in ('as', 'global', 'nonlocal'):
             if text in idents_to_avoid:
                 ident = text
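
Note: as the updated comment says, E741 covers every binding form handled
here, not just plain assignment. For instance:

    l = 0                        # E741: ambiguous variable name 'l'
    with open(__file__) as O:    # E741: 'O' bound with 'as'
        pass
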
@@ -1454,7 +1482,8 @@
 
 @register_check
 def python_3000_has_key(logical_line, noqa):
-    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.
+    r"""The {}.has_key() method is removed in Python 3: use the 'in'
+    operator.
 
     Okay: if "alph" in d:\n    print d["alph"]
     W601: assert d.has_key('alph')
@@ -1505,12 +1534,15 @@
 
 
 @register_check
-def python_3000_invalid_escape_sequence(logical_line, tokens):
+def python_3000_invalid_escape_sequence(logical_line, tokens, noqa):
     r"""Invalid escape sequences are deprecated in Python 3.6.
 
     Okay: regex = r'\.png$'
     W605: regex = '\.png$'
     """
+    if noqa:
+        return
+
     # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
     valid = [
         '\n',
@@ -1535,6 +1567,7 @@
 
     for token_type, text, start, end, line in tokens:
         if token_type == tokenize.STRING:
+            start_line, start_col = start
             quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
             # Extract string modifiers (e.g. u or r)
             quote_pos = text.index(quote)
@@ -1547,8 +1580,13 @@
                 while pos >= 0:
                     pos += 1
                     if string[pos] not in valid:
+                        line = start_line + string.count('\n', 0, pos)
+                        if line == start_line:
+                            col = start_col + len(prefix) + len(quote) + pos
+                        else:
+                            col = pos - string.rfind('\n', 0, pos) - 1
                         yield (
-                            pos,
+                            (line, col - 1),
                             "W605 invalid escape sequence '\\%s'",
                             string[pos],
                         )
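
Note: by capturing the token's start coordinates, W605 can now be reported
as an absolute (line, column) pair, which matters for escapes inside
multi-line strings. What the check flags is unchanged:

    import re

    bad = re.compile('\d+')    # W605: invalid escape sequence '\d' (deprecated since 3.6)
    good = re.compile(r'\d+')  # Okay: raw string literal
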
@@ -1557,40 +1595,56 @@
 
 @register_check
 def python_3000_async_await_keywords(logical_line, tokens):
-    """'async' and 'await' are reserved keywords starting with Python 3.7
+    """'async' and 'await' are reserved keywords starting at Python 3.7.
 
     W606: async = 42
     W606: await = 42
-    Okay: async def read_data(db):\n    data = await db.fetch('SELECT ...')
+    Okay: async def read(db):\n    data = await db.fetch('SELECT ...')
     """
-    # The Python tokenize library before Python 3.5 recognizes async/await as a
-    # NAME token. Therefore, use a state machine to look for the possible
-    # async/await constructs as defined by the Python grammar:
+    # The Python tokenize library before Python 3.5 recognizes
+    # async/await as a NAME token. Therefore, use a state machine to
+    # look for the possible async/await constructs as defined by the
+    # Python grammar:
     # https://docs.python.org/3/reference/grammar.html
 
     state = None
     for token_type, text, start, end, line in tokens:
         error = False
 
+        if token_type == tokenize.NL:
+            continue
+
         if state is None:
             if token_type == tokenize.NAME:
                 if text == 'async':
                     state = ('async_stmt', start)
                 elif text == 'await':
                     state = ('await', start)
+                elif (token_type == tokenize.NAME and
+                      text in ('def', 'for')):
+                    state = ('define', start)
+
         elif state[0] == 'async_stmt':
             if token_type == tokenize.NAME and text in ('def', 'with', 'for'):
-                # One of funcdef, with_stmt, or for_stmt. Return to looking
-                # for async/await names.
+                # One of funcdef, with_stmt, or for_stmt. Return to
+                # looking for async/await names.
                 state = None
             else:
                 error = True
         elif state[0] == 'await':
-            if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
-                # An await expression. Return to looking for async/await names.
+            if token_type == tokenize.NAME:
+                # An await expression. Return to looking for async/await
+                # names.
+                state = None
+            elif token_type == tokenize.OP and text == '(':
                 state = None
             else:
                 error = True
+        elif state[0] == 'define':
+            if token_type == tokenize.NAME and text in ('async', 'await'):
+                error = True
+            else:
+                state = None
 
         if error:
             yield (
@@ -1609,9 +1663,64 @@
         )
 
 
-##############################################################################
+########################################################################
+@register_check
+def maximum_doc_length(logical_line, max_doc_length, noqa, tokens):
+    r"""Limit all doc lines to a maximum of 72 characters.
+
+    For flowing long blocks of text (docstrings or comments), limiting
+    the length to 72 characters is recommended.
+
+    Reports warning W505
+    """
+    if max_doc_length is None or noqa:
+        return
+
+    prev_token = None
+    skip_lines = set()
+    # Skip lines that contain code (their strings are not doc lines)
+    for token_type, text, start, end, line in tokens:
+        if token_type not in SKIP_COMMENTS.union([tokenize.STRING]):
+            skip_lines.add(line)
+
+    for token_type, text, start, end, line in tokens:
+        # Skip lines that aren't pure strings
+        if token_type == tokenize.STRING and skip_lines:
+            continue
+        if token_type in (tokenize.STRING, tokenize.COMMENT):
+            # Only check comment-only lines
+            if prev_token is None or prev_token in SKIP_TOKENS:
+                lines = line.splitlines()
+                for line_num, physical_line in enumerate(lines):
+                    if hasattr(physical_line, 'decode'):  # Python 2
+                        # The line could contain multi-byte characters
+                        try:
+                            physical_line = physical_line.decode('utf-8')
+                        except UnicodeError:
+                            pass
+                    if start[0] + line_num == 1 and line.startswith('#!'):
+                        return
+                    length = len(physical_line)
+                    chunks = physical_line.split()
+                    if token_type == tokenize.COMMENT:
+                        if (len(chunks) == 2 and
+                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
+                            continue
+                    if len(chunks) == 1 and line_num + 1 < len(lines):
+                        if (len(chunks) == 1 and
+                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
+                            continue
+                    if length > max_doc_length:
+                        doc_error = (start[0] + line_num, max_doc_length)
+                        yield (doc_error, "W505 doc line too long "
+                                          "(%d > %d characters)",
+                                length, max_doc_length)
+        prev_token = token_type
+
+
+########################################################################
 # Helper functions
-##############################################################################
+########################################################################
 
 
 if sys.version_info < (3,):
@@ -1708,14 +1817,16 @@
             rv[path].update(range(row, row + nrows))
         elif line[:3] == '+++':
             path = line[4:].split('\t', 1)[0]
-            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and (o)bject
-            # instead of a/b/c/d as prefixes for patches
+            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and
+            # (o)bject instead of a/b/c/d as prefixes for patches
             if path[:2] in ('b/', 'w/', 'i/'):
                 path = path[2:]
             rv[path] = set()
-    return dict([(os.path.join(parent, path), rows)
-                 for (path, rows) in rv.items()
-                 if rows and filename_match(path, patterns)])
+    return {
+        os.path.join(parent, filepath): rows
+        for (filepath, rows) in rv.items()
+        if rows and filename_match(filepath, patterns)
+    }
 
 
 def normalize_paths(value, parent=os.curdir):
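
Note: parse_udiff (behaviour unchanged here, just modernised to a dict
comprehension) maps each touched file to the set of new-side line numbers
taken from its hunk headers. A small sketch with a made-up diff, assuming
the function is importable as pycodestyle.parse_udiff:

    import pycodestyle

    lines = ['--- a/pkg/mod.py', '+++ b/pkg/mod.py', '@@ -1,2 +1,3 @@',
             ' import os', '+import sys', ' print(os.name)']
    selected = pycodestyle.parse_udiff('\n'.join(lines) + '\n',
                                       patterns=['*.py'], parent='.')
    # selected == {'./pkg/mod.py': {1, 2, 3}} on POSIX
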
@@ -1758,14 +1869,9 @@
     return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
 
 
-if COMMENT_WITH_NL:
-    def _is_eol_token(token, _eol_token=_is_eol_token):
-        return _eol_token(token) or (token[0] == tokenize.COMMENT and
-                                     token[1] == token[4])
-
-##############################################################################
+########################################################################
 # Framework to run all checks
-##############################################################################
+########################################################################
 
 
 class Checker(object):
@@ -1782,6 +1888,7 @@
         self._logical_checks = options.logical_checks
         self._ast_checks = options.ast_checks
         self.max_line_length = options.max_line_length
+        self.max_doc_length = options.max_doc_length
         self.multiline = False  # in a multiline string?
         self.hang_closing = options.hang_closing
         self.verbose = options.verbose
@@ -1953,7 +2060,7 @@
                     self.report_error_args(lineno, *args[1:])
 
     def generate_tokens(self):
-        """Tokenize the file, run physical line checks and yield tokens."""
+        """Tokenize file, run physical line checks and yield tokens."""
         if self._io_error:
             self.report_error_args(1, 0, 'E902', self._io_error, readlines)
         tokengen = tokenize.generate_tokens(self.readline)
@@ -1968,7 +2075,7 @@
             self.report_invalid_syntax()
 
     def maybe_check_physical(self, token):
-        """If appropriate (based on token), check current physical line(s)."""
+        """If appropriate for token, check current physical line(s)."""
         # Called after every token, but act only on end of line.
         if _is_eol_token(token):
             # Obviously, a newline token ends a single physical line.
@@ -1976,15 +2083,16 @@
         elif token[0] == tokenize.STRING and '\n' in token[1]:
             # Less obviously, a string that contains newlines is a
             # multiline string, either triple-quoted or with internal
-            # newlines backslash-escaped. Check every physical line in the
-            # string *except* for the last one: its newline is outside of
-            # the multiline string, so we consider it a regular physical
-            # line, and will check it like any other physical line.
+            # newlines backslash-escaped. Check every physical line in
+            # the string *except* for the last one: its newline is
+            # outside of the multiline string, so we consider it a
+            # regular physical line, and will check it like any other
+            # physical line.
             #
             # Subtleties:
-            # - we don't *completely* ignore the last line; if it contains
-            #   the magical "# noqa" comment, we disable all physical
-            #   checks for the entire multiline string
+            # - we don't *completely* ignore the last line; if it
+            #   contains the magical "# noqa" comment, we disable all
+            #   physical checks for the entire multiline string
             # - have to wind self.line_number back because initially it
             #   points to the last line of the string, and we want
             #   check_physical() to give accurate feedback
@@ -2039,14 +2147,6 @@
                         del self.tokens[0]
                     else:
                         self.check_logical()
-                elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
-                    if len(self.tokens) == 1:
-                        # The comment also ends a physical line
-                        token = list(token)
-                        token[1] = text.rstrip('\r\n')
-                        token[3] = (token[2][0], token[2][1] + len(token[1]))
-                        self.tokens = [tuple(token)]
-                        self.check_logical()
         if self.tokens:
             self.check_physical(self.lines[-1])
             self.check_logical()
@@ -2133,8 +2233,8 @@
 
     def get_count(self, prefix=''):
         """Return the total count of errors and warnings."""
-        return sum([self.counters[key]
-                    for key in self.messages if key.startswith(prefix)])
+        return sum(self.counters[key]
+                   for key in self.messages if key.startswith(prefix))
 
     def get_statistics(self, prefix=''):
         """Get statistics for message codes that start with the prefix.
@@ -2163,7 +2263,7 @@
 
 
 class FileReport(BaseReport):
-    """Collect the results of the checks and print only the filenames."""
+    """Collect the results of the checks and print the filenames."""
 
     print_filename = True
 
@@ -2204,7 +2304,7 @@
         return code
 
     def get_file_results(self):
-        """Print the result and return the overall count for this file."""
+        """Print results and return the overall count for this file."""
         self._deferred_print.sort()
         for line_number, offset, code, text, doc in self._deferred_print:
             print(self._fmt % {
@@ -2223,8 +2323,8 @@
                 print('    ' + doc.strip())
 
             # stdout is block buffered when not stdout.isatty().
-            # line can be broken where buffer boundary since other processes
-            # write to same file.
+            # line can be broken where buffer boundary since other
+            # processes write to same file.
             # flush() after print() to avoid buffer boundary.
             # Typical buffer size is 8192. line written safely when
             # len(line) < 8192.
@@ -2345,7 +2445,7 @@
     def excluded(self, filename, parent=None):
         """Check if the file should be excluded.
 
-        Check if 'options.exclude' contains a pattern that matches filename.
+        Check if 'options.exclude' contains a pattern matching filename.
         """
         if not self.options.exclude:
             return False
@@ -2373,8 +2473,8 @@
     def get_checks(self, argument_name):
         """Get all the checks for this category.
 
-        Find all globally visible functions where the first argument name
-        starts with argument_name and which contain selected tests.
+        Find all globally visible functions where the first argument
+        name starts with argument_name and which contain selected tests.
         """
         checks = []
         for check, attrs in _checks[argument_name].items():
@@ -2390,8 +2490,8 @@
                           usage="%prog [options] input ...")
     parser.config_options = [
         'exclude', 'filename', 'select', 'ignore', 'max-line-length',
-        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
-        'show-source', 'statistics', 'verbose']
+        'max-doc-length', 'hang-closing', 'count', 'format', 'quiet',
+        'show-pep8', 'show-source', 'statistics', 'verbose']
     parser.add_option('-v', '--verbose', default=0, action='count',
                       help="print status messages, or debug with -vv")
     parser.add_option('-q', '--quiet', default=0, action='count',
@@ -2427,6 +2527,10 @@
                       default=MAX_LINE_LENGTH,
                       help="set maximum allowed line length "
                            "(default: %default)")
+    parser.add_option('--max-doc-length', type='int', metavar='n',
+                      default=None,
+                      help="set maximum allowed doc line length and perform "
+                           "these checks (unchecked if not set)")
     parser.add_option('--hang-closing', action='store_true',
                       help="hang closing bracket instead of matching "
                            "indentation of opening bracket's line")
@@ -2449,12 +2553,13 @@
 def read_config(options, args, arglist, parser):
     """Read and parse configurations.
 
-    If a config file is specified on the command line with the "--config"
-    option, then only it is used for configuration.
+    If a config file is specified on the command line with the
+    "--config" option, then only it is used for configuration.
 
-    Otherwise, the user configuration (~/.config/pycodestyle) and any local
-    configurations in the current directory or above will be merged together
-    (in that order) using the read method of ConfigParser.
+    Otherwise, the user configuration (~/.config/pycodestyle) and any
+    local configurations in the current directory or above will be
+    merged together (in that order) using the read method of
+    ConfigParser.
     """
     config = RawConfigParser()
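
Note: because 'max-doc-length' was added to parser.config_options above, the
same setting can come from the merged configuration files read here, e.g. in
a project's setup.cfg or tox.ini (values illustrative):

    [pycodestyle]
    max-line-length = 79
    max-doc-length = 72
    ignore = E226,W503,W504
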
 
@@ -2489,8 +2594,7 @@
         warnings.warn('[pep8] section is deprecated. Use [pycodestyle].')
 
     if pycodestyle_section:
-        option_list = dict([(o.dest, o.type or o.action)
-                            for o in parser.option_list])
+        option_list = {o.dest: o.type or o.action for o in parser.option_list}
 
         # First, read the default values
         (new_options, __) = parser.parse_args([])
@@ -2523,10 +2627,10 @@
 
 def process_options(arglist=None, parse_argv=False, config_file=None,
                     parser=None, verbose=None):
-    """Process options passed either via arglist or via command line args.
+    """Process options passed either via arglist or command line args.
 
-    Passing in the ``config_file`` parameter allows other tools, such as flake8
-    to specify their own options to be processed in pycodestyle.
+    Passing in the ``config_file`` parameter allows other tools, such as
+    flake8 to specify their own options to be processed in pycodestyle.
     """
     if not parser:
         parser = get_parser()
@@ -2546,7 +2650,7 @@
     (options, args) = parser.parse_args(arglist)
     options.reporter = None
 
-    # If explicity specified verbosity, override any `-v` CLI flag
+    # If explicitly specified verbosity, override any `-v` CLI flag
     if verbose is not None:
         options.verbose = verbose
 
