UtilitiesPython2/Pep257CheckerPy2.py

Mon, 16 Sep 2013 19:47:06 +0200

author
Detlev Offenbach <detlev@die-offenbachs.de>
date
Mon, 16 Sep 2013 19:47:06 +0200
changeset 2917
fe82710d02cb
child 2929
28ab0bc63d69
permissions
-rw-r--r--

Did the Python2 variant of the PEP-257 checker.

# -*- coding: utf-8 -*-

# Copyright (c) 2013 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing a checker for PEP-257 documentation string conventions.
"""

#
# The routines of the checker class are modeled after the ones found in
# pep257.py (version 0.2.4).
#

try:
    # Python 2
    from StringIO import StringIO       # __IGNORE_EXCEPTION__
except ImportError:
    # Python 3
    from io import StringIO             # __IGNORE_WARNING__
import tokenize


class Pep257Context(object):
    """
    Class implementing the source context.
    """
    def __init__(self, source, startLine, contextType):
        """
        Constructor
        
        @param source source code of the context (list of string or string)
        @param startLine line number the context starts in the source (integer)
        @param contextType type of the context object (string)
        """
        if isinstance(source, str):
            self.__source = source.splitlines(True)
        else:
            self.__source = source[:]
        self.__start = startLine
        self.__indent = ""
        self.__type = contextType
        
        # ensure first line is left justified
        if self.__source:
            self.__indent = self.__source[0].replace(
                self.__source[0].lstrip(), "")
            self.__source[0] = self.__source[0].lstrip()
    
    def source(self):
        """
        Public method to get the source.
        
        @return source (list of string)
        """
        return self.__source
    
    def ssource(self):
        """
        Public method to get the joined source lines.
        
        @return source (string)
        """
        return "".join(self.__source)
    
    def start(self):
        """
        Public method to get the start line number.
        
        @return start line number (integer)
        """
        return self.__start
    
    def end(self):
        """
        Public method to get the end line number.
        
        @return end line number (integer)
        """
        return self.__start + len(self.__source) - 1
    
    def indent(self):
        """
        Public method to get the indentation of the first line.
        
        @return indentation string (string)
        """
        return self.__indent
    
    def contextType(self):
        """
        Public method to get the context type.
        
        @return context type (string)
        """
        return self.__type


class Pep257Checker(object):
    """
    Class implementing a checker for PEP-257 documentation string conventions.
    """
    Codes = [
        "D101", "D102", "D103", "D104", "D105",
        "D111", "D112", "D113",
        "D121", "D122",
        "D131", "D132", "D133", "D134",
        "D141", "D142", "D143", "D144", "D145",
    ]
    
    def __init__(self, source, filename, select, ignore, expected, repeat,
                 maxLineLength=79):
        """
        Constructor (according to 'extended' pep8.py API)
        
        @param source source code to be checked (list of string)
        @param filename name of the source file (string)
        @param select list of selected codes (list of string)
        @param ignore list of codes to be ignored (list of string)
        @param expected list of expected codes (list of string)
        @param repeat flag indicating to report each occurrence of a code
            (boolean)
        @param maxLineLength allowed line length (integer)
        """
        self.__select = tuple(select)
        self.__ignore = tuple(ignore)
        self.__expected = expected[:]
        self.__repeat = repeat
        self.__maxLineLength = maxLineLength
        self.__filename = filename
        self.__source = source[:]
        self.__isScript = self.__source[0].startswith('#!')
        
        # statistics counters
        self.counters = {}
        
        # collection of detected errors
        self.errors = []
        
        self.__lineNumber = 0
        
        # caches
        self.__functionsCache = None
        self.__classesCache = None
        self.__methodsCache = None
        
        self.__keywords = [
            'moduleDocstring', 'functionDocstring',
            'classDocstring', 'methodDocstring',
            'defDocstring', 'docstring'
        ]
        self.__checkersWithCodes = {
            "moduleDocstring": [
                (self.__checkModulesDocstrings, ("D101",)),
            ],
            "functionDocstring": [
            ],
            "classDocstring": [
                (self.__checkClassDocstring, ("D104", "D105")),
                (self.__checkBlankBeforeAndAfterClass, ("D142", "D143")),
            ],
            "methodDocstring": [
            ],
            "defDocstring": [
                (self.__checkFunctionDocstring, ("D102", "D103")),
                (self.__checkImperativeMood, ("D132",)),
                (self.__checkNoSignature, ("D133",)),
                (self.__checkReturnType, ("D134",)),
                (self.__checkNoBlankLineBefore, ("D141",)),
            ],
            "docstring": [
                (self.__checkTripleDoubleQuotes, ("D111",)),
                (self.__checkBackslashes, ("D112",)),
                (self.__checkUnicode, ("D113",)),
                (self.__checkOneLiner, ("D121",)),
                (self.__checkIndent, ("D122",)),
                (self.__checkEndsWithPeriod, ("D131",)),
                (self.__checkBlankAfterSummary, ("D144",)),
                (self.__checkBlankAfterLastParagraph, ("D145",)),
            ],
        }
        
        self.__checkers = {}
        for key, checkers in self.__checkersWithCodes.items():
            for checker, codes in checkers:
                if any(not (code and self.__ignoreCode(code))
                        for code in codes):
                    if key not in self.__checkers:
                        self.__checkers[key] = []
                    self.__checkers[key].append(checker)
    
    def __ignoreCode(self, code):
        """
        Private method to check if the error code should be ignored.

        @param code message code to check for (string)
        @return flag indicating to ignore the given code (boolean)
        """
        return (code.startswith(self.__ignore) and
                not code.startswith(self.__select))
    
    def __error(self, lineNumber, offset, code, *args):
        """
        Private method to record an issue.
        
        @param lineNumber line number of the issue (integer)
        @param offset position within line of the issue (integer)
        @param code message code (string)
        @param args arguments for the message (list)
        """
        if self.__ignoreCode(code):
            return
        
        if code in self.counters:
            self.counters[code] += 1
        else:
            self.counters[code] = 1
        
        # Don't care about expected codes
        if code in self.__expected:
            return
        
        if code and (self.counters[code] == 1 or self.__repeat):
            # record the issue with one based line number
            self.errors.append(
                (self.__filename, lineNumber + 1, offset, code, args))
    
    def __resetReadline(self):
        """
        Private method to reset the internal readline function.
        """
        self.__lineNumber = 0
    
    def __readline(self):
        """
        Private method to get the next line from the source.
        
        @return next line of source (string)
        """
        self.__lineNumber += 1
        if self.__lineNumber > len(self.__source):
            return ''
        return self.__source[self.__lineNumber - 1]
    
    def run(self):
        """
        Public method to check the given source for violations of doc string
        conventions according to PEP-257.
        """
        if not self.__source or not self.__filename:
            # don't do anything, if essential data is missing
            return
        
        for keyword in self.__keywords:
            if keyword in self.__checkers:
                for check in self.__checkers[keyword]:
                    for context in self.__parseContexts(keyword):
                        docstring = self.__parseDocstring(context, keyword)
                        check(docstring, context)
    
    def __getSummaryLine(self, docstringContext):
        """
        Private method to extract the summary line.
        
        @param docstringContext docstring context (Pep257Context)
        @return summary line (string) and the line it was found on (integer)
        """
        lines = docstringContext.source()
        
        line = (lines[0]
                .replace('r"""', "", 1)
                .replace('u"""', "", 1)
                .replace('"""', "")
                .replace("r'''", "", 1)
                .replace("u'''", "", 1)
                .replace("'''", "")
                .strip())
        
        if len(lines) == 1 or len(line) > 0:
            return line, 0
        return lines[1].strip(), 1
    
    ##################################################################
    ## Parsing functionality below
    ##################################################################
    
    def __parseModuleDocstring(self, source):
        """
        Private method to extract a docstring given a module source.
        
        @param source source to parse (list of string)
        @return context of extracted docstring (Pep257Context)
        """
        for kind, value, (line, char), _, _ in tokenize.generate_tokens(
                StringIO("".join(source)).readline):
            if kind in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
                continue
            elif kind == tokenize.STRING:  # first STRING should be docstring
                return Pep257Context(value, line - 1, "docstring")
            else:
                return None

    def __parseDocstring(self, context, what=''):
        """
        Private method to extract a docstring given `def` or `class` source.
        
        @param context context data to get the docstring from (Pep257Context)
        @return context of extracted docstring (Pep257Context)
        """
        moduleDocstring = self.__parseModuleDocstring(context.source())
        if what.startswith('module') or context.contextType() == "module":
            return moduleDocstring
        if moduleDocstring:
            return moduleDocstring
        
        tokenGenerator = tokenize.generate_tokens(
            StringIO(context.ssource()).readline)
        try:
            kind = None
            while kind != tokenize.INDENT:
                kind, _, _, _, _ = next(tokenGenerator)
            kind, value, (line, char), _, _ = next(tokenGenerator)
            if kind == tokenize.STRING:  # STRING after INDENT is a docstring
                return Pep257Context(
                    value, context.start() + line - 1, "docstring")
        except StopIteration:
            pass
        
        return None
    
    def __parseTopLevel(self, keyword):
        """
        Private method to extract top-level functions or classes.
        
        @param keyword keyword signaling what to extract (string)
        @return extracted function or class contexts (list of Pep257Context)
        """
        self.__resetReadline()
        tokenGenerator = tokenize.generate_tokens(self.__readline)
        kind, value, char = None, None, None
        contexts = []
        try:
            while True:
                start, end = None, None
                while not (kind == tokenize.NAME and
                           value == keyword and
                           char == 0):
                    kind, value, (line, char), _, _ = next(tokenGenerator)
                start = line - 1, char
                while not (kind == tokenize.DEDENT and
                           value == '' and
                           char == 0):
                    kind, value, (line, char), _, _ = next(tokenGenerator)
                end = line - 1, char
                contexts.append(Pep257Context(
                    self.__source[start[0]:end[0]], start[0], keyword))
        except StopIteration:
            return contexts
    
    def __parseFunctions(self):
        """
        Private method to extract top-level functions.
        
        @return extracted function contexts (list of Pep257Context)
        """
        if not self.__functionsCache:
            self.__functionsCache = self.__parseTopLevel('def')
        return self.__functionsCache
    
    def __parseClasses(self):
        """
        Private method to extract top-level classes.
        
        @return extracted class contexts (list of Pep257Context)
        """
        if not self.__classesCache:
            self.__classesCache = self.__parseTopLevel('class')
        return self.__classesCache
    
    def __skipIndentedBlock(self, tokenGenerator):
        """
        Private method to skip over an indented block of source code.
        
        @param tokenGenerator token generator
        @return last token of the indented block
        """
        kind, value, start, end, raw = next(tokenGenerator)
        while kind != tokenize.INDENT:
            kind, value, start, end, raw = next(tokenGenerator)
        indent = 1
        for kind, value, start, end, raw in tokenGenerator:
            if kind == tokenize.INDENT:
                indent += 1
            elif kind == tokenize.DEDENT:
                indent -= 1
            if indent == 0:
                return kind, value, start, end, raw
    
    def __parseMethods(self):
        """
        Private method to extract methods of all classes.
        
        @return extracted method contexts (list of Pep257Context)
        """
        if not self.__methodsCache:
            contexts = []
            for classContext in self.__parseClasses():
                tokenGenerator = tokenize.generate_tokens(
                    StringIO(classContext.ssource()).readline)
                kind, value, char = None, None, None
                try:
                    while True:
                        start, end = None, None
                        while not (kind == tokenize.NAME and value == 'def'):
                            kind, value, (line, char), _, _ = \
                                next(tokenGenerator)
                        start = line - 1, char
                        kind, value, (line, char), _, _ = \
                            self.__skipIndentedBlock(tokenGenerator)
                        end = line - 1, char
                        startLine = classContext.start() + start[0]
                        endLine = classContext.start() + end[0]
                        contexts.append(
                            Pep257Context(self.__source[startLine:endLine],
                                          startLine, "def"))
                except StopIteration:
                    pass
            self.__methodsCache = contexts
        
        return self.__methodsCache

    def __parseContexts(self, kind):
        """
        Private method to extract a context from the source.
        
        @param kind kind of context to extract (string)
        @return requested contexts (list of Pep257Context)
        """
        if kind == 'moduleDocstring':
            return [Pep257Context(self.__source, 0, "module")]
        if kind == 'functionDocstring':
            return self.__parseFunctions()
        if kind == 'classDocstring':
            return self.__parseClasses()
        if kind == 'methodDocstring':
            return self.__parseMethods()
        if kind == 'defDocstring':
            return self.__parseFunctions() + self.__parseMethods()
        if kind == 'docstring':
            return ([Pep257Context(self.__source, 0, "module")] +
                    self.__parseFunctions() +
                    self.__parseClasses() +
                    self.__parseMethods())
        return []       # fall back
    
    ##################################################################
    ## Checking functionality below
    ##################################################################

    def __checkModulesDocstrings(self, docstringContext, context):
        """
        Private method to check, if the module has a docstring.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            self.__error(context.start(), 0, "D101")
            return
        
        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, "D101")
    
    def __checkFunctionDocstring(self, docstringContext, context):
        """
        Private method to check, that all public functions and methods
        have a docstring.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if self.__isScript:
            # assume nothing is exported
            return
        
        functionName = context.source()[0].lstrip().split()[1].split("(")[0]
        if functionName.startswith('_') and not functionName.endswith('__'):
            code = "D103"
        else:
            code = "D102"
        
        if docstringContext is None:
            self.__error(context.start(), 0, code)
            return
        
        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, code)
    
    def __checkClassDocstring(self, docstringContext, context):
        """
        Private method to check, that all public functions and methods
        have a docstring.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if self.__isScript:
            # assume nothing is exported
            return
        
        className = context.source()[0].lstrip().split()[1].split("(")[0]
        if className.startswith('_'):
            code = "D105"
        else:
            code = "D104"
        
        if docstringContext is None:
            self.__error(context.start(), 0, code)
            return
        
        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, code)
    
    def __checkTripleDoubleQuotes(self, docstringContext, context):
        """
        Private method to check, that all docstrings are surrounded
        by triple double quotes.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        docstring = docstringContext.ssource().strip()
        if not docstring.startswith(('"""', 'r"""', 'u"""')):
            self.__error(docstringContext.start(), 0, "D111")
    
    def __checkBackslashes(self, docstringContext, context):
        """
        Private method to check, that all docstrings containing
        backslashes are surrounded by raw triple double quotes.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        docstring = docstringContext.ssource().strip()
        if "\\" in docstring and not docstring.startswith('r"""'):
            self.__error(docstringContext.start(), 0, "D112")
    
    def __checkUnicode(self, docstringContext, context):
        """
        Private method to check, that all docstrings containing unicode
        characters are surrounded by unicode triple double quotes.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        docstring = docstringContext.ssource().strip()
        if not docstring.startswith('u"""') and \
                any(ord(char) > 127 for char in docstring):
            self.__error(docstringContext.start(), 0, "D113")
    
    def __checkOneLiner(self, docstringContext, context):
        """
        Private method to check, that one-liner docstrings fit on
        one line with quotes.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        lines = docstringContext.source()
        if len(lines) > 1:
            nonEmptyLines = [l for l in lines if l.strip().strip('\'"')]
            if len(nonEmptyLines) == 1:
                modLen = len(context.indent() + '"""' +
                             nonEmptyLines[0].strip() + '"""')
                if context.contextType() != "module":
                    modLen += 4
                if modLen <= self.__maxLineLength:
                    self.__error(docstringContext.start(), 0, "D121")
    
    def __checkIndent(self, docstringContext, context):
        """
        Private method to check, that docstrings are properly indented.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        lines = docstringContext.source()
        if len(lines) == 1:
            return
        
        nonEmptyLines = [l.rstrip() for l in lines[1:] if l.strip()]
        if not nonEmptyLines:
            return
        
        indent = min([len(l) - len(l.strip()) for l in nonEmptyLines])
        if context.contextType() == "module":
            expectedIndent = 0
        else:
            expectedIndent = len(context.indent()) + 4
        if indent != expectedIndent:
            self.__error(docstringContext.start(), 0, "D122")
    
    def __checkEndsWithPeriod(self, docstringContext, context):
        """
        Private method to check, that docstring summaries end with a period.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        summary, lineNumber = self.__getSummaryLine(docstringContext)
        if not summary.endswith("."):
            self.__error(docstringContext.start() + lineNumber, 0, "D131")
    
    def __checkImperativeMood(self, docstringContext, context):
        """
        Private method to check, that docstring summaries are in
        imperative mood.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        summary, lineNumber = self.__getSummaryLine(docstringContext)
        firstWord = summary.strip().split()[0]
        if firstWord.endswith("s") and not firstWord.endswith("ss"):
            self.__error(docstringContext.start() + lineNumber, 0, "D132")
    
    def __checkNoSignature(self, docstringContext, context):
        """
        Private method to check, that docstring summaries don't repeat
        the function's signature.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        functionName = context.source()[0].lstrip().split()[1].split("(")[0]
        summary, lineNumber = self.__getSummaryLine(docstringContext)
        if functionName + "(" in summary.replace(" ", ""):
            self.__error(docstringContext.start() + lineNumber, 0, "D133")
    
    def __checkReturnType(self, docstringContext, context):
        """
        Private method to check, that docstrings mention the return value type.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None or self.__isScript:
            return
        
        if "return" not in docstringContext.ssource().lower():
            tokens = list(
                tokenize.generate_tokens(StringIO(context.ssource()).readline))
            return_ = [tokens[i + 1][0] for i,  token in enumerate(tokens)
                       if token[1] == "return"]
            if (set(return_) -
                    set([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE]) !=
                    set([])):
                self.__error(docstringContext.end(), 0, "D134")
    
    def __checkNoBlankLineBefore(self, docstringContext, context):
        """
        Private method to check, that function/method docstrings are not
        preceded by a blank line.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        contextLines = context.source()
        cti = 0
        while cti < len(contextLines) and \
                not contextLines[cti].strip().startswith(
                ('"""', 'r"""', 'u"""', "'''", "r'''", "u'''")):
            cti += 1
        
        if cti == len(contextLines):
            return
        
        if not contextLines[cti - 1].strip():
            self.__error(docstringContext.start(), 0, "D141")
    
    def __checkBlankBeforeAndAfterClass(self, docstringContext, context):
        """
        Private method to check, that class docstrings have one
        blank line around them.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        contextLines = context.source()
        cti = 0
        while cti < len(contextLines) and \
            not contextLines[cti].strip().startswith(
                ('"""', 'r"""', 'u"""', "'''", "r'''", "u'''")):
            cti += 1
        
        if cti == len(contextLines):
            return
        
        start = cti
        if contextLines[cti].strip() in (
                '"""', 'r"""', 'u"""', "'''", "r'''", "u'''"):
            # it is a multi line docstring
            cti += 1
        
        while cti < len(contextLines) and \
                not contextLines[cti].strip().endswith(('"""', "'''")):
            cti += 1
        end = cti
        
        if contextLines[start - 1].strip():
            self.__error(docstringContext.start(), 0, "D142")
        if contextLines[end + 1].strip():
            self.__error(docstringContext.end(), 0, "D143")
    
    def __checkBlankAfterSummary(self, docstringContext, context):
        """
        Private method to check, that docstring summaries are followed
        by a blank line.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        docstrings = docstringContext.source()
        if len(docstrings) in [1, 3]:
            # correct/invalid one-liner
            return
        
        summary, lineNumber = self.__getSummaryLine(docstringContext)
        if docstrings[lineNumber + 1].strip():
            self.__error(docstringContext.start() + lineNumber, 0, "D144")
    
    def __checkBlankAfterLastParagraph(self, docstringContext, context):
        """
        Private method to check, that docstring summaries are followed
        by a blank line.
        
        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return
        
        docstrings = docstringContext.source()
        if len(docstrings) in [1, 3]:
            # correct/invalid one-liner
            return
        
        if docstrings[-2].strip():
            self.__error(docstringContext.end(), 0, "D145")

#
# eflag: FileType = Python2

eric ide

mercurial