Plugins/CheckerPlugins/CodeStyleChecker/MiscellaneousChecker.py

changeset 4511
b5e4e7efa904
parent 4510
43437fc9f4c9
child 4515
d7cebe39ffba
--- a/Plugins/CheckerPlugins/CodeStyleChecker/MiscellaneousChecker.py	Wed Oct 28 20:15:46 2015 +0100
+++ b/Plugins/CheckerPlugins/CodeStyleChecker/MiscellaneousChecker.py	Thu Oct 29 19:06:49 2015 +0100
@@ -12,6 +12,8 @@
 import sys
 import ast
 import re
+import itertools
+from string import Formatter
 
 
 class MiscellaneousChecker(object):
@@ -22,13 +24,22 @@
         "M101", "M102",
         "M111", "M112",
         "M121",
-        "M131",
+        
+        "M601",
+        "M611", "M612", "M613",
+        "M621", "M622", "M623", "M624", "M625",
+        "M631", "M632",
+        
         "M701", "M702",
+        
         "M801",
         "M811",
         
         "M901",
     ]
+    
+    Formatter = Formatter()
+    FormatFieldRegex = re.compile(r'^((?:\s|.)*?)(\..*|\[.*\])?$')
 
     def __init__(self, source, filename, select, ignore, expected, repeat,
                  args):
@@ -70,14 +81,16 @@
         self.errors = []
         
         checkersWithCodes = [
-            # TODO: fill this
             (self.__checkCoding, ("M101", "M102")),
             (self.__checkCopyright, ("M111", "M112")),
             (self.__checkBlindExcept, ("M121",)),
-            (self.__checkPep3101, ("M131",)),
+            (self.__checkPep3101, ("M601",)),
+            (self.__checkFormatString, ("M611", "M612", "M613",
+                                        "M621", "M622", "M623", "M624", "M625",
+                                        "M631", "M632")),
+            (self.__checkFuture, ("M701", "M702")),
             (self.__checkPrintStatements, ("M801",)),
             (self.__checkTuple, ("M811", )),
-            (self.__checkFuture, ("M701", "M702")),
         ]
         
         self.__defaultArgs = {
@@ -244,29 +257,6 @@
             if match:
                 self.__error(lineno, match.start(), "M121")
     
-    def __checkPep3101(self):
-        """
-        Private method to check for old style string formatting.
-        """
-        for lineno, line in enumerate(self.__source):
-            match = self.__pep3101FormatRegex.search(line)
-            if match:
-                lineLen = len(line)
-                pos = line.find('%')
-                formatPos = pos
-                formatter = '%'
-                if line[pos + 1] == "(":
-                    pos = line.find(")", pos)
-                c = line[pos]
-                while c not in "diouxXeEfFgGcrs":
-                    pos += 1
-                    if pos >= lineLen:
-                        break
-                    c = line[pos]
-                if c in "diouxXeEfFgGcrs":
-                    formatter += c
-                self.__error(lineno, formatPos, "M131", formatter)
-    
     def __checkPrintStatements(self):
         """
         Private method to check for print statements.
@@ -321,3 +311,291 @@
             else:
                 self.__error(node.lineno - 1, node.col_offset, "M702",
                              ", ".join(expectedImports))
+    
+    def __checkPep3101(self):
+        """
+        Private method to check for old style string formatting.
+        """
+        for lineno, line in enumerate(self.__source):
+            match = self.__pep3101FormatRegex.search(line)
+            if match:
+                lineLen = len(line)
+                pos = line.find('%')
+                formatPos = pos
+                formatter = '%'
+                if line[pos + 1] == "(":
+                    pos = line.find(")", pos)
+                c = line[pos]
+                while c not in "diouxXeEfFgGcrs":
+                    pos += 1
+                    if pos >= lineLen:
+                        break
+                    c = line[pos]
+                if c in "diouxXeEfFgGcrs":
+                    formatter += c
+                self.__error(lineno, formatPos, "M601", formatter)
+    
+    def __checkFormatString(self):
+        """
+        Private method to check string format strings.
+        """
+        visitor = TextVisitor()
+        visitor.visit(self.__tree)
+        for node in visitor.nodes:
+            text = node.s
+            if sys.version_info[0] > 2 and isinstance(text, bytes):
+                try:
+                    # TODO: Maybe decode using file encoding?
+                    text = text.decode('utf-8')
+                except UnicodeDecodeError:
+                    continue
+            fields, implicit, explicit = self.__getFields(text)
+            if implicit:
+                if node in visitor.calls:
+                    self.__error(node.lineno - 1, node.col_offset, "M611")
+                else:
+                    if node.is_docstring:
+                        self.__error(node.lineno - 1, node.col_offset, "M612")
+                    else:
+                        self.__error(node.lineno - 1, node.col_offset, "M613")
+            
+            if node in visitor.calls:
+                call, strArgs = visitor.calls[node]
+                
+                numbers = set()
+                names = set()
+                # Determine which fields require a keyword and which an arg
+                for name in fields:
+                    fieldMatch = self.FormatFieldRegex.match(name)
+                    try:
+                        number = int(fieldMatch.group(1))
+                    except ValueError:
+                        number = -1
+                    # negative numbers are considered keywords
+                    if number >= 0:
+                        numbers.add(number)
+                    else:
+                        names.add(fieldMatch.group(1))
+                
+                keywords = set(keyword.arg for keyword in call.keywords)
+                numArgs = len(call.args)
+                if strArgs:
+                    numArgs -= 1
+                if sys.version_info < (3, 5):
+                    hasKwArgs = bool(call.kwargs)
+                    hasStarArgs = bool(call.starargs)
+                else:
+                    hasKwArgs = any(kw.arg is None for kw in call.keywords)
+                    hasStarArgs = sum(1 for arg in call.args
+                                      if isinstance(arg, ast.Starred))
+                    
+                    if hasKwArgs:
+                        keywords.discard(None)
+                    if hasStarArgs:
+                        numArgs -= 1
+                
+                # if starargs or kwargs is not None, it can't count the
+                # parameters but at least check if the args are used
+                if hasKwArgs:
+                    if not names:
+                        # No names but kwargs
+                        self.__error(call.lineno - 1, call.col_offset, "M623")
+                if hasStarArgs:
+                    if not numbers:
+                        # No numbers but args
+                        self.__error(call.lineno - 1, call.col_offset, "M624")
+                
+                if not hasKwArgs and not hasStarArgs:
+                    # can actually verify numbers and names
+                    for number in sorted(numbers):
+                        if number >= numArgs:
+                            self.__error(call.lineno - 1, call.col_offset,
+                                         "M621", number)
+                    
+                    for name in sorted(names):
+                        if name not in keywords:
+                            self.__error(call.lineno - 1, call.col_offset,
+                                         "M622", name)
+                
+                for arg in range(numArgs):
+                    if arg not in numbers:
+                        self.__error(call.lineno - 1, call.col_offset, "M631",
+                                     arg)
+                
+                for keyword in keywords:
+                    if keyword not in names:
+                        self.__error(call.lineno - 1, call.col_offset, "M632",
+                                     keyword)
+                
+                if implicit and explicit:
+                    self.__error(call.lineno - 1, call.col_offset, "M625")
+    
+    def __getFields(self, string):
+        """
+        Private method to extract the format field information.
+        
+        @param string format string to be parsed
+        @type str
+        @return format field information as a tuple with fields, implicit
+            field definitions present and explicit field definitions present
+        @rtype tuple of set of str, bool, bool
+        """
+        fields = set()
+        cnt = itertools.count()
+        implicit = False
+        explicit = False
+        try:
+            for literal, field, spec, conv in self.Formatter.parse(string):
+                if field is not None and (conv is None or conv in 'rsa'):
+                    if not field:
+                        field = str(next(cnt))
+                        implicit = True
+                    else:
+                        explicit = True
+                    fields.add(field)
+                    fields.update(parsedSpec[1]
+                                  for parsedSpec in self.Formatter.parse(spec)
+                                  if parsedSpec[1] is not None)
+        except ValueError:
+            return set(), False, False
+        else:
+            return fields, implicit, explicit
+
+
+class TextVisitor(ast.NodeVisitor):
+    """
+    Class implementing a node visitor for bytes and str instances.
+
+    It tries to detect docstrings as string of the first expression of each
+    module, class or function.
+    """
+    # modelled after the string format flake8 extension
+    
+    def __init__(self):
+        """
+        Constructor
+        """
+        super(TextVisitor, self).__init__()
+        self.nodes = []
+        self.calls = {}
+
+    def __addNode(self, node):
+        """
+        Private method to add a node to our list of nodes.
+        
+        @param node reference to the node to add
+        @type ast.AST
+        """
+        if not hasattr(node, 'is_docstring'):
+            node.is_docstring = False
+        self.nodes.append(node)
+
+    def __isBaseString(self, node):
+        """
+        Private method to determine, if a node is a base string node.
+        
+        @param node reference to the node to check
+        @type ast.AST
+        @return flag indicating a base string
+        @rtype bool
+        """
+        typ = (ast.Str,)
+        if sys.version_info[0] > 2:
+            typ += (ast.Bytes,)
+        return isinstance(node, typ)
+
+    def visit_Str(self, node):
+        """
+        Public method to record a string node.
+        
+        @param node reference to the string node
+        @type ast.Str
+        """
+        self.__addNode(node)
+
+    def visit_Bytes(self, node):
+        """
+        Public method to record a bytes node.
+        
+        @param node reference to the bytes node
+        @type ast.Bytes
+        """
+        self.__addNode(node)
+
+    def __visitDefinition(self, node):
+        """
+        Private method handling class and function definitions.
+        
+        @param node reference to the node to handle
+        @type ast.FunctionDef or ast.ClassDef
+        """
+        # Manually traverse class or function definition
+        # * Handle decorators normally
+        # * Use special check for body content
+        # * Don't handle the rest (e.g. bases)
+        for decorator in node.decorator_list:
+            self.visit(decorator)
+        self.__visitBody(node)
+
+    def __visitBody(self, node):
+        """
+        Private method to traverse the body of the node manually.
+
+        If the first node is an expression which contains a string or bytes it
+        marks that as a docstring.
+        
+        @param node reference to the node to traverse
+        @type ast.AST
+        """
+        if (node.body and isinstance(node.body[0], ast.Expr) and
+                self.__isBaseString(node.body[0].value)):
+            node.body[0].value.is_docstring = True
+
+        for subnode in node.body:
+            self.visit(subnode)
+
+    def visit_Module(self, node):
+        """
+        Public method to handle a module.
+        
+        @param node reference to the node to handle
+        @type ast.Module
+        """
+        self.__visitBody(node)
+
+    def visit_ClassDef(self, node):
+        """
+        Public method to handle a class definition.
+        
+        @param node reference to the node to handle
+        @type ast.ClassDef
+        """
+        # Skipped nodes: ('name', 'bases', 'keywords', 'starargs', 'kwargs')
+        self.__visitDefinition(node)
+
+    def visit_FunctionDef(self, node):
+        """
+        Public method to handle a function definition.
+        
+        @param node reference to the node to handle
+        @type ast.FunctionDef
+        """
+        # Skipped nodes: ('name', 'args', 'returns')
+        self.__visitDefinition(node)
+
+    def visit_Call(self, node):
+        """
+        Public method to handle a function call.
+        
+        @param node reference to the node to handle
+        @type ast.Call
+        """
+        if (isinstance(node.func, ast.Attribute) and
+                node.func.attr == 'format'):
+            if self.__isBaseString(node.func.value):
+                self.calls[node.func.value] = (node, False)
+            elif (isinstance(node.func.value, ast.Name) and
+                    node.func.value.id == 'str' and node.args and
+                    self.__isBaseString(node.args[0])):
+                self.calls[node.args[0]] = (node, True)
+        super(TextVisitor, self).generic_visit(node)

eric ide

mercurial