--- a/Plugins/CheckerPlugins/CodeStyleChecker/MiscellaneousChecker.py Wed Oct 28 20:15:46 2015 +0100 +++ b/Plugins/CheckerPlugins/CodeStyleChecker/MiscellaneousChecker.py Thu Oct 29 19:06:49 2015 +0100 @@ -12,6 +12,8 @@ import sys import ast import re +import itertools +from string import Formatter class MiscellaneousChecker(object): @@ -22,13 +24,22 @@ "M101", "M102", "M111", "M112", "M121", - "M131", + + "M601", + "M611", "M612", "M613", + "M621", "M622", "M623", "M624", "M625", + "M631", "M632", + "M701", "M702", + "M801", "M811", "M901", ] + + Formatter = Formatter() + FormatFieldRegex = re.compile(r'^((?:\s|.)*?)(\..*|\[.*\])?$') def __init__(self, source, filename, select, ignore, expected, repeat, args): @@ -70,14 +81,16 @@ self.errors = [] checkersWithCodes = [ - # TODO: fill this (self.__checkCoding, ("M101", "M102")), (self.__checkCopyright, ("M111", "M112")), (self.__checkBlindExcept, ("M121",)), - (self.__checkPep3101, ("M131",)), + (self.__checkPep3101, ("M601",)), + (self.__checkFormatString, ("M611", "M612", "M613", + "M621", "M622", "M623", "M624", "M625", + "M631", "M632")), + (self.__checkFuture, ("M701", "M702")), (self.__checkPrintStatements, ("M801",)), (self.__checkTuple, ("M811", )), - (self.__checkFuture, ("M701", "M702")), ] self.__defaultArgs = { @@ -244,29 +257,6 @@ if match: self.__error(lineno, match.start(), "M121") - def __checkPep3101(self): - """ - Private method to check for old style string formatting. - """ - for lineno, line in enumerate(self.__source): - match = self.__pep3101FormatRegex.search(line) - if match: - lineLen = len(line) - pos = line.find('%') - formatPos = pos - formatter = '%' - if line[pos + 1] == "(": - pos = line.find(")", pos) - c = line[pos] - while c not in "diouxXeEfFgGcrs": - pos += 1 - if pos >= lineLen: - break - c = line[pos] - if c in "diouxXeEfFgGcrs": - formatter += c - self.__error(lineno, formatPos, "M131", formatter) - def __checkPrintStatements(self): """ Private method to check for print statements. @@ -321,3 +311,291 @@ else: self.__error(node.lineno - 1, node.col_offset, "M702", ", ".join(expectedImports)) + + def __checkPep3101(self): + """ + Private method to check for old style string formatting. + """ + for lineno, line in enumerate(self.__source): + match = self.__pep3101FormatRegex.search(line) + if match: + lineLen = len(line) + pos = line.find('%') + formatPos = pos + formatter = '%' + if line[pos + 1] == "(": + pos = line.find(")", pos) + c = line[pos] + while c not in "diouxXeEfFgGcrs": + pos += 1 + if pos >= lineLen: + break + c = line[pos] + if c in "diouxXeEfFgGcrs": + formatter += c + self.__error(lineno, formatPos, "M601", formatter) + + def __checkFormatString(self): + """ + Private method to check string format strings. + """ + visitor = TextVisitor() + visitor.visit(self.__tree) + for node in visitor.nodes: + text = node.s + if sys.version_info[0] > 2 and isinstance(text, bytes): + try: + # TODO: Maybe decode using file encoding? + text = text.decode('utf-8') + except UnicodeDecodeError: + continue + fields, implicit, explicit = self.__getFields(text) + if implicit: + if node in visitor.calls: + self.__error(node.lineno - 1, node.col_offset, "M611") + else: + if node.is_docstring: + self.__error(node.lineno - 1, node.col_offset, "M612") + else: + self.__error(node.lineno - 1, node.col_offset, "M613") + + if node in visitor.calls: + call, strArgs = visitor.calls[node] + + numbers = set() + names = set() + # Determine which fields require a keyword and which an arg + for name in fields: + fieldMatch = self.FormatFieldRegex.match(name) + try: + number = int(fieldMatch.group(1)) + except ValueError: + number = -1 + # negative numbers are considered keywords + if number >= 0: + numbers.add(number) + else: + names.add(fieldMatch.group(1)) + + keywords = set(keyword.arg for keyword in call.keywords) + numArgs = len(call.args) + if strArgs: + numArgs -= 1 + if sys.version_info < (3, 5): + hasKwArgs = bool(call.kwargs) + hasStarArgs = bool(call.starargs) + else: + hasKwArgs = any(kw.arg is None for kw in call.keywords) + hasStarArgs = sum(1 for arg in call.args + if isinstance(arg, ast.Starred)) + + if hasKwArgs: + keywords.discard(None) + if hasStarArgs: + numArgs -= 1 + + # if starargs or kwargs is not None, it can't count the + # parameters but at least check if the args are used + if hasKwArgs: + if not names: + # No names but kwargs + self.__error(call.lineno - 1, call.col_offset, "M623") + if hasStarArgs: + if not numbers: + # No numbers but args + self.__error(call.lineno - 1, call.col_offset, "M624") + + if not hasKwArgs and not hasStarArgs: + # can actually verify numbers and names + for number in sorted(numbers): + if number >= numArgs: + self.__error(call.lineno - 1, call.col_offset, + "M621", number) + + for name in sorted(names): + if name not in keywords: + self.__error(call.lineno - 1, call.col_offset, + "M622", name) + + for arg in range(numArgs): + if arg not in numbers: + self.__error(call.lineno - 1, call.col_offset, "M631", + arg) + + for keyword in keywords: + if keyword not in names: + self.__error(call.lineno - 1, call.col_offset, "M632", + keyword) + + if implicit and explicit: + self.__error(call.lineno - 1, call.col_offset, "M625") + + def __getFields(self, string): + """ + Private method to extract the format field information. + + @param string format string to be parsed + @type str + @return format field information as a tuple with fields, implicit + field definitions present and explicit field definitions present + @rtype tuple of set of str, bool, bool + """ + fields = set() + cnt = itertools.count() + implicit = False + explicit = False + try: + for literal, field, spec, conv in self.Formatter.parse(string): + if field is not None and (conv is None or conv in 'rsa'): + if not field: + field = str(next(cnt)) + implicit = True + else: + explicit = True + fields.add(field) + fields.update(parsedSpec[1] + for parsedSpec in self.Formatter.parse(spec) + if parsedSpec[1] is not None) + except ValueError: + return set(), False, False + else: + return fields, implicit, explicit + + +class TextVisitor(ast.NodeVisitor): + """ + Class implementing a node visitor for bytes and str instances. + + It tries to detect docstrings as string of the first expression of each + module, class or function. + """ + # modelled after the string format flake8 extension + + def __init__(self): + """ + Constructor + """ + super(TextVisitor, self).__init__() + self.nodes = [] + self.calls = {} + + def __addNode(self, node): + """ + Private method to add a node to our list of nodes. + + @param node reference to the node to add + @type ast.AST + """ + if not hasattr(node, 'is_docstring'): + node.is_docstring = False + self.nodes.append(node) + + def __isBaseString(self, node): + """ + Private method to determine, if a node is a base string node. + + @param node reference to the node to check + @type ast.AST + @return flag indicating a base string + @rtype bool + """ + typ = (ast.Str,) + if sys.version_info[0] > 2: + typ += (ast.Bytes,) + return isinstance(node, typ) + + def visit_Str(self, node): + """ + Public method to record a string node. + + @param node reference to the string node + @type ast.Str + """ + self.__addNode(node) + + def visit_Bytes(self, node): + """ + Public method to record a bytes node. + + @param node reference to the bytes node + @type ast.Bytes + """ + self.__addNode(node) + + def __visitDefinition(self, node): + """ + Private method handling class and function definitions. + + @param node reference to the node to handle + @type ast.FunctionDef or ast.ClassDef + """ + # Manually traverse class or function definition + # * Handle decorators normally + # * Use special check for body content + # * Don't handle the rest (e.g. bases) + for decorator in node.decorator_list: + self.visit(decorator) + self.__visitBody(node) + + def __visitBody(self, node): + """ + Private method to traverse the body of the node manually. + + If the first node is an expression which contains a string or bytes it + marks that as a docstring. + + @param node reference to the node to traverse + @type ast.AST + """ + if (node.body and isinstance(node.body[0], ast.Expr) and + self.__isBaseString(node.body[0].value)): + node.body[0].value.is_docstring = True + + for subnode in node.body: + self.visit(subnode) + + def visit_Module(self, node): + """ + Public method to handle a module. + + @param node reference to the node to handle + @type ast.Module + """ + self.__visitBody(node) + + def visit_ClassDef(self, node): + """ + Public method to handle a class definition. + + @param node reference to the node to handle + @type ast.ClassDef + """ + # Skipped nodes: ('name', 'bases', 'keywords', 'starargs', 'kwargs') + self.__visitDefinition(node) + + def visit_FunctionDef(self, node): + """ + Public method to handle a function definition. + + @param node reference to the node to handle + @type ast.FunctionDef + """ + # Skipped nodes: ('name', 'args', 'returns') + self.__visitDefinition(node) + + def visit_Call(self, node): + """ + Public method to handle a function call. + + @param node reference to the node to handle + @type ast.Call + """ + if (isinstance(node.func, ast.Attribute) and + node.func.attr == 'format'): + if self.__isBaseString(node.func.value): + self.calls[node.func.value] = (node, False) + elif (isinstance(node.func.value, ast.Name) and + node.func.value.id == 'str' and node.args and + self.__isBaseString(node.args[0])): + self.calls[node.args[0]] = (node, True) + super(TextVisitor, self).generic_visit(node)