--- a/Plugins/CheckerPlugins/Pep8/Pep8Fixer.py Sun Sep 01 13:38:28 2013 +0200
+++ b/Plugins/CheckerPlugins/Pep8/Pep8Fixer.py Mon Sep 02 18:35:53 2013 +0200
@@ -1460,6 +1460,13 @@
     """
     Class used to shorten lines to a given maximum of characters.
     """
+    ShortenOperatorGroups = frozenset([
+        frozenset([',']),
+        frozenset(['%']),
+        frozenset([',', '(', '[', '{']),
+        frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
+    ])
+
     def __init__(self, curLine, prevLine, nextLine,
                  maxLength=79, eol="\n", indentWord="    "):
         """
@@ -1494,14 +1501,17 @@
 
             # Wrap commented lines.
             newText = self.__shortenComment(lastComment)
-            return True, newText
+            if newText == self.__text:
+                return False, ""
+            else:
+                return True, newText
 
         indent = self.__getIndent(self.__text)
         source = self.__text[len(indent):]
         assert source.lstrip() == source
         sio = io.StringIO(source)
 
-        # Check for multiline string.
+        # Check for multi-line string.
         try:
             tokens = list(tokenize.generate_tokens(sio.readline))
         except (SyntaxError, tokenize.TokenError):
@@ -1540,7 +1550,321 @@
             (boolean)
         @return shortened comment line (string)
         """
-        pass
+        if len(self.__text) <= self.__maxLength:
+            return self.__text
+
+        newText = self.__text.rstrip()
+
+        # PEP 8 recommends 72 characters for comment text.
+        indentation = self.__getIndent(newText) + '# '
+        maxLength = min(self.__maxLength,
+                        len(indentation) + 72)
+
+        MIN_CHARACTER_REPEAT = 5
+        if (len(newText) - len(newText.rstrip(newText[-1])) >= MIN_CHARACTER_REPEAT and
+                not newText[-1].isalnum()):
+            # Trim comments that end with things like ---------
+            return newText[:maxLength] + self.__eol
+        elif isLast and re.match(r"\s*#+\s*\w+", newText):
+            import textwrap
+            splitLines = textwrap.wrap(newText.lstrip(" \t#"),
+                                       initial_indent=indentation,
+                                       subsequent_indent=indentation,
+                                       width=maxLength,
+                                       break_long_words=False,
+                                       break_on_hyphens=False)
+            return self.__eol.join(splitLines) + self.__eol
+        else:
+            return newText + self.__eol
+
+    def __breakMultiline(self):
+        """
+        Private method to break multi-line strings.
+
+        @return broken multi-line string or None, if a break is not possible
+            (string or None)
+        """
+        indentation = self.__getIndent(self.__text)
+
+        # Handle special case only.
+        for symbol in '([{':
+            # Only valid if symbol is not on a line by itself.
+            if (
+                symbol in self.__text and
+                self.__text.strip() != symbol and
+                self.__text.rstrip().endswith((',', '%'))
+            ):
+                index = 1 + self.__text.find(symbol)
+
+                if index <= len(self.__indentWord) + len(indentation):
+                    continue
+
+                if self.__isProbablyInsideStringOrComment(self.__text, index - 1):
+                    continue
+
+                return (self.__text[:index].rstrip() + self.__eol +
+                        indentation + self.__indentWord +
+                        self.__text[index:].lstrip())
+
+        return None
+
+    def __isProbablyInsideStringOrComment(self, line, index):
+        """
+        Private method to check, if the given position in a line might be
+        inside a string or comment.
+
+        @param line line to check (string)
+        @param index position inside line to check (integer)
+        @return flag indicating the possibility of being inside a string
+            or comment (boolean)
+        """
+        # Check against being in a string.
+        for quote in ['"', "'"]:
+            pos = line.find(quote)
+            if pos != -1 and pos <= index:
+                return True
+
+        # Check against being in a comment.
+        pos = line.find('#')
+        if pos != -1 and pos <= index:
+            return True
+
+        return False
+
+    def __shortenLine(self, tokens, source, indent):
+        """
+        Private method to shorten a line of code at an operator.
+
+        @param tokens tokens of the line as generated by tokenize (list of token)
+        @param source code string to work at (string)
+        @param indent indentation string of the code line (string)
+        @return list of candidates (list of string)
+        """
+        candidates = []
+
+        for tkn in tokens:
+            tokenType = tkn[0]
+            tokenString = tkn[1]
+
+            if (
+                tokenType == tokenize.COMMENT and
+                not self.__prevText.rstrip().endswith('\\')
+            ):
+                # Move inline comments to previous line.
+                offset = tkn[2][1]
+                first = source[:offset]
+                second = source[offset:]
+                candidates.append(indent + second.strip() + self.__eol +
+                                  indent + first.strip() + self.__eol)
+            elif tokenType == tokenize.OP and tokenString != '=':
+                # Don't break on '=' after keyword as this violates PEP 8.
+
+                assert tokenType != tokenize.INDENT
+
+                offset = tkn[2][1] + 1
+                first = source[:offset]
+
+                secondIndent = indent
+                if first.rstrip().endswith('('):
+                    secondIndent += self.__indentWord
+                elif '(' in first:
+                    secondIndent += ' ' * (1 + first.find('('))
+                else:
+                    secondIndent += self.__indentWord
+
+                second = (secondIndent + source[offset:].lstrip())
+                if not second.strip():
+                    continue
+
+                # Do not begin a line with a comma.
+                if second.lstrip().startswith(','):
+                    continue
+
+                # Do not end a line with a dot.
+                if first.rstrip().endswith('.'):
+                    continue
+
+                if tokenString in '+-*/':
+                    newText = first + ' \\' + self.__eol + second
+                else:
+                    newText = first + self.__eol + second
+
+                # Only fix if syntax is okay.
+                if self.__checkSyntax(self.__normalizeMultiline(newText)):
+                    candidates.append(indent + newText)
+
+        for keyTokenStrings in self.ShortenOperatorGroups:
+            shortened = self.__shortenLineAtTokens(
+                tokens, source, indent, keyTokenStrings)
+
+            if shortened is not None and shortened != source:
+                candidates.append(shortened)
+
+        return candidates
+
+    def __normalizeMultiline(self, text):
+        """
+        Private method to remove multiline-related code that will cause a syntax error.
+
+        @param text code line to work on (string)
+        @return normalized code line (string)
+        """
+        for quote in '\'"':
+            dictPattern = r"^{q}[^{q}]*{q} *: *".format(q=quote)
+            if re.match(dictPattern, text):
+                if not text.strip().endswith('}'):
+                    text += '}'
+                return '{' + text
+
+        if text.startswith('def ') and text.rstrip().endswith(':'):
+            # Do not allow ':' to be alone. That is invalid.
+            splitText = [item.strip() for item in text.split(self.__eol)]
+            if ':' not in splitText and 'def' not in splitText:
+                return text[len('def'):].strip().rstrip(':')
+
+        return text
+
+    def __shortenLineAtTokens(self, tokens, source, indent, keyTokenStrings):
+        """
+        Private method to break lines at key tokens.
+
+        @param tokens tokens of the line as generated by tokenize (list of token)
+        @param source code string to work at (string)
+        @param indent indentation string of the code line (string)
+        @param keyTokenStrings key tokens to break at (set of string)
+        @return broken code line or None (string or None)
+        """
+        offsets = []
+        firstParen = True
+        for tkn in tokens:
+            tokenType = tkn[0]
+            tokenString = tkn[1]
+            nextOffset = tkn[2][1] + 1
+
+            assert tokenType != tokenize.INDENT
+
+            if tokenString in keyTokenStrings or (firstParen and
+                                                  tokenString == '('):
+                # Don't split right before newline.
+                if nextOffset < len(source) - 1:
+                    offsets.append(nextOffset)
+
+                if tokenString == '(':
+                    firstParen = False
+
+        currentIndent = None
+        newText = None
+        for text in self.__splitAtOffsets(source, offsets):
+            if newText:
+                newText += self.__eol + currentIndent + text
+
+                for symbol in '([{':
+                    if text.endswith(symbol):
+                        currentIndent += self.__indentWord
+            else:
+                # First line.
+                newText = text
+                assert not currentIndent
+                currentIndent = self.__indentWord
+
+        assert newText is not None
+
+        if self.__checkSyntax(self.__normalizeMultiline(newText)):
+            return indent + newText
+        else:
+            return None
+
+    def __splitAtOffsets(self, line, offsets):
+        """
+        Private method to split the line at the given offsets.
+
+        @param line line to split (string)
+        @param offsets offsets to split at (list of integer)
+        @return split line (list of string)
+        """
+        result = []
+
+        previousOffset = 0
+        currentOffset = 0
+        for currentOffset in sorted(offsets):
+            if currentOffset < len(line) and previousOffset != currentOffset:
+                result.append(line[previousOffset:currentOffset])
+            previousOffset = currentOffset
+
+        result.append(line[currentOffset:])
+
+        return result
+
+    def __lineShorteningRank(self, candidate):
+        """
+        Private method to rank a candidate.
+
+        @param candidate candidate line to rank (string)
+        @return rank of the candidate (integer)
+        """
+        rank = 0
+        if candidate.strip():
+            lines = candidate.split(self.__eol)
+
+            offset = 0
+            if lines[0].rstrip()[-1] not in '([{':
+                for symbol in '([{':
+                    offset = max(offset, 1 + lines[0].find(symbol))
+
+            maxLength = max([offset + len(x.strip()) for x in lines])
+            rank += maxLength
+            rank += len(lines)
+
+            badStartingSymbol = {
+                '(': ')',
+                '[': ']',
+                '{': '}'}.get(lines[0][-1], None)
+
+            if len(lines) > 1:
+                if (badStartingSymbol and
+                        lines[1].lstrip().startswith(badStartingSymbol)):
+                    rank += 20
+
+            if re.match(r".*[+\-\*/] \($", lines[0]):
+                # "1 * (\n" is ugly as hell.
+                rank += 100
+
+            for currentLine in lines:
+                for badStart in ['.', '%', '+', '-', '/']:
+                    if currentLine.startswith(badStart):
+                        rank += 100
+
+                for ending in '([{':
+                    # Avoid lonely openings. They result in longer lines.
+                    if (currentLine.endswith(ending) and
+                            len(currentLine.strip()) <= len(self.__indentWord)):
+                        rank += 100
+
+                if currentLine.endswith('%'):
+                    rank -= 20
+
+                # Try to break list comprehensions at the "for".
+                if currentLine.lstrip().startswith('for'):
+                    rank -= 50
+
+                rank += 10 * self.__countUnbalancedBrackets(currentLine)
+        else:
+            rank = 100000
+
+        return max(0, rank)
+
+    def __countUnbalancedBrackets(self, line):
+        """
+        Private method to determine the number of unmatched open/close brackets.
+
+        @param line line to work at (string)
+        @return number of unmatched open/close brackets (integer)
+        """
+        count = 0
+        for opening, closing in ['()', '[]', '{}']:
+            count += abs(line.count(opening) - line.count(closing))
+
+        return count
 
     def __getIndent(self, line):
         """
@@ -1551,3 +1875,16 @@
         """
         # copied from Pep8Fixer
         return line.replace(line.lstrip(), "")
+
+    def __checkSyntax(self, code):
+        """
+        Private method to check the syntax of the given code fragment.
+
+        @param code code fragment to check (string)
+        @return flag indicating syntax is ok (boolean)
+        """
+        code = code.replace("\r\n", "\n").replace("\r", "\n")
+        try:
+            return compile(code, '<string>', 'exec')
+        except (SyntaxError, TypeError, UnicodeDecodeError):
+            return False
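
Reviewer note (not part of the patch): the new code works by generating several candidate line breaks and keeping the one with the lowest rank. The standalone sketch below illustrates that split-and-rank idea only; the helper names (candidate_offsets, split_at_offsets, rank) are illustrative, not the Pep8LineShortener API, and the ranking is a much simplified stand-in for what __splitAtOffsets and __lineShorteningRank do.

import io
import tokenize


def candidate_offsets(source):
    # Collect column offsets just after ',' and '(' tokens, similar to what
    # __shortenLineAtTokens does for one of the ShortenOperatorGroups.
    offsets = []
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok[1] in (',', '('):
            offset = tok[2][1] + 1
            if offset < len(source) - 1:    # don't split right before the end
                offsets.append(offset)
    return offsets


def split_at_offsets(line, offsets):
    # Split 'line' at the given column offsets (cf. __splitAtOffsets).
    result, previous = [], 0
    for current in sorted(offsets):
        if current < len(line) and previous != current:
            result.append(line[previous:current])
            previous = current
    result.append(line[previous:])
    return result


def rank(parts, indent="    "):
    # Lower is better: penalise long physical lines and many continuation
    # lines (simplified stand-in for __lineShorteningRank).
    physical = [parts[0]] + [indent + part.lstrip() for part in parts[1:]]
    return max(len(line) for line in physical) + len(parts)


source = "result = some_function(first_argument, second_argument, third_argument)"
candidates = [split_at_offsets(source, [offset])
              for offset in candidate_offsets(source)]
best = min(candidates, key=rank)
print(best[0])
for part in best[1:]:
    print("    " + part.lstrip())

In the patch itself the candidates come from both __shortenLine and __shortenLineAtTokens, and __lineShorteningRank additionally penalises unbalanced brackets, lonely opening brackets, and continuation lines that start with '.', '%', '+', '-' or '/'.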