Plugins/CheckerPlugins/Pep8/Pep8Fixer.py

changeset 2880
a45c59bddc79
parent 2879
12e6e199d0cf
child 2882
9b97bc92fdda
--- a/Plugins/CheckerPlugins/Pep8/Pep8Fixer.py	Sun Sep 01 13:38:28 2013 +0200
+++ b/Plugins/CheckerPlugins/Pep8/Pep8Fixer.py	Mon Sep 02 18:35:53 2013 +0200
@@ -1460,6 +1460,13 @@
     """
     Class used to shorten lines to a given maximum of characters.
     """
+    ShortenOperatorGroups = frozenset([  # token sets used as break points
+        frozenset([',']),
+        frozenset(['%']),
+        frozenset([',', '(', '[', '{']),
+        frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
+    ])  # NOTE: iterating a frozenset yields its members in arbitrary order
+    
     def __init__(self, curLine, prevLine, nextLine,
                  maxLength=79, eol="\n", indentWord="    "):
         """
@@ -1494,14 +1501,17 @@
 
             # Wrap commented lines.
             newText = self.__shortenComment(lastComment)
-            return True, newText
+            if newText == self.__text:
+                return False, ""
+            else:
+                return True, newText
 
         indent = self.__getIndent(self.__text)
         source = self.__text[len(indent):]
         assert source.lstrip() == source
         sio = io.StringIO(source)
 
-        # Check for multiline string.
+        # Check for multi line string.
         try:
             tokens = list(tokenize.generate_tokens(sio.readline))
         except (SyntaxError, tokenize.TokenError):
@@ -1540,7 +1550,321 @@
             (boolean)
         @return shortened comment line (string)
         """
-        pass
+        if len(self.__text) <= self.__maxLength:
+            return self.__text
+        
+        newText = self.__text.rstrip()
+
+        # PEP 8 recommends 72 characters for comment text.
+        indentation = self.__getIndent(newText) + '# '
+        maxLength = min(self.__maxLength,
+                        len(indentation) + 72)
+
+        MIN_CHARACTER_REPEAT = 5
+        if (len(newText) - len(newText.rstrip(newText[-1])) >= MIN_CHARACTER_REPEAT and
+                not newText[-1].isalnum()):
+            # Trim comments that end with things like ---------
+            return newText[:maxLength] + self.__eol
+        elif isLast and re.match(r"\s*#+\s*\w+", newText):
+            import textwrap
+            splitLines = textwrap.wrap(newText.lstrip(" \t#"),
+                                       initial_indent=indentation,
+                                       subsequent_indent=indentation,
+                                       width=maxLength,
+                                       break_long_words=False,
+                                       break_on_hyphens=False)
+            return self.__eol.join(splitLines) + self.__eol
+        else:
+            return newText + self.__eol
+    
+    def __breakMultiline(self):
+        """
+        Private method to break the line after an opening bracket.
+        
+        @return line broken in two or None, if a break is not possible
+            (string or None)
+        """
+        indentation = self.__getIndent(self.__text)
+
+        # Special case only: a bracket in a line ending with ',' or '%'.
+        for symbol in '([{':
+            # Only valid if symbol is not on a line by itself.
+            if (
+                symbol in self.__text and
+                self.__text.strip() != symbol and
+                self.__text.rstrip().endswith((',', '%'))
+            ):
+                index = 1 + self.__text.find(symbol)  # just after symbol
+                # Here the remainder would not move to the left.
+                if index <= len(self.__indentWord) + len(indentation):
+                    continue
+
+                if self.__isProbablyInsideStringOrComment(self.__text, index - 1):
+                    continue
+
+                return (self.__text[:index].rstrip() + self.__eol +
+                        indentation + self.__indentWord +
+                        self.__text[index:].lstrip())
+
+        return None
+    
+    def __isProbablyInsideStringOrComment(self, line, index):
+        """
+        Private method to check, if the given position might be inside a
+        string or comment.
+        
+        @param line line to check (string)
+        @param index position inside line to check (integer)
+        @return flag indicating the possibility of being inside a string
+            or comment (boolean)
+        """
+        # Heuristic only; the line is not parsed. The first occurrence of
+        # a string or comment delimiter at or before the given position
+        # is taken as a sign of being inside a string or comment.
+        delimiters = ('"', "'", '#')
+        firstPositions = [line.find(char) for char in delimiters]
+
+        # str.find() yields -1 for a missing delimiter; skip those.
+        for position in firstPositions:
+            if 0 <= position <= index:
+                return True
+
+        return False
+    
+    def __shortenLine(self, tokens, source, indent):
+        """
+        Private method to build candidates for a shortened line of code.
+        
+        @param tokens tokens of the line generated by tokenize (list of token)
+        @param source code string to work at (string)
+        @param indent indentation string of the code line (string)
+        @return candidate replacements for the line (list of string)
+        """
+        candidates = []
+        
+        for tkn in tokens:
+            tokenType = tkn[0]
+            tokenString = tkn[1]
+
+            if (
+                tokenType == tokenize.COMMENT and
+                not self.__prevText.rstrip().endswith('\\')
+            ):
+                # Move inline comments to previous line.
+                offset = tkn[2][1]  # start column of the comment
+                first = source[:offset]
+                second = source[offset:]
+                candidates.append(indent + second.strip() + self.__eol +
+                       indent + first.strip() + self.__eol)
+            elif tokenType == tokenize.OP and tokenString != '=':
+                # Don't break on '=' after keyword as this violates PEP 8.
+
+                assert tokenType != tokenize.INDENT
+
+                offset = tkn[2][1] + 1  # one past the token start
+                first = source[:offset]
+                # Indent the rest by one level or align it after the first '('.
+                secondIndent = indent
+                if first.rstrip().endswith('('):
+                    secondIndent += self.__indentWord
+                elif '(' in first:
+                    secondIndent += ' ' * (1 + first.find('('))
+                else:
+                    secondIndent += self.__indentWord
+
+                second = (secondIndent + source[offset:].lstrip())
+                if not second.strip():
+                    continue
+
+                # Do not begin a line with a comma
+                if second.lstrip().startswith(','):
+                    continue
+                
+                # Do not end a line with a dot
+                if first.rstrip().endswith('.'):
+                    continue
+                
+                if tokenString in '+-*/':  # continued with a backslash
+                    newText = first + ' \\' + self.__eol + second
+                else:
+                    newText = first + self.__eol + second
+
+                # Only fix if syntax is okay.
+                if self.__checkSyntax(self.__normalizeMultiline(newText)):
+                    candidates.append(indent + newText)
+        # Add candidates that break at all tokens of an operator group.
+        for keyTokenStrings in self.ShortenOperatorGroups:
+            shortened = self.__shortenLineAtTokens(
+                tokens, source, indent, keyTokenStrings)
+
+            if shortened is not None and shortened != source:
+                candidates.append(shortened)
+        
+        return candidates
+    
+    def __normalizeMultiline(self, text):
+        """
+        Private method to normalize a code fragment for the syntax check.
+        
+        @param text code fragment to work on (string)
+        @return normalized code fragment (string)
+        """
+        for quote in '\'"':
+            dictPattern = r"^{q}[^{q}]*{q} *: *".format(q=quote)
+            if re.match(dictPattern, text):  # looks like a dict entry
+                if not text.strip().endswith('}'):
+                    text += '}'
+                return '{' + text  # wrap it in braces
+        # A broken function header is returned without 'def' and ':'.
+        if text.startswith('def ') and text.rstrip().endswith(':'):
+            # Do not allow ':' to be alone. That is invalid.
+            splitText = [item.strip() for item in text.split(self.__eol)]
+            if ':' not in splitText and 'def' not in splitText:
+                return text[len('def'):].strip().rstrip(':')
+
+        return text
+    
+    def __shortenLineAtTokens(self, tokens, source, indent, keyTokenStrings):
+        """
+        Private method to break lines at key tokens.
+        
+        @param tokens tokens of the line generated by tokenize (list of token)
+        @param source code string to work at (string)
+        @param indent indentation string of the code line (string)
+        @param keyTokenStrings key tokens to break at (set of string)
+        @return broken code line or None, if it is invalid (string or None)
+        """
+        offsets = []
+        firstParen = True  # the first '(' is always a break point
+        for tkn in tokens:
+            tokenType = tkn[0]
+            tokenString = tkn[1]
+            nextOffset = tkn[2][1] + 1  # one past the token start
+
+            assert tokenType != tokenize.INDENT
+            # Break after key tokens and after the first '(' of the line.
+            if tokenString in keyTokenStrings or (firstParen and
+                                                  tokenString == '('):
+                # Don't split right before newline.
+                if nextOffset < len(source) - 1:
+                    offsets.append(nextOffset)
+
+                if tokenString == '(':
+                    firstParen = False
+
+        currentIndent = None
+        newText = None
+        for text in self.__splitAtOffsets(source, offsets):
+            if newText:
+                newText += self.__eol + currentIndent + text
+                # Lines following an opening bracket are indented deeper.
+                for symbol in '([{':
+                    if text.endswith(symbol):
+                        currentIndent += self.__indentWord
+            else:
+                # First line.
+                newText = text
+                assert not currentIndent
+                currentIndent = self.__indentWord
+
+        assert newText is not None
+
+        if self.__checkSyntax(self.__normalizeMultiline(newText)):
+            return indent + newText  # prefixes the first line only
+        else:
+            return None
+    
+    def __splitAtOffsets(self, line, offsets):
+        """
+        Private method to split the line at the given offsets.
+        
+        @param line line to split (string)
+        @param offsets offsets to split at; duplicates and offsets outside
+            of the line are ignored (list of integer)
+        @return split line (list of string)
+        """
+        result = []
+
+        previousOffset = 0
+        for currentOffset in sorted(offsets):
+            # Only advance on a usable offset to keep the tail complete.
+            if previousOffset < currentOffset < len(line):
+                result.append(line[previousOffset:currentOffset])
+                previousOffset = currentOffset
+
+        result.append(line[previousOffset:])
+        return result
+    
+    def __lineShorteningRank(self, candidate):
+        """
+        Private method to rank a candidate.
+        
+        @param candidate candidate to rank, may have several lines (string)
+        @return rank of the candidate; bad breaks raise it (integer)
+        """
+        rank = 0
+        if candidate.strip():
+            lines = candidate.split(self.__eol)
+            # Take the column after a bracket in the first line as offset.
+            offset = 0
+            if lines[0].rstrip()[-1] not in '([{':
+                for symbol in '([{':
+                    offset = max(offset, 1 + lines[0].find(symbol))
+            # Base rank: longest line (with offset) plus number of lines.
+            maxLength = max([offset + len(x.strip()) for x in lines])
+            rank += maxLength
+            rank += len(lines)
+
+            badStartingSymbol = {
+                '(': ')',
+                '[': ']',
+                '{': '}'}.get(lines[0][-1], None)
+            # A closing bracket right below its opening one is penalized.
+            if len(lines) > 1:
+                if (badStartingSymbol and
+                        lines[1].lstrip().startswith(badStartingSymbol)):
+                    rank += 20
+
+            if re.match(r".*[+\-\*/] \($", lines[0]):
+                # "1 * (\n" is ugly as hell.
+                rank += 100
+
+            for currentLine in lines:
+                for badStart in ['.', '%', '+', '-', '/']:
+                    if currentLine.startswith(badStart):  # at column 0 only
+                        rank += 100
+
+                for ending in '([{':
+                    # Avoid lonely opening. They result in longer lines.
+                    if (currentLine.endswith(ending) and
+                            len(currentLine.strip()) <= len(self.__indentWord)):
+                        rank += 100
+
+                if currentLine.endswith('%'):
+                    rank -= 20
+
+                # Try to break list comprehensions at the "for".
+                if currentLine.lstrip().startswith('for'):
+                    rank -= 50
+
+                rank += 10 * self.__countUnbalancedBrackets(currentLine)
+        else:
+            rank = 100000  # blank candidate
+
+        return max(0, rank)  # bonuses may have made it negative
+    
+    def __countUnbalancedBrackets(self, line):
+        """
+        Private method to count the unmatched brackets of a line.
+        
+        @param line line to work at (string)
+        @return sum of the surplus brackets of all three kinds (integer)
+        """
+        surplus = [
+            abs(line.count(pair[0]) - line.count(pair[1]))
+            for pair in ('()', '[]', '{}')
+        ]
+        return sum(surplus)
     
     def __getIndent(self, line):
         """
@@ -1551,3 +1875,16 @@
         """
         # copied from Pep8Fixer
         return line.replace(line.lstrip(), "")
+    
+    def __checkSyntax(self, code):
+        """
+        Private method to check the syntax of the given code fragment.
+        
+        @param code code fragment to check (string)
+        @return flag indicating syntax is ok (boolean)
+        """
+        code = code.replace("\r\n", "\n").replace("\r", "\n")  # unify EOLs
+        try:
+            return bool(compile(code, '<string>', 'exec'))
+        except (SyntaxError, TypeError, ValueError):
+            return False

eric ide

mercurial