eric: comparison Plugins/CheckerPlugins/Pep8/Pep8Fixer.py

-:12e6e199d0cf
+:a45c59bddc79
 class Pep8LineShortener(object):
 """
 Class used to shorten lines to a given maximum of characters.
 """
+# Groups of operator strings that are tried as break points. __shortenLine
+# iterates over this set and passes each inner group to
+# __shortenLineAtTokens as its keyTokenStrings argument.
+# NOTE(review): the outer container is a frozenset, so the groups are not
+# guaranteed to be visited in the order they are written here.
+ShortenOperatorGroups = frozenset([
+frozenset([',']),
+frozenset(['%']),
+frozenset([',', '(', '[', '{']),
+frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
+])
 def __init__(self, curLine, prevLine, nextLine,
 maxLength=79, eol="\n", indentWord="    "):
 """
 Constructor
 if self.__nextText.lstrip().startswith('#'):
 lastComment = False
 # Wrap commented lines.
 newText = self.__shortenComment(lastComment)
-return True, newText
+if newText == self.__text:
+return False, ""
+else:
+return True, newText
 indent = self.__getIndent(self.__text)
 source = self.__text[len(indent):]
 assert source.lstrip() == source
 sio = io.StringIO(source)
-# Check for multiline string.
+# Check for multi line string.
 try:
 tokens = list(tokenize.generate_tokens(sio.readline))
 except (SyntaxError, tokenize.TokenError):
 multilineCandidate = self.__breakMultiline()
 if multilineCandidate:
 @param isLast flag indicating, that the line is the last comment line
 (boolean)
 @return shortened comment line (string)
 """
-pass
+if len(self.__text) <= self.__maxLength:
+return self.__text
+newText = self.__text.rstrip()
+# PEP 8 recommends 72 characters for comment text.
+indentation = self.__getIndent(newText) + '# '
+maxLength = min(self.__maxLength,
+len(indentation) + 72)
+MIN_CHARACTER_REPEAT = 5
+if (len(newText) - len(newText.rstrip(newText[-1])) >= MIN_CHARACTER_REPEAT and
+not newText[-1].isalnum()):
+# Trim comments that end with things like ---------
+return newText[:maxLength] + self.__eol
+elif isLast and re.match(r"\s*#+\s*\w+", newText):
+import textwrap
+splitLines = textwrap.wrap(newText.lstrip(" \t#"),
+initial_indent=indentation,
+subsequent_indent=indentation,
+width=maxLength,
+break_long_words=False,
+break_on_hyphens=False)
+return self.__eol.join(splitLines) + self.__eol
+else:
+return newText + self.__eol
+def __breakMultiline(self):
+"""
+Private method to break multi line strings.
+@return broken multi line string or None, if a break is not possible
+(string or None)
+"""
+indentation = self.__getIndent(self.__text)
+# Handle special case only.
+for symbol in '([{':
+# Only valid if symbol is not on a line by itself.
+if (
+symbol in self.__text and
+self.__text.strip() != symbol and
+self.__text.rstrip().endswith((',', '%'))
+):
+index = 1 + self.__text.find(symbol)
+if index <= len(self.__indentWord) + len(indentation):
+continue
+if self.__isProbablyInsideStringOrComment(self.__text, index - 1):
+continue
+return (self.__text[:index].rstrip() + self.__eol +
+indentation + self.__indentWord +
+self.__text[index:].lstrip())
+return None
+def __isProbablyInsideStringOrComment(self, line, index):
+"""
+Private method to check, if the given string might be inside a string
+or comment.
+@param line line to check (string)
+@param index position inside line to check (integer)
+@return flag indicating the possibility of being inside a string
+or comment
+"""
+# Check against being in a string.
+for quote in ['"', "'"]:
+pos = line.find(quote)
+if pos != -1 and pos <= index:
+return True
+# Check against being in a comment.
+pos = line.find('#')
+if pos != -1 and pos <= index:
+return True
+return False
+def __shortenLine(self, tokens, source, indent):
+"""
+Private method to shorten a line of code at an operator.
+
+The method collects alternative layouts of the line: an inline comment
+moved onto a line of its own in front of the code, a line break behind
+an operator token other than '=' (kept only if the broken text passes
+__checkSyntax) and the results of __shortenLineAtTokens for the groups
+of ShortenOperatorGroups.
+
+@param tokens tokens of the line as generated by tokenize (list of token)
+@param source code string to work at (string)
+@param indent indentation string of the code line (string)
+@return list of candidates (list of string)
+"""
+candidates = []
+for tkn in tokens:
+tokenType = tkn[0]
+tokenString = tkn[1]
+if (
+tokenType == tokenize.COMMENT and
+not self.__prevText.rstrip().endswith('\\')
+):
+# Move the inline comment onto a line of its own in front of the code.
+offset = tkn[2][1]
+first = source[:offset]
+second = source[offset:]
+candidates.append(indent + second.strip() + self.__eol +
+indent + first.strip() + self.__eol)
+elif tokenType == tokenize.OP and tokenString != '=':
+# Don't break on '=' after keyword as this violates PEP 8.
+# NOTE(review): the assert below cannot fail, tokenType is tokenize.OP here.
+assert tokenType != tokenize.INDENT
+# tkn[2] is the (row, column) start of the token; break one character
+# behind that column.
+# NOTE(review): for operators longer than one character this offset lies
+# inside the operator.
+offset = tkn[2][1] + 1
+first = source[:offset]
+secondIndent = indent
+if first.rstrip().endswith('('):
+secondIndent += self.__indentWord
+elif '(' in first:
+secondIndent += ' ' * (1 + first.find('('))
+else:
+secondIndent += self.__indentWord
+second = (secondIndent + source[offset:].lstrip())
+if not second.strip():
+continue
+# Do not begin a line with a comma
+if second.lstrip().startswith(','):
+continue
+# Do not end a line with a dot
+if first.rstrip().endswith('.'):
+continue
+# Arithmetic operators get an explicit line continuation.
+# NOTE(review): this is a substring test against the string '+-*/'.
+if tokenString in '+-*/':
+newText = first + ' \\' + self.__eol + second
+else:
+newText = first + self.__eol + second
+# Only fix if syntax is okay.
+if self.__checkSyntax(self.__normalizeMultiline(newText)):
+candidates.append(indent + newText)
+for keyTokenStrings in self.ShortenOperatorGroups:
+shortened = self.__shortenLineAtTokens(
+tokens, source, indent, keyTokenStrings)
+if shortened is not None and shortened != source:
+candidates.append(shortened)
+return candidates
+def __normalizeMultiline(self, text):
+"""
+Private method to remove multiline-related code that will cause syntax error.
+@param line code line to work on (string)
+@return normalized code line (string)
+"""
+for quote in '\'"':
+dictPattern = r"^{q}[^{q}]*{q} *: *".format(q=quote)
+if re.match(dictPattern, text):
+if not text.strip().endswith('}'):
+text += '}'
+return '{' + text
+if text.startswith('def ') and text.rstrip().endswith(':'):
+# Do not allow ':' to be alone. That is invalid.
+splitText = [item.strip() for item in text.split(self.__eol)]
+if ':' not in splitText and 'def' not in splitText:
+return text[len('def'):].strip().rstrip(':')
+return text
+def __shortenLineAtTokens(self, tokens, source, indent, keyTokenStrings):
+"""
+Private method to break lines at key tokens.
+
+Break offsets are collected for the tokens contained in keyTokenStrings
+and for an opening parenthesis as long as firstParen is set. The source
+is split at these offsets by __splitAtOffsets and the pieces are joined
+with the end of line string and a growing indentation. The result is
+only returned, if it passes __checkSyntax.
+
+@param tokens tokens of the line as generated by tokenize (list of token)
+@param source code string to work at (string)
+@param indent indentation string of the code line (string)
+@param keyTokenStrings key tokens to break at
+@return broken code line or None, if the broken text is no valid syntax
+(string or None)
+"""
+offsets = []
+firstParen = True
+for tkn in tokens:
+tokenType = tkn[0]
+tokenString = tkn[1]
+# tkn[2] is the (row, column) start of the token; the break position is
+# one character behind that column.
+nextOffset = tkn[2][1] + 1
+assert tokenType != tokenize.INDENT
+if tokenString in keyTokenStrings or (firstParen and
+tokenString == '('):
+# Don't split right before newline.
+if nextOffset < len(source) - 1:
+offsets.append(nextOffset)
+if tokenString == '(':
+firstParen = False
+currentIndent = None
+newText = None
+for text in self.__splitAtOffsets(source, offsets):
+if newText:
+newText += self.__eol + currentIndent + text
+# A piece ending in an opening bracket indents the following pieces deeper.
+for symbol in '([{':
+if text.endswith(symbol):
+currentIndent += self.__indentWord
+else:
+# First line.
+newText = text
+assert not currentIndent
+currentIndent = self.__indentWord
+assert newText is not None
+if self.__checkSyntax(self.__normalizeMultiline(newText)):
+return indent + newText
+else:
+return None
+def __splitAtOffsets(self, line, offsets):
+"""
+Private method to split the line at the given offsets.
+@param line line to split (string)
+@param offsets offsets to split at (list of integer)
+@return split line (list of string)
+"""
+result = []
+previousOffset = 0
+currentOffset = 0
+for currentOffset in sorted(offsets):
+if currentOffset < len(line) and previousOffset != currentOffset:
+result.append(line[previousOffset:currentOffset])
+previousOffset = currentOffset
+result.append(line[currentOffset:])
+return result
+def __lineShorteningRank(self, candidate):
+"""
+Private method to rank a candidate.
+
+The rank is built from the length of the longest line and the number of
+lines of the candidate plus penalties and bonuses for certain line starts
+and endings and for unbalanced brackets. The returned value is never
+negative. Presumably a lower rank marks the better candidate; confirm
+against the caller.
+
+NOTE(review): lines[0].rstrip()[-1] and lines[0][-1] raise an IndexError,
+if the first line of a non-blank candidate is empty.
+
+@param candidate candidate line to rank (string)
+@return rank of the candidate (integer)
+"""
+rank = 0
+if candidate.strip():
+lines = candidate.split(self.__eol)
+offset = 0
+if lines[0].rstrip()[-1] not in '([{':
+for symbol in '([{':
+offset = max(offset, 1 + lines[0].find(symbol))
+maxLength = max([offset + len(x.strip()) for x in lines])
+rank += maxLength
+rank += len(lines)
+# Closing counterpart of an opening bracket ending the first line.
+badStartingSymbol = {
+'(': ')',
+'[': ']',
+'{': '}'}.get(lines[0][-1], None)
+if len(lines) > 1:
+if (badStartingSymbol and
+lines[1].lstrip().startswith(badStartingSymbol)):
+rank += 20
+if re.match(r".*[+\-\*/] \($", lines[0]):
+# "1 * (\n" is ugly as hell.
+rank += 100
+for currentLine in lines:
+# NOTE(review): startswith() is applied to the unstripped line here.
+for badStart in ['.', '%', '+', '-', '/']:
+if currentLine.startswith(badStart):
+rank += 100
+for ending in '([{':
+# Avoid lonely opening. They result in longer lines.
+if (currentLine.endswith(ending) and
+len(currentLine.strip()) <= len(self.__indentWord)):
+rank += 100
+if currentLine.endswith('%'):
+rank -= 20
+# Try to break list comprehensions at the "for".
+if currentLine.lstrip().startswith('for'):
+rank -= 50
+rank += 10 * self.__countUnbalancedBrackets(currentLine)
+else:
+rank = 100000
+return max(0, rank)
+def __countUnbalancedBrackets(self, line):
+"""
+Private method to determine the number of unmatched open/close brackets.
+@param line line to work at (string)
+@return number of unmatched open/close brackets (integer)
+"""
+count = 0
+for opening, closing in ['()', '[]', '{}']:
+count += abs(line.count(opening) - line.count(closing))
+return count
 def __getIndent(self, line):
 """
 Private method to get the indentation string.
 @param line line to determine the indentation string from (string)
 @return indentation string (string)
 """
 # copied from Pep8Fixer
 return line.replace(line.lstrip(), "")
+def __checkSyntax(self, code):
+"""
+Private method to check the syntax of the given code fragment.
+@param code code fragment to check (string)
+@return flag indicating syntax is ok (boolean)
+"""
+code = code.replace("\r\n", "\n").replace("\r", "\n")
+try:
+return compile(code, '<string>', 'exec')
+except (SyntaxError, TypeError, UnicodeDecodeError):
+return False

Mercurial Repositories > eric / file comparison

comparison: Plugins/CheckerPlugins/Pep8/Pep8Fixer.py

Plugins/CheckerPlugins/Pep8/Pep8Fixer.py