Plugins/CheckerPlugins/Pep8/Pep8Fixer.py

changeset 2880
a45c59bddc79
parent 2879
12e6e199d0cf
child 2882
9b97bc92fdda
equal deleted inserted replaced
2879:12e6e199d0cf 2880:a45c59bddc79
1458 1458
1459 class Pep8LineShortener(object): 1459 class Pep8LineShortener(object):
1460 """ 1460 """
1461 Class used to shorten lines to a given maximum of characters. 1461 Class used to shorten lines to a given maximum of characters.
1462 """ 1462 """
# Groups of operator strings; __shortenLine hands each group to
# __shortenLineAtTokens as the set of tokens to break a line at.
ShortenOperatorGroups = frozenset((
    frozenset({','}),
    frozenset({'%'}),
    frozenset({',', '(', '[', '{'}),
    frozenset({',', '(', '[', '{', '%', '+', '-', '*', '/', '//'}),
))
1469
1463 def __init__(self, curLine, prevLine, nextLine, 1470 def __init__(self, curLine, prevLine, nextLine,
1464 maxLength=79, eol="\n", indentWord=" "): 1471 maxLength=79, eol="\n", indentWord=" "):
1465 """ 1472 """
1466 Constructor 1473 Constructor
1467 1474
1492 if self.__nextText.lstrip().startswith('#'): 1499 if self.__nextText.lstrip().startswith('#'):
1493 lastComment = False 1500 lastComment = False
1494 1501
1495 # Wrap commented lines. 1502 # Wrap commented lines.
1496 newText = self.__shortenComment(lastComment) 1503 newText = self.__shortenComment(lastComment)
1497 return True, newText 1504 if newText == self.__text:
1505 return False, ""
1506 else:
1507 return True, newText
1498 1508
1499 indent = self.__getIndent(self.__text) 1509 indent = self.__getIndent(self.__text)
1500 source = self.__text[len(indent):] 1510 source = self.__text[len(indent):]
1501 assert source.lstrip() == source 1511 assert source.lstrip() == source
1502 sio = io.StringIO(source) 1512 sio = io.StringIO(source)
1503 1513
1504 # Check for multiline string. 1514 # Check for multi line string.
1505 try: 1515 try:
1506 tokens = list(tokenize.generate_tokens(sio.readline)) 1516 tokens = list(tokenize.generate_tokens(sio.readline))
1507 except (SyntaxError, tokenize.TokenError): 1517 except (SyntaxError, tokenize.TokenError):
1508 multilineCandidate = self.__breakMultiline() 1518 multilineCandidate = self.__breakMultiline()
1509 if multilineCandidate: 1519 if multilineCandidate:
1538 1548
1539 @param isLast flag indicating, that the line is the last comment line 1549 @param isLast flag indicating, that the line is the last comment line
1540 (boolean) 1550 (boolean)
1541 @return shortened comment line (string) 1551 @return shortened comment line (string)
1542 """ 1552 """
1543 pass 1553 if len(self.__text) <= self.__maxLength:
1554 return self.__text
1555
1556 newText = self.__text.rstrip()
1557
1558 # PEP 8 recommends 72 characters for comment text.
1559 indentation = self.__getIndent(newText) + '# '
1560 maxLength = min(self.__maxLength,
1561 len(indentation) + 72)
1562
1563 MIN_CHARACTER_REPEAT = 5
1564 if (len(newText) - len(newText.rstrip(newText[-1])) >= MIN_CHARACTER_REPEAT and
1565 not newText[-1].isalnum()):
1566 # Trim comments that end with things like ---------
1567 return newText[:maxLength] + self.__eol
1568 elif isLast and re.match(r"\s*#+\s*\w+", newText):
1569 import textwrap
1570 splitLines = textwrap.wrap(newText.lstrip(" \t#"),
1571 initial_indent=indentation,
1572 subsequent_indent=indentation,
1573 width=maxLength,
1574 break_long_words=False,
1575 break_on_hyphens=False)
1576 return self.__eol.join(splitLines) + self.__eol
1577 else:
1578 return newText + self.__eol
1579
def __breakMultiline(self):
    """
    Private method to break multi line strings.

    The line is split right behind the first occurrence of an opening
    bracket, provided the line ends with ',' or '%'. The remainder is
    placed on a new line indented by one additional indent word.

    @return broken multi line string or None, if a break is not possible
        (string or None)
    """
    line = self.__text
    leading = self.__getIndent(line)
    stripped = line.strip()
    endsWithContinuation = line.rstrip().endswith((',', '%'))

    # Handle special case only.
    for bracket in '([{':
        # Only valid if the bracket is not on a line by itself.
        if (
            bracket not in line or
            stripped == bracket or
            not endsWithContinuation
        ):
            continue

        breakPos = line.find(bracket) + 1

        # Skip brackets located within the indentation plus one indent
        # word from the start of the line.
        if breakPos <= len(self.__indentWord) + len(leading):
            continue

        # Skip brackets that may be part of a string or a comment.
        if self.__isProbablyInsideStringOrComment(line, breakPos - 1):
            continue

        head = line[:breakPos].rstrip()
        tail = line[breakPos:].lstrip()
        return head + self.__eol + leading + self.__indentWord + tail

    return None
1610
def __isProbablyInsideStringOrComment(self, line, index):
    """
    Private method to check, if the given position might be inside a
    string or comment.

    This is a heuristic only: a quote character or a hash sign found at
    or before the position counts as a hit; the line is not parsed.

    @param line line to check (string)
    @param index position inside line to check (integer)
    @return flag indicating the possibility of being inside a string
        or comment (boolean)
    """
    # Both quote styles and the comment marker are treated alike.
    return any(
        0 <= line.find(marker) <= index
        for marker in ('"', "'", '#')
    )
1633
def __shortenLine(self, tokens, source, indent):
    """
    Private method to shorten a line of code at an operator.

    An inline comment token yields a candidate with the comment placed
    on its own line in front of the code. Every operator token except
    '=' yields a candidate that breaks the line right behind that
    operator, provided the result passes the syntax check. Finally one
    candidate per entry of ShortenOperatorGroups is added.

    @param tokens tokens of the line as generated by tokenize (list of token)
    @param source code string to work at (string)
    @param indent indentation string of the code line (string)
    @return list of candidates (list of string)
    """
    candidates = []

    for tkn in tokens:
        tokenType = tkn[0]
        tokenString = tkn[1]

        if (
            tokenType == tokenize.COMMENT and
            not self.__prevText.rstrip().endswith('\\')
        ):
            # Move inline comments to previous line.
            offset = tkn[2][1]
            first = source[:offset]
            second = source[offset:]
            candidates.append(indent + second.strip() + self.__eol +
                              indent + first.strip() + self.__eol)
        elif tokenType == tokenize.OP and tokenString != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            # Break behind the complete operator. The end column of the
            # token is used because "start column + 1" would cut multi
            # character operators like '==' or '//' in half.
            offset = tkn[3][1]
            first = source[:offset]

            secondIndent = indent
            if first.rstrip().endswith('('):
                secondIndent += self.__indentWord
            elif '(' in first:
                # align with the column behind the first opening paren
                secondIndent += ' ' * (1 + first.find('('))
            else:
                secondIndent += self.__indentWord

            second = secondIndent + source[offset:].lstrip()
            if not second.strip():
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue

            # Do not end a line with a dot
            if first.rstrip().endswith('.'):
                continue

            if tokenString in '+-*/':
                # arithmetic operators get an explicit line continuation
                newText = first + ' \\' + self.__eol + second
            else:
                newText = first + self.__eol + second

            # Only fix if syntax is okay.
            if self.__checkSyntax(self.__normalizeMultiline(newText)):
                candidates.append(indent + newText)

    for keyTokenStrings in self.ShortenOperatorGroups:
        shortened = self.__shortenLineAtTokens(
            tokens, source, indent, keyTokenStrings)

        if shortened is not None and shortened != source:
            candidates.append(shortened)

    return candidates
1704
def __normalizeMultiline(self, text):
    """
    Private method to remove multiline-related code that will cause
    syntax error.

    A fragment starting with a quoted dictionary key is wrapped in
    braces; a 'def' statement may be reduced to its name and parameter
    part.

    @param text code fragment to work on (string)
    @return normalized code fragment (string)
    """
    for quote in ('\'', '"'):
        dictPattern = r"^{q}[^{q}]*{q} *: *".format(q=quote)
        if re.match(dictPattern, text) is None:
            continue

        # Looks like a dictionary entry: close it if needed and open it.
        closing = '' if text.strip().endswith('}') else '}'
        return '{' + text + closing

    looksLikeDef = text.startswith('def ') and text.rstrip().endswith(':')
    if looksLikeDef:
        # Do not allow ':' to be alone. That is invalid.
        parts = [part.strip() for part in text.split(self.__eol)]
        if not (':' in parts or 'def' in parts):
            return text[len('def'):].strip().rstrip(':')

    return text
1726
def __shortenLineAtTokens(self, tokens, source, indent, keyTokenStrings):
    """
    Private method to break lines at key tokens.

    The line is split behind every token contained in keyTokenStrings
    and behind the first opening parenthesis. The pieces are joined
    with the line separator; continuation lines are indented by one
    indent word plus one more for every piece ending in an opening
    bracket.

    @param tokens tokens of the line as generated by tokenize (list of token)
    @param source code string to work at (string)
    @param indent indentation string of the code line (string)
    @param keyTokenStrings key tokens to break at
    @return broken code line or None, if the result does not pass the
        syntax check (string or None)
    """
    offsets = []
    firstParen = True
    for tkn in tokens:
        tokenType = tkn[0]
        tokenString = tkn[1]
        # Split behind the complete token. The end column is used
        # because "start column + 1" would split a multi character
        # token like '//' in the middle.
        nextOffset = tkn[3][1]

        assert tokenType != tokenize.INDENT

        if tokenString in keyTokenStrings or (firstParen and
                                              tokenString == '('):
            # Don't split right before newline.
            if nextOffset < len(source) - 1:
                offsets.append(nextOffset)

            if tokenString == '(':
                firstParen = False

    pieces = self.__splitAtOffsets(source, offsets)

    # The first piece starts the result; all others become
    # continuation lines.
    newText = pieces[0]
    currentIndent = self.__indentWord
    for text in pieces[1:]:
        newText += self.__eol + currentIndent + text

        # Everything behind an opening bracket is indented one level
        # deeper.
        if text.endswith(('(', '[', '{')):
            currentIndent += self.__indentWord

    if self.__checkSyntax(self.__normalizeMultiline(newText)):
        return indent + newText
    else:
        return None
1776
def __splitAtOffsets(self, line, offsets):
    """
    Private method to split the line at the given offsets.

    Offsets are processed in ascending order. Offsets that do not lie
    strictly between the previous split position and the end of the
    line (duplicates, zero, out of range values) are ignored, so no
    part of the line gets lost.

    @param line line to split (string)
    @param offsets offsets to split at (list of integer)
    @return split line (list of string)
    """
    result = []

    previousOffset = 0
    for currentOffset in sorted(offsets):
        if previousOffset < currentOffset < len(line):
            result.append(line[previousOffset:currentOffset])
            previousOffset = currentOffset

    # The tail starts at the last split actually made, not at the last
    # offset seen; an ignored offset must not truncate the line.
    result.append(line[previousOffset:])

    return result
1797
def __lineShorteningRank(self, candidate):
    """
    Private method to rank a candidate.

    The rank starts with the length of the longest line plus the number
    of lines. Layouts considered ugly increase it, preferred break
    positions decrease it. A candidate consisting of white space only
    is ranked 100000.

    @param candidate candidate line to rank (string)
    @return rank of the candidate (integer)
    """
    if not candidate.strip():
        return 100000

    openers = ('(', '[', '{')
    lines = candidate.split(self.__eol)
    firstLine = lines[0]

    offset = 0
    # endswith() instead of indexing copes with a blank first line
    if not firstLine.rstrip().endswith(openers):
        for symbol in openers:
            offset = max(offset, 1 + firstLine.find(symbol))

    rank = max(offset + len(x.strip()) for x in lines)
    rank += len(lines)

    # Slicing yields '' for an empty first line, which maps to None.
    badStartingSymbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(firstLine[-1:], None)

    if (
        len(lines) > 1 and
        badStartingSymbol and
        lines[1].lstrip().startswith(badStartingSymbol)
    ):
        # second line starts with the bracket closing the first line
        rank += 20

    if re.match(r".*[+\-\*/] \($", firstLine):
        # "1 * (\n" is ugly as hell.
        rank += 100

    for currentLine in lines:
        if currentLine.startswith(('.', '%', '+', '-', '/')):
            rank += 100

        # Avoid lonely opening. They result in longer lines.
        if (
            currentLine.endswith(openers) and
            len(currentLine.strip()) <= len(self.__indentWord)
        ):
            rank += 100

        if currentLine.endswith('%'):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if currentLine.lstrip().startswith('for'):
            rank -= 50

        rank += 10 * self.__countUnbalancedBrackets(currentLine)

    return max(0, rank)
1855
def __countUnbalancedBrackets(self, line):
    """
    Private method to determine the number of unmatched open/close
    brackets.

    Only the number of occurrences per bracket kind is compared; the
    order of the brackets is not taken into account.

    @param line line to work at (string)
    @return number of unmatched open/close brackets (integer)
    """
    bracketPairs = (('(', ')'), ('[', ']'), ('{', '}'))
    return sum(
        abs(line.count(left) - line.count(right))
        for left, right in bracketPairs
    )
1544 1868
def __getIndent(self, line):
    """
    Private method to get the indentation string.

    @param line line to determine the indentation string from (string)
    @return indentation string (string)
    """
    # The indentation is whatever lstrip() would remove from the front.
    indentLength = len(line) - len(line.lstrip())
    return line[:indentLength]
1878
def __checkSyntax(self, code):
    """
    Private method to check the syntax of the given code fragment.

    The fragment is compiled, but never executed.

    @param code code fragment to check (string)
    @return flag indicating syntax is ok (boolean)
    """
    # compile() is fed "\n" line ends only
    code = code.replace("\r\n", "\n").replace("\r", "\n")
    try:
        # dont_inherit keeps future statements of this module from
        # influencing the compilation of the fragment.
        compile(code, '<string>', 'exec', dont_inherit=True)
    except (SyntaxError, TypeError, ValueError):
        # ValueError covers UnicodeDecodeError and the error raised for
        # source containing NUL bytes on some Python versions.
        return False
    return True

eric ide

mercurial