1538 |
1548 |
1539 @param isLast flag indicating, that the line is the last comment line |
1549 @param isLast flag indicating, that the line is the last comment line |
1540 (boolean) |
1550 (boolean) |
1541 @return shortened comment line (string) |
1551 @return shortened comment line (string) |
1542 """ |
1552 """ |
1543 pass |
1553 if len(self.__text) <= self.__maxLength: |
|
1554 return self.__text |
|
1555 |
|
1556 newText = self.__text.rstrip() |
|
1557 |
|
1558 # PEP 8 recommends 72 characters for comment text. |
|
1559 indentation = self.__getIndent(newText) + '# ' |
|
1560 maxLength = min(self.__maxLength, |
|
1561 len(indentation) + 72) |
|
1562 |
|
1563 MIN_CHARACTER_REPEAT = 5 |
|
1564 if (len(newText) - len(newText.rstrip(newText[-1])) >= MIN_CHARACTER_REPEAT and |
|
1565 not newText[-1].isalnum()): |
|
1566 # Trim comments that end with things like --------- |
|
1567 return newText[:maxLength] + self.__eol |
|
1568 elif isLast and re.match(r"\s*#+\s*\w+", newText): |
|
1569 import textwrap |
|
1570 splitLines = textwrap.wrap(newText.lstrip(" \t#"), |
|
1571 initial_indent=indentation, |
|
1572 subsequent_indent=indentation, |
|
1573 width=maxLength, |
|
1574 break_long_words=False, |
|
1575 break_on_hyphens=False) |
|
1576 return self.__eol.join(splitLines) + self.__eol |
|
1577 else: |
|
1578 return newText + self.__eol |
|
1579 |
|
def __breakMultiline(self):
    """
    Private method to break a line right behind its first opening bracket.

    Only lines ending with a comma or a percent sign are considered. The
    first occurrence of each of '(', '[' and '{' is tried in that order;
    the text following the bracket is moved to a new line that is
    indented by one additional indentation word.

    @return broken multi line string or None, if a break is not possible
        (string or None)
    """
    text = self.__text
    indentation = self.__getIndent(text)

    # Handle special case only: the line must end with ',' or '%'.
    if not text.rstrip().endswith((',', '%')):
        return None

    bareText = text.strip()
    for bracket in '([{':
        bracketPos = text.find(bracket)
        # Only valid if the bracket exists and is not on a line by itself.
        if bracketPos == -1 or bareText == bracket:
            continue

        splitPos = bracketPos + 1
        # A bracket this close to the start of the line gains nothing.
        if splitPos <= len(self.__indentWord) + len(indentation):
            continue

        if self.__isProbablyInsideStringOrComment(text, bracketPos):
            continue

        head = text[:splitPos].rstrip()
        tail = text[splitPos:].lstrip()
        return head + self.__eol + indentation + self.__indentWord + tail

    return None
|
1610 |
|
def __isProbablyInsideStringOrComment(self, line, index):
    """
    Private method to check, if the given position might be inside a
    string or comment.

    This is a heuristic: the position is reported as suspicious as soon
    as the first double quote, single quote or hash character of the line
    is located at or before it.

    @param line line to check (string)
    @param index position inside line to check (integer)
    @return flag indicating the possibility of being inside a string
        or comment (boolean)
    """
    # str.find() yields -1 for a missing marker, which the lower bound
    # of the chained comparison filters out.
    return any(
        0 <= line.find(marker) <= index
        for marker in ('"', "'", '#')
    )
|
1633 |
|
def __shortenLine(self, tokens, source, indent):
    """
    Private method to collect candidates for a shortened line of code.

    Three kinds of candidates are generated: an inline comment is moved
    to its own line in front of the code, the line is broken right behind
    an operator (if the result passes the syntax check) and the line is
    broken at every token of one of the ShortenOperatorGroups.

    @param tokens tokens of the line as generated by tokenize
        (list of token)
    @param source code string to work at (string)
    @param indent indentation string of the code line (string)
    @return list of candidates (list of string)
    """
    lineEnd = self.__eol
    results = []

    for tkn in tokens:
        kind = tkn[0]
        string = tkn[1]

        if kind == tokenize.COMMENT:
            if not self.__prevText.rstrip().endswith('\\'):
                # Move inline comments to previous line.
                column = tkn[2][1]
                codePart = source[:column].strip()
                commentPart = source[column:].strip()
                results.append(indent + commentPart + lineEnd +
                               indent + codePart + lineEnd)
            continue

        # Don't break on '=' after keyword as this violates PEP 8.
        if kind != tokenize.OP or string == '=':
            continue

        splitPos = tkn[2][1] + 1
        head = source[:splitPos]

        # Align the continuation with the first opening parenthesis
        # unless the break happens directly behind a parenthesis.
        parenPos = head.find('(')
        if parenPos == -1 or head.rstrip().endswith('('):
            tailIndent = indent + self.__indentWord
        else:
            tailIndent = indent + ' ' * (1 + parenPos)

        tail = tailIndent + source[splitPos:].lstrip()

        if (
            # Nothing would be left for the second line.
            not tail.strip() or
            # Do not begin a line with a comma.
            tail.lstrip().startswith(',') or
            # Do not end a line with a dot.
            head.rstrip().endswith('.')
        ):
            continue

        # Arithmetic operators need an explicit line continuation.
        if string in '+-*/':
            broken = head + ' \\' + lineEnd + tail
        else:
            broken = head + lineEnd + tail

        # Only fix if syntax is okay.
        if self.__checkSyntax(self.__normalizeMultiline(broken)):
            results.append(indent + broken)

    for tokenGroup in self.ShortenOperatorGroups:
        attempt = self.__shortenLineAtTokens(
            tokens, source, indent, tokenGroup)
        if attempt is not None and attempt != source:
            results.append(attempt)

    return results
|
1704 |
|
def __normalizeMultiline(self, text):
    """
    Private method to remove multiline-related code that will cause
    syntax error.

    A fragment starting with a quoted dictionary key followed by a colon
    is wrapped in braces. A fragment starting with 'def ' and ending with
    a colon is returned without the leading 'def' and the trailing colon,
    unless one of its lines consists of 'def' or ':' only.

    @param text code fragment to work on (string)
    @return normalized code fragment (string)
    """
    for quote in ('\'', '"'):
        dictPattern = r"^{q}[^{q}]*{q} *: *".format(q=quote)
        if re.match(dictPattern, text) is None:
            continue

        # Looks like a dictionary entry; make it a complete literal.
        closing = '' if text.strip().endswith('}') else '}'
        return '{' + text + closing

    if text.startswith('def ') and text.rstrip().endswith(':'):
        # Do not allow ':' to be alone. That is invalid.
        strippedLines = {item.strip() for item in text.split(self.__eol)}
        if ':' not in strippedLines and 'def' not in strippedLines:
            return text[len('def'):].strip().rstrip(':')

    return text
|
1726 |
|
def __shortenLineAtTokens(self, tokens, source, indent, keyTokenStrings):
    """
    Private method to break lines at key tokens.

    The line is split behind every key token and behind the first opening
    parenthesis. The pieces are joined again with increasing indentation
    after each piece that ends with an opening bracket.

    @param tokens tokens of the line as generated by tokenize
        (list of token)
    @param source code string to work at (string)
    @param indent indentation string of the code line (string)
    @param keyTokenStrings key tokens to break at
    @return broken code line or None, if the result does not pass the
        syntax check (string or None)
    """
    breakPoints = []
    parenPending = True
    for tkn in tokens:
        kind = tkn[0]
        string = tkn[1]
        afterToken = tkn[2][1] + 1

        assert kind != tokenize.INDENT

        isOpenParen = string == '('
        if string in keyTokenStrings or (parenPending and isOpenParen):
            # Don't split right before newline.
            if afterToken < len(source) - 1:
                breakPoints.append(afterToken)

        if isOpenParen:
            # Only the very first parenthesis is a free break point.
            parenPending = False

    nestedIndent = None
    joined = None
    for piece in self.__splitAtOffsets(source, breakPoints):
        if not joined:
            # First line.
            joined = piece
            assert not nestedIndent
            nestedIndent = self.__indentWord
            continue

        joined += self.__eol + nestedIndent + piece
        # Whatever follows an opening bracket is nested one level deeper.
        if piece.endswith(('(', '[', '{')):
            nestedIndent += self.__indentWord

    assert joined is not None

    if not self.__checkSyntax(self.__normalizeMultiline(joined)):
        return None

    return indent + joined
|
1776 |
|
def __splitAtOffsets(self, line, offsets):
    """
    Private method to split the line at the given offsets.

    Offsets are processed in ascending order; duplicates and offsets that
    do not point inside the line are ignored. The pieces always add up to
    the complete line again.

    @param line line to split (string)
    @param offsets offsets to split at (list of integer)
    @return split line (list of string)
    """
    result = []

    previousOffset = 0
    for currentOffset in sorted(set(offsets)):
        # Splitting at the very start or at/behind the end of the line
        # would only create empty pieces.
        if not 0 < currentOffset < len(line):
            continue

        result.append(line[previousOffset:currentOffset])
        previousOffset = currentOffset

    # The remainder starts at the last offset actually used, not at the
    # last offset seen; otherwise an out of range offset drops the tail.
    result.append(line[previousOffset:])

    return result
|
1797 |
|
def __lineShorteningRank(self, candidate):
    """
    Private method to rank a candidate.

    Lower ranks are better. The rank starts with the length of the
    longest line plus the number of lines and is then adjusted by
    penalties and bonuses for the way the lines start and end.

    @param candidate candidate line to rank (string)
    @return rank of the candidate (integer)
    """
    if not candidate.strip():
        # An empty candidate must never be chosen.
        return 100000

    lines = candidate.split(self.__eol)
    firstLine = lines[0]

    # Lines following an opening bracket that is not at the end of the
    # first line count as if they were aligned with that bracket.
    alignment = 0
    if firstLine.rstrip()[-1] not in '([{':
        alignment = 1 + max(firstLine.find(bracket) for bracket in '([{')

    rank = max(alignment + len(entry.strip()) for entry in lines)
    rank += len(lines)

    closingFor = {'(': ')', '[': ']', '{': '}'}
    badStartingSymbol = closingFor.get(firstLine[-1])

    # An opening bracket directly followed by its closing counterpart on
    # the next line is not desirable.
    if (
        len(lines) > 1 and
        badStartingSymbol and
        lines[1].lstrip().startswith(badStartingSymbol)
    ):
        rank += 20

    if re.match(r".*[+\-\*/] \($", firstLine):
        # "1 * (\n" is ugly as hell.
        rank += 100

    for currentLine in lines:
        if currentLine.startswith(('.', '%', '+', '-', '/')):
            rank += 100

        # Avoid lonely opening. They result in longer lines.
        if (
            currentLine.endswith(('(', '[', '{')) and
            len(currentLine.strip()) <= len(self.__indentWord)
        ):
            rank += 100

        if currentLine.endswith('%'):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if currentLine.lstrip().startswith('for'):
            rank -= 50

        rank += 10 * self.__countUnbalancedBrackets(currentLine)

    return max(0, rank)
|
1855 |
|
def __countUnbalancedBrackets(self, line):
    """
    Private method to determine the number of unmatched open/close
    brackets.

    For each kind of bracket the difference between the number of opening
    and closing characters is determined; the differences are added up.

    @param line line to work at (string)
    @return number of unmatched open/close brackets (integer)
    """
    bracketPairs = (('(', ')'), ('[', ']'), ('{', '}'))
    return sum(
        abs(line.count(opener) - line.count(closer))
        for opener, closer in bracketPairs
    )
1544 |
1868 |
def __getIndent(self, line):
    """
    Private method to get the indentation string.

    The indentation is the leading whitespace of the line; a line
    consisting of whitespace only is returned unchanged.

    @param line line to determine the indentation string from (string)
    @return indentation string (string)
    """
    # copied from Pep8Fixer
    indentLength = len(line) - len(line.lstrip())
    return line[:indentLength]
|
1878 |
|
1879 def __checkSyntax(self, code): |
|
1880 """ |
|
1881 Private method to check the syntax of the given code fragment. |
|
1882 |
|
1883 @param code code fragment to check (string) |
|
1884 @return flag indicating syntax is ok (boolean) |
|
1885 """ |
|
1886 code = code.replace("\r\n", "\n").replace("\r", "\n") |
|
1887 try: |
|
1888 return compile(code, '<string>', 'exec') |
|
1889 except (SyntaxError, TypeError, UnicodeDecodeError): |
|
1890 return False |