DebugClients/Python/coverage/phystokens.py

author      Detlev Offenbach <detlev@die-offenbachs.de>
date        Fri, 10 Feb 2012 19:33:45 +0100
changeset   1607:67c7ffaed401
parent      790:2c0ea0163ef4
child       3495:fac17a82b431
permissions -rw-r--r--

Fixed an issue in the debugger backends for Python and Python3 that caused file names in exceptions to be converted to all lowercase on case-insensitive filesystems.
(transplanted from 446515c955f68bfffac7f17c30763debb1564197)

"""Better tokenizing for coverage.py."""

import keyword, re, token, tokenize
from .backward import StringIO              # pylint: disable-msg=W0622

def phys_tokens(toks):
    """Return all physical tokens, even line continuations.

    tokenize.generate_tokens() doesn't return a token for the backslash that
    continues lines.  This wrapper provides those tokens so that we can
    re-create a faithful representation of the original source.

    Returns the same values as generate_tokens().

    """
    last_line = None
    last_lineno = -1
    last_ttype = None
    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
        if last_lineno != elineno:
            if last_line and last_line.endswith("\\\n"):
                # We are at the beginning of a new line, and the last line
                # ended with a backslash.  We probably have to inject a
                # backslash token into the stream. Unfortunately, there's more
                # to figure out.  This code::
                #
                #   usage = """\
                #   HEY THERE
                #   """
                #
                # triggers this condition, but the token text is::
                #
                #   '"""\\\nHEY THERE\n"""'
                #
                # so we need to figure out if the backslash is already in the
                # string token or not.
                inject_backslash = True
                if last_ttype == tokenize.COMMENT:
                    # Comments like this \
                    # should never result in a new token.
                    inject_backslash = False
                elif ttype == token.STRING:
                    if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
                        # It's a multiline string and the first line ends with
                        # a backslash, so we don't need to inject another.
                        inject_backslash = False
                if inject_backslash:
                    # Figure out what column the backslash is in.
                    ccol = len(last_line.split("\n")[-2]) - 1
                    # Yield the token, with a fake token type.
                    yield (
                        99999, "\\\n",
                        (slineno, ccol), (slineno, ccol+2),
                        last_line
                        )
            last_line = ltext
            last_ttype = ttype
        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
        last_lineno = elineno
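
# A minimal usage sketch (illustrative only, not part of the module): run
# tokenize output through phys_tokens() and the stream gains a synthetic
# token, with the fake type 99999, for each backslash line continuation:
#
#   toks = tokenize.generate_tokens(StringIO("a = 1 + \\\n    2\n").readline)
#   for tok in phys_tokens(toks):
#       print(tok)
#
# generate_tokens() alone never reports the continuation backslash.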


def source_token_lines(source):
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """
    ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
    line = []
    col = 0
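    # Expand tabs to spaces so the tokenizer's column numbers match what a
    # reader of the source sees.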
    tokgen = tokenize.generate_tokens(StringIO(source.expandtabs(8)).readline)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
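        # Split the token text on newlines, keeping each newline as its own
        # list item thanks to the capturing group.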
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == '':
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", " " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line
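
# For example (illustrative; the exact classes depend on the tokenizer):
#
#   for line in source_token_lines("def hello():\n    return 1\n"):
#       print(line)
#
# prints two lists, one per source line:
#
#   [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ('op', ')'), ('op', ':')]
#   [('ws', '    '), ('key', 'return'), ('ws', ' '), ('num', '1')]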

#
# eflag: FileType = Python2
