--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eric6/DataViews/CodeMetrics.py	Sun Apr 14 15:09:21 2019 +0200
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2003 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+
+#
+# Code mainly borrowed from the Pythius package which is
+# Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
+#
+
+"""
+Module implementing a simple Python code metrics analyzer.
+"""
+
+from __future__ import unicode_literals
+
+import os
+import io
+import sys
+import keyword
+import token
+import tokenize
+
+import Utilities
+
+# token types of interest; KEYWORD is a pseudo token type (outside the
+# range used by the token module) marking Python keywords
+KEYWORD = token.NT_OFFSET + 1
+COMMENT = tokenize.COMMENT
+INDENT = token.INDENT
+DEDENT = token.DEDENT
+NEWLINE = token.NEWLINE
+EMPTY = tokenize.NL
+
+
+class Token(object):
+    """
+    Class to store the token related info.
+    """
+    def __init__(self, **kw):
+        """
+        Constructor
+
+        @keyparam **kw list of key, value pairs
+        """
+        self.__dict__.update(kw)
+
+
+class Parser(object):
+    """
+    Class used to parse the source code of a Python file.
+    """
+    def parse(self, text):
+        """
+        Public method used to parse the source code.
+
+        @param text the source code as read from a Python source file
+        """
+        self.tokenlist = []
+
+        # normalize the eols to the platform convention
+        text = Utilities.convertLineEnds(text, os.linesep)
+
+        if not text.endswith(os.linesep):
+            text = "{0}{1}".format(text, os.linesep)
+
+        self.lines = text.count(os.linesep)
+
+        source = io.BytesIO(text.encode("utf-8"))
+        try:
+            if sys.version_info[0] == 2:
+                gen = tokenize.generate_tokens(source.readline)
+            else:
+                gen = tokenize.tokenize(source.readline)
+            for toktype, toktext, start, end, line in gen:
+                (srow, scol) = start
+                (erow, ecol) = end
+                if toktype in [token.NEWLINE, tokenize.NL]:
+                    self.__addToken(toktype, os.linesep, srow, scol, line)
+                elif toktype in [token.INDENT, token.DEDENT]:
+                    self.__addToken(toktype, '', srow, scol, line)
+                elif toktype == token.NAME and keyword.iskeyword(toktext):
+                    toktype = KEYWORD
+                    self.__addToken(toktype, toktext, srow, scol, line)
+                else:
+                    self.__addToken(toktype, toktext, srow, scol, line)
+        except tokenize.TokenError as msg:
+            print("Token Error: {0}".format(str(msg)))
+            # __IGNORE_WARNING_M801__
+            return
+
+    def __addToken(self, toktype, toktext, srow, scol, line):
+        """
+        Private method used to add a token to our list of tokens.
+
+        @param toktype the type of the token (int)
+        @param toktext the text of the token (string)
+        @param srow starting row of the token (int)
+        @param scol starting column of the token (int)
+        @param line logical line the token was found in (string)
+        """
+        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
+                                    col=scol, line=line))
+
+
+spacer = ' '
+
+
+class SourceStat(object):
+    """
+    Class used to calculate and store the source code statistics.
+    """
+    def __init__(self):
+        """
+        Constructor
+        """
+        self.identifiers = []
+        # list of identifiers in order of appearance
+        self.active = [('TOTAL ', -1, 0)]
+        # stack of active identifiers and indent levels
+        self.counters = {}
+        # counters per identifier
+        self.indent_level = 0
+
+    def indent(self, tok):
+        """
+        Public method used to increment the indentation level.
+
+        @param tok a token (Token, ignored)
+        """
+        self.indent_level += 1
+
+    def dedent(self, tok):
+        """
+        Public method used to decrement the indentation level.
+
+        @param tok the token to be processed (Token)
+        @exception ValueError raised to indicate an invalid indentation level
+        """
+        self.indent_level -= 1
+        if self.indent_level < 0:
+            raise ValueError("INTERNAL ERROR: Negative indent level")
+
+        # close the identifiers opened at this or a deeper indentation
+        # level and record their line span
+        while self.active and self.active[-1][1] >= self.indent_level:
+            counters = self.counters.setdefault(self.active[-1][0], {})
+            counters['start'] = self.active[-1][2]
+            counters['end'] = tok.row - 1
+            counters['lines'] = tok.row - self.active[-1][2]
+            del self.active[-1]
+
+    def push(self, identifier, row):
+        """
+        Public method used to store an identifier.
+
+        @param identifier the identifier to be remembered (string)
+        @param row the row the identifier is defined in (int)
+        """
+        if len(self.active) > 1 and self.indent_level > self.active[-1][1]:
+            # nested identifiers are qualified with the enclosing one
+            qualified = self.active[-1][0] + '.' + identifier
+        else:
+            qualified = identifier
+        self.active.append((qualified, self.indent_level, row))
+        self.identifiers.append(qualified)
+
+    def inc(self, key, value=1):
+        """
+        Public method used to increment the value of a key.
+
+        @param key the key to be incremented (string)
+        @param value the increment (int)
+        """
+        for counterId, _level, _row in self.active:
+            counters = self.counters.setdefault(counterId, {})
+            counters[key] = counters.setdefault(key, 0) + value
+
+    def getCounter(self, counterId, key):
+        """
+        Public method used to get a specific counter value.
+
+        @param counterId id of the counter (string)
+        @param key key of the value to be retrieved (string)
+        @return the value of the requested counter (int)
+        """
+        return self.counters.get(counterId, {}).get(key, 0)
+
+
+def summarize(total, key, value):
+    """
+    Module function used to collect overall statistics.
+
+    @param total the dictionary for the overall statistics
+    @param key the key to be summarized (string)
+    @param value the value to be added to the overall statistics
+    @return the value added to the overall statistics
+    """
+    total[key] = total.setdefault(key, 0) + value
+    return value
+
+
+def analyze(filename, total):
+    """
+    Module function used to analyze the source of a Python file.
+
+    @param filename name of the Python file to be analyzed (string)
+    @param total dictionary receiving the overall code statistics
+    @return a statistics object with the collected code statistics
+        (SourceStat)
+    """
+    try:
+        text = Utilities.readEncodedFile(filename)[0]
+    except (UnicodeError, IOError):
+        return SourceStat()
+
+    parser = Parser()
+    parser.parse(text)
+
+    stats = SourceStat()
+    stats.inc('lines', parser.lines)
+    for idx, tok in enumerate(parser.tokenlist):
+        # classify the token and update the relevant counters
+        if tok.type == NEWLINE:
+            stats.inc('nloc')
+        elif tok.type == COMMENT:
+            stats.inc('comments')
+            if tok.line.strip() == tok.text:
+                stats.inc('commentlines')
+        elif tok.type == EMPTY:
+            if parser.tokenlist[idx - 1].type == token.OP:
+                stats.inc('nloc')
+            elif parser.tokenlist[idx - 1].type == COMMENT:
+                continue
+            else:
+                stats.inc('empty')
+        elif tok.type == INDENT:
+            stats.indent(tok)
+        elif tok.type == DEDENT:
+            stats.dedent(tok)
+        elif tok.type == KEYWORD:
+            if tok.text in ("class", "def") and \
+               idx + 1 < len(parser.tokenlist):
+                # the identifier is the token following 'class' or 'def'
+                stats.push(parser.tokenlist[idx + 1].text, tok.row)
+
+    # collect overall statistics
+    summarize(total, 'lines', parser.lines)
+    summarize(total, 'bytes', len(text))
+    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
+    summarize(total, 'commentlines',
+              stats.getCounter('TOTAL ', 'commentlines'))
+    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
+    summarize(total, 'non-commentary lines',
+              stats.getCounter('TOTAL ', 'nloc'))
+
+    return stats
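
Note for reviewers: the sketch below shows how this module can be driven. It is a minimal example, not part of the change; it assumes the code runs inside eric6 (so that the Utilities package used by analyze() is importable and eric6/DataViews is on sys.path), and the file name "example.py" is purely illustrative. The counter keys shown are the ones the module actually maintains.

    import CodeMetrics

    total = {}   # receives the accumulated overall statistics
    stats = CodeMetrics.analyze("example.py", total)

    # per-identifier metrics: 'identifiers' lists classes and functions in
    # order of appearance, qualified with their enclosing scope (e.g. "C.m")
    for identifier in stats.identifiers:
        print("{0}: {1} lines".format(
            identifier, stats.getCounter(identifier, 'lines')))

    # overall metrics summarized into 'total' by analyze()
    for key in ('lines', 'comments', 'commentlines', 'empty lines',
                'non-commentary lines'):
        print("{0}: {1}".format(key, total.get(key, 0)))

Passing the same 'total' dictionary to several analyze() calls accumulates project-wide figures, which is how the surrounding DataViews dialog is expected to aggregate multiple files.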