DataViews/CodeMetrics.py

changeset 0:de9c2efb9d02
child 12:1d8dd9706f46
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DataViews/CodeMetrics.py	Mon Dec 28 16:03:33 2009 +0000
@@ -0,0 +1,302 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2003 - 2009 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+
+#
+# Code mainly borrowed from the Pythius package which is
+# Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
+#
+
+"""
+Module implementing a simple Python code metrics analyzer.
+
+@exception ValueError the tokenize module is too old
+"""
+
+import os
+import cStringIO
+import keyword
+import token
+import tokenize
+if not hasattr(tokenize, 'NL'):
+    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
+    
+import Utilities
+
+KEYWORD     = token.NT_OFFSET + 1
+COMMENT     = tokenize.COMMENT
+INDENT      = token.INDENT
+DEDENT      = token.DEDENT
+NEWLINE     = token.NEWLINE
+EMPTY       = tokenize.NL
+
+class Token(object):
+    """
+    Class to store the token-related information.
+    """
+    def __init__(self, **kw):
+        """
+        Constructor
+        
+        @param **kw key, value pairs defining the token's attributes
+        """
+        self.__dict__.update(kw)
+
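+# A minimal usage sketch (values are hypothetical): any keyword arguments
+# passed to the constructor become instance attributes.
+#
+#     tok = Token(type=token.NAME, text="spam", row=1, col=0, line="spam = 1")
+#     tok.text    # -> "spam"
+#     tok.row     # -> 1
+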
+class Parser(object):
+    """
+    Class used to parse the source code of a Python file.
+    """
+    def parse(self, text):
+        """
+        Public method used to parse the source code.
+        
+        @param text the source code as read from a Python source file
+        """
+        self.tokenlist = []
+        
+        # convert eols
+        text = Utilities.convertLineEnds(text, os.linesep)
+        
+        if not text.endswith(os.linesep):
+            text = "%s%s" % (text, os.linesep)
+            
+        self.lines = text.count(os.linesep)
+        
+        source = cStringIO.StringIO(text)
+        try:
+            tokenize.tokenize(source.readline, self.__tokeneater)
+        except tokenize.TokenError, msg:
+            print "Token Error: %s" % str(msg)
+    
+    def __addToken(self, toktype, toktext, srow, scol, line):
+        """
+        Private method used to add a token to our list of tokens.
+        
+        @param toktype the type of the token (int)
+        @param toktext the text of the token (string)
+        @param srow starting row of the token (int)
+        @param scol starting column of the token (int)
+        @param line the logical line the token was found in (string)
+        """
+        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow, 
+                                    col=scol, line=line))
+        
+    def __tokeneater(self, toktype, toktext, (srow, scol), (erow, ecol), line):
+        """
+        Private method called by tokenize.tokenize.
+        
+        @param toktype the type of the token (int)
+        @param toktext the text of the token (string)
+        @param srow starting row of the token (int)
+        @param scol starting column of the token (int)
+        @param erow ending row of the token (int)
+        @param ecol ending column of the token (int)
+        @param line the logical line the token was found in (string)
+        """
+        if toktype in [token.NEWLINE, tokenize.NL]:
+            self.__addToken(toktype, os.linesep, srow, scol, line)
+            return
+            
+        if toktype in [token.INDENT, token.DEDENT]:
+            self.__addToken(toktype, '', srow, scol, line)
+            return
+            
+        if toktype == token.NAME and keyword.iskeyword(toktext):
+            toktype = KEYWORD
+            
+        self.__addToken(toktype, toktext, srow, scol, line)
+
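+# A minimal usage sketch for Parser (assuming `src` holds the contents of a
+# Python source file as a string); the collected tokens end up in tokenlist.
+#
+#     p = Parser()
+#     p.parse(src)
+#     for tok in p.tokenlist:
+#         print tok.type, repr(tok.text), tok.row
+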
+spacer = ' '
+
+class SourceStat(object):
+    """
+    Class used to calculate and store the source code statistics.
+    """
+    def __init__(self):
+        """
+        Constructor
+        """
+        self.identifiers = [] # list of identifiers in order of appearance
+        self.active = [('TOTAL ', -1, 0)] # stack of active identifiers and indent levels
+        self.counters = {} # counters per identifier
+        self.indent_level = 0
+
+    def indent(self, tok):
+        """
+        Public method used to increment the indentation level.
+        
+        @param tok a token (Token, ignored)
+        """
+        self.indent_level += 1
+
+    def dedent(self, tok):
+        """
+        Public method used to decrement the indentation level.
+        
+        @param tok the token to be processed (Token)
+        """
+        self.indent_level -= 1
+        if self.indent_level < 0:
+            raise ValueError("INTERNAL ERROR: Negative indent level")
+
+        # remove identifiers of a higher indentation
+        while self.active and self.active[-1][1] >= self.indent_level:
+            counters = self.counters.setdefault(self.active[-1][0], {})
+            counters['start'] = self.active[-1][2]
+            counters['end'] = tok.row - 1
+            counters['lines'] = tok.row - self.active[-1][2]
+            del self.active[-1]
+
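+    # Worked example for the bookkeeping above (rows are hypothetical): if an
+    # identifier was pushed at row 10 and the closing DEDENT token arrives at
+    # row 25, the recorded values are start=10, end=24 and lines=15.
+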
+    def push(self, identifier, row):
+        """
+        Public method used to store an identifier.
+        
+        @param identifier the identifier to be remembered (string)
+        @param row the row the identifier is defined in (int)
+        """
+        if len(self.active) > 1 and self.indent_level > self.active[-1][1]:
+            qualified = self.active[-1][0] + '.' + identifier
+        else:
+            qualified = identifier
+        self.active.append((qualified, self.indent_level, row))
+        self.identifiers.append(qualified)
+
+    def inc(self, key, value=1):
+        """
+        Public method used to increment the value of a key.
+        
+        @param key the key to be incremented (string)
+        @param value the increment (int)
+        """
+        for id, level, row in self.active:
+            counters = self.counters.setdefault(id, {})
+            counters[key] = counters.setdefault(key, 0) + value
+
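+    # Usage sketch (identifiers are hypothetical): inc() credits every scope
+    # on the active stack, so e.g. a comment inside a method is counted for
+    # the method, its class and the 'TOTAL ' pseudo-identifier alike.
+    #
+    #     stats.inc('comments')
+    #     stats.getCounter('Klass.method', 'comments')    # -> 1
+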
+    def dump(self):
+        """
+        Public method used to format and print the collected statistics.
+        """
+        label_len = 79 - len(spacer) - 6 * 6
+        print spacer + "FUNCTION / CLASS".ljust(label_len) + \
+            " START   END LINES  NLOC  COMM EMPTY"
+        for id in self.identifiers + ['TOTAL ']:
+            label = id
+            counters = self.counters.get(id, {})
+            msg = spacer + label.ljust(label_len)
+
+            for key in ('start', 'end', 'lines', 'nloc', 'comments', 'empty'):
+                if counters.get(key, 0):
+                    msg += " %5d" % (counters[key],)
+                else:
+                    msg += " " * 6
+
+            print msg
+
+    def getCounter(self, id, key):
+        """
+        Public method used to get a specific counter value.
+        
+        @param id id of the counter (string)
+        @param key key of the value to be retrieved (string)
+        @return the value of the requested counter (int)
+        """
+        return self.counters.get(id, {}).get(key, 0)
+
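+# The counters attribute maps each qualified identifier to a dictionary of
+# its values; a sketch with hypothetical numbers:
+#
+#     stats.counters == {
+#         'TOTAL ': {'lines': 302, 'nloc': 210, 'comments': 40, 'empty': 52},
+#         'Parser': {'start': 43, 'end': 101, 'lines': 59, 'nloc': 30},
+#     }
+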
+def summarize(total, key, value):
+    """
+    Module function used to collect overall statistics.
+    
+    @param total the dictionary for the overall statistics
+    @param key the key to be summarized (string)
+    @param value the value to be added to the overall statistics
+    @return the value added to the overall statistics
+    """
+    total[key] = total.setdefault(key, 0) + value
+    return value
+
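+# summarize() both accumulates into `total` and passes the value through, so
+# it can be used inline; a short worked example:
+#
+#     total = {}
+#     summarize(total, 'lines', 100)    # returns 100, total == {'lines': 100}
+#     summarize(total, 'lines', 50)     # returns 50,  total == {'lines': 150}
+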
+def analyze(filename, total):
+    """ 
+    Module function used to analyze the source of a Python file.
+    
+    @param filename name of the Python file to be analyzed (string)
+    @param total dictionary receiving the overall code statistics
+    @return a statistics object with the collected code statistics (SourceStat)
+    """
+    f = open(filename, 'rb')
+    try:
+        text = Utilities.decode(f.read())[0].encode('utf-8')
+    finally:
+        f.close()
+
+    parser = Parser()
+    parser.parse(text)
+
+    stats = SourceStat()
+    stats.inc('lines', parser.lines)
+    comments = 0
+    empty = 0
+    for idx in range(len(parser.tokenlist)):
+        tok = parser.tokenlist[idx]
+        
+        # counting
+        if tok.type == NEWLINE:
+            stats.inc('nloc')
+        if tok.type == COMMENT:
+            stats.inc('comments')
+        if tok.type == EMPTY:
+            # a NL token directly after an operator marks a continuation
+            # line, not an empty one
+            if idx > 0 and parser.tokenlist[idx-1].type == token.OP:
+                stats.inc('nloc')
+            else:
+                stats.inc('empty')
+
+        if tok.type == INDENT:
+            stats.indent(tok)
+        if tok.type == DEDENT:
+            stats.dedent(tok)
+
+        if tok.type == KEYWORD:
+            if tok.text in ("class", "def"):
+                stats.push(parser.tokenlist[idx+1].text, tok.row)
+
+    # collect overall statistics
+    summarize(total, 'lines', parser.lines)
+    summarize(total, 'bytes', len(text))
+    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
+    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
+    summarize(total, 'non-commentary lines', stats.getCounter('TOTAL ', 'nloc'))
+
+    return stats
+    
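+# A minimal sketch of driving analyze() directly (the file name is
+# hypothetical):
+#
+#     total = {}
+#     stats = analyze("example.py", total)
+#     stats.dump()
+#     print total['non-commentary lines']
+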
+def main():
+    """
+    Module's main function used when called as a script.
+    
+    This function simply loops over all files given on the command line
+    and collects the individual and overall source code statistics.
+    """
+    import sys
+    
+    files = sys.argv[1:]
+    
+    if not files:
+        sys.exit(1)
+        
+    total = {}
+    
+    summarize(total, 'files', len(files))
+    for filename in files:
+        print filename
+        stats = analyze(filename, total)
+        stats.dump()
+        
+    print "\nSummary"
+    for key in ['files', 'lines', 'bytes', 'comments',
+                'empty lines', 'non-commentary lines']:
+        print key.ljust(20) + "%6d" % total[key]
+    
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
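+
+# Invoked as a script, the module prints one statistics table per file and an
+# overall summary; a usage sketch with hypothetical file names:
+#
+#     python CodeMetrics.py module1.py module2.py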
