DataViews/CodeMetrics.py

changeset 12:1d8dd9706f46
parent    0:de9c2efb9d02
child     13:1af94a91f439
--- a/DataViews/CodeMetrics.py	Fri Jan 01 16:11:36 2010 +0000
+++ b/DataViews/CodeMetrics.py	Sat Jan 02 15:11:35 2010 +0000
@@ -15,12 +15,10 @@
 """
 
 import os
-import cStringIO
+import io
 import keyword
 import token
 import tokenize
-if not hasattr(tokenize, 'NL'):
-    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
     
 import Utilities
 
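The guard deleted above tested for tokenize.NL, which every Python 3 release provides, so the check is dead code after the port. The distinction it protected still matters in the tokenizing loop below: NEWLINE closes a logical line, while NL marks blank lines and breaks inside open brackets. A minimal standalone sketch of that difference (Python 3, not part of the changeset):

    import io
    import token
    import tokenize

    src = b"x = (1 +\n     2)\n\ny = 3\n"
    for tok in tokenize.tokenize(io.BytesIO(src).readline):
        if tok.type == token.NEWLINE:
            print("NEWLINE ends a logical line at row", tok.start[0])
        elif tok.type == tokenize.NL:
            print("NL (blank/continuation) at row", tok.start[0])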
@@ -63,11 +61,23 @@
             
         self.lines = text.count(os.linesep)
         
-        source = cStringIO.StringIO(text)
+        source = io.BytesIO(text.encode("utf-8"))
         try:
-            tokenize.tokenize(source.readline, self.__tokeneater)
-        except tokenize.TokenError, msg:
-            print "Token Error: %s" % str(msg)
+            gen = tokenize.tokenize(source.readline)
+            for toktype, toktext, start, end, line in gen:
+                (srow, scol) = start
+                (erow, ecol) = end
+                if toktype in [token.NEWLINE, tokenize.NL]:
+                    self.__addToken(toktype, os.linesep, srow, scol, line)
+                elif toktype in [token.INDENT, token.DEDENT]:
+                    self.__addToken(toktype, '', srow, scol, line)
+                elif toktype == token.NAME and keyword.iskeyword(toktext):
+                    toktype = KEYWORD
+                    self.__addToken(toktype, toktext, srow, scol, line)
+                else:
+                    self.__addToken(toktype, toktext, srow, scol, line)
+        except tokenize.TokenError as msg:
+            print("Token Error: %s" % str(msg))
             return
         
         return
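Python 3 dropped the callback form of tokenize.tokenize(); it is now a generator that yields TokenInfo tuples and takes a readline callable returning bytes, which is why the text is re-encoded into an io.BytesIO above. A minimal sketch of both Python 3 entry points (independent of this module):

    import io
    import tokenize

    src = "def spam():\n    pass\n"

    # bytes interface: the first token yielded is ENCODING
    for tok in tokenize.tokenize(io.BytesIO(src.encode("utf-8")).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))

    # equivalent str interface, no ENCODING token
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))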
@@ -84,31 +94,6 @@
         """
         self.tokenlist.append(Token(type=toktype, text=toktext, row=srow, 
                                     col=scol, line=line))
-        
-    def __tokeneater(self, toktype, toktext, (srow, scol), (erow, ecol), line):
-        """
-        Private method called by tokenize.tokenize.
-        
-        @param toktype the type of the token (int)
-        @param toktext the text of the token (string)
-        @param srow starting row of the token (int)
-        @param scol starting column of the token (int)
-        @param erow ending row of the token (int)
-        @param ecol ending column of the token (int)
-        @param line logical line the token was found (string)
-        """
-        if toktype in [token.NEWLINE, tokenize.NL]:
-            self.__addToken(toktype, os.linesep, srow, scol, line)
-            return
-            
-        if toktype in [token.INDENT, token.DEDENT]:
-            self.__addToken(toktype, '', srow, scol, line)
-            return
-            
-        if toktype == token.NAME and keyword.iskeyword(toktext):
-            toktype = KEYWORD
-            
-        self.__addToken(toktype, toktext, srow, scol, line)
 
 spacer = ' '
 
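The __tokeneater callback removed above used Python 2 tuple parameters, (srow, scol) and (erow, ecol), a syntax PEP 3113 removed from Python 3; hence the replacement loop unpacks the position tuples in its body instead. Before/after sketch (illustrative only):

    # Python 2 only -- tuple parameters are gone in Python 3 (PEP 3113):
    #   def tokeneater(toktype, toktext, (srow, scol), (erow, ecol), line): ...

    # Python 3 equivalent: take the tuples whole and unpack explicitly
    def tokeneater(toktype, toktext, start, end, line):
        srow, scol = start  # starting row and column
        erow, ecol = end    # ending row and column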
@@ -181,8 +166,8 @@
         Public method used to format and print the collected statistics.
         """
         label_len = 79 - len(spacer) - 6 * 6
-        print spacer + "FUNCTION / CLASS".ljust(label_len) + \
-            " START   END LINES  NLOC  COMM EMPTY"
+        print(spacer + "FUNCTION / CLASS".ljust(label_len) + \
+            " START   END LINES  NLOC  COMM EMPTY")
         for id in self.identifiers + ['TOTAL ']:
             label = id
             counters = self.counters.get(id, {})
@@ -194,7 +179,7 @@
                 else:
                     msg += " " * 6
 
-            print msg
+            print(msg)
 
     def getCounter(self, id, key):
         """
@@ -226,9 +211,9 @@
     @param total dictionary receiving the overall code statistics
     @return a statistics object with the collected code statistics (SourceStat)
     """
-    file = open(filename, 'rb')
+    file = open(filename, 'r')
     try:
-        text = Utilities.decode(file.read())[0].encode('utf-8')
+        text = file.read()
     finally:
         file.close()
 
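The port now opens the source in text mode and leaves decoding to the platform default, where the old code read raw bytes and let Utilities.decode() guess the encoding. On Python 3.2 and later, tokenize.open() is a closer replacement for that behavior: it reads the PEP 263 coding cookie or BOM of a Python file and falls back to UTF-8. A hedged sketch, not what this changeset does ('path' is a placeholder):

    import tokenize

    # honors '# -*- coding: ... -*-' cookies and BOMs, defaults to UTF-8
    with tokenize.open(path) as f:
        text = f.read()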
@@ -245,18 +230,18 @@
         # counting
         if tok.type == NEWLINE:
             stats.inc('nloc')
-        if tok.type == COMMENT:
+        elif tok.type == COMMENT:
             stats.inc('comments')
-        if tok.type == EMPTY:
+        elif tok.type == EMPTY:
             if parser.tokenlist[idx-1].type == token.OP:
                 stats.inc('nloc')
             else:
                 stats.inc('empty')
-
-        if tok.type == INDENT: stats.indent(tok)
-        if tok.type == DEDENT: stats.dedent(tok)
-
-        if tok.type == KEYWORD:
+        elif tok.type == INDENT: 
+            stats.indent(tok)
+        elif tok.type == DEDENT: 
+            stats.dedent(tok)
+        elif tok.type == KEYWORD:
             if tok.text in ("class", "def"):
                 stats.push(parser.tokenlist[idx+1].text, tok.row)
 
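Turning the independent ifs into an elif chain is safe because tok.type holds exactly one token type, so at most one branch can ever match; the chain simply stops comparing after the first hit. The KEYWORD branch also relies on a one-token lookahead: the NAME immediately after "class" or "def" is the block's identifier, which is what parser.tokenlist[idx+1].text fetches. A standalone sketch of that lookahead:

    import io
    import token
    import tokenize

    src = b"class Spam:\n    def eggs(self):\n        pass\n"
    toks = list(tokenize.tokenize(io.BytesIO(src).readline))
    for idx, tok in enumerate(toks):
        # tokenize reports keywords as plain NAME tokens
        if tok.type == token.NAME and tok.string in ("class", "def"):
            print(tok.string, toks[idx + 1].string, "starts at row", tok.start[0])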
@@ -287,14 +272,14 @@
     
     summarize(total, 'files', len(files))
     for file in files:
-        print file
+        print(file)
         stats = analyze(file, total)
         stats.dump()
         
-    print "\nSummary"
+    print("\nSummary")
     for key in ['files', 'lines', 'bytes', 'comments',
                 'empty lines', 'non-commentary lines']:
-        print key.ljust(20) + "%6d" % total[key]
+        print(key.ljust(20) + "%6d" % total[key])
     
     sys.exit(0)
 
