DataViews/CodeMetrics.py

changeset 12:1d8dd9706f46
parent 0:de9c2efb9d02
child 13:1af94a91f439
comparison: 11:b0996e4a289e vs 12:1d8dd9706f46
--- a/DataViews/CodeMetrics.py
+++ b/DataViews/CodeMetrics.py
@@ -13,16 +13,14 @@
 
 @exception ValueError the tokenize module is too old
 """
 
 import os
-import cStringIO
+import io
 import keyword
 import token
 import tokenize
-if not hasattr(tokenize, 'NL'):
-    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
 
 import Utilities
 
 KEYWORD = token.NT_OFFSET + 1
 COMMENT = tokenize.COMMENT
@@ -61,15 +59,27 @@
         if not text.endswith(os.linesep):
             text = "%s%s" % (text, os.linesep)
 
         self.lines = text.count(os.linesep)
 
-        source = cStringIO.StringIO(text)
+        source = io.BytesIO(text.encode("utf-8"))
         try:
-            tokenize.tokenize(source.readline, self.__tokeneater)
-        except tokenize.TokenError, msg:
-            print "Token Error: %s" % str(msg)
+            gen = tokenize.tokenize(source.readline)
+            for toktype, toktext, start, end, line in gen:
+                (srow, scol) = start
+                (erow, ecol) = end
+                if toktype in [token.NEWLINE, tokenize.NL]:
+                    self.__addToken(toktype, os.linesep, srow, scol, line)
+                elif toktype in [token.INDENT, token.DEDENT]:
+                    self.__addToken(toktype, '', srow, scol, line)
+                elif toktype == token.NAME and keyword.iskeyword(toktext):
+                    toktype = KEYWORD
+                    self.__addToken(toktype, toktext, srow, scol, line)
+                else:
+                    self.__addToken(toktype, toktext, srow, scol, line)
+        except tokenize.TokenError as msg:
+            print("Token Error: %s" % str(msg))
             return
 
         return
 
     def __addToken(self, toktype, toktext, srow, scol, line):
@@ -82,35 +92,10 @@
         @param scol starting column of the token (int)
         @param line logical line the token was found (string)
         """
         self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
                                     col=scol, line=line))
-
-    def __tokeneater(self, toktype, toktext, (srow, scol), (erow, ecol), line):
-        """
-        Private method called by tokenize.tokenize.
-
-        @param toktype the type of the token (int)
-        @param toktext the text of the token (string)
-        @param srow starting row of the token (int)
-        @param scol starting column of the token (int)
-        @param erow ending row of the token (int)
-        @param ecol ending column of the token (int)
-        @param line logical line the token was found (string)
-        """
-        if toktype in [token.NEWLINE, tokenize.NL]:
-            self.__addToken(toktype, os.linesep, srow, scol, line)
-            return
-
-        if toktype in [token.INDENT, token.DEDENT]:
-            self.__addToken(toktype, '', srow, scol, line)
-            return
-
-        if toktype == token.NAME and keyword.iskeyword(toktext):
-            toktype = KEYWORD
-
-        self.__addToken(toktype, toktext, srow, scol, line)
 
 spacer = ' '
 
 class SourceStat(object):
     """
@@ -179,12 +164,12 @@
     def dump(self):
         """
         Public method used to format and print the collected statistics.
         """
         label_len = 79 - len(spacer) - 6 * 6
-        print spacer + "FUNCTION / CLASS".ljust(label_len) + \
-            " START   END LINES  NLOC  COMM EMPTY"
+        print(spacer + "FUNCTION / CLASS".ljust(label_len) + \
+            " START   END LINES  NLOC  COMM EMPTY")
         for id in self.identifiers + ['TOTAL ']:
             label = id
             counters = self.counters.get(id, {})
             msg = spacer + label.ljust(label_len)
 
@@ -192,11 +177,11 @@
                 if counters.get(key, 0):
                     msg += " %5d" % (counters[key],)
                 else:
                     msg += " " * 6
 
-            print msg
+            print(msg)
 
     def getCounter(self, id, key):
         """
         Public method used to get a specific counter value.
 
@@ -224,13 +209,13 @@
 
     @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
     @return a statistics object with the collected code statistics (SourceStat)
     """
-    file = open(filename, 'rb')
+    file = open(filename, 'r')
     try:
-        text = Utilities.decode(file.read())[0].encode('utf-8')
+        text = file.read()
     finally:
         file.close()
 
     parser = Parser()
     parser.parse(text)
@@ -243,22 +228,22 @@
         tok = parser.tokenlist[idx]
 
         # counting
         if tok.type == NEWLINE:
             stats.inc('nloc')
-        if tok.type == COMMENT:
+        elif tok.type == COMMENT:
             stats.inc('comments')
-        if tok.type == EMPTY:
+        elif tok.type == EMPTY:
             if parser.tokenlist[idx-1].type == token.OP:
                 stats.inc('nloc')
             else:
                 stats.inc('empty')
-
-        if tok.type == INDENT: stats.indent(tok)
-        if tok.type == DEDENT: stats.dedent(tok)
-
-        if tok.type == KEYWORD:
+        elif tok.type == INDENT:
+            stats.indent(tok)
+        elif tok.type == DEDENT:
+            stats.dedent(tok)
+        elif tok.type == KEYWORD:
             if tok.text in ("class", "def"):
                 stats.push(parser.tokenlist[idx+1].text, tok.row)
 
     # collect overall statistics
     summarize(total, 'lines', parser.lines)
@@ -285,18 +270,18 @@
 
     total = {}
 
     summarize(total, 'files', len(files))
     for file in files:
-        print file
+        print(file)
         stats = analyze(file, total)
         stats.dump()
 
-    print "\nSummary"
+    print("\nSummary")
     for key in ['files', 'lines', 'bytes', 'comments',
                 'empty lines', 'non-commentary lines']:
-        print key.ljust(20) + "%6d" % total[key]
+        print(key.ljust(20) + "%6d" % total[key])
 
     sys.exit(0)
 
 if __name__ == "__main__":
     main()
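Note on the main change: this changeset replaces Python 2's callback-based tokenizer interface (tokenize.tokenize(readline, tokeneater), which drove the removed __tokeneater() method) with Python 3's generator interface, whose 5-tuples parse() now unpacks inline. Below is a minimal, self-contained sketch of the new pattern; the sample source string and the keyword counting are illustrative only, not part of the changeset:

    import io
    import keyword
    import token
    import tokenize

    SOURCE = "def answer():\n    return 42\n"

    # Python 3's tokenize.tokenize() wants a readline callable that
    # yields bytes, hence the encode() -- the same reason the changeset
    # uses io.BytesIO(text.encode("utf-8")).
    readline = io.BytesIO(SOURCE.encode("utf-8")).readline

    keywords = 0
    for toktype, toktext, start, end, line in tokenize.tokenize(readline):
        if toktype == token.NAME and keyword.iskeyword(toktext):
            keywords += 1  # counts 'def' and 'return' here
        print(tokenize.tok_name[toktype], repr(toktext), start)

    print("keywords:", keywords)  # -> keywords: 2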

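A related subtlety behind the open(filename, 'r') and io.BytesIO(text.encode("utf-8")) changes: in Python 3, tokenize.tokenize() does its own encoding detection on the byte stream (PEP 263 coding cookie or BOM) and yields an ENCODING token first, while tokenize.generate_tokens() is the counterpart for already-decoded str input. A small sketch contrasting the two entry points (the sample text is illustrative):

    import io
    import tokenize

    text = "# -*- coding: utf-8 -*-\nname = 'métrique'\n"

    # Bytes path: tokenize() reads the coding cookie itself and emits
    # an ENCODING token before anything else.
    btoks = list(tokenize.tokenize(io.BytesIO(text.encode("utf-8")).readline))
    print(btoks[0].type == tokenize.ENCODING, btoks[0].string)  # True utf-8

    # Str path: generate_tokens() assumes the caller has decoded already,
    # so no ENCODING token is produced.
    stoks = list(tokenize.generate_tokens(io.StringIO(text).readline))
    print(stoks[0].type == tokenize.ENCODING)  # False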