DataViews/CodeMetrics.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
# -*- coding: utf-8 -*-

# Copyright (c) 2003 - 2009 Detlev Offenbach <detlev@die-offenbachs.de>
#

#
# Code mainly borrowed from the Pythius package which is
# Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
#

"""
Module implementing a simple Python code metrics analyzer.

@exception ValueError the tokenize module is too old
"""
16
17 import os
18 import cStringIO
19 import keyword
20 import token
21 import tokenize
# The parser below relies on tokenize.NL to tell blank/continuation lines
# from logical line ends, so refuse to load with a tokenize module lacking it.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
24
25 import Utilities
26
# Token types used by the metrics code.
# KEYWORD is a pseudo token type; it is assigned by Parser's token callback
# to NAME tokens whose text is a Python keyword.
KEYWORD = token.NT_OFFSET + 1
COMMENT = tokenize.COMMENT
INDENT = token.INDENT
DEDENT = token.DEDENT
NEWLINE = token.NEWLINE
# tokenize.NL: a line break that does not end a logical line
EMPTY = tokenize.NL
33
class Token(object):
    """
    Class to store the token related infos.

    Every keyword argument given to the constructor becomes an attribute
    of the instance.
    """
    def __init__(self, **kw):
        """
        Constructor

        @param **kw list of key, value pairs stored as instance attributes
        """
        self.__dict__.update(kw)

    def __repr__(self):
        """
        Special method returning a readable representation for debugging.

        @return string showing all stored attributes sorted by name (string)
        """
        attributes = ", ".join(
            ["%s=%r" % item for item in sorted(self.__dict__.items())])
        return "Token(%s)" % attributes
45
class Parser(object):
    """
    Class used to parse the source code of a Python file.

    After a call to parse() the attribute tokenlist holds the collected
    Token objects in order of appearance and the attribute lines holds
    the number of line ends found in the (normalized) source text.
    """
    def parse(self, text):
        """
        Public method used to parse the source code.

        A tokenize.TokenError is reported on stdout; the tokens collected
        up to that point are kept in tokenlist.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # convert eols
        text = Utilities.convertLineEnds(text, os.linesep)

        # make sure the last line is terminated
        if not text.endswith(os.linesep):
            text = "%s%s" % (text, os.linesep)

        self.lines = text.count(os.linesep)

        source = cStringIO.StringIO(text)
        try:
            tokenize.tokenize(source.readline, self.__tokeneater)
        except tokenize.TokenError as msg:
            print("Token Error: %s" % str(msg))

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
                                    col=scol, line=line))

    def __tokeneater(self, toktype, toktext, start, end, line):
        """
        Private method called by tokenize.tokenize.

        Line end tokens are stored with os.linesep as their text, indent
        and dedent tokens with an empty text and NAME tokens that are
        Python keywords are stored with the type KEYWORD.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param start starting (row, column) of the token (tuple of int)
        @param end ending (row, column) of the token (tuple of int, unused)
        @param line logical line the token was found (string)
        """
        # unpack in the body; tuple parameters are not portable syntax
        srow, scol = start

        if toktype in (token.NEWLINE, tokenize.NL):
            toktext = os.linesep
        elif toktype in (token.INDENT, token.DEDENT):
            toktext = ''
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = KEYWORD

        self.__addToken(toktype, toktext, srow, scol, line)
112
# prefix put in front of every line printed by SourceStat.dump()
spacer = ' '
114
class SourceStat(object):
    """
    Class used to calculate and store the source code statistics.
    """
    def __init__(self):
        """
        Constructor
        """
        # list of identifiers in order of appearance
        self.identifiers = []
        # stack of active identifiers as (name, indent level, start row);
        # the 'TOTAL ' entry has level -1, so dedent() never removes it
        self.active = [('TOTAL ', -1, 0)]
        # counters per identifier
        self.counters = {}
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        All active identifiers defined at the new or a deeper indentation
        level are closed and get their 'start', 'end' and 'lines' counters.

        @param tok the token to be processed (Token)
        @exception ValueError the indentation level became negative
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # remove identifiers of a higher indentation
        while self.active and self.active[-1][1] >= self.indent_level:
            name, _level, start = self.active.pop()
            counters = self.counters.setdefault(name, {})
            counters['start'] = start
            counters['end'] = tok.row - 1
            counters['lines'] = tok.row - start

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        The identifier is prefixed with the name of the innermost active
        identifier, if it is defined at a deeper indentation level than
        that one.

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        nested = len(self.active) > 1 and \
            self.indent_level > self.active[-1][1]
        if nested:
            qualified = self.active[-1][0] + '.' + identifier
        else:
            qualified = identifier
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        The counter is incremented for every active identifier, which
        includes the 'TOTAL ' entry.

        @param key the key to be incremented
        @param value the increment (int)
        """
        for name, _level, _row in self.active:
            counters = self.counters.setdefault(name, {})
            counters[key] = counters.get(key, 0) + value

    def dump(self):
        """
        Public method used to format and print the collected statistics.
        """
        keys = ('start', 'end', 'lines', 'nloc', 'comments', 'empty')
        titles = ('START', 'END', 'LINES', 'NLOC', 'COMM', 'EMPTY')
        label_len = 79 - len(spacer) - 6 * 6

        # column titles use the same 6 character width as the values below
        header = "".join([" %5s" % title for title in titles])
        print(spacer + "FUNCTION / CLASS".ljust(label_len) + header)

        for name in self.identifiers + ['TOTAL ']:
            counters = self.counters.get(name, {})
            msg = spacer + name.ljust(label_len)

            for key in keys:
                if counters.get(key, 0):
                    msg += " %5d" % (counters[key],)
                else:
                    # missing and zero counters are shown as blanks
                    msg += " " * 6

            print(msg)

    def getCounter(self, id, key):
        """
        Public method used to get a specific counter value.

        @param id id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter (int); 0 if the id or
            the key is unknown
        """
        return self.counters.get(id, {}).get(key, 0)
208
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.

    @param total the dictionary for the overall statistics (modified
        in place)
    @param key the key to be summarized
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    total[key] = total.get(key, 0) + value
    return value
220
def analyze(filename, total):
    """
    Module function used analyze the source of a Python file.

    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    f = open(filename, 'rb')
    try:
        text = Utilities.decode(f.read())[0].encode('utf-8')
    finally:
        f.close()

    parser = Parser()
    parser.parse(text)
    tokens = parser.tokenlist

    stats = SourceStat()
    stats.inc('lines', parser.lines)

    # the token seen before the current one; None for the first token, so
    # the look-behind below never wraps around to the end of the list
    previous = None
    for idx, tok in enumerate(tokens):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        elif tok.type == COMMENT:
            stats.inc('comments')
        elif tok.type == EMPTY:
            # a line break directly after an operator is counted as a code
            # line (presumably a statement continued inside brackets)
            if previous is not None and previous.type == token.OP:
                stats.inc('nloc')
            else:
                stats.inc('empty')
        elif tok.type == INDENT:
            stats.indent(tok)
        elif tok.type == DEDENT:
            stats.dedent(tok)
        elif tok.type == KEYWORD:
            # the token following 'class' or 'def' is the defined name; it
            # may be missing if tokenizing stopped early
            if tok.text in ("class", "def") and idx + 1 < len(tokens):
                stats.push(tokens[idx + 1].text, tok.row)

        previous = tok

    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    summarize(total, 'bytes', len(text))
    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
    summarize(total, 'non-commentary lines', stats.getCounter('TOTAL ', 'nloc'))

    return stats
271
def main():
    """
    Modules main function used when called as a script.

    This function simply loops over all files given on the commandline
    and collects the individual and overall source code statistics.
    It exits with status 1 if no file was given and with status 0 otherwise.
    """
    import sys

    files = sys.argv[1:]

    if not files:
        # tell the user why nothing happened before signalling the error
        sys.stderr.write("Usage: %s file [file ...]\n" % sys.argv[0])
        sys.exit(1)

    total = {}

    summarize(total, 'files', len(files))
    for filename in files:
        print(filename)
        stats = analyze(filename, total)
        stats.dump()

    print("\nSummary")
    for key in ['files', 'lines', 'bytes', 'comments',
                'empty lines', 'non-commentary lines']:
        print(key.ljust(20) + "%6d" % total[key])

    sys.exit(0)
300
# run the command line interface only when executed as a script
if __name__ == "__main__":
    main()

eric ide

mercurial