|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2003 - 2009 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 # |
|
7 # Code mainly borrowed from the Pythius package which is |
|
8 # Copyright (c) 2001 by Jürgen Hermann <jh@web.de> |
|
9 # |
|
10 |
|
11 """ |
|
12 Module implementing a simple Python code metrics analyzer. |
|
13 |
|
14 @exception ValueError the tokenize module is too old |
|
15 """ |
|
16 |
|
17 import os |
|
18 import cStringIO |
|
19 import keyword |
|
20 import token |
|
21 import tokenize |
|
# The analyzer relies on the NL pseudo-token to detect empty lines; it is
# only present in sufficiently recent versions of the tokenize module.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
|
24 |
|
25 import Utilities |
|
26 |
|
# Token type codes used throughout the analyzer.  KEYWORD is a pseudo type
# invented here; it lies above NT_OFFSET so it cannot clash with any of the
# regular token types.
KEYWORD = token.NT_OFFSET + 1   # a Python keyword (reclassified NAME token)
COMMENT = tokenize.COMMENT      # a comment
INDENT = token.INDENT           # indentation increased
DEDENT = token.DEDENT           # indentation decreased
NEWLINE = token.NEWLINE         # end of a logical source line
EMPTY = tokenize.NL             # non-logical newline (empty or comment line)
|
33 |
|
class Token(object):
    """
    Class to store the token related infos.
    """
    def __init__(self, **kw):
        """
        Constructor
        
        @param **kw list of key, value pairs
        """
        # expose every keyword argument as an instance attribute
        for name, value in kw.items():
            setattr(self, name, value)
|
45 |
|
46 class Parser(object): |
|
47 """ |
|
48 Class used to parse the source code of a Python file. |
|
49 """ |
|
50 def parse(self, text): |
|
51 """ |
|
52 Public method used to parse the source code. |
|
53 |
|
54 @param text the source code as read from a Python source file |
|
55 """ |
|
56 self.tokenlist = [] |
|
57 |
|
58 # convert eols |
|
59 text = Utilities.convertLineEnds(text, os.linesep) |
|
60 |
|
61 if not text.endswith(os.linesep): |
|
62 text = "%s%s" % (text, os.linesep) |
|
63 |
|
64 self.lines = text.count(os.linesep) |
|
65 |
|
66 source = cStringIO.StringIO(text) |
|
67 try: |
|
68 tokenize.tokenize(source.readline, self.__tokeneater) |
|
69 except tokenize.TokenError, msg: |
|
70 print "Token Error: %s" % str(msg) |
|
71 return |
|
72 |
|
73 return |
|
74 |
|
75 def __addToken(self, toktype, toktext, srow, scol, line): |
|
76 """ |
|
77 Private method used to add a token to our list of tokens. |
|
78 |
|
79 @param toktype the type of the token (int) |
|
80 @param toktext the text of the token (string) |
|
81 @param srow starting row of the token (int) |
|
82 @param scol starting column of the token (int) |
|
83 @param line logical line the token was found (string) |
|
84 """ |
|
85 self.tokenlist.append(Token(type=toktype, text=toktext, row=srow, |
|
86 col=scol, line=line)) |
|
87 |
|
88 def __tokeneater(self, toktype, toktext, (srow, scol), (erow, ecol), line): |
|
89 """ |
|
90 Private method called by tokenize.tokenize. |
|
91 |
|
92 @param toktype the type of the token (int) |
|
93 @param toktext the text of the token (string) |
|
94 @param srow starting row of the token (int) |
|
95 @param scol starting column of the token (int) |
|
96 @param erow ending row of the token (int) |
|
97 @param ecol ending column of the token (int) |
|
98 @param line logical line the token was found (string) |
|
99 """ |
|
100 if toktype in [token.NEWLINE, tokenize.NL]: |
|
101 self.__addToken(toktype, os.linesep, srow, scol, line) |
|
102 return |
|
103 |
|
104 if toktype in [token.INDENT, token.DEDENT]: |
|
105 self.__addToken(toktype, '', srow, scol, line) |
|
106 return |
|
107 |
|
108 if toktype == token.NAME and keyword.iskeyword(toktext): |
|
109 toktype = KEYWORD |
|
110 |
|
111 self.__addToken(toktype, toktext, srow, scol, line) |
|
112 |
|
spacer = ' '    # left hand side indentation of the statistics output
|
114 |
|
115 class SourceStat(object): |
|
116 """ |
|
117 Class used to calculate and store the source code statistics. |
|
118 """ |
|
119 def __init__(self): |
|
120 """ |
|
121 Constructor |
|
122 """ |
|
123 self.identifiers = [] # list of identifiers in order of appearance |
|
124 self.active = [('TOTAL ',-1,0)] # stack of active identifiers and indent levels |
|
125 self.counters = {} # counters per identifier |
|
126 self.indent_level = 0 |
|
127 |
|
128 def indent(self, tok): |
|
129 """ |
|
130 Public method used to increment the indentation level. |
|
131 |
|
132 @param tok a token (Token, ignored) |
|
133 """ |
|
134 self.indent_level += 1 |
|
135 |
|
136 def dedent(self, tok): |
|
137 """ |
|
138 Public method used to decrement the indentation level. |
|
139 |
|
140 @param tok the token to be processed (Token) |
|
141 """ |
|
142 self.indent_level -= 1 |
|
143 if self.indent_level < 0: |
|
144 raise ValueError("INTERNAL ERROR: Negative indent level") |
|
145 |
|
146 # remove identifiers of a higher indentation |
|
147 while self.active and self.active[-1][1] >= self.indent_level: |
|
148 counters = self.counters.setdefault(self.active[-1][0], {}) |
|
149 counters['start'] = self.active[-1][2] |
|
150 counters['end'] = tok.row - 1 |
|
151 counters['lines'] = tok.row - self.active[-1][2] |
|
152 del self.active[-1] |
|
153 |
|
154 def push(self, identifier, row): |
|
155 """ |
|
156 Public method used to store an identifier. |
|
157 |
|
158 @param identifier the identifier to be remembered (string) |
|
159 @param row the row, the identifier is defined in (int) |
|
160 """ |
|
161 if len(self.active) > 1 and self.indent_level > self.active[-1][1]: |
|
162 qualified = self.active[-1][0] + '.' + identifier |
|
163 else: |
|
164 qualified = identifier |
|
165 self.active.append((qualified, self.indent_level, row)) |
|
166 self.identifiers.append(qualified) |
|
167 |
|
168 def inc(self, key, value=1): |
|
169 """ |
|
170 Public method used to increment the value of a key. |
|
171 |
|
172 @param key the key to be incremented |
|
173 @param value the increment (int) |
|
174 """ |
|
175 for id, level, row in self.active: |
|
176 counters = self.counters.setdefault(id, {}) |
|
177 counters[key] = counters.setdefault(key, 0) + value |
|
178 |
|
179 def dump(self): |
|
180 """ |
|
181 Public method used to format and print the collected statistics. |
|
182 """ |
|
183 label_len = 79 - len(spacer) - 6 * 6 |
|
184 print spacer + "FUNCTION / CLASS".ljust(label_len) + \ |
|
185 " START END LINES NLOC COMM EMPTY" |
|
186 for id in self.identifiers + ['TOTAL ']: |
|
187 label = id |
|
188 counters = self.counters.get(id, {}) |
|
189 msg = spacer + label.ljust(label_len) |
|
190 |
|
191 for key in ('start', 'end', 'lines', 'nloc', 'comments', 'empty'): |
|
192 if counters.get(key, 0): |
|
193 msg += " %5d" % (counters[key],) |
|
194 else: |
|
195 msg += " " * 6 |
|
196 |
|
197 print msg |
|
198 |
|
199 def getCounter(self, id, key): |
|
200 """ |
|
201 Public method used to get a specific counter value. |
|
202 |
|
203 @param id id of the counter (string) |
|
204 @param key key of the value to be retrieved (string) |
|
205 @return the value of the requested counter (int) |
|
206 """ |
|
207 return self.counters.get(id, {}).get(key, 0) |
|
208 |
|
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.
    
    @param total the dictionary for the overall statistics
    @param key the key to be summarized
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    total[key] = total.get(key, 0) + value
    return value
|
220 |
|
def analyze(filename, total):
    """
    Module function used to analyze the source of a Python file.
    
    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    # NOTE: renamed from 'file' to avoid shadowing the builtin
    src = open(filename, 'rb')
    try:
        text = Utilities.decode(src.read())[0].encode('utf-8')
    finally:
        src.close()
    
    parser = Parser()
    parser.parse(text)
    
    stats = SourceStat()
    stats.inc('lines', parser.lines)
    for idx, tok in enumerate(parser.tokenlist):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        if tok.type == COMMENT:
            stats.inc('comments')
        if tok.type == EMPTY:
            # an NL directly after an operator continues a logical line;
            # the idx > 0 guard prevents tokenlist[-1] wrap-around for the
            # very first token
            if idx > 0 and parser.tokenlist[idx - 1].type == token.OP:
                stats.inc('nloc')
            else:
                stats.inc('empty')
        
        if tok.type == INDENT:
            stats.indent(tok)
        if tok.type == DEDENT:
            stats.dedent(tok)
        
        if tok.type == KEYWORD:
            if tok.text in ("class", "def"):
                # the defined name follows the introducing keyword; guard
                # against a truncated token list
                if idx + 1 < len(parser.tokenlist):
                    stats.push(parser.tokenlist[idx + 1].text, tok.row)
    
    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    summarize(total, 'bytes', len(text))
    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
    summarize(total, 'non-commentary lines', stats.getCounter('TOTAL ', 'nloc'))
    
    return stats
|
271 |
|
272 def main(): |
|
273 """ |
|
274 Modules main function used when called as a script. |
|
275 |
|
276 This function simply loops over all files given on the commandline |
|
277 and collects the individual and overall source code statistics. |
|
278 """ |
|
279 import sys |
|
280 |
|
281 files = sys.argv[1:] |
|
282 |
|
283 if not files: |
|
284 sys.exit(1) |
|
285 |
|
286 total = {} |
|
287 |
|
288 summarize(total, 'files', len(files)) |
|
289 for file in files: |
|
290 print file |
|
291 stats = analyze(file, total) |
|
292 stats.dump() |
|
293 |
|
294 print "\nSummary" |
|
295 for key in ['files', 'lines', 'bytes', 'comments', |
|
296 'empty lines', 'non-commentary lines']: |
|
297 print key.ljust(20) + "%6d" % total[key] |
|
298 |
|
299 sys.exit(0) |
|
300 |
|
301 if __name__ == "__main__": |
|
302 main() |