|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2003 - 2019 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 # |
|
7 # Code mainly borrowed from the Pythius package which is |
|
8 # Copyright (c) 2001 by Jürgen Hermann <jh@web.de> |
|
9 # |
|
10 |
|
11 """ |
|
12 Module implementing a simple Python code metrics analyzer. |
|
13 |
|
14 @exception ValueError the tokenize module is too old |
|
15 """ |
|
16 |
|
17 from __future__ import unicode_literals |
|
18 |
|
19 import os |
|
20 import io |
|
21 import sys |
|
22 import keyword |
|
23 import token |
|
24 import tokenize |
|
25 |
|
26 import Utilities |
|
27 |
|
# Pseudo token type used to mark Python keywords; it is placed one above
# token.NT_OFFSET.
KEYWORD = 1 + token.NT_OFFSET

# Short module level aliases for the token types of the standard library.
COMMENT, EMPTY = tokenize.COMMENT, tokenize.NL
INDENT, DEDENT, NEWLINE = token.INDENT, token.DEDENT, token.NEWLINE
|
34 |
|
35 |
|
class Token(object):
    """
    Class acting as a plain attribute container for the data of one token.
    """
    def __init__(self, **kw):
        """
        Constructor

        @keyparam **kw key, value pairs; each key becomes an instance
            attribute holding the associated value
        """
        vars(self).update(kw)
|
47 |
|
48 |
|
class Parser(object):
    """
    Class used to parse the source code of a Python file.

    After a call to parse() the attribute 'tokenlist' contains the
    collected Token objects and the attribute 'lines' contains the number
    of line separators of the (converted) text.
    """
    def parse(self, text):
        """
        Public method used to parse the source code.

        The line ends of the text are converted via
        Utilities.convertLineEnds(), a trailing line separator is appended
        if it is missing and the result is tokenized. If the tokenizer
        reports an error, a message is printed and the tokens collected up
        to that point are kept.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # convert eols
        text = Utilities.convertLineEnds(text, os.linesep)

        if not text.endswith(os.linesep):
            text = "{0}{1}".format(text, os.linesep)

        self.lines = text.count(os.linesep)

        source = io.BytesIO(text.encode("utf-8"))
        try:
            if sys.version_info[0] == 2:
                gen = tokenize.generate_tokens(source.readline)
            else:
                gen = tokenize.tokenize(source.readline)
            for toktype, toktext, start, _end, line in gen:
                srow, scol = start
                if toktype in (token.NEWLINE, tokenize.NL):
                    # store line breaks as the platform line separator
                    toktext = os.linesep
                elif toktype in (token.INDENT, token.DEDENT):
                    # only the type of these tokens is of interest
                    toktext = ''
                elif toktype == token.NAME and keyword.iskeyword(toktext):
                    # the tokenizer reports keywords as plain names
                    toktype = KEYWORD
                self.__addToken(toktype, toktext, srow, scol, line)
        except (tokenize.TokenError, IndentationError) as msg:
            # The tokenizer raises IndentationError (not TokenError) for
            # an inconsistent dedent; treat it like any other tokenizer
            # failure instead of letting it escape to the caller.
            print("Token Error: {0}".format(str(msg)))
            # __IGNORE_WARNING_M801__

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
                                    col=scol, line=line))
|
106 |
|
# String consisting of a single space character; it is not referenced by
# any other code visible in this module.
spacer = " "
|
108 |
|
109 |
|
class SourceStat(object):
    """
    Class used to calculate and store the source code statistics.

    Counters are kept per identifier (class, method or function) in the
    'counters' dictionary. The pseudo identifier 'TOTAL ' is always active
    and thereby collects the values for the whole source.
    """
    def __init__(self):
        """
        Constructor
        """
        self.identifiers = []
        # list of identifiers in order of appearance
        self.active = [('TOTAL ', -1, 0)]
        # stack of active identifiers as tuples of
        # (qualified name, indent level, start row)
        self.counters = {}
        # counters per identifier
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        All active identifiers defined at the new level or deeper are
        closed, i.e. their 'start', 'end' and 'lines' counters are stored
        and they are removed from the stack of active identifiers.

        @param tok the token to be processed (Token)
        @exception ValueError raised to indicate an invalid indentation level
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # remove identifiers of a higher indentation
        self.__closeActive(tok.row)

    def __closeActive(self, row):
        """
        Private method to close all active identifiers, that were defined
        at the current indentation level or deeper.

        @param row row of the first token following the closed
            identifiers (int)
        """
        while self.active and self.active[-1][1] >= self.indent_level:
            identifier, _level, start = self.active[-1]
            counters = self.counters.setdefault(identifier, {})
            counters['start'] = start
            counters['end'] = row - 1
            counters['lines'] = row - start
            del self.active[-1]

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        The identifier is qualified with the name of the enclosing active
        identifier, if there is one.

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        # A definition written on a single line (e.g. "def f(): pass")
        # is followed by neither an INDENT nor a DEDENT token, so dedent()
        # never closes it. A new definition at the same level means that
        # such a sibling has ended; close it here to keep the qualified
        # names and the counters of the following code correct.
        self.__closeActive(row)

        # everything left on the stack besides 'TOTAL ' is an enclosing
        # definition of a lower indentation level
        if len(self.active) > 1:
            qualified = self.active[-1][0] + '.' + identifier
        else:
            qualified = identifier
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        The value is added to the counter of every active identifier.

        @param key the key to be incremented
        @param value the increment (int)
        """
        for counterId, _level, _row in self.active:
            counters = self.counters.setdefault(counterId, {})
            counters[key] = counters.get(key, 0) + value

    def getCounter(self, counterId, key):
        """
        Public method used to get a specific counter value.

        @param counterId id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter or 0, if the counter
            is not known (int)
        """
        return self.counters.get(counterId, {}).get(key, 0)
|
187 |
|
188 |
|
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.

    The value is added to the entry stored under the given key; an entry
    not yet contained in the dictionary starts at 0.

    @param total the dictionary for the overall statistics
    @param key the key to be summarized
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    previous = total.setdefault(key, 0)
    total[key] = previous + value
    return value
|
200 |
|
201 |
|
def analyze(filename, total):
    """
    Module function used to analyze the source of a Python file.

    If the file cannot be read or decoded, an empty statistics object is
    returned and the overall statistics are left untouched.

    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    try:
        text = Utilities.readEncodedFile(filename)[0]
    except (UnicodeError, IOError):
        return SourceStat()

    parser = Parser()
    parser.parse(text)

    stats = SourceStat()
    stats.inc('lines', parser.lines)
    tokens = parser.tokenlist
    lastIndex = len(tokens) - 1
    for idx, tok in enumerate(tokens):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        elif tok.type == COMMENT:
            stats.inc('comments')
            # a comment being the only content of its line
            if tok.line.strip() == tok.text:
                stats.inc('commentlines')
        elif tok.type == EMPTY:
            # Look at the preceding token; the very first token has none
            # (index -1 would wrongly wrap around to the last token).
            previousType = tokens[idx - 1].type if idx > 0 else None
            if previousType == token.OP:
                # line break following an operator counts as code
                stats.inc('nloc')
            elif previousType != COMMENT:
                # line breaks after a comment are not counted at all
                stats.inc('empty')
        elif tok.type == INDENT:
            stats.indent(tok)
        elif tok.type == DEDENT:
            stats.dedent(tok)
        elif tok.type == KEYWORD:
            # The token following 'class' or 'def' is taken as the name;
            # it may be missing, if tokenizing stopped prematurely.
            if tok.text in ("class", "def") and idx < lastIndex:
                stats.push(tokens[idx + 1].text, tok.row)

    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    # NOTE(review): len(text) is the length of the text as returned by
    # Utilities.readEncodedFile(); presumably this is a character count
    # and not a byte count - confirm before relying on it.
    summarize(total, 'bytes', len(text))
    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
    summarize(total, 'commentlines',
              stats.getCounter('TOTAL ', 'commentlines'))
    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
    summarize(total, 'non-commentary lines',
              stats.getCounter('TOTAL ', 'nloc'))

    return stats