|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2003 - 2022 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 # |
|
7 # Code mainly borrowed from the Pythius package which is |
|
8 # Copyright (c) 2001 by Jürgen Hermann <jh@web.de> |
|
9 # |
|
10 |
|
11 """ |
|
12 Module implementing a simple Python code metrics analyzer. |
|
13 |
|
14 @exception ValueError the tokenize module is too old |
|
15 """ |
|
16 |
|
17 |
|
18 import os |
|
19 import io |
|
20 import keyword |
|
21 import token |
|
22 import tokenize |
|
23 |
|
24 import Utilities |
|
25 |
|
# Token type aliases used throughout this module.
#
# KEYWORD is a module specific token type: Parser.parse() assigns it to NAME
# tokens whose text is a Python keyword. Its value is one above
# token.NT_OFFSET (presumably to keep it clear of the token types defined by
# the 'token' module).
KEYWORD = 1 + token.NT_OFFSET
COMMENT, EMPTY = tokenize.COMMENT, tokenize.NL
INDENT, DEDENT, NEWLINE = token.INDENT, token.DEDENT, token.NEWLINE
|
32 |
|
33 |
|
class Token:
    """
    Class to store the token related infos.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name.
    """
    def __init__(self, **kw):
        """
        Constructor

        @keyparam **kw key, value pairs to be stored as attributes
        """
        vars(self).update(kw)
|
45 |
|
46 |
|
class Parser:
    """
    Class used to parse the source code of a Python file.

    After a call to parse() the attribute 'tokenlist' contains the Token
    objects collected from the source and the attribute 'lines' contains
    the number of line separators of the (normalized) source text.
    """
    def parse(self, text):
        """
        Public method used to parse the source code.

        Tokenizing is done on a best effort basis. If the tokenizer gives up
        (tokenize.TokenError for an unterminated multi-line construct,
        SyntaxError/IndentationError for an inconsistent dedent or an invalid
        encoding declaration), a message is printed and the tokens collected
        up to that point are kept.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # convert eols
        text = Utilities.convertLineEnds(text, os.linesep)

        # make sure the last line is terminated
        if not text.endswith(os.linesep):
            text = "{0}{1}".format(text, os.linesep)

        self.lines = text.count(os.linesep)

        source = io.BytesIO(text.encode("utf-8"))
        try:
            gen = tokenize.tokenize(source.readline)
            for toktype, toktext, start, _end, line in gen:
                srow, scol = start
                if toktype in (token.NEWLINE, tokenize.NL):
                    # store line ends in the platform specific form
                    toktext = os.linesep
                elif toktype in (token.INDENT, token.DEDENT):
                    # the indentation text itself is not recorded
                    toktext = ''
                elif toktype == token.NAME and keyword.iskeyword(toktext):
                    # tag keywords with the module specific token type
                    toktype = KEYWORD
                self.__addToken(toktype, toktext, srow, scol, line)
        except (tokenize.TokenError, SyntaxError) as msg:
            # IndentationError is a subclass of SyntaxError and is handled
            # here as well.
            print("Token Error: {0}".format(str(msg)))
            # __IGNORE_WARNING_M801__

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
                                    col=scol, line=line))
|
101 |
|
# a single blank; not referenced by the code visible in this module
spacer = " "
|
103 |
|
104 |
|
class SourceStat:
    """
    Class used to calculate and store the source code statistics.

    Counters are kept per identifier (class or function name) plus one
    pseudo identifier 'TOTAL ' which stays active for the whole source.
    """
    def __init__(self):
        """
        Constructor
        """
        # identifiers in order of appearance
        self.identifiers = []
        # stack of (identifier, indent level, start row) of the currently
        # active identifiers; the 'TOTAL ' entry uses level -1 and is
        # therefore never removed by dedent()
        self.active = [('TOTAL ', -1, 0)]
        # dictionary of counter dictionaries, one per identifier
        self.counters = {}
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        All active identifiers recorded at the new indentation level or
        deeper are closed; their 'start', 'end' and 'lines' counters are
        derived from the row of the given token.

        @param tok the token to be processed (Token)
        @exception ValueError raised to indicate an invalid indentation level
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # close identifiers of the same or a higher indentation
        while self.active and self.active[-1][1] >= self.indent_level:
            name, _level, firstRow = self.active[-1]
            stats = self.counters.setdefault(name, {})
            stats['start'] = firstRow
            stats['end'] = tok.row - 1
            stats['lines'] = tok.row - firstRow
            self.active.pop()

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        The identifier is qualified with the name of the innermost active
        identifier, if it is defined at a deeper indentation level than
        that one.

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        nested = (
            len(self.active) > 1 and
            self.indent_level > self.active[-1][1]
        )
        qualified = (
            '.'.join((self.active[-1][0], identifier))
            if nested else
            identifier
        )
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        The increment is applied to the counters of all currently active
        identifiers.

        @param key the key to be incremented
        @param value the increment (int)
        """
        for name, _level, _row in self.active:
            stats = self.counters.setdefault(name, {})
            current = stats.setdefault(key, 0)
            stats[key] = current + value

    def getCounter(self, counterId, key):
        """
        Public method used to get a specific counter value.

        @param counterId id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter or 0, if the counter or
            the key is unknown (int)
        """
        stats = self.counters.get(counterId, {})
        return stats.get(key, 0)
|
183 |
|
184 |
|
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.

    The value is added to the entry stored under the given key; a missing
    entry is started at 0.

    @param total the dictionary for the overall statistics
    @param key the key to be summarized
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    runningSum = total.setdefault(key, 0)
    total[key] = runningSum + value
    return value
|
196 |
|
197 |
|
def analyze(filename, total):
    """
    Module function used to analyze the source of a Python file.

    If the file cannot be read or decoded, an empty statistics object is
    returned and the overall statistics are left untouched.

    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    try:
        text = Utilities.readEncodedFile(filename)[0]
    except (UnicodeError, OSError):
        return SourceStat()

    parser = Parser()
    parser.parse(text)
    tokens = parser.tokenlist

    stats = SourceStat()
    stats.inc('lines', parser.lines)
    for idx, tok in enumerate(tokens):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        elif tok.type == COMMENT:
            stats.inc('comments')
            # a comment being the only content of its line
            if tok.line.strip() == tok.text:
                stats.inc('commentlines')
        elif tok.type == EMPTY:
            previousType = tokens[idx - 1].type
            if previousType == token.OP:
                # line break following an operator token counts as code
                stats.inc('nloc')
            elif previousType != COMMENT:
                # a line break following a comment is not counted at all
                stats.inc('empty')
        elif tok.type == INDENT:
            stats.indent(tok)
        elif tok.type == DEDENT:
            stats.dedent(tok)
        elif tok.type == KEYWORD and tok.text in ("class", "def"):
            # the token following the keyword supplies the identifier
            stats.push(tokens[idx + 1].text, tok.row)

    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    # NOTE(review): len(text) is the length of the decoded text that is
    # handed to Parser.parse(), i.e. a character count - confirm that this
    # is what 'bytes' is meant to report.
    summarize(total, 'bytes', len(text))
    for totalKey, counterKey in (
        ('comments', 'comments'),
        ('commentlines', 'commentlines'),
        ('empty lines', 'empty'),
        ('non-commentary lines', 'nloc'),
    ):
        summarize(total, totalKey, stats.getCounter('TOTAL ', counterKey))

    return stats