33 |
33 |
class Token:
    """
    Class to store the token related infos.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kw):
        """
        Constructor

        @keyparam **kw list of key, value pairs
        """
        self.__dict__.update(kw)

    def __repr__(self):
        """
        Special method to generate a string representation for debugging.

        @return string listing the stored attributes (string)
        """
        return "Token({0})".format(
            ", ".join(
                "{0}={1!r}".format(name, value)
                for name, value in self.__dict__.items()
            )
        )
45 |
46 |
46 |
47 |
class Parser:
    """
    Class used to parse the source code of a Python file.
    """

    def parse(self, text):
        """
        Public method used to parse the source code.

        The tokens found are stored in the 'tokenlist' attribute and the
        number of lines of the (eol converted) text in the 'lines'
        attribute. If tokenizing fails with a TokenError, the error is
        printed and the tokens collected up to that point are kept.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # convert eols
        text = Utilities.convertLineEnds(text, os.linesep)

        # make sure the last line is terminated, so it gets counted as well
        if not text.endswith(os.linesep):
            text = "{0}{1}".format(text, os.linesep)

        self.lines = text.count(os.linesep)

        # tokenize.tokenize() expects a readline callable returning bytes
        source = io.BytesIO(text.encode("utf-8"))
        try:
            gen = tokenize.tokenize(source.readline)
            for toktype, toktext, start, _end, line in gen:
                srow, scol = start
                if toktype in (token.NEWLINE, tokenize.NL):
                    # store line ends in a normalized form
                    toktext = os.linesep
                elif toktype in (token.INDENT, token.DEDENT):
                    # only the type of these tokens is of interest
                    toktext = ""
                elif toktype == token.NAME and keyword.iskeyword(toktext):
                    # reclassify keywords, which are reported as names
                    toktype = KEYWORD
                self.__addToken(toktype, toktext, srow, scol, line)
        except tokenize.TokenError as msg:
            print("Token Error: {0}".format(str(msg)))
            # __IGNORE_WARNING_M801__

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(
            Token(type=toktype, text=toktext, row=srow, col=scol, line=line)
        )


# NOTE(review): not referenced in the visible part of this module; presumably
# used as padding when formatting output - confirm against the callers.
spacer = " "
103 |
107 |
104 |
108 |
class SourceStat:
    """
    Class used to calculate and store the source code statistics.

    The statistics are kept per identifier. An identifier stays on the
    stack of active identifiers until a dedent closes its indentation
    level; the "TOTAL " entry has level -1 and is never removed.
    """

    def __init__(self):
        """
        Constructor
        """
        self.identifiers = []
        # list of identifiers in order of appearance
        self.active = [("TOTAL ", -1, 0)]
        # stack of active identifiers and indent levels
        # (entries are tuples of identifier, indent level and start row)
        self.counters = {}
        # counters per identifier
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        All identifiers defined at the new indentation level or deeper are
        closed, i.e. their 'start', 'end' and 'lines' counters are recorded
        and they are removed from the stack of active identifiers.

        @param tok the token to be processed (Token)
        @exception ValueError raised to indicate an invalid indentation level
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # remove identifiers of a higher indentation
        while self.active and self.active[-1][1] >= self.indent_level:
            identifier, _level, startRow = self.active.pop()
            counters = self.counters.setdefault(identifier, {})
            counters["start"] = startRow
            # the dedent token belongs to the first row after the block
            counters["end"] = tok.row - 1
            counters["lines"] = tok.row - startRow

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        An identifier defined inside another active identifier is stored
        qualified with the name of the enclosing one (e.g. "Class.method").

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        if len(self.active) > 1 and self.indent_level > self.active[-1][1]:
            # __IGNORE_WARNING_Y108__
            qualified = self.active[-1][0] + "." + identifier
        else:
            qualified = identifier
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        The value is added to the counters of all active identifiers.

        @param key the key to be incremented
        @param value the increment (int)
        """
        for counterId, _level, _row in self.active:
            counters = self.counters.setdefault(counterId, {})
            counters[key] = counters.get(key, 0) + value

    def getCounter(self, counterId, key):
        """
        Public method used to get a specific counter value.

        @param counterId id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter or 0, if the counter
            or the key is unknown (int)
        """
        return self.counters.get(counterId, {}).get(key, 0)
183 |
188 |
184 |
189 |
185 def summarize(total, key, value): |
190 def summarize(total, key, value): |
186 """ |
191 """ |
187 Module function used to collect overall statistics. |
192 Module function used to collect overall statistics. |
188 |
193 |
189 @param total the dictionary for the overall statistics |
194 @param total the dictionary for the overall statistics |
190 @param key the key to be summarize |
195 @param key the key to be summarize |
191 @param value the value to be added to the overall statistics |
196 @param value the value to be added to the overall statistics |
192 @return the value added to the overall statistics |
197 @return the value added to the overall statistics |
193 """ |
198 """ |
210 |
215 |
211 parser = Parser() |
216 parser = Parser() |
212 parser.parse(text) |
217 parser.parse(text) |
213 |
218 |
214 stats = SourceStat() |
219 stats = SourceStat() |
215 stats.inc('lines', parser.lines) |
220 stats.inc("lines", parser.lines) |
216 for idx in range(len(parser.tokenlist)): |
221 for idx in range(len(parser.tokenlist)): |
217 tok = parser.tokenlist[idx] |
222 tok = parser.tokenlist[idx] |
218 |
223 |
219 # counting |
224 # counting |
220 if tok.type == NEWLINE: |
225 if tok.type == NEWLINE: |
221 stats.inc('nloc') |
226 stats.inc("nloc") |
222 elif tok.type == COMMENT: |
227 elif tok.type == COMMENT: |
223 stats.inc('comments') |
228 stats.inc("comments") |
224 if tok.line.strip() == tok.text: |
229 if tok.line.strip() == tok.text: |
225 stats.inc('commentlines') |
230 stats.inc("commentlines") |
226 elif tok.type == EMPTY: |
231 elif tok.type == EMPTY: |
227 if parser.tokenlist[idx - 1].type == token.OP: |
232 if parser.tokenlist[idx - 1].type == token.OP: |
228 stats.inc('nloc') |
233 stats.inc("nloc") |
229 elif parser.tokenlist[idx - 1].type == COMMENT: |
234 elif parser.tokenlist[idx - 1].type == COMMENT: |
230 continue |
235 continue |
231 else: |
236 else: |
232 stats.inc('empty') |
237 stats.inc("empty") |
233 elif tok.type == INDENT: |
238 elif tok.type == INDENT: |
234 stats.indent(tok) |
239 stats.indent(tok) |
235 elif tok.type == DEDENT: |
240 elif tok.type == DEDENT: |
236 stats.dedent(tok) |
241 stats.dedent(tok) |
237 elif ( |
242 elif tok.type == KEYWORD and tok.text in ("class", "def"): |
238 tok.type == KEYWORD and |
|
239 tok.text in ("class", "def") |
|
240 ): |
|
241 stats.push(parser.tokenlist[idx + 1].text, tok.row) |
243 stats.push(parser.tokenlist[idx + 1].text, tok.row) |
242 |
244 |
243 # collect overall statistics |
245 # collect overall statistics |
244 summarize(total, 'lines', parser.lines) |
246 summarize(total, "lines", parser.lines) |
245 summarize(total, 'bytes', len(text)) |
247 summarize(total, "bytes", len(text)) |
246 summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments')) |
248 summarize(total, "comments", stats.getCounter("TOTAL ", "comments")) |
247 summarize(total, 'commentlines', |
249 summarize(total, "commentlines", stats.getCounter("TOTAL ", "commentlines")) |
248 stats.getCounter('TOTAL ', 'commentlines')) |
250 summarize(total, "empty lines", stats.getCounter("TOTAL ", "empty")) |
249 summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty')) |
251 summarize(total, "non-commentary lines", stats.getCounter("TOTAL ", "nloc")) |
250 summarize(total, 'non-commentary lines', |
|
251 stats.getCounter('TOTAL ', 'nloc')) |
|
252 |
252 |
253 return stats |
253 return stats |