eric6/DataViews/CodeMetrics.py

changeset 6942
2602857055c5
parent 6645
ad476851d7e0
child 7192
a22eee00b052
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2003 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 #
7 # Code mainly borrowed from the Pythius package which is
8 # Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
9 #
10
11 """
12 Module implementing a simple Python code metrics analyzer.
13
14 @exception ValueError the tokenize module is too old
15 """
16
17 from __future__ import unicode_literals
18
19 import os
20 import io
21 import sys
22 import keyword
23 import token
24 import tokenize
25
26 import Utilities
27
# Token type codes used throughout this module.  KEYWORD is a synthetic
# type (placed above token.NT_OFFSET so it cannot collide with any real
# token code) that Parser assigns to NAME tokens which are Python keywords.
KEYWORD = token.NT_OFFSET + 1
COMMENT = tokenize.COMMENT
INDENT = token.INDENT
DEDENT = token.DEDENT
NEWLINE = token.NEWLINE
EMPTY = tokenize.NL
34
35
class Token(object):
    """
    Class acting as a simple container for token related infos.

    All keyword arguments given to the constructor become attributes
    of the instance.
    """
    def __init__(self, **kw):
        """
        Constructor

        @keyparam **kw list of key, value pairs
        """
        for name, value in kw.items():
            setattr(self, name, value)
47
48
class Parser(object):
    """
    Class used to parse the source code of a Python file.

    After a call to parse() the token stream is available in the
    'tokenlist' attribute and the line count in the 'lines' attribute.
    """
    def parse(self, text):
        """
        Public method used to parse the source code.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # normalize eols to the platform convention and make sure the
        # text ends in one, so the final line is counted as well
        text = Utilities.convertLineEnds(text, os.linesep)
        if not text.endswith(os.linesep):
            text = "{0}{1}".format(text, os.linesep)

        self.lines = text.count(os.linesep)

        # tokenize.tokenize (Python 3) expects a bytes readline
        source = io.BytesIO(text.encode("utf-8"))
        try:
            if sys.version_info[0] == 2:
                gen = tokenize.generate_tokens(source.readline)
            else:
                gen = tokenize.tokenize(source.readline)
            for toktype, toktext, start, end, line in gen:
                (srow, scol) = start
                if toktype in [token.NEWLINE, tokenize.NL]:
                    # store line breaks with the platform eol as text
                    self.__addToken(toktype, os.linesep, srow, scol, line)
                elif toktype in [token.INDENT, token.DEDENT]:
                    self.__addToken(toktype, '', srow, scol, line)
                elif toktype == token.NAME and keyword.iskeyword(toktext):
                    # reclassify names that are Python keywords
                    self.__addToken(KEYWORD, toktext, srow, scol, line)
                else:
                    self.__addToken(toktype, toktext, srow, scol, line)
        except tokenize.TokenError as msg:
            # best effort: report the error and keep the tokens
            # collected so far
            print("Token Error: {0}".format(str(msg)))
            # __IGNORE_WARNING_M801__

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(Token(type=toktype, text=toktext, row=srow,
                                    col=scol, line=line))
106
107 spacer = ' '
108
109
class SourceStat(object):
    """
    Class used to calculate and store the source code statistics.
    """
    def __init__(self):
        """
        Constructor
        """
        # identifiers in order of appearance
        self.identifiers = []
        # stack of (qualified name, indent level, start row) entries; the
        # sentinel 'TOTAL ' entry accumulates the module wide counts
        self.active = [('TOTAL ', -1, 0)]
        # mapping of qualified identifier to its counter dictionary
        self.counters = {}
        # current indentation depth
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        @param tok the token to be processed (Token)
        @exception ValueError raised to indicate an invalid indentation level
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # close every identifier that was opened at a deeper or equal
        # indentation and record its line span
        while self.active and self.active[-1][1] >= self.indent_level:
            name, _level, startRow = self.active.pop()
            counters = self.counters.setdefault(name, {})
            counters['start'] = startRow
            counters['end'] = tok.row - 1
            counters['lines'] = tok.row - startRow

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        if len(self.active) > 1 and self.indent_level > self.active[-1][1]:
            # the identifier is nested inside the active one;
            # qualify it with the enclosing name
            qualified = "{0}.{1}".format(self.active[-1][0], identifier)
        else:
            qualified = identifier
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        @param key the key to be incremented
        @param value the increment (int)
        """
        # update the counter of every identifier on the active stack,
        # including the 'TOTAL ' sentinel
        for counterId, _level, _row in self.active:
            counters = self.counters.setdefault(counterId, {})
            counters[key] = counters.get(key, 0) + value

    def getCounter(self, counterId, key):
        """
        Public method used to get a specific counter value.

        @param counterId id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter (int)
        """
        return self.counters.get(counterId, {}).get(key, 0)
187
188
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.

    @param total the dictionary for the overall statistics
    @param key the key to be summarize
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    if key in total:
        total[key] += value
    else:
        total[key] = value
    return value
200
201
def analyze(filename, total):
    """
    Module function used analyze the source of a Python file.

    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    try:
        text = Utilities.readEncodedFile(filename)[0]
    except (UnicodeError, IOError):
        # unreadable files contribute empty statistics
        return SourceStat()

    parser = Parser()
    parser.parse(text)

    stats = SourceStat()
    stats.inc('lines', parser.lines)
    for idx, tok in enumerate(parser.tokenlist):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        elif tok.type == COMMENT:
            stats.inc('comments')
            # a comment occupying the whole line is a comment line
            if tok.line.strip() == tok.text:
                stats.inc('commentlines')
        elif tok.type == EMPTY:
            prevType = parser.tokenlist[idx - 1].type
            if prevType == token.OP:
                # continuation of a multi line statement
                stats.inc('nloc')
            elif prevType != COMMENT:
                stats.inc('empty')
        elif tok.type == INDENT:
            stats.indent(tok)
        elif tok.type == DEDENT:
            stats.dedent(tok)
        elif tok.type == KEYWORD and tok.text in ("class", "def"):
            # the token following 'class'/'def' is the identifier
            stats.push(parser.tokenlist[idx + 1].text, tok.row)

    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    summarize(total, 'bytes', len(text))
    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
    summarize(total, 'commentlines',
              stats.getCounter('TOTAL ', 'commentlines'))
    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
    summarize(total, 'non-commentary lines',
              stats.getCounter('TOTAL ', 'nloc'))

    return stats

eric ide

mercurial