eric7/DataViews/CodeMetrics.py

branch
eric7
changeset 8312
800c432b34c8
parent 8234
fcb6b4b96274
child 8881
54e42bc2437a
equal deleted inserted replaced
8311:4e8b98454baa 8312:800c432b34c8
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2003 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 #
7 # Code mainly borrowed from the Pythius package which is
8 # Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
9 #
10
11 """
12 Module implementing a simple Python code metrics analyzer.
13
14 @exception ValueError the tokenize module is too old
15 """
16
17
18 import os
19 import io
20 import keyword
21 import token
22 import tokenize
23
24 import Utilities
25
26 KEYWORD = token.NT_OFFSET + 1
27 COMMENT = tokenize.COMMENT
28 INDENT = token.INDENT
29 DEDENT = token.DEDENT
30 NEWLINE = token.NEWLINE
31 EMPTY = tokenize.NL
32
33
class Token:
    """
    Class acting as a simple container for token related data.
    """
    def __init__(self, **kw):
        """
        Constructor

        @keyparam **kw list of key, value pairs
        """
        # expose every keyword argument as an instance attribute
        for attributeName, attributeValue in kw.items():
            setattr(self, attributeName, attributeValue)
46
class Parser:
    """
    Class used to parse the source code of a Python file.
    """
    def parse(self, text):
        """
        Public method used to parse the source code.

        @param text the source code as read from a Python source file
        """
        self.tokenlist = []

        # normalize line endings and guarantee a trailing one
        text = Utilities.convertLineEnds(text, os.linesep)
        if not text.endswith(os.linesep):
            text += os.linesep

        self.lines = text.count(os.linesep)

        source = io.BytesIO(text.encode("utf-8"))
        try:
            for toktype, toktext, start, _end, line in tokenize.tokenize(
                source.readline
            ):
                srow, scol = start
                if toktype in (token.NEWLINE, tokenize.NL):
                    # record line ends with a uniform text
                    self.__addToken(toktype, os.linesep, srow, scol, line)
                elif toktype in (token.INDENT, token.DEDENT):
                    # indentation changes carry no text of their own
                    self.__addToken(toktype, '', srow, scol, line)
                else:
                    if toktype == token.NAME and keyword.iskeyword(toktext):
                        # promote Python keywords to the artificial KEYWORD type
                        toktype = KEYWORD
                    self.__addToken(toktype, toktext, srow, scol, line)
        except tokenize.TokenError as msg:
            print("Token Error: {0}".format(str(msg)))
            # __IGNORE_WARNING_M801__

    def __addToken(self, toktype, toktext, srow, scol, line):
        """
        Private method used to add a token to our list of tokens.

        @param toktype the type of the token (int)
        @param toktext the text of the token (string)
        @param srow starting row of the token (int)
        @param scol starting column of the token (int)
        @param line logical line the token was found (string)
        """
        self.tokenlist.append(
            Token(type=toktype, text=toktext, row=srow, col=scol, line=line))
101
# NOTE(review): not referenced within the visible part of this module —
# presumably used by importers for indenting textual output; confirm
# against callers before removing.
spacer = ' '
103
104
class SourceStat:
    """
    Class used to calculate and store the source code statistics.
    """
    def __init__(self):
        """
        Constructor
        """
        # identifiers in order of appearance
        self.identifiers = []
        # stack of (identifier, indent level, start row) for open scopes;
        # the synthetic 'TOTAL ' entry collects file wide numbers
        self.active = [('TOTAL ', -1, 0)]
        # mapping identifier -> dictionary of counter values
        self.counters = {}
        self.indent_level = 0

    def indent(self, tok):
        """
        Public method used to increment the indentation level.

        @param tok a token (Token, ignored)
        """
        self.indent_level += 1

    def dedent(self, tok):
        """
        Public method used to decrement the indentation level.

        @param tok the token to be processed (Token)
        @exception ValueError raised to indicate an invalid indentation level
        """
        self.indent_level -= 1
        if self.indent_level < 0:
            raise ValueError("INTERNAL ERROR: Negative indent level")

        # close every identifier opened at an equal or deeper indentation
        while self.active and self.active[-1][1] >= self.indent_level:
            scopeName, _level, startRow = self.active.pop()
            scopeCounters = self.counters.setdefault(scopeName, {})
            scopeCounters['start'] = startRow
            scopeCounters['end'] = tok.row - 1
            scopeCounters['lines'] = tok.row - startRow

    def push(self, identifier, row):
        """
        Public method used to store an identifier.

        @param identifier the identifier to be remembered (string)
        @param row the row, the identifier is defined in (int)
        """
        if len(self.active) > 1 and self.indent_level > self.active[-1][1]:
            # nested scope: qualify with the enclosing scope's name
            # __IGNORE_WARNING_Y108__
            qualified = "{0}.{1}".format(self.active[-1][0], identifier)
        else:
            qualified = identifier
        self.active.append((qualified, self.indent_level, row))
        self.identifiers.append(qualified)

    def inc(self, key, value=1):
        """
        Public method used to increment the value of a key.

        @param key the key to be incremented
        @param value the increment (int)
        """
        # every open scope (including 'TOTAL ') receives the increment
        for counterId, _level, _row in self.active:
            scopeCounters = self.counters.setdefault(counterId, {})
            scopeCounters[key] = scopeCounters.get(key, 0) + value

    def getCounter(self, counterId, key):
        """
        Public method used to get a specific counter value.

        @param counterId id of the counter (string)
        @param key key of the value to be retrieved (string)
        @return the value of the requested counter (int)
        """
        return self.counters.get(counterId, {}).get(key, 0)
183
184
def summarize(total, key, value):
    """
    Module function used to collect overall statistics.

    @param total the dictionary for the overall statistics
    @param key the key to be summarized
    @param value the value to be added to the overall statistics
    @return the value added to the overall statistics
    """
    total[key] = total.get(key, 0) + value
    return value
196
197
def analyze(filename, total):
    """
    Module function used analyze the source of a Python file.

    @param filename name of the Python file to be analyzed (string)
    @param total dictionary receiving the overall code statistics
    @return a statistics object with the collected code statistics (SourceStat)
    """
    try:
        text = Utilities.readEncodedFile(filename)[0]
    except (UnicodeError, OSError):
        # unreadable file: deliver empty statistics
        return SourceStat()

    parser = Parser()
    parser.parse(text)

    stats = SourceStat()
    stats.inc('lines', parser.lines)
    for idx, tok in enumerate(parser.tokenlist):
        # counting
        if tok.type == NEWLINE:
            stats.inc('nloc')
        elif tok.type == COMMENT:
            stats.inc('comments')
            # a comment filling the whole line counts as a comment line
            if tok.line.strip() == tok.text:
                stats.inc('commentlines')
        elif tok.type == EMPTY:
            previousType = parser.tokenlist[idx - 1].type
            if previousType == token.OP:
                # continuation of a multi line statement
                stats.inc('nloc')
            elif previousType != COMMENT:
                stats.inc('empty')
        elif tok.type == INDENT:
            stats.indent(tok)
        elif tok.type == DEDENT:
            stats.dedent(tok)
        elif tok.type == KEYWORD and tok.text in ("class", "def"):
            # the identifier follows the keyword directly
            stats.push(parser.tokenlist[idx + 1].text, tok.row)

    # collect overall statistics
    summarize(total, 'lines', parser.lines)
    summarize(total, 'bytes', len(text))
    summarize(total, 'comments', stats.getCounter('TOTAL ', 'comments'))
    summarize(total, 'commentlines',
              stats.getCounter('TOTAL ', 'commentlines'))
    summarize(total, 'empty lines', stats.getCounter('TOTAL ', 'empty'))
    summarize(total, 'non-commentary lines',
              stats.getCounter('TOTAL ', 'nloc'))

    return stats

eric ide

mercurial