|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2015 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a checker for miscellaneous checks. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 import sys |
|
13 import ast |
|
14 import re |
|
15 |
|
16 |
|
class MiscellaneousChecker(object):
    """
    Class implementing a checker for miscellaneous checks.

    The checker is fed the source as a list of lines, runs the enabled
    checks via run() and collects its findings in the public attributes
    'errors' (list of issue tuples) and 'counters' (occurrences per code).
    """
    Codes = [
        "M101", "M102",
        "M111", "M112",
        "M121",
        "M131",
        "M801",

        "M901",
    ]

    # conversion characters recognized by the old style formatting check
    __conversionChars = "diouxXeEfFgGcrs"

    def __init__(self, source, filename, select, ignore, expected, repeat,
                 args):
        """
        Constructor

        @param source source code to be checked (list of string)
        @param filename name of the source file (string)
        @param select list of selected codes (list of string)
        @param ignore list of codes to be ignored (list of string)
        @param expected list of expected codes (list of string)
        @param repeat flag indicating to report each occurrence of a code
            (boolean)
        @param args dictionary of arguments for the miscellaneous checks (dict)
        """
        self.__select = tuple(select)
        # an explicit selection means "ignore everything that is not
        # selected"; the empty prefix matches every code
        self.__ignore = ('',) if select else tuple(ignore)
        self.__expected = expected[:]
        self.__repeat = repeat
        self.__filename = filename
        self.__source = source[:]
        self.__args = args

        self.__codingRegex = re.compile(
            r'coding[:=]\s*([-\w.]+)', re.IGNORECASE)
        self.__pep3101FormatRegex = re.compile(
            r'^(?:[^\'"]*[\'"][^\'"]*[\'"])*\s*%|^\s*%')

        # statistics counters
        self.counters = {}

        # collection of detected errors
        self.errors = []

        checkersWithCodes = [
            # TODO: fill this
            (self.__checkCoding, ("M101", "M102")),
            (self.__checkCopyright, ("M111", "M112")),
            (self.__checkBlindExcept, ("M121",)),
            (self.__checkPep3101, ("M131",)),
            (self.__checkPrintStatements, ("M801",)),
        ]

        self.__defaultArgs = {
            "CodingChecker": 'latin-1, utf-8',
            "CopyrightChecker": {
                "MinFilesize": 0,
                "Author": "",
            },
        }

        # enable a checker as soon as one of its codes is not ignored
        self.__checkers = []
        for checker, codes in checkersWithCodes:
            if any(not (code and self.__ignoreCode(code))
                    for code in codes):
                self.__checkers.append(checker)

    def __ignoreCode(self, code):
        """
        Private method to check if the message code should be ignored.

        @param code message code to check for (string)
        @return flag indicating to ignore the given code (boolean)
        """
        return (code.startswith(self.__ignore) and
                not code.startswith(self.__select))

    def __error(self, lineNumber, offset, code, *args):
        """
        Private method to record an issue.

        @param lineNumber line number of the issue, zero based (integer)
        @param offset position within line of the issue (integer)
        @param code message code (string)
        @param args arguments for the message (list)
        """
        if self.__ignoreCode(code):
            return

        # the occurrence is counted even if the code is an expected one
        self.counters[code] = self.counters.get(code, 0) + 1

        # Don't care about expected codes
        if code in self.__expected:
            return

        if code and (self.counters[code] == 1 or self.__repeat):
            # record the issue with one based line number
            self.errors.append(
                (self.__filename, lineNumber + 1, offset, (code, args)))

    def __reportInvalidSyntax(self):
        """
        Private method to report a syntax error.

        It must be called while the exception raised by compile() is being
        handled because the exception is taken from sys.exc_info().
        """
        exc_type, exc = sys.exc_info()[:2]
        if len(exc.args) > 1:
            # SyntaxError details: (filename, lineno, offset, text, ...)
            position = exc.args[1]
            if len(position) > 2:
                position = position[1:3]
        else:
            position = (1, 0)
        # line number and column may both be None; fall back to the start
        # of the file respectively the start of the line
        self.__error((position[0] or 1) - 1, position[1] or 0,
                     'M901', exc_type.__name__, exc.args[0])

    def run(self):
        """
        Public method to check the given source against miscellaneous
        conditions.
        """
        if not self.__filename:
            # don't do anything, if essential data is missing
            return

        if not self.__checkers:
            # don't do anything, if no codes were selected
            return

        try:
            self.__tree = compile(
                ''.join(self.__source), '', 'exec', ast.PyCF_ONLY_AST)
        except (SyntaxError, TypeError, ValueError):
            # ValueError is raised by some Python 3 versions for sources
            # containing null bytes
            self.__reportInvalidSyntax()
            return

        for check in self.__checkers:
            check()

    def __checkCoding(self):
        """
        Private method to check the presence of a coding line and valid
        encodings.

        Only the first two source lines are inspected. M101 is reported, if
        none of them contains a coding declaration; M102 is reported, if the
        declared encoding is not one of the configured ones.
        """
        if not self.__source:
            return

        configured = self.__args.get(
            "CodingChecker", self.__defaultArgs["CodingChecker"])
        # drop empty entries so that an empty setting disables the
        # validation of the encoding name
        encodings = [e.lower().strip()
                     for e in configured.split(",") if e.strip()]
        for lineno, line in enumerate(self.__source[:2]):
            matched = self.__codingRegex.search(line)
            if matched:
                if encodings and matched.group(1).lower() not in encodings:
                    self.__error(lineno, 0, "M102", matched.group(1))
                break
        else:
            # no coding declaration found in the inspected lines
            self.__error(0, 0, "M101")

    def __checkCopyright(self):
        """
        Private method to check the presence of a copyright statement.

        M111 is reported, if the top of the source contains no copyright
        statement at all; M112 is reported, if a statement is present but
        none for the configured author.
        """
        source = "".join(self.__source)
        copyrightArgs = self.__args.get(
            "CopyrightChecker", self.__defaultArgs["CopyrightChecker"])
        copyrightMinFileSize = copyrightArgs.get(
            "MinFilesize",
            self.__defaultArgs["CopyrightChecker"]["MinFilesize"])
        copyrightAuthor = copyrightArgs.get(
            "Author",
            self.__defaultArgs["CopyrightChecker"]["Author"])
        copyrightRegexStr = \
            r"Copyright\s+(\(C\)\s+)?(\d{{4}}\s+-\s+)?\d{{4}}\s+{author}"

        # only the top of the file is searched; files shorter than the
        # configured minimum size are not checked at all
        tocheck = max(1024, copyrightMinFileSize)
        topOfSource = source[:tocheck]
        if len(topOfSource) < copyrightMinFileSize:
            return

        copyrightRe = re.compile(copyrightRegexStr.format(author=r".*"),
                                 re.IGNORECASE)
        if not copyrightRe.search(topOfSource):
            self.__error(0, 0, "M111")
            return

        if copyrightAuthor:
            # NOTE: the author is inserted into the pattern verbatim, i.e.
            # it is interpreted as a regular expression
            copyrightAuthorRe = re.compile(
                copyrightRegexStr.format(author=copyrightAuthor),
                re.IGNORECASE)
            if not copyrightAuthorRe.search(topOfSource):
                self.__error(0, 0, "M112")

    def __checkBlindExcept(self):
        """
        Private method to check for blind except statements.

        The check works on the syntax tree, so the text 'except:' inside
        of comments or string literals is not reported.
        """
        handlers = [node for node in ast.walk(self.__tree)
                    if isinstance(node, ast.ExceptHandler) and
                    node.type is None]
        # ast.walk() yields no particular order; report in source order
        for node in sorted(handlers,
                           key=lambda n: (n.lineno, n.col_offset)):
            self.__error(node.lineno - 1, node.col_offset, "M121")

    def __checkPep3101(self):
        """
        Private method to check for old style string formatting.

        The issue is reported at the position of the first '%' character of
        a matching line together with the detected formatter ('%' followed
        by the conversion character, if one was found).
        """
        for lineno, line in enumerate(self.__source):
            if not self.__pep3101FormatRegex.search(line):
                continue

            # the regular expression guarantees the presence of a '%'
            formatPos = line.find('%')
            pos = formatPos
            # slicing avoids an IndexError for a '%' at the end of the line
            if line[pos + 1:pos + 2] == "(":
                closing = line.find(")", pos)
                if closing != -1:
                    # skip the mapping key, e.g. '%(name)s'
                    pos = closing

            formatter = '%'
            for c in line[pos:]:
                if c in self.__conversionChars:
                    formatter += c
                    break
            self.__error(lineno, formatPos, "M131", formatter)

    def __checkPrintStatements(self):
        """
        Private method to check for print statements.

        Both calls of a function named 'print' and (for Python 2 syntax
        trees) print statements are reported.
        """
        hasPrintNode = hasattr(ast, 'Print')
        prints = [
            node for node in ast.walk(self.__tree)
            if (isinstance(node, ast.Call) and
                getattr(node.func, 'id', None) == 'print') or
            (hasPrintNode and isinstance(node, ast.Print))
        ]
        # ast.walk() yields no particular order; report in source order
        for node in sorted(prints, key=lambda n: (n.lineno, n.col_offset)):
            self.__error(node.lineno - 1, node.col_offset, "M801")