Utilities/SyntaxCheck.py

branch
Py2 comp.
changeset 2571
e6bb19eb87ea
child 3065
070b35dde35e
equal deleted inserted replaced
2552:1e893ea4e366 2571:e6bb19eb87ea
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2011 - 2013 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the syntax check for Python 2/3.
8 """
9
10 import sys
11 if sys.version_info[0] >= 3:
12 if __name__ == '__main__':
13 from py3flakes.checker import Checker
14 from py3flakes.messages import ImportStarUsed
15 else:
16 from .py3flakes.checker import Checker #__IGNORE_WARNING__
17 from .py3flakes.messages import ImportStarUsed #__IGNORE_WARNING__
18 else:
19 str = unicode #__IGNORE_WARNING__
20 if __name__ == '__main__':
21 from py2flakes.checker import Checker #__IGNORE_WARNING__
22 from py2flakes.messages import ImportStarUsed #__IGNORE_WARNING__
23 else:
24 from .py2flakes.checker import Checker #__IGNORE_WARNING__
25 from .py2flakes.messages import ImportStarUsed #__IGNORE_WARNING__
26
27 import re
28 import traceback
29 from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
30
31 try:
32 import Preferences
33 except (ImportError):
34 pass
35
# Patterns used to find an encoding declaration in raw (bytes) text.
# Each entry pairs the number of leading lines to inspect with the
# compiled pattern whose first group captures the encoding name.
codingBytes_regexps = list((
    # coding comment, searched in the first two lines
    (2, re.compile(br'''coding[:=]\s*([-\w_.]+)''')),
    # XML declaration, searched in the first line only
    (1, re.compile(br'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')),
))
40
41
def get_codingBytes(text):
    """
    Function to determine the encoding declared inside a bytes text.

    Each entry of codingBytes_regexps is tried in turn against the number
    of leading lines given by that entry; the first hit wins.

    @param text bytes text to inspect (bytes)
    @return lower case name of the declared coding (string) or None, if
        no declaration was found
    """
    allLines = text.splitlines()
    for lineCount, pattern in codingBytes_regexps:
        for candidate in allLines[:lineCount]:
            found = pattern.search(candidate)
            if found is not None:
                return str(found.group(1), "ascii").lower()
    return None
58
59
def decode(text):
    """
    Function to decode some byte text into a string.

    The encoding is determined by trying the following steps in order and
    returning the first one that succeeds:
    <ol>
    <li>a byte order mark (UTF-8, UTF-32, UTF-16; both byte orders)</li>
    <li>a coding declaration found by get_codingBytes()</li>
    <li>plain UTF-8</li>
    <li>the chardet guess, if enabled and its confidence is above 0.95</li>
    <li>the configured default encoding</li>
    <li>the chardet guess regardless of its confidence</li>
    <li>UTF-8 ignoring undecodable bytes</li>
    </ol>
    The steps relying on the Preferences module are skipped, if that module
    is not available (the resulting NameError is caught).

    @param text byte text to decode (bytes)
    @return tuple of decoded text and encoding (string, string)
    """
    # local import, so the module level import list stays untouched
    from codecs import BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE

    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return str(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        if text.startswith((BOM_UTF32_LE, BOM_UTF32_BE)):
            # UTF-32 with BOM; this must be tested before UTF-16 because
            # the little endian UTF-32 BOM starts with the UTF-16 one.
            # The codec consumes the BOM and selects the byte order.
            try:
                return str(text, 'utf-32'), 'utf-32'
            except UnicodeError:
                # not valid UTF-32; it may still be UTF-16 text starting
                # with a NUL character, so fall through to the next test
                pass
        if text.startswith((BOM_UTF16_LE, BOM_UTF16_BE)):
            # UTF-16 with BOM; the codec consumes the BOM and selects
            # the byte order
            return str(text, 'utf-16'), 'utf-16'
        coding = get_codingBytes(text)
        if coding:
            return str(text, coding), coding
    except (UnicodeError, LookupError):
        pass

    # Assume UTF-8
    try:
        return str(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    guess = None
    try:
        if Preferences.getEditor("AdvancedEncodingDetection"):
            # Try the universal character encoding detector
            try:
                import ThirdParty.CharDet.chardet
                guess = ThirdParty.CharDet.chardet.detect(text)
                if guess and guess['confidence'] > 0.95 and \
                   guess['encoding'] is not None:
                    codec = guess['encoding'].lower()
                    return str(text, codec), '{0}-guessed'.format(codec)
            except (UnicodeError, LookupError, ImportError):
                pass
    except NameError:
        # Preferences could not be imported
        pass

    # Try default encoding
    try:
        codec = Preferences.getEditor("DefaultEncoding")
        return str(text, codec), '{0}-default'.format(codec)
    except (UnicodeError, LookupError, NameError):
        pass

    try:
        if Preferences.getEditor("AdvancedEncodingDetection"):
            # Use the guessed one even if confidence level is low
            if guess and guess['encoding'] is not None:
                try:
                    codec = guess['encoding'].lower()
                    return str(text, codec), '{0}-guessed'.format(codec)
                except (UnicodeError, LookupError):
                    pass
    except NameError:
        # Preferences could not be imported
        pass

    # Assume UTF-8 losing information
    return str(text, "utf-8", "ignore"), 'utf-8-ignore'
125
126
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    @exception IOError raised by open() or read(), if the file cannot
        be read
    """
    try:
        filename = filename.encode(sys.getfilesystemencoding())
    except UnicodeError:
        # The name cannot be converted (UnicodeDecodeError for Python 2
        # byte strings, UnicodeEncodeError for names not representable in
        # the file system encoding); use it unchanged.
        pass
    # the context manager closes the file even if reading fails
    with open(filename, "rb") as f:
        text = f.read()
    return decode(text)
142
143
def normalizeCode(codestring):
    """
    Function to normalize the given code.

    All line endings are converted to '\\n' and a trailing newline is added
    to non-empty code not ending with one. When running under Python 2 the
    result is additionally encoded as UTF-8, if that is possible.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    codestring = codestring.replace("\r\n", "\n").replace("\r", "\n")

    if codestring and not codestring.endswith('\n'):
        codestring = codestring + '\n'

    # Check type for py2: if not str it's unicode
    if sys.version_info[0] == 2:
        try:
            codestring = codestring.encode('utf-8')
        except UnicodeError:
            # a byte string containing non ASCII characters cannot be
            # encoded again; keep it as it is
            pass

    return codestring
164
165
def extractLineFlags(line, startComment="#", endComment=""):
    """
    Function to extract flags starting and ending with '__' from a line
    comment.

    Only the text following the last occurrence of startComment is
    inspected. If endComment is given, every occurrence of it is removed
    from that text before the flags are collected.

    @param line line to extract flags from (string)
    @keyparam startComment string identifying the start of the comment
        (string)
    @keyparam endComment string identifying the end of a comment (string)
    @return list containing the extracted flags (list of strings)
    """
    flags = []

    pos = line.rfind(startComment)
    if pos >= 0:
        comment = line[pos + len(startComment):].strip()
        if endComment:
            # strip the comment terminator itself (not the literal text
            # "endComment"), so a flag directly followed by it is found
            comment = comment.replace(endComment, "")
        flags = [word for word in comment.split()
                 if word.startswith("__") and word.endswith("__")]
    return flags
185
186
def compile_and_check(file_, codestring="", checkFlakes=True,
                      ignoreStarImportWarnings=False):
    """
    Function to compile one Python source file and to perform a pyflakes
    check.

    The source is compiled to an AST only (compile flag 1024). If the
    compilation succeeds and checkFlakes is set, the AST is handed to the
    pyflakes Checker. Warnings on lines carrying the __IGNORE_WARNING__
    flag in a comment are suppressed.

    @param file_ source filename (string)
    @param codestring string containing the code to compile (string); if
        it is empty, the code is read from file_
    @keyparam checkFlakes flag indicating to do a pyflakes check (boolean)
    @keyparam ignoreStarImportWarnings flag indicating to
        ignore 'star import' warnings (boolean)
    @return A tuple of seven items: the status (True = an error was
        found), the file name, the line number, the index number, the code
        string, the error message and a list of pyflakes results. Items
        two to six are only valid, if the status is True. Each pyflakes
        result is a list of marker ("FLAKES_WARNING" or "FLAKES_ERROR"),
        file name, line number, message and message arguments. If the
        file cannot be read or quixote is missing for a .ptl file, the
        tuple (False, None, None, None, None, None, []) is returned.
    """
    try:
        import builtins
    except ImportError:
        import __builtin__ as builtins      # __IGNORE_WARNING__

    # stays None, if the compilation failed with an exception that is
    # deliberately swallowed below
    module = None
    try:
        if sys.version_info[0] == 2:
            file_enc = file_.encode(sys.getfilesystemencoding())
        else:
            file_enc = file_

        if not codestring:
            try:
                codestring = readEncodedFile(file_)[0]
            except (UnicodeDecodeError, IOError):
                return (False, None, None, None, None, None, [])

        codestring = normalizeCode(codestring)

        if file_.endswith('.ptl'):
            try:
                import quixote.ptl_compile
            except ImportError:
                return (False, None, None, None, None, None, [])
            template = quixote.ptl_compile.Template(codestring, file_enc)
            template.compile()

        # ast.PyCF_ONLY_AST = 1024, speed optimisation
        module = builtins.compile(codestring, file_enc, 'exec', 1024)
    except SyntaxError as detail:
        index = 0
        code = ""
        error = ""
        # Parse the formatted exception: a 'File "...", line N' line,
        # optionally the offending code and a caret line, and finally
        # the 'SyntaxError: ...' message.
        lines = traceback.format_exception_only(SyntaxError, detail)
        match = re.match(r'\s*File "(.+)", line (\d+)',
                         lines[0].replace('<string>', '{0}'.format(file_)))
        if match is not None:
            fn, line = match.group(1, 2)
            if lines[1].startswith('SyntaxError:'):
                error = re.match('SyntaxError: (.+)', lines[1]).group(1)
            else:
                code = re.match('(.+)', lines[1]).group(1)
                for seLine in lines[2:]:
                    if seLine.startswith('SyntaxError:'):
                        error = re.match(
                            'SyntaxError: (.+)', seLine).group(1)
                    elif seLine.rstrip().endswith('^'):
                        # the caret line is indented by four characters
                        index = len(seLine.rstrip()) - 4
        else:
            fn = detail.filename
            line = detail.lineno or 1
            error = detail.msg
        return (True, fn, int(line), index, code, error, [])
    except ValueError as detail:
        index = 0
        code = ""
        try:
            fn = detail.filename
            line = detail.lineno
            error = detail.msg
        except AttributeError:
            fn = file_
            line = 1
            error = str(detail)
        return (True, fn, line, index, code, error, [])
    except Exception as detail:
        try:
            fn = detail.filename
            line = detail.lineno
            index = 0
            code = ""
            error = detail.msg
            return (True, fn, line, index, code, error, [])
        except Exception:       # this catchall is intentional
            pass

    # pyflakes; skipped as well, if there is no AST to check because the
    # exception above was swallowed
    if not checkFlakes or module is None:
        return (False, "", -1, -1, "", "", [])

    strings = []
    lines = codestring.splitlines()
    try:
        warnings = Checker(module, file_)
        warnings.messages.sort(key=lambda a: a.lineno)
        for warning in warnings.messages:
            if ignoreStarImportWarnings and \
               isinstance(warning, ImportStarUsed):
                continue

            _fn, lineno, message, msg_args = warning.getMessageData()
            lineFlags = extractLineFlags(lines[lineno - 1].strip())
            if "__IGNORE_WARNING__" not in lineFlags:
                strings.append(
                    ["FLAKES_WARNING", _fn, lineno, message, msg_args])
    except SyntaxError as err:
        # err.text may be None; fall back to the message in that case
        msg = (err.text or "").strip() or err.msg
        strings.append(["FLAKES_ERROR", file_, err.lineno, msg, ()])

    return (False, "", -1, -1, "", "", strings)
305
306
if __name__ == "__main__":
    # Command line: [-fi | -fs] filename
    # Output: status, file name, line, index, code and error message on
    # one line each, followed by the pyflakes results (if requested).
    argCount = len(sys.argv)
    validArgs = 2 <= argCount <= 3 and \
        (argCount != 3 or sys.argv[1] in ["-fi", "-fs"])
    if not validArgs:
        for outputLine in ("ERROR", "", "", "", "", "No file name given."):
            print(outputLine)
    else:
        filename = sys.argv[-1]
        checkFlakes = argCount == 3
        # Setting is ignored if checkFlakes is False
        ignoreStarImportWarnings = sys.argv[1] == "-fi"

        try:
            codestring = readEncodedFile(filename)[0]
            result = compile_and_check(
                filename, codestring, checkFlakes, ignoreStarImportWarnings)
        except IOError as msg:
            # fake a syntax error
            result = (True, filename, 1, 0, "",
                      "I/O Error: %s" % str(msg), [])
        syntaxerror, fname, line, index, code, error, warnings = result

        print("ERROR" if syntaxerror else "NO_ERROR")
        for field in (fname, line, index, code, error):
            print(field)

        if not syntaxerror:
            for warningLine in warnings:
                # the last item holds the message arguments, which are
                # written as one '#' separated line after the other items
                for warning in warningLine[:-1]:
                    print(warning)
                print('#'.join([str(x) for x in warningLine[-1]]))

    sys.exit(0)

eric ide

mercurial