eric6/Plugins/CheckerPlugins/Tabnanny/Tabnanny.py

changeset 7662
d5e4bed968b4
parent 7661
6bf02583bf9e
child 7663
b4d5234f92e7
equal deleted inserted replaced
7661:6bf02583bf9e 7662:d5e4bed968b4
1 # -*- coding: utf-8 -*-
2
3 """
4 The Tab Nanny despises ambiguous indentation. She knows no mercy.
5
6 tabnanny -- Detection of ambiguous indentation
7
8 For the time being this module is intended to be called as a script.
9 However it is possible to import it into an IDE and use the function
10 check() described below.
11
12 Warning: The API provided by this module is likely to change in future
13 releases; such changes may not be backward compatible.
14
15 This is a modified version to make the original tabnanny better suitable
16 for being called from within the eric6 IDE.
17
18 @exception ValueError The tokenize module is too old.
19 """
20
21 # Released to the public domain, by Tim Peters, 15 April 1998.
22
23 # XXX Note: this is now a standard library module.
24 # XXX The API needs to undergo changes however; the current code is too
25 # XXX script-like. This will be addressed later.
26
27 #
28 # This is a modified version to make the original tabnanny better suitable
29 # for being called from within the eric6 IDE. The modifications are as
30 # follows:
31 #
32 # - there is no main function anymore
33 # - check function has been modified to accept a filename and the source
34 # text and to return a tuple indicating status (True = an error was
35 # found), the line number and the error message (boolean, string, string).
36 # The values are only valid if the status is True.
37 #
38 # Modifications Copyright (c) 2003-2020 Detlev Offenbach
39 # <detlev@die-offenbachs.de>
40 #
41
42 __version__ = "6_eric"
43
44 import tokenize
45 try:
46 import StringIO as io
47 import Queue as queue
48 except (ImportError):
49 import io # __IGNORE_WARNING__
50 import queue
51
52 import multiprocessing
53
54
# The checker depends on the NL token type; refuse to load without it.
if not hasattr(tokenize, "NL"):
    raise ValueError(
        "tokenize.NL doesn't exist -- tokenize module too old")

# names exported by a star import of this module
__all__ = [
    "check",
    "NannyNag",
    "process_tokens",
]
59
60
def initService():
    """
    Initialize the service and hand back the function checking one file.

    @return the entry point for the background client (function)
    """
    entryPoint = check
    return entryPoint
68
69
def initBatchService():
    """
    Initialize the batch service and hand back the function checking a
    batch of files.

    @return the entry point for the background client (function)
    """
    entryPoint = batchCheck
    return entryPoint
77
78
class NannyNag(Exception):
    """
    Class implementing an exception for indentation issues.

    Raised by process_tokens() when it detects an ambiguous indent.
    Captured and handled in __check().
    """
    def __init__(self, lineno, msg, line):
        """
        Constructor

        @param lineno Line number of the ambiguous indent.
        @param msg Descriptive message assigned to this problem.
        @param line The offending source line.
        """
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """
        Public method to get the line number of the ambiguous indent.

        @return The line number (integer)
        """
        return self.lineno

    def get_msg(self):
        """
        Public method to get the message describing the problem.

        @return The error message (string)
        """
        return self.msg

    def get_line(self):
        """
        Public method to get the source line the problem was found in.

        @return The line of code (string)
        """
        return self.line
119
120
def check(file, text=""):
    """
    Function to check one Python source file for whitespace related
    problems.

    This is the single file entry point; the work is delegated to __check().

    @param file source filename (string)
    @param text source text (string)
    @return A tuple indicating status (True = an error was found), the
        linenumber and the error message (boolean, string, string). The
        values are only valid, if the status is True.
    """
    result = __check(file, text)
    return result
134
135
def batchCheck(argumentsList, send, fx, cancelled, maxProcesses=0):
    """
    Module function to check a batch of files for whitespace related problems.

    The files are distributed to a pool of worker processes; each result is
    passed to the send function as soon as it becomes available.

    @param argumentsList list of arguments tuples as given for check
    @type list
    @param send reference to send function
    @type func
    @param fx registered service name
    @type str
    @param cancelled reference to function checking for a cancellation
    @type func
    @param maxProcesses number of processes to be used (0 = determine it
        based on the CPU count)
    @type int
    """
    if maxProcesses == 0:
        # determine based on CPU count, leaving one CPU free if possible
        try:
            NumberOfProcesses = multiprocessing.cpu_count() - 1
        except NotImplementedError:
            NumberOfProcesses = 1
    else:
        NumberOfProcesses = maxProcesses
    # At least one worker is required; with none the result loop below
    # would wait forever (e.g. on a single CPU machine).
    NumberOfProcesses = max(1, NumberOfProcesses)

    # Create queues
    taskQueue = multiprocessing.Queue()
    doneQueue = multiprocessing.Queue()

    # Submit tasks (initially two times the number of processes)
    initialTasks = 2 * NumberOfProcesses
    for task in argumentsList[:initialTasks]:
        taskQueue.put(task)

    # Start worker processes
    for _ in range(NumberOfProcesses):
        multiprocessing.Process(
            target=worker, args=(taskQueue, doneQueue)
        ).start()

    # Get and send results
    endIndex = len(argumentsList) - initialTasks
    for i in range(len(argumentsList)):
        resultSent = False
        wasCancelled = False

        while not resultSent:
            try:
                # get result (waiting max. 3 seconds) and send it to the
                # frontend; the timeout makes the cancellation check below
                # reachable while no result is available
                filename, result = doneQueue.get(timeout=3)
                send(fx, filename, result)
                resultSent = True
            except queue.Empty:
                # ignore empty queue, just carry on
                if cancelled():
                    wasCancelled = True
                    break

        if wasCancelled or cancelled():
            # just exit the loop ignoring the results of queued tasks
            break

        # keep the task queue filled: one new task per received result
        if i < endIndex:
            taskQueue.put(argumentsList[i + initialTasks])

    # Tell child processes to stop
    for _ in range(NumberOfProcesses):
        taskQueue.put('STOP')
205
206
def worker(inputQueue, outputQueue):
    """
    Module function acting as the parallel worker for the whitespace check.

    Tasks are (filename, source) tuples read from the input queue; for each
    one a (filename, result) tuple is put on the output queue. The function
    returns when the string 'STOP' is read from the input queue.

    @param inputQueue input queue (multiprocessing.Queue)
    @param outputQueue output queue (multiprocessing.Queue)
    """
    while True:
        task = inputQueue.get()
        if task == 'STOP':
            break
        filename, source = task
        outputQueue.put((filename, __check(filename, source)))
217
218
def __check(file, text=""):
    """
    Private function to check one Python source file for whitespace related
    problems.

    @param file source filename (string)
    @param text source text (string)
    @return A tuple indicating status (True = an error was found), the
        linenumber and the error message (boolean, string, string). For an
        ambiguous indent the third entry is the offending source line. The
        values are only valid, if the status is True.
    """
    # reset the module level state on each run
    global indents, check_equal
    indents = [Whitespace("")]
    check_equal = 0

    if not text:
        return (True, "1", "Error: source code missing.")

    source = io.StringIO(text)
    try:
        process_tokens(tokenize.generate_tokens(source.readline))
    except tokenize.TokenError as msg:
        reason = str(msg)
        return (True, "1", "Token Error: {0}".format(reason))
    except IndentationError as err:
        reason = str(err.msg)
        return (True, str(err.lineno),
                "Indentation Error: {0}".format(reason))
    except NannyNag as nag:
        # report the offending source line together with its line number
        return (True, str(nag.get_lineno()), nag.get_line())
    except Exception as err:
        reason = str(err)
        return (True, "1", "Unspecific Error: {0}".format(reason))
    else:
        return (False, "", "")
257
258
259 class Whitespace(object):
260 """
261 Class implementing the whitespace checker.
262 """
263 # the characters used for space and tab
264 S, T = ' \t'
265
266 # members:
267 # raw
268 # the original string
269 # n
270 # the number of leading whitespace characters in raw
271 # nt
272 # the number of tabs in raw[:n]
273 # norm
274 # the normal form as a pair (count, trailing), where:
275 # count
276 # a tuple such that raw[:n] contains count[i]
277 # instances of S * i + T
278 # trailing
279 # the number of trailing spaces in raw[:n]
280 # It's A Theorem that m.indent_level(t) ==
281 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
282 # is_simple
283 # true iff raw[:n] is of the form (T*)(S*)
284
285 def __init__(self, ws):
286 """
287 Constructor
288
289 @param ws The string to be checked.
290 """
291 self.raw = ws
292 S, T = Whitespace.S, Whitespace.T
293 count = []
294 b = n = nt = 0
295 for ch in self.raw:
296 if ch == S:
297 n = n + 1
298 b = b + 1
299 elif ch == T:
300 n = n + 1
301 nt = nt + 1
302 if b >= len(count):
303 count = count + [0] * (b - len(count) + 1)
304 count[b] = count[b] + 1
305 b = 0
306 else:
307 break
308 self.n = n
309 self.nt = nt
310 self.norm = tuple(count), b
311 self.is_simple = len(count) <= 1
312
313 # return length of longest contiguous run of spaces (whether or not
314 # preceding a tab)
315 def longest_run_of_spaces(self):
316 """
317 Public method to calculate the length of longest contiguous run of
318 spaces.
319
320 @return The length of longest contiguous run of spaces (whether or not
321 preceding a tab)
322 """
323 count, trailing = self.norm
324 return max(len(count) - 1, trailing)
325
326 def indent_level(self, tabsize):
327 """
328 Public method to determine the indentation level.
329
330 @param tabsize The length of a tab stop. (integer)
331 @return indentation level (integer)
332 """
333 ## count, il = self.norm
334 ## for i in range(len(count)):
335 ## if count[i]:
336 ## il = il + (i/tabsize + 1)*tabsize * count[i]
337 ## return il
338
339 ## quicker:
340 ## il = trailing + sum (i/ts + 1)*ts*count[i] =
341 ## trailing + ts * sum (i/ts + 1)*count[i] =
342 ## trailing + ts * sum i/ts*count[i] + count[i] =
343 ## trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
344 ## trailing + ts * [(sum i/ts*count[i]) + num_tabs]
345 ## and note that i/ts*count[i] is 0 when i < ts
346
347 count, trailing = self.norm
348 il = 0
349 for i in range(tabsize, len(count)):
350 il = il + i / tabsize * count[i]
351 return trailing + tabsize * (il + self.nt)
352
353 # return true iff self.indent_level(t) == other.indent_level(t)
354 # for all t >= 1
355 def equal(self, other):
356 """
357 Public method to compare the indentation levels of two Whitespace
358 objects for equality.
359
360 @param other Whitespace object to compare against.
361 @return True, if we compare equal against the other Whitespace object.
362 """
363 return self.norm == other.norm
364
365 # return a list of tuples (ts, i1, i2) such that
366 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
367 # Intended to be used after not self.equal(other) is known, in which
368 # case it will return at least one witnessing tab size.
369 def not_equal_witness(self, other):
370 """
371 Public method to calculate a tuple of witnessing tab size.
372
373 Intended to be used after not self.equal(other) is known, in which
374 case it will return at least one witnessing tab size.
375
376 @param other Whitespace object to calculate against.
377 @return A list of tuples (ts, i1, i2) such that
378 i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
379 """
380 n = max(self.longest_run_of_spaces(),
381 other.longest_run_of_spaces()) + 1
382 a = []
383 for ts in range(1, n + 1):
384 if self.indent_level(ts) != other.indent_level(ts):
385 a.append((ts,
386 self.indent_level(ts),
387 other.indent_level(ts)))
388 return a
389
390 # Return True iff self.indent_level(t) < other.indent_level(t)
391 # for all t >= 1.
392 # The algorithm is due to Vincent Broman.
393 # Easy to prove it's correct.
394 # XXXpost that.
395 # Trivial to prove n is sharp (consider T vs ST).
396 # Unknown whether there's a faster general way. I suspected so at
397 # first, but no longer.
398 # For the special (but common!) case where M and N are both of the
399 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
400 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
401 # XXXwrite that up.
402 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
403 def less(self, other):
404 """
405 Public method to compare the indentation level against another
406 Whitespace objects to be smaller.
407
408 @param other Whitespace object to compare against.
409 @return True, if we compare less against the other Whitespace object.
410 """
411 if self.n >= other.n:
412 return False
413 if self.is_simple and other.is_simple:
414 return self.nt <= other.nt
415 n = max(self.longest_run_of_spaces(),
416 other.longest_run_of_spaces()) + 1
417 # the self.n >= other.n test already did it for ts=1
418 for ts in range(2, n + 1):
419 if self.indent_level(ts) >= other.indent_level(ts):
420 return False
421 return True
422
423 # return a list of tuples (ts, i1, i2) such that
424 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
425 # Intended to be used after not self.less(other) is known, in which
426 # case it will return at least one witnessing tab size.
427 def not_less_witness(self, other):
428 """
429 Public method to calculate a tuple of witnessing tab size.
430
431 Intended to be used after not self.less(other is known, in which
432 case it will return at least one witnessing tab size.
433
434 @param other Whitespace object to calculate against.
435 @return A list of tuples (ts, i1, i2) such that
436 i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
437 """
438 n = max(self.longest_run_of_spaces(),
439 other.longest_run_of_spaces()) + 1
440 a = []
441 for ts in range(1, n + 1):
442 if self.indent_level(ts) >= other.indent_level(ts):
443 a.append((ts,
444 self.indent_level(ts),
445 other.indent_level(ts)))
446 return a
447
448
def format_witnesses(w):
    """
    Function to format the witnesses as a readable string.

    Only the first entry (the tab size) of each witness is reported.

    @param w A list of witnesses
    @return A formatted string of the witnesses.
    """
    tabSizes = ', '.join(str(witness[0]) for witness in w)
    label = "at tab sizes" if len(w) > 1 else "at tab size"
    return label + " " + tabSizes
461
462
def process_tokens(tokens):
    """
    Function processing all tokens generated by a tokenizer run.

    @param tokens iterable of token tuples (type, string, start, end, line)
    @exception NannyNag raised to indicate an indentation error
    """
    junkTypes = tokenize.COMMENT, tokenize.NL
    # stack of the indentations of the currently open blocks
    indentStack = [Whitespace("")]
    # True while the next "real" token has to match the top of the stack
    mustMatch = False

    for tokenType, token, start, _end, line in tokens:
        if tokenType == tokenize.NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting the flag is wrong, and will
            # be undone when we see the INDENT.
            mustMatch = True
            continue

        if tokenType == tokenize.INDENT:
            mustMatch = False
            current = Whitespace(token)
            enclosing = indentStack[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indentStack.append(current)
            continue

        if tokenType == tokenize.DEDENT:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the stack

            # No assertion on the flag here: if the last line of the source
            # is indented *and* lacks a newline, DEDENTs pop out of thin
            # air without an earlier NEWLINE.
            mustMatch = True
            del indentStack[-1]
            continue

        if not mustMatch or tokenType in junkTypes:
            continue

        # this is the first "real token" following a NEWLINE, so it
        # must be the first token of the next program statement, or an
        # ENDMARKER; the "line" argument exposes the leading whitespace
        # for this statement; in the case of ENDMARKER, line is an empty
        # string, so will properly match the empty string with which the
        # stack was seeded
        mustMatch = False
        current = Whitespace(line)
        expected = indentStack[-1]
        if not expected.equal(current):
            witness = expected.not_equal_witness(current)
            msg = "indent not equal e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
523
524 # eflag: noqa = M111

eric ide

mercurial