Plugins/CheckerPlugins/Tabnanny/Tabnanny.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2
3 """
4 The Tab Nanny despises ambiguous indentation. She knows no mercy.
5
6 tabnanny -- Detection of ambiguous indentation
7
8 For the time being this module is intended to be called as a script.
9 However it is possible to import it into an IDE and use the function
10 check() described below.
11
12 Warning: The API provided by this module is likely to change in future
13 releases; such changes may not be backward compatible.
14
15 This is a modified version that makes the original tabnanny better suited
16 for being called from within the eric4 IDE.
17
18 @exception ValueError The tokenize module is too old.
19 """
20
21 # Released to the public domain, by Tim Peters, 15 April 1998.
22
23 # XXX Note: this is now a standard library module.
24 # XXX The API needs to undergo changes however; the current code is too
25 # XXX script-like. This will be addressed later.
26
27 #
28 # This is a modified version that makes the original tabnanny better suited
29 # for being called from within the eric4 IDE. The modifications are as
30 # follows:
31 #
32 # - there is no main function anymore
33 # - check function has been modified to only accept a filename and return
34 # a tuple indicating status (1 = an error was found), the filename, the
35 # linenumber and the error message (boolean, string, string, string). The
36 # values are only valid, if the status equals 1.
37 #
38 # Modifications copyright (c) 2003 Detlev Offenbach <detlev@die-offenbachs.de>
39 #
40
41 __version__ = "6_eric"
42
43 import os
44 import sys
45 import tokenize
46 import cStringIO
47
48 import Utilities
49
50 if not hasattr(tokenize, 'NL'):
51 raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
52
53 __all__ = ["check", "NannyNag", "process_tokens"]
54
class NannyNag(Exception):
    """
    Exception signalling an ambiguous indentation.

    It carries the line number, a descriptive message and the offending
    source line of the problem that was detected.
    """
    def __init__(self, lineno, msg, line):
        """
        Constructor

        @param lineno Line number of the ambiguous indent.
        @param msg Descriptive message assigned to this problem.
        @param line The offending source line.
        """
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """
        Method to retrieve the line number.

        @return The line number as passed to the constructor (integer)
        """
        return self.lineno

    def get_msg(self):
        """
        Method to retrieve the message.

        @return The error message as passed to the constructor (string)
        """
        return self.msg

    def get_line(self):
        """
        Method to retrieve the offending line.

        @return The line of code as passed to the constructor (string)
        """
        return self.line
93
def check(file):
    """
    Function to check one Python source file for whitespace related problems.

    @param file source filename (string)
    @return A tuple indicating status (True = an error was found), the
        filename, the linenumber and the error message
        (boolean, string, string, string). If the problem was reported
        by a NannyNag exception, the last element is the offending source
        line instead of a message. The values are only valid, if the
        status is True.
    """
    # NOTE: process_tokens() keeps its own local bookkeeping; these module
    # level names are still reset here so that the module's globals behave
    # as before.
    global indents, check_equal
    indents = [Whitespace("")]
    check_equal = 0

    try:
        f = open(file)
    except IOError as msg:
        return (True, file, "1", "I/O Error: %s" % unicode(msg))

    # The file is only needed to get at its contents, so it is closed right
    # after reading; no further close() calls are required below.
    try:
        text = Utilities.decode(f.read())[0].encode('utf-8')
    finally:
        f.close()

    # convert eols
    text = Utilities.convertLineEnds(text, os.linesep)

    source = cStringIO.StringIO(text)
    try:
        process_tokens(tokenize.generate_tokens(source.readline))

    except tokenize.TokenError as msg:
        return (True, file, "1", "Token Error: %s" % unicode(msg))

    except IndentationError as err:
        # the line number is reported as a string, like in all other cases
        return (True, file, str(err.lineno),
                "Indentation Error: %s" % unicode(err.msg))

    except NannyNag as nag:
        return (True, file, str(nag.get_lineno()), nag.get_line())

    except Exception as err:
        return (True, file, "1", "Unspecific Error: %s" % unicode(err))

    return (False, None, None, None)
145
class Whitespace(object):
    """
    Class implementing the whitespace checker.

    An instance analyses the leading whitespace of a string and offers
    comparisons of the resulting indentation that hold for all tab sizes.
    """
    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """
        Constructor

        @param ws The string to be checked. Only its leading run of space
            and tab characters is analysed.
        """
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b is the length of the current run of spaces, n the number of
        # whitespace characters seen so far and nt the number of tabs
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # make room for a slot recording "b spaces followed by a tab"
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the analysis
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    def longest_run_of_spaces(self):
        """
        Method to calculate the length of longest contiguous run of spaces.

        @return The length of longest contiguous run of spaces (whether or not
            preceding a tab)
        """
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """
        Method to determine the indentation level.

        @param tabsize The length of a tab stop. (integer)
        @return indentation level (integer)
        """
        # The straightforward computation would be
        #   count, il = self.norm
        #   for i in range(len(count)):
        #       if count[i]:
        #           il = il + (i // tabsize + 1) * tabsize * count[i]
        #   return il
        #
        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        # Floor division is spelled out explicitly, so that the result stays
        # an integer even if "/" means true division.
        il = sum((i // tabsize) * count[i]
                 for i in range(tabsize, len(count)))
        return trailing + tabsize * (il + self.nt)

    def equal(self, other):
        """
        Method to compare the indentation levels of two Whitespace objects
        for equality.

        The result is True iff self.indent_level(t) == other.indent_level(t)
        for all t >= 1.

        @param other Whitespace object to compare against.
        @return True, if we compare equal against the other Whitespace object.
        """
        return self.norm == other.norm

    def not_equal_witness(self, other):
        """
        Method to calculate a list of witnessing tab sizes.

        Intended to be used after not self.equal(other) is known, in which
        case it will return at least one witnessing tab size.

        @param other Whitespace object to calculate against.
        @return A list of tuples (ts, i1, i2) such that
            i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 != i2:
                a.append((ts, i1, i2))
        return a

    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        """
        Method to compare the indentation level against another Whitespace
        object to be smaller.

        The result is True iff self.indent_level(t) < other.indent_level(t)
        for all t >= 1.

        @param other Whitespace object to compare against.
        @return True, if we compare less against the other Whitespace object.
        """
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    def not_less_witness(self, other):
        """
        Method to calculate a list of witnessing tab sizes.

        Intended to be used after not self.less(other) is known, in which
        case it will return at least one witnessing tab size.

        @param other Whitespace object to calculate against.
        @return A list of tuples (ts, i1, i2) such that
            i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 >= i2:
                a.append((ts, i1, i2))
        return a
332
def format_witnesses(w):
    """
    Function to format the witnesses as a readable string.

    @param w A list of witnesses; the first element of each witness
        is the tab size to be reported.
    @return A formatted string listing the tab sizes of the witnesses.
    """
    tab_sizes = [str(witness[0]) for witness in w]
    if len(w) > 1:
        label = "at tab sizes"
    else:
        label = "at tab size"
    return label + " " + ', '.join(tab_sizes)
345
def process_tokens(tokens):
    """
    Function processing all tokens generated by a tokenizer run.

    @param tokens iterable of 5-tuples (type, token, start, end, line),
        e.g. as generated by tokenize.generate_tokens
    @exception NannyNag An ambiguous indentation was detected.
    """
    indent_type = tokenize.INDENT
    dedent_type = tokenize.DEDENT
    newline_type = tokenize.NEWLINE
    ignorable = (tokenize.COMMENT, tokenize.NL)

    # stack of the indentations currently in effect, seeded with "no indent"
    stack = [Whitespace("")]
    # set, if the next "real" token has to match the top of the stack
    pending_check = False

    for tok_type, tok_text, tok_start, tok_end, src_line in tokens:
        if tok_type == newline_type:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, requesting the check is wrong, and will
            # be undone when we see the INDENT.
            pending_check = True

        elif tok_type == indent_type:
            pending_check = False
            current = Whitespace(tok_text)
            enclosing = stack[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(tok_start[0], msg, src_line)
            stack.append(current)

        elif tok_type == dedent_type:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the stack

            # No assertion on the pending check here: if the last line of
            # the source is indented *and* lacks a newline, DEDENTs pop out
            # of thin air.
            pending_check = True
            stack.pop()

        elif pending_check and tok_type not in ignorable:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" element exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # stack was seeded
            pending_check = False
            current = Whitespace(src_line)
            expected = stack[-1]
            if not expected.equal(current):
                witness = expected.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(tok_start[0], msg, src_line)

eric ide

mercurial