UtilitiesPython2/Tabnanny.py

changeset 805
83ca4d1ff648
child 945
8cd4d08fa9f6
equal deleted inserted replaced
804:3465556892de 805:83ca4d1ff648
1 # -*- coding: utf-8 -*-
2
3 """
4 The Tab Nanny despises ambiguous indentation. She knows no mercy.
5
6 tabnanny -- Detection of ambiguous indentation
7
8 For the time being this module is intended to be called as a script.
9 However it is possible to import it into an IDE and use the function
10 check() described below.
11
12 Warning: The API provided by this module is likely to change in future
13 releases; such changes may not be backward compatible.
14
15 This is a modified version to make the original tabnanny better suitable
16 for being called from within the eric5 IDE.
17
18 @exception ValueError The tokenize module is too old.
19 """
20
21 # Released to the public domain, by Tim Peters, 15 April 1998.
22
23 # XXX Note: this is now a standard library module.
24 # XXX The API needs to undergo changes however; the current code is too
25 # XXX script-like. This will be addressed later.
26
27 #
28 # This is a modified version to make the original tabnanny better suitable
29 # for being called from within the eric5 IDE. The modifications are as
30 # follows:
31 #
32 # - there is no main function anymore
33 # - check function has been modified to accept a filename and the source
34 # code as a string, and to return a tuple indicating status (True = an error
35 # was found), the filename, the line number and the error message (boolean,
36 # string, string, string). The values are only valid if the status is True.
37 #
38 # Modifications copyright (c) 2003 Detlev Offenbach <detlev@die-offenbachs.de>
39 #
40
# Version identifier of this module.
__version__ = "6_eric5"

import tokenize
import cStringIO

# process_tokens() treats tokenize.NL as a junk token, so a tokenize module
# lacking this token type cannot be used; refuse to load in that case.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from Tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]
50
class NannyNag(Exception):
    """
    Exception signalling an ambiguous indentation.

    It is raised by process_tokens() and is captured and handled in check().
    """
    def __init__(self, lineno, msg, line):
        """
        Constructor

        @param lineno number of the line showing the ambiguous indent
        @param msg message describing the problem
        @param line source line the problem was detected in
        """
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """
        Method to get the number of the offending line.

        @return line number as passed to the constructor
        """
        return self.lineno

    def get_msg(self):
        """
        Method to get the message describing the problem.

        @return message as passed to the constructor
        """
        return self.msg

    def get_line(self):
        """
        Method to get the source line that caused the problem.

        @return source line as passed to the constructor
        """
        return self.line
89
90 def check(filename, codestring):
91 """
92 Private function to check one Python source file for whitespace related problems.
93
94 @param filename source filename (string)
95 @param codestring source code (string)
96 @return A tuple indicating status (True = an error was found), the
97 filename, the linenumber and the error message
98 (boolean, string, string, string). The values are only
99 valid, if the status is True.
100 """
101 global indents, check_equal
102 indents = [Whitespace("")]
103 check_equal = 0
104
105 source = cStringIO.StringIO(codestring)
106 try:
107 process_tokens(tokenize.generate_tokens(source.readline))
108
109 except tokenize.TokenError, msg:
110 return (True, filename, "1", "Token Error: %s" % unicode(msg))
111
112 except IndentationError, err:
113 return (True, filename, err.lineno, "Indentation Error: %s" % unicode(err.msg))
114
115 except NannyNag, nag:
116 badline = nag.get_lineno()
117 line = nag.get_line()
118 return (True, filename, str(badline), line)
119
120 except Exception, err:
121 return (True, filename, "1", "Unspecific Error: %s" % unicode(err))
122
123 return (False, None, None, None)
124
class Whitespace(object):
    """
    Class implementing the whitespace checker.

    An instance analyses the leading run of spaces and tabs of a string and
    offers methods to compare the resulting indentation against the one of
    another instance for all possible tab sizes.
    """
    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """
        Constructor

        @param ws The string to be checked.
        """
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: length of the current run of spaces, n: number of leading
        # whitespace characters, nt: number of tabs seen so far
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                if b >= len(count):
                    # grow the table so that index b exists
                    count = count + [0] * (b - len(count) + 1)
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the analysis
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    def longest_run_of_spaces(self):
        """
        Method to calculate the length of longest contiguous run of spaces.

        @return The length of longest contiguous run of spaces (whether or not
            preceding a tab)
        """
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """
        Method to determine the indentation level.

        @param tabsize The length of a tab stop. (integer)
        @return indentation level (integer)
        """
        # straightforward version:
        #   count, il = self.norm
        #   for i in range(len(count)):
        #       if count[i]:
        #           il = il + (i//tabsize + 1)*tabsize * count[i]
        #   return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # explicit floor division: the formula needs the integer
            # quotient regardless of the division semantics in effect
            il = il + (i // tabsize) * count[i]
        return trailing + tabsize * (il + self.nt)

    def equal(self, other):
        """
        Method to compare the indentation levels of two Whitespace objects for
        equality.

        The result is True iff self.indent_level(t) == other.indent_level(t)
        for all t >= 1.

        @param other Whitespace object to compare against.
        @return True, if we compare equal against the other Whitespace object.
        """
        return self.norm == other.norm

    def not_equal_witness(self, other):
        """
        Method to calculate a list of witnessing tab sizes.

        Intended to be used after not self.equal(other) is known, in which
        case it will return at least one witnessing tab size.

        @param other Whitespace object to calculate against.
        @return A list of tuples (ts, i1, i2) such that
            i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 != i2:
                a.append((ts, i1, i2))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        """
        Method to compare the indentation level against another Whitespace
        object to be smaller.

        @param other Whitespace object to compare against.
        @return True, if we compare less against the other Whitespace object.
        """
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    def not_less_witness(self, other):
        """
        Method to calculate a list of witnessing tab sizes.

        Intended to be used after not self.less(other) is known, in which
        case it will return at least one witnessing tab size.

        @param other Whitespace object to calculate against.
        @return A list of tuples (ts, i1, i2) such that
            i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 >= i2:
                a.append((ts, i1, i2))
        return a
311
def format_witnesses(w):
    """
    Function to build a readable description of the witnessing tab sizes.

    @param w list of witness tuples, whose first entry is the tab size
    @return formatted string listing the tab sizes of all witnesses
    """
    tab_sizes = [str(witness[0]) for witness in w]
    label = "at tab size"
    if len(w) > 1:
        # more than one witness needs the plural form
        label += "s"
    return "%s %s" % (label, ', '.join(tab_sizes))
324
def process_tokens(tokens):
    """
    Function to check the indentation of a stream of tokens.

    @param tokens iterable of 5-tuples (type, string, start, end, line) as
        generated by a tokenizer run
    @exception NannyNag raised to signal an ambiguous indentation
    """
    indent_type = tokenize.INDENT
    dedent_type = tokenize.DEDENT
    newline_type = tokenize.NEWLINE
    junk_types = tokenize.COMMENT, tokenize.NL
    # stack of the indentations currently in effect, seeded with "no indent"
    indent_stack = [Whitespace("")]
    # set while the next real token has to match the top of the stack
    pending_check = False

    for tok_type, tok_text, tok_start, _tok_end, src_line in tokens:
        if tok_type == newline_type:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, requesting the check is wrong, and it
            # will be undone when the INDENT is seen.
            pending_check = True
            continue

        if tok_type == indent_type:
            pending_check = False
            candidate = Whitespace(tok_text)
            enclosing = indent_stack[-1]
            if not enclosing.less(candidate):
                witness = enclosing.not_less_witness(candidate)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(tok_start[0], msg, src_line)
            indent_stack.append(candidate)
            continue

        if tok_type == dedent_type:
            # Nothing needs to be checked here. What matters is that, once
            # the run of DEDENTs ends, the indentation of the program
            # statement (or ENDMARKER) that triggered the run equals what
            # is left at the top of the stack.
            # No assertion on the check flag: if the last line of the source
            # is indented *and* lacks a newline, DEDENTs pop out of thin air.
            pending_check = True
            del indent_stack[-1]
            continue

        if pending_check and tok_type not in junk_types:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER. The line element exposes the leading whitespace of
            # this statement; for ENDMARKER it is an empty string, which
            # properly matches the empty string the stack was seeded with.
            pending_check = False
            candidate = Whitespace(src_line)
            enclosing = indent_stack[-1]
            if not enclosing.equal(candidate):
                witness = enclosing.not_equal_witness(candidate)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(tok_start[0], msg, src_line)
383
384 #
385 # eflag: FileType = Python2

eric ide

mercurial