UtilitiesPython2/Pep257CheckerPy2.py

changeset 2917
fe82710d02cb
child 2929
28ab0bc63d69
equal deleted inserted replaced
2916:a8628dfdfe04 2917:fe82710d02cb
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2013 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing a checker for PEP-257 documentation string conventions.
8 """
9
10 #
11 # The routines of the checker class are modeled after the ones found in
12 # pep257.py (version 0.2.4).
13 #
14
15 try:
16 # Python 2
17 from StringIO import StringIO # __IGNORE_EXCEPTION__
18 except ImportError:
19 # Python 3
20 from io import StringIO # __IGNORE_WARNING__
21 import tokenize
22
23
class Pep257Context(object):
    """
    Class implementing the source context.

    A context bundles a run of source lines with the line number it starts
    at, the indentation that was stripped from its first line and a type
    tag describing what kind of source it holds.
    """
    def __init__(self, source, startLine, contextType):
        """
        Constructor

        @param source source code of the context (list of string or string)
        @param startLine line number the context starts in the source (integer)
        @param contextType type of the context object (string)
        """
        # Python 2 knows two text types (str and unicode). Both have to be
        # split into lines; treating a unicode string like a list would just
        # copy the string and fail on the item assignment further down.
        try:
            stringTypes = basestring    # __IGNORE_WARNING__
        except NameError:
            # Python 3
            stringTypes = str

        if isinstance(source, stringTypes):
            self.__source = source.splitlines(True)
        else:
            # work on a private copy; the caller's sequence stays untouched
            self.__source = list(source)
        self.__start = startLine
        self.__indent = ""
        self.__type = contextType

        # ensure first line is left justified
        if self.__source:
            firstLine = self.__source[0]
            stripped = firstLine.lstrip()
            # everything in front of the stripped text is the indentation
            self.__indent = firstLine[:len(firstLine) - len(stripped)]
            self.__source[0] = stripped

    def source(self):
        """
        Public method to get the source.

        @return source (list of string)
        """
        return self.__source

    def ssource(self):
        """
        Public method to get the joined source lines.

        @return source (string)
        """
        return "".join(self.__source)

    def start(self):
        """
        Public method to get the start line number.

        @return start line number (integer)
        """
        return self.__start

    def end(self):
        """
        Public method to get the end line number.

        @return end line number (integer)
        """
        return self.__start + len(self.__source) - 1

    def indent(self):
        """
        Public method to get the indentation of the first line.

        @return indentation string (string)
        """
        return self.__indent

    def contextType(self):
        """
        Public method to get the context type.

        @return context type (string)
        """
        return self.__type
97
98
class Pep257Checker(object):
    """
    Class implementing a checker for PEP-257 documentation string conventions.

    The checker tokenizes the given source, extracts module, class, function
    and method contexts together with their docstrings and runs the enabled
    checks on them. Detected issues are collected in the 'errors' attribute
    as tuples of (filename, line number, offset, code, arguments); the
    'counters' attribute counts the occurrences per code.
    """
    Codes = [
        "D101", "D102", "D103", "D104", "D105",
        "D111", "D112", "D113",
        "D121", "D122",
        "D131", "D132", "D133", "D134",
        "D141", "D142", "D143", "D144", "D145",
    ]

    def __init__(self, source, filename, select, ignore, expected, repeat,
                 maxLineLength=79):
        """
        Constructor (according to 'extended' pep8.py API)

        @param source source code to be checked (list of string)
        @param filename name of the source file (string)
        @param select list of selected codes (list of string)
        @param ignore list of codes to be ignored (list of string)
        @param expected list of expected codes (list of string)
        @param repeat flag indicating to report each occurrence of a code
            (boolean)
        @param maxLineLength allowed line length (integer)
        """
        self.__select = tuple(select)
        self.__ignore = tuple(ignore)
        self.__expected = expected[:]
        self.__repeat = repeat
        self.__maxLineLength = maxLineLength
        self.__filename = filename
        self.__source = source[:]
        # an empty source has no first line to inspect; run() will refuse
        # to do anything with it, so it must not blow up in here
        self.__isScript = (
            bool(self.__source) and self.__source[0].startswith('#!'))

        # statistics counters
        self.counters = {}

        # collection of detected errors
        self.errors = []

        self.__lineNumber = 0

        # caches (None means 'not parsed yet')
        self.__functionsCache = None
        self.__classesCache = None
        self.__methodsCache = None

        # the order of the keywords determines the order of the checks
        self.__keywords = [
            'moduleDocstring', 'functionDocstring',
            'classDocstring', 'methodDocstring',
            'defDocstring', 'docstring'
        ]
        self.__checkersWithCodes = {
            "moduleDocstring": [
                (self.__checkModulesDocstrings, ("D101",)),
            ],
            "functionDocstring": [
            ],
            "classDocstring": [
                (self.__checkClassDocstring, ("D104", "D105")),
                (self.__checkBlankBeforeAndAfterClass, ("D142", "D143")),
            ],
            "methodDocstring": [
            ],
            "defDocstring": [
                (self.__checkFunctionDocstring, ("D102", "D103")),
                (self.__checkImperativeMood, ("D132",)),
                (self.__checkNoSignature, ("D133",)),
                (self.__checkReturnType, ("D134",)),
                (self.__checkNoBlankLineBefore, ("D141",)),
            ],
            "docstring": [
                (self.__checkTripleDoubleQuotes, ("D111",)),
                (self.__checkBackslashes, ("D112",)),
                (self.__checkUnicode, ("D113",)),
                (self.__checkOneLiner, ("D121",)),
                (self.__checkIndent, ("D122",)),
                (self.__checkEndsWithPeriod, ("D131",)),
                (self.__checkBlankAfterSummary, ("D144",)),
                (self.__checkBlankAfterLastParagraph, ("D145",)),
            ],
        }

        # keep only those checkers, that can report at least one code, that
        # is not ignored
        self.__checkers = {}
        for key, checkers in self.__checkersWithCodes.items():
            for checker, codes in checkers:
                if any(not (code and self.__ignoreCode(code))
                       for code in codes):
                    if key not in self.__checkers:
                        self.__checkers[key] = []
                    self.__checkers[key].append(checker)

    def __ignoreCode(self, code):
        """
        Private method to check if the error code should be ignored.

        A code is ignored, if it starts with one of the ignore prefixes and
        is not re-enabled by one of the select prefixes.

        @param code message code to check for (string)
        @return flag indicating to ignore the given code (boolean)
        """
        return (code.startswith(self.__ignore) and
                not code.startswith(self.__select))

    def __error(self, lineNumber, offset, code, *args):
        """
        Private method to record an issue.

        @param lineNumber line number of the issue (integer, zero based)
        @param offset position within line of the issue (integer)
        @param code message code (string)
        @param args arguments for the message (list)
        """
        if self.__ignoreCode(code):
            return

        if code in self.counters:
            self.counters[code] += 1
        else:
            self.counters[code] = 1

        # Don't care about expected codes
        if code in self.__expected:
            return

        if code and (self.counters[code] == 1 or self.__repeat):
            # record the issue with one based line number
            self.errors.append(
                (self.__filename, lineNumber + 1, offset, code, args))

    def __resetReadline(self):
        """
        Private method to reset the internal readline function.
        """
        self.__lineNumber = 0

    def __readline(self):
        """
        Private method to get the next line from the source.

        @return next line of source (string); an empty string signals the
            end of the source
        """
        self.__lineNumber += 1
        if self.__lineNumber > len(self.__source):
            return ''
        return self.__source[self.__lineNumber - 1]

    def run(self):
        """
        Public method to check the given source for violations of doc string
        conventions according to PEP-257.
        """
        if not self.__source or not self.__filename:
            # don't do anything, if essential data is missing
            return

        for keyword in self.__keywords:
            checks = self.__checkers.get(keyword)
            if not checks:
                continue

            # The contexts and their docstrings depend on the keyword only.
            # Extract them once instead of once per check; the checks are
            # still executed check by check, so the reporting order stays
            # the same.
            pairs = [(self.__parseDocstring(context, keyword), context)
                     for context in self.__parseContexts(keyword)]
            for check in checks:
                for docstring, context in pairs:
                    check(docstring, context)

    def __getSummaryLine(self, docstringContext):
        """
        Private method to extract the summary line.

        @param docstringContext docstring context (Pep257Context)
        @return summary line (string) and the line it was found on (integer)
        """
        lines = docstringContext.source()

        # strip the string prefix and the quotes off the first line
        line = (lines[0]
                .replace('r"""', "", 1)
                .replace('u"""', "", 1)
                .replace('"""', "")
                .replace("r'''", "", 1)
                .replace("u'''", "", 1)
                .replace("'''", "")
                .strip())

        if len(lines) == 1 or len(line) > 0:
            return line, 0
        # the first line held the opening quotes only
        return lines[1].strip(), 1

    ##################################################################
    ## Parsing functionality below
    ##################################################################

    def __parseModuleDocstring(self, source):
        """
        Private method to extract a docstring given a module source.

        @param source source to parse (list of string)
        @return context of extracted docstring (Pep257Context) or None, if
            the first significant token is not a string
        """
        for kind, value, (line, char), _, _ in tokenize.generate_tokens(
                StringIO("".join(source)).readline):
            if kind in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
                continue
            elif kind == tokenize.STRING:  # first STRING should be docstring
                return Pep257Context(value, line - 1, "docstring")
            else:
                return None

    def __parseDocstring(self, context, what=''):
        """
        Private method to extract a docstring given `def` or `class` source.

        @param context context data to get the docstring from (Pep257Context)
        @param what keyword of the checker group the docstring is extracted
            for (string)
        @return context of extracted docstring (Pep257Context) or None, if
            there is no docstring
        """
        moduleDocstring = self.__parseModuleDocstring(context.source())
        if what.startswith('module') or context.contextType() == "module":
            return moduleDocstring
        if moduleDocstring:
            return moduleDocstring

        tokenGenerator = tokenize.generate_tokens(
            StringIO(context.ssource()).readline)
        try:
            kind = None
            while kind != tokenize.INDENT:
                kind, _, _, _, _ = next(tokenGenerator)
            kind, value, (line, char), _, _ = next(tokenGenerator)
            if kind == tokenize.STRING:  # STRING after INDENT is a docstring
                return Pep257Context(
                    value, context.start() + line - 1, "docstring")
        except StopIteration:
            # ran out of tokens without finding an indented block
            pass

        return None

    def __parseTopLevel(self, keyword):
        """
        Private method to extract top-level functions or classes.

        @param keyword keyword signaling what to extract (string)
        @return extracted function or class contexts (list of Pep257Context)
        """
        self.__resetReadline()
        tokenGenerator = tokenize.generate_tokens(self.__readline)
        kind, value, char = None, None, None
        contexts = []
        try:
            while True:
                start, end = None, None
                # advance to the next keyword at column 0 ...
                while not (kind == tokenize.NAME and
                           value == keyword and
                           char == 0):
                    kind, value, (line, char), _, _ = next(tokenGenerator)
                start = line - 1, char
                # ... and from there to the dedent back to column 0
                while not (kind == tokenize.DEDENT and
                           value == '' and
                           char == 0):
                    kind, value, (line, char), _, _ = next(tokenGenerator)
                end = line - 1, char
                contexts.append(Pep257Context(
                    self.__source[start[0]:end[0]], start[0], keyword))
        except StopIteration:
            # the token stream is exhausted; that ends the search
            return contexts

    def __parseFunctions(self):
        """
        Private method to extract top-level functions.

        @return extracted function contexts (list of Pep257Context)
        """
        # compare against None; an empty result is a valid cache entry
        if self.__functionsCache is None:
            self.__functionsCache = self.__parseTopLevel('def')
        return self.__functionsCache

    def __parseClasses(self):
        """
        Private method to extract top-level classes.

        @return extracted class contexts (list of Pep257Context)
        """
        # compare against None; an empty result is a valid cache entry
        if self.__classesCache is None:
            self.__classesCache = self.__parseTopLevel('class')
        return self.__classesCache

    def __skipIndentedBlock(self, tokenGenerator):
        """
        Private method to skip over an indented block of source code.

        @param tokenGenerator token generator
        @return last token of the indented block (None, if the generator
            is exhausted before the block is closed)
        """
        kind, value, start, end, raw = next(tokenGenerator)
        while kind != tokenize.INDENT:
            kind, value, start, end, raw = next(tokenGenerator)
        # track the nesting depth until the opening indent is closed again
        indent = 1
        for kind, value, start, end, raw in tokenGenerator:
            if kind == tokenize.INDENT:
                indent += 1
            elif kind == tokenize.DEDENT:
                indent -= 1
            if indent == 0:
                return kind, value, start, end, raw

    def __parseMethods(self):
        """
        Private method to extract methods of all classes.

        @return extracted method contexts (list of Pep257Context)
        """
        # compare against None; an empty result is a valid cache entry
        if self.__methodsCache is None:
            contexts = []
            for classContext in self.__parseClasses():
                tokenGenerator = tokenize.generate_tokens(
                    StringIO(classContext.ssource()).readline)
                kind, value, char = None, None, None
                try:
                    while True:
                        start, end = None, None
                        while not (kind == tokenize.NAME and value == 'def'):
                            kind, value, (line, char), _, _ = \
                                next(tokenGenerator)
                        start = line - 1, char
                        kind, value, (line, char), _, _ = \
                            self.__skipIndentedBlock(tokenGenerator)
                        end = line - 1, char
                        # token lines are relative to the class context
                        startLine = classContext.start() + start[0]
                        endLine = classContext.start() + end[0]
                        contexts.append(
                            Pep257Context(self.__source[startLine:endLine],
                                          startLine, "def"))
                except StopIteration:
                    # no more methods in this class
                    pass
            self.__methodsCache = contexts

        return self.__methodsCache

    def __parseContexts(self, kind):
        """
        Private method to extract a context from the source.

        @param kind kind of context to extract (string)
        @return requested contexts (list of Pep257Context)
        """
        if kind == 'moduleDocstring':
            return [Pep257Context(self.__source, 0, "module")]
        if kind == 'functionDocstring':
            return self.__parseFunctions()
        if kind == 'classDocstring':
            return self.__parseClasses()
        if kind == 'methodDocstring':
            return self.__parseMethods()
        if kind == 'defDocstring':
            return self.__parseFunctions() + self.__parseMethods()
        if kind == 'docstring':
            return ([Pep257Context(self.__source, 0, "module")] +
                    self.__parseFunctions() +
                    self.__parseClasses() +
                    self.__parseMethods())
        return []       # fall back

    ##################################################################
    ## Checking functionality below
    ##################################################################

    def __checkModulesDocstrings(self, docstringContext, context):
        """
        Private method to check, if the module has a docstring.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            self.__error(context.start(), 0, "D101")
            return

        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, "D101")

    def __checkFunctionDocstring(self, docstringContext, context):
        """
        Private method to check, that all public functions and methods
        have a docstring.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if self.__isScript:
            # assume nothing is exported
            return

        functionName = context.source()[0].lstrip().split()[1].split("(")[0]
        if functionName.startswith('_') and not functionName.endswith('__'):
            code = "D103"
        else:
            code = "D102"

        if docstringContext is None:
            self.__error(context.start(), 0, code)
            return

        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, code)

    def __checkClassDocstring(self, docstringContext, context):
        """
        Private method to check, that all public classes have a docstring.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if self.__isScript:
            # assume nothing is exported
            return

        className = context.source()[0].lstrip().split()[1].split("(")[0]
        if className.startswith('_'):
            code = "D105"
        else:
            code = "D104"

        if docstringContext is None:
            self.__error(context.start(), 0, code)
            return

        docstring = docstringContext.ssource()
        if (not docstring or not docstring.strip() or
                not docstring.strip('\'"')):
            self.__error(context.start(), 0, code)

    def __checkTripleDoubleQuotes(self, docstringContext, context):
        """
        Private method to check, that all docstrings are surrounded
        by triple double quotes.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        docstring = docstringContext.ssource().strip()
        if not docstring.startswith(('"""', 'r"""', 'u"""')):
            self.__error(docstringContext.start(), 0, "D111")

    def __checkBackslashes(self, docstringContext, context):
        """
        Private method to check, that all docstrings containing
        backslashes are surrounded by raw triple double quotes.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        docstring = docstringContext.ssource().strip()
        if "\\" in docstring and not docstring.startswith('r"""'):
            self.__error(docstringContext.start(), 0, "D112")

    def __checkUnicode(self, docstringContext, context):
        """
        Private method to check, that all docstrings containing unicode
        characters are surrounded by unicode triple double quotes.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        docstring = docstringContext.ssource().strip()
        if not docstring.startswith('u"""') and \
                any(ord(char) > 127 for char in docstring):
            self.__error(docstringContext.start(), 0, "D113")

    def __checkOneLiner(self, docstringContext, context):
        """
        Private method to check, that one-liner docstrings fit on
        one line with quotes.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        lines = docstringContext.source()
        if len(lines) > 1:
            # lines consisting of quotes only don't count as content
            nonEmptyLines = [
                line for line in lines if line.strip().strip('\'"')]
            if len(nonEmptyLines) == 1:
                modLen = len(context.indent() + '"""' +
                             nonEmptyLines[0].strip() + '"""')
                if context.contextType() != "module":
                    # the docstring is indented one level deeper
                    modLen += 4
                if modLen <= self.__maxLineLength:
                    self.__error(docstringContext.start(), 0, "D121")

    def __checkIndent(self, docstringContext, context):
        """
        Private method to check, that docstrings are properly indented.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        lines = docstringContext.source()
        if len(lines) == 1:
            return

        # the first line lost its indentation when the context was created
        nonEmptyLines = [
            line.rstrip() for line in lines[1:] if line.strip()]
        if not nonEmptyLines:
            return

        indent = min(
            len(line) - len(line.strip()) for line in nonEmptyLines)
        if context.contextType() == "module":
            expectedIndent = 0
        else:
            expectedIndent = len(context.indent()) + 4
        if indent != expectedIndent:
            self.__error(docstringContext.start(), 0, "D122")

    def __checkEndsWithPeriod(self, docstringContext, context):
        """
        Private method to check, that docstring summaries end with a period.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        summary, lineNumber = self.__getSummaryLine(docstringContext)
        if not summary.endswith("."):
            self.__error(docstringContext.start() + lineNumber, 0, "D131")

    def __checkImperativeMood(self, docstringContext, context):
        """
        Private method to check, that docstring summaries are in
        imperative mood.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        summary, lineNumber = self.__getSummaryLine(docstringContext)
        words = summary.split()
        if not words:
            # an empty summary has no first word to look at
            return

        firstWord = words[0]
        if firstWord.endswith("s") and not firstWord.endswith("ss"):
            self.__error(docstringContext.start() + lineNumber, 0, "D132")

    def __checkNoSignature(self, docstringContext, context):
        """
        Private method to check, that docstring summaries don't repeat
        the function's signature.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        functionName = context.source()[0].lstrip().split()[1].split("(")[0]
        summary, lineNumber = self.__getSummaryLine(docstringContext)
        if functionName + "(" in summary.replace(" ", ""):
            self.__error(docstringContext.start() + lineNumber, 0, "D133")

    def __checkReturnType(self, docstringContext, context):
        """
        Private method to check, that docstrings mention the return value type.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None or self.__isScript:
            return

        if "return" not in docstringContext.ssource().lower():
            tokens = list(
                tokenize.generate_tokens(StringIO(context.ssource()).readline))
            # collect the token types following each 'return'
            return_ = [tokens[i + 1][0] for i, token in enumerate(tokens)
                       if token[1] == "return"]
            # a 'return' followed by anything but comment or line end
            # returns a value
            if (set(return_) -
                    set([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE]) !=
                    set([])):
                self.__error(docstringContext.end(), 0, "D134")

    def __checkNoBlankLineBefore(self, docstringContext, context):
        """
        Private method to check, that function/method docstrings are not
        preceded by a blank line.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        contextLines = context.source()
        # search the line the docstring starts on
        cti = 0
        while cti < len(contextLines) and \
            not contextLines[cti].strip().startswith(
                ('"""', 'r"""', 'u"""', "'''", "r'''", "u'''")):
            cti += 1

        if cti == len(contextLines):
            return

        if not contextLines[cti - 1].strip():
            self.__error(docstringContext.start(), 0, "D141")

    def __checkBlankBeforeAndAfterClass(self, docstringContext, context):
        """
        Private method to check, that class docstrings have one
        blank line around them.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        contextLines = context.source()
        # search the line the docstring starts on
        cti = 0
        while cti < len(contextLines) and \
            not contextLines[cti].strip().startswith(
                ('"""', 'r"""', 'u"""', "'''", "r'''", "u'''")):
            cti += 1

        if cti == len(contextLines):
            return

        start = cti
        if contextLines[cti].strip() in (
                '"""', 'r"""', 'u"""', "'''", "r'''", "u'''"):
            # it is a multi line docstring
            cti += 1

        # search the line the docstring ends on
        while cti < len(contextLines) and \
                not contextLines[cti].strip().endswith(('"""', "'''")):
            cti += 1
        end = cti

        if contextLines[start - 1].strip():
            self.__error(docstringContext.start(), 0, "D142")
        # a docstring ending the class has no following line to inspect
        if end + 1 < len(contextLines) and contextLines[end + 1].strip():
            self.__error(docstringContext.end(), 0, "D143")

    def __checkBlankAfterSummary(self, docstringContext, context):
        """
        Private method to check, that docstring summaries are followed
        by a blank line.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        docstrings = docstringContext.source()
        if len(docstrings) in [1, 3]:
            # correct/invalid one-liner
            return

        summary, lineNumber = self.__getSummaryLine(docstringContext)
        # the summary may sit on the last line of a two line docstring;
        # there is nothing following it then
        if (lineNumber + 1 < len(docstrings) and
                docstrings[lineNumber + 1].strip()):
            self.__error(docstringContext.start() + lineNumber, 0, "D144")

    def __checkBlankAfterLastParagraph(self, docstringContext, context):
        """
        Private method to check, that the last paragraph of docstrings is
        followed by a blank line.

        @param docstringContext docstring context (Pep257Context)
        @param context context of the docstring (Pep257Context)
        """
        if docstringContext is None:
            return

        docstrings = docstringContext.source()
        if len(docstrings) in [1, 3]:
            # correct/invalid one-liner
            return

        # the line in front of the closing quotes has to be blank
        if docstrings[-2].strip():
            self.__error(docstringContext.end(), 0, "D145")
789
790 #
791 # eflag: FileType = Python2

eric ide

mercurial