|
1 # |
|
2 # Jasy - Web Tooling Framework |
|
3 # Copyright 2010-2012 Zynga Inc. |
|
4 # Copyright 2013-2014 Sebastian Werner |
|
5 # |
|
6 |
|
7 from __future__ import unicode_literals |
|
8 |
|
9 import re |
|
10 |
|
11 import jasy.core.Text as Text |
|
12 import jasy.core.Console as Console |
|
13 |
|
14 from jasy import UserError |
|
15 from jasy.script.util import * |
|
16 |
|
17 |
|
# Used to measure the doc indent size (with leading stars in front of content).
# Group 1 is the indent including the star, group 2 the first word behind it.
docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")

# Used to split type lists as supported by throw, return and params.
# NOTE: must be a raw string, otherwise "\s" and "\|" are invalid string escapes.
listSplit = re.compile(r"\s*\|\s*")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")


# Matches return blocks in comments, e.g. "{String|Number}" at the start of the text
returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")

# Matches type definitions in comments, e.g. "{=Map}" at the start of the text
typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")

# Matches tags, e.g. "#deprecated" or "#require(some.Class)"
tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")

# Matches param declarations in own dialect, e.g. "@name {String ? default}".
# Groups: 1=name, 3=type list, 4=dynamic marker ("..."), 5=optional marker, 7=default value
paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")

# Matches links in own dialect, e.g. "{member:some.Class#item}".
# Groups: 1=whole link, 3=section, 4=file/class name, 6=item name
linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")

# Matches backticks and has a built-in failsafe for backticks which do not terminate on the same line
tickMatcher = re.compile(r"(`[^\n`]*?`)")
|
46 |
|
47 |
|
class CommentException(Exception):
    """
    Raised whenever a problem is detected while processing a comment.

    The given zero-based `lineNo` is reported one-based inside the message.
    """

    def __init__(self, message, lineNo=0):
        # Convert the zero-based line index into a human readable line number
        humanLine = lineNo + 1
        formatted = "Comment error: %s (line: %s)" % (message, humanLine)

        super().__init__(formatted)
|
55 |
|
56 |
|
57 |
|
58 |
|
class Comment:
    """
    Comment class is attached to parsed nodes and used to store all comment related information.

    The class supports a new Markdown and TomDoc inspired dialect to make developers' lives
    easier and their work less repetitive.
    """

    # Relation to code
    context = None

    # Identifier of the file the comment was found in (used for warnings only)
    fileId = None

    # Kind of comment: "single", "doc", "protected" or "multi"
    variant = None

    # Dictionary of tags
    tags = None

    # Dictionary of params
    params = None

    # List of return types
    returns = None

    # Static type
    type = None

    # Collected text of the comment (without the extracted doc relevant data)
    text = None

    # Text with extracted / parsed data
    __processedText = None

    # Text of the comment converted to HTML including highlighting (only for doc comment)
    __highlightedText = None

    # Text / Code Blocks in the comment
    __blocks = None


    def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):
        """
        Creates a comment from the raw comment source.

        :param text: Raw comment source including the comment delimiters
        :param context: Relation of the comment to the code
        :param lineNo: Zero-based line number at which the comment starts
        :param indent: Indentation string in front of the comment start
        :param fileId: Identifier of the file (only used in warnings)
        :raises CommentException: When the text does not start with a known comment delimiter
        """

        # Store context (relation to code)
        self.context = context

        # Store fileId
        self.fileId = fileId

        # Figure out the type of the comment based on the starting characters.
        # The order matters: "/**" and "/*!" have to be checked before "/*".

        # Inline comments
        if text.startswith("//"):
            # "// hello" => "   hello"
            text = "  " + text[2:]
            self.variant = "single"

        # Doc comments
        elif text.startswith("/**"):
            # "/** hello */" => "    hello "
            text = "   " + text[3:-2]
            self.variant = "doc"

        # Protected comments which should not be removed (e.g. these are used for license blocks)
        elif text.startswith("/*!"):
            # "/*! hello */" => "    hello "
            text = "   " + text[3:-2]
            self.variant = "protected"

        # A normal multiline comment
        elif text.startswith("/*"):
            # "/* hello */" => "   hello "
            text = "  " + text[2:-2]
            self.variant = "multi"

        else:
            raise CommentException("Invalid comment text: %s" % text, lineNo)

        # Multi line comments need to have their indentation removed
        if "\n" in text:
            text = self.__outdent(text, indent, lineNo)

        # For single line comments strip the surrounding whitespace
        else:
            # " hello " => "hello"
            text = text.strip()

        # The text of the comment before any processing took place
        self.text = text

        # Perform annotation parsing, markdown conversion and code highlighting on doc blocks
        if self.variant == "doc":

            # Separate text and code blocks
            self.__blocks = self.__splitBlocks(text)

            # Re-combine everything and apply processing and formatting.
            # Collects the text without annotations but with markdown.
            plainParts = []
            for block in self.__blocks:

                if block["type"] == "comment":
                    processed = self.__processDoc(block["text"], lineNo)
                    block["processed"] = processed

                    # Only run the regular expression when there is markup to remove
                    plainParts.append(stripMarkup.sub("", processed) if "<" in processed else processed)

                else:
                    plainParts.append("\n\n" + block["text"] + "\n\n")

            # The text without any annotations
            self.text = "".join(plainParts).strip()


    def __splitBlocks(self, text):
        """
        Splits up text and code blocks in comments.

        This will try to use hoedown for Markdown parsing if available and will
        fallback to a simpler implementation in order to allow processing of
        doc parameters and links without hoedown being installed.

        Returns a list of dicts with the keys "type" ("comment" or "code") and "text".
        """

        if not Text.supportsMarkdown:
            return self.__splitSimple(text)

        marked = Text.markdownToHtml(text)

        def unescape(markup):
            # "&amp;" is replaced after "&lt;" / "&gt;" so that an escaped
            # entity like "&amp;lt;" is decoded exactly once.
            markup = markup.replace('&lt;', '<')
            markup = markup.replace('&gt;', '>')
            markup = markup.replace('&amp;', '&')
            markup = markup.replace('&quot;', '"')
            return markup.replace('&#39;', "'")

        parts = []

        lineNo = 0
        lines = text.split("\n")
        markedLines = marked.split("\n")

        i = 0
        while i < len(markedLines):

            markedLine = markedLines[i]

            # Everything which is not the start of a code block is skipped here
            # and collected later as part of the text in front of the next code block
            if not markedLine.startswith('<pre><code>'):
                i += 1
                continue

            # the original text of the line (first line of the code block)
            parsed = unescape(stripMarkup.sub("", markedLine)).strip()

            # everything since the last code block and before this one must be text
            comment = []
            for s in range(lineNo, len(lines)):

                source = lines[s]
                if source.strip() == parsed:
                    lineNo = s
                    break

                comment.append(source)

            parts.append({
                "type": "comment",
                "text": "\n".join(comment)
            })

            # Find the end of the code block
            e = i
            while i < len(markedLines):
                markedLine = markedLines[i]
                i += 1

                if markedLine.startswith('</code></pre>'):
                    break

            # The closing line does not contain code
            lineCount = (i - e) - 1

            # add the code block
            parts.append({
                "type": "code",
                "text": "\n".join(lines[lineNo:lineNo + lineCount])
            })

            lineNo += lineCount

        # append the rest of the comment as text
        parts.append({
            "type": "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def __splitSimple(self, text):
        """
        Splits comment text and code blocks by manually parsing a subset of markdown.

        A code block starts at a line which follows an empty line, is indented by
        at least four more characters than the text before and does not continue
        a list item. It ends at the first line which is indented less than the
        first line of the code block.

        Returns a list of dicts with the keys "type" ("comment" or "code") and "text".
        """

        inCode = False
        oldIndent = 0
        parts = []
        wasEmpty = False
        wasList = False

        lineNo = 0
        lines = text.split("\n")

        for pos, line in enumerate(lines):

            stripped = line.strip()

            # ignore empty lines
            if stripped == "":
                wasEmpty = True
                continue

            # get indentation value and change
            indent = len(line) - len(line.lstrip())
            change = indent - oldIndent

            # detect code blocks (never inside an already open code block, otherwise
            # nested indentation in code would be reported as a text block)
            if not inCode and change >= 4 and wasEmpty and not wasList:
                oldIndent = indent
                inCode = True

                parts.append({
                    "type": "comment",
                    "text": "\n".join(lines[lineNo:pos])
                })

                lineNo = pos

            # detect outdents which close the current code block
            # (outdents in normal text must not produce code blocks)
            elif inCode and change < 0:
                inCode = False

                # Drop the empty lines between code and following text
                # without ever dropping a line which has content
                end = pos
                while end > lineNo and lines[end - 1].strip() == "":
                    end -= 1

                parts.append({
                    "type": "code",
                    "text": "\n".join(lines[lineNo:end])
                })

                lineNo = pos

            # only keep track of previous indentation outside of code blocks
            if not inCode:
                oldIndent = indent

            # remember whether this marked a list or not
            wasList = stripped.startswith(('-', '*'))
            wasEmpty = False

        parts.append({
            "type": "code" if inCode else "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def getHtml(self, highlight=True):
        """
        Returns the comment text converted to HTML.

        The result is cached separately for the highlighted and the plain variant.
        Comments which are not doc comments have no blocks and result in an empty string.

        :param highlight: Whether to highlight the code
        :type highlight: bool
        :raises UserError: When Markdown is not supported by the system
        """

        if not Text.supportsMarkdown:
            raise UserError("Markdown is not supported by the system. Documentation comments could not be converted to HTML.")

        # Blocks are only available for doc comments
        blocks = self.__blocks or ()

        if highlight:

            if self.__highlightedText is None:

                chunks = []
                for block in blocks:
                    if block["type"] == "comment":
                        chunks.append(Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"])))
                    else:
                        chunks.append("\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"])))

                self.__highlightedText = "".join(chunks)

            return self.__highlightedText

        if self.__processedText is None:

            chunks = []
            for block in blocks:
                if block["type"] == "comment":
                    chunks.append(Text.markdownToHtml(block["processed"]))
                else:
                    # Code blocks are kept as plain text in this variant
                    chunks.append("\n%s\n\n" % block["text"])

            self.__processedText = "".join(chunks).strip()

        return self.__processedText


    def hasContent(self):
        """Returns whether this is a doc comment with a non empty text."""

        return self.variant == "doc" and bool(self.text)


    def getTags(self):
        """Returns the dictionary of tags or `None` when no tags were found."""

        return self.tags


    def hasTag(self, name):
        """Returns whether the comment contains a tag with the given name."""

        if not self.tags:
            return False

        return name in self.tags


    def __outdent(self, text, indent, startLineNo):
        """
        Outdents multi line comment text and removes leading/trailing empty lines.

        Returns the given text unmodified when one of the lines is not indented
        by at least the given `indent` string.
        """

        lines = []

        # First, split up the comments lines and remove the leading indentation
        for lineNo, line in enumerate((indent + text).split("\n")):

            if line.startswith(indent):
                lines.append(line[len(indent):].rstrip())

            elif line.strip() == "":
                lines.append("")

            else:
                # Only warn for doc comments, otherwise it might just be code commented out
                # which is sometimes formatted pretty crazy when commented out
                if self.variant == "doc":
                    Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

                return text

        # Find the first line with real content behind the leading star to get the
        # characters which need to be removed from the start of all lines
        outdentString = ""
        for line in lines:

            if line.strip() == "":
                continue

            matchedDocIndent = docIndentReg.match(line)

            if not matchedDocIndent:
                # As soon as we find a non doc indent like line we stop
                break

            if matchedDocIndent.group(2) != "":
                # otherwise we look for content behind the indent to get the
                # correct real indent (with spaces)
                outdentString = matchedDocIndent.group(1)
                break

        # Process outdenting to all lines (remove the outdentString from the start of the lines)
        if outdentString != "":

            outdentStringLen = len(outdentString)

            for lineNo, line in enumerate(lines):

                if len(line) <= outdentStringLen:
                    lines[lineNo] = ""

                elif line.startswith(outdentString):
                    lines[lineNo] = line[outdentStringLen:]

                # Only warn for doc comments, otherwise it might just be code commented out
                # which is sometimes formatted pretty crazy when commented out
                elif self.variant == "doc":
                    Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

        # Merge final lines and remove leading and trailing new lines
        return "\n".join(lines).strip("\n")


    def __processDoc(self, text, startLineNo):
        """
        Extracts type, return, tag and param annotations from the given text block
        and returns the text with params and links replaced by HTML markup.

        Content inside of backticks is kept untouched by the param processing.
        """

        text = self.__extractStaticType(text)
        text = self.__extractReturns(text)
        text = self.__extractTags(text)

        # Collapse new empty lines at start/end
        text = text.strip("\n\t ")

        # Now parse only the text outside of backticks
        pieces = []
        last = 0
        for match in tickMatcher.finditer(text):

            # Grab the text before the back tick and process any parameters in it
            start, end = match.span()
            pieces.append(self.__processParams(text[last:start]))

            # The back tick section itself is added as is
            pieces.append(match.group(1))
            last = end

        # add the rest of the text
        pieces.append(self.__processParams(text[last:]))

        return self.__processLinks("".join(pieces))


    def __splitTypeList(self, decl):
        """
        Splits a type declaration like "String | Number[]" into a list of dicts.

        Every dict has a "name" and optionally the flags "array", "builtin" and "pseudo".
        Returns `None` when `decl` is `None`.
        """

        if decl is None:
            return decl

        result = []
        for entry in listSplit.split(decl.strip()):

            # Figure out if it is marked as array
            isArray = entry.endswith("[]")
            if isArray:
                entry = entry[:-2]

            store = {
                "name" : entry
            }

            if isArray:
                store["array"] = True

            if entry in builtinTypes:
                store["builtin"] = True

            if entry in pseudoTypes:
                store["pseudo"] = True

            result.append(store)

        return result


    def __extractReturns(self, text):
        """
        Extracts leading return definition (when type is function).

        Stores the result in `self.returns` and returns the text without the definition.
        """

        def collectReturn(match):
            self.returns = self.__splitTypeList(match.group(1))
            return ""

        return returnMatcher.sub(collectReturn, text)


    def __extractStaticType(self, text):
        """
        Extracts leading type definition (when value is a static type).

        Stores the result in `self.type` and returns the text without the definition.
        """

        def collectType(match):
            self.type = match.group(1).strip()
            return ""

        return typeMatcher.sub(collectType, text)


    def __extractTags(self, text):
        """
        Extracts all tags inside the given doc comment. These are removed from
        the text and collected inside `self.tags` as a dict.

        Tags without parameters are stored as `True`, tags with parameters
        as a set of all parameters found for that tag.
        """

        def collectTags(match):
            if not self.tags:
                self.tags = {}

            name = match.group(1)
            param = match.group(3)
            current = self.tags.get(name)

            if param is None:
                # Keep parameters which were collected before for the same tag
                if current is None:
                    self.tags[name] = True

            elif isinstance(current, set):
                current.add(param)

            else:
                # Either a new tag or a tag which was used without parameter before
                self.tags[name] = {param}

            return ""

        return tagMatcher.sub(collectTags, text)


    def __processParams(self, text):
        """
        Collects all param declarations of the given text into `self.params`
        and replaces them with HTML markup containing the param name.

        Dotted names like `options.x` are stored as "fields" of their parent entry.
        """

        def collectParams(match):

            paramTypes = match.group(3)
            paramDynamic = match.group(4) is not None
            paramOptional = match.group(5) is not None
            paramDefault = match.group(7)

            if paramTypes:
                paramTypes = self.__splitTypeList(paramTypes)

            if self.params is None:
                self.params = {}

            params = self.params
            fullName = match.group(1).strip()
            names = fullName.split('.')
            lastIndex = len(names) - 1

            for i, mapName in enumerate(names):

                # Add new entries and overwrite if a type is defined in this entry.
                # Make sure to not overwrite something like @options {Object} with the type of @options.x {Number}
                if i == lastIndex and paramTypes is not None:

                    paramEntry = params[mapName] = {
                        "type": paramTypes
                    }

                    if paramDynamic:
                        paramEntry["dynamic"] = paramDynamic

                    if paramOptional:
                        paramEntry["optional"] = paramOptional

                    if paramDefault is not None:
                        paramEntry["default"] = paramDefault

                else:
                    # Ensure we have the map object in the params
                    paramEntry = params.setdefault(mapName, {})

                # create fields for new map level
                if i < lastIndex:
                    params = paramEntry.setdefault("fields", {})

            return '<code class="param">%s</code>' % fullName

        return paramMatcher.sub(collectParams, text)


    def __processLinks(self, text):
        """
        Replaces all links like `{member:some.Class#item}` in the given text
        with HTML anchors and returns the resulting text.
        """

        def formatTypes(match):

            parsedSection = match.group(3)
            parsedFile = match.group(4)
            parsedItem = match.group(6)

            # Do not match {}
            if parsedSection is None and parsedFile is None and parsedItem is None:
                return match.group(1)

            # Minor corrections: a section is only useful together with an item
            if parsedSection and not parsedItem:
                parsedSection = ""

            link = ""
            label = ""

            if parsedSection:
                link += '%s:' % parsedSection

            if parsedFile:
                link += parsedFile
                label += parsedFile

            if parsedItem:
                link += "~%s" % parsedItem
                if label == "":
                    label = parsedItem
                else:
                    label += "#%s" % parsedItem

            # build final HTML
            return '<a href="#%s"><code>%s</code></a>' % (link, label)

        return linkMatcher.sub(formatTypes, text)
|
677 |