ThirdParty/Jasy/jasy/script/api/Comment.py

changeset 6650
1dd52aa8897c
parent 3145
a9de05d4a22f
equal deleted inserted replaced
6649:f1b3a73831c9 6650:1dd52aa8897c
1 #
2 # Jasy - Web Tooling Framework
3 # Copyright 2010-2012 Zynga Inc.
4 # Copyright 2013-2014 Sebastian Werner
5 #
6
7 from __future__ import unicode_literals
8
9 import re
10
11 import jasy.core.Text as Text
12 import jasy.core.Console as Console
13
14 from jasy import UserError
15 from jasy.script.util import *
16
17
# Used to measure the doc indent size (with leading stars in front of content)
# Group 1 is the indent including the star, group 2 the first word behind it (may be empty)
docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")

# Used to split type lists as supported by throw, return and params
# (raw string: "\s" and "\|" are regex escapes, not valid string escapes)
listSplit = re.compile(r"\s*\|\s*")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")


# Matches return blocks in comments, e.g. "{String|Number[]}" at the start of the text
returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")

# Matches type definitions in comments, e.g. "{=Map}" at the start of the text
typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")

# Matches tags, e.g. "#deprecated" or "#require(name)"
# Group 1 is the tag name, group 3 the optional parameter
tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")

# Matches param declarations in own dialect, e.g. "@name", "@options.x {Number}",
# "@rest {String...}", "@flag {Boolean?}" or "@count {Number ? 3}"
# Group 1 is the (dotted) name, group 3 the type list, group 4 the "..." marker,
# group 5 the optional marker (with or without default) and group 7 the default value
paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")

# Matches links in own dialect, e.g. "{foo.Bar}", "{#item}" or "{member:foo.Bar#item}"
# Group 1 is the whole link, group 3 the section, group 4 the file and group 6 the item
linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")

# matches backticks and has a built-in failsafe for backticks which do not terminate on the same line
tickMatcher = re.compile(r"(`[^\n`]*?`)")
46
47
class CommentException(Exception):
    """
    Raised whenever a problem is detected while processing a comment.

    The exception text has the form "Comment error: <message> (line: <lineNo + 1>)".
    """

    def __init__(self, message, lineNo=0):
        # The reported line is the given line number increased by one
        reportedLine = lineNo + 1
        description = "Comment error: %s (line: %s)" % (message, reportedLine)

        super(CommentException, self).__init__(description)
55
56
57
58
class Comment():
    """
    Comment class is attached to parsed nodes and used to store all comment related information.

    The class supports a new Markdown and TomDoc inspired dialect to make developers' lives easier
    and their work less repetitive.

    Only doc comments (`/** ... */`) are processed further: their text is split into text and code
    blocks, tags (`#name`), params (`@name {Type}`), a leading return type (`{Type}`), a leading
    static type (`{=Type}`) and links (`{file#item}`) are extracted from the text blocks.
    """

    # Relation to code
    context = None

    # Identifier of the file the comment belongs to (used in warnings)
    fileId = None

    # Kind of comment: "single", "doc", "protected" or "multi"
    variant = None

    # Dictionary of tags (tag name => True or set of parameters)
    tags = None

    # Dictionary of params
    params = None

    # List of return types
    returns = None

    # Static type
    type = None

    # Collected text of the comment (without the extracted doc relevant data)
    text = None

    # Text with extracted / parsed data
    __processedText = None

    # Text of the comment converted to HTML including highlighting (only for doc comment)
    __highlightedText = None

    # Text / Code Blocks in the comment
    __blocks = None


    def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):
        """
        Parses the given raw comment.

        :param text: Raw comment source including its delimiters (`//`, `/*`, `/**` or `/*!`)
        :param context: Relation to the code, stored unmodified
        :param lineNo: Line number of the comment, used in error and warning messages
        :param indent: Indentation string which is removed from every line of a multi line comment
        :param fileId: Identifier of the file, used in warning messages
        :raises CommentException: When the text does not start with a known comment delimiter
        """

        # Store context (relation to code)
        self.context = context

        # Store fileId
        self.fileId = fileId

        # Figure out the type of the comment based on the starting characters

        # Inline comments
        if text.startswith("//"):
            # "// hello" => "  hello"
            text = " " + text[2:]
            self.variant = "single"

        # Doc comments
        elif text.startswith("/**"):
            # "/** hello */" => "  hello "
            text = " " + text[3:-2]
            self.variant = "doc"

        # Protected comments which should not be removed (e.g these are used for license blocks)
        elif text.startswith("/*!"):
            # "/*! hello */" => "  hello "
            text = " " + text[3:-2]
            self.variant = "protected"

        # A normal multiline comment
        elif text.startswith("/*"):
            # "/* hello */" => "  hello "
            text = " " + text[2:-2]
            self.variant = "multi"

        else:
            raise CommentException("Invalid comment text: %s" % text, lineNo)

        # Multi line comments need to have their indentation removed
        if "\n" in text:
            text = self.__outdent(text, indent, lineNo)

        # For single line comments strip the surrounding whitespace
        else:
            # " hello " => "hello"
            text = text.strip()

        # The text of the comment before any processing took place
        self.text = text

        # Perform annotation parsing, markdown conversion and code highlighting on doc blocks
        if self.variant == "doc":

            # Separate text and code blocks
            self.__blocks = self.__splitBlocks(text)

            # Re-combine everything and apply processing and formatting
            # (text without annotations and without markup)
            plainParts = []
            for block in self.__blocks:

                if block["type"] == "comment":
                    processed = self.__processDoc(block["text"], lineNo)
                    block["processed"] = processed

                    # Substitution is a no-op when the processed text contains no markup
                    plainParts.append(stripMarkup.sub("", processed))

                else:
                    plainParts.append("\n\n" + block["text"] + "\n\n")

            # The text without any annotations
            self.text = "".join(plainParts).strip()


    def __splitBlocks(self, text):
        """
        Splits up text and code blocks in comments.

        This will try to use hoedown for Markdown parsing if available and will
        fallback to a simpler implementation in order to allow processing of
        doc parameters and links without hoedown being installed.

        Returns a list of dicts with the keys "type" ("comment" or "code") and "text".
        """

        if not Text.supportsMarkdown:
            return self.__splitSimple(text)

        marked = Text.markdownToHtml(text)

        def unescape(html):
            # "&amp;" has to be decoded last, otherwise e.g. "&amp;quot;" (an escaped
            # literal "&quot;") would be decoded twice and end up as a quote character
            html = html.replace('&lt;', '<')
            html = html.replace('&gt;', '>')
            html = html.replace('&quot;', '"')
            html = html.replace('&#39;', "'")
            return html.replace('&amp;', '&')

        parts = []

        # Index of the first source line which is not yet part of any block
        lineNo = 0
        lines = text.split("\n")
        markedLines = marked.split("\n")

        i = 0
        while i < len(markedLines):

            markedLine = markedLines[i]

            # start of a code block, grab all text before it and move it into a block
            if markedLine.startswith('<pre><code>'):

                # the original text of the first line of the code block
                parsed = unescape(stripMarkup.sub("", markedLine)).strip()

                # everything since the last code block and before this one must be text
                comment = []
                for s in range(lineNo, len(lines)):

                    source = lines[s]
                    if source.strip() == parsed:
                        lineNo = s
                        break

                    comment.append(source)

                parts.append({
                    "type": "comment",
                    "text": "\n".join(comment)
                })

                # Find the end of the code block
                codeStart = i
                while i < len(markedLines):
                    markedLine = markedLines[i]
                    i += 1

                    if markedLine.startswith('</code></pre>'):
                        break

                # Number of lines between the opening and the closing markup line
                lineCount = (i - codeStart) - 1

                # add the code block
                parts.append({
                    "type": "code",
                    "text": "\n".join(lines[lineNo:lineNo + lineCount])
                })

                lineNo += lineCount

            else:
                i += 1

        # append the rest of the comment as text
        parts.append({
            "type": "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def __splitSimple(self, text):
        """
        Splits comment text and code blocks by manually parsing a subset of markdown.

        A code block starts at a line which follows an empty line and is indented by
        at least four more characters than the previous text (unless that text was a
        list item). It ends at the first non empty line which is indented less than
        the first line of the code block.

        Returns a list of dicts with the keys "type" ("comment" or "code") and "text".
        """

        inCode = False
        oldIndent = 0
        parts = []
        wasEmpty = False
        wasList = False

        # Index of the first line which is not yet part of any block
        lineNo = 0
        lines = text.split("\n")

        for s, l in enumerate(lines):

            stripped = l.strip()

            # ignore empty lines
            if stripped == "":
                wasEmpty = True
                continue

            # get indentation value and change
            indent = len(l) - len(l.lstrip())
            change = indent - oldIndent

            if inCode:

                # detect outdents which terminate the open code block
                # (deeper indented lines inside the block are just part of the code)
                if change < 0:
                    inCode = False

                    # exclude the empty line between code and text, but never a line of code
                    end = s - 1 if lines[s - 1].strip() == "" else s

                    parts.append({
                        "type": "code",
                        "text": "\n".join(lines[lineNo:end])
                    })

                    lineNo = s

            # detect code blocks
            elif change >= 4 and wasEmpty and not wasList:
                oldIndent = indent
                inCode = True

                parts.append({
                    "type": "comment",
                    "text": "\n".join(lines[lineNo:s])
                })

                lineNo = s

            # only keep track of old previous indentation outside of code blocks
            if not inCode:
                oldIndent = indent

            # remember whether this marked a list or not
            wasList = stripped.startswith('-') or stripped.startswith('*')
            wasEmpty = False

        parts.append({
            "type": "code" if inCode else "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def getHtml(self, highlight=True):
        """
        Returns the comment text converted to HTML

        The result is computed once per variant (highlighted / not highlighted) and cached.

        :param highlight: Whether to highlight the code
        :type highlight: bool
        :raises UserError: When Markdown is not supported by the system
        """

        if not Text.supportsMarkdown:
            raise UserError("Markdown is not supported by the system. Documentation comments could not be converted to HTML.")

        if highlight:

            if self.__highlightedText is None:

                chunks = []
                for block in self.__blocks:

                    if block["type"] == "comment":
                        chunks.append(Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"])))
                    else:
                        chunks.append("\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"])))

                self.__highlightedText = "".join(chunks)

            return self.__highlightedText

        if self.__processedText is None:

            chunks = []
            for block in self.__blocks:

                if block["type"] == "comment":
                    chunks.append(Text.markdownToHtml(block["processed"]))
                else:
                    chunks.append("\n%s\n\n" % block["text"])

            self.__processedText = "".join(chunks).strip()

        return self.__processedText


    def hasContent(self):
        """Returns whether this is a doc comment with non empty text"""

        return self.variant == "doc" and bool(self.text)


    def getTags(self):
        """Returns the dictionary of tags (or None when no tag was found)"""

        return self.tags


    def hasTag(self, name):
        """Returns whether a tag with the given name was found in the comment"""

        if not self.tags:
            return False

        return name in self.tags


    def __outdent(self, text, indent, startLineNo):
        """
        Outdent multi line comment text and filtering empty lines

        First removes `indent` from the start of every line, then removes the doc
        indent (leading star and whitespace, e.g. " * ") when the comment uses one.
        When a line does not start with `indent` the given text is returned unmodified.

        :param text: Comment text without the comment delimiters
        :param indent: Indentation string in front of the comment
        :param startLineNo: Line number of the comment, used in warning messages
        """

        lines = []

        # First, split up the comments lines and remove the leading indentation
        # (the first line of the text does not contain the indent, so it is prepended)
        for lineNo, line in enumerate((indent+text).split("\n")):

            if line.startswith(indent):
                lines.append(line[len(indent):].rstrip())

            elif line.strip() == "":
                lines.append("")

            else:
                # Only warn for doc comments, otherwise it might just be code commented out
                # which is sometimes formatted pretty crazy when commented out
                if self.variant == "doc":
                    Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

                return text

        # Find first line with real content to get the characters which need to be
        # removed from the start of every line
        outdentString = ""
        for line in lines:

            if line.strip() == "":
                continue

            matchedDocIndent = docIndentReg.match(line)

            if not matchedDocIndent:
                # As soon as we find a non doc indent like line we stop
                break

            if matchedDocIndent.group(2) != "":
                # otherwise we look for content behind the indent to get the
                # correct real indent (with spaces)
                outdentString = matchedDocIndent.group(1)
                break

        # Process outdenting to all lines (remove the outdentString from the start of the lines)
        if outdentString != "":

            outdentStringLen = len(outdentString)

            for lineNo, line in enumerate(lines):

                # Lines which are not longer than the outdent string are emptied
                if len(line) <= outdentStringLen:
                    lines[lineNo] = ""

                elif line.startswith(outdentString):
                    lines[lineNo] = line[outdentStringLen:]

                # Only warn for doc comments, otherwise it might just be code commented out
                # which is sometimes formatted pretty crazy when commented out
                elif self.variant == "doc":
                    Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

        # Merge final lines and remove leading and trailing new lines
        return "\n".join(lines).strip("\n")


    def __processDoc(self, text, startLineNo):
        """
        Extracts static type, return types, tags and params from the given text block
        and returns the text with params and links replaced by HTML markup.

        :param text: Text of a "comment" block
        :param startLineNo: Line number of the comment (currently not used)
        """

        text = self.__extractStaticType(text)
        text = self.__extractReturns(text)
        text = self.__extractTags(text)

        # Collapse new empty lines at start/end
        text = text.strip("\n\t ")

        # Now parse only the text outside of backticks
        pieces = []
        last = 0
        for match in tickMatcher.finditer(text):

            # Grab the text before the back tick and process any parameters in it,
            # the content of the backticks itself is kept as is
            start, end = match.span()
            pieces.append(self.__processParams(text[last:start]))
            pieces.append(match.group(1))
            last = end

        # add the rest of the text
        pieces.append(self.__processParams(text[last:]))

        return self.__processLinks("".join(pieces))


    def __splitTypeList(self, decl):
        """
        Splits a type list like "String | Number[]" into a list of dicts.

        Every dict has the key "name" and optionally the flags "array" (name ended
        with "[]"), "builtin" and "pseudo". Returns None when `decl` is None.
        """

        if decl is None:
            return decl

        result = []
        for entry in listSplit.split(decl.strip()):

            # Figure out if it is marked as array
            isArray = entry.endswith("[]")
            if isArray:
                entry = entry[:-2]

            store = {
                "name" : entry
            }

            if isArray:
                store["array"] = True

            if entry in builtinTypes:
                store["builtin"] = True

            if entry in pseudoTypes:
                store["pseudo"] = True

            result.append(store)

        return result


    def __extractReturns(self, text):
        """
        Extracts leading return definition (when type is function)

        Stores the parsed type list in `self.returns` and returns the text without it.
        """

        def collectReturn(match):
            self.returns = self.__splitTypeList(match.group(1))
            return ""

        return returnMatcher.sub(collectReturn, text)


    def __extractStaticType(self, text):
        """
        Extracts leading type definition (when value is a static type)

        Stores the type name in `self.type` and returns the text without it.
        """

        def collectType(match):
            self.type = match.group(1).strip()
            return ""

        return typeMatcher.sub(collectType, text)


    def __extractTags(self, text):
        """
        Extract all tags inside the given doc comment. These are removed from
        the text and collected inside the "tags" attribute as a dict.

        A tag without parameter is stored as `True`, a tag with parameters as the
        set of all parameters found for this tag name.
        """

        def collectTags(match):
            if self.tags is None:
                self.tags = {}

            name = match.group(1)
            param = match.group(3)
            current = self.tags.get(name)

            if param:
                if isinstance(current, set):
                    current.add(param)
                else:
                    # First parameter for this tag (replaces a plain `True` marker)
                    self.tags[name] = set([param])

            elif current is None:
                # Repeated tags without parameter must not touch an existing entry
                self.tags[name] = True

            return ""

        return tagMatcher.sub(collectTags, text)


    def __processParams(self, text):
        """
        Collects all param declarations of the given text in `self.params` and
        returns the text with every declaration replaced by a `<code>` element.

        Dotted names like `@options.x {Number}` are stored as nested entries
        inside the "fields" dict of the parent param.
        """

        def collectParams(match):

            paramTypes = match.group(3)
            paramDynamic = match.group(4) is not None
            paramOptional = match.group(5) is not None
            paramDefault = match.group(7)

            if paramTypes:
                paramTypes = self.__splitTypeList(paramTypes)

            if self.params is None:
                self.params = {}

            params = self.params
            fullName = match.group(1).strip()
            names = fullName.split('.')
            lastIndex = len(names) - 1

            for i, mapName in enumerate(names):

                # Ensure we have the map object in the params
                if mapName not in params:
                    params[mapName] = {}

                # Overwrite the entry if a type is defined in this declaration, but make sure
                # to not overwrite something like @options {Object} with the type of @options.x {Number}
                if paramTypes is not None and i == lastIndex:

                    paramEntry = params[mapName] = {}
                    paramEntry["type"] = paramTypes

                    if paramDynamic:
                        paramEntry["dynamic"] = paramDynamic

                    if paramOptional:
                        paramEntry["optional"] = paramOptional

                    if paramDefault is not None:
                        paramEntry["default"] = paramDefault

                else:
                    paramEntry = params[mapName]

                # create fields for new map level
                if i < lastIndex:
                    if "fields" not in paramEntry:
                        paramEntry["fields"] = {}

                    params = paramEntry["fields"]

            return '<code class="param">%s</code>' % fullName

        return paramMatcher.sub(collectParams, text)


    def __processLinks(self, text):
        """
        Replaces all links of the own dialect (e.g. `{foo.Bar#item}`) in the given
        text with HTML anchors and returns the result.
        """

        def formatTypes(match):

            parsedSection = match.group(3)
            parsedFile = match.group(4)
            parsedItem = match.group(6)

            # Do not match {}
            if parsedSection is None and parsedFile is None and parsedItem is None:
                return match.group(1)

            # Minor corrections (a section makes no sense without an item)
            if parsedSection and not parsedItem:
                parsedSection = ""

            link = ""
            label = ""

            if parsedSection:
                link += '%s:' % parsedSection

            if parsedFile:
                link += parsedFile
                label += parsedFile

            if parsedItem:
                link += "~%s" % parsedItem
                if label == "":
                    label = parsedItem
                else:
                    label += "#%s" % parsedItem

            # add link to attributes list
            attr = ' href="#%s"' % link

            # build final HTML
            return '<a%s><code>%s</code></a>' % (attr, label)

        return linkMatcher.sub(formatTypes, text)
677

eric ide

mercurial