diff -r f1b3a73831c9 -r 1dd52aa8897c ThirdParty/Jasy/jasy/script/api/Comment.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ThirdParty/Jasy/jasy/script/api/Comment.py Sat Jan 12 12:11:42 2019 +0100 @@ -0,0 +1,677 @@ +# +# Jasy - Web Tooling Framework +# Copyright 2010-2012 Zynga Inc. +# Copyright 2013-2014 Sebastian Werner +# + +from __future__ import unicode_literals + +import re + +import jasy.core.Text as Text +import jasy.core.Console as Console + +from jasy import UserError +from jasy.script.util import * + + +# Used to measure the doc indent size (with leading stars in front of content) +docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)") + +# Used to split type lists as supported by throw, return and params +listSplit = re.compile("\s*\|\s*") + +# Used to remove markup sequences after doc processing of comment text +stripMarkup = re.compile(r"<.*?>") + + + +# Matches return blocks in comments +returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}") + +# Matches type definitions in comments +typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}") + +# Matches tags +tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)") + +# Matches param declarations in own dialect +paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?") + +# Matches links in own dialect +linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})") + +# matches backticks and has a built-in failsafe for backticks which do not terminate on the same line +tickMatcher = re.compile(r"(`[^\n`]*?`)") + + +class CommentException(Exception): + """ + Thrown when errors during comment processing are detected. + """ + + def __init__(self, message, lineNo=0): + Exception.__init__(self, "Comment error: %s (line: %s)" % (message, lineNo+1)) + + + + +class Comment(): + """ + Comment class is attached to parsed nodes and used to store all comment related information. + + The class supports a new Markdown and TomDoc inspired dialect to make developers life easier and work less repeative. + """ + + # Relation to code + context = None + + # Dictionary of tags + tags = None + + # Dictionary of params + params = None + + # List of return types + returns = None + + # Static type + type = None + + # Collected text of the comment (without the extracted doc relevant data) + text = None + + # Text with extracted / parsed data + __processedText = None + + # Text of the comment converted to HTML including highlighting (only for doc comment) + __highlightedText = None + + # Text / Code Blocks in the comment + __blocks = None + + + def __init__(self, text, context=None, lineNo=0, indent="", fileId=None): + + # Store context (relation to code) + self.context = context + + # Store fileId + self.fileId = fileId + + # Figure out the type of the comment based on the starting characters + + # Inline comments + if text.startswith("//"): + # "// hello" => " hello" + text = " " + text[2:] + self.variant = "single" + + # Doc comments + elif text.startswith("/**"): + # "/** hello */" => " hello " + text = " " + text[3:-2] + self.variant = "doc" + + # Protected comments which should not be removed (e.g these are used for license blocks) + elif text.startswith("/*!"): + # "/*! hello */" => " hello " + text = " " + text[3:-2] + self.variant = "protected" + + # A normal multiline comment + elif text.startswith("/*"): + # "/* hello */" => " hello " + text = " " + text[2:-2] + self.variant = "multi" + + else: + raise CommentException("Invalid comment text: %s" % text, lineNo) + + # Multi line comments need to have their indentation removed + if "\n" in text: + text = self.__outdent(text, indent, lineNo) + + # For single line comments strip the surrounding whitespace + else: + # " hello " => "hello" + text = text.strip() + + # The text of the comment before any processing took place + self.text = text + + + # Perform annotation parsing, markdown conversion and code highlighting on doc blocks + if self.variant == "doc": + + # Separate text and code blocks + self.__blocks = self.__splitBlocks(text) + + # Re-combine everything and apply processing and formatting + plainText = '' # text without annotations but with markdown + for b in self.__blocks: + + if b["type"] == "comment": + + processed = self.__processDoc(b["text"], lineNo) + b["processed"] = processed + + if "<" in processed: + plainText += stripMarkup.sub("", processed) + + else: + plainText += processed + + else: + plainText += "\n\n" + b["text"] + "\n\n" + + # The without any annotations + self.text = plainText.strip() + + + def __splitBlocks(self, text): + """ + Splits up text and code blocks in comments. + + This will try to use hoedown for Markdown parsing if available and will + fallback to a simpler implementation in order to allow processing of + doc parameters and links without hoedown being installed. + """ + + if not Text.supportsMarkdown: + return self.__splitSimple(text) + + marked = Text.markdownToHtml(text) + + def unescape(html): + html = html.replace('<', '<') + html = html.replace('>', '>') + html = html.replace('&', '&') + html = html.replace('"', '"') + return html.replace(''', "'") + + parts = [] + + lineNo = 0 + lines = text.split("\n") + markedLines = marked.split("\n") + + i = 0 + while i < len(markedLines): + + l = markedLines[i] + + # the original text of the line + parsed = unescape(stripMarkup.sub("", l)) + + # start of a code block, grab all text before it and move it into a block + if l.startswith('<pre><code>'): + + # everything since the last code block and before this one must be text + comment = [] + for s in range(lineNo, len(lines)): + + source = lines[s] + if source.strip() == parsed.strip(): + lineNo = s + break + + comment.append(source) + + parts.append({ + "type": "comment", + "text": "\n".join(comment) + }) + + # Find the end of the code block + e = i + while i < len(markedLines): + l = markedLines[i] + i += 1 + + if l.startswith('</code></pre>'): + break + + lineCount = (i - e) - 1 + + # add the code block + parts.append({ + "type": "code", + "text": "\n".join(lines[lineNo:lineNo + lineCount]) + }) + + lineNo += lineCount + + else: + i += 1 + + # append the rest of the comment as text + parts.append({ + "type": "comment", + "text": "\n".join(lines[lineNo:]) + }) + + return parts + + + def __splitSimple(self, text): + """Splits comment text and code blocks by manually parsing a subset of markdown""" + + inCode = False + oldIndent = 0 + parts = [] + wasEmpty = False + wasList = False + + lineNo = 0 + lines = text.split("\n") + + for s, l in enumerate(lines): + + # ignore empty lines + if not l.strip() == "": + + # get indentation value and change + indent = len(l) - len(l.lstrip()) + change = indent - oldIndent + + # detect code blocks + if change >= 4 and wasEmpty: + if not wasList: + oldIndent = indent + inCode = True + + parts.append({ + "type": "comment", + "text": "\n".join(lines[lineNo:s]) + }) + + lineNo = s + + # detect outdents + elif change < 0: + inCode = False + + parts.append({ + "type": "code", + "text": "\n".join(lines[lineNo:s - 1]) + }) + + lineNo = s + + # only keep track of old previous indentation outside of comments + if not inCode: + oldIndent = indent + + # remember whether this marked a list or not + wasList = l.strip().startswith('-') or l.strip().startswith('*') + wasEmpty = False + + else: + wasEmpty = True + + parts.append({ + "type": "code" if inCode else "comment", + "text": "\n".join(lines[lineNo:]) + }) + + return parts + + + def getHtml(self, highlight=True): + """ + Returns the comment text converted to HTML + + :param highlight: Whether to highlight the code + :type highlight: bool + """ + + if not Text.supportsMarkdown: + raise UserError("Markdown is not supported by the system. Documentation comments could converted to HTML.") + + if highlight: + + if self.__highlightedText is None: + + highlightedText = "" + + for block in self.__blocks: + + if block["type"] == "comment": + highlightedText += Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"])) + else: + highlightedText += "\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"])) + + self.__highlightedText = highlightedText + + return self.__highlightedText + + else: + + if self.__processedText is None: + + processedText = "" + + for block in self.__blocks: + + if block["type"] == "comment": + processedText += Text.markdownToHtml(block["processed"]) + else: + processedText += "\n%s\n\n" % block["text"] + + self.__processedText = processedText.strip() + + return self.__processedText + + + def hasContent(self): + return self.variant == "doc" and len(self.text) + + + def getTags(self): + return self.tags + + + def hasTag(self, name): + if not self.tags: + return False + + return name in self.tags + + + def __outdent(self, text, indent, startLineNo): + """ + Outdent multi line comment text and filtering empty lines + """ + + lines = [] + + # First, split up the comments lines and remove the leading indentation + for lineNo, line in enumerate((indent+text).split("\n")): + + if line.startswith(indent): + lines.append(line[len(indent):].rstrip()) + + elif line.strip() == "": + lines.append("") + + else: + # Only warn for doc comments, otherwise it might just be code commented out + # which is sometimes formatted pretty crazy when commented out + if self.variant == "doc": + Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+lineNo, self.fileId) + + return text + + # Find first line with real content, then grab the one after it to get the + # characters which need + outdentString = "" + for lineNo, line in enumerate(lines): + + if line != "" and line.strip() != "": + matchedDocIndent = docIndentReg.match(line) + + if not matchedDocIndent: + # As soon as we find a non doc indent like line we stop + break + + elif matchedDocIndent.group(2) != "": + # otherwise we look for content behind the indent to get the + # correct real indent (with spaces) + outdentString = matchedDocIndent.group(1) + break + + lineNo += 1 + + # Process outdenting to all lines (remove the outdentString from the start of the lines) + if outdentString != "": + + lineNo = 0 + outdentStringLen = len(outdentString) + + for lineNo, line in enumerate(lines): + if len(line) <= outdentStringLen: + lines[lineNo] = "" + + else: + if not line.startswith(outdentString): + + # Only warn for doc comments, otherwise it might just be code commented out + # which is sometimes formatted pretty crazy when commented out + if self.variant == "doc": + Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+lineNo, self.fileId) + + else: + lines[lineNo] = line[outdentStringLen:] + + # Merge final lines and remove leading and trailing new lines + return "\n".join(lines).strip("\n") + + + def __processDoc(self, text, startLineNo): + + text = self.__extractStaticType(text) + text = self.__extractReturns(text) + text = self.__extractTags(text) + + # Collapse new empty lines at start/end + text = text.strip("\n\t ") + + parsed = '' + + # Now parse only the text outside of backticks + last = 0 + def split(match): + + # Grab the text before the back tick and process any parameters in it + nonlocal parsed + nonlocal last + + start, end = match.span() + before = text[last:start] + parsed += self.__processParams(before) + match.group(1) + last = end + + tickMatcher.sub(split, text) + + # add the rest of the text + parsed += self.__processParams(text[last:]) + + text = self.__processLinks(parsed) + + return text + + + def __splitTypeList(self, decl): + + if decl is None: + return decl + + splitted = listSplit.split(decl.strip()) + + result = [] + for entry in splitted: + + # Figure out if it is marked as array + isArray = False + if entry.endswith("[]"): + isArray = True + entry = entry[:-2] + + store = { + "name" : entry + } + + if isArray: + store["array"] = True + + if entry in builtinTypes: + store["builtin"] = True + + if entry in pseudoTypes: + store["pseudo"] = True + + result.append(store) + + return result + + + + def __extractReturns(self, text): + """ + Extracts leading return defintion (when type is function) + """ + + def collectReturn(match): + self.returns = self.__splitTypeList(match.group(1)) + return "" + + return returnMatcher.sub(collectReturn, text) + + + + def __extractStaticType(self, text): + """ + Extracts leading type defintion (when value is a static type) + """ + + def collectType(match): + self.type = match.group(1).strip() + return "" + + return typeMatcher.sub(collectType, text) + + + + def __extractTags(self, text): + """ + Extract all tags inside the give doc comment. These are replaced from + the text and collected inside the "tags" key as a dict. + """ + + def collectTags(match): + if not self.tags: + self.tags = {} + + name = match.group(1) + param = match.group(3) + + if name in self.tags: + self.tags[name].add(param) + elif param: + self.tags[name] = set([param]) + else: + self.tags[name] = True + + return "" + + return tagMatcher.sub(collectTags, text) + + + def __processParams(self, text): + + def collectParams(match): + + paramName = match.group(1) + paramTypes = match.group(3) + paramDynamic = match.group(4) is not None + paramOptional = match.group(5) is not None + paramDefault = match.group(7) + + if paramTypes: + paramTypes = self.__splitTypeList(paramTypes) + + if self.params is None: + self.params = {} + + params = self.params + fullName = match.group(1).strip() + names = fullName.split('.') + + for i, mapName in enumerate(names): + + # Ensure we have the map object in the params + if not mapName in params: + params[mapName] = {} + + # Add new entries and overwrite if a type is defined in this entry + if not mapName in params or paramTypes is not None: + + # Make sure to not overwrite something like @options {Object} with the type of @options.x {Number} + if i == len(names) - 1: + + paramEntry = params[mapName] = {} + + if paramTypes is not None: + paramEntry["type"] = paramTypes + + if paramDynamic: + paramEntry["dynamic"] = paramDynamic + + if paramOptional: + paramEntry["optional"] = paramOptional + + if paramDefault is not None: + paramEntry["default"] = paramDefault + + else: + paramEntry = params[mapName] + + + else: + paramEntry = params[mapName] + + # create fields for new map level + if i + 1 < len(names): + if not "fields" in paramEntry: + paramEntry["fields"] = {} + + params = paramEntry["fields"] + + return '<code class="param">%s</code>' % fullName + + return paramMatcher.sub(collectParams, text) + + + def __processLinks(self, text): + + def formatTypes(match): + + parsedSection = match.group(3) + parsedFile = match.group(4) + parsedItem = match.group(6) + + # Do not match {} + if parsedSection is None and parsedFile is None and parsedItem is None: + return match.group(1) + + # Minor corrections + if parsedSection and not parsedItem: + parsedSection = "" + + attr = "" + link = "" + label = "" + + if parsedSection: + link += '%s:' % parsedSection + + if parsedFile: + link += parsedFile + label += parsedFile + + if parsedItem: + link += "~%s" % parsedItem + if label == "": + label = parsedItem + else: + label += "#%s" % parsedItem + + # add link to attributes list + attr += ' href="#%s"' % link + + # build final HTML + return '<a%s><code>%s</code></a>' % (attr, label) + + return linkMatcher.sub(formatTypes, text) +