ThirdParty/Jasy/jasy/script/api/Comment.py

changeset 6650
1dd52aa8897c
parent 3145
a9de05d4a22f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ThirdParty/Jasy/jasy/script/api/Comment.py	Sat Jan 12 12:11:42 2019 +0100
@@ -0,0 +1,677 @@
+#
+# Jasy - Web Tooling Framework
+# Copyright 2010-2012 Zynga Inc.
+# Copyright 2013-2014 Sebastian Werner
+#
+
+from __future__ import unicode_literals
+
+import re
+
+import jasy.core.Text as Text
+import jasy.core.Console as Console
+
+from jasy import UserError
+from jasy.script.util import *
+
+
+# Used to measure the doc indent size (with leading stars in front of content)
+docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")
+
+# Used to split type lists as supported by throw, return and params
+# (raw string: "\s" and "\|" are invalid escape sequences in a normal string literal)
+listSplit = re.compile(r"\s*\|\s*")
+
+# Used to remove markup sequences after doc processing of comment text
+stripMarkup = re.compile(r"<.*?>")
+
+
+
+# Matches return blocks in comments
+returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")
+
+# Matches type definitions in comments
+typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")
+
+# Matches tags
+tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")
+
+# Matches param declarations in own dialect
+paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")
+
+# Matches links in own dialect
+linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")
+
+# Matches backticks and has a built-in failsafe for backticks which do not terminate on the same line
+tickMatcher = re.compile(r"(`[^\n`]*?`)")
+
+
+class CommentException(Exception):
+    """
+    Raised when a problem is detected while processing a comment.
+
+    The message handed to the base exception is prefixed with "Comment error:"
+    and is followed by the line number, shifted by one (zero-based input is
+    reported one-based).
+    """
+
+    def __init__(self, message, lineNo=0):
+        reportedLine = lineNo + 1
+        super(CommentException, self).__init__("Comment error: %s (line: %s)" % (message, reportedLine))
+
+
+
+
+class Comment():
+    """
+    Comment class is attached to parsed nodes and used to store all comment related information.
+
+    The class supports a new Markdown and TomDoc inspired dialect to make developers' lives easier and work less repetitive.
+    """
+
+    # Relation to code
+    context = None
+
+    # Dictionary of tags (tag name => True or a set of tag parameters)
+    tags = None
+
+    # Dictionary of params (param name => dict describing the param)
+    params = None
+
+    # List of return types
+    returns = None
+
+    # Static type
+    type = None
+
+    # Collected text of the comment (without the extracted doc relevant data)
+    text = None
+
+    # Cached HTML of the comment without code highlighting (filled lazily by getHtml)
+    __processedText = None
+
+    # Text of the comment converted to HTML including highlighting (only for doc comment)
+    __highlightedText = None
+
+    # Text / Code Blocks in the comment
+    __blocks = None
+
+
+    def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):
+        """
+        Creates the comment from its raw source text.
+
+        :param text: Raw comment including its delimiters ("//", "/*", "/**" or "/*!")
+        :param context: Relation to the code, stored unchanged
+        :param lineNo: Line number of the comment, used for error and warning messages
+        :param indent: Indentation string which is removed from the lines of a multi line comment
+        :param fileId: Identifier of the file, stored unchanged and used in warnings
+        :raises CommentException: When the text does not start with a known comment delimiter
+        """
+
+        self.context = context
+        self.fileId = fileId
+
+        # Opening delimiter, resulting variant and whether a closing "*/" has to be cut off.
+        # The order matters: "/**" and "/*!" have to be tested before the plain "/*".
+        variants = (
+            ("//", "single", False),
+            ("/**", "doc", True),
+            ("/*!", "protected", True),
+            ("/*", "multi", True),
+        )
+
+        for opener, variant, hasCloser in variants:
+            if text.startswith(opener):
+                # The opener is replaced by the same amount of spaces so that the
+                # columns of the content stay intact: "/** hello */" => "    hello "
+                content = text[len(opener):-2] if hasCloser else text[len(opener):]
+                text = " " * len(opener) + content
+                self.variant = variant
+                break
+
+        else:
+            raise CommentException("Invalid comment text: %s" % text, lineNo)
+
+        if "\n" in text:
+            # Multi line comments need to have their indentation removed
+            text = self.__outdent(text, indent, lineNo)
+        else:
+            # Single line comments only lose the surrounding whitespace: " hello " => "hello"
+            text = text.strip()
+
+        # The text of the comment before any processing took place
+        self.text = text
+
+        # Annotation parsing and markup stripping is limited to doc blocks
+        if self.variant != "doc":
+            return
+
+        # Separate text and code blocks
+        self.__blocks = self.__splitBlocks(text)
+
+        # Re-combine everything: text blocks are processed and freed from markup,
+        # code blocks are kept as they are, surrounded by empty lines
+        plainParts = []
+        for block in self.__blocks:
+
+            if block["type"] == "comment":
+                processed = block["processed"] = self.__processDoc(block["text"], lineNo)
+                plainParts.append(stripMarkup.sub("", processed) if "<" in processed else processed)
+
+            else:
+                plainParts.append("\n\n" + block["text"] + "\n\n")
+
+        # The text without any annotations
+        self.text = "".join(plainParts).strip()
+
+
+    def __splitBlocks(self, text):
+        """
+        Splits up text and code blocks in comments.
+
+        This will try to use hoedown for Markdown parsing if available and will
+        fallback to a simpler implementation in order to allow processing of
+        doc parameters and links without hoedown being installed.
+
+        :param text: Comment text to split
+        :return: List of dicts with the keys "type" ("comment" or "code") and
+            "text" (the matching lines of the original text)
+        """
+
+        if not Text.supportsMarkdown:
+            return self.__splitSimple(text)
+
+        # The HTML is only used to locate code blocks, the returned parts
+        # always contain lines of the original text
+        marked = Text.markdownToHtml(text)
+
+        def unescape(html):
+            # Reverts the HTML entities so that a rendered line can be compared with its source line
+            html = html.replace('&lt;', '<')
+            html = html.replace('&gt;', '>')
+            html = html.replace('&amp;', '&')
+            html = html.replace('&quot;', '"')
+            return html.replace('&#39;', "'")
+
+        parts = []
+
+        # lineNo points into the original lines, i into the lines of the generated HTML
+        lineNo = 0
+        lines = text.split("\n")
+        markedLines = marked.split("\n")
+
+        i = 0
+        while i < len(markedLines):
+
+            l = markedLines[i]
+
+            # the original text of the line
+            parsed = unescape(stripMarkup.sub("", l))
+
+            # start of a code block, grab all text before it and move it into a block
+            if l.startswith('<pre><code>'):
+
+                # everything since the last code block and before this one must be text
+                comment = []
+                for s in range(lineNo, len(lines)):
+
+                    # the first source line equal to the first rendered code line
+                    # (ignoring surrounding whitespace) marks the start of the code
+                    source = lines[s]
+                    if source.strip() == parsed.strip():
+                        lineNo = s
+                        break
+
+                    comment.append(source)
+
+                parts.append({
+                    "type": "comment",
+                    "text": "\n".join(comment)
+                })
+
+                # Find the end of the code block
+                e = i
+                while i < len(markedLines):
+                    l = markedLines[i]
+                    i += 1
+
+                    if l.startswith('</code></pre>'):
+                        break
+
+                # number of HTML lines consumed, not counting the closing line
+                # NOTE(review): presumably the closing tag always sits on its own line - confirm against the hoedown output
+                lineCount = (i - e) - 1
+
+                # add the code block
+                parts.append({
+                    "type": "code",
+                    "text": "\n".join(lines[lineNo:lineNo + lineCount])
+                })
+
+                lineNo += lineCount
+
+            else:
+                i += 1
+
+        # append the rest of the comment as text
+        parts.append({
+            "type": "comment",
+            "text": "\n".join(lines[lineNo:])
+        })
+
+        return parts
+
+
+    def __splitSimple(self, text):
+        """
+        Splits comment text and code blocks by manually parsing a subset of markdown.
+
+        A code block starts at a line which follows an empty line, is indented
+        by at least four more columns than the previous text and does not
+        follow a list item. It ends in front of the first non-empty line which
+        is indented less than the first line of the code block.
+
+        :param text: Comment text to split
+        :return: List of dicts with the keys "type" ("comment" or "code") and "text"
+        """
+
+        inCode = False
+        oldIndent = 0
+        parts = []
+        wasEmpty = False
+        wasList = False
+
+        lineNo = 0
+        lines = text.split("\n")
+
+        for s, l in enumerate(lines):
+
+            stripped = l.strip()
+
+            # empty lines are only relevant as separator in front of a code block
+            if stripped == "":
+                wasEmpty = True
+                continue
+
+            # get indentation value and change
+            indent = len(l) - len(l.lstrip())
+            change = indent - oldIndent
+
+            if inCode:
+
+                # Only an outdent relative to the first code line ends the block.
+                # Deeper indented code after an empty line stays part of the block.
+                if change < 0:
+                    inCode = False
+
+                    # Leave out the separating empty line, but keep the last
+                    # code line when the outdented text follows directly
+                    end = s - 1 if lines[s - 1].strip() == "" else s
+
+                    parts.append({
+                        "type": "code",
+                        "text": "\n".join(lines[lineNo:end])
+                    })
+
+                    lineNo = s
+
+            # detect code blocks (outdents of normal text are not block boundaries)
+            elif change >= 4 and wasEmpty and not wasList:
+                oldIndent = indent
+                inCode = True
+
+                parts.append({
+                    "type": "comment",
+                    "text": "\n".join(lines[lineNo:s])
+                })
+
+                lineNo = s
+
+            # only keep track of the previous indentation outside of code blocks
+            if not inCode:
+                oldIndent = indent
+
+            # remember whether this marked a list or not
+            wasList = stripped.startswith('-') or stripped.startswith('*')
+            wasEmpty = False
+
+        parts.append({
+            "type": "code" if inCode else "comment",
+            "text": "\n".join(lines[lineNo:])
+        })
+
+        return parts
+
+
+    def getHtml(self, highlight=True):
+        """
+        Returns the comment text converted to HTML
+
+        The result is computed on first use and cached separately for the
+        highlighted and the plain variant.
+
+        :param highlight: Whether to highlight the code
+        :type highlight: bool
+        :raises UserError: When Markdown conversion is not supported
+        """
+
+        if not Text.supportsMarkdown:
+            raise UserError("Markdown is not supported by the system. Documentation comments could not be converted to HTML.")
+
+        if highlight:
+
+            if self.__highlightedText is None:
+
+                htmlParts = []
+
+                for block in self.__blocks:
+
+                    if block["type"] == "comment":
+                        htmlParts.append(Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"])))
+                    else:
+                        htmlParts.append("\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"])))
+
+                self.__highlightedText = "".join(htmlParts)
+
+            return self.__highlightedText
+
+        else:
+
+            if self.__processedText is None:
+
+                htmlParts = []
+
+                for block in self.__blocks:
+
+                    if block["type"] == "comment":
+                        htmlParts.append(Text.markdownToHtml(block["processed"]))
+                    else:
+                        # code blocks are passed through unconverted
+                        htmlParts.append("\n%s\n\n" % block["text"])
+
+                self.__processedText = "".join(htmlParts).strip()
+
+            return self.__processedText
+
+
+    def hasContent(self):
+        """
+        Returns a truthy value when this is a doc comment with non-empty text.
+
+        The result is False for other variants and the length of the text for doc comments.
+        """
+
+        if self.variant != "doc":
+            return False
+
+        return len(self.text)
+
+
+    def getTags(self):
+        """
+        Returns the dictionary of collected tags (None when no tag was collected).
+        """
+
+        collected = self.tags
+        return collected
+
+
+    def hasTag(self, name):
+        """
+        Returns whether a tag with the given name was collected.
+
+        :param name: Name of the tag to look for
+        """
+
+        collected = self.tags
+        return bool(collected) and name in collected
+
+
+    def __outdent(self, text, indent, startLineNo):
+        """
+        Outdents multi line comment text and removes the leading doc stars.
+
+        :param text: Comment text to outdent
+        :param indent: Indentation string expected in front of every non-empty line
+        :param startLineNo: Line number of the comment, used to compute the line reported in warnings
+        :return: The outdented text without leading and trailing new lines, or
+            the unmodified text when a line does not start with the indent
+        """
+
+        cleaned = []
+
+        # Step 1: split up the lines and cut the leading indentation off each of them
+        for offset, line in enumerate((indent+text).split("\n")):
+
+            if line.startswith(indent):
+                cleaned.append(line[len(indent):].rstrip())
+                continue
+
+            if line.strip() == "":
+                cleaned.append("")
+                continue
+
+            # Only warn for doc comments, otherwise it might just be code commented out
+            # which is sometimes formatted pretty crazy when commented out
+            if self.variant == "doc":
+                Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+offset, self.fileId)
+
+            return text
+
+        # Step 2: look for the first line with content behind the doc indent (star plus
+        # whitespace) to figure out the characters to remove from every line
+        outdentString = ""
+        for line in cleaned:
+
+            if line.strip() == "":
+                continue
+
+            matchedDocIndent = docIndentReg.match(line)
+
+            # As soon as we find a line which does not look like a doc indent we stop
+            if not matchedDocIndent:
+                break
+
+            # Content behind the indent gives us the real indent (including spaces)
+            if matchedDocIndent.group(2) != "":
+                outdentString = matchedDocIndent.group(1)
+                break
+
+        # Step 3: remove the outdentString from the start of all lines
+        if outdentString != "":
+
+            outdentStringLen = len(outdentString)
+
+            for index, line in enumerate(cleaned):
+
+                if len(line) <= outdentStringLen:
+                    cleaned[index] = ""
+
+                elif line.startswith(outdentString):
+                    cleaned[index] = line[outdentStringLen:]
+
+                # Only warn for doc comments, otherwise it might just be code commented out
+                # which is sometimes formatted pretty crazy when commented out
+                elif self.variant == "doc":
+                    Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+index, self.fileId)
+
+        # Merge final lines and remove leading and trailing new lines
+        return "\n".join(cleaned).strip("\n")
+
+
+    def __processDoc(self, text, startLineNo):
+        """
+        Extracts type, return and tag annotations from the text and converts
+        params and links into markup.
+
+        :param text: Text of a comment block
+        :param startLineNo: Line number of the comment (currently not used)
+        :return: The processed text
+        """
+
+        for extract in (self.__extractStaticType, self.__extractReturns, self.__extractTags):
+            text = extract(text)
+
+        # Collapse new empty lines at start/end
+        text = text.strip("\n\t ")
+
+        # Params are only processed in the text outside of backticks,
+        # the backtick sections themselves are copied over unmodified
+        pieces = []
+        last = 0
+        for match in tickMatcher.finditer(text):
+            start, end = match.span()
+            pieces.append(self.__processParams(text[last:start]))
+            pieces.append(match.group(1))
+            last = end
+
+        # add the rest of the text
+        pieces.append(self.__processParams(text[last:]))
+
+        return self.__processLinks("".join(pieces))
+
+
+    def __splitTypeList(self, decl):
+        """
+        Converts a type list like "String | Number[]" into a list of dicts.
+
+        Every dict has the key "name" and, where applicable, the flags
+        "array", "builtin" and "pseudo" (each set to True).
+
+        :param decl: Type declaration or None
+        :return: List of type dicts, or None when decl is None
+        """
+
+        if decl is None:
+            return decl
+
+        result = []
+        for entry in listSplit.split(decl.strip()):
+
+            # A trailing "[]" marks the type as array and is not part of the name
+            isArray = entry.endswith("[]")
+            if isArray:
+                entry = entry[:-2]
+
+            store = {"name": entry}
+
+            flags = (
+                ("array", isArray),
+                ("builtin", entry in builtinTypes),
+                ("pseudo", entry in pseudoTypes),
+            )
+
+            for flag, active in flags:
+                if active:
+                    store[flag] = True
+
+            result.append(store)
+
+        return result
+
+
+
+    def __extractReturns(self, text):
+        """
+        Extracts a leading return definition (when type is function).
+
+        The parsed type list is stored in "returns" and the definition is
+        removed from the returned text.
+        """
+
+        # The pattern is anchored to the start of the text, so there is at most one match
+        match = returnMatcher.match(text)
+        if match is None:
+            return text
+
+        self.returns = self.__splitTypeList(match.group(1))
+        return text[match.end():]
+
+
+
+    def __extractStaticType(self, text):
+        """
+        Extracts a leading type definition (when value is a static type).
+
+        The type name is stored in "type" and the definition is removed from
+        the returned text.
+        """
+
+        # The pattern is anchored to the start of the text, so there is at most one match
+        match = typeMatcher.match(text)
+        if match is None:
+            return text
+
+        self.type = match.group(1).strip()
+        return text[match.end():]
+
+
+
+    def __extractTags(self, text):
+        """
+        Extracts all tags inside the given doc comment. These are removed from
+        the text and collected inside the "tags" attribute as a dict.
+
+        A tag without parameter is stored as True, a tag with parameters as
+        the set of all parameters seen for it.
+
+        :param text: Text of a comment block
+        :return: The text without the tags
+        """
+
+        def collectTags(match):
+            if not self.tags:
+                self.tags = {}
+
+            name = match.group(1)
+            param = match.group(3)
+            known = self.tags.get(name)
+
+            if param:
+                if isinstance(known, set):
+                    known.add(param)
+                else:
+                    # First parameter for this tag. This also replaces a plain
+                    # True stored by an earlier use of the tag without parameter,
+                    # which has no add() method.
+                    self.tags[name] = set([param])
+
+            elif known is None:
+                # A repeated tag without parameter keeps what is already stored
+                self.tags[name] = True
+
+            return ""
+
+        return tagMatcher.sub(collectTags, text)
+
+
+    def __processParams(self, text):
+        """
+        Collects all param declarations of the text inside the "params"
+        attribute and replaces each declaration with a code element which
+        contains the name of the param.
+
+        Dotted names like "options.x" are stored as nested entries inside
+        the "fields" dict of their parent entry.
+
+        :param text: Text to process
+        :return: The text with the declarations replaced
+        """
+
+        def collectParams(match):
+
+            fullName = match.group(1).strip()
+            paramTypes = match.group(3)
+            paramDynamic = match.group(4) is not None
+            paramOptional = match.group(5) is not None
+            paramDefault = match.group(7)
+
+            if paramTypes:
+                paramTypes = self.__splitTypeList(paramTypes)
+
+            if self.params is None:
+                self.params = {}
+
+            level = self.params
+            names = fullName.split('.')
+            lastIndex = len(names) - 1
+
+            for i, mapName in enumerate(names):
+
+                # Ensure we have the map object in the params
+                if mapName not in level:
+                    level[mapName] = {}
+
+                # Only the last name of a declaration with types (re)defines its entry. This makes
+                # sure to not overwrite something like @options {Object} with the type of @options.x {Number}
+                if paramTypes is not None and i == lastIndex:
+
+                    paramEntry = level[mapName] = {}
+                    paramEntry["type"] = paramTypes
+
+                    if paramDynamic:
+                        paramEntry["dynamic"] = paramDynamic
+
+                    if paramOptional:
+                        paramEntry["optional"] = paramOptional
+
+                    if paramDefault is not None:
+                        paramEntry["default"] = paramDefault
+
+                else:
+                    paramEntry = level[mapName]
+
+                # create fields for new map level
+                if i < lastIndex:
+                    if "fields" not in paramEntry:
+                        paramEntry["fields"] = {}
+
+                    level = paramEntry["fields"]
+
+            return '<code class="param">%s</code>' % fullName
+
+        return paramMatcher.sub(collectParams, text)
+
+
+    def __processLinks(self, text):
+        """
+        Replaces links of the own dialect like {member:foo.Bar#baz} with HTML anchors.
+
+        :param text: Text to process
+        :return: The text with the links replaced
+        """
+
+        def formatTypes(match):
+
+            section, fileName, item = match.group(3, 4, 6)
+
+            # Do not match {}
+            if section is None and fileName is None and item is None:
+                return match.group(1)
+
+            # The section is only used in combination with an item
+            link = '%s:' % section if section and item else ""
+            label = ""
+
+            if fileName:
+                link += fileName
+                label = fileName
+
+            if item:
+                link += "~%s" % item
+                label = item if label == "" else label + "#%s" % item
+
+            # build final HTML with the link as only attribute
+            attr = ' href="#%s"' % link
+            return '<a%s><code>%s</code></a>' % (attr, label)
+
+        return linkMatcher.sub(formatTypes, text)
+

eric ide

mercurial