ThirdParty/Jasy/jasy/script/api/Comment.py

#
# Jasy - Web Tooling Framework
# Copyright 2010-2012 Zynga Inc.
# Copyright 2013-2014 Sebastian Werner
#

from __future__ import unicode_literals

import re

import jasy.core.Text as Text
import jasy.core.Console as Console

from jasy import UserError
from jasy.script.util import *


# Used to measure the doc indent size (with leading stars in front of content)
docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")

# Used to split type lists as supported by throw, return and params
listSplit = re.compile(r"\s*\|\s*")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")



# Matches return blocks in comments
returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")

# Matches type definitions in comments
typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")

# Matches tags
tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")

# Matches param declarations in own dialect
paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")

# Matches links in own dialect
linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")

# Matches backticks and has a built-in failsafe for backticks which do not terminate on the same line
tickMatcher = re.compile(r"(`[^\n`]*?`)")
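
# Illustrative example of the doc comment dialect these expressions target
# (assumed from the patterns above, not taken from upstream documentation):
#
#   /**
#    * Logs a message to the console.
#    *
#    * @message {String} the text to print
#    * #deprecated #since(1.5)
#    *
#    * See also {member:Console#warn}.
#    */
#
# paramMatcher captures "@message {String}", tagMatcher captures
# "#deprecated" and "#since(1.5)", and linkMatcher captures
# "{member:Console#warn}".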


class CommentException(Exception):
    """
    Thrown when errors during comment processing are detected.
    """

    def __init__(self, message, lineNo=0):
        Exception.__init__(self, "Comment error: %s (line: %s)" % (message, lineNo+1))




class Comment():
    """
    The Comment class is attached to parsed nodes and stores all comment related information.

    The class supports a new Markdown and TomDoc inspired dialect to make developers' lives easier and their work less repetitive.
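
    A minimal usage sketch (illustrative; the exact type entries depend on
    the builtinTypes/pseudoTypes sets from jasy.script.util):

        comment = Comment("/** Sum of two values. @a {Number} @b {Number} */")
        comment.variant   # "doc"
        comment.params    # {"a": {"type": [...]}, "b": {"type": [...]}}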
    """

    # Relation to code
    context = None

    # Dictionary of tags
    tags = None

    # Dictionary of params
    params = None

    # List of return types
    returns = None

    # Static type
    type = None

    # Collected text of the comment (without the extracted doc relevant data)
    text = None

    # Text with extracted / parsed data
    __processedText = None

    # Text of the comment converted to HTML including highlighting (only for doc comments)
    __highlightedText = None

    # Text / Code Blocks in the comment
    __blocks = None


    def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):

        # Store context (relation to code)
        self.context = context

        # Store fileId
        self.fileId = fileId

        # Figure out the type of the comment based on the starting characters

        # Inline comments
        if text.startswith("//"):
            # "// hello" => "   hello"
            text = "  " + text[2:]
            self.variant = "single"

        # Doc comments
        elif text.startswith("/**"):
            # "/** hello */" => "    hello "
            text = "   " + text[3:-2]
            self.variant = "doc"

        # Protected comments which should not be removed (e.g. these are used for license blocks)
        elif text.startswith("/*!"):
            # "/*! hello */" => "    hello "
            text = "   " + text[3:-2]
            self.variant = "protected"

        # A normal multiline comment
        elif text.startswith("/*"):
            # "/* hello */" => "   hello "
            text = "  " + text[2:-2]
            self.variant = "multi"

        else:
            raise CommentException("Invalid comment text: %s" % text, lineNo)

        # Multi line comments need to have their indentation removed
        if "\n" in text:
            text = self.__outdent(text, indent, lineNo)

        # For single line comments strip the surrounding whitespace
        else:
            # " hello " => "hello"
            text = text.strip()

        # The text of the comment before any processing took place
        self.text = text


        # Perform annotation parsing, markdown conversion and code highlighting on doc blocks
        if self.variant == "doc":

            # Separate text and code blocks
            self.__blocks = self.__splitBlocks(text)

            # Re-combine everything and apply processing and formatting
            plainText = '' # text without annotations but with markdown
            for b in self.__blocks:

                if b["type"] == "comment":

                    processed = self.__processDoc(b["text"], lineNo)
                    b["processed"] = processed

                    if "<" in processed:
                        plainText += stripMarkup.sub("", processed)

                    else:
                        plainText += processed

                else:
                    plainText += "\n\n" + b["text"] + "\n\n"

            # The text without any annotations
            self.text = plainText.strip()


    def __splitBlocks(self, text):
        """
        Splits up text and code blocks in comments.

        This will try to use hoedown for Markdown parsing if available and will
        fall back to a simpler implementation in order to allow processing of
        doc parameters and links without hoedown being installed.
        """

        if not Text.supportsMarkdown:
            return self.__splitSimple(text)

        marked = Text.markdownToHtml(text)

        def unescape(html):
            html = html.replace('&lt;', '<')
            html = html.replace('&gt;', '>')
            html = html.replace('&amp;', '&')
            html = html.replace('&quot;', '"')
            return html.replace('&#39;', "'")

        parts = []

        lineNo = 0
        lines = text.split("\n")
        markedLines = marked.split("\n")

        i = 0
        while i < len(markedLines):

            l = markedLines[i]

            # the original text of the line
            parsed = unescape(stripMarkup.sub("", l))

            # start of a code block, grab all text before it and move it into a block
            if l.startswith('<pre><code>'):

                # everything since the last code block and before this one must be text
                comment = []
                for s in range(lineNo, len(lines)):

                    source = lines[s]
                    if source.strip() == parsed.strip():
                        lineNo = s
                        break

                    comment.append(source)

                parts.append({
                    "type": "comment",
                    "text": "\n".join(comment)
                })

                # Find the end of the code block
                e = i
                while i < len(markedLines):
                    l = markedLines[i]
                    i += 1

                    if l.startswith('</code></pre>'):
                        break

                lineCount = (i - e) - 1

                # add the code block
                parts.append({
                    "type": "code",
                    "text": "\n".join(lines[lineNo:lineNo + lineCount])
                })

                lineNo += lineCount

            else:
                i += 1

        # append the rest of the comment as text
        parts.append({
            "type": "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def __splitSimple(self, text):
        """Splits comment text and code blocks by manually parsing a subset of markdown"""

        inCode = False
        oldIndent = 0
        parts = []
        wasEmpty = False
        wasList = False

        lineNo = 0
        lines = text.split("\n")

        for s, l in enumerate(lines):

            # ignore empty lines
            if not l.strip() == "":

                # get indentation value and change
                indent = len(l) - len(l.lstrip())
                change = indent - oldIndent

                # detect code blocks
                if change >= 4 and wasEmpty:
                    if not wasList:
                        oldIndent = indent
                        inCode = True

                        parts.append({
                            "type": "comment",
                            "text": "\n".join(lines[lineNo:s])
                        })

                        lineNo = s

                # detect outdents
                elif change < 0:
                    inCode = False

                    parts.append({
                        "type": "code",
                        "text": "\n".join(lines[lineNo:s - 1])
                    })

                    lineNo = s

                # only keep track of the previous indentation outside of code blocks
                if not inCode:
                    oldIndent = indent

                # remember whether this marked a list or not
                wasList = l.strip().startswith('-') or l.strip().startswith('*')
                wasEmpty = False

            else:
                wasEmpty = True

        parts.append({
            "type": "code" if inCode else "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts


    def getHtml(self, highlight=True):
        """
        Returns the comment text converted to HTML.

        :param highlight: Whether to highlight the code
        :type highlight: bool
        """

        if not Text.supportsMarkdown:
            raise UserError("Markdown is not supported by the system. Documentation comments could not be converted to HTML.")

        if highlight:

            if self.__highlightedText is None:

                highlightedText = ""

                for block in self.__blocks:

                    if block["type"] == "comment":
                        highlightedText += Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"]))
                    else:
                        highlightedText += "\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"]))

                self.__highlightedText = highlightedText

            return self.__highlightedText

        else:

            if self.__processedText is None:

                processedText = ""

                for block in self.__blocks:

                    if block["type"] == "comment":
                        processedText += Text.markdownToHtml(block["processed"])
                    else:
                        processedText += "\n%s\n\n" % block["text"]

                self.__processedText = processedText.strip()

            return self.__processedText


    def hasContent(self):
        return self.variant == "doc" and len(self.text)


    def getTags(self):
        return self.tags


    def hasTag(self, name):
        if not self.tags:
            return False

        return name in self.tags


    def __outdent(self, text, indent, startLineNo):
        """
        Outdent multi line comment text and filtering empty lines
        """

        lines = []

        # First, split up the comment's lines and remove the leading indentation
        for lineNo, line in enumerate((indent+text).split("\n")):

            if line.startswith(indent):
                lines.append(line[len(indent):].rstrip())

            elif line.strip() == "":
                lines.append("")

            else:
                # Only warn for doc comments; otherwise it might just be commented
                # out code, which is often formatted rather oddly
                if self.variant == "doc":
                    Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

                return text

        # Find the first line with real content and derive from it the doc
        # indentation string (e.g. " * ") which needs to be stripped from all lines
        outdentString = ""
        for lineNo, line in enumerate(lines):

            if line != "" and line.strip() != "":
                matchedDocIndent = docIndentReg.match(line)

                if not matchedDocIndent:
                    # As soon as we find a line which does not look like a doc indent we stop
                    break

                elif matchedDocIndent.group(2) != "":
                    # otherwise we look for content behind the indent to get the
                    # correct real indent (with spaces)
                    outdentString = matchedDocIndent.group(1)
                    break


        # Process outdenting to all lines (remove the outdentString from the start of the lines)
        if outdentString != "":

            lineNo = 0
            outdentStringLen = len(outdentString)

            for lineNo, line in enumerate(lines):
                if len(line) <= outdentStringLen:
                    lines[lineNo] = ""

                else:
                    if not line.startswith(outdentString):

                        # Only warn for doc comments; otherwise it might just be commented
                        # out code, which is often formatted rather oddly
                        if self.variant == "doc":
                            Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+lineNo, self.fileId)

                    else:
                        lines[lineNo] = line[outdentStringLen:]

        # Merge final lines and remove leading and trailing new lines
        return "\n".join(lines).strip("\n")


    def __processDoc(self, text, startLineNo):

        text = self.__extractStaticType(text)
        text = self.__extractReturns(text)
        text = self.__extractTags(text)

        # Collapse new empty lines at start/end
        text = text.strip("\n\t ")

        parsed = ''

        # Now parse only the text outside of backticks
        last = 0
        def split(match):

            # Grab the text before the back tick and process any parameters in it
            nonlocal parsed
            nonlocal last

            start, end = match.span()
            before = text[last:start]
            parsed += self.__processParams(before) + match.group(1)
            last = end

        tickMatcher.sub(split, text)

        # add the rest of the text
        parsed += self.__processParams(text[last:])

        text = self.__processLinks(parsed)

        return text


    def __splitTypeList(self, decl):
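        # Illustrative example (the "builtin"/"pseudo" flags depend on the
        # builtinTypes/pseudoTypes sets imported from jasy.script.util):
        #
        #   "String | Number[]"  ->  [{"name": "String", ...},
        #                             {"name": "Number", "array": True, ...}]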

        if decl is None:
            return decl

        splitted = listSplit.split(decl.strip())

        result = []
        for entry in splitted:

            # Figure out if it is marked as array
            isArray = False
            if entry.endswith("[]"):
                isArray = True
                entry = entry[:-2]

            store = {
                "name" : entry
            }

            if isArray:
                store["array"] = True

            if entry in builtinTypes:
                store["builtin"] = True

            if entry in pseudoTypes:
                store["pseudo"] = True

            result.append(store)

        return result



    def __extractReturns(self, text):
        """
        Extracts leading return defintion (when type is function)
        """

        def collectReturn(match):
            self.returns = self.__splitTypeList(match.group(1))
            return ""

        return returnMatcher.sub(collectReturn, text)



    def __extractStaticType(self, text):
        """
        Extracts leading type defintion (when value is a static type)
        """

        def collectType(match):
            self.type = match.group(1).strip()
            return ""

        return typeMatcher.sub(collectType, text)



    def __extractTags(self, text):
        """
        Extract all tags inside the give doc comment. These are replaced from
        the text and collected inside the "tags" key as a dict.
        """

        def collectTags(match):
            if not self.tags:
                self.tags = {}

            name = match.group(1)
            param = match.group(3)

            if name in self.tags:
                self.tags[name].add(param)
            elif param:
                self.tags[name] = set([param])
            else:
                self.tags[name] = True

            return ""

        return tagMatcher.sub(collectTags, text)


    def __processParams(self, text):
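
        # Illustrative example: "@options {Object} and @options.x {Number}"
        # produces self.params roughly like
        #
        #   {"options": {"type": [...],
        #                "fields": {"x": {"type": [...]}}}}
        #
        # and replaces each declaration in the text with
        # '<code class="param">options</code>' style markup.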

        def collectParams(match):

            paramName = match.group(1)
            paramTypes = match.group(3)
            paramDynamic = match.group(4) is not None
            paramOptional = match.group(5) is not None
            paramDefault = match.group(7)

            if paramTypes:
                paramTypes = self.__splitTypeList(paramTypes)

            if self.params is None:
                self.params = {}

            params = self.params
            fullName = match.group(1).strip()
            names = fullName.split('.')

            for i, mapName in enumerate(names):

                # Ensure we have the map object in the params
                if mapName not in params:
                    params[mapName] = {}

                # Add new entries and overwrite if a type is defined in this entry
                if mapName not in params or paramTypes is not None:

                    # Make sure to not overwrite something like @options {Object} with the type of @options.x {Number}
                    if i == len(names) - 1:

                        paramEntry = params[mapName] = {}

                        if paramTypes is not None:
                            paramEntry["type"] = paramTypes

                        if paramDynamic:
                            paramEntry["dynamic"] = paramDynamic

                        if paramOptional:
                            paramEntry["optional"] = paramOptional

                        if paramDefault is not None:
                            paramEntry["default"] = paramDefault

                    else:
                        paramEntry = params[mapName]


                else:
                    paramEntry = params[mapName]

                # create fields for new map level
                if i + 1 < len(names):
                    if not "fields" in paramEntry:
                        paramEntry["fields"] = {}

                    params = paramEntry["fields"]

            return '<code class="param">%s</code>' % fullName

        return paramMatcher.sub(collectParams, text)


    def __processLinks(self, text):
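
        # Illustrative example: "{member:Console#warn}" is turned into roughly
        #   <a href="#member:Console~warn"><code>Console#warn</code></a>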

        def formatTypes(match):

            parsedSection = match.group(3)
            parsedFile = match.group(4)
            parsedItem = match.group(6)

            # Do not match {}
            if parsedSection is None and parsedFile is None and parsedItem is None:
                return match.group(1)

            # Minor corrections
            if parsedSection and not parsedItem:
                parsedSection = ""

            attr = ""
            link = ""
            label = ""

            if parsedSection:
                link += '%s:' % parsedSection

            if parsedFile:
                link += parsedFile
                label += parsedFile

            if parsedItem:
                link += "~%s" % parsedItem
                if label == "":
                    label = parsedItem
                else:
                    label += "#%s" % parsedItem

            # add link to attributes list
            attr += ' href="#%s"' % link

            # build final HTML
            return '<a%s><code>%s</code></a>' % (attr, label)

        return linkMatcher.sub(formatTypes, text)
