eric: comparison ThirdParty/Jasy/jasy/script/api/Comment.py

-:f1b3a73831c9
+:1dd52aa8897c
+#
+# Jasy - Web Tooling Framework
+# Copyright 2010-2012 Zynga Inc.
+# Copyright 2013-2014 Sebastian Werner
+#
+from __future__ import unicode_literals
+import re
+import jasy.core.Text as Text
+import jasy.core.Console as Console
+from jasy import UserError
+from jasy.script.util import *
# Used to measure the doc indent size (with leading stars in front of content)
docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")

# Used to split type lists as supported by throw, return and params.
# Written as a raw string: "\s" and "\|" are not valid string escapes and
# trigger a SyntaxWarning on recent Python versions when used in a plain string.
listSplit = re.compile(r"\s*\|\s*")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")

# Matches return blocks in comments
returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")

# Matches type definitions in comments
typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")

# Matches tags
tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")

# Matches param declarations in own dialect
paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")

# Matches links in own dialect
linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")

# Matches backticks and has a built-in failsafe for backticks which do not
# terminate on the same line
tickMatcher = re.compile(r"(`[^\n`]*?`)")
class CommentException(Exception):
    """
    Raised when a problem is detected while processing a comment.

    The message is prefixed with "Comment error:" and carries the
    one-based line number derived from the zero-based `lineNo`.
    """

    def __init__(self, message, lineNo=0):
        # Line numbers are tracked zero-based internally, reported one-based
        humanLine = lineNo + 1
        formatted = "Comment error: %s (line: %s)" % (message, humanLine)
        super(CommentException, self).__init__(formatted)
class Comment:
    """
    Holds all comment related information for a parsed node.

    Supports a Markdown and TomDoc inspired dialect intended to make
    writing documentation less repetitive for developers.
    """

    # Relation of the comment to the code
    context = None

    # Dictionary of tags found in the comment
    tags = None

    # Dictionary of documented params
    params = None

    # List of return types
    returns = None

    # Static type
    type = None

    # Collected comment text (without the extracted doc relevant data)
    text = None

    # Cached HTML of the processed text (without highlighting)
    __processedText = None

    # Cached HTML including highlighting (only for doc comments)
    __highlightedText = None

    # Text / code blocks the comment was split into
    __blocks = None
def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):
    """
    Parse the raw comment `text` and store its variant and cleaned-up text.

    :param text: Raw comment source including its delimiters
    :param context: Relation of the comment to the code
    :param lineNo: Line number where the comment starts (used in messages)
    :param indent: Indentation string removed from multi line comments
    :param fileId: Identifier of the file (used in warnings)
    :raises CommentException: If `text` does not start with a known comment opener
    """

    self.context = context
    self.fileId = fileId

    # (opener, variant, whether a closing "*/" has to be cut off).
    # Order matters: "/**" and "/*!" must be tested before the generic "/*".
    variants = (
        ("//", "single", False),
        ("/**", "doc", True),
        ("/*!", "protected", True),
        ("/*", "multi", True),
    )

    for opener, variant, hasCloser in variants:
        if text.startswith(opener):
            # Replace the opener by spaces of the same width, e.g.
            # "/** hello */" => "    hello "
            inner = text[len(opener):-2] if hasCloser else text[len(opener):]
            text = " " * len(opener) + inner
            self.variant = variant
            break
    else:
        raise CommentException("Invalid comment text: %s" % text, lineNo)

    # Multi line comments get outdented, single line ones are just stripped
    text = self.__outdent(text, indent, lineNo) if "\n" in text else text.strip()

    # The text of the comment before any processing took place
    self.text = text

    # Annotation parsing and markdown handling applies to doc comments only
    if self.variant != "doc":
        return

    # Separate text and code blocks
    blocks = self.__splitBlocks(text)
    self.__blocks = blocks

    # Re-combine everything into text without annotations and markup
    fragments = []
    for block in blocks:
        if block["type"] == "comment":
            processed = self.__processDoc(block["text"], lineNo)
            block["processed"] = processed
            fragments.append(stripMarkup.sub("", processed) if "<" in processed else processed)
        else:
            fragments.append("\n\n" + block["text"] + "\n\n")

    self.text = "".join(fragments).strip()
def __splitBlocks(self, text):
    """
    Splits up text and code blocks in comments.

    Uses the Markdown support of the Text module when available and falls
    back to a simpler implementation otherwise, so that doc parameters and
    links can still be processed.

    :param text: Outdented comment text
    :return: List of dicts with the keys "type" ("comment" or "code") and "text"
    """

    if not Text.supportsMarkdown:
        return self.__splitSimple(text)

    marked = Text.markdownToHtml(text)

    def unescape(html):
        # "&amp;" has to be decoded last. Decoding it earlier turns an escaped
        # "&amp;quot;" into "&quot;" which would then wrongly be decoded again,
        # so the line would no longer match the original source line.
        html = html.replace('&lt;', '<')
        html = html.replace('&gt;', '>')
        html = html.replace('&quot;', '"')
        html = html.replace('&#39;', "'")
        return html.replace('&amp;', '&')

    parts = []

    lineNo = 0
    lines = text.split("\n")
    markedLines = marked.split("\n")

    i = 0
    while i < len(markedLines):

        l = markedLines[i]

        # the original text of the line
        parsed = unescape(stripMarkup.sub("", l))

        # start of a code block, grab all text before it and move it into a block
        if l.startswith('<pre><code>'):

            # everything since the last code block and before this one must be text
            comment = []
            for s in range(lineNo, len(lines)):

                source = lines[s]
                if source.strip() == parsed.strip():
                    lineNo = s
                    break

                comment.append(source)

            parts.append({
                "type": "comment",
                "text": "\n".join(comment)
            })

            # Find the end of the code block
            e = i
            while i < len(markedLines):
                l = markedLines[i]
                i += 1

                if l.startswith('</code></pre>'):
                    break

            lineCount = (i - e) - 1

            # add the code block
            parts.append({
                "type": "code",
                "text": "\n".join(lines[lineNo:lineNo + lineCount])
            })

            lineNo += lineCount

        else:
            i += 1

    # append the rest of the comment as text
    parts.append({
        "type": "comment",
        "text": "\n".join(lines[lineNo:])
    })

    return parts
+def __splitSimple(self, text):
+"""Splits comment text and code blocks by manually parsing a subset of markdown"""
+inCode = False
+oldIndent = 0
+parts = []
+wasEmpty = False
+wasList = False
+lineNo = 0
+lines = text.split("\n")
+for s, l in enumerate(lines):
+# ignore empty lines
+if not l.strip() == "":
+# get indentation value and change
+indent = len(l) - len(l.lstrip())
+change = indent - oldIndent
+# detect code blocks
+if change >= 4 and wasEmpty:
+if not wasList:
+oldIndent = indent
+inCode = True
+parts.append({
+"type": "comment",
+"text": "\n".join(lines[lineNo:s])
+})
+lineNo = s
+# detect outdents
+elif change < 0:
+inCode = False
+parts.append({
+"type": "code",
+"text": "\n".join(lines[lineNo:s - 1])
+})
+lineNo = s
+# only keep track of old previous indentation outside of comments
+if not inCode:
+oldIndent = indent
+# remember whether this marked a list or not
+wasList = l.strip().startswith('-') or l.strip().startswith('*')
+wasEmpty = False
+else:
+wasEmpty = True
+parts.append({
+"type": "code" if inCode else "comment",
+"text": "\n".join(lines[lineNo:])
+})
+return parts
def getHtml(self, highlight=True):
    """
    Returns the comment text converted to HTML.

    The result is computed once per mode and cached on the instance.
    Comments without parsed blocks (anything but doc comments) yield an
    empty string.

    :param highlight: Whether to highlight the code
    :type highlight: bool
    :raises UserError: If Markdown is not supported by the system
    """

    if not Text.supportsMarkdown:
        raise UserError("Markdown is not supported by the system. Documentation comments could not be converted to HTML.")

    # Blocks are only created for doc comments, treat others as empty
    blocks = self.__blocks or ()

    if highlight:
        if self.__highlightedText is None:
            chunks = []
            for block in blocks:
                if block["type"] == "comment":
                    chunks.append(Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"])))
                else:
                    chunks.append("\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"])))

            self.__highlightedText = "".join(chunks)

        return self.__highlightedText

    if self.__processedText is None:
        chunks = []
        for block in blocks:
            if block["type"] == "comment":
                chunks.append(Text.markdownToHtml(block["processed"]))
            else:
                # Code blocks are passed through unconverted
                chunks.append("\n%s\n\n" % block["text"])

        self.__processedText = "".join(chunks).strip()

    return self.__processedText
+def hasContent(self):
+return self.variant == "doc" and len(self.text)
+def getTags(self):
+return self.tags
+def hasTag(self, name):
+if not self.tags:
+return False
+return name in self.tags
def __outdent(self, text, indent, startLineNo):
    """
    Outdents multi line comment text.

    First removes the code indentation `indent` from every line, then
    removes the doc indent (leading star plus whitespace) as found on the
    first line which carries content behind the star.

    :param text: Comment text (delimiters already replaced)
    :param indent: Indentation string of the comment in the source
    :param startLineNo: Line number of the comment start (used in warnings)
    :return: Outdented text; the unmodified `text` when a line does not start with `indent`
    """

    indentLen = len(indent)
    lines = []

    # Split up the comment lines and remove the leading indentation
    for offset, raw in enumerate((indent + text).split("\n")):
        if raw.startswith(indent):
            lines.append(raw[indentLen:].rstrip())
            continue

        if raw.strip() == "":
            lines.append("")
            continue

        # Only warn for doc comments, otherwise it might just be code
        # commented out which is sometimes formatted pretty crazy
        if self.variant == "doc":
            Console.warn("Could not outdent doc comment at line %s in %s", startLineNo+offset, self.fileId)

        return text

    # Look at the first lines with real content to figure out the doc indent
    outdentString = ""
    for line in lines:
        if line.strip() == "":
            continue

        matchedDocIndent = docIndentReg.match(line)
        if not matchedDocIndent:
            # As soon as we find a non doc indent like line we stop
            break

        if matchedDocIndent.group(2) != "":
            # Content behind the indent gives us the real indent (with spaces)
            outdentString = matchedDocIndent.group(1)
            break

    # Remove the outdentString from the start of all lines
    if outdentString:
        cut = len(outdentString)
        for idx, line in enumerate(lines):
            if len(line) <= cut:
                lines[idx] = ""
            elif line.startswith(outdentString):
                lines[idx] = line[cut:]
            elif self.variant == "doc":
                # Only warn for doc comments (see above)
                Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo+idx, self.fileId)

    # Merge final lines and remove leading and trailing new lines
    return "\n".join(lines).strip("\n")
def __processDoc(self, text, startLineNo):
    """
    Extracts annotations from a text block and converts params and links.

    Static type, return types and tags are extracted first. Param
    declarations are then processed only outside of backtick sections,
    links are processed on the combined result.

    :param text: Text of a comment block
    :param startLineNo: Line number of the comment start (currently unused)
    :return: Processed text
    """

    text = self.__extractStaticType(text)
    text = self.__extractReturns(text)
    text = self.__extractTags(text)

    # Collapse new empty lines at start/end
    text = text.strip("\n\t ")

    # Walk over all backtick sections: the text in front of each one gets
    # its params processed while the section itself is kept untouched.
    pieces = []
    cursor = 0
    for tick in tickMatcher.finditer(text):
        begin, stop = tick.span()
        pieces.append(self.__processParams(text[cursor:begin]))
        pieces.append(tick.group(1))
        cursor = stop

    # add the rest of the text
    pieces.append(self.__processParams(text[cursor:]))

    return self.__processLinks("".join(pieces))
def __splitTypeList(self, decl):
    """
    Converts a type declaration like "String | Number[]" into a list of dicts.

    Every dict has a "name" and optionally the flags "array", "builtin"
    and "pseudo" (each True when set).

    :param decl: Type declaration or None
    :return: List of type dicts, or None when `decl` is None
    """

    if decl is None:
        return decl

    result = []
    for token in listSplit.split(decl.strip()):
        # A trailing "[]" marks an array of the given type
        isArray = token.endswith("[]")
        name = token[:-2] if isArray else token

        info = {"name": name}

        if isArray:
            info["array"] = True

        if name in builtinTypes:
            info["builtin"] = True

        if name in pseudoTypes:
            info["pseudo"] = True

        result.append(info)

    return result
def __extractReturns(self, text):
    """
    Extracts a leading return definition (when type is function).

    The parsed types are stored in `self.returns`.

    :param text: Text of a comment block
    :return: Text without the leading return definition
    """

    # The pattern is anchored to the start, so there is at most one match
    found = returnMatcher.match(text)
    if found is None:
        return text

    self.returns = self.__splitTypeList(found.group(1))
    return text[found.end():]
def __extractStaticType(self, text):
    """
    Extracts a leading type definition (when value is a static type).

    The stripped type name is stored in `self.type`.

    :param text: Text of a comment block
    :return: Text without the leading type definition
    """

    # The pattern is anchored to the start, so there is at most one match
    found = typeMatcher.match(text)
    if found is None:
        return text

    self.type = found.group(1).strip()
    return text[found.end():]
def __extractTags(self, text):
    """
    Extracts all tags inside the given doc comment.

    Tags are removed from the text and collected in `self.tags`: a tag
    without parameters is stored as True, a tag with parameters as the set
    of all parameters it was used with.

    :param text: Text of a comment block
    :return: Text without tags
    """

    def collectTags(match):
        if not self.tags:
            self.tags = {}

        name = match.group(1)
        param = match.group(3)
        current = self.tags.get(name)

        if param:
            if isinstance(current, set):
                current.add(param)
            else:
                # First parameter for this tag. This also upgrades a tag
                # which was previously seen without parameter (stored as
                # True) instead of failing on the missing add() method.
                self.tags[name] = set([param])

        elif current is None:
            # A parameterless repetition must not touch existing parameters
            self.tags[name] = True

        return ""

    return tagMatcher.sub(collectTags, text)
def __processParams(self, text):
    """
    Collects param declarations into `self.params` and replaces them with markup.

    Dotted names like "options.x" are stored as nested entries below the
    "fields" key of their parent entry.

    :param text: Text to process
    :return: Text where every declaration is replaced by a `<code class="param">` element
    """

    def register(found):
        fullName = found.group(1).strip()
        typeDecl = found.group(3)
        isDynamic = found.group(4) is not None
        isOptional = found.group(5) is not None
        defaultValue = found.group(7)

        types = self.__splitTypeList(typeDecl) if typeDecl else typeDecl

        if self.params is None:
            self.params = {}

        scope = self.params
        path = fullName.split('.')
        leafIndex = len(path) - 1

        for depth, key in enumerate(path):
            # Ensure we have the map object in the params
            if key not in scope:
                scope[key] = {}

            # Only the last name of the path is (re-)defined, and only when
            # a type is given. This keeps something like @options {Object}
            # from being overwritten by the type of @options.x {Number}.
            if depth == leafIndex and types is not None:
                entry = scope[key] = {}
                entry["type"] = types

                if isDynamic:
                    entry["dynamic"] = isDynamic

                if isOptional:
                    entry["optional"] = isOptional

                if defaultValue is not None:
                    entry["default"] = defaultValue
            else:
                entry = scope[key]

            # create fields for new map level
            if depth < leafIndex:
                scope = entry.setdefault("fields", {})

        return '<code class="param">%s</code>' % fullName

    return paramMatcher.sub(register, text)
def __processLinks(self, text):
    """
    Converts links of the own dialect (e.g. "{member:foo.Bar#baz}") into HTML anchors.

    :param text: Text to process
    :return: Text with links replaced by `<a href="#..."><code>...</code></a>`
    """

    def render(found):
        section = found.group(3)
        target = found.group(4)
        item = found.group(6)

        # Do not match {}
        if section is None and target is None and item is None:
            return found.group(1)

        # A section is only used in combination with an item
        if not item:
            section = ""

        link = ""
        if section:
            link += '%s:' % section

        label = ""
        if target:
            link += target
            label = target

        if item:
            link += "~%s" % item
            label = ("%s#%s" % (label, item)) if label else item

        # add link to attributes list
        attr = ' href="#%s"' % link

        # build final HTML
        return '<a%s><code>%s</code></a>' % (attr, label)

    return linkMatcher.sub(render, text)

Mercurial Repositories > eric / file comparison

comparison: ThirdParty/Jasy/jasy/script/api/Comment.py

ThirdParty/Jasy/jasy/script/api/Comment.py