Sat, 23 Feb 2019 22:03:27 +0100
Ignore triple quotes in comments.
# -*- coding: utf-8 -*- # Copyright (c) 2005 - 2019 Detlev Offenbach <detlev@die-offenbachs.de> # """ Parse a Python file and retrieve classes, functions/methods and attributes. Parse enough of a Python file to recognize class and method definitions and to find out the superclasses of a class as well as its attributes. This is module is based on pyclbr found in the Python 2.2.2 distribution. """ from __future__ import unicode_literals import sys import imp import re import Utilities import Utilities.ClassBrowsers as ClassBrowsers from . import ClbrBaseClasses from functools import reduce TABWIDTH = 4 SUPPORTED_TYPES = [ClassBrowsers.PY_SOURCE, ClassBrowsers.PTL_SOURCE] _getnext = re.compile( r""" (?P<String> \#[^\n]*$ # Ignore triple quotes in comments | \""" [^"\\]* (?: (?: \\. | "(?!"") ) [^"\\]* )* \""" | ''' [^'\\]* (?: (?: \\. | '(?!'') ) [^'\\]* )* ''' | " [^"\\\n]* (?: \\. [^"\\\n]*)* " | ' [^'\\\n]* (?: \\. [^'\\\n]*)* ' ) | (?P<Publics> ^ [ \t]* __all__ [ \t]* = [ \t]* \[ (?P<Identifiers> [^\]]*? ) \] ) | (?P<MethodModifier> ^ (?P<MethodModifierIndent> [ \t]* ) (?P<MethodModifierType> @classmethod | @staticmethod ) ) | (?P<Method> ^ (?P<MethodIndent> [ \t]* ) (?: async [ \t]+ )? def [ \t]+ (?P<MethodName> \w+ ) (?: [ \t]* \[ (?: plain | html ) \] )? [ \t]* \( (?P<MethodSignature> (?: [^)] | \)[ \t]*,? )*? ) \) [ \t]* (?P<MethodReturnAnnotation> (?: -> [ \t]* [^:]+ )? ) [ \t]* : ) | (?P<Class> ^ (?P<ClassIndent> [ \t]* ) class [ \t]+ (?P<ClassName> \w+ ) [ \t]* (?P<ClassSupers> \( [^)]* \) )? [ \t]* : ) | (?P<Attribute> ^ (?P<AttributeIndent> [ \t]* ) self [ \t]* \. [ \t]* (?P<AttributeName> \w+ ) [ \t]* = ) | (?P<Variable> ^ (?P<VariableIndent> [ \t]* ) (?P<VariableName> \w+ ) [ \t]* = ) | (?P<Main> ^ if \s+ __name__ \s* == \s* [^:]+ : $ ) | (?P<ConditionalDefine> ^ (?P<ConditionalDefineIndent> [ \t]* ) (?: (?: if | elif ) [ \t]+ [^:]* | else [ \t]* ) : (?= \s* (?: async [ \t]+ )? def) ) | (?P<Import> ^ [ \t]* (?: import | from [ \t]+ \. [ \t]+ import ) [ \t]+ (?P<ImportList> (?: [^#;\\\n]* (?: \\\n )* )* ) ) | (?P<ImportFrom> ^ [ \t]* from [ \t]+ (?P<ImportFromPath> \.* \w+ (?: [ \t]* \. [ \t]* \w+ )* ) [ \t]+ import [ \t]+ (?P<ImportFromList> (?: \( \s* .*? \s* \) ) | (?: [^#;\\\n]* (?: \\\n )* )* ) ) | (?P<CodingLine> ^ \# \s* [*_-]* \s* coding[:=] \s* (?P<Coding> [-\w_.]+ ) \s* [*_-]* $ )""", re.VERBOSE | re.DOTALL | re.MULTILINE).search _commentsub = re.compile(r"""#[^\n]*\n|#[^\n]*$""").sub _modules = {} # cache of modules we've seen class VisibilityMixin(ClbrBaseClasses.ClbrVisibilityMixinBase): """ Mixin class implementing the notion of visibility. """ def __init__(self): """ Constructor """ if self.name.startswith('__'): self.setPrivate() elif self.name.startswith('_'): self.setProtected() else: self.setPublic() class Class(ClbrBaseClasses.Class, VisibilityMixin): """ Class to represent a Python class. """ def __init__(self, module, name, superClasses, file, lineno): """ Constructor @param module name of the module containing this class @param name name of this class @param superClasses list of class names this class is inherited from @param file filename containing this class @param lineno linenumber of the class definition """ ClbrBaseClasses.Class.__init__(self, module, name, superClasses, file, lineno) VisibilityMixin.__init__(self) class Function(ClbrBaseClasses.Function, VisibilityMixin): """ Class to represent a Python function. """ def __init__(self, module, name, file, lineno, signature='', separator=',', modifierType=ClbrBaseClasses.Function.General, annotation=""): """ Constructor @param module name of the module containing this function @param name name of this function @param file filename containing this class @param lineno linenumber of the class definition @param signature parameterlist of the method @param separator string separating the parameters @param modifierType type of the function @param annotation return annotation """ ClbrBaseClasses.Function.__init__(self, module, name, file, lineno, signature, separator, modifierType, annotation) VisibilityMixin.__init__(self) class Attribute(ClbrBaseClasses.Attribute, VisibilityMixin): """ Class to represent a class attribute. """ def __init__(self, module, name, file, lineno): """ Constructor @param module name of the module containing this class @param name name of this class @param file filename containing this attribute @param lineno linenumber of the class definition """ ClbrBaseClasses.Attribute.__init__(self, module, name, file, lineno) VisibilityMixin.__init__(self) class Publics(object): """ Class to represent the list of public identifiers. """ def __init__(self, module, file, lineno, idents): """ Constructor @param module name of the module containing this function @param file filename containing this class @param lineno linenumber of the class definition @param idents list of public identifiers """ self.module = module self.name = '__all__' self.file = file self.lineno = lineno self.identifiers = [e.replace('"', '').replace("'", "").strip() for e in idents.split(',')] class Imports(object): """ Class to represent the list of imported modules. """ def __init__(self, module, file): """ Constructor @param module name of the module containing the import (string) @param file file name containing the import (string) """ self.module = module self.name = 'import' self.file = file self.imports = {} def addImport(self, moduleName, names, lineno): """ Public method to add a list of imported names. @param moduleName name of the imported module (string) @param names list of names (list of strings) @param lineno line number of the import """ if moduleName not in self.imports: module = ImportedModule(self.module, self.file, moduleName) self.imports[moduleName] = module else: module = self.imports[moduleName] module.addImport(lineno, names) def getImport(self, moduleName): """ Public method to get an imported module item. @param moduleName name of the imported module (string) @return imported module item (ImportedModule) or None """ if moduleName in self.imports: return self.imports[moduleName] else: return None def getImports(self): """ Public method to get all imported module names. @return dictionary of imported module names with name as key and list of line numbers of imports as value """ return self.imports class ImportedModule(object): """ Class to represent an imported module. """ def __init__(self, module, file, importedModule): """ Constructor @param module name of the module containing the import (string) @param file file name containing the import (string) @param importedModule name of the imported module (string) """ self.module = module self.name = 'import' self.file = file self.importedModuleName = importedModule self.linenos = [] self.importedNames = {} # dictionary of imported names with name as key and list of line # numbers as value def addImport(self, lineno, importedNames): """ Public method to add a list of imported names. @param lineno line number of the import @param importedNames list of imported names (list of strings) """ if lineno not in self.linenos: self.linenos.append(lineno) for name in importedNames: if name not in self.importedNames: self.importedNames[name] = [lineno] else: self.importedNames[name].append(lineno) def readmodule_ex(module, path=None, inpackage=False, isPyFile=False): """ Read a module file and return a dictionary of classes. Search for MODULE in PATH and sys.path, read and parse the module and return a dictionary with one entry for each class found in the module. @param module name of the module file (string) @param path path the module should be searched in (list of strings) @param inpackage flag indicating a module inside a package is scanned @param isPyFile flag indicating a Python file (boolean) @return the resulting dictionary """ global _modules dictionary = {} dict_counts = {} if module in _modules: # we've seen this module before... return _modules[module] if module in sys.builtin_module_names: # this is a built-in module _modules[module] = dictionary return dictionary # search the path for the module path = [] if path is None else path[:] f = None if inpackage: try: f, file, (suff, mode, type) = \ ClassBrowsers.find_module(module, path) except ImportError: f = None if f is None: fullpath = path[:] + sys.path[:] f, file, (suff, mode, type) = \ ClassBrowsers.find_module(module, fullpath, isPyFile) if module.endswith(".py") and type == imp.PKG_DIRECTORY: return dictionary if type == imp.PKG_DIRECTORY: dictionary['__path__'] = [file] _modules[module] = dictionary path = [file] + path f, file, (suff, mode, type) = \ ClassBrowsers.find_module('__init__', [file]) if f: f.close() if type not in SUPPORTED_TYPES: # not Python source, can't do anything with this module _modules[module] = dictionary return dictionary _modules[module] = dictionary classstack = [] # stack of (class, indent) pairs conditionalsstack = [] # stack of indents of conditional defines deltastack = [] deltaindent = 0 deltaindentcalculated = 0 try: src = Utilities.readEncodedFile(file)[0] except (UnicodeError, IOError): # can't do anything with this module _modules[module] = dictionary return dictionary lineno, last_lineno_pos = 1, 0 lastGlobalEntry = None cur_obj = None i = 0 modifierType = ClbrBaseClasses.Function.General modifierIndent = -1 while True: m = _getnext(src, i) if not m: break start, i = m.span() if m.start("MethodModifier") >= 0: modifierIndent = _indent(m.group("MethodModifierIndent")) modifierType = m.group("MethodModifierType") elif m.start("Method") >= 0: # found a method definition or function thisindent = _indent(m.group("MethodIndent")) meth_name = m.group("MethodName") meth_sig = m.group("MethodSignature") meth_sig = meth_sig.replace('\\\n', '') meth_sig = _commentsub('', meth_sig) meth_ret = m.group("MethodReturnAnnotation") meth_ret = meth_ret.replace('\\\n', '') meth_ret = _commentsub('', meth_ret) lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start if modifierType and modifierIndent == thisindent: if modifierType == "@staticmethod": modifier = ClbrBaseClasses.Function.Static elif modifierType == "@classmethod": modifier = ClbrBaseClasses.Function.Class else: modifier = ClbrBaseClasses.Function.General else: modifier = ClbrBaseClasses.Function.General # modify indentation level for conditional defines if conditionalsstack: if thisindent > conditionalsstack[-1]: if not deltaindentcalculated: deltastack.append(thisindent - conditionalsstack[-1]) deltaindent = reduce(lambda x, y: x + y, deltastack) deltaindentcalculated = 1 thisindent -= deltaindent else: while conditionalsstack and \ conditionalsstack[-1] >= thisindent: del conditionalsstack[-1] if deltastack: del deltastack[-1] deltaindentcalculated = 0 # close all classes indented at least as much while classstack and \ classstack[-1][1] >= thisindent: if classstack[-1][0] is not None: # record the end line classstack[-1][0].setEndLine(lineno - 1) del classstack[-1] if classstack: # it's a class method cur_class = classstack[-1][0] if cur_class: # it's a method/nested def f = Function(None, meth_name, file, lineno, meth_sig, annotation=meth_ret, modifierType=modifier) cur_class._addmethod(meth_name, f) else: # it's a function f = Function(module, meth_name, file, lineno, meth_sig, annotation=meth_ret, modifierType=modifier) if meth_name in dict_counts: dict_counts[meth_name] += 1 meth_name = "{0}_{1:d}".format( meth_name, dict_counts[meth_name]) else: dict_counts[meth_name] = 0 dictionary[meth_name] = f if not classstack: if lastGlobalEntry: lastGlobalEntry.setEndLine(lineno - 1) lastGlobalEntry = f if cur_obj and isinstance(cur_obj, Function): cur_obj.setEndLine(lineno - 1) cur_obj = f classstack.append((f, thisindent)) # Marker for nested fns # reset the modifier settings modifierType = ClbrBaseClasses.Function.General modifierIndent = -1 elif m.start("String") >= 0: pass elif m.start("Class") >= 0: # we found a class definition thisindent = _indent(m.group("ClassIndent")) # close all classes indented at least as much while classstack and \ classstack[-1][1] >= thisindent: if classstack[-1][0] is not None: # record the end line classstack[-1][0].setEndLine(lineno - 1) del classstack[-1] lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start class_name = m.group("ClassName") inherit = m.group("ClassSupers") if inherit: # the class inherits from other classes inherit = inherit[1:-1].strip() inherit = _commentsub('', inherit) names = [] for n in inherit.split(','): n = n.strip() if n in dictionary: # we know this super class n = dictionary[n] else: c = n.split('.') if len(c) > 1: # super class # is of the # form module.class: # look in # module for class m = c[-2] c = c[-1] if m in _modules: d = _modules[m] if c in d: n = d[c] names.append(n) inherit = names # remember this class cur_class = Class(module, class_name, inherit, file, lineno) if not classstack: if class_name in dict_counts: dict_counts[class_name] += 1 class_name = "{0}_{1:d}".format( class_name, dict_counts[class_name]) else: dict_counts[class_name] = 0 dictionary[class_name] = cur_class else: classstack[-1][0]._addclass(class_name, cur_class) if not classstack: if lastGlobalEntry: lastGlobalEntry.setEndLine(lineno - 1) lastGlobalEntry = cur_class classstack.append((cur_class, thisindent)) elif m.start("Attribute") >= 0: lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start index = -1 while index >= -len(classstack): if classstack[index][0] is not None and \ not isinstance(classstack[index][0], Function): attr = Attribute( module, m.group("AttributeName"), file, lineno) classstack[index][0]._addattribute(attr) break else: index -= 1 elif m.start("Main") >= 0: # 'main' part of the script, reset class stack lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start classstack = [] elif m.start("Variable") >= 0: thisindent = _indent(m.group("VariableIndent")) variable_name = m.group("VariableName") lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start if thisindent == 0 or not classstack: # global variable, reset class stack first classstack = [] if "@@Globals@@" not in dictionary: dictionary["@@Globals@@"] = ClbrBaseClasses.ClbrBase( module, "Globals", file, lineno) dictionary["@@Globals@@"]._addglobal( Attribute(module, variable_name, file, lineno)) if lastGlobalEntry: lastGlobalEntry.setEndLine(lineno - 1) lastGlobalEntry = None else: index = -1 while index >= -len(classstack): if classstack[index][1] >= thisindent: index -= 1 else: if isinstance(classstack[index][0], Class): classstack[index][0]._addglobal( Attribute(module, variable_name, file, lineno)) break elif m.start("Publics") >= 0: idents = m.group("Identifiers") lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start pubs = Publics(module, file, lineno, idents) dictionary['__all__'] = pubs elif m.start("Import") >= 0: # import module names = [n.strip() for n in "".join(m.group("ImportList").splitlines()) .replace("\\", "").split(',')] lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start if "@@Import@@" not in dictionary: dictionary["@@Import@@"] = Imports(module, file) for name in names: dictionary["@@Import@@"].addImport(name, [], lineno) elif m.start("ImportFrom") >= 0: # from module import stuff mod = m.group("ImportFromPath") namesLines = (m.group("ImportFromList") .replace("(", "").replace(")", "") .replace("\\", "") .strip().splitlines()) namesLines = [line.split("#")[0].strip() for line in namesLines] names = [n.strip() for n in "".join(namesLines) .split(',')] lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start if "@@Import@@" not in dictionary: dictionary["@@Import@@"] = Imports(module, file) dictionary["@@Import@@"].addImport(mod, names, lineno) elif m.start("ConditionalDefine") >= 0: # a conditional function/method definition thisindent = _indent(m.group("ConditionalDefineIndent")) while conditionalsstack and \ conditionalsstack[-1] >= thisindent: del conditionalsstack[-1] if deltastack: del deltastack[-1] conditionalsstack.append(thisindent) deltaindentcalculated = 0 elif m.start("CodingLine") >= 0: # a coding statement coding = m.group("Coding") lineno = lineno + src.count('\n', last_lineno_pos, start) last_lineno_pos = start if "@@Coding@@" not in dictionary: dictionary["@@Coding@@"] = ClbrBaseClasses.Coding( module, file, lineno, coding) else: assert 0, "regexp _getnext found something unexpected" if '__all__' in dictionary: # set visibility of all top level elements pubs = dictionary['__all__'] for key in dictionary.keys(): if key == '__all__' or key.startswith("@@"): continue if key in pubs.identifiers: dictionary[key].setPublic() else: dictionary[key].setPrivate() del dictionary['__all__'] return dictionary def _indent(ws): """ Module function to return the indentation depth. @param ws the whitespace to be checked (string) @return length of the whitespace string (integer) """ return len(ws.expandtabs(TABWIDTH))