ThirdParty/Pygments/pygments/lexers/scripting.py

changeset 4697
c2e9bf425554
parent 4172
4f20dba37ab6
child 5713
6762afd9f963
equal deleted inserted replaced
4696:bf4d19a7cade 4697:c2e9bf425554
3 pygments.lexers.scripting 3 pygments.lexers.scripting
4 ~~~~~~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 Lexer for scripting and embedded languages. 6 Lexer for scripting and embedded languages.
7 7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 13
14 from pygments.lexer import RegexLexer, include, bygroups, default, combined, \ 14 from pygments.lexer import RegexLexer, include, bygroups, default, combined, \
15 words 15 words
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Error, Whitespace 17 Number, Punctuation, Error, Whitespace, Other
18 from pygments.util import get_bool_opt, get_list_opt, iteritems 18 from pygments.util import get_bool_opt, get_list_opt, iteritems
19 19
20 __all__ = ['LuaLexer', 'MoonScriptLexer', 'ChaiscriptLexer', 'LSLLexer', 20 __all__ = ['LuaLexer', 'MoonScriptLexer', 'ChaiscriptLexer', 'LSLLexer',
21 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer'] 21 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer',
22 'EasytrieveLexer', 'JclLexer']
22 23
23 24
24 class LuaLexer(RegexLexer): 25 class LuaLexer(RegexLexer):
25 """ 26 """
26 For `Lua <http://www.lua.org>`_ source code. 27 For `Lua <http://www.lua.org>`_ source code.
875 bygroups(Keyword.Declaration, Text), 'class'), 876 bygroups(Keyword.Declaration, Text), 'class'),
876 (r'(import|include)(\s+)', 877 (r'(import|include)(\s+)',
877 bygroups(Keyword.Namespace, Text), 'import'), 878 bygroups(Keyword.Namespace, Text), 'import'),
878 (words(( 879 (words((
879 'gc_collect', 'gc_mm_items', 'gc_mm_usage', 'gc_collect_threshold', 880 'gc_collect', 'gc_mm_items', 'gc_mm_usage', 'gc_collect_threshold',
880 'urlencode', 'urldecode', 'base64encode', 'base64decode', 'sha1', 'crc32', 'sha2', 881 'urlencode', 'urldecode', 'base64encode', 'base64decode', 'sha1', 'crc32',
881 'md5', 'md5_file', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh', 'exp', 882 'sha2', 'md5', 'md5_file', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos',
882 'fabs', 'floor', 'fmod', 'log', 'log10', 'pow', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 883 'cosh', 'exp', 'fabs', 'floor', 'fmod', 'log', 'log10', 'pow', 'sin',
883 'isint', 'isfloat', 'ischar', 'isstring', 'isarray', 'ismap', 'isalias', 'typeof', 884 'sinh', 'sqrt', 'tan', 'tanh', 'isint', 'isfloat', 'ischar', 'isstring',
884 'sizeof', 'toint', 'tostring', 'fromxml', 'toxml', 'binary', 'pack', 'load', 'eval', 885 'isarray', 'ismap', 'isalias', 'typeof', 'sizeof', 'toint', 'tostring',
885 'var_names', 'var_values', 'user_functions', 'dyn_functions', 'methods', 'call', 886 'fromxml', 'toxml', 'binary', 'pack', 'load', 'eval', 'var_names',
886 'call_method', 'mknod', 'mkfifo', 'mount', 'umount2', 'umount', 'ticks', 'usleep', 887 'var_values', 'user_functions', 'dyn_functions', 'methods', 'call',
887 'sleep', 'time', 'strtime', 'strdate', 'dllopen', 'dlllink', 'dllcall', 'dllcall_argv', 888 'call_method', 'mknod', 'mkfifo', 'mount', 'umount2', 'umount', 'ticks',
888 'dllclose', 'env', 'exec', 'fork', 'getpid', 'wait', 'popen', 'pclose', 'exit', 'kill', 889 'usleep', 'sleep', 'time', 'strtime', 'strdate', 'dllopen', 'dlllink',
889 'pthread_create', 'pthread_create_argv', 'pthread_exit', 'pthread_join', 'pthread_kill', 890 'dllcall', 'dllcall_argv', 'dllclose', 'env', 'exec', 'fork', 'getpid',
890 'smtp_send', 'http_get', 'http_post', 'http_download', 'socket', 'bind', 'listen', 891 'wait', 'popen', 'pclose', 'exit', 'kill', 'pthread_create',
891 'accept', 'getsockname', 'getpeername', 'settimeout', 'connect', 'server', 'recv', 892 'pthread_create_argv', 'pthread_exit', 'pthread_join', 'pthread_kill',
892 'send', 'close', 'print', 'println', 'printf', 'input', 'readline', 'serial_open', 893 'smtp_send', 'http_get', 'http_post', 'http_download', 'socket', 'bind',
893 'serial_fcntl', 'serial_get_attr', 'serial_get_ispeed', 'serial_get_ospeed', 894 'listen', 'accept', 'getsockname', 'getpeername', 'settimeout', 'connect',
894 'serial_set_attr', 'serial_set_ispeed', 'serial_set_ospeed', 'serial_write', 895 'server', 'recv', 'send', 'close', 'print', 'println', 'printf', 'input',
895 'serial_read', 'serial_close', 'xml_load', 'xml_parse', 'fopen', 'fseek', 'ftell', 896 'readline', 'serial_open', 'serial_fcntl', 'serial_get_attr',
896 'fsize', 'fread', 'fwrite', 'fgets', 'fclose', 'file', 'readdir', 'pcre_replace', 'size', 897 'serial_get_ispeed', 'serial_get_ospeed', 'serial_set_attr',
897 'pop', 'unmap', 'has', 'keys', 'values', 'length', 'find', 'substr', 'replace', 'split', 898 'serial_set_ispeed', 'serial_set_ospeed', 'serial_write', 'serial_read',
898 'trim', 'remove', 'contains', 'join'), suffix=r'\b'), 899 'serial_close', 'xml_load', 'xml_parse', 'fopen', 'fseek', 'ftell',
900 'fsize', 'fread', 'fwrite', 'fgets', 'fclose', 'file', 'readdir',
901 'pcre_replace', 'size', 'pop', 'unmap', 'has', 'keys', 'values',
902 'length', 'find', 'substr', 'replace', 'split', 'trim', 'remove',
903 'contains', 'join'), suffix=r'\b'),
899 Name.Builtin), 904 Name.Builtin),
900 (words(( 905 (words((
901 'MethodReference', 'Runner', 'Dll', 'Thread', 'Pipe', 'Process', 906 'MethodReference', 'Runner', 'Dll', 'Thread', 'Pipe', 'Process',
902 'Runnable', 'CGI', 'ClientSocket', 'Socket', 'ServerSocket', 907 'Runnable', 'CGI', 'ClientSocket', 'Socket', 'ServerSocket',
903 'File', 'Console', 'Directory', 'Exception'), suffix=r'\b'), 908 'File', 'Console', 'Directory', 'Exception'), suffix=r'\b'),
919 ], 924 ],
920 'import': [ 925 'import': [
921 (r'[\w.]+\*?', Name.Namespace, '#pop') 926 (r'[\w.]+\*?', Name.Namespace, '#pop')
922 ], 927 ],
923 } 928 }
929
930
class EasytrieveLexer(RegexLexer):
    """
    Easytrieve Plus is a programming language for extracting, filtering and
    converting sequential data. Furthermore it can layout data for reports.
    It is mainly used on mainframe platforms and can access several of the
    mainframe's native file formats. It is somewhat comparable to awk.

    .. versionadded:: 2.1
    """
    name = 'Easytrieve'
    aliases = ['easytrieve']
    filenames = ['*.ezt', '*.mac']
    mimetypes = ['text/x-easytrieve']
    flags = 0

    # Note: We cannot use r'\b' at the start and end of keywords because
    # Easytrieve Plus delimiter characters are:
    #
    #   * space ( )
    #   * apostrophe (')
    #   * period (.)
    #   * comma (,)
    #   * parenthesis ( and )
    #   * colon (:)
    #
    # Additionally words end once a '*' appears, indicating a comment.
    _DELIMITERS = r' \'.,():\n'
    _DELIMITERS_OR_COMENT = _DELIMITERS + '*'
    _DELIMITER_PATTERN = '[' + _DELIMITERS + ']'
    _DELIMITER_PATTERN_CAPTURE = '(' + _DELIMITER_PATTERN + ')'
    _NON_DELIMITER_OR_COMMENT_PATTERN = '[^' + _DELIMITERS_OR_COMENT + ']'
    _OPERATORS_PATTERN = u'[.+\\-/=\\[\\](){}<>;,&%¬]'
    _KEYWORDS = [
        'AFTER-BREAK', 'AFTER-LINE', 'AFTER-SCREEN', 'AIM', 'AND', 'ATTR',
        'BEFORE', 'BEFORE-BREAK', 'BEFORE-LINE', 'BEFORE-SCREEN', 'BUSHU',
        'BY', 'CALL', 'CASE', 'CHECKPOINT', 'CHKP', 'CHKP-STATUS', 'CLEAR',
        'CLOSE', 'COL', 'COLOR', 'COMMIT', 'CONTROL', 'COPY', 'CURSOR', 'D',
        'DECLARE', 'DEFAULT', 'DEFINE', 'DELETE', 'DENWA', 'DISPLAY', 'DLI',
        'DO', 'DUPLICATE', 'E', 'ELSE', 'ELSE-IF', 'END', 'END-CASE',
        'END-DO', 'END-IF', 'END-PROC', 'ENDPAGE', 'ENDTABLE', 'ENTER', 'EOF',
        'EQ', 'ERROR', 'EXIT', 'EXTERNAL', 'EZLIB', 'F1', 'F10', 'F11', 'F12',
        'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F2', 'F20', 'F21',
        'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F3', 'F30',
        'F31', 'F32', 'F33', 'F34', 'F35', 'F36', 'F4', 'F5', 'F6', 'F7',
        'F8', 'F9', 'FETCH', 'FILE-STATUS', 'FILL', 'FINAL', 'FIRST',
        'FIRST-DUP', 'FOR', 'GE', 'GET', 'GO', 'GOTO', 'GQ', 'GR', 'GT',
        'HEADING', 'HEX', 'HIGH-VALUES', 'IDD', 'IDMS', 'IF', 'IN', 'INSERT',
        'JUSTIFY', 'KANJI-DATE', 'KANJI-DATE-LONG', 'KANJI-TIME', 'KEY',
        'KEY-PRESSED', 'KOKUGO', 'KUN', 'LAST-DUP', 'LE', 'LEVEL', 'LIKE',
        'LINE', 'LINE-COUNT', 'LINE-NUMBER', 'LINK', 'LIST', 'LOW-VALUES',
        'LQ', 'LS', 'LT', 'MACRO', 'MASK', 'MATCHED', 'MEND', 'MESSAGE',
        'MOVE', 'MSTART', 'NE', 'NEWPAGE', 'NOMASK', 'NOPRINT', 'NOT',
        'NOTE', 'NOVERIFY', 'NQ', 'NULL', 'OF', 'OR', 'OTHERWISE', 'PA1',
        'PA2', 'PA3', 'PAGE-COUNT', 'PAGE-NUMBER', 'PARM-REGISTER',
        'PATH-ID', 'PATTERN', 'PERFORM', 'POINT', 'POS', 'PRIMARY', 'PRINT',
        'PROCEDURE', 'PROGRAM', 'PUT', 'READ', 'RECORD', 'RECORD-COUNT',
        'RECORD-LENGTH', 'REFRESH', 'RELEASE', 'RENUM', 'REPEAT', 'REPORT',
        'REPORT-INPUT', 'RESHOW', 'RESTART', 'RETRIEVE', 'RETURN-CODE',
        'ROLLBACK', 'ROW', 'S', 'SCREEN', 'SEARCH', 'SECONDARY', 'SELECT',
        'SEQUENCE', 'SIZE', 'SKIP', 'SOKAKU', 'SORT', 'SQL', 'STOP', 'SUM',
        'SYSDATE', 'SYSDATE-LONG', 'SYSIN', 'SYSIPT', 'SYSLST', 'SYSPRINT',
        'SYSSNAP', 'SYSTIME', 'TALLY', 'TERM-COLUMNS', 'TERM-NAME',
        'TERM-ROWS', 'TERMINATION', 'TITLE', 'TO', 'TRANSFER', 'TRC',
        'UNIQUE', 'UNTIL', 'UPDATE', 'UPPERCASE', 'USER', 'USERID', 'VALUE',
        'VERIFY', 'W', 'WHEN', 'WHILE', 'WORK', 'WRITE', 'X', 'XDM', 'XRST'
    ]

    tokens = {
        'root': [
            (r'\*.*\n', Comment.Single),
            (r'\n+', Whitespace),
            # Macro argument
            (r'&' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+\.', Name.Variable,
             'after_macro_argument'),
            # Macro call
            (r'%' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Variable),
            (r'(FILE|MACRO|REPORT)(\s+)',
             bygroups(Keyword.Declaration, Whitespace), 'after_declaration'),
            (r'(JOB|PARM)' + r'(' + _DELIMITER_PATTERN + r')',
             bygroups(Keyword.Declaration, Operator)),
            (words(_KEYWORDS, suffix=_DELIMITER_PATTERN_CAPTURE),
             bygroups(Keyword.Reserved, Operator)),
            (_OPERATORS_PATTERN, Operator),
            # Procedure declaration
            (r'(' + _NON_DELIMITER_OR_COMMENT_PATTERN
             + r'+)(\s*)(\.?)(\s*)(PROC)(\s*\n)',
             bygroups(Name.Function, Whitespace, Operator, Whitespace,
                      Keyword.Declaration, Whitespace)),
            (r'[0-9]+\.[0-9]*', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String),
            (r'\s+', Whitespace),
            # Everything else just belongs to a name
            (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)
        ],
        'after_declaration': [
            (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function),
            # Leave the state without consuming input. ``default`` is used
            # instead of an empty regex so that no zero-length token is
            # emitted.
            default('#pop')
        ],
        'after_macro_argument': [
            (r'\*.*\n', Comment.Single, '#pop'),
            (r'\s+', Whitespace, '#pop'),
            (_OPERATORS_PATTERN, Operator, '#pop'),
            (r"'(''|[^'])*'", String, '#pop'),
            # Everything else just belongs to a name
            (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)
        ],
    }
    _COMMENT_LINE_REGEX = re.compile(r'^\s*\*')
    _MACRO_HEADER_REGEX = re.compile(r'^\s*MACRO')

    def analyse_text(text):
        """
        Perform a structural analysis for basic Easytrieve constructs.

        Leading blank lines and ``*`` comment lines are skipped first. If the
        first remaining line starts with ``MACRO`` the text is rated as an
        Easytrieve macro; otherwise the lines are scanned for the statement
        keywords ``PARM``, ``FILE``, ``JOB``, ``PROC``, ``END-PROC`` and
        ``REPORT`` and the findings are weighted.

        Returns a float between 0.0 (no indication) and 1.0.
        """
        result = 0.0
        lines = text.split('\n')
        has_end_proc = False
        has_header_comment = False
        has_file = False
        has_job = False
        has_proc = False
        has_parm = False
        has_report = False

        def is_comment_line(line):
            return EasytrieveLexer._COMMENT_LINE_REGEX.match(line) is not None

        def is_empty_line(line):
            return not line.strip()

        # Remove possible empty lines and header comments.
        while lines and (is_empty_line(lines[0]) or is_comment_line(lines[0])):
            if not is_empty_line(lines[0]):
                has_header_comment = True
            del lines[0]

        # ``lines`` may be empty here (text made only of blank and comment
        # lines), so guard the index access.
        if lines and EasytrieveLexer._MACRO_HEADER_REGEX.match(lines[0]):
            # Looks like an Easytrieve macro.
            result = 0.4
            if has_header_comment:
                result += 0.4
        else:
            # Scan the source for lines starting with indicators. Only lines
            # with at least two whitespace separated fields are considered.
            for line in lines:
                fields = line.split()
                if len(fields) < 2:
                    continue
                if has_report:
                    # Nothing is collected anymore once REPORT was seen.
                    break
                first_word = fields[0]
                if not has_job:
                    # Before JOB: look for PARM, FILE and JOB itself.
                    if not has_file:
                        if not has_parm and first_word == 'PARM':
                            has_parm = True
                        if first_word == 'FILE':
                            has_file = True
                    if first_word == 'JOB':
                        has_job = True
                elif first_word == 'PROC':
                    has_proc = True
                elif first_word == 'END-PROC':
                    has_end_proc = True
                elif first_word == 'REPORT':
                    has_report = True

            # Weight the findings.
            if has_job and (has_proc == has_end_proc):
                if has_header_comment:
                    result += 0.1
                if has_parm:
                    if has_proc:
                        # Found PARM, JOB and PROC/END-PROC:
                        # pretty sure this is Easytrieve.
                        result += 0.8
                    else:
                        # Found PARM and JOB: probably this is Easytrieve.
                        result += 0.5
                else:
                    # Found JOB and possibly other keywords: might be
                    # Easytrieve.
                    result += 0.11
                    if has_file:
                        result += 0.01
                    if has_report:
                        result += 0.01
        assert 0.0 <= result <= 1.0
        return result
1121
1122
class JclLexer(RegexLexer):
    """
    `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_
    is a scripting language used on mainframe platforms to instruct the system
    on how to run a batch job or start a subsystem. It is somewhat
    comparable to MS DOS batch and Unix shell scripts.

    .. versionadded:: 2.1
    """
    name = 'JCL'
    aliases = ['jcl']
    filenames = ['*.jcl']
    mimetypes = ['text/x-jcl']
    flags = re.IGNORECASE

    tokens = {
        # Line start: decide between comment, JCL statement, JES2 statement
        # and plain data.
        'root': [
            (r'//\*.*\n', Comment.Single),
            (r'//', Keyword.Pseudo, 'statement'),
            (r'/\*', Keyword.Pseudo, 'jes2_statement'),
            # TODO: JES3 statement
            # Input text or inline code in any language.
            (r'.*\n', Other)
        ],
        # Directly after the leading '//'.
        'statement': [
            (r'\s*\n', Whitespace, '#pop'),
            # Labelled EXEC or JOB statement.
            (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)',
             bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace),
             'option'),
            (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'),
            (r'\s+', Whitespace, 'statement_command'),
        ],
        # Operation field of a statement other than EXEC/JOB.
        'statement_command': [
            (r'\s+(command|cntl|dd|endctl|endif|else|include|jcllib|'
             r'output|pend|proc|set|then|xmit)\s+', Keyword.Reserved, 'option'),
            include('option')
        ],
        # Directly after the leading '/*'.
        'jes2_statement': [
            (r'\s*\n', Whitespace, '#pop'),
            (r'\$', Keyword, 'option'),
            (r'\b(jobparam|message|netacct|notify|output|priority|route|'
             r'setup|signoff|xeq|xmit)\b', Keyword, 'option'),
        ],
        # Parameter field.
        # NOTE(review): there is no explicit newline rule here; presumably
        # the lexer relies on RegexLexer falling back to 'root' at an
        # unmatched newline -- confirm.
        'option': [
            (r'\*', Name.Builtin),
            (r'[\[\](){}<>;,]', Punctuation),
            (r'[-+*/=&%]', Operator),
            (r'[a-z_][a-z_0-9]*', Name),
            (r'[0-9]+\.[0-9]*', Number.Float),
            (r'\.[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'", String, 'option_string'),
            # Blanks end the parameter field; the rest of the line is
            # treated as a comment.
            (r'[ \t]+', Whitespace, 'option_comment'),
            (r'\.', Punctuation),
        ],
        # Inside an apostrophe delimited string, which may continue on the
        # next '//' line.
        'option_string': [
            (r"(\n)(//)", bygroups(Text, Keyword.Pseudo)),
            (r"''", String),
            (r"[^']", String),
            (r"'", String, '#pop'),
        ],
        # Remainder of the line after the parameter field.
        'option_comment': [
            (r'.+', Comment.Single),
        ]
    }

    _JOB_HEADER_PATTERN = re.compile(
        r'^//[a-z#$@][a-z0-9#$@]{0,7}\s+job(\s+.*)?$', re.IGNORECASE)

    def analyse_text(text):
        """
        Rate the text as JCL when its first line is a job header.

        Returns 1.0 if the first line matches ``//<name> JOB ...`` (case
        insensitive), otherwise 0.0.
        """
        # Only the text up to the first newline is inspected.
        first_line = text.partition('\n')[0]
        if JclLexer._JOB_HEADER_PATTERN.match(first_line):
            return 1.0
        return 0.0

eric ide

mercurial