eric6/ThirdParty/Pygments/pygments/lexers/perl.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.perl
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Perl and related languages.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 using, this, default, words
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation
18 from pygments.util import shebang_matches
19
20 __all__ = ['PerlLexer', 'Perl6Lexer']
21
22
23 class PerlLexer(RegexLexer):
24 """
25 For `Perl <http://www.perl.org>`_ source code.
26 """
27
28 name = 'Perl'
29 aliases = ['perl', 'pl']
30 filenames = ['*.pl', '*.pm', '*.t']
31 mimetypes = ['text/x-perl', 'application/x-perl']
32
33 flags = re.DOTALL | re.MULTILINE
34 # TODO: give this to a perl guy who knows how to parse perl...
35 tokens = {
36 'balanced-regex': [
37 (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
38 (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
39 (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
40 (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
41 (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
42 (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
43 (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
44 (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
45 (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
46 (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
47 ],
48 'root': [
49 (r'\A\#!.+?$', Comment.Hashbang),
50 (r'\#.*?$', Comment.Single),
51 (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
52 (words((
53 'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
54 'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
55 'unless', 'until', 'while', 'print', 'new', 'BEGIN',
56 'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
57 Keyword),
58 (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
59 bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
60 (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
61 # common delimiters
62 (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
63 String.Regex),
64 (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
65 (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
66 (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
67 String.Regex),
68 (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
69 String.Regex),
70 # balanced delimiters
71 (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
72 (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
73 (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
74 'balanced-regex'),
75 (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
76 'balanced-regex'),
77
78 (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
79 (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
80 (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
81 String.Regex),
82 (r'\s+', Text),
83 (words((
84 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
85 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
86 'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
87 'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
88 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
89 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
90 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
91 'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
92 'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
93 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
94 'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
95 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
96 'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
97 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
98 'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
99 'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
100 'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
101 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
102 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
103 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
104 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
105 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
106 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
107 'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
108 'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
109 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
110 Name.Builtin),
111 (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
112 (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
113 bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
114 (r'__END__', Comment.Preproc, 'end-part'),
115 (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
116 (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
117 (r'[$@%#]+', Name.Variable, 'varname'),
118 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
119 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
120 (r'0b[01]+(_[01]+)*', Number.Bin),
121 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
122 Number.Float),
123 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
124 (r'\d+(_\d+)*', Number.Integer),
125 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
126 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
127 (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
128 (r'<([^\s>]+)>', String.Regex),
129 (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
130 (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
131 (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
132 (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
133 (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
134 (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
135 bygroups(Keyword, Text, Name.Namespace)),
136 (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
137 bygroups(Keyword, Text, Name.Namespace)),
138 (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
139 (words((
140 'no', 'package', 'require', 'use'), suffix=r'\b'),
141 Keyword),
142 (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
143 r'!~|&&?|\|\||\.{1,3})', Operator),
144 (r'[-+/*%=<>&^|!\\~]=?', Operator),
145 (r'[()\[\]:;,<>/?{}]', Punctuation), # yes, there's no shortage
146 # of punctuation in Perl!
147 (r'(?=\w)', Name, 'name'),
148 ],
149 'format': [
150 (r'\.\n', String.Interpol, '#pop'),
151 (r'[^\n]*\n', String.Interpol),
152 ],
153 'varname': [
154 (r'\s+', Text),
155 (r'\{', Punctuation, '#pop'), # hash syntax?
156 (r'\)|,', Punctuation, '#pop'), # argument specifier
157 (r'\w+::', Name.Namespace),
158 (r'[\w:]+', Name.Variable, '#pop'),
159 ],
160 'name': [
161 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
162 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
163 (r'[\w:]+', Name, '#pop'),
164 (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
165 (r'(?=\W)', Text, '#pop'),
166 ],
167 'funcname': [
168 (r'[a-zA-Z_]\w*[!?]?', Name.Function),
169 (r'\s+', Text),
170 # argument declaration
171 (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
172 (r';', Punctuation, '#pop'),
173 (r'.*?\{', Punctuation, '#pop'),
174 ],
175 'cb-string': [
176 (r'\\[{}\\]', String.Other),
177 (r'\\', String.Other),
178 (r'\{', String.Other, 'cb-string'),
179 (r'\}', String.Other, '#pop'),
180 (r'[^{}\\]+', String.Other)
181 ],
182 'rb-string': [
183 (r'\\[()\\]', String.Other),
184 (r'\\', String.Other),
185 (r'\(', String.Other, 'rb-string'),
186 (r'\)', String.Other, '#pop'),
187 (r'[^()]+', String.Other)
188 ],
189 'sb-string': [
190 (r'\\[\[\]\\]', String.Other),
191 (r'\\', String.Other),
192 (r'\[', String.Other, 'sb-string'),
193 (r'\]', String.Other, '#pop'),
194 (r'[^\[\]]+', String.Other)
195 ],
196 'lt-string': [
197 (r'\\[<>\\]', String.Other),
198 (r'\\', String.Other),
199 (r'\<', String.Other, 'lt-string'),
200 (r'\>', String.Other, '#pop'),
201 (r'[^<>]+', String.Other)
202 ],
203 'end-part': [
204 (r'.+', Comment.Preproc, '#pop')
205 ]
206 }
207
def analyse_text(text):
    """
    Rate how likely ``text`` is to be Perl source.

    Returns ``True`` when ``shebang_matches`` finds a ``perl`` shebang,
    ``0.9`` when the text contains ``my`` or ``our`` followed by
    whitespace and one of ``$``, ``@``, ``%`` or ``(``, and ``None``
    when neither check succeeds.
    """
    if not shebang_matches(text, r'perl'):
        # No shebang: fall back to looking for a variable declaration.
        declaration = re.search(r'(?:my|our)\s+[$@%(]', text)
        return 0.9 if declaration else None
    return True
213
214
215 class Perl6Lexer(ExtendedRegexLexer):
216 """
217 For `Perl 6 <http://www.perl6.org>`_ source code.
218
219 .. versionadded:: 2.0
220 """
221
222 name = 'Perl6'
223 aliases = ['perl6', 'pl6']
224 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
225 '*.6pm', '*.p6m', '*.pm6', '*.t']
226 mimetypes = ['text/x-perl6', 'application/x-perl6']
227 flags = re.MULTILINE | re.DOTALL | re.UNICODE
228
229 PERL6_IDENTIFIER_RANGE = r"['\w:-]"
230
231 PERL6_KEYWORDS = (
232 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT',
233 'KEEP', 'LAST', 'LEAVE', 'NEXT', 'POST', 'PRE', 'START', 'TEMP',
234 'UNDO', 'as', 'assoc', 'async', 'augment', 'binary', 'break', 'but',
235 'cached', 'category', 'class', 'constant', 'contend', 'continue',
236 'copy', 'deep', 'default', 'defequiv', 'defer', 'die', 'do', 'else',
237 'elsif', 'enum', 'equiv', 'exit', 'export', 'fail', 'fatal', 'for',
238 'gather', 'given', 'goto', 'grammar', 'handles', 'has', 'if', 'inline',
239 'irs', 'is', 'last', 'leave', 'let', 'lift', 'loop', 'looser', 'macro',
240 'make', 'maybe', 'method', 'module', 'multi', 'my', 'next', 'of',
241 'ofs', 'only', 'oo', 'ors', 'our', 'package', 'parsed', 'prec',
242 'proto', 'readonly', 'redo', 'ref', 'regex', 'reparsed', 'repeat',
243 'require', 'required', 'return', 'returns', 'role', 'rule', 'rw',
244 'self', 'slang', 'state', 'sub', 'submethod', 'subset', 'supersede',
245 'take', 'temp', 'tighter', 'token', 'trusts', 'try', 'unary',
246 'unless', 'until', 'use', 'warn', 'when', 'where', 'while', 'will',
247 )
248
249 PERL6_BUILTINS = (
250 'ACCEPTS', 'HOW', 'REJECTS', 'VAR', 'WHAT', 'WHENCE', 'WHERE', 'WHICH',
251 'WHO', 'abs', 'acos', 'acosec', 'acosech', 'acosh', 'acotan', 'acotanh',
252 'all', 'any', 'approx', 'arity', 'asec', 'asech', 'asin', 'asinh',
253 'assuming', 'atan', 'atan2', 'atanh', 'attr', 'bless', 'body', 'by',
254 'bytes', 'caller', 'callsame', 'callwith', 'can', 'capitalize', 'cat',
255 'ceiling', 'chars', 'chmod', 'chomp', 'chop', 'chr', 'chroot',
256 'circumfix', 'cis', 'classify', 'clone', 'close', 'cmp_ok', 'codes',
257 'comb', 'connect', 'contains', 'context', 'cos', 'cosec', 'cosech',
258 'cosh', 'cotan', 'cotanh', 'count', 'defined', 'delete', 'diag',
259 'dies_ok', 'does', 'e', 'each', 'eager', 'elems', 'end', 'eof', 'eval',
260 'eval_dies_ok', 'eval_elsewhere', 'eval_lives_ok', 'evalfile', 'exists',
261 'exp', 'first', 'flip', 'floor', 'flunk', 'flush', 'fmt', 'force_todo',
262 'fork', 'from', 'getc', 'gethost', 'getlogin', 'getpeername', 'getpw',
263 'gmtime', 'graphs', 'grep', 'hints', 'hyper', 'im', 'index', 'infix',
264 'invert', 'is_approx', 'is_deeply', 'isa', 'isa_ok', 'isnt', 'iterator',
265 'join', 'key', 'keys', 'kill', 'kv', 'lastcall', 'lazy', 'lc', 'lcfirst',
266 'like', 'lines', 'link', 'lives_ok', 'localtime', 'log', 'log10', 'map',
267 'max', 'min', 'minmax', 'name', 'new', 'nextsame', 'nextwith', 'nfc',
268 'nfd', 'nfkc', 'nfkd', 'nok_error', 'nonce', 'none', 'normalize', 'not',
269 'nothing', 'ok', 'once', 'one', 'open', 'opendir', 'operator', 'ord',
270 'p5chomp', 'p5chop', 'pack', 'pair', 'pairs', 'pass', 'perl', 'pi',
271 'pick', 'plan', 'plan_ok', 'polar', 'pop', 'pos', 'postcircumfix',
272 'postfix', 'pred', 'prefix', 'print', 'printf', 'push', 'quasi',
273 'quotemeta', 'rand', 're', 'read', 'readdir', 'readline', 'reduce',
274 'reverse', 'rewind', 'rewinddir', 'rindex', 'roots', 'round',
275 'roundrobin', 'run', 'runinstead', 'sameaccent', 'samecase', 'say',
276 'sec', 'sech', 'sech', 'seek', 'shape', 'shift', 'sign', 'signature',
277 'sin', 'sinh', 'skip', 'skip_rest', 'sleep', 'slurp', 'sort', 'splice',
278 'split', 'sprintf', 'sqrt', 'srand', 'strand', 'subst', 'substr', 'succ',
279 'sum', 'symlink', 'tan', 'tanh', 'throws_ok', 'time', 'times', 'to',
280 'todo', 'trim', 'trim_end', 'trim_start', 'true', 'truncate', 'uc',
281 'ucfirst', 'undef', 'undefine', 'uniq', 'unlike', 'unlink', 'unpack',
282 'unpolar', 'unshift', 'unwrap', 'use_ok', 'value', 'values', 'vec',
283 'version_lt', 'void', 'wait', 'want', 'wrap', 'write', 'zip',
284 )
285
286 PERL6_BUILTIN_CLASSES = (
287 'Abstraction', 'Any', 'AnyChar', 'Array', 'Associative', 'Bag', 'Bit',
288 'Blob', 'Block', 'Bool', 'Buf', 'Byte', 'Callable', 'Capture', 'Char', 'Class',
289 'Code', 'Codepoint', 'Comparator', 'Complex', 'Decreasing', 'Exception',
290 'Failure', 'False', 'Grammar', 'Grapheme', 'Hash', 'IO', 'Increasing',
291 'Int', 'Junction', 'KeyBag', 'KeyExtractor', 'KeyHash', 'KeySet',
292 'KitchenSink', 'List', 'Macro', 'Mapping', 'Match', 'Matcher', 'Method',
293 'Module', 'Num', 'Object', 'Ordered', 'Ordering', 'OrderingPair',
294 'Package', 'Pair', 'Positional', 'Proxy', 'Range', 'Rat', 'Regex',
295 'Role', 'Routine', 'Scalar', 'Seq', 'Set', 'Signature', 'Str', 'StrLen',
296 'StrPos', 'Sub', 'Submethod', 'True', 'UInt', 'Undef', 'Version', 'Void',
297 'Whatever', 'bit', 'bool', 'buf', 'buf1', 'buf16', 'buf2', 'buf32',
298 'buf4', 'buf64', 'buf8', 'complex', 'int', 'int1', 'int16', 'int2',
299 'int32', 'int4', 'int64', 'int8', 'num', 'rat', 'rat1', 'rat16', 'rat2',
300 'rat32', 'rat4', 'rat64', 'rat8', 'uint', 'uint1', 'uint16', 'uint2',
301 'uint32', 'uint4', 'uint64', 'uint8', 'utf16', 'utf32', 'utf8',
302 )
303
304 PERL6_OPERATORS = (
305 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
306 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
307 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
308 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
309 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
310 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
311 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
312 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
313 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
314 'not', '<==', '==>', '<<==', '==>>',
315 )
316
317 # Perl 6 has a *lot* of possible bracketing characters
318 # this list was lifted from STD.pm6 (https://github.com/perl6/std)
319 PERL6_BRACKETS = {
320 u'\u0028': u'\u0029', u'\u003c': u'\u003e', u'\u005b': u'\u005d',
321 u'\u007b': u'\u007d', u'\u00ab': u'\u00bb', u'\u0f3a': u'\u0f3b',
322 u'\u0f3c': u'\u0f3d', u'\u169b': u'\u169c', u'\u2018': u'\u2019',
323 u'\u201a': u'\u2019', u'\u201b': u'\u2019', u'\u201c': u'\u201d',
324 u'\u201e': u'\u201d', u'\u201f': u'\u201d', u'\u2039': u'\u203a',
325 u'\u2045': u'\u2046', u'\u207d': u'\u207e', u'\u208d': u'\u208e',
326 u'\u2208': u'\u220b', u'\u2209': u'\u220c', u'\u220a': u'\u220d',
327 u'\u2215': u'\u29f5', u'\u223c': u'\u223d', u'\u2243': u'\u22cd',
328 u'\u2252': u'\u2253', u'\u2254': u'\u2255', u'\u2264': u'\u2265',
329 u'\u2266': u'\u2267', u'\u2268': u'\u2269', u'\u226a': u'\u226b',
330 u'\u226e': u'\u226f', u'\u2270': u'\u2271', u'\u2272': u'\u2273',
331 u'\u2274': u'\u2275', u'\u2276': u'\u2277', u'\u2278': u'\u2279',
332 u'\u227a': u'\u227b', u'\u227c': u'\u227d', u'\u227e': u'\u227f',
333 u'\u2280': u'\u2281', u'\u2282': u'\u2283', u'\u2284': u'\u2285',
334 u'\u2286': u'\u2287', u'\u2288': u'\u2289', u'\u228a': u'\u228b',
335 u'\u228f': u'\u2290', u'\u2291': u'\u2292', u'\u2298': u'\u29b8',
336 u'\u22a2': u'\u22a3', u'\u22a6': u'\u2ade', u'\u22a8': u'\u2ae4',
337 u'\u22a9': u'\u2ae3', u'\u22ab': u'\u2ae5', u'\u22b0': u'\u22b1',
338 u'\u22b2': u'\u22b3', u'\u22b4': u'\u22b5', u'\u22b6': u'\u22b7',
339 u'\u22c9': u'\u22ca', u'\u22cb': u'\u22cc', u'\u22d0': u'\u22d1',
340 u'\u22d6': u'\u22d7', u'\u22d8': u'\u22d9', u'\u22da': u'\u22db',
341 u'\u22dc': u'\u22dd', u'\u22de': u'\u22df', u'\u22e0': u'\u22e1',
342 u'\u22e2': u'\u22e3', u'\u22e4': u'\u22e5', u'\u22e6': u'\u22e7',
343 u'\u22e8': u'\u22e9', u'\u22ea': u'\u22eb', u'\u22ec': u'\u22ed',
344 u'\u22f0': u'\u22f1', u'\u22f2': u'\u22fa', u'\u22f3': u'\u22fb',
345 u'\u22f4': u'\u22fc', u'\u22f6': u'\u22fd', u'\u22f7': u'\u22fe',
346 u'\u2308': u'\u2309', u'\u230a': u'\u230b', u'\u2329': u'\u232a',
347 u'\u23b4': u'\u23b5', u'\u2768': u'\u2769', u'\u276a': u'\u276b',
348 u'\u276c': u'\u276d', u'\u276e': u'\u276f', u'\u2770': u'\u2771',
349 u'\u2772': u'\u2773', u'\u2774': u'\u2775', u'\u27c3': u'\u27c4',
350 u'\u27c5': u'\u27c6', u'\u27d5': u'\u27d6', u'\u27dd': u'\u27de',
351 u'\u27e2': u'\u27e3', u'\u27e4': u'\u27e5', u'\u27e6': u'\u27e7',
352 u'\u27e8': u'\u27e9', u'\u27ea': u'\u27eb', u'\u2983': u'\u2984',
353 u'\u2985': u'\u2986', u'\u2987': u'\u2988', u'\u2989': u'\u298a',
354 u'\u298b': u'\u298c', u'\u298d': u'\u298e', u'\u298f': u'\u2990',
355 u'\u2991': u'\u2992', u'\u2993': u'\u2994', u'\u2995': u'\u2996',
356 u'\u2997': u'\u2998', u'\u29c0': u'\u29c1', u'\u29c4': u'\u29c5',
357 u'\u29cf': u'\u29d0', u'\u29d1': u'\u29d2', u'\u29d4': u'\u29d5',
358 u'\u29d8': u'\u29d9', u'\u29da': u'\u29db', u'\u29f8': u'\u29f9',
359 u'\u29fc': u'\u29fd', u'\u2a2b': u'\u2a2c', u'\u2a2d': u'\u2a2e',
360 u'\u2a34': u'\u2a35', u'\u2a3c': u'\u2a3d', u'\u2a64': u'\u2a65',
361 u'\u2a79': u'\u2a7a', u'\u2a7d': u'\u2a7e', u'\u2a7f': u'\u2a80',
362 u'\u2a81': u'\u2a82', u'\u2a83': u'\u2a84', u'\u2a8b': u'\u2a8c',
363 u'\u2a91': u'\u2a92', u'\u2a93': u'\u2a94', u'\u2a95': u'\u2a96',
364 u'\u2a97': u'\u2a98', u'\u2a99': u'\u2a9a', u'\u2a9b': u'\u2a9c',
365 u'\u2aa1': u'\u2aa2', u'\u2aa6': u'\u2aa7', u'\u2aa8': u'\u2aa9',
366 u'\u2aaa': u'\u2aab', u'\u2aac': u'\u2aad', u'\u2aaf': u'\u2ab0',
367 u'\u2ab3': u'\u2ab4', u'\u2abb': u'\u2abc', u'\u2abd': u'\u2abe',
368 u'\u2abf': u'\u2ac0', u'\u2ac1': u'\u2ac2', u'\u2ac3': u'\u2ac4',
369 u'\u2ac5': u'\u2ac6', u'\u2acd': u'\u2ace', u'\u2acf': u'\u2ad0',
370 u'\u2ad1': u'\u2ad2', u'\u2ad3': u'\u2ad4', u'\u2ad5': u'\u2ad6',
371 u'\u2aec': u'\u2aed', u'\u2af7': u'\u2af8', u'\u2af9': u'\u2afa',
372 u'\u2e02': u'\u2e03', u'\u2e04': u'\u2e05', u'\u2e09': u'\u2e0a',
373 u'\u2e0c': u'\u2e0d', u'\u2e1c': u'\u2e1d', u'\u2e20': u'\u2e21',
374 u'\u3008': u'\u3009', u'\u300a': u'\u300b', u'\u300c': u'\u300d',
375 u'\u300e': u'\u300f', u'\u3010': u'\u3011', u'\u3014': u'\u3015',
376 u'\u3016': u'\u3017', u'\u3018': u'\u3019', u'\u301a': u'\u301b',
377 u'\u301d': u'\u301e', u'\ufd3e': u'\ufd3f', u'\ufe17': u'\ufe18',
378 u'\ufe35': u'\ufe36', u'\ufe37': u'\ufe38', u'\ufe39': u'\ufe3a',
379 u'\ufe3b': u'\ufe3c', u'\ufe3d': u'\ufe3e', u'\ufe3f': u'\ufe40',
380 u'\ufe41': u'\ufe42', u'\ufe43': u'\ufe44', u'\ufe47': u'\ufe48',
381 u'\ufe59': u'\ufe5a', u'\ufe5b': u'\ufe5c', u'\ufe5d': u'\ufe5e',
382 u'\uff08': u'\uff09', u'\uff1c': u'\uff1e', u'\uff3b': u'\uff3d',
383 u'\uff5b': u'\uff5d', u'\uff5f': u'\uff60', u'\uff62': u'\uff63',
384 }
385
def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
    """
    Build the source of a regex that matches any one of ``words``.

    Every word is passed through ``re.escape`` and the results are joined
    with ``|``.  The alternation always sits in its own group, so
    ``prefix`` and ``suffix`` apply to every word rather than only to the
    first and the last alternative.

    ``words`` is an iterable of literal strings.

    ``boundary_regex_fragment`` is a regex fragment describing characters
    that must not appear directly before or after the match; it is placed
    in a negative lookbehind and a negative lookahead.  When it is
    ``None`` a word-boundary assertion is used on both sides instead.

    ``prefix`` and ``suffix`` are regex fragments placed before and after
    the alternation.

    Returns the pattern as a string.  With ``boundary_regex_fragment``
    given, group 1 captures only the word; otherwise group 1 captures
    prefix, word and suffix together.
    """
    alternation = r'|'.join(re.escape(word) for word in words)

    if boundary_regex_fragment is None:
        # The inner non-capturing group is required: without it the
        # pattern reads "prefix+first_word | ... | last_word+suffix".
        # Being non-capturing, it leaves group numbering untouched.
        return r'\b(' + prefix + r'(?:' + alternation + r')' + suffix + r')\b'

    return (r'(?<!' + boundary_regex_fragment + r')' + prefix +
            r'(' + alternation + r')' + suffix +
            r'(?!' + boundary_regex_fragment + r')')
394
def brackets_callback(token_class):
    """
    Create a lexer callback that emits one ``token_class`` token spanning
    a delimited construct.

    The match handed to the callback must have a named group
    ``delimiter`` (a run of one repeated opening character) and may have
    a named group ``adverbs``.  The callback yields a single
    ``(start, token_class, text)`` triple and then moves ``context.pos``
    past the closing delimiter.
    """
    def callback(lexer, match, context):
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')

        source = context.text
        # First position after the opening delimiter run.
        body_start = match.start('delimiter') + width
        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is None:
            # Not a mirrored character: the construct ends at the next
            # occurrence of the very same delimiter.
            end_pos = source.find(opener, body_start)
        else:
            # Mirrored bracket: walk forward counting nested
            # opener/closer runs until the matching closer is reached.
            closing = mirror * width
            depth = 1
            cursor = body_start

            while depth > 0:
                open_at = source.find(opener, cursor)
                close_at = source.find(closing, cursor)

                if close_at == -1:
                    # No closer left at all: stop at end of text.
                    close_at = len(source)
                    depth = 0
                elif open_at != -1 and open_at < close_at:
                    depth += 1
                    cursor = open_at + width
                else:
                    # The closer comes first (or no opener remains).
                    depth -= 1
                    cursor = close_at + width

            end_pos = close_at

        if end_pos < 0:
            # No closer found: give the rest of the text this class.
            end_pos = len(source)

        if adverbs is not None and re.search(r':to\b', adverbs):
            # ":to" adverb: the delimited text is a terminator, and the
            # token extends through its next line-anchored occurrence.
            terminator = source[body_start:end_pos]
            terminator_re = re.compile(
                r'^\s*' + re.escape(terminator) + r'\s*$', re.MULTILINE)
            found = terminator_re.search(source[end_pos:])

            if found:
                end_pos += found.end()
            else:
                end_pos = len(source)

        stop = end_pos + width
        yield match.start(), token_class, source[match.start():stop]
        context.pos = stop

    return callback
450
def opening_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and track brace nesting.

    After yielding the token and advancing ``context.pos`` to the end of
    the match, ``context.perl6_token_nesting_level`` is incremented when
    the state stack has more than two entries and the state one level
    below the current one is ``'token'``.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # An opening brace seen one level below a 'token' state deepens the
    # nesting, so that the matching closing brace can later tell when to
    # return to the token rules.
    below_token_state = len(state_stack) > 2 and state_stack[-2] == 'token'
    if below_token_state:
        context.perl6_token_nesting_level += 1
463
def closing_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and unwind brace nesting.

    After yielding the token and advancing ``context.pos`` to the end of
    the match, ``context.perl6_token_nesting_level`` is decremented when
    the state stack has more than two entries and the state one level
    below the current one is ``'token'``.  When the level reaches zero
    the top state is popped off the stack.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Only a free closing brace one level below a 'token' state matters.
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        # Every brace opened since entering the embedded code is closed:
        # drop back to the state underneath.
        state_stack.pop()
477
def embedded_perl6_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and switch to the ``'root'`` state.

    ``context.perl6_token_nesting_level`` is reset to 1 before the token
    is yielded; afterwards ``context.pos`` is moved to the end of the
    match and ``'root'`` is pushed onto ``context.stack``.
    """
    context.perl6_token_nesting_level = 1

    begin, finish = match.span()
    yield begin, Text, context.text[begin:finish]

    context.pos = finish
    context.stack.append('root')
483
484 # If you're modifying these rules, be careful if you need to process '{' or '}'
485 # characters. We have special logic for processing these characters (due to the fact
486 # that you can nest Perl 6 code in regex blocks), so if you need to process one of
487 # them, make sure you also process the corresponding one!
488 tokens = {
489 'common': [
490 (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
491 brackets_callback(Comment.Multiline)),
492 (r'#[^\n]*$', Comment.Single),
493 (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
494 (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
495 (r'^=.*?\n\s*?\n', Comment.Multiline),
496 (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
497 bygroups(Keyword, Name), 'token-sym-brackets'),
498 (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
499 bygroups(Keyword, Name), 'pre-token'),
500 # deal with a special case in the Perl 6 grammar (role q { ... })
501 (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
502 (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
503 (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
504 Name.Builtin),
505 (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
506 # copied from PerlLexer
507 (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
508 Name.Variable),
509 (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
510 (r'::\?\w+', Name.Variable.Global),
511 (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*',
512 Name.Variable.Global),
513 (r'\$(?:<.*?>)+', Name.Variable),
514 (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
515 r'(?P=first_char)*)', brackets_callback(String)),
516 # copied from PerlLexer
517 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
518 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
519 (r'0b[01]+(_[01]+)*', Number.Bin),
520 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
521 Number.Float),
522 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
523 (r'\d+(_\d+)*', Number.Integer),
524 (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
525 (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
526 (r'm\w+(?=\()', Name),
527 (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
528 r'(?P=first_char)*)', brackets_callback(String.Regex)),
529 (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
530 String.Regex),
531 (r'<[^\s=].*?\S>', String),
532 (_build_word_match(PERL6_OPERATORS), Operator),
533 (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
534 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
535 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
536 ],
537 'root': [
538 include('common'),
539 (r'\{', opening_brace_callback),
540 (r'\}', closing_brace_callback),
541 (r'.+?', Text),
542 ],
543 'pre-token': [
544 include('common'),
545 (r'\{', Text, ('#pop', 'token')),
546 (r'.+?', Text),
547 ],
548 'token-sym-brackets': [
549 (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
550 brackets_callback(Name), ('#pop', 'pre-token')),
551 default(('#pop', 'pre-token')),
552 ],
553 'token': [
554 (r'\}', Text, '#pop'),
555 (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
556 # make sure that quotes in character classes aren't treated as strings
557 (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
558 # make sure that '#' characters in quotes aren't treated as comments
559 (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
560 (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
561 (r'#.*?$', Comment.Single),
562 (r'\{', embedded_perl6_callback),
563 ('.+?', String.Regex),
564 ],
565 }
566
def analyse_text(text):
    """
    Rate how likely ``text`` is to be Perl 6 source.

    Lines inside POD sections are discarded first.  The result is
    ``True`` when a perl6/rakudo/niecza/pugs shebang is found, when a
    ``v6`` version statement is seen, or when a package-like declaration
    (module, class, role, enum, grammar) is either scoped with
    ``my``/``our`` or accompanied by a my/our/has declaration elsewhere
    in the text.  Otherwise the result is ``0.8`` for a my/our/has
    declaration alone, ``0.05`` for an unscoped package-like declaration
    alone, and ``False`` when nothing matched.
    """
    # XXX handle block comments

    # Drop POD: a line starting with "=word" opens a section, a line
    # starting with "=end" or "=cut" closes it; the marker lines
    # themselves are discarded too.
    code_lines = []
    inside_pod = False
    for raw in text.splitlines():
        if re.match(r'^=(?:end|cut)', raw):
            inside_pod = False
        elif re.match(r'^=\w+', raw):
            inside_pod = True
        elif not inside_pod:
            code_lines.append(raw)
    text = '\n'.join(code_lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    # check for my/our/has declarations
    declaration_re = (r"(?:my|our|has)\s+(?:" +
                      Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                      r"+\s+)?[$@%&(]")
    saw_perl_decl = re.search(declaration_re, text) is not None
    rating = 0.8 if saw_perl_decl else False

    # Inspect leading statements only: the scan stops at the first line
    # that is neither blank/comment-only nor a recognised declaration.
    for raw in code_lines:
        statement = re.sub('#.*', '', raw)
        if re.match(r'^\s*$', statement):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', statement):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', statement)
        if class_decl is None:
            break
        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        rating = 0.05

    return rating
617
def __init__(self, **options):
    """
    Initialise the lexer, defaulting the encoding to UTF-8.

    All ``options`` are forwarded to the parent initialiser; afterwards
    ``self.encoding`` is set to the ``'encoding'`` option, or to
    ``'utf-8'`` when that option is absent.
    """
    super(Perl6Lexer, self).__init__(**options)
    requested = options['encoding'] if 'encoding' in options else 'utf-8'
    self.encoding = requested

eric ide

mercurial