eric6/ThirdParty/Pygments/pygments/lexers/perl.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
equal deleted inserted replaced
8257:28146736bbfc 8258:82b608e352ec
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.perl
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Perl, Raku and related languages.
7
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 using, this, default, words
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation
18 from pygments.util import shebang_matches
19
20 __all__ = ['PerlLexer', 'Perl6Lexer']
21
22
23 class PerlLexer(RegexLexer):
24 """
25 For `Perl <https://www.perl.org>`_ source code.
26 """
27
28 name = 'Perl'
29 aliases = ['perl', 'pl']
30 filenames = ['*.pl', '*.pm', '*.t', '*.perl']
31 mimetypes = ['text/x-perl', 'application/x-perl']
32
33 flags = re.DOTALL | re.MULTILINE
34 # TODO: give this to a perl guy who knows how to parse perl...
35 tokens = {
36 'balanced-regex': [
37 (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
38 (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
39 (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
40 (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
41 (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
42 (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
43 (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
44 (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
45 (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
46 (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
47 ],
48 'root': [
49 (r'\A\#!.+?$', Comment.Hashbang),
50 (r'\#.*?$', Comment.Single),
51 (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
52 (words((
53 'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
54 'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
55 'unless', 'until', 'while', 'print', 'new', 'BEGIN',
56 'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
57 Keyword),
58 (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
59 bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
60 (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
61 # common delimiters
62 (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
63 String.Regex),
64 (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
65 (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
66 (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
67 String.Regex),
68 (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
69 String.Regex),
70 # balanced delimiters
71 (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
72 (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
73 (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
74 'balanced-regex'),
75 (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
76 'balanced-regex'),
77
78 (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
79 (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
80 (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
81 String.Regex),
82 (r'\s+', Text),
83 (words((
84 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
85 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
86 'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
87 'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
88 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
89 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
90 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
91 'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
92 'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
93 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
94 'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
95 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
96 'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
97 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
98 'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
99 'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
100 'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
101 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
102 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
103 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
104 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
105 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
106 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
107 'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
108 'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
109 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
110 Name.Builtin),
111 (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
112 (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
113 bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
114 (r'__END__', Comment.Preproc, 'end-part'),
115 (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
116 (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
117 (r'[$@%#]+', Name.Variable, 'varname'),
118 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
119 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
120 (r'0b[01]+(_[01]+)*', Number.Bin),
121 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
122 Number.Float),
123 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
124 (r'\d+(_\d+)*', Number.Integer),
125 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
126 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
127 (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
128 (r'<([^\s>]+)>', String.Regex),
129 (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
130 (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
131 (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
132 (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
133 (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
134 (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
135 bygroups(Keyword, Text, Name.Namespace)),
136 (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
137 bygroups(Keyword, Text, Name.Namespace)),
138 (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
139 (words((
140 'no', 'package', 'require', 'use'), suffix=r'\b'),
141 Keyword),
142 (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
143 r'!~|&&?|\|\||\.{1,3})', Operator),
144 (r'[-+/*%=<>&^|!\\~]=?', Operator),
145 (r'[()\[\]:;,<>/?{}]', Punctuation), # yes, there's no shortage
146 # of punctuation in Perl!
147 (r'(?=\w)', Name, 'name'),
148 ],
149 'format': [
150 (r'\.\n', String.Interpol, '#pop'),
151 (r'[^\n]*\n', String.Interpol),
152 ],
153 'varname': [
154 (r'\s+', Text),
155 (r'\{', Punctuation, '#pop'), # hash syntax?
156 (r'\)|,', Punctuation, '#pop'), # argument specifier
157 (r'\w+::', Name.Namespace),
158 (r'[\w:]+', Name.Variable, '#pop'),
159 ],
160 'name': [
161 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
162 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
163 (r'[\w:]+', Name, '#pop'),
164 (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
165 (r'(?=\W)', Text, '#pop'),
166 ],
167 'funcname': [
168 (r'[a-zA-Z_]\w*[!?]?', Name.Function),
169 (r'\s+', Text),
170 # argument declaration
171 (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
172 (r';', Punctuation, '#pop'),
173 (r'.*?\{', Punctuation, '#pop'),
174 ],
175 'cb-string': [
176 (r'\\[{}\\]', String.Other),
177 (r'\\', String.Other),
178 (r'\{', String.Other, 'cb-string'),
179 (r'\}', String.Other, '#pop'),
180 (r'[^{}\\]+', String.Other)
181 ],
182 'rb-string': [
183 (r'\\[()\\]', String.Other),
184 (r'\\', String.Other),
185 (r'\(', String.Other, 'rb-string'),
186 (r'\)', String.Other, '#pop'),
187 (r'[^()]+', String.Other)
188 ],
189 'sb-string': [
190 (r'\\[\[\]\\]', String.Other),
191 (r'\\', String.Other),
192 (r'\[', String.Other, 'sb-string'),
193 (r'\]', String.Other, '#pop'),
194 (r'[^\[\]]+', String.Other)
195 ],
196 'lt-string': [
197 (r'\\[<>\\]', String.Other),
198 (r'\\', String.Other),
199 (r'\<', String.Other, 'lt-string'),
200 (r'\>', String.Other, '#pop'),
201 (r'[^<>]+', String.Other)
202 ],
203 'end-part': [
204 (r'.+', Comment.Preproc, '#pop')
205 ]
206 }
207
def analyse_text(text):
    """
    Estimate how likely it is that *text* is Perl source code.

    Returns ``True`` when the shebang line mentions ``perl``; otherwise a
    number: 0.9 when a ``my``/``our`` declaration followed by a sigil or
    an opening parenthesis is found, else 0.  The number is halved when
    the text contains ``:=``.
    """
    if shebang_matches(text, r'perl'):
        return True

    declares_variable = re.search(r'(?:my|our)\s+[$@%(]', text) is not None
    confidence = 0.9 if declares_variable else 0

    # ``:=`` is not valid Perl but does show up in Unicon sources, so its
    # presence makes a Perl guess less trustworthy.
    if ':=' in text:
        confidence = confidence / 2

    return confidence
223
224
225 class Perl6Lexer(ExtendedRegexLexer):
226 """
227 For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code.
228
229 .. versionadded:: 2.0
230 """
231
232 name = 'Perl6'
233 aliases = ['perl6', 'pl6', 'raku']
234 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
235 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
236 '*.rakutest', '*.rakudoc']
237 mimetypes = ['text/x-perl6', 'application/x-perl6']
238 flags = re.MULTILINE | re.DOTALL | re.UNICODE
239
240 PERL6_IDENTIFIER_RANGE = r"['\w:-]"
241
242 PERL6_KEYWORDS = (
243 #Phasers
244 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
245 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
246 #Keywords
247 'anon','augment','but','class','constant','default','does','else',
248 'elsif','enum','for','gather','given','grammar','has','if','import',
249 'is','let','loop','made','make','method','module','multi','my','need',
250 'orwith','our','proceed','proto','repeat','require','return',
251 'return-rw','returns','role','rule','state','sub','submethod','subset',
252 'succeed','supersede','token','try','unit','unless','until','use',
253 'when','while','with','without',
254 #Traits
255 'export','native','repr','required','rw','symbol',
256 )
257
258 PERL6_BUILTINS = (
259 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
260 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
261 'actions','add','add_attribute','add_enum_value','add_fallback',
262 'add_method','add_parent','add_private_method','add_role','add_trustee',
263 'adverb','after','all','allocate','allof','allowed','alternative-names',
264 'annotations','antipair','antipairs','any','anyof','app_lifetime',
265 'append','arch','archname','args','arity','Array','asec','asech','asin',
266 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
267 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
268 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
269 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
270 'await','backtrace','Bag','BagHash','bail-out','base','basename',
271 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
272 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
273 'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
274 'bytes','cache','callframe','calling-package','CALL-ME','callsame',
275 'callwith','can','cancel','candidates','cando','can-ok','canonpath',
276 'caps','caption','Capture','cas','catdir','categorize','categorize-list',
277 'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
278 'chars','chdir','child','child-name','child-typename','chmod','chomp',
279 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
280 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
281 'column','comb','combinations','command','comment','compiler','Complex',
282 'compose','compose_type','composer','condition','config',
283 'configure_destroy','configure_type_checking','conj','connect',
284 'constraints','construct','contains','contents','copy','cos','cosec',
285 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
286 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
287 'Date','DateTime','day','daycount','day-of-month','day-of-week',
288 'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
289 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
290 'denominator','desc','DESTROY','destroyers','devnull','diag',
291 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
292 'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
293 'eager','earlier','elems','emit','enclosing','encode','encoder',
294 'encoding','end','ends-with','enum_from_value','enum_value_list',
295 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
296 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
297 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
298 'expmod','extension','f','fail','fails-like','fc','feature','file',
299 'filename','find_method','find_method_qualified','finish','first','flat',
300 'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
301 'freeze','from','from-list','from-loop','from-posix','full',
302 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
303 'grep','handle','handled','handles','hardware','has_accessor','Hash',
304 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
305 'illegal','im','in','indent','index','indices','indir','infinite',
306 'infix','infix:<+>','infix:<->','install_method_cache','Instant',
307 'instead','Int','int-bounds','interval','in-timezone','invalid-str',
308 'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
309 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
310 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
311 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
312 'is-win','item','iterator','join','keep','kept','KERNELnames','key',
313 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
314 'lazy','lc','leading','level','like','line','lines','link','List',
315 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
316 'made','MAIN','make','Map','match','max','maxpairs','merge','message',
317 'method','method_table','methods','migrate','min','minmax','minpairs',
318 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
319 'move','mro','msb','multi','multiness','my','name','named','named_names',
320 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
321 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
322 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
323 'nodemap','nok','none','norm','not','note','now','nude','Num',
324 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
325 'ok','old','on-close','one','on-switch','open','opened','operation',
326 'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
327 'package','package-kind','package-name','packages','pair','pairs',
328 'pairup','parameter','params','parent','parent-name','parents','parse',
329 'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
330 'payload','peer-host','peer-port','periods','perl','permutations','phaser',
331 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
332 'polymod','pop','pos','positional','posix','postfix','postmatch',
333 'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
334 'print','printf','print-nl','print-to','private','private_method_table',
335 'proc','produce','Promise','prompt','protect','pull-one','push',
336 'push-all','push-at-least','push-exactly','push-until-lazy','put',
337 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
338 're','read','readchars','readonly','ready','Real','reallocate','reals',
339 'reason','rebless','receive','recv','redispatcher','redo','reduce',
340 'rel2abs','relative','release','rename','repeated','replacement',
341 'report','reserved','resolve','restore','result','resume','rethrow',
342 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
343 'roll','rootdir','roots','rotate','rotor','round','roundrobin',
344 'routine-type','run','rwx','s','samecase','samemark','samewith','say',
345 'schedule-on','scheduler','scope','sec','sech','second','seek','self',
346 'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
347 'SetHash','set-instruments','setup_finalization','shape','share','shell',
348 'shift','sibling','sigil','sign','signal','signals','signature','sin',
349 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
350 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
351 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
352 'sort','source','source-package','spawn','SPEC','splice','split',
353 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
354 'start','started','starts-with','status','stderr','stdout','Str',
355 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
356 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
357 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
358 'target','target-name','tc','tclc','tell','then','throttle','throw',
359 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
360 'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
361 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
362 'type_captures','typename','uc','udp','uncaught_handler','unimatch',
363 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
364 'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
365 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
366 'variable','verbose-config','version','VMnames','volume','vow','w','wait',
367 'warn','watch','watch-path','week','weekday-of-month','week-number',
368 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
369 'whole-second','WHY','wordcase','words','workaround','wrap','write',
370 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',
371
372 )
373
374 PERL6_BUILTIN_CLASSES = (
375 #Booleans
376 'False','True',
377 #Classes
378 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
379 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
380 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
381 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
382 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
383 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
384 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
385 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
386 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
387 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
388 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
389 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
390 'Lock','Lock::Async','long','longlong','Macro','Map','Match',
391 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
392 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
393 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
394 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
395 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
396 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
397 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
398 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
399 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
400 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
401 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
402 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
403 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
404 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
405 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
406 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
407 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
408 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
409 'Version','VM','Whatever','WhateverCode','WrapHandle'
410 )
411
412 PERL6_OPERATORS = (
413 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
414 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
415 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
416 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
417 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
418 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
419 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
420 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
421 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
422 'not', '<==', '==>', '<<==', '==>>','unicmp',
423 )
424
425 # Perl 6 has a *lot* of possible bracketing characters
426 # this list was lifted from STD.pm6 (https://github.com/perl6/std)
427 PERL6_BRACKETS = {
428 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
429 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
430 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
431 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
432 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
433 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
434 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
435 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
436 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
437 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
438 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
439 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
440 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
441 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
442 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
443 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
444 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
445 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
446 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
447 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
448 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
449 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
450 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
451 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
452 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
453 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
454 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
455 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
456 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
457 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
458 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
459 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
460 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
461 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
462 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
463 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
464 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
465 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
466 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
467 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
468 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
469 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
470 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
471 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
472 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
473 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
474 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
475 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
476 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
477 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
478 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
479 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
480 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
481 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
482 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
483 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
484 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
485 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
486 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
487 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
488 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
489 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
490 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
491 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
492 }
493
def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
    """
    Build the source of a regex that matches any one of *words*.

    :param words: iterable of literal strings; each one is escaped with
        ``re.escape`` before being joined into an alternation.
    :param boundary_regex_fragment: when ``None`` the match is delimited
        with ``\\b`` on both sides.  Otherwise it is a regex fragment that
        must *not* match directly before (negative lookbehind) or directly
        after (negative lookahead) the word.
    :param prefix: regex fragment required in front of every word.
    :param suffix: regex fragment required after every word.
    :return: the regex as a string.  In the ``\\b`` form, group 1 spans
        prefix + word + suffix; in the lookaround form, group 1 spans the
        word only.
    """
    alternatives = r'|'.join(re.escape(word) for word in words)

    if boundary_regex_fragment is None:
        # The alternation needs its own (non-capturing) group: without it
        # ``prefix`` would only bind to the first word and ``suffix`` only
        # to the last one, because ``|`` has the lowest precedence.
        return r'\b(' + prefix + r'(?:' + alternatives + r')' + suffix + r')\b'

    return (r'(?<!' + boundary_regex_fragment + r')' + prefix +
            r'(' + alternatives + r')' + suffix +
            r'(?!' + boundary_regex_fragment + r')')
502
def brackets_callback(token_class):
    """
    Create a lexer callback that consumes a delimited construct.

    The regex that triggers the callback must define a named group
    ``delimiter`` (the opening delimiter, possibly a repeated character)
    and may define a named group ``adverbs``.  The returned callback
    emits a single token of *token_class* running from the start of the
    match to just past the closing delimiter, and moves ``context.pos``
    to the same place.
    """
    def callback(lexer, match, context):
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')

        source = context.text
        token_start = match.start()
        body_start = match.start('delimiter') + width

        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is not None:
            # Mirrored bracket: scan for the matching closer while keeping
            # track of nested opener/closer pairs.
            closing = mirror * width
            depth = 1
            cursor = body_start

            while True:
                open_at = source.find(opener, cursor)
                close_at = source.find(closing, cursor)

                if close_at == -1:
                    # Unterminated: treat the end of the text as the closer.
                    close_at = len(source)
                    break

                if open_at != -1 and open_at < close_at:
                    depth += 1
                    cursor = open_at + width
                    continue

                depth -= 1
                if depth == 0:
                    break
                cursor = close_at + width

            stop = close_at
        else:
            # Not a mirrored character: the same delimiter closes the
            # construct, so the next occurrence ends it.
            stop = source.find(opener, body_start)
            if stop < 0:
                # No closer found; the rest of the text belongs to the token.
                stop = len(source)

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc: the delimited text is the terminator; the token
            # extends to the line consisting of that terminator.
            terminator = source[body_start:stop]
            terminator_line = re.search(
                r'^\s*' + re.escape(terminator) + r'\s*$',
                source[stop:], re.MULTILINE)

            if terminator_line:
                stop = stop + terminator_line.end()
            else:
                stop = len(source)

        stop += width
        yield token_start, token_class, source[token_start:stop]
        context.pos = stop

    return callback
558
def opening_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and track brace nesting.

    When the state directly below the current one is ``'token'`` (and the
    stack holds more than two states), the brace opens a nested block
    inside code embedded in a token/regex body, so the nesting counter on
    the context is incremented.  The closing-brace callback uses that
    counter to decide when to return to the token rules.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    below_is_token = len(state_stack) > 2 and state_stack[-2] == 'token'
    if below_is_token:
        context.perl6_token_nesting_level += 1
571
def closing_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and unwind brace nesting.

    When the state directly below the current one is ``'token'`` (and the
    stack holds more than two states), the nesting counter on the context
    is decremented; once it reaches zero the current state is popped so
    lexing continues with the token rules.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Braces outside of code embedded in a token body need no bookkeeping.
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        state_stack.pop()
585
def embedded_perl6_callback(lexer, match, context):
    """
    Enter a code block embedded in a token/regex body.

    Resets the brace nesting counter on the context to 1, emits the
    matched text as ``Text`` and pushes the ``'root'`` state.
    """
    context.perl6_token_nesting_level = 1

    begin, finish = match.span()
    yield begin, Text, context.text[begin:finish]

    context.pos = finish
    context.stack.append('root')
591
592 # If you're modifying these rules, be careful if you need to process '{' or '}'
593 # characters. We have special logic for processing these characters (due to the fact
594 # that you can nest Perl 6 code in regex blocks), so if you need to process one of
595 # them, make sure you also process the corresponding one!
596 tokens = {
597 'common': [
598 (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
599 brackets_callback(Comment.Multiline)),
600 (r'#[^\n]*$', Comment.Single),
601 (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
602 (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
603 (r'^=.*?\n\s*?\n', Comment.Multiline),
604 (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
605 bygroups(Keyword, Name), 'token-sym-brackets'),
606 (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
607 bygroups(Keyword, Name), 'pre-token'),
608 # deal with a special case in the Perl 6 grammar (role q { ... })
609 (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
610 (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
611 (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
612 Name.Builtin),
613 (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
614 # copied from PerlLexer
615 (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
616 Name.Variable),
617 (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
618 (r'::\?\w+', Name.Variable.Global),
619 (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
620 Name.Variable.Global),
621 (r'\$(?:<.*?>)+', Name.Variable),
622 (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
623 r'(?P=first_char)*)', brackets_callback(String)),
624 # copied from PerlLexer
625 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
626 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
627 (r'0b[01]+(_[01]+)*', Number.Bin),
628 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
629 Number.Float),
630 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
631 (r'\d+(_\d+)*', Number.Integer),
632 (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
633 (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
634 (r'm\w+(?=\()', Name),
635 (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
636 r'(?P=first_char)*)', brackets_callback(String.Regex)),
637 (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
638 String.Regex),
639 (r'<[^\s=].*?\S>', String),
640 (_build_word_match(PERL6_OPERATORS), Operator),
641 (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
642 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
643 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
644 ],
645 'root': [
646 include('common'),
647 (r'\{', opening_brace_callback),
648 (r'\}', closing_brace_callback),
649 (r'.+?', Text),
650 ],
651 'pre-token': [
652 include('common'),
653 (r'\{', Text, ('#pop', 'token')),
654 (r'.+?', Text),
655 ],
656 'token-sym-brackets': [
657 (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
658 brackets_callback(Name), ('#pop', 'pre-token')),
659 default(('#pop', 'pre-token')),
660 ],
661 'token': [
662 (r'\}', Text, '#pop'),
663 (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
664 # make sure that quotes in character classes aren't treated as strings
665 (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
666 # make sure that '#' characters in quotes aren't treated as comments
667 (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
668 (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
669 (r'#.*?$', Comment.Single),
670 (r'\{', embedded_perl6_callback),
671 ('.+?', String.Regex),
672 ],
673 }
674
def analyse_text(text):
    """
    Estimate how likely it is that *text* is Perl 6 / Raku source code.

    Pod sections are removed first.  Returns ``True`` for a matching
    shebang, for a ``v6`` / ``use v6`` statement among the leading
    statements, or for a leading package-like declaration that is scoped
    or accompanied by a ``my``/``our``/``has`` declaration.  Otherwise a
    rating is returned, halved when the text contains ``:=``.
    """
    def strip_pod(lines):
        # Drop every line from a ``=directive`` up to and including the
        # next ``=end`` / ``=cut`` line.
        kept = []
        skipping = False

        for line in lines:
            if re.match(r'^=(?:end|cut)', line):
                skipping = False
                continue
            if re.match(r'^=\w+', line):
                skipping = True
                continue
            if not skipping:
                kept.append(line)

        return kept

    # XXX handle block comments
    code_lines = strip_pod(text.splitlines())
    text = '\n'.join(code_lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    # check for my/our/has declarations
    declaration_re = (r"(?:my|our|has)\s+(?:" +
                      Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                      r"+\s+)?[$@%&(]")
    saw_perl_decl = re.search(declaration_re, text) is not None
    rating = 0.8 if saw_perl_decl else False

    # Only the leading statements are inspected: the scan stops at the
    # first non-blank line that is neither a version statement nor a
    # package-like declaration.
    for raw_line in code_lines:
        line = re.sub('#.*', '', raw_line)
        if re.match(r'^\s*$', line):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
        if class_decl is None:
            break

        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        rating = 0.05

    if ':=' in text:
        # Same reasoning as in PerlLexer: ``:=`` lowers the confidence.
        rating = rating / 2

    return rating
729
def __init__(self, **options):
    """
    Initialise the lexer with *options*.

    After the base class initialiser has run, ``self.encoding`` is set
    to the ``encoding`` option, falling back to ``'utf-8'`` when the
    option is not given.
    """
    super().__init__(**options)

    chosen_encoding = options.get('encoding', 'utf-8')
    self.encoding = chosen_encoding

eric ide

mercurial