|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.pascal |
|
4 ~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for Pascal family languages. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer, RegexLexer, include, bygroups, words, \ |
|
15 using, this, default |
|
16 from pygments.util import get_bool_opt, get_list_opt |
|
17 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
18 Number, Punctuation, Error |
|
19 from pygments.scanner import Scanner |
|
20 |
|
21 # compatibility import |
|
22 from pygments.lexers.modula2 import Modula2Lexer |
|
23 |
|
# Names exported by ``from pygments.lexers.pascal import *``.  Modula2Lexer,
# imported above for compatibility, is not listed here.
__all__ = ['DelphiLexer', 'AdaLexer']
|
25 |
|
26 |
|
class DelphiLexer(Lexer):
    """
    For `Delphi <http://www.borland.com/delphi/>`_ (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # keywords that end a function/property modifier context
    BLOCK_KEYWORDS = set((
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    ))

    # names highlighted as pseudo keywords inside a function block
    FUNCTION_MODIFIERS = set((
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    ))

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = set((
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    ))

    BUILTIN_TYPES = set((
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    ))

    # maps a unit name (as accepted by the `units` option) to the
    # lowercased names that unit contributes to the builtin set
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = set((
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    ))

    ASM_INSTRUCTIONS = set((
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    ))

    def __init__(self, **options):
        """
        Build the keyword and builtin sets from the lexer options.

        The boolean options `turbopascal`, `delphi` and `freepascal` select
        which keyword tuples are merged into ``self.keywords``; the list
        option `units` selects which entries of `BUILTIN_UNITS` are merged
        into ``self.builtins``.  A unit name that is not a key of
        `BUILTIN_UNITS` raises `KeyError`.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        if get_bool_opt(options, 'turbopascal', True):
            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
        if get_bool_opt(options, 'delphi', True):
            self.keywords.update(self.DELPHI_KEYWORDS)
        if get_bool_opt(options, 'freepascal', True):
            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
        self.builtins = set()
        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
            self.builtins.update(self.BUILTIN_UNITS[unit])

    @staticmethod
    def _comment_token(comment):
        """
        Return the token type for a matched ``{...}`` or ``(*...*)`` comment.

        The matched text still contains its opening delimiter, so a compiler
        directive is recognised by ``{$`` or ``(*$`` rather than a bare ``$``.
        """
        if comment.startswith(('{$', '(*$')):
            return Comment.Preproc
        return Comment.Multiline

    def get_tokens_unprocessed(self, text):
        """
        Tokenize `text` with a hand-written scanner.

        Yields ``(start_position, tokentype, value)`` tuples.  A small state
        stack (``'initial'``, ``'string'``, ``'asm'``) selects the rule set;
        a handful of flags track function/property/label context so that
        names can be highlighted depending on what preceded them.  Input that
        matches no rule is emitted one character at a time as `Error`.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        stack = ['initial']
        in_function_block = False
        in_property_block = False
        was_dot = False
        next_token_is_function = False
        next_token_is_property = False
        collect_labels = False
        block_labels = set()
        # open-minus-closed counts for [parentheses, square brackets]
        brace_balance = [0, 0]

        while not scanner.eos:
            token = Error

            if stack[-1] == 'initial':
                if scanner.scan(r'\s+'):
                    token = Text
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context.
                    # The pattern matches a whole run of brackets, so
                    # every character of the match has to be counted.
                    if in_function_block or in_property_block:
                        run = scanner.match
                        brace_balance[0] += run.count('(') - run.count(')')
                        brace_balance[1] += run.count('[') - run.count(']')
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        # if we are in a special block and a
                        # block ending keyword occurs (and the parenthesis
                        # is balanced) we end the current block context
                        if (in_function_block or in_property_block) and \
                           lowercase_name in self.BLOCK_KEYWORDS and \
                           brace_balance[0] <= 0 and \
                           brace_balance[1] <= 0:
                            in_function_block = False
                            in_property_block = False
                            brace_balance = [0, 0]
                            block_labels = set()
                        if lowercase_name in ('label', 'goto'):
                            collect_labels = True
                        elif lowercase_name == 'asm':
                            stack.append('asm')
                        elif lowercase_name == 'property':
                            in_property_block = True
                            next_token_is_property = True
                        elif lowercase_name in ('procedure', 'operator',
                                                'function', 'constructor',
                                                'destructor'):
                            in_function_block = True
                            next_token_is_function = True
                    # we are in a function block and the current name
                    # is in the set of registered modifiers. highlight
                    # it as pseudo keyword
                    elif in_function_block and \
                            lowercase_name in self.FUNCTION_MODIFIERS:
                        token = Keyword.Pseudo
                    # if we are in a property highlight some more
                    # modifiers
                    elif in_property_block and \
                            lowercase_name in ('read', 'write'):
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    # if the last iteration set next_token_is_function
                    # to true we now want this name highlighted as
                    # function. so do that and reset the state
                    elif next_token_is_function:
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name
                        if scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False
                    # same for properties
                    elif next_token_is_property:
                        token = Name.Property
                        next_token_is_property = False
                    # Highlight this token as label and add it
                    # to the list of known labels
                    elif collect_labels:
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    # name is in list of known labels
                    elif lowercase_name in block_labels:
                        token = Name.Label
                    elif lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    # builtins are just builtins if the token
                    # before isn't a dot
                    elif not was_dot and lowercase_name in self.builtins:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # nothing matched: consume one char, emitted as Error
                    scanner.get_char()

            elif stack[-1] == 'string':
                if scanner.scan(r"''"):
                    token = String.Escape
                elif scanner.scan(r"'"):
                    token = String
                    stack.pop()
                elif scanner.scan(r"[^']*"):
                    token = String
                else:
                    scanner.get_char()
                    stack.pop()

            elif stack[-1] == 'asm':
                if scanner.scan(r'\s+'):
                    token = Text
                # the word boundary keeps identifiers that merely start
                # with "end" from closing the asm block
                elif scanner.scan(r'end\b'):
                    token = Keyword
                    stack.pop()
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
                    token = Name.Label
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name in self.ASM_INSTRUCTIONS:
                        token = Keyword
                    elif lowercase_name in self.ASM_REGISTERS:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
                    token = Operator
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    scanner.get_char()
                    stack.pop()

            # remember whether the last non-whitespace token was a dot;
            # a name following a dot is not highlighted as builtin
            if scanner.match.strip():
                was_dot = scanner.match == '.'
            yield scanner.start_pos, token, scanner.match or ''
|
509 |
|
510 |
|
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    Matching is case-insensitive (``re.IGNORECASE``).  Keyword rules end in
    ``\\b`` so that identifiers which merely start with a keyword (for
    example ``Task_Id`` or ``Entry_Count``) are lexed as plain names.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            (r'(?:function|procedure|entry)\b', Keyword.Declaration,
             'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'(?:task|protected)\b', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text,
                                             Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (words((
                'Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count',
                'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator',
                'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float',
                'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type',
                'Short_Float', 'Short_Integer', 'Short_Short_Float',
                'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'),
                suffix=r'\b'),
             Keyword.Type),
            # ``else`` is optional, mirroring ``and then``: a plain ``or``
            # is a word operator too
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'(?:generic|private)\b', Keyword.Declaration),
            (r'package\b', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (words((
                'abort', 'abs', 'abstract', 'accept', 'access', 'aliased', 'all',
                'array', 'at', 'begin', 'body', 'case', 'constant', 'declare',
                'delay', 'delta', 'digits', 'do', 'else', 'elsif', 'end', 'entry',
                'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited',
                'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding',
                'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue',
                'return', 'reverse', 'select', 'separate', 'subtype', 'synchronized',
                'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when',
                'while', 'xor'), prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        # entered after ``function``/``procedure``/``entry``
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        # entered after ``end`` followed by whitespace; left at ``;``
        'end': [
            (r'(if|case|record|loop|select)\b', Keyword.Reserved),
            (r'"[^"]+"|[\w.]+', Name.Function),
            (r'\s+', Text),
            (';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'(?:with|and|use)\b', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, 'record_def'),
            (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation), '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text, Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            # must precede the generic ``\w+`` rule, which would otherwise
            # consume these words as variable names
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            include('root'),
        ],
        'package': [
            (r'body\b', Keyword.Declaration),
            (r'is\s+new\b|renames\b', Keyword.Reserved),
            (r'is\b', Keyword.Reserved, '#pop'),
            (';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text, Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }