ThirdParty/Pygments/pygments/lexers/pascal.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.pascal
4 ~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Pascal family languages.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import Lexer, RegexLexer, include, bygroups, words, \
15 using, this, default
16 from pygments.util import get_bool_opt, get_list_opt
17 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
18 Number, Punctuation, Error
19 from pygments.scanner import Scanner
20
# Public API of this module: the three lexer classes defined below.
__all__ = ['DelphiLexer', 'Modula2Lexer', 'AdaLexer']
22
23
class DelphiLexer(Lexer):
    """
    For `Delphi <http://www.borland.com/delphi/>`_ (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas']
    mimetypes = ['text/x-pascal']

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that terminate a function/property modifier context.
    BLOCK_KEYWORDS = set((
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    ))

    # Names highlighted as pseudo keywords while inside a function block.
    FUNCTION_MODIFIERS = set((
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    ))

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = set((
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    ))

    BUILTIN_TYPES = set((
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    ))

    # Routines per unit; which units count as builtin is chosen by the
    # `units` option in __init__.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = set((
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    ))

    ASM_INSTRUCTIONS = set((
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    ))

    def __init__(self, **options):
        """
        Build the keyword and builtin sets from the lexer options.

        The `turbopascal`, `delphi` and `freepascal` flags each add their
        keyword tuple to ``self.keywords``; the `units` list selects which
        entries of ``BUILTIN_UNITS`` are merged into ``self.builtins``.
        A unit name that is not a key of ``BUILTIN_UNITS`` raises
        ``KeyError``.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        if get_bool_opt(options, 'turbopascal', True):
            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
        if get_bool_opt(options, 'delphi', True):
            self.keywords.update(self.DELPHI_KEYWORDS)
        if get_bool_opt(options, 'freepascal', True):
            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
        self.builtins = set()
        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
            self.builtins.update(self.BUILTIN_UNITS[unit])

    def get_tokens_unprocessed(self, text):
        """
        Tokenize `text` with a hand-written scanner-driven state machine.

        Three states are kept on a stack: ``'initial'`` (regular Pascal
        code), ``'string'`` (inside a quoted string) and ``'asm'`` (inside
        an ``asm ... end`` block).  Yields ``(start_pos, token, value)``
        tuples, one per scanner match.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        stack = ['initial']
        in_function_block = False
        in_property_block = False
        was_dot = False
        next_token_is_function = False
        next_token_is_property = False
        collect_labels = False
        block_labels = set()
        # open-minus-closed counts for [parentheses, square brackets]
        brace_balance = [0, 0]
        # a comment is a compiler directive when "$" directly follows
        # its opening delimiter
        directive_prefixes = ('{$', '(*$')

        while not scanner.eos:
            token = Error

            if stack[-1] == 'initial':
                if scanner.scan(r'\s+'):
                    token = Text
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    # the match includes the delimiter, so test for the
                    # delimiter plus "$" rather than a bare "$"
                    if scanner.match.startswith(directive_prefixes):
                        token = Comment.Preproc
                    else:
                        token = Comment.Multiline
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context
                    if in_function_block or in_property_block:
                        # the match may be a run such as "((" or ")]",
                        # so count every bracket it contains
                        brackets = scanner.match
                        brace_balance[0] += (brackets.count('(') -
                                             brackets.count(')'))
                        brace_balance[1] += (brackets.count('[') -
                                             brackets.count(']'))
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        # if we are in a special block and a
                        # block ending keyword occurs (and the parenthesis
                        # is balanced) we end the current block context
                        if (in_function_block or in_property_block) and \
                           lowercase_name in self.BLOCK_KEYWORDS and \
                           brace_balance[0] <= 0 and \
                           brace_balance[1] <= 0:
                            in_function_block = False
                            in_property_block = False
                            brace_balance = [0, 0]
                            block_labels = set()
                        if lowercase_name in ('label', 'goto'):
                            collect_labels = True
                        elif lowercase_name == 'asm':
                            stack.append('asm')
                        elif lowercase_name == 'property':
                            in_property_block = True
                            next_token_is_property = True
                        elif lowercase_name in ('procedure', 'operator',
                                                'function', 'constructor',
                                                'destructor'):
                            in_function_block = True
                            next_token_is_function = True
                    elif in_function_block and \
                            lowercase_name in self.FUNCTION_MODIFIERS:
                        # we are in a function block and the current name
                        # is in the set of registered modifiers. highlight
                        # it as pseudo keyword
                        token = Keyword.Pseudo
                    elif in_property_block and \
                            lowercase_name in ('read', 'write'):
                        # if we are in a property highlight some more
                        # modifiers
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    elif next_token_is_function:
                        # if the last iteration set next_token_is_function
                        # to true we now want this name highlighted as
                        # function. so do that and reset the state
                        #
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name
                        if scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False
                    elif next_token_is_property:
                        # same for properties
                        token = Name.Property
                        next_token_is_property = False
                    elif collect_labels:
                        # Highlight this token as label and add it
                        # to the list of known labels
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    elif lowercase_name in block_labels:
                        # name is in list of known labels
                        token = Name.Label
                    elif lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    elif not was_dot and lowercase_name in self.builtins:
                        # builtins are just builtins if the token
                        # before isn't a dot
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # consume one character; it is yielded as Error
                    scanner.get_char()

            elif stack[-1] == 'string':
                if scanner.scan(r"''"):
                    token = String.Escape
                elif scanner.scan(r"'"):
                    token = String
                    stack.pop()
                elif scanner.scan(r"[^']*"):
                    token = String
                else:
                    scanner.get_char()
                    stack.pop()

            elif stack[-1] == 'asm':
                if scanner.scan(r'\s+'):
                    token = Text
                elif scanner.scan(r'end\b'):
                    # the word boundary keeps identifiers that merely
                    # start with "end" from closing the asm block
                    token = Keyword
                    stack.pop()
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    if scanner.match.startswith(directive_prefixes):
                        token = Comment.Preproc
                    else:
                        token = Comment.Multiline
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
                    token = Name.Label
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name in self.ASM_INSTRUCTIONS:
                        token = Keyword
                    elif lowercase_name in self.ASM_REGISTERS:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
                    token = Operator
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    scanner.get_char()
                    stack.pop()

            # remember whether the last non-whitespace match was a dot,
            # so that names after "." are not highlighted as builtins
            if scanner.match.strip():
                was_dot = scanner.match == '.'
            yield scanner.start_pos, token, scanner.match or ''
506
507
class Modula2Lexer(RegexLexer):
    """
    For `Modula-2 <http://www.modula2.org/>`_ source code.

    Additional options that determine which keywords are highlighted:

    `pim`
        Select PIM Modula-2 dialect (default: True).  This option is not
        read directly; PIM is used whenever neither `iso` nor `objm2`
        is enabled.
    `iso`
        Select ISO Modula-2 dialect (default: False).
    `objm2`
        Select Objective Modula-2 dialect (default: False).
    `gm2ext`
        Also highlight GNU extensions (default: False).

    .. versionadded:: 1.3
    """
    name = 'Modula-2'
    aliases = ['modula2', 'm2']
    filenames = ['*.def', '*.mod']
    mimetypes = ['text/x-modula2']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'identifiers': [
            (r'([a-zA-Z_$][\w$]*)', Name),
        ],
        'numliterals': [
            (r'[01]+B', Number.Bin),        # binary number (ObjM2)
            (r'[0-7]+B', Number.Oct),       # octal number (PIM + ISO)
            (r'[0-7]+C', Number.Oct),       # char code (PIM + ISO)
            (r'[0-9A-F]+C', Number.Hex),    # char code (ObjM2)
            (r'[0-9A-F]+H', Number.Hex),    # hexadecimal number
            (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float),  # real number
            (r'[0-9]+\.[0-9]+', Number.Float),             # real number
            (r'[0-9]+', Number.Integer),    # decimal whole number
        ],
        'strings': [
            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
        ],
        'operators': [
            (r'[*/+=#~&<>\^-]', Operator),
            (r':=', Operator),    # assignment
            (r'@', Operator),     # pointer deref (ISO)
            (r'\.\.', Operator),  # ellipsis or range
            (r'`', Operator),     # Smalltalk message (ObjM2)
            (r'::', Operator),    # type conversion (ObjM2)
        ],
        'punctuation': [
            (r'[()\[\]{},.:;|]', Punctuation),
        ],
        'comments': [
            (r'//.*?\n', Comment.Single),         # ObjM2
            (r'/\*(.*?)\*/', Comment.Multiline),  # ObjM2
            # "(*$" starts a pragma, not a comment; the lookahead (rather
            # than a consumed character) lets the empty comment "(**)" match
            (r'\(\*(?!\$)(.*?)\*\)', Comment.Multiline),
            # TO DO: nesting of (* ... *) comments
        ],
        'pragmas': [
            (r'\(\*\$(.*?)\*\)', Comment.Preproc),  # PIM
            (r'<\*(.*?)\*>', Comment.Preproc),      # ISO + ObjM2
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('pragmas'),
            include('identifiers'),
            include('numliterals'),
            include('strings'),
            include('operators'),
            include('punctuation'),
        ]
    }

    pim_reserved_words = [
        # 40 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
        'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
        'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'WHILE', 'WITH',
    ]

    pim_pervasives = [
        # 31 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
        'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
        'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
        'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
    ]

    iso_reserved_words = [
        # 46 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
        'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
        'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
        'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
        'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
        'WITH',
    ]

    iso_pervasives = [
        # 42 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
        'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
        'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
        'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
        'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
        'TRUE', 'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    ]

    objm2_reserved_words = [
        # base language, 42 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
        'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
        # OO extensions, 16 reserved words
        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
        'SUPER', 'TRY',
    ]

    objm2_pervasives = [
        # base language, 38 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
        'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
        'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
        'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
        # OO extensions, 3 pervasives
        'OBJECT', 'NO', 'YES',
    ]

    gnu_reserved_words = [
        # 10 additional reserved words
        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
    ]

    gnu_pervasives = [
        # 21 identifiers, actually from pseudo-module SYSTEM
        #   but we will highlight them as if they were pervasives
        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
    ]

    def __init__(self, **options):
        """
        Select the reserved-word and pervasive sets for one dialect.

        `iso` takes precedence over `objm2`; when neither is enabled the
        PIM lists are used.  `gm2ext` adds the GNU lists on top of the
        chosen dialect.
        """
        self.reserved_words = set()
        self.pervasives = set()
        # ISO Modula-2
        if get_bool_opt(options, 'iso', False):
            self.reserved_words.update(self.iso_reserved_words)
            self.pervasives.update(self.iso_pervasives)
        # Objective Modula-2
        elif get_bool_opt(options, 'objm2', False):
            self.reserved_words.update(self.objm2_reserved_words)
            self.pervasives.update(self.objm2_pervasives)
        # PIM Modula-2 (DEFAULT)
        else:
            self.reserved_words.update(self.pim_reserved_words)
            self.pervasives.update(self.pim_pervasives)
        # GNU extensions
        if get_bool_opt(options, 'gm2ext', False):
            self.reserved_words.update(self.gnu_reserved_words)
            self.pervasives.update(self.gnu_pervasives)
        # initialise
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Run the regex lexer and re-tag plain ``Name`` tokens.

        A name found in ``self.reserved_words`` becomes
        ``Keyword.Reserved``; one found in ``self.pervasives`` becomes
        ``Keyword.Pervasive``.  The comparison is case-sensitive.
        """
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            # check for reserved words and pervasives
            if token is Name:
                if value in self.reserved_words:
                    token = Keyword.Reserved
                elif value in self.pervasives:
                    token = Keyword.Pervasive
            # return result
            yield index, token, value
698
699
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    .. versionadded:: 1.3
    """

    name = 'Ada'
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.IGNORECASE

    # Keyword rules end in ``\b`` so that they do not match the leading
    # part of a longer identifier (e.g. ``Entry_Point``, ``Isolation``).
    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            (r'(function|procedure|entry)\b', Keyword.Declaration,
             'subprogram'),
            (r'(subtype|type)(\s+)(\w+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'(task|protected)\b', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text,
                                             Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (words((
                'Address', 'Byte', 'Boolean', 'Character', 'Controlled',
                'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type',
                'Float', 'Generator', 'Integer', 'Long_Float',
                'Long_Integer', 'Long_Long_Float', 'Long_Long_Integer',
                'Natural', 'Positive', 'Reference_Type', 'Short_Float',
                'Short_Integer', 'Short_Short_Float', 'Short_Short_Integer',
                'String', 'Wide_Character', 'Wide_String'), suffix=r'\b'),
             Keyword.Type),
            # "then" / "else" are optional so that plain "and" / "or"
            # are word operators as well
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'(generic|private)\b', Keyword.Declaration),
            (r'package\b', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'(\w+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<\w+>>', Name.Label),
            (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (words((
                'abort', 'abs', 'abstract', 'accept', 'access', 'aliased',
                'all', 'array', 'at', 'begin', 'body', 'case', 'constant',
                'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif',
                'end', 'entry', 'exception', 'exit', 'interface', 'for',
                'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of',
                'or', 'others', 'out', 'overriding', 'pragma', 'protected',
                'raise', 'range', 'record', 'renames', 'requeue', 'return',
                'reverse', 'select', 'separate', 'subtype', 'synchronized',
                'task', 'tagged', 'terminate', 'then', 'type', 'until',
                'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|\w+', Name.Function),
            include('root'),
        ],
        'end': [
            (r'(if|case|record|loop|select)\b', Keyword.Reserved),
            (r'"[^"]+"|[\w.]+', Name.Function),
            (r'\s+', Text),
            (r';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'(with|and|use)\b', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, ('record_def')),
            (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation),
             '#pop'),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text,
                                            Keyword.Reserved)),
            include('root'),
        ],
        'record_def': [
            (r'end record', Keyword.Reserved, '#pop'),
            include('root'),
        ],
        'import': [
            (r'[\w.]+', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            # mode keywords must be tried before the generic name rule,
            # which would otherwise consume them as Name.Variable
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            (r'\w+', Name.Variable),
            (r',|:[^=]', Punctuation),
            include('root'),
        ],
        'package': [
            (r'body\b', Keyword.Declaration),
            (r'is\s+new\b|renames\b', Keyword.Reserved),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([\w.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text,
                                                 Punctuation)),
            (r'[\w.\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }

eric ide

mercurial