eric6/ThirdParty/Pygments/pygments/lexers/modula2.py

changeset 6942
2602857055c5
parent 5713
6762afd9f963
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.modula2
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
6 Multi-Dialect Lexer for Modula-2.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, include
15 from pygments.util import get_bool_opt, get_list_opt
16 from pygments.token import Text, Comment, Operator, Keyword, Name, \
17 String, Number, Punctuation, Error
18
19 __all__ = ['Modula2Lexer']
20
21
22 # Multi-Dialect Modula-2 Lexer
23 class Modula2Lexer(RegexLexer):
24 """
25 For `Modula-2 <http://www.modula2.org/>`_ source code.
26
27 The Modula-2 lexer supports several dialects. By default, it operates in
28 fallback mode, recognising the *combined* literals, punctuation symbols
29 and operators of all supported dialects, and the *combined* reserved words
30 and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
31 differentiating between library defined identifiers.
32
33 To select a specific dialect, a dialect option may be passed
34 or a dialect tag may be embedded into a source file.
35
36 Dialect Options:
37
38 `m2pim`
39 Select PIM Modula-2 dialect.
40 `m2iso`
41 Select ISO Modula-2 dialect.
42 `m2r10`
43 Select Modula-2 R10 dialect.
44 `objm2`
45 Select Objective Modula-2 dialect.
46
47 The PIM and ISO dialect options may be qualified with a language extension.
48
49 Language Extensions:
50
51 `+aglet`
52 Select Aglet Modula-2 extensions, available with m2iso.
53 `+gm2`
54 Select GNU Modula-2 extensions, available with m2pim.
55 `+p1`
56 Select p1 Modula-2 extensions, available with m2iso.
57 `+xds`
58 Select XDS Modula-2 extensions, available with m2iso.
59
60
61 Passing a Dialect Option via Unix Commandline Interface
62
63 Dialect options may be passed to the lexer using the `dialect` key.
64 Only one such option should be passed. If multiple dialect options are
65 passed, the first valid option is used, any subsequent options are ignored.
66
67 Examples:
68
69 `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
70 Use ISO dialect to render input to HTML output
71 `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
72 Use ISO dialect with p1 extensions to render input to RTF output
73
74
75 Embedding a Dialect Option within a source file
76
77 A dialect option may be embedded in a source file in form of a dialect
78 tag, a specially formatted comment that specifies a dialect option.
79
80 Dialect Tag EBNF::
81
82 dialectTag :
83 OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
84
85 dialectOption :
86 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
87 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
88
89 Prefix : '!' ;
90
91 OpeningCommentDelim : '(*' ;
92
93 ClosingCommentDelim : '*)' ;
94
95 No whitespace is permitted between the tokens of a dialect tag.
96
97 In the event that a source file contains multiple dialect tags, the first
98 tag that contains a valid dialect option will be used and any subsequent
99 dialect tags will be ignored. Ideally, a dialect tag should be placed
100 at the beginning of a source file.
101
102 An embedded dialect tag overrides a dialect option set via command line.
103
104 Examples:
105
106 ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
107 Use Modula2 R10 dialect to render this source file.
108 ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
109 Use PIM dialect with GNU extensions to render this source file.
110
111
112 Algol Publication Mode:
113
114 In Algol publication mode, source text is rendered for publication of
115 algorithms in scientific papers and academic texts, following the format
116 of the Revised Algol-60 Language Report. It is activated by passing
117 one of two corresponding styles as an option:
118
119 `algol`
120 render reserved words lowercase underline boldface
121 and builtins lowercase boldface italic
122 `algol_nu`
123 render reserved words lowercase boldface (no underlining)
124 and builtins lowercase boldface italic
125
126 The lexer automatically performs the required lowercase conversion when
127 this mode is activated.
128
129 Example:
130
131 ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
132 Render input file in Algol publication mode to LaTeX output.
133
134
135 Rendering Mode of First Class ADT Identifiers:
136
137 The rendering of standard library first class ADT identifiers is controlled
138 by option flag "treat_stdlib_adts_as_builtins".
139
140 When this option is turned on, standard library ADT identifiers are rendered
141 as builtins. When it is turned off, they are rendered as ordinary library
142 identifiers.
143
144 `treat_stdlib_adts_as_builtins` (default: On)
145
146 The option is useful for dialects that support ADTs as first class objects
147 and provide ADTs in the standard library that would otherwise be built-in.
148
149 At present, only Modula-2 R10 supports library ADTs as first class objects
150 and therefore, no ADT identifiers are defined for any other dialects.
151
152 Example:
153
154 ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
155 Render standard library ADTs as ordinary library types.
156
157 .. versionadded:: 1.3
158
159 .. versionchanged:: 2.1
160 Added multi-dialect support.
161 """
162 name = 'Modula-2'
163 aliases = ['modula2', 'm2']
164 filenames = ['*.def', '*.mod']
165 mimetypes = ['text/x-modula2']
166
167 flags = re.MULTILINE | re.DOTALL
168
# Token rules for the lexer, grouped into small named states that the
# 'root' state pulls in via include().  The order of the include() calls in
# 'root' is significant: dialect tags and pragmas are listed ahead of
# ordinary comments, and two-character symbols ahead of one-character
# symbols, so that the longer / more specific form is tried first.
tokens = {
    'whitespace': [
        (r'\n+', Text),  # blank lines
        (r'\s+', Text),  # whitespace
    ],
    'dialecttags': [
        # PIM Dialect Tag
        (r'\(\*!m2pim\*\)', Comment.Special),
        # ISO Dialect Tag
        (r'\(\*!m2iso\*\)', Comment.Special),
        # M2R10 Dialect Tag
        (r'\(\*!m2r10\*\)', Comment.Special),
        # ObjM2 Dialect Tag
        (r'\(\*!objm2\*\)', Comment.Special),
        # Aglet Extensions Dialect Tag
        (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
        # GNU Extensions Dialect Tag
        (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
        # p1 Extensions Dialect Tag
        (r'\(\*!m2iso\+p1\*\)', Comment.Special),
        # XDS Extensions Dialect Tag
        (r'\(\*!m2iso\+xds\*\)', Comment.Special),
    ],
    'identifiers': [
        (r'([a-zA-Z_$][\w$]*)', Name),
    ],
    'prefixed_number_literals': [
        #
        # Base-2, whole number
        (r'0b[01]+(\'[01]+)*', Number.Bin),
        #
        # Base-16, whole number
        (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
    ],
    'plain_number_literals': [
        #
        # Base-10, real number with exponent
        (r'[0-9]+(\'[0-9]+)*'  # integral part
         r'\.[0-9]+(\'[0-9]+)*'  # fractional part
         r'[eE][+-]?[0-9]+(\'[0-9]+)*',  # exponent
         Number.Float),
        #
        # Base-10, real number without exponent
        (r'[0-9]+(\'[0-9]+)*'  # integral part
         r'\.[0-9]+(\'[0-9]+)*',  # fractional part
         Number.Float),
        #
        # Base-10, whole number
        (r'[0-9]+(\'[0-9]+)*', Number.Integer),
    ],
    'suffixed_number_literals': [
        #
        # Base-8, whole number
        (r'[0-7]+B', Number.Oct),
        #
        # Base-8, character code
        (r'[0-7]+C', Number.Oct),
        #
        # Base-16, number
        (r'[0-9A-F]+H', Number.Hex),
    ],
    'string_literals': [
        (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
        (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
    ],
    'digraph_operators': [
        # Dot Product Operator
        (r'\*\.', Operator),
        # Array Concatenation Operator
        (r'\+>', Operator),  # M2R10 + ObjM2
        # Inequality Operator
        (r'<>', Operator),  # ISO + PIM
        # Less-Or-Equal, Subset
        (r'<=', Operator),
        # Greater-Or-Equal, Superset
        (r'>=', Operator),
        # Identity Operator
        (r'==', Operator),  # M2R10 + ObjM2
        # Type Conversion Operator
        (r'::', Operator),  # M2R10 + ObjM2
        # Assignment Symbol
        (r':=', Operator),
        # Postfix Increment Mutator
        (r'\+\+', Operator),  # M2R10 + ObjM2
        # Postfix Decrement Mutator
        (r'--', Operator),  # M2R10 + ObjM2
    ],
    'unigraph_operators': [
        # Arithmetic Operators
        (r'[+-]', Operator),
        (r'[*/]', Operator),
        # ISO 80000-2 compliant Set Difference Operator
        (r'\\', Operator),  # M2R10 + ObjM2
        # Relational Operators
        (r'[=#<>]', Operator),
        # Dereferencing Operator
        (r'\^', Operator),
        # Dereferencing Operator Synonym
        (r'@', Operator),  # ISO
        # Logical AND Operator Synonym
        (r'&', Operator),  # PIM + ISO
        # Logical NOT Operator Synonym
        (r'~', Operator),  # PIM + ISO
        # Smalltalk Message Prefix
        (r'`', Operator),  # ObjM2
    ],
    'digraph_punctuation': [
        # Range Constructor
        (r'\.\.', Punctuation),
        # Opening Chevron Bracket
        (r'<<', Punctuation),  # M2R10 + ISO
        # Closing Chevron Bracket
        (r'>>', Punctuation),  # M2R10 + ISO
        # Blueprint Punctuation
        (r'->', Punctuation),  # M2R10 + ISO
        # Distinguish |# and # in M2 R10
        (r'\|#', Punctuation),
        # Distinguish ## and # in M2 R10
        (r'##', Punctuation),
        # Distinguish |* and * in M2 R10
        (r'\|\*', Punctuation),
    ],
    'unigraph_punctuation': [
        # Common Punctuation
        (r'[()\[\]{},.:;|]', Punctuation),
        # Case Label Separator Synonym
        (r'!', Punctuation),  # ISO
        # Blueprint Punctuation
        (r'\?', Punctuation),  # M2R10 + ObjM2
    ],
    'comments': [
        # Single Line Comment
        (r'^//.*?\n', Comment.Single),  # M2R10 + ObjM2
        # Block Comment
        # The negative lookahead leaves '(*$ ... *)' to the Pascal style
        # pragma rule while still allowing an empty comment '(**)' to
        # close on its own '*)'.  The previous form '[^$]' consumed one
        # mandatory character, so '(**)' ran on to the next '*)'.
        (r'\(\*(?!\$)(.*?)\*\)', Comment.Multiline),
        # Template Block Comment
        (r'/\*(.*?)\*/', Comment.Multiline),  # M2R10 + ObjM2
    ],
    'pragmas': [
        # ISO Style Pragmas
        (r'<\*.*?\*>', Comment.Preproc),  # ISO, M2R10 + ObjM2
        # Pascal Style Pragmas
        (r'\(\*\$.*?\*\)', Comment.Preproc),  # PIM
    ],
    'root': [
        include('whitespace'),
        include('dialecttags'),
        include('pragmas'),
        include('comments'),
        include('identifiers'),
        include('suffixed_number_literals'),  # PIM + ISO
        include('prefixed_number_literals'),  # M2R10 + ObjM2
        include('plain_number_literals'),
        include('string_literals'),
        include('digraph_punctuation'),
        include('digraph_operators'),
        include('unigraph_punctuation'),
        include('unigraph_operators'),
    ]
}
329
330 # C o m m o n D a t a s e t s
331
332 # Common Reserved Words Dataset
333 common_reserved_words = (
334 # 37 common reserved words
335 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
336 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
337 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
338 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
339 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
340 )
341
342 # Common Builtins Dataset
343 common_builtins = (
344 # 16 common builtins
345 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
346 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
347 'TRUE',
348 )
349
350 # Common Pseudo-Module Builtins Dataset
351 common_pseudo_builtins = (
352 # 4 common pseudo builtins
353 'ADDRESS', 'BYTE', 'WORD', 'ADR'
354 )
355
356 # P I M M o d u l a - 2 D a t a s e t s
357
358 # Lexemes to Mark as Error Tokens for PIM Modula-2
359 pim_lexemes_to_reject = (
360 '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
361 '+>', '->', '<<', '>>', '|#', '##',
362 )
363
364 # PIM Modula-2 Additional Reserved Words Dataset
365 pim_additional_reserved_words = (
366 # 3 additional reserved words
367 'EXPORT', 'QUALIFIED', 'WITH',
368 )
369
370 # PIM Modula-2 Additional Builtins Dataset
371 pim_additional_builtins = (
372 # 16 additional builtins
373 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
374 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
375 )
376
377 # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
378 pim_additional_pseudo_builtins = (
379 # 5 additional pseudo builtins
380 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
381 )
382
383 # I S O M o d u l a - 2 D a t a s e t s
384
385 # Lexemes to Mark as Error Tokens for ISO Modula-2
386 iso_lexemes_to_reject = (
387 '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
388 '<<', '>>', '|#', '##',
389 )
390
391 # ISO Modula-2 Additional Reserved Words Dataset
392 iso_additional_reserved_words = (
393 # 9 additional reserved words (ISO 10514-1)
394 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
395 'REM', 'RETRY', 'WITH',
396 # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
397 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
398 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
399 )
400
401 # ISO Modula-2 Additional Builtins Dataset
# ISO Modula-2 builtins in addition to the common set.
# The two groups follow the parts of the ISO standard that introduce them.
iso_additional_builtins = (
    # 26 additional builtins (ISO 10514-1)
    'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
    'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
    'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
    # 'UNINTERRUPTIBLE' was previously misspelled 'UNINTERRUBTIBLE', so the
    # real identifier was never recognised as a builtin.
    'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
    'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
)
411
412 # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
413 iso_additional_pseudo_builtins = (
414 # 14 additional builtins (SYSTEM)
415 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
416 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
417 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
418 # 13 additional builtins (COROUTINES)
419 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
420 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
421 'NEWCOROUTINE', 'PROT', 'TRANSFER',
422 # 9 additional builtins (EXCEPTIONS)
423 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
424 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
425 'IsExceptionalExecution', 'RAISE',
426 # 3 additional builtins (TERMINATION)
427 'TERMINATION', 'IsTerminating', 'HasHalted',
428 # 4 additional builtins (M2EXCEPTION), followed by 15 exception identifiers
429 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
430 'indexException', 'rangeException', 'caseSelectException',
431 'invalidLocation', 'functionException', 'wholeValueException',
432 'wholeDivException', 'realValueException', 'realDivException',
433 'complexValueException', 'complexDivException', 'protException',
434 'sysException', 'coException', 'exException',
435 )
436
437 # M o d u l a - 2 R 1 0 D a t a s e t s
438
439 # Lexemes to Mark as Error Tokens for Modula-2 R10
440 m2r10_lexemes_to_reject = (
441 '!', '`', '@', '$', '%', '&', '<>',
442 )
443
444 # Modula-2 R10 reserved words in addition to the common set
445 m2r10_additional_reserved_words = (
446 # 12 additional reserved words
447 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
448 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
449 # 2 additional reserved words with symbolic assembly option
450 'ASM', 'REG',
451 )
452
453 # Modula-2 R10 builtins in addition to the common set
454 m2r10_additional_builtins = (
455 # 26 additional builtins
456 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
457 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
458 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
459 'UNICHAR', 'WRITE', 'WRITEF',
460 )
461
462 # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
463 m2r10_additional_pseudo_builtins = (
464 # 13 additional builtins (TPROPERTIES)
465 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
466 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
467 'TMAXEXP', 'TMINEXP',
468 # 4 additional builtins (CONVERSION)
469 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
470 # 35 additional builtins (UNSAFE)
471 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
472 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
473 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
474 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
475 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
476 # 11 additional builtins (ATOMIC)
477 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
478 'BWNAND', 'BWOR', 'BWXOR',
479 # 7 additional builtins (COMPILER)
480 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
481 'HASH',
482 # 5 additional builtins (ASSEMBLER)
483 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
484 )
485
486 # O b j e c t i v e M o d u l a - 2 D a t a s e t s
487
488 # Lexemes to Mark as Error Tokens for Objective Modula-2
489 objm2_lexemes_to_reject = (
490 '!', '$', '%', '&', '<>',
491 )
492
493 # Objective Modula-2 Extensions
494 # reserved words in addition to Modula-2 R10
495 objm2_additional_reserved_words = (
496 # 16 additional reserved words
497 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
498 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
499 'SUPER', 'TRY',
500 )
501
502 # Objective Modula-2 Extensions
503 # builtins in addition to Modula-2 R10
504 objm2_additional_builtins = (
505 # 3 additional builtins
506 'OBJECT', 'NO', 'YES',
507 )
508
509 # Objective Modula-2 Extensions
510 # pseudo-module builtins in addition to Modula-2 R10
511 objm2_additional_pseudo_builtins = (
512 # None
513 )
514
515 # A g l e t M o d u l a - 2 D a t a s e t s
516
517 # Aglet Extensions
518 # reserved words in addition to ISO Modula-2
519 aglet_additional_reserved_words = (
520 # None
521 )
522
523 # Aglet Extensions
524 # builtins in addition to ISO Modula-2
525 aglet_additional_builtins = (
526 # 9 additional builtins
527 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
528 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
529 )
530
531 # Aglet Modula-2 Extensions
532 # pseudo-module builtins in addition to ISO Modula-2
533 aglet_additional_pseudo_builtins = (
534 # None
535 )
536
537 # G N U M o d u l a - 2 D a t a s e t s
538
539 # GNU Extensions
540 # reserved words in addition to PIM Modula-2
541 gm2_additional_reserved_words = (
542 # 10 additional reserved words
543 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
544 '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
545 )
546
547 # GNU Extensions
548 # builtins in addition to PIM Modula-2
549 gm2_additional_builtins = (
550 # 21 additional builtins
551 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
552 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
553 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
554 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
555 )
556
557 # GNU Extensions
558 # pseudo-module builtins in addition to PIM Modula-2
559 gm2_additional_pseudo_builtins = (
560 # None
561 )
562
563 # p 1 M o d u l a - 2 D a t a s e t s
564
565 # p1 Extensions
566 # reserved words in addition to ISO Modula-2
567 p1_additional_reserved_words = (
568 # None
569 )
570
571 # p1 Extensions
572 # builtins in addition to ISO Modula-2
573 p1_additional_builtins = (
574 # None
575 )
576
577 # p1 Modula-2 Extensions
578 # pseudo-module builtins in addition to ISO Modula-2
579 p1_additional_pseudo_builtins = (
580 # 1 additional builtin
581 'BCD',
582 )
583
584 # X D S M o d u l a - 2 D a t a s e t s
585
586 # XDS Extensions
587 # reserved words in addition to ISO Modula-2
588 xds_additional_reserved_words = (
589 # 1 additional reserved word
590 'SEQ',
591 )
592
593 # XDS Extensions
594 # builtins in addition to ISO Modula-2
595 xds_additional_builtins = (
596 # 9 additional builtins
597 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
598 'LONGCARD', 'SHORTCARD', 'SHORTINT',
599 )
600
601 # XDS Modula-2 Extensions
602 # pseudo-module builtins in addition to ISO Modula-2
# XDS pseudo-module builtins in addition to ISO Modula-2.
# Every entry must be followed by a comma: adjacent string literals are
# concatenated by Python, which previously fused 'void' and 'COMPILER'
# into the single bogus entry 'voidCOMPILER'.
xds_additional_pseudo_builtins = (
    # 21 additional builtins (SYSTEM)
    'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
    'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
    'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
    # 3 additional builtins (COMPILER)
    'COMPILER', 'OPTION', 'EQUATION',
)
611
612 # P I M S t a n d a r d L i b r a r y D a t a s e t s
613
614 # PIM Modula-2 Standard Library Modules Dataset
615 pim_stdlib_module_identifiers = (
616 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
617 )
618
619 # PIM Modula-2 Standard Library Types Dataset
620 pim_stdlib_type_identifiers = (
621 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
622 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
623 'DirectoryCommand',
624 )
625
626 # PIM Modula-2 Standard Library Procedures Dataset
627 pim_stdlib_proc_identifiers = (
628 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
629 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
630 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
631 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
632 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
633 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
634 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
635 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
636 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
637 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
638 )
639
640 # PIM Modula-2 Standard Library Variables Dataset
641 pim_stdlib_var_identifiers = (
642 'Done', 'termCH', 'in', 'out'
643 )
644
645 # PIM Modula-2 Standard Library Constants Dataset
646 pim_stdlib_const_identifiers = (
647 'EOL',
648 )
649
650 # I S O S t a n d a r d L i b r a r y D a t a s e t s
651
652 # ISO Modula-2 Standard Library Modules Dataset
653 iso_stdlib_module_identifiers = (
654 # TO DO
655 )
656
657 # ISO Modula-2 Standard Library Types Dataset
658 iso_stdlib_type_identifiers = (
659 # TO DO
660 )
661
662 # ISO Modula-2 Standard Library Procedures Dataset
663 iso_stdlib_proc_identifiers = (
664 # TO DO
665 )
666
667 # ISO Modula-2 Standard Library Variables Dataset
668 iso_stdlib_var_identifiers = (
669 # TO DO
670 )
671
672 # ISO Modula-2 Standard Library Constants Dataset
673 iso_stdlib_const_identifiers = (
674 # TO DO
675 )
676
677 # M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
678
679 # Modula-2 R10 Standard Library ADTs Dataset
680 m2r10_stdlib_adt_identifiers = (
681 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
682 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
683 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
684 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
685 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
686 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
687 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
688 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
689 'INT64', 'INT128', 'STRING', 'UNISTRING',
690 )
691
692 # Modula-2 R10 Standard Library Blueprints Dataset
693 m2r10_stdlib_blueprint_identifiers = (
694 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
695 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
696 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
697 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
698 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
699 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
700 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
701 )
702
703 # Modula-2 R10 Standard Library Modules Dataset
704 m2r10_stdlib_module_identifiers = (
705 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
706 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
707 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
708 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
709 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
710 )
711
712 # Modula-2 R10 Standard Library Types Dataset
713 m2r10_stdlib_type_identifiers = (
714 'File', 'Status',
715 # TO BE COMPLETED
716 )
717
718 # Modula-2 R10 Standard Library Procedures Dataset
719 m2r10_stdlib_proc_identifiers = (
720 'ALLOCATE', 'DEALLOCATE', 'SIZE',
721 # TO BE COMPLETED
722 )
723
724 # Modula-2 R10 Standard Library Variables Dataset
725 m2r10_stdlib_var_identifiers = (
726 'stdIn', 'stdOut', 'stdErr',
727 )
728
729 # Modula-2 R10 Standard Library Constants Dataset
730 m2r10_stdlib_const_identifiers = (
731 'pi', 'tau',
732 )
733
734 # D i a l e c t s
735
736 # Dialect modes
737 dialects = (
738 'unknown',
739 'm2pim', 'm2iso', 'm2r10', 'objm2',
740 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
741 )
742
743 # D a t a b a s e s
744
745 # Lexemes to Mark as Errors Database
746 lexemes_to_reject_db = {
747 # Lexemes to reject for unknown dialect
748 'unknown': (
749 # LEAVE THIS EMPTY
750 ),
751 # Lexemes to reject for PIM Modula-2
752 'm2pim': (
753 pim_lexemes_to_reject,
754 ),
755 # Lexemes to reject for ISO Modula-2
756 'm2iso': (
757 iso_lexemes_to_reject,
758 ),
759 # Lexemes to reject for Modula-2 R10
760 'm2r10': (
761 m2r10_lexemes_to_reject,
762 ),
763 # Lexemes to reject for Objective Modula-2
764 'objm2': (
765 objm2_lexemes_to_reject,
766 ),
767 # Lexemes to reject for Aglet Modula-2
768 'm2iso+aglet': (
769 iso_lexemes_to_reject,
770 ),
771 # Lexemes to reject for GNU Modula-2
772 'm2pim+gm2': (
773 pim_lexemes_to_reject,
774 ),
775 # Lexemes to reject for p1 Modula-2
776 'm2iso+p1': (
777 iso_lexemes_to_reject,
778 ),
779 # Lexemes to reject for XDS Modula-2
780 'm2iso+xds': (
781 iso_lexemes_to_reject,
782 ),
783 }
784
785 # Reserved Words Database
786 reserved_words_db = {
787 # Reserved words for unknown dialect
788 'unknown': (
789 common_reserved_words,
790 pim_additional_reserved_words,
791 iso_additional_reserved_words,
792 m2r10_additional_reserved_words,
793 ),
794
795 # Reserved words for PIM Modula-2
796 'm2pim': (
797 common_reserved_words,
798 pim_additional_reserved_words,
799 ),
800
801 # Reserved words for ISO Modula-2
802 'm2iso': (
803 common_reserved_words,
804 iso_additional_reserved_words,
805 ),
806
807 # Reserved words for Modula-2 R10
808 'm2r10': (
809 common_reserved_words,
810 m2r10_additional_reserved_words,
811 ),
812
813 # Reserved words for Objective Modula-2
814 'objm2': (
815 common_reserved_words,
816 m2r10_additional_reserved_words,
817 objm2_additional_reserved_words,
818 ),
819
820 # Reserved words for Aglet Modula-2 Extensions
821 'm2iso+aglet': (
822 common_reserved_words,
823 iso_additional_reserved_words,
824 aglet_additional_reserved_words,
825 ),
826
827 # Reserved words for GNU Modula-2 Extensions
828 'm2pim+gm2': (
829 common_reserved_words,
830 pim_additional_reserved_words,
831 gm2_additional_reserved_words,
832 ),
833
834 # Reserved words for p1 Modula-2 Extensions
835 'm2iso+p1': (
836 common_reserved_words,
837 iso_additional_reserved_words,
838 p1_additional_reserved_words,
839 ),
840
841 # Reserved words for XDS Modula-2 Extensions
842 'm2iso+xds': (
843 common_reserved_words,
844 iso_additional_reserved_words,
845 xds_additional_reserved_words,
846 ),
847 }
848
849 # Builtins Database
850 builtins_db = {
851 # Builtins for unknown dialect
852 'unknown': (
853 common_builtins,
854 pim_additional_builtins,
855 iso_additional_builtins,
856 m2r10_additional_builtins,
857 ),
858
859 # Builtins for PIM Modula-2
860 'm2pim': (
861 common_builtins,
862 pim_additional_builtins,
863 ),
864
865 # Builtins for ISO Modula-2
866 'm2iso': (
867 common_builtins,
868 iso_additional_builtins,
869 ),
870
871 # Builtins for Modula-2 R10
872 'm2r10': (
873 common_builtins,
874 m2r10_additional_builtins,
875 ),
876
877 # Builtins for Objective Modula-2
878 'objm2': (
879 common_builtins,
880 m2r10_additional_builtins,
881 objm2_additional_builtins,
882 ),
883
884 # Builtins for Aglet Modula-2 Extensions
885 'm2iso+aglet': (
886 common_builtins,
887 iso_additional_builtins,
888 aglet_additional_builtins,
889 ),
890
891 # Builtins for GNU Modula-2 Extensions
892 'm2pim+gm2': (
893 common_builtins,
894 pim_additional_builtins,
895 gm2_additional_builtins,
896 ),
897
898 # Builtins for p1 Modula-2 Extensions
899 'm2iso+p1': (
900 common_builtins,
901 iso_additional_builtins,
902 p1_additional_builtins,
903 ),
904
905 # Builtins for XDS Modula-2 Extensions
906 'm2iso+xds': (
907 common_builtins,
908 iso_additional_builtins,
909 xds_additional_builtins,
910 ),
911 }
912
913 # Pseudo-Module Builtins Database
914 pseudo_builtins_db = {
915 # Builtins for unknown dialect
916 'unknown': (
917 common_pseudo_builtins,
918 pim_additional_pseudo_builtins,
919 iso_additional_pseudo_builtins,
920 m2r10_additional_pseudo_builtins,
921 ),
922
923 # Builtins for PIM Modula-2
924 'm2pim': (
925 common_pseudo_builtins,
926 pim_additional_pseudo_builtins,
927 ),
928
929 # Builtins for ISO Modula-2
930 'm2iso': (
931 common_pseudo_builtins,
932 iso_additional_pseudo_builtins,
933 ),
934
935 # Builtins for Modula-2 R10
936 'm2r10': (
937 common_pseudo_builtins,
938 m2r10_additional_pseudo_builtins,
939 ),
940
941 # Builtins for Objective Modula-2
942 'objm2': (
943 common_pseudo_builtins,
944 m2r10_additional_pseudo_builtins,
945 objm2_additional_pseudo_builtins,
946 ),
947
948 # Builtins for Aglet Modula-2 Extensions
949 'm2iso+aglet': (
950 common_pseudo_builtins,
951 iso_additional_pseudo_builtins,
952 aglet_additional_pseudo_builtins,
953 ),
954
955 # Builtins for GNU Modula-2 Extensions
956 'm2pim+gm2': (
957 common_pseudo_builtins,
958 pim_additional_pseudo_builtins,
959 gm2_additional_pseudo_builtins,
960 ),
961
962 # Builtins for p1 Modula-2 Extensions
963 'm2iso+p1': (
964 common_pseudo_builtins,
965 iso_additional_pseudo_builtins,
966 p1_additional_pseudo_builtins,
967 ),
968
969 # Builtins for XDS Modula-2 Extensions
970 'm2iso+xds': (
971 common_pseudo_builtins,
972 iso_additional_pseudo_builtins,
973 xds_additional_pseudo_builtins,
974 ),
975 }
976
977 # Standard Library ADTs Database
978 stdlib_adts_db = {
979 # Empty entry for unknown dialect
980 'unknown': (
981 # LEAVE THIS EMPTY
982 ),
983 # Standard Library ADTs for PIM Modula-2
984 'm2pim': (
985 # No first class library types
986 ),
987
988 # Standard Library ADTs for ISO Modula-2
989 'm2iso': (
990 # No first class library types
991 ),
992
993 # Standard Library ADTs for Modula-2 R10
994 'm2r10': (
995 m2r10_stdlib_adt_identifiers,
996 ),
997
998 # Standard Library ADTs for Objective Modula-2
999 'objm2': (
1000 m2r10_stdlib_adt_identifiers,
1001 ),
1002
1003 # Standard Library ADTs for Aglet Modula-2
1004 'm2iso+aglet': (
1005 # No first class library types
1006 ),
1007
1008 # Standard Library ADTs for GNU Modula-2
1009 'm2pim+gm2': (
1010 # No first class library types
1011 ),
1012
1013 # Standard Library ADTs for p1 Modula-2
1014 'm2iso+p1': (
1015 # No first class library types
1016 ),
1017
1018 # Standard Library ADTs for XDS Modula-2
1019 'm2iso+xds': (
1020 # No first class library types
1021 ),
1022 }
1023
1024 # Standard Library Modules Database
stdlib_modules_db = {
    # Fallback entry used for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # PIM Modula-2
    'm2pim': (pim_stdlib_module_identifiers,),

    # ISO Modula-2
    'm2iso': (iso_stdlib_module_identifiers,),

    # Modula-2 R10
    # NOTE(review): unlike the 'objm2' entry below, this entry also lists
    # the ADT identifiers -- confirm the difference is intended.
    'm2r10': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
        m2r10_stdlib_adt_identifiers,
    ),

    # Objective Modula-2
    'objm2': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
    ),

    # Aglet Modula-2 (same modules as ISO)
    'm2iso+aglet': (iso_stdlib_module_identifiers,),

    # GNU Modula-2 (same modules as PIM)
    'm2pim+gm2': (pim_stdlib_module_identifiers,),

    # p1 Modula-2 (same modules as ISO)
    'm2iso+p1': (iso_stdlib_module_identifiers,),

    # XDS Modula-2 (same modules as ISO)
    'm2iso+xds': (iso_stdlib_module_identifiers,),
}
1073
1074 # Standard Library Types Database
stdlib_types_db = {
    # Fallback entry used for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # PIM Modula-2
    'm2pim': (pim_stdlib_type_identifiers,),

    # ISO Modula-2
    'm2iso': (iso_stdlib_type_identifiers,),

    # Modula-2 R10
    'm2r10': (m2r10_stdlib_type_identifiers,),

    # Objective Modula-2 (same types as Modula-2 R10)
    'objm2': (m2r10_stdlib_type_identifiers,),

    # Aglet Modula-2 (same types as ISO)
    'm2iso+aglet': (iso_stdlib_type_identifiers,),

    # GNU Modula-2 (same types as PIM)
    'm2pim+gm2': (pim_stdlib_type_identifiers,),

    # p1 Modula-2 (same types as ISO)
    'm2iso+p1': (iso_stdlib_type_identifiers,),

    # XDS Modula-2 (same types as ISO)
    'm2iso+xds': (iso_stdlib_type_identifiers,),
}
1120
1121 # Standard Library Procedures Database
stdlib_procedures_db = {
    # Fallback entry used for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # PIM Modula-2
    'm2pim': (pim_stdlib_proc_identifiers,),

    # ISO Modula-2
    'm2iso': (iso_stdlib_proc_identifiers,),

    # Modula-2 R10
    'm2r10': (m2r10_stdlib_proc_identifiers,),

    # Objective Modula-2 (same procedures as Modula-2 R10)
    'objm2': (m2r10_stdlib_proc_identifiers,),

    # Aglet Modula-2 (same procedures as ISO)
    'm2iso+aglet': (iso_stdlib_proc_identifiers,),

    # GNU Modula-2 (same procedures as PIM)
    'm2pim+gm2': (pim_stdlib_proc_identifiers,),

    # p1 Modula-2 (same procedures as ISO)
    'm2iso+p1': (iso_stdlib_proc_identifiers,),

    # XDS Modula-2 (same procedures as ISO)
    'm2iso+xds': (iso_stdlib_proc_identifiers,),
}
1167
1168 # Standard Library Variables Database
stdlib_variables_db = {
    # Fallback entry used for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # PIM Modula-2
    'm2pim': (pim_stdlib_var_identifiers,),

    # ISO Modula-2
    'm2iso': (iso_stdlib_var_identifiers,),

    # Modula-2 R10
    'm2r10': (m2r10_stdlib_var_identifiers,),

    # Objective Modula-2 (same variables as Modula-2 R10)
    'objm2': (m2r10_stdlib_var_identifiers,),

    # Aglet Modula-2 (same variables as ISO)
    'm2iso+aglet': (iso_stdlib_var_identifiers,),

    # GNU Modula-2 (same variables as PIM)
    'm2pim+gm2': (pim_stdlib_var_identifiers,),

    # p1 Modula-2 (same variables as ISO)
    'm2iso+p1': (iso_stdlib_var_identifiers,),

    # XDS Modula-2 (same variables as ISO)
    'm2iso+xds': (iso_stdlib_var_identifiers,),
}
1214
1215 # Standard Library Constants Database
stdlib_constants_db = {
    # Fallback entry used for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # PIM Modula-2
    'm2pim': (pim_stdlib_const_identifiers,),

    # ISO Modula-2
    'm2iso': (iso_stdlib_const_identifiers,),

    # Modula-2 R10
    'm2r10': (m2r10_stdlib_const_identifiers,),

    # Objective Modula-2 (same constants as Modula-2 R10)
    'objm2': (m2r10_stdlib_const_identifiers,),

    # Aglet Modula-2 (same constants as ISO)
    'm2iso+aglet': (iso_stdlib_const_identifiers,),

    # GNU Modula-2 (same constants as PIM)
    'm2pim+gm2': (pim_stdlib_const_identifiers,),

    # p1 Modula-2 (same constants as ISO)
    'm2iso+p1': (iso_stdlib_const_identifiers,),

    # XDS Modula-2 (same constants as ISO)
    'm2iso+xds': (iso_stdlib_const_identifiers,),
}
1261
1262 # M e t h o d s
1263
1264 # initialise a lexer instance
def __init__(self, **options):
    """Create a lexer instance configured from the keyword *options*.

    Options read by this method:

    `dialect`
        List of dialect identifiers.  The first entry that names a
        selectable dialect is activated via ``set_dialect``; if no entry
        qualifies, the lexer runs in fallback mode (dialect ``'unknown'``).
    `style`
        List of style names.  Algol publication mode is switched on when
        the list contains ``'algol'`` or ``'algol_nu'``.
    `treat_stdlib_adts_as_builtins`
        Boolean flag, ``True`` when not given.

    All options are finally passed on to ``RegexLexer.__init__``.
    """
    #
    # check dialect options
    #
    # Every entry of self.dialects after index 0 is selectable.  This is
    # the same range get_dialect_from_dialect_tag() accepts for dialect
    # tags; the previous slice [1:-1] silently dropped the last dialect,
    # so it could be chosen by tag but not by option.
    selectable_dialects = self.dialects[1:]
    requested_dialects = get_list_opt(options, 'dialect', [])
    #
    for dialect_option in requested_dialects:
        if dialect_option in selectable_dialects:
            # valid dialect option found
            self.set_dialect(dialect_option)
            break
    else:
        # Fallback Mode (DEFAULT): no valid dialect option
        self.set_dialect('unknown')
    #
    # no dialect tag has been seen in any source text yet
    self.dialect_set_by_tag = False
    #
    # check style options
    #
    styles = get_list_opt(options, 'style', [])
    #
    # use lowercase mode for Algol style
    self.algol_publication_mode = 'algol' in styles or 'algol_nu' in styles
    #
    # check option flags
    #
    self.treat_stdlib_adts_as_builtins = get_bool_opt(
        options, 'treat_stdlib_adts_as_builtins', True)
    #
    # call superclass initialiser
    RegexLexer.__init__(self, **options)
1301
1302 # Set lexer to a specified dialect
def set_dialect(self, dialect_id):
    """Load the lookup sets of the dialect *dialect_id* into the lexer.

    If *dialect_id* is not contained in ``self.dialects`` the dialect
    ``'unknown'`` is used instead.  For the resulting dialect each
    per-dialect database is flattened into a single set and stored on the
    instance:

    * ``lexemes_to_reject`` and ``reserved_words`` are stored unfiltered;
    * ``builtins``, ``pseudo_builtins`` and ``adts`` exclude the reserved
      words;
    * ``modules``, ``types``, ``procedures``, ``variables`` and
      ``constants`` exclude the builtins.

    ``self.dialect`` is set to the dialect actually used.  Returns ``None``.
    """
    # check dialect name against known dialects, default to 'unknown'
    dialect = dialect_id if dialect_id in self.dialects else 'unknown'

    def collect(database, excluded=frozenset()):
        # Merge every identifier group registered for the dialect into one
        # set and drop whatever is listed in *excluded*.
        merged = set()
        for group in database[dialect]:
            merged.update(group)
        return merged - excluded

    # reserved words and builtins come first: the later sets are
    # filtered against them
    lexemes_to_reject_set = collect(self.lexemes_to_reject_db)
    reswords_set = collect(self.reserved_words_db)
    builtins_set = collect(self.builtins_db, reswords_set)
    pseudo_builtins_set = collect(self.pseudo_builtins_db, reswords_set)
    adts_set = collect(self.stdlib_adts_db, reswords_set)
    modules_set = collect(self.stdlib_modules_db, builtins_set)
    types_set = collect(self.stdlib_types_db, builtins_set)
    procedures_set = collect(self.stdlib_procedures_db, builtins_set)
    variables_set = collect(self.stdlib_variables_db, builtins_set)
    constants_set = collect(self.stdlib_constants_db, builtins_set)

    # update lexer state
    self.dialect = dialect
    self.lexemes_to_reject = lexemes_to_reject_set
    self.reserved_words = reswords_set
    self.builtins = builtins_set
    self.pseudo_builtins = pseudo_builtins_set
    self.adts = adts_set
    self.modules = modules_set
    self.types = types_set
    self.procedures = procedures_set
    self.variables = variables_set
    self.constants = constants_set
1401
1402 # Extracts a dialect name from a dialect tag comment string and checks
1403 # the extracted name against known dialects. If a match is found, the
1404 # matching name is returned, otherwise dialect id 'unknown' is returned
def get_dialect_from_dialect_tag(self, dialect_tag):
    """Return the dialect named by the dialect tag comment *dialect_tag*.

    A dialect tag is a comment of the form ``(*!<indicator>*)`` with a
    non-empty indicator.  The indicator is compared verbatim (no
    whitespace stripping, no case folding) against every entry of
    ``self.dialects`` except the one at index 0.

    Returns the indicator if it matches such an entry; returns
    ``'unknown'`` if the string is not a well-formed tag or the indicator
    matches no dialect.
    """
    left_tag_delim = '(*!'
    right_tag_delim = '*)'
    #
    # a well-formed tag carries both delimiters and at least one
    # character between them
    shortest_tag_len = len(left_tag_delim) + len(right_tag_delim) + 1
    if len(dialect_tag) < shortest_tag_len \
       or not dialect_tag.startswith(left_tag_delim) \
       or not dialect_tag.endswith(right_tag_delim):
        # invalid indicator string
        return 'unknown'  # default
    #
    # extract dialect indicator
    indicator = dialect_tag[len(left_tag_delim):-len(right_tag_delim)]
    #
    # check against known dialects; the entry at index 0 is never a match
    if indicator in self.dialects[1:]:
        return indicator
    #
    # indicator does not match any dialect
    return 'unknown'  # default
1451
1452 # intercept the token stream, modify token attributes and return them
def get_tokens_unprocessed(self, text):
    """Yield ``(index, tokentype, value)`` triples for *text*.

    The stream produced by ``RegexLexer.get_tokens_unprocessed`` is
    intercepted and each token is adjusted to the active dialect:

    * the first ``Comment.Special`` token holding a valid dialect tag
      switches the dialect (only while no tag has been accepted yet);
    * ``Name`` tokens are reclassified as reserved words, builtins,
      pseudo builtins, ADTs or standard library identifiers;
    * number literals and comments not permitted in the active dialect
      are marked as ``Error`` (PIM pragmas become plain multiline
      comments outside the PIM dialects);
    * all other tokens are checked against the dialect's reject set and,
      in Algol publication mode, selected operators are replaced by
      their typographic glyphs.
    """
    # operator lexemes and their replacements in Algol publication mode
    algol_glyphs = {
        '#': u'≠',
        '<=': u'≤',
        '>=': u'≥',
        '==': u'≡',
        '*.': u'•',
    }
    prefix_literal_starts = ('0b', '0x', '0u')
    r10_family = ('m2r10', 'objm2')

    raw_stream = RegexLexer.get_tokens_unprocessed(self, text)
    for position, ttype, lexeme in raw_stream:
        #
        # look for a dialect tag as long as none has been accepted
        if not self.dialect_set_by_tag and ttype == Comment.Special:
            tagged_dialect = self.get_dialect_from_dialect_tag(lexeme)
            if tagged_dialect != 'unknown':
                # the comment is a dialect indicator:
                # reload reserved words and builtins for that dialect
                self.set_dialect(tagged_dialect)
                self.dialect_set_by_tag = True

        if ttype is Name:
            #
            # reserved words, predefined and stdlib identifiers
            lowercase_in_algol_mode = False
            if lexeme in self.reserved_words:
                ttype = Keyword.Reserved
                lowercase_in_algol_mode = True
            elif lexeme in self.builtins:
                ttype = Name.Builtin
                lowercase_in_algol_mode = True
            elif lexeme in self.pseudo_builtins:
                ttype = Name.Builtin.Pseudo
                lowercase_in_algol_mode = True
            elif lexeme in self.adts:
                if self.treat_stdlib_adts_as_builtins:
                    ttype = Name.Builtin.Pseudo
                    lowercase_in_algol_mode = True
                else:
                    ttype = Name.Namespace
            elif lexeme in self.modules:
                ttype = Name.Namespace
            elif lexeme in self.types:
                ttype = Name.Class
            elif lexeme in self.procedures:
                ttype = Name.Function
            elif lexeme in self.variables:
                ttype = Name.Variable
            elif lexeme in self.constants:
                ttype = Name.Constant

            if lowercase_in_algol_mode and self.algol_publication_mode:
                lexeme = lexeme.lower()

        elif ttype in Number:
            if self.dialect in r10_family:
                # M2 R10 and ObjM2 reject base-8 literals, suffix base-16
                # literals and real numbers written with E
                if ttype is Number.Oct \
                   or (ttype is Number.Hex and 'H' in lexeme) \
                   or (ttype is Number.Float and 'E' in lexeme):
                    ttype = Error
            elif self.dialect != 'unknown':
                # PIM and ISO dialects reject prefix number literals
                if "'" in lexeme or lexeme[0:2] in prefix_literal_starts:
                    ttype = Error

        elif ttype in Comment:
            if ttype is Comment.Single:
                # single line comments are errors in PIM and ISO dialects
                if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                    ttype = Error
            elif ttype is Comment.Preproc:
                if self.dialect.startswith('m2pim'):
                    # ISO pragmas are errors in PIM dialects
                    if lexeme.startswith('<*'):
                        ttype = Error
                elif self.dialect != 'unknown' \
                        and lexeme.startswith('(*$'):
                    # PIM pragmas are ordinary comments in other dialects
                    ttype = Comment.Multiline

        else:  # token is neither Name, Number nor Comment
            #
            # lexemes in the dialect's reject set are errors
            if lexeme in self.lexemes_to_reject:
                ttype = Error
            #
            # substitute lexemes when in Algol mode
            if self.algol_publication_mode:
                lexeme = algol_glyphs.get(lexeme, lexeme)

        # return result
        yield position, ttype, lexeme

eric ide

mercurial