eric6/ThirdParty/Pygments/pygments/lexers/modula2.py

changeset 7701
25f42e208e08
parent 7547
21b0534faebc
child 7983
54c5cfbb1e29
diff -r a3cf077a8db3 -r 25f42e208e08 eric6/ThirdParty/Pygments/pygments/lexers/modula2.py
--- a/eric6/ThirdParty/Pygments/pygments/lexers/modula2.py	Tue Sep 15 18:46:58 2020 +0200
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/modula2.py	Tue Sep 15 19:09:05 2020 +0200
@@ -1,1561 +1,1561 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.modula2
-    ~~~~~~~~~~~~~~~~~~~~~~~
-
-    Multi-Dialect Lexer for Modula-2.
-
-    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexer import RegexLexer, include
-from pygments.util import get_bool_opt, get_list_opt
-from pygments.token import Text, Comment, Operator, Keyword, Name, \
-    String, Number, Punctuation, Error
-
-__all__ = ['Modula2Lexer']
-
-
-# Multi-Dialect Modula-2 Lexer
-class Modula2Lexer(RegexLexer):
-    """
-    For `Modula-2 <http://www.modula2.org/>`_ source code.
-
-    The Modula-2 lexer supports several dialects.  By default, it operates in
-    fallback mode, recognising the *combined* literals, punctuation symbols
-    and operators of all supported dialects, and the *combined* reserved words
-    and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
-    differentiating between library defined identifiers.
-
-    To select a specific dialect, a dialect option may be passed
-    or a dialect tag may be embedded into a source file.
-
-    Dialect Options:
-
-    `m2pim`
-        Select PIM Modula-2 dialect.
-    `m2iso`
-        Select ISO Modula-2 dialect.
-    `m2r10`
-        Select Modula-2 R10 dialect.
-    `objm2`
-        Select Objective Modula-2 dialect.
-
-    The PIM and ISO dialect options may be qualified with a language extension.
-
-    Language Extensions:
-
-    `+aglet`
-        Select Aglet Modula-2 extensions, available with m2iso.
-    `+gm2`
-        Select GNU Modula-2 extensions, available with m2pim.
-    `+p1`
-        Select p1 Modula-2 extensions, available with m2iso.
-    `+xds`
-        Select XDS Modula-2 extensions, available with m2iso.
-
-
-    Passing a Dialect Option via Unix Commandline Interface
-
-    Dialect options may be passed to the lexer using the `dialect` key.
-    Only one such option should be passed. If multiple dialect options are
-    passed, the first valid option is used, any subsequent options are ignored.
-
-    Examples:
-
-    `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
-        Use ISO dialect to render input to HTML output
-    `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
-        Use ISO dialect with p1 extensions to render input to RTF output
-
-
-    Embedding a Dialect Option within a source file
-
-    A dialect option may be embedded in a source file in form of a dialect
-    tag, a specially formatted comment that specifies a dialect option.
-
-    Dialect Tag EBNF::
-
-       dialectTag :
-           OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
-
-       dialectOption :
-           'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
-           'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
-
-       Prefix : '!' ;
-
-       OpeningCommentDelim : '(*' ;
-
-       ClosingCommentDelim : '*)' ;
-
-    No whitespace is permitted between the tokens of a dialect tag.
-
-    In the event that a source file contains multiple dialect tags, the first
-    tag that contains a valid dialect option will be used and any subsequent
-    dialect tags will be ignored.  Ideally, a dialect tag should be placed
-    at the beginning of a source file.
-
-    An embedded dialect tag overrides a dialect option set via command line.
-
-    Examples:
-
-    ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
-        Use Modula2 R10 dialect to render this source file.
-    ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
-        Use PIM dialect with GNU extensions to render this source file.
-
-
-    Algol Publication Mode:
-
-    In Algol publication mode, source text is rendered for publication of
-    algorithms in scientific papers and academic texts, following the format
-    of the Revised Algol-60 Language Report.  It is activated by passing
-    one of two corresponding styles as an option:
-
-    `algol`
-        render reserved words lowercase underline boldface
-        and builtins lowercase boldface italic
-    `algol_nu`
-        render reserved words lowercase boldface (no underlining)
-        and builtins lowercase boldface italic
-
-    The lexer automatically performs the required lowercase conversion when
-    this mode is activated.
-
-    Example:
-
-    ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
-        Render input file in Algol publication mode to LaTeX output.
-
-
-    Rendering Mode of First Class ADT Identifiers:
-
-    The rendering of standard library first class ADT identifiers is controlled
-    by option flag "treat_stdlib_adts_as_builtins".
-
-    When this option is turned on, standard library ADT identifiers are rendered
-    as builtins.  When it is turned off, they are rendered as ordinary library
-    identifiers.
-
-    `treat_stdlib_adts_as_builtins` (default: On)
-
-    The option is useful for dialects that support ADTs as first class objects
-    and provide ADTs in the standard library that would otherwise be built-in.
-
-    At present, only Modula-2 R10 supports library ADTs as first class objects
-    and therefore, no ADT identifiers are defined for any other dialects.
-
-    Example:
-
-    ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
-        Render standard library ADTs as ordinary library types.
-
-    .. versionadded:: 1.3
-
-    .. versionchanged:: 2.1
-       Added multi-dialect support.
-    """
-    name = 'Modula-2'
-    aliases = ['modula2', 'm2']
-    filenames = ['*.def', '*.mod']
-    mimetypes = ['text/x-modula2']
-
-    flags = re.MULTILINE | re.DOTALL
-
-    tokens = {
-        'whitespace': [
-            (r'\n+', Text),  # blank lines
-            (r'\s+', Text),  # whitespace
-        ],
-        'dialecttags': [
-            # PIM Dialect Tag
-            (r'\(\*!m2pim\*\)', Comment.Special),
-            # ISO Dialect Tag
-            (r'\(\*!m2iso\*\)', Comment.Special),
-            # M2R10 Dialect Tag
-            (r'\(\*!m2r10\*\)', Comment.Special),
-            # ObjM2 Dialect Tag
-            (r'\(\*!objm2\*\)', Comment.Special),
-            # Aglet Extensions Dialect Tag
-            (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
-            # GNU Extensions Dialect Tag
-            (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
-            # p1 Extensions Dialect Tag
-            (r'\(\*!m2iso\+p1\*\)', Comment.Special),
-            # XDS Extensions Dialect Tag
-            (r'\(\*!m2iso\+xds\*\)', Comment.Special),
-        ],
-        'identifiers': [
-            (r'([a-zA-Z_$][\w$]*)', Name),
-        ],
-        'prefixed_number_literals': [
-            #
-            # Base-2, whole number
-            (r'0b[01]+(\'[01]+)*', Number.Bin),
-            #
-            # Base-16, whole number
-            (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
-        ],
-        'plain_number_literals': [
-            #
-            # Base-10, real number with exponent
-            (r'[0-9]+(\'[0-9]+)*'  # integral part
-             r'\.[0-9]+(\'[0-9]+)*'  # fractional part
-             r'[eE][+-]?[0-9]+(\'[0-9]+)*',  # exponent
-             Number.Float),
-            #
-            # Base-10, real number without exponent
-            (r'[0-9]+(\'[0-9]+)*'  # integral part
-             r'\.[0-9]+(\'[0-9]+)*',  # fractional part
-             Number.Float),
-            #
-            # Base-10, whole number
-            (r'[0-9]+(\'[0-9]+)*', Number.Integer),
-        ],
-        'suffixed_number_literals': [
-            #
-            # Base-8, whole number
-            (r'[0-7]+B', Number.Oct),
-            #
-            # Base-8, character code
-            (r'[0-7]+C', Number.Oct),
-            #
-            # Base-16, number
-            (r'[0-9A-F]+H', Number.Hex),
-        ],
-        'string_literals': [
-            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
-            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
-        ],
-        'digraph_operators': [
-            # Dot Product Operator
-            (r'\*\.', Operator),
-            # Array Concatenation Operator
-            (r'\+>', Operator),  # M2R10 + ObjM2
-            # Inequality Operator
-            (r'<>', Operator),  # ISO + PIM
-            # Less-Or-Equal, Subset
-            (r'<=', Operator),
-            # Greater-Or-Equal, Superset
-            (r'>=', Operator),
-            # Identity Operator
-            (r'==', Operator),  # M2R10 + ObjM2
-            # Type Conversion Operator
-            (r'::', Operator),  # M2R10 + ObjM2
-            # Assignment Symbol
-            (r':=', Operator),
-            # Postfix Increment Mutator
-            (r'\+\+', Operator),  # M2R10 + ObjM2
-            # Postfix Decrement Mutator
-            (r'--', Operator),  # M2R10 + ObjM2
-        ],
-        'unigraph_operators': [
-            # Arithmetic Operators
-            (r'[+-]', Operator),
-            (r'[*/]', Operator),
-            # ISO 80000-2 compliant Set Difference Operator
-            (r'\\', Operator),  # M2R10 + ObjM2
-            # Relational Operators
-            (r'[=#<>]', Operator),
-            # Dereferencing Operator
-            (r'\^', Operator),
-            # Dereferencing Operator Synonym
-            (r'@', Operator),  # ISO
-            # Logical AND Operator Synonym
-            (r'&', Operator),  # PIM + ISO
-            # Logical NOT Operator Synonym
-            (r'~', Operator),  # PIM + ISO
-            # Smalltalk Message Prefix
-            (r'`', Operator),  # ObjM2
-        ],
-        'digraph_punctuation': [
-            # Range Constructor
-            (r'\.\.', Punctuation),
-            # Opening Chevron Bracket
-            (r'<<', Punctuation),  # M2R10 + ISO
-            # Closing Chevron Bracket
-            (r'>>', Punctuation),  # M2R10 + ISO
-            # Blueprint Punctuation
-            (r'->', Punctuation),  # M2R10 + ISO
-            # Distinguish |# and # in M2 R10
-            (r'\|#', Punctuation),
-            # Distinguish ## and # in M2 R10
-            (r'##', Punctuation),
-            # Distinguish |* and * in M2 R10
-            (r'\|\*', Punctuation),
-        ],
-        'unigraph_punctuation': [
-            # Common Punctuation
-            (r'[()\[\]{},.:;|]', Punctuation),
-            # Case Label Separator Synonym
-            (r'!', Punctuation),  # ISO
-            # Blueprint Punctuation
-            (r'\?', Punctuation),  # M2R10 + ObjM2
-        ],
-        'comments': [
-            # Single Line Comment
-            (r'^//.*?\n', Comment.Single),  # M2R10 + ObjM2
-            # Block Comment
-            (r'\(\*([^$].*?)\*\)', Comment.Multiline),
-            # Template Block Comment
-            (r'/\*(.*?)\*/', Comment.Multiline),  # M2R10 + ObjM2
-        ],
-        'pragmas': [
-            # ISO Style Pragmas
-            (r'<\*.*?\*>', Comment.Preproc),  # ISO, M2R10 + ObjM2
-            # Pascal Style Pragmas
-            (r'\(\*\$.*?\*\)', Comment.Preproc),  # PIM
-        ],
-        'root': [
-            include('whitespace'),
-            include('dialecttags'),
-            include('pragmas'),
-            include('comments'),
-            include('identifiers'),
-            include('suffixed_number_literals'),  # PIM + ISO
-            include('prefixed_number_literals'),  # M2R10 + ObjM2
-            include('plain_number_literals'),
-            include('string_literals'),
-            include('digraph_punctuation'),
-            include('digraph_operators'),
-            include('unigraph_punctuation'),
-            include('unigraph_operators'),
-        ]
-    }
-
-#  C o m m o n   D a t a s e t s
-
-    # Common Reserved Words Dataset
-    common_reserved_words = (
-        # 37 common reserved words
-        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
-        'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
-        'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
-        'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
-        'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
-    )
-
-    # Common Builtins Dataset
-    common_builtins = (
-        # 16 common builtins
-        'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
-        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
-        'TRUE',
-    )
-
-    # Common Pseudo-Module Builtins Dataset
-    common_pseudo_builtins = (
-        # 4 common pseudo builtins
-        'ADDRESS', 'BYTE', 'WORD', 'ADR'
-    )
-
-#  P I M   M o d u l a - 2   D a t a s e t s
-
-    # Lexemes to Mark as Error Tokens for PIM Modula-2
-    pim_lexemes_to_reject = (
-        '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
-        '+>', '->', '<<', '>>', '|#', '##',
-    )
-
-    # PIM Modula-2 Additional Reserved Words Dataset
-    pim_additional_reserved_words = (
-        # 3 additional reserved words
-        'EXPORT', 'QUALIFIED', 'WITH',
-    )
-
-    # PIM Modula-2 Additional Builtins Dataset
-    pim_additional_builtins = (
-        # 16 additional builtins
-        'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
-        'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
-    )
-
-    # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
-    pim_additional_pseudo_builtins = (
-        # 5 additional pseudo builtins
-        'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
-    )
-
-#  I S O   M o d u l a - 2   D a t a s e t s
-
-    # Lexemes to Mark as Error Tokens for ISO Modula-2
-    iso_lexemes_to_reject = (
-        '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
-        '<<', '>>', '|#', '##',
-    )
-
-    # ISO Modula-2 Additional Reserved Words Dataset
-    iso_additional_reserved_words = (
-        # 9 additional reserved words (ISO 10514-1)
-        'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
-        'REM', 'RETRY', 'WITH',
-        # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
-        'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
-        'REVEAL', 'TRACED', 'UNSAFEGUARDED',
-    )
-
-    # ISO Modula-2 Additional Builtins Dataset
-    # builtins recognised for ISO Modula-2 in addition to the common set
-    iso_additional_builtins = (
-        # 26 additional builtins (ISO 10514-1)
-        'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
-        'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
-        'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
-        # UNINTERRUPTIBLE was previously misspelled 'UNINTERRUBTIBLE', so the
-        # real identifier was never matched as a builtin
-        'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
-        # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
-        'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
-    )
-
-    # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
-    iso_additional_pseudo_builtins = (
-        # 14 additional builtins (SYSTEM)
-        'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
-        'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
-        'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
-        # 13 additional builtins (COROUTINES)
-        'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
-        'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
-        'NEWCOROUTINE', 'PROT', 'TRANSFER',
-        # 9 additional builtins (EXCEPTIONS)
-        'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
-        'ExceptionSource', 'GetMessage', 'IsCurrentSource',
-        'IsExceptionalExecution', 'RAISE',
-        # 3 additional builtins (TERMINATION)
-        'TERMINATION', 'IsTerminating', 'HasHalted',
-        # 19 additional builtins (M2EXCEPTION)
-        'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
-        'indexException', 'rangeException', 'caseSelectException',
-        'invalidLocation', 'functionException', 'wholeValueException',
-        'wholeDivException', 'realValueException', 'realDivException',
-        'complexValueException', 'complexDivException', 'protException',
-        'sysException', 'coException', 'exException',
-    )
-
-#  M o d u l a - 2   R 1 0   D a t a s e t s
-
-    # Lexemes to Mark as Error Tokens for Modula-2 R10
-    m2r10_lexemes_to_reject = (
-        '!', '`', '@', '$', '%', '&', '<>',
-    )
-
-    # Modula-2 R10 reserved words in addition to the common set
-    m2r10_additional_reserved_words = (
-        # 12 additional reserved words
-        'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
-        'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
-        # 2 additional reserved words with symbolic assembly option
-        'ASM', 'REG',
-    )
-
-    # Modula-2 R10 builtins in addition to the common set
-    m2r10_additional_builtins = (
-        # 26 additional builtins
-        'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
-        'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
-        'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
-        'UNICHAR', 'WRITE', 'WRITEF',
-    )
-
-    # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
-    m2r10_additional_pseudo_builtins = (
-        # 13 additional builtins (TPROPERTIES)
-        'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
-        'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
-        'TMAXEXP', 'TMINEXP',
-        # 4 additional builtins (CONVERSION)
-        'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
-        # 35 additional builtins (UNSAFE)
-        'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
-        'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
-        'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
-        'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
-        'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
-        # 11 additional builtins (ATOMIC)
-        'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
-        'BWNAND', 'BWOR', 'BWXOR',
-        # 7 additional builtins (COMPILER)
-        'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
-        'HASH',
-        # 5 additional builtins (ASSEMBLER)
-        'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
-    )
-
-#  O b j e c t i v e   M o d u l a - 2   D a t a s e t s
-
-    # Lexemes to Mark as Error Tokens for Objective Modula-2
-    objm2_lexemes_to_reject = (
-        '!', '$', '%', '&', '<>',
-    )
-
-    # Objective Modula-2 Extensions
-    # reserved words in addition to Modula-2 R10
-    objm2_additional_reserved_words = (
-        # 16 additional reserved words
-        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
-        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
-        'SUPER', 'TRY',
-    )
-
-    # Objective Modula-2 Extensions
-    # builtins in addition to Modula-2 R10
-    objm2_additional_builtins = (
-        # 3 additional builtins
-        'OBJECT', 'NO', 'YES',
-    )
-
-    # Objective Modula-2 Extensions
-    # pseudo-module builtins in addition to Modula-2 R10
-    objm2_additional_pseudo_builtins = (
-        # None
-    )
-
-#  A g l e t   M o d u l a - 2   D a t a s e t s
-
-    # Aglet Extensions
-    # reserved words in addition to ISO Modula-2
-    aglet_additional_reserved_words = (
-        # None
-    )
-
-    # Aglet Extensions
-    # builtins in addition to ISO Modula-2
-    aglet_additional_builtins = (
-        # 9 additional builtins
-        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
-        'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
-    )
-
-    # Aglet Modula-2 Extensions
-    # pseudo-module builtins in addition to ISO Modula-2
-    aglet_additional_pseudo_builtins = (
-        # None
-    )
-
-#  G N U   M o d u l a - 2   D a t a s e t s
-
-    # GNU Extensions
-    # reserved words in addition to PIM Modula-2
-    gm2_additional_reserved_words = (
-        # 10 additional reserved words
-        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
-        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
-    )
-
-    # GNU Extensions
-    # builtins in addition to PIM Modula-2
-    gm2_additional_builtins = (
-        # 21 additional builtins
-        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
-        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
-        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
-        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
-    )
-
-    # GNU Extensions
-    # pseudo-module builtins in addition to PIM Modula-2
-    gm2_additional_pseudo_builtins = (
-        # None
-    )
-
-#  p 1   M o d u l a - 2   D a t a s e t s
-
-    # p1 Extensions
-    # reserved words in addition to ISO Modula-2
-    p1_additional_reserved_words = (
-        # None
-    )
-
-    # p1 Extensions
-    # builtins in addition to ISO Modula-2
-    p1_additional_builtins = (
-        # None
-    )
-
-    # p1 Modula-2 Extensions
-    # pseudo-module builtins in addition to ISO Modula-2
-    p1_additional_pseudo_builtins = (
-        # 1 additional builtin
-        'BCD',
-    )
-
-#  X D S   M o d u l a - 2   D a t a s e t s
-
-    # XDS Extensions
-    # reserved words in addition to ISO Modula-2
-    xds_additional_reserved_words = (
-        # 1 additional reserved word
-        'SEQ',
-    )
-
-    # XDS Extensions
-    # builtins in addition to ISO Modula-2
-    xds_additional_builtins = (
-        # 9 additional builtins
-        'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
-        'LONGCARD', 'SHORTCARD', 'SHORTINT',
-    )
-
-    # XDS Modula-2 Extensions
-    # pseudo-module builtins in addition to ISO Modula-2
-    # NOTE: every entry needs its own trailing comma; adjacent string literals
-    # are silently concatenated into a single (bogus) entry otherwise
-    xds_additional_pseudo_builtins = (
-        # 21 additional builtins (SYSTEM)
-        'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
-        'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
-        'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
-        # 3 additional builtins (COMPILER)
-        'COMPILER', 'OPTION', 'EQUATION',
-    )
-
-#  P I M   S t a n d a r d   L i b r a r y   D a t a s e t s
-
-    # PIM Modula-2 Standard Library Modules Dataset
-    pim_stdlib_module_identifiers = (
-        'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
-    )
-
-    # PIM Modula-2 Standard Library Types Dataset
-    pim_stdlib_type_identifiers = (
-        'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
-        'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
-        'DirectoryCommand',
-    )
-
-    # PIM Modula-2 Standard Library Procedures Dataset
-    pim_stdlib_proc_identifiers = (
-        'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
-        'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
-        'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
-        'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
-        'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
-        'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
-        'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
-        'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
-        'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
-        'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
-    )
-
-    # PIM Modula-2 Standard Library Variables Dataset
-    pim_stdlib_var_identifiers = (
-        'Done', 'termCH', 'in', 'out'
-    )
-
-    # PIM Modula-2 Standard Library Constants Dataset
-    pim_stdlib_const_identifiers = (
-        'EOL',
-    )
-
-#  I S O   S t a n d a r d   L i b r a r y   D a t a s e t s
-
-    # ISO Modula-2 Standard Library Modules Dataset
-    iso_stdlib_module_identifiers = (
-        # TO DO
-    )
-
-    # ISO Modula-2 Standard Library Types Dataset
-    iso_stdlib_type_identifiers = (
-        # TO DO
-    )
-
-    # ISO Modula-2 Standard Library Procedures Dataset
-    iso_stdlib_proc_identifiers = (
-        # TO DO
-    )
-
-    # ISO Modula-2 Standard Library Variables Dataset
-    iso_stdlib_var_identifiers = (
-        # TO DO
-    )
-
-    # ISO Modula-2 Standard Library Constants Dataset
-    iso_stdlib_const_identifiers = (
-        # TO DO
-    )
-
-#  M 2   R 1 0   S t a n d a r d   L i b r a r y   D a t a s e t s
-
-    # Modula-2 R10 Standard Library ADTs Dataset
-    m2r10_stdlib_adt_identifiers = (
-        'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
-        'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
-        'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
-        'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
-        'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
-        'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
-        'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
-        'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
-        'INT64', 'INT128', 'STRING', 'UNISTRING',
-    )
-
-    # Modula-2 R10 Standard Library Blueprints Dataset
-    m2r10_stdlib_blueprint_identifiers = (
-        'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
-        'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
-        'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
-        'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
-        'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
-        'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
-        'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
-    )
-
-    # Modula-2 R10 Standard Library Modules Dataset
-    m2r10_stdlib_module_identifiers = (
-        'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
-        'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
-        'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
-        'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
-        'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
-    )
-
-    # Modula-2 R10 Standard Library Types Dataset
-    m2r10_stdlib_type_identifiers = (
-        'File', 'Status',
-        # TO BE COMPLETED
-    )
-
-    # Modula-2 R10 Standard Library Procedures Dataset
-    m2r10_stdlib_proc_identifiers = (
-        'ALLOCATE', 'DEALLOCATE', 'SIZE',
-        # TO BE COMPLETED
-    )
-
-    # Modula-2 R10 Standard Library Variables Dataset
-    m2r10_stdlib_var_identifiers = (
-        'stdIn', 'stdOut', 'stdErr',
-    )
-
-    # Modula-2 R10 Standard Library Constants Dataset
-    m2r10_stdlib_const_identifiers = (
-        'pi', 'tau',
-    )
-
-#  D i a l e c t s
-
-    # Dialect modes
-    dialects = (
-        'unknown',
-        'm2pim', 'm2iso', 'm2r10', 'objm2',
-        'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
-    )
-
-#   D a t a b a s e s
-
-    # Lexemes to Mark as Errors Database
-    lexemes_to_reject_db = {
-        # Lexemes to reject for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Lexemes to reject for PIM Modula-2
-        'm2pim': (
-            pim_lexemes_to_reject,
-        ),
-        # Lexemes to reject for ISO Modula-2
-        'm2iso': (
-            iso_lexemes_to_reject,
-        ),
-        # Lexemes to reject for Modula-2 R10
-        'm2r10': (
-            m2r10_lexemes_to_reject,
-        ),
-        # Lexemes to reject for Objective Modula-2
-        'objm2': (
-            objm2_lexemes_to_reject,
-        ),
-        # Lexemes to reject for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_lexemes_to_reject,
-        ),
-        # Lexemes to reject for GNU Modula-2
-        'm2pim+gm2': (
-            pim_lexemes_to_reject,
-        ),
-        # Lexemes to reject for p1 Modula-2
-        'm2iso+p1': (
-            iso_lexemes_to_reject,
-        ),
-        # Lexemes to reject for XDS Modula-2
-        'm2iso+xds': (
-            iso_lexemes_to_reject,
-        ),
-    }
-
-    # Reserved Words Database
-    reserved_words_db = {
-        # Reserved words for unknown dialect
-        'unknown': (
-            common_reserved_words,
-            pim_additional_reserved_words,
-            iso_additional_reserved_words,
-            m2r10_additional_reserved_words,
-        ),
-
-        # Reserved words for PIM Modula-2
-        'm2pim': (
-            common_reserved_words,
-            pim_additional_reserved_words,
-        ),
-
-        # Reserved words for ISO Modula-2
-        'm2iso': (
-            common_reserved_words,
-            iso_additional_reserved_words,
-        ),
-
-        # Reserved words for Modula-2 R10
-        'm2r10': (
-            common_reserved_words,
-            m2r10_additional_reserved_words,
-        ),
-
-        # Reserved words for Objective Modula-2
-        'objm2': (
-            common_reserved_words,
-            m2r10_additional_reserved_words,
-            objm2_additional_reserved_words,
-        ),
-
-        # Reserved words for Aglet Modula-2 Extensions
-        'm2iso+aglet': (
-            common_reserved_words,
-            iso_additional_reserved_words,
-            aglet_additional_reserved_words,
-        ),
-
-        # Reserved words for GNU Modula-2 Extensions
-        'm2pim+gm2': (
-            common_reserved_words,
-            pim_additional_reserved_words,
-            gm2_additional_reserved_words,
-        ),
-
-        # Reserved words for p1 Modula-2 Extensions
-        'm2iso+p1': (
-            common_reserved_words,
-            iso_additional_reserved_words,
-            p1_additional_reserved_words,
-        ),
-
-        # Reserved words for XDS Modula-2 Extensions
-        'm2iso+xds': (
-            common_reserved_words,
-            iso_additional_reserved_words,
-            xds_additional_reserved_words,
-        ),
-    }
-
-    # Builtins Database
-    builtins_db = {
-        # Builtins for unknown dialect
-        'unknown': (
-            common_builtins,
-            pim_additional_builtins,
-            iso_additional_builtins,
-            m2r10_additional_builtins,
-        ),
-
-        # Builtins for PIM Modula-2
-        'm2pim': (
-            common_builtins,
-            pim_additional_builtins,
-        ),
-
-        # Builtins for ISO Modula-2
-        'm2iso': (
-            common_builtins,
-            iso_additional_builtins,
-        ),
-
-        # Builtins for Modula-2 R10
-        'm2r10': (
-            common_builtins,
-            m2r10_additional_builtins,
-        ),
-
-        # Builtins for Objective Modula-2
-        'objm2': (
-            common_builtins,
-            m2r10_additional_builtins,
-            objm2_additional_builtins,
-        ),
-
-        # Builtins for Aglet Modula-2 Extensions
-        'm2iso+aglet': (
-            common_builtins,
-            iso_additional_builtins,
-            aglet_additional_builtins,
-        ),
-
-        # Builtins for GNU Modula-2 Extensions
-        'm2pim+gm2': (
-            common_builtins,
-            pim_additional_builtins,
-            gm2_additional_builtins,
-        ),
-
-        # Builtins for p1 Modula-2 Extensions
-        'm2iso+p1': (
-            common_builtins,
-            iso_additional_builtins,
-            p1_additional_builtins,
-        ),
-
-        # Builtins for XDS Modula-2 Extensions
-        'm2iso+xds': (
-            common_builtins,
-            iso_additional_builtins,
-            xds_additional_builtins,
-        ),
-    }
-
-    # Pseudo-Module Builtins Database
-    pseudo_builtins_db = {
-        # Builtins for unknown dialect
-        'unknown': (
-            common_pseudo_builtins,
-            pim_additional_pseudo_builtins,
-            iso_additional_pseudo_builtins,
-            m2r10_additional_pseudo_builtins,
-        ),
-
-        # Builtins for PIM Modula-2
-        'm2pim': (
-            common_pseudo_builtins,
-            pim_additional_pseudo_builtins,
-        ),
-
-        # Builtins for ISO Modula-2
-        'm2iso': (
-            common_pseudo_builtins,
-            iso_additional_pseudo_builtins,
-        ),
-
-        # Builtins for ISO Modula-2
-        'm2r10': (
-            common_pseudo_builtins,
-            m2r10_additional_pseudo_builtins,
-        ),
-
-        # Builtins for Objective Modula-2
-        'objm2': (
-            common_pseudo_builtins,
-            m2r10_additional_pseudo_builtins,
-            objm2_additional_pseudo_builtins,
-        ),
-
-        # Builtins for Aglet Modula-2 Extensions
-        'm2iso+aglet': (
-            common_pseudo_builtins,
-            iso_additional_pseudo_builtins,
-            aglet_additional_pseudo_builtins,
-        ),
-
-        # Builtins for GNU Modula-2 Extensions
-        'm2pim+gm2': (
-            common_pseudo_builtins,
-            pim_additional_pseudo_builtins,
-            gm2_additional_pseudo_builtins,
-        ),
-
-        # Builtins for p1 Modula-2 Extensions
-        'm2iso+p1': (
-            common_pseudo_builtins,
-            iso_additional_pseudo_builtins,
-            p1_additional_pseudo_builtins,
-        ),
-
-        # Builtins for XDS Modula-2 Extensions
-        'm2iso+xds': (
-            common_pseudo_builtins,
-            iso_additional_pseudo_builtins,
-            xds_additional_pseudo_builtins,
-        ),
-    }
-
-    # Standard Library ADTs Database
-    stdlib_adts_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library ADTs for PIM Modula-2
-        'm2pim': (
-            # No first class library types
-        ),
-
-        # Standard Library ADTs for ISO Modula-2
-        'm2iso': (
-            # No first class library types
-        ),
-
-        # Standard Library ADTs for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_adt_identifiers,
-        ),
-
-        # Standard Library ADTs for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_adt_identifiers,
-        ),
-
-        # Standard Library ADTs for Aglet Modula-2
-        'm2iso+aglet': (
-            # No first class library types
-        ),
-
-        # Standard Library ADTs for GNU Modula-2
-        'm2pim+gm2': (
-            # No first class library types
-        ),
-
-        # Standard Library ADTs for p1 Modula-2
-        'm2iso+p1': (
-            # No first class library types
-        ),
-
-        # Standard Library ADTs for XDS Modula-2
-        'm2iso+xds': (
-            # No first class library types
-        ),
-    }
-
-    # Standard Library Modules Database
-    stdlib_modules_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library Modules for PIM Modula-2
-        'm2pim': (
-            pim_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for ISO Modula-2
-        'm2iso': (
-            iso_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_blueprint_identifiers,
-            m2r10_stdlib_module_identifiers,
-            m2r10_stdlib_adt_identifiers,
-        ),
-
-        # Standard Library Modules for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_blueprint_identifiers,
-            m2r10_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for GNU Modula-2
-        'm2pim+gm2': (
-            pim_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for p1 Modula-2
-        'm2iso+p1': (
-            iso_stdlib_module_identifiers,
-        ),
-
-        # Standard Library Modules for XDS Modula-2
-        'm2iso+xds': (
-            iso_stdlib_module_identifiers,
-        ),
-    }
-
-    # Standard Library Types Database
-    stdlib_types_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library Types for PIM Modula-2
-        'm2pim': (
-            pim_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for ISO Modula-2
-        'm2iso': (
-            iso_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for GNU Modula-2
-        'm2pim+gm2': (
-            pim_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for p1 Modula-2
-        'm2iso+p1': (
-            iso_stdlib_type_identifiers,
-        ),
-
-        # Standard Library Types for XDS Modula-2
-        'm2iso+xds': (
-            iso_stdlib_type_identifiers,
-        ),
-    }
-
-    # Standard Library Procedures Database
-    stdlib_procedures_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library Procedures for PIM Modula-2
-        'm2pim': (
-            pim_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for ISO Modula-2
-        'm2iso': (
-            iso_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for GNU Modula-2
-        'm2pim+gm2': (
-            pim_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for p1 Modula-2
-        'm2iso+p1': (
-            iso_stdlib_proc_identifiers,
-        ),
-
-        # Standard Library Procedures for XDS Modula-2
-        'm2iso+xds': (
-            iso_stdlib_proc_identifiers,
-        ),
-    }
-
-    # Standard Library Variables Database
-    stdlib_variables_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library Variables for PIM Modula-2
-        'm2pim': (
-            pim_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for ISO Modula-2
-        'm2iso': (
-            iso_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for GNU Modula-2
-        'm2pim+gm2': (
-            pim_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for p1 Modula-2
-        'm2iso+p1': (
-            iso_stdlib_var_identifiers,
-        ),
-
-        # Standard Library Variables for XDS Modula-2
-        'm2iso+xds': (
-            iso_stdlib_var_identifiers,
-        ),
-    }
-
-    # Standard Library Constants Database
-    stdlib_constants_db = {
-        # Empty entry for unknown dialect
-        'unknown': (
-            # LEAVE THIS EMPTY
-        ),
-        # Standard Library Constants for PIM Modula-2
-        'm2pim': (
-            pim_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for ISO Modula-2
-        'm2iso': (
-            iso_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for Modula-2 R10
-        'm2r10': (
-            m2r10_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for Objective Modula-2
-        'objm2': (
-            m2r10_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for Aglet Modula-2
-        'm2iso+aglet': (
-            iso_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for GNU Modula-2
-        'm2pim+gm2': (
-            pim_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for p1 Modula-2
-        'm2iso+p1': (
-            iso_stdlib_const_identifiers,
-        ),
-
-        # Standard Library Constants for XDS Modula-2
-        'm2iso+xds': (
-            iso_stdlib_const_identifiers,
-        ),
-    }
-
-#   M e t h o d s
-
-    # initialise a lexer instance
-    def __init__(self, **options):
-        #
-        # check dialect options
-        #
-        dialects = get_list_opt(options, 'dialect', [])
-        #
-        for dialect_option in dialects:
-            if dialect_option in self.dialects[1:-1]:
-                # valid dialect option found
-                self.set_dialect(dialect_option)
-                break
-        #
-        # Fallback Mode (DEFAULT)
-        else:
-            # no valid dialect option
-            self.set_dialect('unknown')
-        #
-        self.dialect_set_by_tag = False
-        #
-        # check style options
-        #
-        styles = get_list_opt(options, 'style', [])
-        #
-        # use lowercase mode for Algol style
-        if 'algol' in styles or 'algol_nu' in styles:
-            self.algol_publication_mode = True
-        else:
-            self.algol_publication_mode = False
-        #
-        # Check option flags
-        #
-        self.treat_stdlib_adts_as_builtins = get_bool_opt(
-            options, 'treat_stdlib_adts_as_builtins', True)
-        #
-        # call superclass initialiser
-        RegexLexer.__init__(self, **options)
-
-    # Set lexer to a specified dialect
-    def set_dialect(self, dialect_id):
-        #
-        # if __debug__:
-        #    print 'entered set_dialect with arg: ', dialect_id
-        #
-        # check dialect name against known dialects
-        if dialect_id not in self.dialects:
-            dialect = 'unknown'  # default
-        else:
-            dialect = dialect_id
-        #
-        # compose lexemes to reject set
-        lexemes_to_reject_set = set()
-        # add each list of reject lexemes for this dialect
-        for list in self.lexemes_to_reject_db[dialect]:
-            lexemes_to_reject_set.update(set(list))
-        #
-        # compose reserved words set
-        reswords_set = set()
-        # add each list of reserved words for this dialect
-        for list in self.reserved_words_db[dialect]:
-            reswords_set.update(set(list))
-        #
-        # compose builtins set
-        builtins_set = set()
-        # add each list of builtins for this dialect excluding reserved words
-        for list in self.builtins_db[dialect]:
-            builtins_set.update(set(list).difference(reswords_set))
-        #
-        # compose pseudo-builtins set
-        pseudo_builtins_set = set()
-        # add each list of builtins for this dialect excluding reserved words
-        for list in self.pseudo_builtins_db[dialect]:
-            pseudo_builtins_set.update(set(list).difference(reswords_set))
-        #
-        # compose ADTs set
-        adts_set = set()
-        # add each list of ADTs for this dialect excluding reserved words
-        for list in self.stdlib_adts_db[dialect]:
-            adts_set.update(set(list).difference(reswords_set))
-        #
-        # compose modules set
-        modules_set = set()
-        # add each list of builtins for this dialect excluding builtins
-        for list in self.stdlib_modules_db[dialect]:
-            modules_set.update(set(list).difference(builtins_set))
-        #
-        # compose types set
-        types_set = set()
-        # add each list of types for this dialect excluding builtins
-        for list in self.stdlib_types_db[dialect]:
-            types_set.update(set(list).difference(builtins_set))
-        #
-        # compose procedures set
-        procedures_set = set()
-        # add each list of procedures for this dialect excluding builtins
-        for list in self.stdlib_procedures_db[dialect]:
-            procedures_set.update(set(list).difference(builtins_set))
-        #
-        # compose variables set
-        variables_set = set()
-        # add each list of variables for this dialect excluding builtins
-        for list in self.stdlib_variables_db[dialect]:
-            variables_set.update(set(list).difference(builtins_set))
-        #
-        # compose constants set
-        constants_set = set()
-        # add each list of constants for this dialect excluding builtins
-        for list in self.stdlib_constants_db[dialect]:
-            constants_set.update(set(list).difference(builtins_set))
-        #
-        # update lexer state
-        self.dialect = dialect
-        self.lexemes_to_reject = lexemes_to_reject_set
-        self.reserved_words = reswords_set
-        self.builtins = builtins_set
-        self.pseudo_builtins = pseudo_builtins_set
-        self.adts = adts_set
-        self.modules = modules_set
-        self.types = types_set
-        self.procedures = procedures_set
-        self.variables = variables_set
-        self.constants = constants_set
-        #
-        # if __debug__:
-        #    print 'exiting set_dialect'
-        #    print ' self.dialect: ', self.dialect
-        #    print ' self.lexemes_to_reject: ', self.lexemes_to_reject
-        #    print ' self.reserved_words: ', self.reserved_words
-        #    print ' self.builtins: ', self.builtins
-        #    print ' self.pseudo_builtins: ', self.pseudo_builtins
-        #    print ' self.adts: ', self.adts
-        #    print ' self.modules: ', self.modules
-        #    print ' self.types: ', self.types
-        #    print ' self.procedures: ', self.procedures
-        #    print ' self.variables: ', self.variables
-        #    print ' self.types: ', self.types
-        #    print ' self.constants: ', self.constants
-
-    # Extracts a dialect name from a dialect tag comment string  and checks
-    # the extracted name against known dialects.  If a match is found,  the
-    # matching name is returned, otherwise dialect id 'unknown' is returned
-    def get_dialect_from_dialect_tag(self, dialect_tag):
-        #
-        # if __debug__:
-        #    print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag
-        #
-        # constants
-        left_tag_delim = '(*!'
-        right_tag_delim = '*)'
-        left_tag_delim_len = len(left_tag_delim)
-        right_tag_delim_len = len(right_tag_delim)
-        indicator_start = left_tag_delim_len
-        indicator_end = -(right_tag_delim_len)
-        #
-        # check comment string for dialect indicator
-        if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \
-           and dialect_tag.startswith(left_tag_delim) \
-           and dialect_tag.endswith(right_tag_delim):
-            #
-            # if __debug__:
-            #    print 'dialect tag found'
-            #
-            # extract dialect indicator
-            indicator = dialect_tag[indicator_start:indicator_end]
-            #
-            # if __debug__:
-            #    print 'extracted: ', indicator
-            #
-            # check against known dialects
-            for index in range(1, len(self.dialects)):
-                #
-                # if __debug__:
-                #    print 'dialects[', index, ']: ', self.dialects[index]
-                #
-                if indicator == self.dialects[index]:
-                    #
-                    # if __debug__:
-                    #    print 'matching dialect found'
-                    #
-                    # indicator matches known dialect
-                    return indicator
-            else:
-                # indicator does not match any dialect
-                return 'unknown'  # default
-        else:
-            # invalid indicator string
-            return 'unknown'  # default
-
-    # intercept the token stream, modify token attributes and return them
-    def get_tokens_unprocessed(self, text):
-        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
-            #
-            # check for dialect tag if dialect has not been set by tag
-            if not self.dialect_set_by_tag and token == Comment.Special:
-                indicated_dialect = self.get_dialect_from_dialect_tag(value)
-                if indicated_dialect != 'unknown':
-                    # token is a dialect indicator
-                    # reset reserved words and builtins
-                    self.set_dialect(indicated_dialect)
-                    self.dialect_set_by_tag = True
-            #
-            # check for reserved words, predefined and stdlib identifiers
-            if token is Name:
-                if value in self.reserved_words:
-                    token = Keyword.Reserved
-                    if self.algol_publication_mode:
-                        value = value.lower()
-                #
-                elif value in self.builtins:
-                    token = Name.Builtin
-                    if self.algol_publication_mode:
-                        value = value.lower()
-                #
-                elif value in self.pseudo_builtins:
-                    token = Name.Builtin.Pseudo
-                    if self.algol_publication_mode:
-                        value = value.lower()
-                #
-                elif value in self.adts:
-                    if not self.treat_stdlib_adts_as_builtins:
-                        token = Name.Namespace
-                    else:
-                        token = Name.Builtin.Pseudo
-                        if self.algol_publication_mode:
-                            value = value.lower()
-                #
-                elif value in self.modules:
-                    token = Name.Namespace
-                #
-                elif value in self.types:
-                    token = Name.Class
-                #
-                elif value in self.procedures:
-                    token = Name.Function
-                #
-                elif value in self.variables:
-                    token = Name.Variable
-                #
-                elif value in self.constants:
-                    token = Name.Constant
-            #
-            elif token in Number:
-                #
-                # mark prefix number literals as error for PIM and ISO dialects
-                if self.dialect not in ('unknown', 'm2r10', 'objm2'):
-                    if "'" in value or value[0:2] in ('0b', '0x', '0u'):
-                        token = Error
-                #
-                elif self.dialect in ('m2r10', 'objm2'):
-                    # mark base-8 number literals as errors for M2 R10 and ObjM2
-                    if token is Number.Oct:
-                        token = Error
-                    # mark suffix base-16 literals as errors for M2 R10 and ObjM2
-                    elif token is Number.Hex and 'H' in value:
-                        token = Error
-                    # mark real numbers with E as errors for M2 R10 and ObjM2
-                    elif token is Number.Float and 'E' in value:
-                        token = Error
-            #
-            elif token in Comment:
-                #
-                # mark single line comment as error for PIM and ISO dialects
-                if token is Comment.Single:
-                    if self.dialect not in ('unknown', 'm2r10', 'objm2'):
-                        token = Error
-                #
-                if token is Comment.Preproc:
-                    # mark ISO pragma as error for PIM dialects
-                    if value.startswith('<*') and \
-                       self.dialect.startswith('m2pim'):
-                        token = Error
-                    # mark PIM pragma as comment for other dialects
-                    elif value.startswith('(*$') and \
-                            self.dialect != 'unknown' and \
-                            not self.dialect.startswith('m2pim'):
-                        token = Comment.Multiline
-            #
-            else:  # token is neither Name nor Comment
-                #
-                # mark lexemes matching the dialect's error token set as errors
-                if value in self.lexemes_to_reject:
-                    token = Error
-                #
-                # substitute lexemes when in Algol mode
-                if self.algol_publication_mode:
-                    if value == '#':
-                        value = u'≠'
-                    elif value == '<=':
-                        value = u'≤'
-                    elif value == '>=':
-                        value = u'≥'
-                    elif value == '==':
-                        value = u'≡'
-                    elif value == '*.':
-                        value = u'•'
-
-            # return result
-            yield index, token, value
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.modula2
+    ~~~~~~~~~~~~~~~~~~~~~~~
+
+    Multi-Dialect Lexer for Modula-2.
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, include
+from pygments.util import get_bool_opt, get_list_opt
+from pygments.token import Text, Comment, Operator, Keyword, Name, \
+    String, Number, Punctuation, Error
+
+__all__ = ['Modula2Lexer']
+
+
+# Multi-Dialect Modula-2 Lexer
+class Modula2Lexer(RegexLexer):
+    """
+    For `Modula-2 <http://www.modula2.org/>`_ source code.
+
+    The Modula-2 lexer supports several dialects.  By default, it operates in
+    fallback mode, recognising the *combined* literals, punctuation symbols
+    and operators of all supported dialects, and the *combined* reserved words
+    and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
+    differentiating between library defined identifiers.
+
+    To select a specific dialect, a dialect option may be passed
+    or a dialect tag may be embedded into a source file.
+
+    Dialect Options:
+
+    `m2pim`
+        Select PIM Modula-2 dialect.
+    `m2iso`
+        Select ISO Modula-2 dialect.
+    `m2r10`
+        Select Modula-2 R10 dialect.
+    `objm2`
+        Select Objective Modula-2 dialect.
+
+    The PIM and ISO dialect options may be qualified with a language extension.
+
+    Language Extensions:
+
+    `+aglet`
+        Select Aglet Modula-2 extensions, available with m2iso.
+    `+gm2`
+        Select GNU Modula-2 extensions, available with m2pim.
+    `+p1`
+        Select p1 Modula-2 extensions, available with m2iso.
+    `+xds`
+        Select XDS Modula-2 extensions, available with m2iso.
+
+
+    Passing a Dialect Option via Unix Commandline Interface
+
+    Dialect options may be passed to the lexer using the `dialect` key.
+    Only one such option should be passed. If multiple dialect options are
+    passed, the first valid option is used, any subsequent options are ignored.
+
+    Examples:
+
+    `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
+        Use ISO dialect to render input to HTML output
+    `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
+        Use ISO dialect with p1 extensions to render input to RTF output
+
+
+    Embedding a Dialect Option within a source file
+
+    A dialect option may be embedded in a source file in form of a dialect
+    tag, a specially formatted comment that specifies a dialect option.
+
+    Dialect Tag EBNF::
+
+       dialectTag :
+           OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
+
+       dialectOption :
+           'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
+           'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
+
+       Prefix : '!' ;
+
+       OpeningCommentDelim : '(*' ;
+
+       ClosingCommentDelim : '*)' ;
+
+    No whitespace is permitted between the tokens of a dialect tag.
+
+    In the event that a source file contains multiple dialect tags, the first
+    tag that contains a valid dialect option will be used and any subsequent
+    dialect tags will be ignored.  Ideally, a dialect tag should be placed
+    at the beginning of a source file.
+
+    An embedded dialect tag overrides a dialect option set via command line.
+
+    Examples:
+
+    ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
+        Use Modula2 R10 dialect to render this source file.
+    ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
+        Use PIM dialect with GNU extensions to render this source file.
+
+
+    Algol Publication Mode:
+
+    In Algol publication mode, source text is rendered for publication of
+    algorithms in scientific papers and academic texts, following the format
+    of the Revised Algol-60 Language Report.  It is activated by passing
+    one of two corresponding styles as an option:
+
+    `algol`
+        render reserved words lowercase underline boldface
+        and builtins lowercase boldface italic
+    `algol_nu`
+        render reserved words lowercase boldface (no underlining)
+        and builtins lowercase boldface italic
+
+    The lexer automatically performs the required lowercase conversion when
+    this mode is activated.
+
+    Example:
+
+    ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
+        Render input file in Algol publication mode to LaTeX output.
+
+
+    Rendering Mode of First Class ADT Identifiers:
+
+    The rendering of standard library first class ADT identifiers is controlled
+    by option flag "treat_stdlib_adts_as_builtins".
+
+    When this option is turned on, standard library ADT identifiers are rendered
+    as builtins.  When it is turned off, they are rendered as ordinary library
+    identifiers.
+
+    `treat_stdlib_adts_as_builtins` (default: On)
+
+    The option is useful for dialects that support ADTs as first class objects
+    and provide ADTs in the standard library that would otherwise be built-in.
+
+    At present, only Modula-2 R10 supports library ADTs as first class objects
+    and therefore, no ADT identifiers are defined for any other dialects.
+
+    Example:
+
+    ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
+        Render standard library ADTs as ordinary library types.
+
+    .. versionadded:: 1.3
+
+    .. versionchanged:: 2.1
+       Added multi-dialect support.
+    """
+    name = 'Modula-2'
+    aliases = ['modula2', 'm2']
+    filenames = ['*.def', '*.mod']
+    mimetypes = ['text/x-modula2']
+
+    flags = re.MULTILINE | re.DOTALL
+
+    tokens = {
+        'whitespace': [
+            (r'\n+', Text),  # blank lines
+            (r'\s+', Text),  # whitespace
+        ],
+        'dialecttags': [
+            # PIM Dialect Tag
+            (r'\(\*!m2pim\*\)', Comment.Special),
+            # ISO Dialect Tag
+            (r'\(\*!m2iso\*\)', Comment.Special),
+            # M2R10 Dialect Tag
+            (r'\(\*!m2r10\*\)', Comment.Special),
+            # ObjM2 Dialect Tag
+            (r'\(\*!objm2\*\)', Comment.Special),
+            # Aglet Extensions Dialect Tag
+            (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
+            # GNU Extensions Dialect Tag
+            (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
+            # p1 Extensions Dialect Tag
+            (r'\(\*!m2iso\+p1\*\)', Comment.Special),
+            # XDS Extensions Dialect Tag
+            (r'\(\*!m2iso\+xds\*\)', Comment.Special),
+        ],
+        'identifiers': [
+            (r'([a-zA-Z_$][\w$]*)', Name),
+        ],
+        'prefixed_number_literals': [
+            #
+            # Base-2, whole number
+            (r'0b[01]+(\'[01]+)*', Number.Bin),
+            #
+            # Base-16, whole number
+            (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
+        ],
+        'plain_number_literals': [
+            #
+            # Base-10, real number with exponent
+            (r'[0-9]+(\'[0-9]+)*'  # integral part
+             r'\.[0-9]+(\'[0-9]+)*'  # fractional part
+             r'[eE][+-]?[0-9]+(\'[0-9]+)*',  # exponent
+             Number.Float),
+            #
+            # Base-10, real number without exponent
+            (r'[0-9]+(\'[0-9]+)*'  # integral part
+             r'\.[0-9]+(\'[0-9]+)*',  # fractional part
+             Number.Float),
+            #
+            # Base-10, whole number
+            (r'[0-9]+(\'[0-9]+)*', Number.Integer),
+        ],
+        'suffixed_number_literals': [
+            #
+            # Base-8, whole number
+            (r'[0-7]+B', Number.Oct),
+            #
+            # Base-8, character code
+            (r'[0-7]+C', Number.Oct),
+            #
+            # Base-16, number
+            (r'[0-9A-F]+H', Number.Hex),
+        ],
+        'string_literals': [
+            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
+            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
+        ],
+        'digraph_operators': [
+            # Dot Product Operator
+            (r'\*\.', Operator),
+            # Array Concatenation Operator
+            (r'\+>', Operator),  # M2R10 + ObjM2
+            # Inequality Operator
+            (r'<>', Operator),  # ISO + PIM
+            # Less-Or-Equal, Subset
+            (r'<=', Operator),
+            # Greater-Or-Equal, Superset
+            (r'>=', Operator),
+            # Identity Operator
+            (r'==', Operator),  # M2R10 + ObjM2
+            # Type Conversion Operator
+            (r'::', Operator),  # M2R10 + ObjM2
+            # Assignment Symbol
+            (r':=', Operator),
+            # Postfix Increment Mutator
+            (r'\+\+', Operator),  # M2R10 + ObjM2
+            # Postfix Decrement Mutator
+            (r'--', Operator),  # M2R10 + ObjM2
+        ],
+        'unigraph_operators': [
+            # Arithmetic Operators
+            (r'[+-]', Operator),
+            (r'[*/]', Operator),
+            # ISO 80000-2 compliant Set Difference Operator
+            (r'\\', Operator),  # M2R10 + ObjM2
+            # Relational Operators
+            (r'[=#<>]', Operator),
+            # Dereferencing Operator
+            (r'\^', Operator),
+            # Dereferencing Operator Synonym
+            (r'@', Operator),  # ISO
+            # Logical AND Operator Synonym
+            (r'&', Operator),  # PIM + ISO
+            # Logical NOT Operator Synonym
+            (r'~', Operator),  # PIM + ISO
+            # Smalltalk Message Prefix
+            (r'`', Operator),  # ObjM2
+        ],
+        'digraph_punctuation': [
+            # Range Constructor
+            (r'\.\.', Punctuation),
+            # Opening Chevron Bracket
+            (r'<<', Punctuation),  # M2R10 + ISO
+            # Closing Chevron Bracket
+            (r'>>', Punctuation),  # M2R10 + ISO
+            # Blueprint Punctuation
+            (r'->', Punctuation),  # M2R10 + ISO
+            # Distinguish |# and # in M2 R10
+            (r'\|#', Punctuation),
+            # Distinguish ## and # in M2 R10
+            (r'##', Punctuation),
+            # Distinguish |* and * in M2 R10
+            (r'\|\*', Punctuation),
+        ],
+        'unigraph_punctuation': [
+            # Common Punctuation
+            (r'[()\[\]{},.:;|]', Punctuation),
+            # Case Label Separator Synonym
+            (r'!', Punctuation),  # ISO
+            # Blueprint Punctuation
+            (r'\?', Punctuation),  # M2R10 + ObjM2
+        ],
+        'comments': [
+            # Single Line Comment
+            (r'^//.*?\n', Comment.Single),  # M2R10 + ObjM2
+            # Block Comment
+            (r'\(\*([^$].*?)\*\)', Comment.Multiline),
+            # Template Block Comment
+            (r'/\*(.*?)\*/', Comment.Multiline),  # M2R10 + ObjM2
+        ],
+        'pragmas': [
+            # ISO Style Pragmas
+            (r'<\*.*?\*>', Comment.Preproc),  # ISO, M2R10 + ObjM2
+            # Pascal Style Pragmas
+            (r'\(\*\$.*?\*\)', Comment.Preproc),  # PIM
+        ],
+        'root': [
+            include('whitespace'),
+            include('dialecttags'),
+            include('pragmas'),
+            include('comments'),
+            include('identifiers'),
+            include('suffixed_number_literals'),  # PIM + ISO
+            include('prefixed_number_literals'),  # M2R10 + ObjM2
+            include('plain_number_literals'),
+            include('string_literals'),
+            include('digraph_punctuation'),
+            include('digraph_operators'),
+            include('unigraph_punctuation'),
+            include('unigraph_operators'),
+        ]
+    }
+
+#  C o m m o n   D a t a s e t s
+
+    # Common Reserved Words Dataset
+    common_reserved_words = (
+        # 37 common reserved words
+        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
+        'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
+        'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
+        'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
+        'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
+    )
+
+    # Common Builtins Dataset
+    common_builtins = (
+        # 16 common builtins
+        'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
+        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
+        'TRUE',
+    )
+
+    # Common Pseudo-Module Builtins Dataset
+    common_pseudo_builtins = (
+        # 4 common pseudo builtins
+        'ADDRESS', 'BYTE', 'WORD', 'ADR'
+    )
+
+#  P I M   M o d u l a - 2   D a t a s e t s
+
+    # Lexemes to Mark as Error Tokens for PIM Modula-2
+    pim_lexemes_to_reject = (
+        '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
+        '+>', '->', '<<', '>>', '|#', '##',
+    )
+
+    # PIM Modula-2 Additional Reserved Words Dataset
+    pim_additional_reserved_words = (
+        # 3 additional reserved words
+        'EXPORT', 'QUALIFIED', 'WITH',
+    )
+
+    # PIM Modula-2 Additional Builtins Dataset
+    pim_additional_builtins = (
+        # 16 additional builtins
+        'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
+        'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
+    )
+
+    # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
+    pim_additional_pseudo_builtins = (
+        # 5 additional pseudo builtins
+        'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
+    )
+
+#  I S O   M o d u l a - 2   D a t a s e t s
+
+    # Lexemes to Mark as Error Tokens for ISO Modula-2
+    iso_lexemes_to_reject = (
+        '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
+        '<<', '>>', '|#', '##',
+    )
+
+    # ISO Modula-2 Additional Reserved Words Dataset
+    iso_additional_reserved_words = (
+        # 9 additional reserved words (ISO 10514-1)
+        'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
+        'REM', 'RETRY', 'WITH',
+        # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
+        'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
+        'REVEAL', 'TRACED', 'UNSAFEGUARDED',
+    )
+
+    # ISO Modula-2 Additional Builtins Dataset
+    iso_additional_builtins = (
+        # 26 additional builtins (ISO 10514-1)
+        'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
+        'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
+        'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
+        'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
+        # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
+        'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
+    )
+
+    # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
+    iso_additional_pseudo_builtins = (
+        # 14 additional builtins (SYSTEM); 'ADR' also appears in the common set
+        'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
+        'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
+        'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
+        # 13 additional builtins (COROUTINES)
+        'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
+        'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
+        'NEWCOROUTINE', 'PROT', 'TRANSFER',
+        # 9 additional builtins (EXCEPTIONS)
+        'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
+        'ExceptionSource', 'GetMessage', 'IsCurrentSource',
+        'IsExceptionalExecution', 'RAISE',
+        # 3 additional builtins (TERMINATION)
+        'TERMINATION', 'IsTerminating', 'HasHalted',
+        # 19 additional builtins (M2EXCEPTION)
+        'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
+        'indexException', 'rangeException', 'caseSelectException',
+        'invalidLocation', 'functionException', 'wholeValueException',
+        'wholeDivException', 'realValueException', 'realDivException',
+        'complexValueException', 'complexDivException', 'protException',
+        'sysException', 'coException', 'exException',
+    )
+
+#  M o d u l a - 2   R 1 0   D a t a s e t s
+
+    # Lexemes to Mark as Error Tokens for Modula-2 R10
+    m2r10_lexemes_to_reject = (
+        '!', '`', '@', '$', '%', '&', '<>',
+    )
+
+    # Modula-2 R10 reserved words in addition to the common set
+    m2r10_additional_reserved_words = (
+        # 12 additional reserved words
+        'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
+        'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
+        # 2 additional reserved words with symbolic assembly option
+        'ASM', 'REG',
+    )
+
+    # Modula-2 R10 builtins in addition to the common set
+    m2r10_additional_builtins = (
+        # 26 additional builtins
+        'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
+        'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
+        'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
+        'UNICHAR', 'WRITE', 'WRITEF',
+    )
+
+    # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
+    m2r10_additional_pseudo_builtins = (
+        # 13 additional builtins (TPROPERTIES)
+        'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
+        'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
+        'TMAXEXP', 'TMINEXP',
+        # 4 additional builtins (CONVERSION)
+        'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
+        # 35 additional builtins (UNSAFE)
+        'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
+        'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
+        'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
+        'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
+        'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
+        # 11 additional builtins (ATOMIC)
+        'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
+        'BWNAND', 'BWOR', 'BWXOR',
+        # 7 additional builtins (COMPILER)
+        'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
+        'HASH',
+        # 5 additional builtins (ASSEMBLER)
+        'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
+    )
+
+#  O b j e c t i v e   M o d u l a - 2   D a t a s e t s
+
+    # Lexemes to Mark as Error Tokens for Objective Modula-2
+    objm2_lexemes_to_reject = (
+        '!', '$', '%', '&', '<>',
+    )
+
+    # Objective Modula-2 Extensions
+    # reserved words in addition to Modula-2 R10
+    objm2_additional_reserved_words = (
+        # 16 additional reserved words
+        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
+        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
+        'SUPER', 'TRY',
+    )
+
+    # Objective Modula-2 Extensions
+    # builtins in addition to Modula-2 R10
+    objm2_additional_builtins = (
+        # 3 additional builtins
+        'OBJECT', 'NO', 'YES',
+    )
+
+    # Objective Modula-2 Extensions
+    # pseudo-module builtins in addition to Modula-2 R10
+    objm2_additional_pseudo_builtins = (
+        # None
+    )
+
+#  A g l e t   M o d u l a - 2   D a t a s e t s
+
+    # Aglet Extensions
+    # reserved words in addition to ISO Modula-2
+    aglet_additional_reserved_words = (
+        # None
+    )
+
+    # Aglet Extensions
+    # builtins in addition to ISO Modula-2
+    aglet_additional_builtins = (
+        # 9 additional builtins
+        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
+        'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
+    )
+
+    # Aglet Modula-2 Extensions
+    # pseudo-module builtins in addition to ISO Modula-2
+    aglet_additional_pseudo_builtins = (
+        # None
+    )
+
+#  G N U   M o d u l a - 2   D a t a s e t s
+
+    # GNU Extensions
+    # reserved words in addition to PIM Modula-2
+    gm2_additional_reserved_words = (
+        # 10 additional reserved words
+        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
+        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
+    )
+
+    # GNU Extensions
+    # builtins in addition to PIM Modula-2
+    gm2_additional_builtins = (
+        # 21 additional builtins
+        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
+        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
+        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
+        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
+    )
+
+    # GNU Extensions
+    # pseudo-module builtins in addition to PIM Modula-2
+    gm2_additional_pseudo_builtins = (
+        # None
+    )
+
+#  p 1   M o d u l a - 2   D a t a s e t s
+
+    # p1 Extensions
+    # reserved words in addition to ISO Modula-2
+    p1_additional_reserved_words = (
+        # None
+    )
+
+    # p1 Extensions
+    # builtins in addition to ISO Modula-2
+    p1_additional_builtins = (
+        # None
+    )
+
+    # p1 Modula-2 Extensions
+    # pseudo-module builtins in addition to ISO Modula-2
+    p1_additional_pseudo_builtins = (
+        # 1 additional builtin
+        'BCD',
+    )
+
+#  X D S   M o d u l a - 2   D a t a s e t s
+
+    # XDS Extensions
+    # reserved words in addition to ISO Modula-2
+    xds_additional_reserved_words = (
+        # 1 additional reserved word
+        'SEQ',
+    )
+
+    # XDS Extensions
+    # builtins in addition to ISO Modula-2
+    xds_additional_builtins = (
+        # 9 additional builtins
+        'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
+        'LONGCARD', 'SHORTCARD', 'SHORTINT',
+    )
+
+    # XDS Modula-2 Extensions
+    # pseudo-module builtins in addition to ISO Modula-2
+    xds_additional_pseudo_builtins = (
+        # 21 additional builtins (SYSTEM)
+        'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
+        'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
+        'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
+        # 3 additional builtins (COMPILER)
+        'COMPILER', 'OPTION', 'EQUATION',
+    )
+
+#  P I M   S t a n d a r d   L i b r a r y   D a t a s e t s
+
+    # PIM Modula-2 Standard Library Modules Dataset
+    pim_stdlib_module_identifiers = (
+        'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
+    )
+
+    # PIM Modula-2 Standard Library Types Dataset
+    pim_stdlib_type_identifiers = (
+        'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
+        'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
+        'DirectoryCommand',
+    )
+
+    # PIM Modula-2 Standard Library Procedures Dataset
+    pim_stdlib_proc_identifiers = (
+        'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
+        'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
+        'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
+        'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
+        'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
+        'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
+        'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
+        'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
+        'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
+        'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
+    )
+
+    # PIM Modula-2 Standard Library Variables Dataset
+    pim_stdlib_var_identifiers = (
+        'Done', 'termCH', 'in', 'out'
+    )
+
+    # PIM Modula-2 Standard Library Constants Dataset
+    pim_stdlib_const_identifiers = (
+        'EOL',
+    )
+
+#  I S O   S t a n d a r d   L i b r a r y   D a t a s e t s
+
+    # ISO Modula-2 Standard Library Modules Dataset
+    iso_stdlib_module_identifiers = (
+        # TO DO
+    )
+
+    # ISO Modula-2 Standard Library Types Dataset
+    iso_stdlib_type_identifiers = (
+        # TO DO
+    )
+
+    # ISO Modula-2 Standard Library Procedures Dataset
+    iso_stdlib_proc_identifiers = (
+        # TO DO
+    )
+
+    # ISO Modula-2 Standard Library Variables Dataset
+    iso_stdlib_var_identifiers = (
+        # TO DO
+    )
+
+    # ISO Modula-2 Standard Library Constants Dataset
+    iso_stdlib_const_identifiers = (
+        # TO DO
+    )
+
+#  M 2   R 1 0   S t a n d a r d   L i b r a r y   D a t a s e t s
+
+    # Modula-2 R10 Standard Library ADTs Dataset
+    m2r10_stdlib_adt_identifiers = (
+        'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
+        'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
+        'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
+        'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
+        'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
+        'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
+        'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
+        'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
+        'INT64', 'INT128', 'STRING', 'UNISTRING',
+    )
+
+    # Modula-2 R10 Standard Library Blueprints Dataset
+    m2r10_stdlib_blueprint_identifiers = (
+        'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
+        'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
+        'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
+        'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
+        'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
+        'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
+        'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
+    )
+
+    # Modula-2 R10 Standard Library Modules Dataset
+    m2r10_stdlib_module_identifiers = (
+        'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
+        'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
+        'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
+        'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
+        'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
+    )
+
+    # Modula-2 R10 Standard Library Types Dataset
+    m2r10_stdlib_type_identifiers = (
+        'File', 'Status',
+        # TO BE COMPLETED
+    )
+
+    # Modula-2 R10 Standard Library Procedures Dataset
+    m2r10_stdlib_proc_identifiers = (
+        'ALLOCATE', 'DEALLOCATE', 'SIZE',
+        # TO BE COMPLETED
+    )
+
+    # Modula-2 R10 Standard Library Variables Dataset
+    m2r10_stdlib_var_identifiers = (
+        'stdIn', 'stdOut', 'stdErr',
+    )
+
+    # Modula-2 R10 Standard Library Constants Dataset
+    m2r10_stdlib_const_identifiers = (
+        'pi', 'tau',
+    )
+
+#  D i a l e c t s
+
+    # Dialect modes ('unknown' is the fallback and must stay at index 0)
+    dialects = (
+        'unknown',
+        'm2pim', 'm2iso', 'm2r10', 'objm2',
+        'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
+    )
+
+#   D a t a b a s e s
+
+    # Lexemes to Mark as Errors Database
+    lexemes_to_reject_db = {
+        # Lexemes to reject for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Lexemes to reject for PIM Modula-2
+        'm2pim': (
+            pim_lexemes_to_reject,
+        ),
+        # Lexemes to reject for ISO Modula-2
+        'm2iso': (
+            iso_lexemes_to_reject,
+        ),
+        # Lexemes to reject for Modula-2 R10
+        'm2r10': (
+            m2r10_lexemes_to_reject,
+        ),
+        # Lexemes to reject for Objective Modula-2
+        'objm2': (
+            objm2_lexemes_to_reject,
+        ),
+        # Lexemes to reject for Aglet Modula-2
+        'm2iso+aglet': (
+            iso_lexemes_to_reject,
+        ),
+        # Lexemes to reject for GNU Modula-2
+        'm2pim+gm2': (
+            pim_lexemes_to_reject,
+        ),
+        # Lexemes to reject for p1 Modula-2
+        'm2iso+p1': (
+            iso_lexemes_to_reject,
+        ),
+        # Lexemes to reject for XDS Modula-2
+        'm2iso+xds': (
+            iso_lexemes_to_reject,
+        ),
+    }
+
+    # Reserved Words Database
+    reserved_words_db = {
+        # Reserved words for unknown dialect (PIM + ISO + R10 combined)
+        'unknown': (
+            common_reserved_words,
+            pim_additional_reserved_words,
+            iso_additional_reserved_words,
+            m2r10_additional_reserved_words,
+        ),
+
+        # Reserved words for PIM Modula-2
+        'm2pim': (
+            common_reserved_words,
+            pim_additional_reserved_words,
+        ),
+
+        # Reserved words for ISO Modula-2
+        'm2iso': (
+            common_reserved_words,
+            iso_additional_reserved_words,
+        ),
+
+        # Reserved words for Modula-2 R10
+        'm2r10': (
+            common_reserved_words,
+            m2r10_additional_reserved_words,
+        ),
+
+        # Reserved words for Objective Modula-2
+        'objm2': (
+            common_reserved_words,
+            m2r10_additional_reserved_words,
+            objm2_additional_reserved_words,
+        ),
+
+        # Reserved words for Aglet Modula-2 Extensions
+        'm2iso+aglet': (
+            common_reserved_words,
+            iso_additional_reserved_words,
+            aglet_additional_reserved_words,
+        ),
+
+        # Reserved words for GNU Modula-2 Extensions
+        'm2pim+gm2': (
+            common_reserved_words,
+            pim_additional_reserved_words,
+            gm2_additional_reserved_words,
+        ),
+
+        # Reserved words for p1 Modula-2 Extensions
+        'm2iso+p1': (
+            common_reserved_words,
+            iso_additional_reserved_words,
+            p1_additional_reserved_words,
+        ),
+
+        # Reserved words for XDS Modula-2 Extensions
+        'm2iso+xds': (
+            common_reserved_words,
+            iso_additional_reserved_words,
+            xds_additional_reserved_words,
+        ),
+    }
+
+    # Builtins Database
+    builtins_db = {
+        # Builtins for unknown dialect (PIM + ISO + R10 combined)
+        'unknown': (
+            common_builtins,
+            pim_additional_builtins,
+            iso_additional_builtins,
+            m2r10_additional_builtins,
+        ),
+
+        # Builtins for PIM Modula-2
+        'm2pim': (
+            common_builtins,
+            pim_additional_builtins,
+        ),
+
+        # Builtins for ISO Modula-2
+        'm2iso': (
+            common_builtins,
+            iso_additional_builtins,
+        ),
+
+        # Builtins for Modula-2 R10
+        'm2r10': (
+            common_builtins,
+            m2r10_additional_builtins,
+        ),
+
+        # Builtins for Objective Modula-2
+        'objm2': (
+            common_builtins,
+            m2r10_additional_builtins,
+            objm2_additional_builtins,
+        ),
+
+        # Builtins for Aglet Modula-2 Extensions
+        'm2iso+aglet': (
+            common_builtins,
+            iso_additional_builtins,
+            aglet_additional_builtins,
+        ),
+
+        # Builtins for GNU Modula-2 Extensions
+        'm2pim+gm2': (
+            common_builtins,
+            pim_additional_builtins,
+            gm2_additional_builtins,
+        ),
+
+        # Builtins for p1 Modula-2 Extensions
+        'm2iso+p1': (
+            common_builtins,
+            iso_additional_builtins,
+            p1_additional_builtins,
+        ),
+
+        # Builtins for XDS Modula-2 Extensions
+        'm2iso+xds': (
+            common_builtins,
+            iso_additional_builtins,
+            xds_additional_builtins,
+        ),
+    }
+
+    # Pseudo-Module Builtins Database
+    pseudo_builtins_db = {
+        # Pseudo-module builtins for unknown dialect (PIM + ISO + R10 combined)
+        'unknown': (
+            common_pseudo_builtins,
+            pim_additional_pseudo_builtins,
+            iso_additional_pseudo_builtins,
+            m2r10_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for PIM Modula-2
+        'm2pim': (
+            common_pseudo_builtins,
+            pim_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for ISO Modula-2
+        'm2iso': (
+            common_pseudo_builtins,
+            iso_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for Modula-2 R10
+        'm2r10': (
+            common_pseudo_builtins,
+            m2r10_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for Objective Modula-2
+        'objm2': (
+            common_pseudo_builtins,
+            m2r10_additional_pseudo_builtins,
+            objm2_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for Aglet Modula-2 Extensions
+        'm2iso+aglet': (
+            common_pseudo_builtins,
+            iso_additional_pseudo_builtins,
+            aglet_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for GNU Modula-2 Extensions
+        'm2pim+gm2': (
+            common_pseudo_builtins,
+            pim_additional_pseudo_builtins,
+            gm2_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for p1 Modula-2 Extensions
+        'm2iso+p1': (
+            common_pseudo_builtins,
+            iso_additional_pseudo_builtins,
+            p1_additional_pseudo_builtins,
+        ),
+
+        # Pseudo-module builtins for XDS Modula-2 Extensions
+        'm2iso+xds': (
+            common_pseudo_builtins,
+            iso_additional_pseudo_builtins,
+            xds_additional_pseudo_builtins,
+        ),
+    }
+
+    # Standard Library ADTs Database
+    stdlib_adts_db = {
+        # Empty entry for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Standard Library ADTs for PIM Modula-2
+        'm2pim': (
+            # No first class library types
+        ),
+
+        # Standard Library ADTs for ISO Modula-2
+        'm2iso': (
+            # No first class library types
+        ),
+
+        # Standard Library ADTs for Modula-2 R10
+        'm2r10': (
+            m2r10_stdlib_adt_identifiers,
+        ),
+
+        # Standard Library ADTs for Objective Modula-2
+        'objm2': (
+            m2r10_stdlib_adt_identifiers,
+        ),
+
+        # Standard Library ADTs for Aglet Modula-2
+        'm2iso+aglet': (
+            # No first class library types
+        ),
+
+        # Standard Library ADTs for GNU Modula-2
+        'm2pim+gm2': (
+            # No first class library types
+        ),
+
+        # Standard Library ADTs for p1 Modula-2
+        'm2iso+p1': (
+            # No first class library types
+        ),
+
+        # Standard Library ADTs for XDS Modula-2
+        'm2iso+xds': (
+            # No first class library types
+        ),
+    }
+
+    # Standard Library Modules Database
+    stdlib_modules_db = {
+        # Empty entry for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Standard Library Modules for PIM Modula-2
+        'm2pim': (
+            pim_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for ISO Modula-2
+        'm2iso': (
+            iso_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for Modula-2 R10
+        'm2r10': (
+            m2r10_stdlib_blueprint_identifiers,
+            m2r10_stdlib_module_identifiers,
+            m2r10_stdlib_adt_identifiers,
+        ),
+
+        # Standard Library Modules for Objective Modula-2
+        'objm2': (
+            m2r10_stdlib_blueprint_identifiers,
+            m2r10_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for Aglet Modula-2
+        'm2iso+aglet': (
+            iso_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for GNU Modula-2
+        'm2pim+gm2': (
+            pim_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for p1 Modula-2
+        'm2iso+p1': (
+            iso_stdlib_module_identifiers,
+        ),
+
+        # Standard Library Modules for XDS Modula-2
+        'm2iso+xds': (
+            iso_stdlib_module_identifiers,
+        ),
+    }
+
+    # Standard Library Types Database
+    stdlib_types_db = {
+        # Empty entry for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Standard Library Types for PIM Modula-2
+        'm2pim': (
+            pim_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for ISO Modula-2
+        'm2iso': (
+            iso_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for Modula-2 R10
+        'm2r10': (
+            m2r10_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for Objective Modula-2
+        'objm2': (
+            m2r10_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for Aglet Modula-2
+        'm2iso+aglet': (
+            iso_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for GNU Modula-2
+        'm2pim+gm2': (
+            pim_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for p1 Modula-2
+        'm2iso+p1': (
+            iso_stdlib_type_identifiers,
+        ),
+
+        # Standard Library Types for XDS Modula-2
+        'm2iso+xds': (
+            iso_stdlib_type_identifiers,
+        ),
+    }
+
+    # Standard Library Procedures Database
+    stdlib_procedures_db = {
+        # Empty entry for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Standard Library Procedures for PIM Modula-2
+        'm2pim': (
+            pim_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for ISO Modula-2
+        'm2iso': (
+            iso_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for Modula-2 R10
+        'm2r10': (
+            m2r10_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for Objective Modula-2
+        'objm2': (
+            m2r10_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for Aglet Modula-2
+        'm2iso+aglet': (
+            iso_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for GNU Modula-2
+        'm2pim+gm2': (
+            pim_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for p1 Modula-2
+        'm2iso+p1': (
+            iso_stdlib_proc_identifiers,
+        ),
+
+        # Standard Library Procedures for XDS Modula-2
+        'm2iso+xds': (
+            iso_stdlib_proc_identifiers,
+        ),
+    }
+
+    # Standard Library Variables Database
+    stdlib_variables_db = {
+        # Empty entry for unknown dialect
+        'unknown': (
+            # LEAVE THIS EMPTY
+        ),
+        # Standard Library Variables for PIM Modula-2
+        'm2pim': (
+            pim_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for ISO Modula-2
+        'm2iso': (
+            iso_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for Modula-2 R10
+        'm2r10': (
+            m2r10_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for Objective Modula-2
+        'objm2': (
+            m2r10_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for Aglet Modula-2
+        'm2iso+aglet': (
+            iso_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for GNU Modula-2
+        'm2pim+gm2': (
+            pim_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for p1 Modula-2
+        'm2iso+p1': (
+            iso_stdlib_var_identifiers,
+        ),
+
+        # Standard Library Variables for XDS Modula-2
+        'm2iso+xds': (
+            iso_stdlib_var_identifiers,
+        ),
+    }
+
+    # Standard Library Constants Database
+    stdlib_constants_db = {
+        # Maps a dialect id to a tuple of constant identifier collections;
+        # set_dialect() merges the collections of the selected dialect
+        # into the set of standard library constants.
+        #
+        # Fallback entry for unknown dialect -- LEAVE THIS EMPTY
+        'unknown': (),
+        #
+        # Base dialects: PIM, ISO, Modula-2 R10 and Objective Modula-2
+        # (Objective Modula-2 shares the identifiers of Modula-2 R10)
+        'm2pim': (pim_stdlib_const_identifiers,),
+        'm2iso': (iso_stdlib_const_identifiers,),
+        'm2r10': (m2r10_stdlib_const_identifiers,),
+        'objm2': (m2r10_stdlib_const_identifiers,),
+        #
+        # Dialect extensions: Aglet, GNU, p1 and XDS Modula-2
+        # (each one shares the identifiers of the dialect it extends)
+        'm2iso+aglet': (iso_stdlib_const_identifiers,),
+        'm2pim+gm2': (pim_stdlib_const_identifiers,),
+        'm2iso+p1': (iso_stdlib_const_identifiers,),
+        'm2iso+xds': (iso_stdlib_const_identifiers,),
+    }
+
+#   M e t h o d s
+
+    # initialise a lexer instance
+    def __init__(self, **options):
+        """Create a lexer instance configured from *options*.
+
+        Recognised options (all optional):
+
+        `dialect`
+            List of dialect identifiers.  The first entry that names a
+            selectable dialect is activated; without such an entry the
+            lexer operates in fallback mode (dialect ``'unknown'``).
+        `style`
+            List of style names.  ``algol`` or ``algol_nu`` switches on
+            Algol publication mode.
+        `treat_stdlib_adts_as_builtins`
+            Boolean flag, defaults to ``True``.
+
+        All options are passed on to ``RegexLexer.__init__`` as well.
+        """
+        #
+        # check dialect options
+        #
+        # Every entry of self.dialects after the first one is selectable,
+        # which is the same range get_dialect_from_dialect_tag() accepts.
+        # The first entry is skipped (presumably the 'unknown' fallback id).
+        # The former slice [1:-1] also dropped the last known dialect,  so
+        # that dialect could be selected by tag but never by option.
+        selectable_dialects = self.dialects[1:]
+        #
+        # Fallback Mode (DEFAULT) unless a valid dialect option is found
+        selected_dialect = 'unknown'
+        for dialect_option in get_list_opt(options, 'dialect', []):
+            if dialect_option in selectable_dialects:
+                # valid dialect option found
+                selected_dialect = dialect_option
+                break
+        self.set_dialect(selected_dialect)
+        #
+        # no dialect tag has been seen in the token stream yet
+        self.dialect_set_by_tag = False
+        #
+        # check style options
+        #
+        styles = get_list_opt(options, 'style', [])
+        #
+        # use lowercase mode for Algol style
+        self.algol_publication_mode = (
+            'algol' in styles or 'algol_nu' in styles)
+        #
+        # Check option flags
+        #
+        self.treat_stdlib_adts_as_builtins = get_bool_opt(
+            options, 'treat_stdlib_adts_as_builtins', True)
+        #
+        # call superclass initialiser
+        RegexLexer.__init__(self, **options)
+
+    # Set lexer to a specified dialect
+    def set_dialect(self, dialect_id):
+        """Switch the lexer to the dialect named by *dialect_id*.
+
+        An identifier that is not listed in ``self.dialects`` selects the
+        fallback dialect ``'unknown'``.  The lexeme sets of the chosen
+        dialect are composed from the class level databases and stored on
+        the instance as ``dialect``, ``lexemes_to_reject``,
+        ``reserved_words``, ``builtins``, ``pseudo_builtins``, ``adts``,
+        ``modules``, ``types``, ``procedures``, ``variables`` and
+        ``constants``.
+
+        Builtins, pseudo-builtins and ADTs never contain reserved words;
+        modules, types, procedures, variables and constants never contain
+        builtins.
+        """
+        # check dialect name against known dialects
+        dialect = dialect_id if dialect_id in self.dialects else 'unknown'
+        #
+        def compose(database, excluded=()):
+            # merge every lexeme list the database holds for this dialect,
+            # then drop the lexemes already claimed by a higher category
+            lexemes = set()
+            for lexeme_list in database[dialect]:
+                lexemes.update(lexeme_list)
+            return lexemes.difference(excluded)
+        #
+        # compose all sets first,  so that the lexer state is left
+        # untouched should any of the database lookups fail
+        lexemes_to_reject_set = compose(self.lexemes_to_reject_db)
+        reswords_set = compose(self.reserved_words_db)
+        #
+        # categories that exclude reserved words
+        builtins_set = compose(self.builtins_db, reswords_set)
+        pseudo_builtins_set = compose(self.pseudo_builtins_db, reswords_set)
+        adts_set = compose(self.stdlib_adts_db, reswords_set)
+        #
+        # categories that exclude builtins
+        modules_set = compose(self.stdlib_modules_db, builtins_set)
+        types_set = compose(self.stdlib_types_db, builtins_set)
+        procedures_set = compose(self.stdlib_procedures_db, builtins_set)
+        variables_set = compose(self.stdlib_variables_db, builtins_set)
+        constants_set = compose(self.stdlib_constants_db, builtins_set)
+        #
+        # update lexer state
+        self.dialect = dialect
+        self.lexemes_to_reject = lexemes_to_reject_set
+        self.reserved_words = reswords_set
+        self.builtins = builtins_set
+        self.pseudo_builtins = pseudo_builtins_set
+        self.adts = adts_set
+        self.modules = modules_set
+        self.types = types_set
+        self.procedures = procedures_set
+        self.variables = variables_set
+        self.constants = constants_set
+
+    # Extracts a dialect name from a dialect tag comment string  and checks
+    # the extracted name against known dialects.  If a match is found,  the
+    # matching name is returned, otherwise dialect id 'unknown' is returned
+    def get_dialect_from_dialect_tag(self, dialect_tag):
+        """Return the dialect id named by the comment *dialect_tag*.
+
+        A dialect tag has the form ``(*!<dialect>*)``.  The text between
+        the delimiters is returned if it equals any entry of
+        ``self.dialects`` other than the first one;  in every other case
+        the dialect id ``'unknown'`` is returned.
+        """
+        opening_delim = '(*!'
+        closing_delim = '*)'
+        fallback = 'unknown'  # default
+        #
+        # a tag must hold at least one character between its delimiters
+        if len(dialect_tag) <= len(opening_delim) + len(closing_delim):
+            return fallback
+        #
+        # a comment without both tag delimiters is not a dialect tag
+        if not (dialect_tag.startswith(opening_delim)
+                and dialect_tag.endswith(closing_delim)):
+            return fallback
+        #
+        # extract dialect indicator
+        indicator = dialect_tag[len(opening_delim):-len(closing_delim)]
+        #
+        # check against known dialects, skipping the first entry
+        if indicator in self.dialects[1:]:
+            return indicator
+        #
+        # indicator does not match any dialect
+        return fallback
+
+    # intercept the token stream, modify token attributes and return them
+    def get_tokens_unprocessed(self, text):
+        """Yield ``(index, token, value)`` triples adjusted for the dialect.
+
+        The triples produced by ``RegexLexer.get_tokens_unprocessed`` are
+        post-processed one at a time:
+
+        * the first ``Comment.Special`` that carries a known dialect tag
+          switches the lexer to that dialect;
+        * ``Name`` tokens are reclassified as reserved words, builtins,
+          pseudo-builtins, ADTs, modules, types, procedures, variables or
+          constants of the current dialect;
+        * number literals, comments and pragmas that the current dialect
+          does not permit are marked as ``Error`` (PIM pragmas become
+          plain multiline comments outside of the PIM dialects);
+        * all remaining lexemes are checked against the dialect's reject
+          set and, in Algol publication mode, replaced by their
+          publication symbols.
+        """
+        # publication symbols that replace lexemes in Algol mode
+        algol_substitutions = {
+            '#': '≠',
+            '<=': '≤',
+            '>=': '≥',
+            '==': '≡',
+            '*.': '•',
+        }
+        # dialects that are exempt from the PIM and ISO specific checks
+        non_classic_dialects = ('unknown', 'm2r10', 'objm2')
+        #
+        # NOTE: self.dialect and the lexeme sets are read afresh for every
+        # token because a dialect tag may change them in mid-stream
+        token_stream = RegexLexer.get_tokens_unprocessed(self, text)
+        for index, token, value in token_stream:
+            #
+            # check for dialect tag if dialect has not been set by tag
+            # NOTE(review): the flag is not reset in this method,  so a tag
+            # seen in one call presumably also suppresses tag detection in
+            # later calls on the same instance -- confirm this is intended
+            if not self.dialect_set_by_tag and token == Comment.Special:
+                tagged_dialect = self.get_dialect_from_dialect_tag(value)
+                if tagged_dialect != 'unknown':
+                    # token is a dialect indicator
+                    # reset reserved words and builtins
+                    self.set_dialect(tagged_dialect)
+                    self.dialect_set_by_tag = True
+            #
+            # check for reserved words, predefined and stdlib identifiers
+            if token is Name:
+                # only keyword-like names are lowercased in Algol mode
+                keyword_like = False
+                if value in self.reserved_words:
+                    token, keyword_like = Keyword.Reserved, True
+                elif value in self.builtins:
+                    token, keyword_like = Name.Builtin, True
+                elif value in self.pseudo_builtins:
+                    token, keyword_like = Name.Builtin.Pseudo, True
+                elif value in self.adts:
+                    if self.treat_stdlib_adts_as_builtins:
+                        token, keyword_like = Name.Builtin.Pseudo, True
+                    else:
+                        token = Name.Namespace
+                elif value in self.modules:
+                    token = Name.Namespace
+                elif value in self.types:
+                    token = Name.Class
+                elif value in self.procedures:
+                    token = Name.Function
+                elif value in self.variables:
+                    token = Name.Variable
+                elif value in self.constants:
+                    token = Name.Constant
+                #
+                if keyword_like and self.algol_publication_mode:
+                    value = value.lower()
+            #
+            elif token in Number:
+                if self.dialect in ('m2r10', 'objm2'):
+                    # mark base-8 literals, suffix base-16 literals and
+                    # real numbers with E as errors for M2 R10 and ObjM2
+                    if token is Number.Oct \
+                       or (token is Number.Hex and 'H' in value) \
+                       or (token is Number.Float and 'E' in value):
+                        token = Error
+                elif self.dialect != 'unknown':
+                    # mark prefix number literals as error for PIM and ISO
+                    if "'" in value or value.startswith(('0b', '0x', '0u')):
+                        token = Error
+            #
+            elif token in Comment:
+                if token is Comment.Single:
+                    # mark single line comment as error for PIM and ISO
+                    if self.dialect not in non_classic_dialects:
+                        token = Error
+                elif token is Comment.Preproc:
+                    pim_dialect = self.dialect.startswith('m2pim')
+                    if value.startswith('<*'):
+                        # mark ISO pragma as error for PIM dialects
+                        if pim_dialect:
+                            token = Error
+                    elif value.startswith('(*$'):
+                        # mark PIM pragma as comment for other dialects
+                        if self.dialect != 'unknown' and not pim_dialect:
+                            token = Comment.Multiline
+            #
+            else:  # token is neither Name nor Number nor Comment
+                #
+                # mark lexemes matching the dialect's error token set as errors
+                if value in self.lexemes_to_reject:
+                    token = Error
+                #
+                # substitute lexemes when in Algol mode
+                if self.algol_publication_mode:
+                    value = algol_substitutions.get(value, value)
+
+            # return result
+            yield index, token, value

eric ide

mercurial