503 if scanner.match.strip(): |
506 if scanner.match.strip(): |
504 was_dot = scanner.match == '.' |
507 was_dot = scanner.match == '.' |
505 yield scanner.start_pos, token, scanner.match or '' |
508 yield scanner.start_pos, token, scanner.match or '' |
506 |
509 |
507 |
510 |
class Modula2Lexer(RegexLexer):
    """
    For `Modula-2 <http://www.modula2.org/>`_ source code.

    Additional options that determine which keywords are highlighted:

    `pim`
        Select PIM Modula-2 dialect (default: True).  PIM is the fallback
        dialect: it is used whenever neither `iso` nor `objm2` is enabled.
    `iso`
        Select ISO Modula-2 dialect (default: False).  Takes precedence
        over `objm2` when both are given.
    `objm2`
        Select Objective Modula-2 dialect (default: False).
    `gm2ext`
        Also highlight GNU extensions (default: False).  May be combined
        with any of the dialects above.

    .. versionadded:: 1.3
    """
    name = 'Modula-2'
    aliases = ['modula2', 'm2']
    filenames = ['*.def', '*.mod']
    mimetypes = ['text/x-modula2']

    # DOTALL lets '.' span newlines so multi-line comments and pragmas
    # can be matched by a single non-greedy pattern.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'identifiers': [
            # Every identifier is first emitted as a plain Name;
            # get_tokens_unprocessed() reclassifies reserved words and
            # pervasives according to the selected dialect.
            (r'([a-zA-Z_$][\w$]*)', Name),
        ],
        'numliterals': [
            (r'[01]+B', Number.Bin),        # binary number (ObjM2)
            (r'[0-7]+B', Number.Oct),       # octal number (PIM + ISO)
            (r'[0-7]+C', Number.Oct),       # char code (PIM + ISO)
            (r'[0-9A-F]+C', Number.Hex),    # char code (ObjM2)
            (r'[0-9A-F]+H', Number.Hex),    # hexadecimal number
            # real number with scale factor; the sign of the exponent is
            # optional, so both 1.0E+10 and 1.0E10 are accepted
            (r'[0-9]+\.[0-9]+E[+-]?[0-9]+', Number.Float),
            (r'[0-9]+\.[0-9]+', Number.Float),  # real number
            (r'[0-9]+', Number.Integer),    # decimal whole number
        ],
        'strings': [
            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
        ],
        'operators': [
            (r'[*/+=#~&<>\^-]', Operator),
            (r':=', Operator),   # assignment
            (r'@', Operator),    # pointer deref (ISO)
            (r'\.\.', Operator),  # ellipsis or range
            (r'`', Operator),    # Smalltalk message (ObjM2)
            (r'::', Operator),   # type conversion (ObjM2)
        ],
        'punctuation': [
            (r'[()\[\]{},.:;|]', Punctuation),
        ],
        'comments': [
            (r'//.*?\n', Comment.Single),         # ObjM2
            (r'/\*(.*?)\*/', Comment.Multiline),  # ObjM2
            # The lookahead excludes PIM pragmas "(*$ ... *)" (handled in
            # 'pragmas') while still matching the empty comment "(**)".
            (r'\(\*(?!\$)(.*?)\*\)', Comment.Multiline),
            # TO DO: nesting of (* ... *) comments
        ],
        'pragmas': [
            (r'\(\*\$(.*?)\*\)', Comment.Preproc),  # PIM
            (r'<\*(.*?)\*>', Comment.Preproc),      # ISO + ObjM2
        ],
        # Order matters: comments and pragmas must be tried before
        # operators/punctuation, which would otherwise consume their
        # opening delimiters character by character.
        'root': [
            include('whitespace'),
            include('comments'),
            include('pragmas'),
            include('identifiers'),
            include('numliterals'),
            include('strings'),
            include('operators'),
            include('punctuation'),
        ]
    }

    pim_reserved_words = [
        # 40 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
        'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
        'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'WHILE', 'WITH',
    ]

    pim_pervasives = [
        # 31 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
        'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
        'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
        'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
    ]

    iso_reserved_words = [
        # 46 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
        'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
        'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
        'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
        'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
        'WITH',
    ]

    iso_pervasives = [
        # 42 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
        'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
        'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
        'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
        'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
        'TRUE', 'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    ]

    objm2_reserved_words = [
        # base language, 42 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
        'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
        # OO extensions, 16 reserved words
        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
        'SUPER', 'TRY',
    ]

    objm2_pervasives = [
        # base language, 38 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
        'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
        'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
        'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
        # OO extensions, 3 pervasives
        'OBJECT', 'NO', 'YES',
    ]

    gnu_reserved_words = [
        # 10 additional reserved words
        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
    ]

    gnu_pervasives = [
        # 21 identifiers, actually from pseudo-module SYSTEM
        # but we will highlight them as if they were pervasives
        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
    ]

    def __init__(self, **options):
        """
        Build the per-instance keyword sets from the dialect options.

        Exactly one base dialect is selected: ISO if `iso` is true,
        otherwise Objective Modula-2 if `objm2` is true, otherwise PIM.
        The GNU extensions (`gm2ext`) are added on top of whichever
        dialect was chosen.  All options are then forwarded to
        ``RegexLexer.__init__``.
        """
        self.reserved_words = set()
        self.pervasives = set()
        # ISO Modula-2
        if get_bool_opt(options, 'iso', False):
            self.reserved_words.update(self.iso_reserved_words)
            self.pervasives.update(self.iso_pervasives)
        # Objective Modula-2
        elif get_bool_opt(options, 'objm2', False):
            self.reserved_words.update(self.objm2_reserved_words)
            self.pervasives.update(self.objm2_pervasives)
        # PIM Modula-2 (DEFAULT)
        else:
            self.reserved_words.update(self.pim_reserved_words)
            self.pervasives.update(self.pim_pervasives)
        # GNU extensions
        if get_bool_opt(options, 'gm2ext', False):
            self.reserved_words.update(self.gnu_reserved_words)
            self.pervasives.update(self.gnu_pervasives)
        # initialise
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for `text`.

        Delegates to ``RegexLexer.get_tokens_unprocessed`` and retags
        plain ``Name`` tokens whose value is a reserved word
        (``Keyword.Reserved``) or a pervasive (``Keyword.Pervasive``) of
        the dialect selected at construction time.  Matching is
        case-sensitive; all other tokens pass through unchanged.
        """
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            # check for reserved words and pervasives
            if token is Name:
                if value in self.reserved_words:
                    token = Keyword.Reserved
                elif value in self.pervasives:
                    token = Keyword.Pervasive
            # return result
            yield index, token, value
|
698 |
|
699 |
|
700 class AdaLexer(RegexLexer): |
511 class AdaLexer(RegexLexer): |
701 """ |
512 """ |
702 For Ada source code. |
513 For Ada source code. |
703 |
514 |
704 .. versionadded:: 1.3 |
515 .. versionadded:: 1.3 |
724 (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), |
535 (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), |
725 (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, |
536 (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, |
726 Comment.Preproc)), |
537 Comment.Preproc)), |
727 (r'(true|false|null)\b', Keyword.Constant), |
538 (r'(true|false|null)\b', Keyword.Constant), |
728 (words(( |
539 (words(( |
729 'Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count', 'Cursor', |
540 'Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count', |
730 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator', 'Integer', 'Long_Float', |
541 'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator', |
731 'Long_Integer', 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive', |
542 'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float', |
732 'Reference_Type', 'Short_Float', 'Short_Integer', 'Short_Short_Float', |
543 'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type', |
733 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'), suffix=r'\b'), |
544 'Short_Float', 'Short_Integer', 'Short_Short_Float', |
|
545 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'), |
|
546 suffix=r'\b'), |
734 Keyword.Type), |
547 Keyword.Type), |
735 (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word), |
548 (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word), |
736 (r'generic|private', Keyword.Declaration), |
549 (r'generic|private', Keyword.Declaration), |
737 (r'package', Keyword.Declaration, 'package'), |
550 (r'package', Keyword.Declaration, 'package'), |
738 (r'array\b', Keyword.Reserved, 'array_def'), |
551 (r'array\b', Keyword.Reserved, 'array_def'), |