32 - handles psql backslash commands. |
32 - handles psql backslash commands. |
33 |
33 |
34 The ``tests/examplefiles`` directory contains a few test files with data |
34 The ``tests/examplefiles`` directory contains a few test files with data |
35 to be parsed by these lexers. |
35 to be parsed by these lexers. |
36 |
36 |
37 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. |
37 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. |
38 :license: BSD, see LICENSE for details. |
38 :license: BSD, see LICENSE for details. |
39 """ |
39 """ |
40 |
40 |
41 import re |
41 import re |
42 |
42 |
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words |
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words |
44 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ |
44 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ |
45 Keyword, Name, String, Number, Generic |
45 Keyword, Name, String, Number, Generic, Literal |
46 from pygments.lexers import get_lexer_by_name, ClassNotFound |
46 from pygments.lexers import get_lexer_by_name, ClassNotFound |
47 |
47 |
48 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ |
48 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ |
49 PSEUDO_TYPES, PLPGSQL_KEYWORDS |
49 PSEUDO_TYPES, PLPGSQL_KEYWORDS |
|
50 from pygments.lexers._mysql_builtins import \ |
|
51 MYSQL_CONSTANTS, \ |
|
52 MYSQL_DATATYPES, \ |
|
53 MYSQL_FUNCTIONS, \ |
|
54 MYSQL_KEYWORDS, \ |
|
55 MYSQL_OPTIMIZER_HINTS |
|
56 |
50 from pygments.lexers import _tsql_builtins |
57 from pygments.lexers import _tsql_builtins |
51 |
58 |
52 |
59 |
53 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer', |
60 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer', |
54 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer', |
61 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer', |
581 rating += 0.1 |
584 rating += 0.1 |
582 return rating |
585 return rating |
583 |
586 |
584 |
587 |
585 class MySqlLexer(RegexLexer): |
588 class MySqlLexer(RegexLexer): |
586 """ |
589 """The Oracle MySQL lexer. |
587 Special lexer for MySQL. |
590 |
|
591 This lexer does not attempt to maintain strict compatibility with |
|
592 MariaDB syntax or keywords. Although MySQL and MariaDB's common code |
|
593 history suggests there may be significant overlap between the two, |
|
594 compatibility between the two is not a target for this lexer. |
588 """ |
595 """ |
589 |
596 |
590 name = 'MySQL' |
597 name = 'MySQL' |
591 aliases = ['mysql'] |
598 aliases = ['mysql'] |
592 mimetypes = ['text/x-mysql'] |
599 mimetypes = ['text/x-mysql'] |
593 |
600 |
594 flags = re.IGNORECASE |
601 flags = re.IGNORECASE |
595 tokens = { |
602 tokens = { |
596 'root': [ |
603 'root': [ |
597 (r'\s+', Text), |
604 (r'\s+', Text), |
598 (r'(#|--\s+).*\n?', Comment.Single), |
605 |
599 (r'/\*', Comment.Multiline, 'multiline-comments'), |
606 # Comments |
|
607 (r'(?:#|--\s+).*', Comment.Single), |
|
608 (r'/\*\+', Comment.Special, 'optimizer-hints'), |
|
609 (r'/\*', Comment.Multiline, 'multiline-comment'), |
|
610 |
|
611 # Hexadecimal literals |
|
612 (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form. |
|
613 (r'0x[0-9a-f]+', Number.Hex), |
|
614 |
|
615 # Binary literals |
|
616 (r"b'[01]+'", Number.Bin), |
|
617 (r'0b[01]+', Number.Bin), |
|
618 |
|
619 # Numeric literals |
|
620 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent |
|
621 (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent |
|
622 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats |
600 (r'[0-9]+', Number.Integer), |
623 (r'[0-9]+', Number.Integer), |
601 (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float), |
624 |
602 (r"'(\\\\|\\'|''|[^'])*'", String.Single), |
625 # Date literals |
603 (r'"(\\\\|\\"|""|[^"])*"', String.Double), |
626 (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", |
604 (r"`(\\\\|\\`|``|[^`])*`", String.Symbol), |
627 Literal.Date), |
605 (r'[+*/<>=~!@#%^&|`?-]', Operator), |
628 |
606 (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|' |
629 # Time literals |
607 r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|' |
630 (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", |
608 r'tinyblob|mediumblob|longblob|blob|float|double|double\s+' |
631 Literal.Date), |
609 r'precision|real|numeric|dec|decimal|timestamp|year|char|' |
632 |
610 r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?', |
633 # Timestamp literals |
611 bygroups(Keyword.Type, Text, Punctuation)), |
634 ( |
612 (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|' |
635 r"\{\s*ts\s*(?P<quote>['\"])\s*" |
613 r'bigint|binary|blob|both|by|call|cascade|case|change|char|' |
636 r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part |
614 r'character|check|collate|column|condition|constraint|continue|' |
637 r"\s+" # Whitespace between date and time |
615 r'convert|create|cross|current_date|current_time|' |
638 r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part |
616 r'current_timestamp|current_user|cursor|database|databases|' |
639 r"\s*(?P=quote)\s*\}", |
617 r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|' |
640 Literal.Date |
618 r'declare|default|delayed|delete|desc|describe|deterministic|' |
641 ), |
619 r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|' |
642 |
620 r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|' |
643 # String literals |
621 r'float8|for|force|foreign|from|fulltext|grant|group|having|' |
644 (r"'", String.Single, 'single-quoted-string'), |
622 r'high_priority|hour_microsecond|hour_minute|hour_second|if|' |
645 (r'"', String.Double, 'double-quoted-string'), |
623 r'ignore|in|index|infile|inner|inout|insensitive|insert|int|' |
646 |
624 r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|' |
647 # Variables |
625 r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|' |
648 (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable), |
626 r'localtime|localtimestamp|lock|long|loop|low_priority|match|' |
649 (r'@[a-z0-9_$.]+', Name.Variable), |
627 r'minute_microsecond|minute_second|mod|modifies|natural|' |
650 (r"@'", Name.Variable, 'single-quoted-variable'), |
628 r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|' |
651 (r'@"', Name.Variable, 'double-quoted-variable'), |
629 r'or|order|out|outer|outfile|precision|primary|procedure|purge|' |
652 (r"@`", Name.Variable, 'backtick-quoted-variable'), |
630 r'raid0|read|reads|real|references|regexp|release|rename|repeat|' |
653 (r'\?', Name.Variable), # For demonstrating prepared statements |
631 r'replace|require|restrict|return|revoke|right|rlike|schema|' |
654 |
632 r'schemas|second_microsecond|select|sensitive|separator|set|' |
655 # Operators |
633 r'show|smallint|soname|spatial|specific|sql|sql_big_result|' |
656 (r'[!%&*+/:<=>^|~-]+', Operator), |
634 r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|' |
657 |
635 r'sqlwarning|ssl|starting|straight_join|table|terminated|then|' |
658 # Exceptions; these words tokenize differently in different contexts. |
636 r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|' |
659 (r'\b(set)(?!\s*\()', Keyword), |
637 r'usage|use|using|utc_date|utc_time|utc_timestamp|values|' |
660 (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)), |
638 r'varying|when|where|while|with|write|x509|xor|year_month|' |
661 # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES. |
639 r'zerofill)\b', Keyword), |
662 |
640 # TODO: this list is not complete |
663 (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant), |
641 (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo), |
664 (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type), |
642 (r'(true|false|null)', Name.Constant), |
665 (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword), |
643 (r'([a-z_]\w*)(\s*)(\()', |
666 (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'), |
644 bygroups(Name.Function, Text, Punctuation)), |
667 bygroups(Name.Function, Text, Punctuation)), |
645 (r'[a-z_]\w*', Name), |
668 |
646 (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable), |
669 # Schema object names |
647 (r'[;:()\[\],.]', Punctuation) |
670 # |
648 ], |
671 # Note: Although the first regex supports unquoted all-numeric |
649 'multiline-comments': [ |
672 # identifiers, this will not be a problem in practice because |
650 (r'/\*', Comment.Multiline, 'multiline-comments'), |
673 # numeric literals have already been handled above. |
|
674 # |
|
675 ('[0-9a-z$_\u0080-\uffff]+', Name), |
|
676 (r'`', Name, 'schema-object-name'), |
|
677 |
|
678 # Punctuation |
|
679 (r'[(),.;]', Punctuation), |
|
680 ], |
|
681 |
|
682 # Multiline comment substates |
|
683 # --------------------------- |
|
684 |
|
685 'optimizer-hints': [ |
|
686 (r'[^*a-z]+', Comment.Special), |
|
687 (r'\*/', Comment.Special, '#pop'), |
|
688 (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc), |
|
689 ('[a-z]+', Comment.Special), |
|
690 (r'\*', Comment.Special), |
|
691 ], |
|
692 |
|
693 'multiline-comment': [ |
|
694 (r'[^*]+', Comment.Multiline), |
651 (r'\*/', Comment.Multiline, '#pop'), |
695 (r'\*/', Comment.Multiline, '#pop'), |
652 (r'[^/*]+', Comment.Multiline), |
696 (r'\*', Comment.Multiline), |
653 (r'[/*]', Comment.Multiline) |
697 ], |
654 ] |
698 |
|
699 # String substates |
|
700 # ---------------- |
|
701 |
|
702 'single-quoted-string': [ |
|
703 (r"[^'\\]+", String.Single), |
|
704 (r"''", String.Escape), |
|
705 (r"""\\[0'"bnrtZ\\%_]""", String.Escape), |
|
706 (r"'", String.Single, '#pop'), |
|
707 ], |
|
708 |
|
709 'double-quoted-string': [ |
|
710 (r'[^"\\]+', String.Double), |
|
711 (r'""', String.Escape), |
|
712 (r"""\\[0'"bnrtZ\\%_]""", String.Escape), |
|
713 (r'"', String.Double, '#pop'), |
|
714 ], |
|
715 |
|
716 # Variable substates |
|
717 # ------------------ |
|
718 |
|
719 'single-quoted-variable': [ |
|
720 (r"[^']+", Name.Variable), |
|
721 (r"''", Name.Variable), |
|
722 (r"'", Name.Variable, '#pop'), |
|
723 ], |
|
724 |
|
725 'double-quoted-variable': [ |
|
726 (r'[^"]+', Name.Variable), |
|
727 (r'""', Name.Variable), |
|
728 (r'"', Name.Variable, '#pop'), |
|
729 ], |
|
730 |
|
731 'backtick-quoted-variable': [ |
|
732 (r'[^`]+', Name.Variable), |
|
733 (r'``', Name.Variable), |
|
734 (r'`', Name.Variable, '#pop'), |
|
735 ], |
|
736 |
|
737 # Schema object name substates |
|
738 # ---------------------------- |
|
739 # |
|
740 # Backtick-quoted schema object names support escape characters. |
|
741 # It may be desirable to tokenize escape sequences differently, |
|
742 # but currently Pygments does not have an obvious token type for |
|
743 # this unique situation (for example, "Name.Escape"). |
|
744 # |
|
745 'schema-object-name': [ |
|
746 (r'[^`\\]+', Name), |
|
747 (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type. |
|
748 (r'`', Name, '#pop'), |
|
749 ], |
655 } |
750 } |
656 |
751 |
657 def analyse_text(text): |
752 def analyse_text(text): |
658 rating = 0 |
753 rating = 0 |
659 name_between_backtick_count = len( |
754 name_between_backtick_count = len( |