eric6/ThirdParty/Pygments/pygments/lexers/sql.py

changeset 7701
25f42e208e08
parent 7547
21b0534faebc
child 7983
54c5cfbb1e29
equal deleted inserted replaced
7700:a3cf077a8db3 7701:25f42e208e08
32 - handles psql backslash commands. 32 - handles psql backslash commands.
33 33
34 The ``tests/examplefiles`` contains a few test files with data to be 34 The ``tests/examplefiles`` contains a few test files with data to be
35 parsed by these lexers. 35 parsed by these lexers.
36 36
37 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. 37 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
38 :license: BSD, see LICENSE for details. 38 :license: BSD, see LICENSE for details.
39 """ 39 """
40 40
41 import re 41 import re
42 42
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words 43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
44 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ 44 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
45 Keyword, Name, String, Number, Generic 45 Keyword, Name, String, Number, Generic, Literal
46 from pygments.lexers import get_lexer_by_name, ClassNotFound 46 from pygments.lexers import get_lexer_by_name, ClassNotFound
47 47
48 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ 48 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
49 PSEUDO_TYPES, PLPGSQL_KEYWORDS 49 PSEUDO_TYPES, PLPGSQL_KEYWORDS
50 from pygments.lexers._mysql_builtins import \
51 MYSQL_CONSTANTS, \
52 MYSQL_DATATYPES, \
53 MYSQL_FUNCTIONS, \
54 MYSQL_KEYWORDS, \
55 MYSQL_OPTIMIZER_HINTS
56
50 from pygments.lexers import _tsql_builtins 57 from pygments.lexers import _tsql_builtins
51 58
52 59
53 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer', 60 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
54 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer', 61 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
115 seem to suggest that regexp lexers are not really subclassable. 122 seem to suggest that regexp lexers are not really subclassable.
116 """ 123 """
117 def get_tokens_unprocessed(self, text, *args): 124 def get_tokens_unprocessed(self, text, *args):
118 # Have a copy of the entire text to be used by `language_callback`. 125 # Have a copy of the entire text to be used by `language_callback`.
119 self.text = text 126 self.text = text
120 for x in super(PostgresBase, self).get_tokens_unprocessed( 127 yield from super().get_tokens_unprocessed(text, *args)
121 text, *args):
122 yield x
123 128
124 def _get_lexer(self, lang): 129 def _get_lexer(self, lang):
125 if lang.lower() == 'sql': 130 if lang.lower() == 'sql':
126 return get_lexer_by_name('postgresql', **self.options) 131 return get_lexer_by_name('postgresql', **self.options)
127 132
317 insertions = [] 322 insertions = []
318 for line in lines: 323 for line in lines:
319 # Identify a shell prompt in case of psql commandline example 324 # Identify a shell prompt in case of psql commandline example
320 if line.startswith('$') and not curcode: 325 if line.startswith('$') and not curcode:
321 lexer = get_lexer_by_name('console', **self.options) 326 lexer = get_lexer_by_name('console', **self.options)
322 for x in lexer.get_tokens_unprocessed(line): 327 yield from lexer.get_tokens_unprocessed(line)
323 yield x
324 break 328 break
325 329
326 # Identify a psql prompt 330 # Identify a psql prompt
327 mprompt = re_prompt.match(line) 331 mprompt = re_prompt.match(line)
328 if mprompt is not None: 332 if mprompt is not None:
338 if re_psql_command.match(curcode) \ 342 if re_psql_command.match(curcode) \
339 or re_end_command.search(curcode): 343 or re_end_command.search(curcode):
340 break 344 break
341 345
342 # Emit the combined stream of command and prompt(s) 346 # Emit the combined stream of command and prompt(s)
343 for item in do_insertions(insertions, 347 yield from do_insertions(insertions,
344 sql.get_tokens_unprocessed(curcode)): 348 sql.get_tokens_unprocessed(curcode))
345 yield item
346 349
347 # Emit the output lines 350 # Emit the output lines
348 out_token = Generic.Output 351 out_token = Generic.Output
349 for line in lines: 352 for line in lines:
350 mprompt = re_prompt.match(line) 353 mprompt = re_prompt.match(line)
514 # Use re.UNICODE to allow non ASCII letters in names. 517 # Use re.UNICODE to allow non ASCII letters in names.
515 flags = re.IGNORECASE | re.UNICODE 518 flags = re.IGNORECASE | re.UNICODE
516 tokens = { 519 tokens = {
517 'root': [ 520 'root': [
518 (r'\s+', Whitespace), 521 (r'\s+', Whitespace),
519 (r'(?m)--.*?$\n?', Comment.Single), 522 (r'--.*?$\n?', Comment.Single),
520 (r'/\*', Comment.Multiline, 'multiline-comments'), 523 (r'/\*', Comment.Multiline, 'multiline-comments'),
521 (words(_tsql_builtins.OPERATORS), Operator), 524 (words(_tsql_builtins.OPERATORS), Operator),
522 (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word), 525 (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
523 (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class), 526 (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
524 (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function), 527 (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
581 rating += 0.1 584 rating += 0.1
582 return rating 585 return rating
583 586
584 587
585 class MySqlLexer(RegexLexer): 588 class MySqlLexer(RegexLexer):
586 """ 589 """The Oracle MySQL lexer.
587 Special lexer for MySQL. 590
591 This lexer does not attempt to maintain strict compatibility with
592 MariaDB syntax or keywords. Although MySQL and MariaDB's common code
593 history suggests there may be significant overlap between the two,
594 compatibility between the two is not a target for this lexer.
588 """ 595 """
589 596
590 name = 'MySQL' 597 name = 'MySQL'
591 aliases = ['mysql'] 598 aliases = ['mysql']
592 mimetypes = ['text/x-mysql'] 599 mimetypes = ['text/x-mysql']
593 600
594 flags = re.IGNORECASE 601 flags = re.IGNORECASE
595 tokens = { 602 tokens = {
596 'root': [ 603 'root': [
597 (r'\s+', Text), 604 (r'\s+', Text),
598 (r'(#|--\s+).*\n?', Comment.Single), 605
599 (r'/\*', Comment.Multiline, 'multiline-comments'), 606 # Comments
607 (r'(?:#|--\s+).*', Comment.Single),
608 (r'/\*\+', Comment.Special, 'optimizer-hints'),
609 (r'/\*', Comment.Multiline, 'multiline-comment'),
610
611 # Hexadecimal literals
612 (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form.
613 (r'0x[0-9a-f]+', Number.Hex),
614
615 # Binary literals
616 (r"b'[01]+'", Number.Bin),
617 (r'0b[01]+', Number.Bin),
618
619 # Numeric literals
620 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
621 (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
622 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
600 (r'[0-9]+', Number.Integer), 623 (r'[0-9]+', Number.Integer),
601 (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float), 624
602 (r"'(\\\\|\\'|''|[^'])*'", String.Single), 625 # Date literals
603 (r'"(\\\\|\\"|""|[^"])*"', String.Double), 626 (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
604 (r"`(\\\\|\\`|``|[^`])*`", String.Symbol), 627 Literal.Date),
605 (r'[+*/<>=~!@#%^&|`?-]', Operator), 628
606 (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|' 629 # Time literals
607 r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|' 630 (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
608 r'tinyblob|mediumblob|longblob|blob|float|double|double\s+' 631 Literal.Date),
609 r'precision|real|numeric|dec|decimal|timestamp|year|char|' 632
610 r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?', 633 # Timestamp literals
611 bygroups(Keyword.Type, Text, Punctuation)), 634 (
612 (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|' 635 r"\{\s*ts\s*(?P<quote>['\"])\s*"
613 r'bigint|binary|blob|both|by|call|cascade|case|change|char|' 636 r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part
614 r'character|check|collate|column|condition|constraint|continue|' 637 r"\s+" # Whitespace between date and time
615 r'convert|create|cross|current_date|current_time|' 638 r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part
616 r'current_timestamp|current_user|cursor|database|databases|' 639 r"\s*(?P=quote)\s*\}",
617 r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|' 640 Literal.Date
618 r'declare|default|delayed|delete|desc|describe|deterministic|' 641 ),
619 r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|' 642
620 r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|' 643 # String literals
621 r'float8|for|force|foreign|from|fulltext|grant|group|having|' 644 (r"'", String.Single, 'single-quoted-string'),
622 r'high_priority|hour_microsecond|hour_minute|hour_second|if|' 645 (r'"', String.Double, 'double-quoted-string'),
623 r'ignore|in|index|infile|inner|inout|insensitive|insert|int|' 646
624 r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|' 647 # Variables
625 r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|' 648 (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
626 r'localtime|localtimestamp|lock|long|loop|low_priority|match|' 649 (r'@[a-z0-9_$.]+', Name.Variable),
627 r'minute_microsecond|minute_second|mod|modifies|natural|' 650 (r"@'", Name.Variable, 'single-quoted-variable'),
628 r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|' 651 (r'@"', Name.Variable, 'double-quoted-variable'),
629 r'or|order|out|outer|outfile|precision|primary|procedure|purge|' 652 (r"@`", Name.Variable, 'backtick-quoted-variable'),
630 r'raid0|read|reads|real|references|regexp|release|rename|repeat|' 653 (r'\?', Name.Variable), # For demonstrating prepared statements
631 r'replace|require|restrict|return|revoke|right|rlike|schema|' 654
632 r'schemas|second_microsecond|select|sensitive|separator|set|' 655 # Operators
633 r'show|smallint|soname|spatial|specific|sql|sql_big_result|' 656 (r'[!%&*+/:<=>^|~-]+', Operator),
634 r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|' 657
635 r'sqlwarning|ssl|starting|straight_join|table|terminated|then|' 658 # Exceptions; these words tokenize differently in different contexts.
636 r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|' 659 (r'\b(set)(?!\s*\()', Keyword),
637 r'usage|use|using|utc_date|utc_time|utc_timestamp|values|' 660 (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
638 r'varying|when|where|while|with|write|x509|xor|year_month|' 661 # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.
639 r'zerofill)\b', Keyword), 662
640 # TODO: this list is not complete 663 (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
641 (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo), 664 (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
642 (r'(true|false|null)', Name.Constant), 665 (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
643 (r'([a-z_]\w*)(\s*)(\()', 666 (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
644 bygroups(Name.Function, Text, Punctuation)), 667 bygroups(Name.Function, Text, Punctuation)),
645 (r'[a-z_]\w*', Name), 668
646 (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable), 669 # Schema object names
647 (r'[;:()\[\],.]', Punctuation) 670 #
648 ], 671 # Note: Although the first regex supports unquoted all-numeric
649 'multiline-comments': [ 672 # identifiers, this will not be a problem in practice because
650 (r'/\*', Comment.Multiline, 'multiline-comments'), 673 # numeric literals have already been handled above.
674 #
675 ('[0-9a-z$_\u0080-\uffff]+', Name),
676 (r'`', Name, 'schema-object-name'),
677
678 # Punctuation
679 (r'[(),.;]', Punctuation),
680 ],
681
682 # Multiline comment substates
683 # ---------------------------
684
685 'optimizer-hints': [
686 (r'[^*a-z]+', Comment.Special),
687 (r'\*/', Comment.Special, '#pop'),
688 (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
689 ('[a-z]+', Comment.Special),
690 (r'\*', Comment.Special),
691 ],
692
693 'multiline-comment': [
694 (r'[^*]+', Comment.Multiline),
651 (r'\*/', Comment.Multiline, '#pop'), 695 (r'\*/', Comment.Multiline, '#pop'),
652 (r'[^/*]+', Comment.Multiline), 696 (r'\*', Comment.Multiline),
653 (r'[/*]', Comment.Multiline) 697 ],
654 ] 698
699 # String substates
700 # ----------------
701
702 'single-quoted-string': [
703 (r"[^'\\]+", String.Single),
704 (r"''", String.Escape),
705 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
706 (r"'", String.Single, '#pop'),
707 ],
708
709 'double-quoted-string': [
710 (r'[^"\\]+', String.Double),
711 (r'""', String.Escape),
712 (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
713 (r'"', String.Double, '#pop'),
714 ],
715
716 # Variable substates
717 # ------------------
718
719 'single-quoted-variable': [
720 (r"[^']+", Name.Variable),
721 (r"''", Name.Variable),
722 (r"'", Name.Variable, '#pop'),
723 ],
724
725 'double-quoted-variable': [
726 (r'[^"]+', Name.Variable),
727 (r'""', Name.Variable),
728 (r'"', Name.Variable, '#pop'),
729 ],
730
731 'backtick-quoted-variable': [
732 (r'[^`]+', Name.Variable),
733 (r'``', Name.Variable),
734 (r'`', Name.Variable, '#pop'),
735 ],
736
737 # Schema object name substates
738 # ----------------------------
739 #
740 # Backtick-quoted schema object names support escape characters.
741 # It may be desirable to tokenize escape sequences differently,
742 # but currently Pygments does not have an obvious token type for
743 # this unique situation (for example, "Name.Escape").
744 #
745 'schema-object-name': [
746 (r'[^`\\]+', Name),
747 (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type.
748 (r'`', Name, '#pop'),
749 ],
655 } 750 }
656 751
657 def analyse_text(text): 752 def analyse_text(text):
658 rating = 0 753 rating = 0
659 name_between_backtick_count = len( 754 name_between_backtick_count = len(
696 insertions.append((len(curcode), 791 insertions.append((len(curcode),
697 [(0, Generic.Prompt, line[:8])])) 792 [(0, Generic.Prompt, line[:8])]))
698 curcode += line[8:] 793 curcode += line[8:]
699 else: 794 else:
700 if curcode: 795 if curcode:
701 for item in do_insertions(insertions, 796 yield from do_insertions(insertions,
702 sql.get_tokens_unprocessed(curcode)): 797 sql.get_tokens_unprocessed(curcode))
703 yield item
704 curcode = '' 798 curcode = ''
705 insertions = [] 799 insertions = []
706 if line.startswith('SQL error: '): 800 if line.startswith('SQL error: '):
707 yield (match.start(), Generic.Traceback, line) 801 yield (match.start(), Generic.Traceback, line)
708 else: 802 else:
709 yield (match.start(), Generic.Output, line) 803 yield (match.start(), Generic.Output, line)
710 if curcode: 804 if curcode:
711 for item in do_insertions(insertions, 805 yield from do_insertions(insertions,
712 sql.get_tokens_unprocessed(curcode)): 806 sql.get_tokens_unprocessed(curcode))
713 yield item
714 807
715 808
716 class RqlLexer(RegexLexer): 809 class RqlLexer(RegexLexer):
717 """ 810 """
718 Lexer for Relation Query Language. 811 Lexer for Relation Query Language.

eric ide

mercurial