ThirdParty/Pygments/pygments/lexers/sql.py

changeset 5713
6762afd9f963
parent 5072
aab59042fefb
child 6651
e8f3b5568b21
equal deleted inserted replaced
5712:f0d08bdeacf4 5713:6762afd9f963
32 - handles psql backslash commands. 32 - handles psql backslash commands.
33 33
34 The ``tests/examplefiles`` contains a few test files with data to be 34 The ``tests/examplefiles`` contains a few test files with data to be
35 parsed by these lexers. 35 parsed by these lexers.
36 36
37 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. 37 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
38 :license: BSD, see LICENSE for details. 38 :license: BSD, see LICENSE for details.
39 """ 39 """
40 40
41 import re 41 import re
42 42
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words 43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
44 from pygments.token import Punctuation, \ 44 from pygments.token import Punctuation, Whitespace, Error, \
45 Text, Comment, Operator, Keyword, Name, String, Number, Generic 45 Text, Comment, Operator, Keyword, Name, String, Number, Generic
46 from pygments.lexers import get_lexer_by_name, ClassNotFound 46 from pygments.lexers import get_lexer_by_name, ClassNotFound
47 from pygments.util import iteritems 47 from pygments.util import iteritems
48 48
49 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ 49 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
50 PSEUDO_TYPES, PLPGSQL_KEYWORDS 50 PSEUDO_TYPES, PLPGSQL_KEYWORDS
51 from pygments.lexers import _tsql_builtins
51 52
52 53
53 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer', 54 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
54 'SqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer'] 55 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
56 'SqliteConsoleLexer', 'RqlLexer']
55 57
56 line_re = re.compile('.*?\n') 58 line_re = re.compile('.*?\n')
57 59
58 language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) 60 language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
61
62 do_re = re.compile(r'\bDO\b', re.IGNORECASE)
59 63
60 64
61 def language_callback(lexer, match): 65 def language_callback(lexer, match):
62 """Parse the content of a $-string using a lexer 66 """Parse the content of a $-string using a lexer
63 67
64 The lexer is chosen looking for a nearby LANGUAGE. 68 The lexer is chosen looking for a nearby LANGUAGE or assumed as
69 plpgsql if inside a DO statement and no LANGUAGE has been found.
65 """ 70 """
66 l = None 71 l = None
67 m = language_re.match(lexer.text[match.end():match.end()+100]) 72 m = language_re.match(lexer.text[match.end():match.end()+100])
68 if m is not None: 73 if m is not None:
69 l = lexer._get_lexer(m.group(1)) 74 l = lexer._get_lexer(m.group(1))
70 else: 75 else:
71 m = list(language_re.finditer( 76 m = list(language_re.finditer(
72 lexer.text[max(0, match.start()-100):match.start()])) 77 lexer.text[max(0, match.start()-100):match.start()]))
73 if m: 78 if m:
74 l = lexer._get_lexer(m[-1].group(1)) 79 l = lexer._get_lexer(m[-1].group(1))
75 80 else:
81 m = list(do_re.finditer(
82 lexer.text[max(0, match.start()-25):match.start()]))
83 if m:
84 l = lexer._get_lexer('plpgsql')
85
86 # 1 = $, 2 = delimiter, 3 = $
87 yield (match.start(1), String, match.group(1))
88 yield (match.start(2), String.Delimiter, match.group(2))
89 yield (match.start(3), String, match.group(3))
90 # 4 = string contents
76 if l: 91 if l:
77 yield (match.start(1), String, match.group(1)) 92 for x in l.get_tokens_unprocessed(match.group(4)):
78 for x in l.get_tokens_unprocessed(match.group(2)):
79 yield x 93 yield x
80 yield (match.start(3), String, match.group(3))
81
82 else: 94 else:
83 yield (match.start(), String, match.group()) 95 yield (match.start(4), String, match.group(4))
96 # 5 = $, 6 = delimiter, 7 = $
97 yield (match.start(5), String, match.group(5))
98 yield (match.start(6), String.Delimiter, match.group(6))
99 yield (match.start(7), String, match.group(7))
84 100
85 101
86 class PostgresBase(object): 102 class PostgresBase(object):
87 """Base class for Postgres-related lexers. 103 """Base class for Postgres-related lexers.
88 104
135 151
136 flags = re.IGNORECASE 152 flags = re.IGNORECASE
137 tokens = { 153 tokens = {
138 'root': [ 154 'root': [
139 (r'\s+', Text), 155 (r'\s+', Text),
140 (r'--.*?\n', Comment.Single), 156 (r'--.*\n?', Comment.Single),
141 (r'/\*', Comment.Multiline, 'multiline-comments'), 157 (r'/\*', Comment.Multiline, 'multiline-comments'),
142 (r'(' + '|'.join(s.replace(" ", "\s+") 158 (r'(' + '|'.join(s.replace(" ", "\s+")
143 for s in DATATYPES + PSEUDO_TYPES) 159 for s in DATATYPES + PSEUDO_TYPES)
144 + r')\b', Name.Builtin), 160 + r')\b', Name.Builtin),
145 (words(KEYWORDS, suffix=r'\b'), Keyword), 161 (words(KEYWORDS, suffix=r'\b'), Keyword),
146 (r'[+*/<>=~!@#%^&|`?-]+', Operator), 162 (r'[+*/<>=~!@#%^&|`?-]+', Operator),
147 (r'::', Operator), # cast 163 (r'::', Operator), # cast
148 (r'\$\d+', Name.Variable), 164 (r'\$\d+', Name.Variable),
149 (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), 165 (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
150 (r'[0-9]+', Number.Integer), 166 (r'[0-9]+', Number.Integer),
151 (r"(E|U&)?'", String.Single, 'string'), 167 (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
152 (r'(U&)?"', String.Name, 'quoted-ident'), # quoted identifier 168 # quoted identifier
153 (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback), 169 (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
170 (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
154 (r'[a-z_]\w*', Name), 171 (r'[a-z_]\w*', Name),
155 172
156 # psql variable in SQL 173 # psql variable in SQL
157 (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable), 174 (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
158 175
361 378
362 flags = re.IGNORECASE 379 flags = re.IGNORECASE
363 tokens = { 380 tokens = {
364 'root': [ 381 'root': [
365 (r'\s+', Text), 382 (r'\s+', Text),
366 (r'--.*?\n', Comment.Single), 383 (r'--.*\n?', Comment.Single),
367 (r'/\*', Comment.Multiline, 'multiline-comments'), 384 (r'/\*', Comment.Multiline, 'multiline-comments'),
368 (words(( 385 (words((
369 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER', 'AGGREGATE', 386 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER', 'AGGREGATE',
370 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'ANY', 'ARE', 'AS', 387 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'ANY', 'ARE', 'AS',
371 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT', 'ASYMMETRIC', 'AT', 'ATOMIC', 388 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT', 'ASYMMETRIC', 'AT', 'ATOMIC',
462 (r'[/*]', Comment.Multiline) 479 (r'[/*]', Comment.Multiline)
463 ] 480 ]
464 } 481 }
465 482
466 483
484 class TransactSqlLexer(RegexLexer):
485 """
486 Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
487 SQL.
488
489 The list of keywords includes ODBC and keywords reserved for future use.
490 """
491
492 name = 'Transact-SQL'
493 aliases = ['tsql', 't-sql']
494 filenames = ['*.sql']
495 mimetypes = ['text/x-tsql']
496
497 # Use re.UNICODE to allow non-ASCII letters in names.
498 flags = re.IGNORECASE | re.UNICODE
499 tokens = {
500 'root': [
501 (r'\s+', Whitespace),
502 (r'--(?m).*?$\n?', Comment.Single),
503 (r'/\*', Comment.Multiline, 'multiline-comments'),
504 (words(_tsql_builtins.OPERATORS), Operator),
505 (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
506 (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
507 (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
508 (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
509 (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
510 (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
511 (r'0x[0-9a-f]+', Number.Hex),
512 # Float variant 1, for example: 1., 1.e2, 1.2e3
513 (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
514 # Float variant 2, for example: .1, .1e2
515 (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
516 # Float variant 3, for example: 123e45
517 (r'[0-9]+e[+-]?[0-9]+', Number.Float),
518 (r'[0-9]+', Number.Integer),
519 (r"'(''|[^'])*'", String.Single),
520 (r'"(""|[^"])*"', String.Symbol),
521 (r'[;(),.]', Punctuation),
522 # Below we use \w even for the first "real" character because
523 # tokens starting with a digit have already been recognized
524 # as Number above.
525 (r'@@\w+', Name.Builtin),
526 (r'@\w+', Name.Variable),
527 (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
528 (r'#?#?\w+', Name), # names for temp tables and anything else
529 (r'\?', Name.Variable.Magic), # parameter for prepared statements
530 ],
531 'multiline-comments': [
532 (r'/\*', Comment.Multiline, 'multiline-comments'),
533 (r'\*/', Comment.Multiline, '#pop'),
534 (r'[^/*]+', Comment.Multiline),
535 (r'[/*]', Comment.Multiline)
536 ]
537 }
538
539
467 class MySqlLexer(RegexLexer): 540 class MySqlLexer(RegexLexer):
468 """ 541 """
469 Special lexer for MySQL. 542 Special lexer for MySQL.
470 """ 543 """
471 544
475 548
476 flags = re.IGNORECASE 549 flags = re.IGNORECASE
477 tokens = { 550 tokens = {
478 'root': [ 551 'root': [
479 (r'\s+', Text), 552 (r'\s+', Text),
480 (r'(#|--\s+).*?\n', Comment.Single), 553 (r'(#|--\s+).*\n?', Comment.Single),
481 (r'/\*', Comment.Multiline, 'multiline-comments'), 554 (r'/\*', Comment.Multiline, 'multiline-comments'),
482 (r'[0-9]+', Number.Integer), 555 (r'[0-9]+', Number.Integer),
483 (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float), 556 (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float),
484 (r"'(\\\\|\\'|''|[^'])*'", String.Single), 557 (r"'(\\\\|\\'|''|[^'])*'", String.Single),
485 (r'"(\\\\|\\"|""|[^"])*"', String.Double), 558 (r'"(\\\\|\\"|""|[^"])*"', String.Double),

eric ide

mercurial