eric6/ThirdParty/Pygments/pygments/lexers/sql.py

# -*- coding: utf-8 -*-
"""
    pygments.lexers.sql
    ~~~~~~~~~~~~~~~~~~~

    Lexers for various SQL dialects and related interactive sessions.

    Postgres specific lexers:

    `PostgresLexer`
        A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
        lexer are:

        - keywords and data types list parsed from the PG docs (run the
          `_postgres_builtins` module to update them);
        - content of $-strings parsed using a specific lexer, e.g. the content
          of a PL/Python function is parsed using the Python lexer;
        - parse PG specific constructs: E-strings, $-strings, U&-strings,
          different operators and punctuation.

    `PlPgsqlLexer`
        A lexer for the PL/pgSQL language. Adds a few specific constructs on
        top of the PG SQL lexer (such as <<label>>).

    `PostgresConsoleLexer`
        A lexer to highlight an interactive psql session:

        - identifies the prompt and does its best to detect the end of command
          in multiline statements where not all the lines are prefixed by a
          prompt, telling them apart from the output;
        - highlights errors in the output and notification levels;
        - handles psql backslash commands.

    The ``tests/examplefiles`` directory contains a few test files with data
    to be parsed by these lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
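# A minimal usage sketch (the sample session text below is made up for
# illustration; any psql transcript works the same way):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.sql import PostgresConsoleLexer
#
#     session = "template1=# SELECT 'hello' AS greeting;\n greeting\n"
#     print(highlight(session, PostgresConsoleLexer(), TerminalFormatter()))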

import re

from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
    Keyword, Name, String, Number, Generic, Literal
from pygments.lexers import get_lexer_by_name, ClassNotFound

from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
    PSEUDO_TYPES, PLPGSQL_KEYWORDS
from pygments.lexers._mysql_builtins import \
    MYSQL_CONSTANTS, \
    MYSQL_DATATYPES, \
    MYSQL_FUNCTIONS, \
    MYSQL_KEYWORDS, \
    MYSQL_OPTIMIZER_HINTS

from pygments.lexers import _tsql_builtins


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

line_re = re.compile('.*?\n')

language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')


def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen by looking for a nearby LANGUAGE clause; if none is
    found and the string is inside a DO statement, plpgsql is assumed.
    """
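    # For example, in
    #   CREATE FUNCTION f() RETURNS text AS $$ return "x" $$ LANGUAGE plpythonu;
    # the LANGUAGE clause after the closing $$ selects the Python lexer for
    # the function body (this sample statement is purely illustrative).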
    lx = None
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))


class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL being parsed as SQL. This
    shortcoming seems to suggest that regexp lexers are not really
    subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])
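        # e.g. for lang == 'plpython3u' the candidates tried are, in order,
        # 'plpython3u', 'python3u', 'plpython3' and 'python3'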

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass
        else:
            # TODO: better logging
            # print('language not found:', lang, file=sys.stderr)
            return None


class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
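            # dollar-quoted string, e.g. $tag$...$tag$; the body (group 4) is
            # handed to language_callback, which picks a sub-lexer based on a
            # nearby LANGUAGE clause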
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }


class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in the PL/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
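    # Copy the rule lists so the edits below don't leak back into
    # PostgresLexer.tokens.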
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]


class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer to add support for psql-specific commands.

    This is not a complete psql lexer yet, as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]


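# A psql prompt, e.g. "template1=#" or "mydb=>", including continuation
# prompts such as "mydb-#" and "mydb(#".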
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
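# e.g. matches output lines such as
#     NOTICE:  relation "foo" already exists, skipping
#     ERROR:  syntax error at or near "selec"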


class lookahead:
    """Wrap an iterator and allow pushing back an item."""
    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = None

    def __iter__(self):
        return self

    def send(self, i):
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not None:
            ni = self._nextitem
            self._nextitem = None
            return ni
        return next(self.iter)
    next = __next__
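    # Typical use: peek at a line and push it back if it belongs to the next
    # prompt/output cycle, e.g.:
    #     lines = lookahead(['a\n', 'b\n'])
    #     line = next(lines)    # 'a\n'
    #     lines.send(line)      # push it back
    #     next(lines)           # 'a\n' again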


class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while True:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of a psql command-line example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return


class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer recognizes
    ANSI SQL only, without any dialect-specific extensions.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDICATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEDES', 'PRECISION', 'PREFIX', 'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNING',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in names for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
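        # Claim plain SQL only weakly, so that the more specific dialect
        # lexers can win via their own analyse_text() ratings.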
        return 0.01


class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    # Use re.UNICODE to allow non-ASCII letters in names.
    flags = re.IGNORECASE | re.UNICODE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
        if tsql_variable_re.search(text) is not None:
            rating += 0.1
        if tsql_go_re.search(text) is not None:
            rating += 0.1
        return rating
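
    # Rough calibration of the heuristic above: "DECLARE @x INT" scores 1.0
    # for the declaration plus 0.1 for the @x variable, while a script
    # containing "[dbo].[mytable]" and a "GO" separator scores 0.5 + 0.1.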


class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although their common code history suggests
    significant overlap between the two, compatibility is not a target for
    this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
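            # A function name is only recognized when it is directly followed
            # by an opening parenthesis; the whitespace and "(" are captured
            # so bygroups() can emit them as separate tokens.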
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Text, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard token
        # types; formatters style them as "Name" by default, but can add
        # extra styles based on the full token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating


class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']
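
    # Expected input resembles
    #     sqlite> CREATE TABLE t (x INTEGER);
    #     sqlite> SELECT x FROM t;
    # with continuation lines prefixed by "   ...> "; everything else is
    # treated as output.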

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))


class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    `RQL <http://www.logilab.org/project/rql>`_

    .. versionadded:: 2.0
    """
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }
