eric6/ThirdParty/Pygments/pygments/lexers/sql.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.sql
4 ~~~~~~~~~~~~~~~~~~~
5
6 Lexers for various SQL dialects and related interactive sessions.
7
8 Postgres specific lexers:
9
10 `PostgresLexer`
11 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
12 lexer are:
13
14 - keywords and data types list parsed from the PG docs (run the
15 `_postgres_builtins` module to update them);
16 - Content of $-strings parsed using a specific lexer, e.g. the content
17 of a PL/Python function is parsed using the Python lexer;
18 - parse PG specific constructs: E-strings, $-strings, U&-strings,
19 different operators and punctuation.
20
21 `PlPgsqlLexer`
22 A lexer for the PL/pgSQL language. Adds a few specific construct on
23 top of the PG SQL lexer (such as <<label>>).
24
25 `PostgresConsoleLexer`
26 A lexer to highlight an interactive psql session:
27
28 - identifies the prompt and does its best to detect the end of command
29 in multiline statement where not all the lines are prefixed by a
30 prompt, telling them apart from the output;
31 - highlights errors in the output and notification levels;
32 - handles psql backslash commands.
33
34 The ``tests/examplefiles`` contains a few test files with data to be
35 parsed by these lexers.
36
37 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
38 :license: BSD, see LICENSE for details.
39 """
40
41 import re
42
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
44 from pygments.token import Punctuation, Whitespace, Error, \
45 Text, Comment, Operator, Keyword, Name, String, Number, Generic
46 from pygments.lexers import get_lexer_by_name, ClassNotFound
47 from pygments.util import iteritems
48
49 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
50 PSEUDO_TYPES, PLPGSQL_KEYWORDS
51 from pygments.lexers import _tsql_builtins
52
53
__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

# Matches a single line including its trailing newline; used to split
# console-session input into lines.
line_re = re.compile('.*?\n')

# Finds a "LANGUAGE <name>" clause (name optionally quoted) near a
# $-string so the right sub-lexer can be picked for its content.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# Detects a DO statement; its body defaults to plpgsql when no
# LANGUAGE clause is found.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)
63
64
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    sub_lexer = None

    # Prefer a LANGUAGE clause shortly *after* the $-string.
    after = lexer.text[match.end():match.end() + 100]
    found = language_re.match(after)
    if found is not None:
        sub_lexer = lexer._get_lexer(found.group(1))
    else:
        # Otherwise take the closest LANGUAGE clause *before* it.
        before = lexer.text[max(0, match.start() - 100):match.start()]
        clauses = list(language_re.finditer(before))
        if clauses:
            sub_lexer = lexer._get_lexer(clauses[-1].group(1))
        else:
            # No LANGUAGE at all: inside a DO statement assume plpgsql.
            context = lexer.text[max(0, match.start() - 25):match.start()]
            if list(do_re.finditer(context)):
                sub_lexer = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if sub_lexer:
        for token in sub_lexer.get_tokens_unprocessed(match.group(4)):
            yield token
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
100
101
class PostgresBase(object):
    """Mixin shared by the Postgres-related lexers.

    A mixin (rather than a common Lexer ancestor) is used so that the
    Lexer metaclass does not kick in on a shared base class: if it did,
    ``_tokens`` could be created on that ancestor and never updated for
    the subclasses, resulting e.g. in PL/pgSQL parsed as SQL.  This
    shortcoming seems to suggest that regexp lexers are not really
    subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep the entire text around so `language_callback` can look at
        # the context surrounding a $-string.
        self.text = text
        for item in super(PostgresBase, self).get_tokens_unprocessed(
                text, *args):
            yield item

    def _get_lexer(self, lang):
        # 'sql' inside PostgreSQL means the PostgreSQL dialect itself.
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        # Try progressively stripped variants of the language name:
        # plpythonu -> pythonu -> plpython -> python, etc.
        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for name in candidates:
            try:
                return get_lexer_by_name(name, **self.options)
            except ClassNotFound:
                pass
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
139
140
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Built-in data/pseudo types; multi-word names (e.g.
            # "double precision") match across any run of whitespace.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES)
             + r')\b', Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            # Positional parameter, e.g. $1 in a prepared statement.
            (r'\$\d+', Name.Variable),
            # NOTE(review): this alternation also matches plain integers
            # ([0-9]+ with no exponent), so the Integer rule below rarely
            # fires — confirm whether that is intended.
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings.
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $-quoted string: groups 1-3 open tag, 4 content, 5-7 close
            # tag; the content is handed to a sub-lexer (see
            # language_callback above).
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the recursive
            # state push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            # '' is an escaped quote inside a single-quoted string.
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            # "" is an escaped quote inside a quoted identifier.
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }
195
196
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Shallow-copy each state list of the PostgresLexer token map so it
    # can be extended without mutating the parent lexer's rules.
    tokens = dict((k, l[:]) for (k, l) in iteritems(PostgresLexer.tokens))

    # extend the keywords list: replace the plain-SQL Keyword rule with
    # one that also knows the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            break
    else:
        assert 0, "SQL keywords not found"
    # Don't leak the loop variables into the class namespace (the
    # original code deleted only `i`, leaving `pattern` behind as a
    # class attribute).
    del i, pattern

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
228
229
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Start from a copy of the PostgresLexer rules and extend them.
    tokens = dict((k, l[:]) for (k, l) in iteritems(PostgresLexer.tokens))

    # A backslash command (e.g. \d) switches to a dedicated state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),  # newline ends the backslash command
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted: :var, :'var', :"var"
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),  # anything else is a command argument
    ]
255
# A psql prompt: an optional prefix (e.g. the database name) followed by
# one of the prompt characters and '#' or '>'.
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# End of a SQL command: a semicolon, optionally followed by a line comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A psql backslash command, e.g. "\d mytable\n".
# NOTE(review): the original file assigned re_psql_command twice; the
# first pattern (r'\s*\\') was dead code, immediately overwritten by this
# one, and has been removed.
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
# Output lines that start an error report.
re_error = re.compile(r'(ERROR|FATAL):')
# Server message lines: severity prefix (group 1) and message (group 2).
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
264
265
class lookahead(object):
    """Wrap an iterator and allow pushing back an item."""

    # Sentinel meaning "nothing pushed back".  A dedicated object is used
    # instead of None so that any value — including None — can be pushed
    # back without being silently dropped.
    _EMPTY = object()

    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = self._EMPTY

    def __iter__(self):
        return self

    def send(self, i):
        """Push *i* back so the next call to next() returns it."""
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not self._EMPTY:
            ni = self._nextitem
            self._nextitem = self._EMPTY
            return ni
        return next(self.iter)
    next = __next__  # Python 2 iterator protocol compatibility
286
287
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        # Highlighting of the SQL commands themselves is delegated.
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets a line be pushed back when it turns out to
        # belong to the next prompt/output cycle.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    for x in lexer.get_tokens_unprocessed(line):
                        yield x
                    break

                # Identify a psql prompt
                if mprompt is not None:
                    # Record the prompt for do_insertions and keep only
                    # the command text in curcode.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    # Continuation line without a prompt.
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            for item in do_insertions(insertions,
                                      sql.get_tokens_unprocessed(curcode)):
                yield item

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # An ERROR/FATAL message switches the remaining output
                    # of this cycle to the error token.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # for-else: input exhausted without a new prompt — done.
                return
360
361
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # ANSI SQL keywords.  Fixed: the original list contained
            # ' TEMP' with a leading space, so TEMP was never matched as
            # a keyword.
            # NOTE(review): several entries look misspelled (e.g.
            # 'COALSECE', 'CORRESPONTING', 'SQLWARNINIG', 'TRANSATION',
            # 'INDITCATOR', 'PARAMATER_*') — kept as-is to preserve the
            # historical behavior; verify against the upstream list.
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER', 'AGGREGATE',
                'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'ANY', 'ARE', 'AS',
                'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT', 'ASYMMETRIC', 'AT', 'ATOMIC',
                'AUTHORIZATION', 'AVG', 'BACKWARD', 'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR',
                'BIT_LENGTH', 'BOTH', 'BREADTH', 'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY',
                'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE', 'CLUSTER',
                'COALSECE', 'COBOL', 'COLLATE', 'COLLATION', 'COLLATION_CATALOG',
                'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN', 'COLUMN_NAME',
                'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT', 'COMMIT',
                'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT', 'CONNECTION',
                'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS', 'CONSTRAINT_CATALOG',
                'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA', 'CONSTRUCTOR', 'CONTAINS',
                'CONTINUE', 'CONVERSION', 'CONVERT', 'COPY', 'CORRESPONTING', 'COUNT',
                'CREATE', 'CREATEDB', 'CREATEUSER', 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE',
                'CURRENT_PATH', 'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP',
                'CURRENT_USER', 'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED',
                'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DEREF', 'DESC',
                'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR', 'DETERMINISTIC',
                'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH', 'DISTINCT', 'DO',
                'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION', 'DYNAMIC_FUNCTION_CODE', 'EACH',
                'ELSE', 'ELSIF', 'ENCODING', 'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY',
                'EXCEPTION', 'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL', 'FIRST', 'FOR',
                'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE', 'FREEZE', 'FROM', 'FULL',
                'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET', 'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED',
                'GROUP', 'GROUPING', 'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT', 'IN',
                'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX', 'INHERITS', 'INITIALIZE',
                'INITIALLY', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT', 'INSTANTIABLE',
                'INSTEAD', 'INTERSECT', 'INTO', 'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN',
                'KEY', 'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT', 'LISTEN', 'LOAD',
                'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION', 'LOCATOR', 'LOCK', 'LOWER',
                'MAP', 'MATCH', 'MAX', 'MAXVALUE', 'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH',
                'MESSAGE_TEXT', 'METHOD', 'MIN', 'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES',
                'MODIFY', 'MONTH', 'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR',
                'NCLOB', 'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT', 'NOTHING',
                'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT', 'OCTET_LENGTH', 'OF', 'OFF',
                'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY', 'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS',
                'OR', 'ORDER', 'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY', 'OVERRIDING',
                'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE', 'PARAMATER_NAME',
                'PARAMATER_ORDINAL_POSITION', 'PARAMETER_SPECIFIC_CATALOG',
                'PARAMETER_SPECIFIC_NAME', 'PARAMATER_SPECIFIC_SCHEMA', 'PARTIAL',
                'PASCAL', 'PENDANT', 'PLACING', 'PLI', 'POSITION', 'POSTFIX', 'PRECISION', 'PREFIX',
                'PREORDER', 'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF', 'REFERENCES',
                'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME', 'REPEATABLE', 'REPLACE', 'RESET',
                'RESTART', 'RESTRICT', 'RESULT', 'RETURN', 'RETURNED_LENGTH',
                'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE', 'RETURNS', 'REVOKE', 'RIGHT',
                'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE', 'ROUTINE_CATALOG', 'ROUTINE_NAME',
                'ROUTINE_SCHEMA', 'ROW', 'ROWS', 'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA',
                'SCHEMA_NAME', 'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER', 'SET',
                'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE', 'SOME', 'SOURCE', 'SPACE',
                'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME', 'SQL', 'SQLCODE', 'SQLERROR',
                'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG', 'STABLE', 'START', 'STATE', 'STATEMENT',
                'STATIC', 'STATISTICS', 'STDIN', 'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE',
                'SUBCLASS_ORIGIN', 'SUBLIST', 'SUBSTRING', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TERMINATE',
                'THAN', 'THEN', 'TIMESTAMP', 'TIMEZONE_HOUR', 'TIMEZONE_MINUTE', 'TO', 'TOAST',
                'TRAILING', 'TRANSATION', 'TRANSACTIONS_COMMITTED',
                'TRANSACTIONS_ROLLED_BACK', 'TRANSATION_ACTIVE', 'TRANSFORM',
                'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER', 'TRIGGER_CATALOG',
                'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE',
                'UNCOMMITTED', 'UNDER', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN',
                'UNNAMED', 'UNNEST', 'UNTIL', 'UPDATE', 'UPPER', 'USAGE', 'USER',
                'USER_DEFINED_TYPE_CATALOG', 'USER_DEFINED_TYPE_NAME',
                'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM', 'VALID', 'VALIDATOR', 'VALUES',
                'VARIABLE', 'VERBOSE', 'VERSION', 'VIEW', 'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE',
                'WITH', 'WITHOUT', 'WORK', 'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            # Built-in type names.
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR', 'CHARACTER', 'DATE',
                'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER', 'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL',
                'SERIAL', 'SMALLINT', 'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }
476
477
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use..
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    # Use re.UNICODE to allow non ASCII letters in names.
    flags = re.IGNORECASE | re.UNICODE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(?m)--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            # GOTO with its target label.
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # Bracket-delimited identifier, e.g. [My Table].
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }
532
533
class MySqlLexer(RegexLexer):
    """
    Special lexer for MySQL.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|--\s+).*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Float literals.  Fixed: the original rule required a
            # mandatory *signed* exponent (e[+-][0-9]+) and was listed
            # after the integer rule, so plain floats like 1.5 or 1.5e3
            # were never recognized; the rule must come first and the
            # exponent (and its sign) must be optional.
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(\\\\|\\'|''|[^'])*'", String.Single),
            (r'"(\\\\|\\"|""|[^"])*"', String.Double),
            (r"`(\\\\|\\`|``|[^`])*`", String.Symbol),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            # Type keywords, optionally followed by a length/precision
            # opening parenthesis.
            (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|'
             r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|'
             r'tinyblob|mediumblob|longblob|blob|float|double|double\s+'
             r'precision|real|numeric|dec|decimal|timestamp|year|char|'
             r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?',
             bygroups(Keyword.Type, Text, Punctuation)),
            (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|'
             r'bigint|binary|blob|both|by|call|cascade|case|change|char|'
             r'character|check|collate|column|condition|constraint|continue|'
             r'convert|create|cross|current_date|current_time|'
             r'current_timestamp|current_user|cursor|database|databases|'
             r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|'
             r'declare|default|delayed|delete|desc|describe|deterministic|'
             r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|'
             r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|'
             r'float8|for|force|foreign|from|fulltext|grant|group|having|'
             r'high_priority|hour_microsecond|hour_minute|hour_second|if|'
             r'ignore|in|index|infile|inner|inout|insensitive|insert|int|'
             r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|'
             r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|'
             r'localtime|localtimestamp|lock|long|loop|low_priority|match|'
             r'minute_microsecond|minute_second|mod|modifies|natural|'
             r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|'
             r'or|order|out|outer|outfile|precision|primary|procedure|purge|'
             r'raid0|read|reads|real|references|regexp|release|rename|repeat|'
             r'replace|require|restrict|return|revoke|right|rlike|schema|'
             r'schemas|second_microsecond|select|sensitive|separator|set|'
             r'show|smallint|soname|spatial|specific|sql|sql_big_result|'
             r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|'
             r'sqlwarning|ssl|starting|straight_join|table|terminated|then|'
             r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|'
             r'usage|use|using|utc_date|utc_time|utc_timestamp|values|'
             r'varying|when|where|while|with|write|x509|xor|year_month|'
             r'zerofill)\b', Keyword),
            # TODO: this list is not complete
            (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo),
            (r'(true|false|null)', Name.Constant),
            # A name directly followed by '(' is a function call.
            (r'([a-z_]\w*)(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[a-z_]\w*', Name),
            (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable),
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }
605
606
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        # SQL command text is highlighted with the plain SQL lexer.
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            # NOTE(review): line[:8] assumes both prompts are exactly 8
            # characters wide ('sqlite> ' is; confirm the continuation
            # prompt literal's spacing really makes it 8 as well).
            if line.startswith('sqlite> ') or line.startswith(' ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    # Non-prompt line: flush the buffered SQL first,
                    # interleaving the recorded prompt tokens.
                    for item in do_insertions(insertions,
                                              sql.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            # Flush any SQL still buffered at end of input.
            for item in do_insertions(insertions,
                                      sql.get_tokens_unprocessed(curcode)):
                yield item
645
646
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    `RQL <http://www.logilab.org/project/rql>`_

    .. versionadded:: 2.0
    """
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            # RQL/CubicWeb built-in entity and relation names.
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Variables are capitalized identifiers, optionally with '?'.
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }

eric ide

mercurial