1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.sql |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for various SQL dialects and related interactive sessions. |
|
7 |
|
8 Postgres specific lexers: |
|
9 |
|
10 `PostgresLexer` |
|
11 A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL |
|
12 lexer are: |
|
13 |
|
14 - keywords and data types list parsed from the PG docs (run the |
|
15 `_postgres_builtins` module to update them); |
|
16 - Content of $-strings parsed using a specific lexer, e.g. the content |
|
17 of a PL/Python function is parsed using the Python lexer; |
|
18 - parse PG specific constructs: E-strings, $-strings, U&-strings, |
|
19 different operators and punctuation. |
|
20 |
|
21 `PlPgsqlLexer` |
|
22 A lexer for the PL/pgSQL language. Adds a few specific construct on |
|
23 top of the PG SQL lexer (such as <<label>>). |
|
24 |
|
25 `PostgresConsoleLexer` |
|
26 A lexer to highlight an interactive psql session: |
|
27 |
|
28 - identifies the prompt and does its best to detect the end of command |
|
29 in multiline statement where not all the lines are prefixed by a |
|
30 prompt, telling them apart from the output; |
|
31 - highlights errors in the output and notification levels; |
|
32 - handles psql backslash commands. |
|
33 |
|
34 The ``tests/examplefiles`` contains a few test files with data to be |
|
35 parsed by these lexers. |
|
36 |
|
37 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
|
38 :license: BSD, see LICENSE for details. |
|
39 """ |
|
40 |
|
41 import re |
|
42 |
|
43 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words |
|
44 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ |
|
45 Keyword, Name, String, Number, Generic, Literal |
|
46 from pygments.lexers import get_lexer_by_name, ClassNotFound |
|
47 |
|
48 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ |
|
49 PSEUDO_TYPES, PLPGSQL_KEYWORDS |
|
50 from pygments.lexers._mysql_builtins import \ |
|
51 MYSQL_CONSTANTS, \ |
|
52 MYSQL_DATATYPES, \ |
|
53 MYSQL_FUNCTIONS, \ |
|
54 MYSQL_KEYWORDS, \ |
|
55 MYSQL_OPTIMIZER_HINTS |
|
56 |
|
57 from pygments.lexers import _tsql_builtins |
|
58 |
|
59 |
|
__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

# Split input into lines, keeping the trailing newline with each line.
line_re = re.compile('.*?\n')

# LANGUAGE clause near a $-string; group 1 is the language name and is
# used to pick a sub-lexer for the string body.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO statement before an unlabelled $-string implies plpgsql.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
|
76 |
|
77 |
|
def language_callback(lexer, match):
    """Tokenize the body of a $-string with a nested lexer.

    The sub-lexer is chosen by scanning for a LANGUAGE clause near the
    match; failing that, plpgsql is assumed when the string appears
    inside a DO statement.  With no sub-lexer the body stays a String.
    """
    sublexer = None
    # Look for a LANGUAGE clause shortly after the string...
    after = language_re.match(lexer.text[match.end():match.end() + 100])
    if after is not None:
        sublexer = lexer._get_lexer(after.group(1))
    else:
        # ...then for the closest one shortly before it...
        before = list(language_re.finditer(
            lexer.text[max(0, match.start() - 100):match.start()]))
        if before:
            sublexer = lexer._get_lexer(before[-1].group(1))
        else:
            # ...and finally fall back to plpgsql inside a DO statement.
            do_stmt = list(do_re.finditer(
                lexer.text[max(0, match.start() - 25):match.start()]))
            if do_stmt:
                sublexer = lexer._get_lexer('plpgsql')

    # Groups 1-3: the opening $delimiter$.
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # Group 4: the string body, re-lexed when a sub-lexer was found.
    if sublexer:
        yield from sublexer.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # Groups 5-7: the closing $delimiter$.
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
113 |
|
114 |
|
class PostgresBase:
    """Mixin shared by the Postgres-related lexers.

    Implemented as a mixin rather than a common Lexer subclass on
    purpose: with a shared Lexer ancestor the metaclass would build
    ``_tokens`` once on that ancestor and not per subclass, so e.g.
    PL/pgSQL would end up parsed as plain SQL.  This shortcoming seems
    to suggest that regexp lexers are not really subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Keep the whole input around so `language_callback` can peek
        # before/after a match for a LANGUAGE clause.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        # 'sql' means our own PostgreSQL dialect, not the generic lexer.
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        # pl/<lang> wrappers and untrusted ("u"-suffixed) variants map
        # onto the underlying language's lexer.
        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for name in candidates:
            try:
                return get_lexer_by_name(name, **self.options)
            except ClassNotFound:
                pass
        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
|
150 |
|
151 |
|
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Data type names may contain spaces (e.g. "double
            # precision"), hence the \s+ substitution.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            # Positional parameter, e.g. $1 in a prepared statement.
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings.
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # Dollar-quoted string; groups capture the two matching
            # $tag$ delimiters and the body, which `language_callback`
            # may hand off to a nested lexer.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL, hence the re-push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote = escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled quote = escaped quote
            (r'"', String.Name, '#pop'),
        ],
    }
|
206 |
|
207 |
|
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy the parent's token table (lists copied too) so the edits
    # below do not leak back into PostgresLexer.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # Executed only if the loop above never breaks, i.e. the parent
        # table unexpectedly lacks a Keyword rule.
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
|
239 |
|
240 |
|
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy the PostgreSQL token table and graft psql-specific rules on.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # A backslash command switches to a dedicated state for the rest
    # of the line.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): pushes 'root' instead of '#pop'-ing; this grows
        # the state stack per command but matches longstanding behavior.
        (r'\n', Text, 'root'),
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable reference, possibly quoted, e.g. :'var'.
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
|
266 |
|
267 |
|
# Prompt of an interactive psql session, e.g. "somedb=#" or "somedb=>".
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# End of a SQL command: a semicolon, optionally followed by a -- comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A psql backslash command occupying a line on its own.
# (An earlier, less precise definition of this regex was immediately
# shadowed by this one and has been removed as dead code.)
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
# Message prefixes that signal an error in the server output.
re_error = re.compile(r'(ERROR|FATAL):')
# Any server message prefix (severity and context labels).
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
|
276 |
|
277 |
|
class lookahead:
    """Wrap an iterator and allow pushing back an item."""

    def __init__(self, iterable):
        self._source = iter(iterable)
        self._pushed = None

    def __iter__(self):
        return self

    def send(self, item):
        # Stash *item* so the next call to __next__ yields it first.
        self._pushed = item
        return item

    def __next__(self):
        pushed = self._pushed
        if pushed is None:
            # Nothing pushed back: read from the wrapped iterator.
            return next(self._source)
        self._pushed = None
        return pushed

    next = __next__
|
298 |
|
299 |
|
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        # The SQL within the session is highlighted by the psql-aware
        # regexp lexer defined above.
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a prompt line back when we detect it
        # while scanning the output section.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record the prompt at the current offset into the
                    # accumulated code; do_insertions() re-merges later.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    # Continuation line without a prompt.
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        # Later output lines of this block keep the
                        # error token until the next command starts.
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # Input exhausted without a new prompt: session is over.
                return
|
370 |
|
371 |
|
class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Keyword list.  'TEMP' was previously spelled ' TEMP' with
            # a leading space, which could never match because the \s+
            # rule above always consumes the whitespace first; fixed.
            # NOTE(review): a few historical misspellings are kept as-is
            # ('INDITCATOR', 'SQLWARNINIG', 'PRECEEDS') because changing
            # them would alter highlighting output -- confirm upstream.
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE',
                'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD',
                'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX',
                'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in strings for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Plain SQL is the fallback dialect: give it a tiny constant
        rating so any more specific dialect lexer outranks it."""
        return 0.01
|
502 |
|
503 |
|
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use..
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    # Use re.UNICODE to allow non ASCII letters in names.
    flags = re.IGNORECASE | re.UNICODE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            # GOTO with its target label.
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # Bracket-quoted identifier, e.g. [My Table].
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            # /* ... */ comments nest in T-SQL, hence the re-push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Score the likelihood that *text* is T-SQL (0.0 - 1.0)."""
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating
|
586 |
|
587 |
|
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Text, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        """Score the likelihood that *text* is MySQL, favouring
        backtick-quoted names over bracket-quoted (T-SQL) ones."""
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
|
769 |
|
770 |
|
class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            # Both prompts are exactly 8 characters wide ('sqlite> '
            # and '   ...> '), matching the line[:8]/line[8:] split
            # below; the continuation prompt literal is spelled with
            # three leading spaces for that reason.
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                # Non-prompt line: first flush any accumulated SQL...
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                # ...then emit the line itself as error or plain output.
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        # Flush a trailing command that was never followed by output.
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
|
808 |
|
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    `RQL <http://www.logilab.org/project/rql>`_

    .. versionadded:: 2.0
    """
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Uppercase identifiers with an optional trailing '?'
            # (presumably RQL's optional-variable marker -- confirm
            # against the RQL documentation).
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }
|