|
# -*- coding: utf-8 -*-
"""
    pygments.lexers._postgres_builtins
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Self-updating data files for PostgreSQL lexer.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""


# Autogenerated: please edit them if you like wasting your time.
|
14 |
|
# SQL key words, as scraped from PostgreSQL's keywords.sgml by
# parse_keywords().  Running this module as a script regenerates the tuple
# through update_consts(), whose regex looks for a line ending in
# ``KEYWORDS = (`` and the next line holding only ``)`` -- keep that layout.
KEYWORDS = (
    'ABORT',
    'ABSOLUTE',
    'ACCESS',
    'ACTION',
    'ADD',
    'ADMIN',
    'AFTER',
    'AGGREGATE',
    'ALL',
    'ALSO',
    'ALTER',
    'ALWAYS',
    'ANALYSE',
    'ANALYZE',
    'AND',
    'ANY',
    'ARRAY',
    'AS',
    'ASC',
    'ASSERTION',
    'ASSIGNMENT',
    'ASYMMETRIC',
    'AT',
    'ATTRIBUTE',
    'AUTHORIZATION',
    'BACKWARD',
    'BEFORE',
    'BEGIN',
    'BETWEEN',
    'BIGINT',
    'BINARY',
    'BIT',
    'BOOLEAN',
    'BOTH',
    'BY',
    'CACHE',
    'CALLED',
    'CASCADE',
    'CASCADED',
    'CASE',
    'CAST',
    'CATALOG',
    'CHAIN',
    'CHAR',
    'CHARACTER',
    'CHARACTERISTICS',
    'CHECK',
    'CHECKPOINT',
    'CLASS',
    'CLOSE',
    'CLUSTER',
    'COALESCE',
    'COLLATE',
    'COLLATION',
    'COLUMN',
    'COMMENT',
    'COMMENTS',
    'COMMIT',
    'COMMITTED',
    'CONCURRENTLY',
    'CONFIGURATION',
    'CONNECTION',
    'CONSTRAINT',
    'CONSTRAINTS',
    'CONTENT',
    'CONTINUE',
    'CONVERSION',
    'COPY',
    'COST',
    'CREATE',
    'CROSS',
    'CSV',
    'CURRENT',
    'CURRENT_CATALOG',
    'CURRENT_DATE',
    'CURRENT_ROLE',
    'CURRENT_SCHEMA',
    'CURRENT_TIME',
    'CURRENT_TIMESTAMP',
    'CURRENT_USER',
    'CURSOR',
    'CYCLE',
    'DATA',
    'DATABASE',
    'DAY',
    'DEALLOCATE',
    'DEC',
    'DECIMAL',
    'DECLARE',
    'DEFAULT',
    'DEFAULTS',
    'DEFERRABLE',
    'DEFERRED',
    'DEFINER',
    'DELETE',
    'DELIMITER',
    'DELIMITERS',
    'DESC',
    'DICTIONARY',
    'DISABLE',
    'DISCARD',
    'DISTINCT',
    'DO',
    'DOCUMENT',
    'DOMAIN',
    'DOUBLE',
    'DROP',
    'EACH',
    'ELSE',
    'ENABLE',
    'ENCODING',
    'ENCRYPTED',
    'END',
    'ENUM',
    'ESCAPE',
    'EVENT',
    'EXCEPT',
    'EXCLUDE',
    'EXCLUDING',
    'EXCLUSIVE',
    'EXECUTE',
    'EXISTS',
    'EXPLAIN',
    'EXTENSION',
    'EXTERNAL',
    'EXTRACT',
    'FALSE',
    'FAMILY',
    'FETCH',
    'FILTER',
    'FIRST',
    'FLOAT',
    'FOLLOWING',
    'FOR',
    'FORCE',
    'FOREIGN',
    'FORWARD',
    'FREEZE',
    'FROM',
    'FULL',
    'FUNCTION',
    'FUNCTIONS',
    'GLOBAL',
    'GRANT',
    'GRANTED',
    'GREATEST',
    'GROUP',
    'HANDLER',
    'HAVING',
    'HEADER',
    'HOLD',
    'HOUR',
    'IDENTITY',
    'IF',
    'ILIKE',
    'IMMEDIATE',
    'IMMUTABLE',
    'IMPLICIT',
    'IN',
    'INCLUDING',
    'INCREMENT',
    'INDEX',
    'INDEXES',
    'INHERIT',
    'INHERITS',
    'INITIALLY',
    'INLINE',
    'INNER',
    'INOUT',
    'INPUT',
    'INSENSITIVE',
    'INSERT',
    'INSTEAD',
    'INT',
    'INTEGER',
    'INTERSECT',
    'INTERVAL',
    'INTO',
    'INVOKER',
    'IS',
    'ISNULL',
    'ISOLATION',
    'JOIN',
    'KEY',
    'LABEL',
    'LANGUAGE',
    'LARGE',
    'LAST',
    'LATERAL',
    'LC_COLLATE',
    'LC_CTYPE',
    'LEADING',
    'LEAKPROOF',
    'LEAST',
    'LEFT',
    'LEVEL',
    'LIKE',
    'LIMIT',
    'LISTEN',
    'LOAD',
    'LOCAL',
    'LOCALTIME',
    'LOCALTIMESTAMP',
    'LOCATION',
    'LOCK',
    'MAPPING',
    'MATCH',
    'MATERIALIZED',
    'MAXVALUE',
    'MINUTE',
    'MINVALUE',
    'MODE',
    'MONTH',
    'MOVE',
    'NAME',
    'NAMES',
    'NATIONAL',
    'NATURAL',
    'NCHAR',
    'NEXT',
    'NO',
    'NONE',
    'NOT',
    'NOTHING',
    'NOTIFY',
    'NOTNULL',
    'NOWAIT',
    'NULL',
    'NULLIF',
    'NULLS',
    'NUMERIC',
    'OBJECT',
    'OF',
    'OFF',
    'OFFSET',
    'OIDS',
    'ON',
    'ONLY',
    'OPERATOR',
    'OPTION',
    'OPTIONS',
    'OR',
    'ORDER',
    'ORDINALITY',
    'OUT',
    'OUTER',
    'OVER',
    'OVERLAPS',
    'OVERLAY',
    'OWNED',
    'OWNER',
    'PARSER',
    'PARTIAL',
    'PARTITION',
    'PASSING',
    'PASSWORD',
    'PLACING',
    'PLANS',
    'POLICY',
    'POSITION',
    'PRECEDING',
    'PRECISION',
    'PREPARE',
    'PREPARED',
    'PRESERVE',
    'PRIMARY',
    'PRIOR',
    'PRIVILEGES',
    'PROCEDURAL',
    'PROCEDURE',
    'PROGRAM',
    'QUOTE',
    'RANGE',
    'READ',
    'REAL',
    'REASSIGN',
    'RECHECK',
    'RECURSIVE',
    'REF',
    'REFERENCES',
    'REFRESH',
    'REINDEX',
    'RELATIVE',
    'RELEASE',
    'RENAME',
    'REPEATABLE',
    'REPLACE',
    'REPLICA',
    'RESET',
    'RESTART',
    'RESTRICT',
    'RETURNING',
    'RETURNS',
    'REVOKE',
    'RIGHT',
    'ROLE',
    'ROLLBACK',
    'ROW',
    'ROWS',
    'RULE',
    'SAVEPOINT',
    'SCHEMA',
    'SCROLL',
    'SEARCH',
    'SECOND',
    'SECURITY',
    'SELECT',
    'SEQUENCE',
    'SEQUENCES',
    'SERIALIZABLE',
    'SERVER',
    'SESSION',
    'SESSION_USER',
    'SET',
    'SETOF',
    'SHARE',
    'SHOW',
    'SIMILAR',
    'SIMPLE',
    'SMALLINT',
    'SNAPSHOT',
    'SOME',
    'STABLE',
    'STANDALONE',
    'START',
    'STATEMENT',
    'STATISTICS',
    'STDIN',
    'STDOUT',
    'STORAGE',
    'STRICT',
    'STRIP',
    'SUBSTRING',
    'SYMMETRIC',
    'SYSID',
    'SYSTEM',
    'TABLE',
    'TABLES',
    'TABLESPACE',
    'TEMP',
    'TEMPLATE',
    'TEMPORARY',
    'TEXT',
    'THEN',
    'TIME',
    'TIMESTAMP',
    'TO',
    'TRAILING',
    'TRANSACTION',
    'TREAT',
    'TRIGGER',
    'TRIM',
    'TRUE',
    'TRUNCATE',
    'TRUSTED',
    'TYPE',
    'TYPES',
    'UNBOUNDED',
    'UNCOMMITTED',
    'UNENCRYPTED',
    'UNION',
    'UNIQUE',
    'UNKNOWN',
    'UNLISTEN',
    'UNLOGGED',
    'UNTIL',
    'UPDATE',
    'USER',
    'USING',
    'VACUUM',
    'VALID',
    'VALIDATE',
    'VALIDATOR',
    'VALUE',
    'VALUES',
    'VARCHAR',
    'VARIADIC',
    'VARYING',
    'VERBOSE',
    'VERSION',
    'VIEW',
    'VIEWS',
    'VOLATILE',
    'WHEN',
    'WHERE',
    'WHITESPACE',
    'WINDOW',
    'WITH',
    'WITHIN',
    'WITHOUT',
    'WORK',
    'WRAPPER',
    'WRITE',
    'XML',
    'XMLATTRIBUTES',
    'XMLCONCAT',
    'XMLELEMENT',
    'XMLEXISTS',
    'XMLFOREST',
    'XMLPARSE',
    'XMLPI',
    'XMLROOT',
    'XMLSERIALIZE',
    'YEAR',
    'YES',
    'ZONE',
)
|
423 |
|
# Data type names, as scraped from PostgreSQL's datatype.sgml by
# parse_datatypes().  Multi-word entries such as 'double precision' and the
# optional suffixes 'with time zone' / 'without time zone' are separate items.
# Regenerated through update_consts(); keep the ``DATATYPES = (`` ... ``)``
# layout its regex expects.
DATATYPES = (
    'bigint',
    'bigserial',
    'bit',
    'bit varying',
    'bool',
    'boolean',
    'box',
    'bytea',
    'char',
    'character',
    'character varying',
    'cidr',
    'circle',
    'date',
    'decimal',
    'double precision',
    'float4',
    'float8',
    'inet',
    'int',
    'int2',
    'int4',
    'int8',
    'integer',
    'interval',
    'json',
    'jsonb',
    'line',
    'lseg',
    'macaddr',
    'money',
    'numeric',
    'path',
    'pg_lsn',
    'point',
    'polygon',
    'real',
    'serial',
    'serial2',
    'serial4',
    'serial8',
    'smallint',
    'smallserial',
    'text',
    'time',
    'timestamp',
    'timestamptz',
    'timetz',
    'tsquery',
    'tsvector',
    'txid_snapshot',
    'uuid',
    'varbit',
    'varchar',
    'with time zone',
    'without time zone',
    'xml',
)
|
483 |
|
# Pseudo-type names, as scraped from the pseudo-types table of PostgreSQL's
# datatype.sgml by parse_pseudos(), in document order.  Only this literal is
# rewritten by update_consts(); the filtering statement below is left alone
# because it does not match the ``NAME = (`` ... ``)`` layout.
PSEUDO_TYPES = (
    'any',
    'anyelement',
    'anyarray',
    'anynonarray',
    'anyenum',
    'anyrange',
    'cstring',
    'internal',
    'language_handler',
    'fdw_handler',
    'record',
    'trigger',
    'void',
    'opaque',
)

# Drop every pseudo-type whose name is also listed (case-insensitively) in
# KEYWORDS -- with the tables above that removes 'any' and 'trigger' -- and
# store the survivors as a sorted tuple.
# NOTE(review): presumably done so those words are highlighted as key words
# rather than as types; confirm against the lexer that consumes these tables.
PSEUDO_TYPES = tuple(sorted(set(PSEUDO_TYPES) - set(map(str.lower, KEYWORDS))))
|
503 |
|
# Additional key words, presumably specific to PL/pgSQL (judging by the
# name).  Unlike the tables above, update_myself() never regenerates this
# tuple, so it is maintained by hand.
PLPGSQL_KEYWORDS = (
    'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT',
    'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE',
    'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
)
|
509 |
|
510 |
|
# Script mode: download the PostgreSQL documentation sources and rewrite the
# tables defined in this module.
if __name__ == '__main__':  # pragma: no cover
    import re
    try:
        # Python 2 exposes urlopen directly on urllib ...
        from urllib import urlopen
    except ImportError:
        # ... Python 3 moved it to urllib.request.
        from urllib.request import urlopen

    # Renders a name and a sequence of strings as a ``NAME = (...)`` block.
    from pygments.util import format_lines

    # One man's constant is another man's variable.
    # Base URL of the raw PostgreSQL sources (master branch) and the two
    # documentation files the tables are scraped from.
    SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
    KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
    DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
|
524 |
|
def update_myself():
    """Download the PostgreSQL doc sources and rewrite this module's tables.

    Fetches ``DATATYPES_URL`` and ``KEYWORDS_URL``, parses them with
    ``parse_datatypes``, ``parse_pseudos`` and ``parse_keywords``, and then
    replaces the ``DATATYPES``, ``PSEUDO_TYPES`` and ``KEYWORDS`` definitions
    in this very file through ``update_consts``.

    All downloading and parsing happens before the first write, so a network
    or parse failure leaves the file untouched.

    :raises ValueError: propagated from the parsers or from ``update_consts``
        when the expected markup or definition cannot be found.
    """
    # Imported locally so this function does not rely on new module imports.
    import io
    from contextlib import closing

    def fetch(url):
        """Return the document at *url* as a native string."""
        # closing() guarantees the response is released even on error; it is
        # used instead of a bare ``with`` because the Python 2 urllib
        # response does not support the with statement.
        with closing(urlopen(url)) as response:
            raw = response.read()
        if bytes is not str and isinstance(raw, bytes):
            # Python 3 delivers bytes, but the parsers search with text
            # patterns; on Python 2 the native str is passed through as is.
            return raw.decode('utf-8')
        return raw

    datatype_text = fetch(DATATYPES_URL)
    # Keep the line endings, mirroring iteration over the response object.
    data_file = datatype_text.splitlines(True)
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    keywords_text = fetch(KEYWORDS_URL)
    # parse_keywords() calls read() on its argument, so wrap the text in an
    # in-memory file that matches the native string type.
    wrapper = io.BytesIO if isinstance(keywords_text, bytes) else io.StringIO
    keywords = parse_keywords(wrapper(keywords_text))

    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
|
534 |
|
def parse_keywords(f):
    """Extract the key words listed in a ``keywords.sgml`` document.

    :param f: file-like object; its ``read()`` may return text or UTF-8
        encoded bytes (what ``urlopen`` yields on Python 3).
    :returns: sorted list of the ``<token>`` values found.
    :raises ValueError: if the document contains no key word entry.
    """
    text = f.read()
    if bytes is not str and isinstance(text, bytes):
        # A text pattern cannot search bytes on Python 3; decode first.
        text = text.decode('utf-8')

    # A key word row is a <token> cell immediately followed by another
    # non-empty plain <entry> cell; only the token itself is kept.
    re_entry = re.compile(
        r'\s*<entry><token>([^<]+)</token></entry>\s*'
        r'<entry>([^<]+)</entry>')
    kw = sorted(m.group(1) for m in re_entry.finditer(text))

    if not kw:
        raise ValueError('no keyword found')

    return kw
|
547 |
|
def parse_datatypes(f):
    """Extract data type names from the head of a ``datatype.sgml`` document.

    Only lines containing ``<entry><type>`` are considered, and scanning
    stops at the first line containing ``<sect1``.

    :param f: iterable of lines, as text or as UTF-8 encoded bytes (what
        iterating a ``urlopen`` response yields on Python 3).
    :returns: sorted list of unique type names with inner whitespace
        normalised to single spaces.
    """
    found = set()
    for line in f:
        if bytes is not str and isinstance(line, bytes):
            # The substring tests and text patterns below need text.
            line = line.decode('utf-8')
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # Remove the placeholders first (tag and content), then every
        # remaining tag.
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Cut the line at every square bracket and drop the fragments that
        # contain a parenthesis (the emptied "(p)" style modifiers).
        for fragment in re.split(r'[\[\]]', line):
            if '(' in fragment:
                continue
            # A fragment may list several aliases separated by commas.
            for name in fragment.split(','):
                name = " ".join(name.split())
                if name:
                    found.add(name)

    return sorted(found)
|
575 |
|
def parse_pseudos(f):
    """Extract pseudo-type names from a ``datatype.sgml`` document.

    Looks for the table whose id is ``datatype-pseudotypes-table`` and
    collects every ``<entry><type>NAME</type></entry>`` line up to the
    closing ``</table>``.  Both the explicit ``</type>`` close tag and the
    SGML short form ``</>`` are accepted.

    :param f: iterable of lines, as text or as UTF-8 encoded bytes.
    :returns: list of pseudo-type names in document order.
    :raises ValueError: if the table start, the table end, or any entry
        inside the table cannot be found.
    """
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    # "(?:type)?" accepts both "</type>" and the SGML short close tag "</>".
    re_entry = re.compile(r'\s*<entry><type>([^<]+)</(?:type)?></entry>')
    re_end = re.compile(r'\s*</table>')

    def as_text(line):
        # Iterating a urlopen response yields bytes on Python 3.
        if bytes is not str and isinstance(line, bytes):
            return line.decode('utf-8')
        return line

    # One shared iterator: the second loop resumes right after the line
    # that opened the table.
    lines = (as_text(line) for line in f)

    for line in lines:
        if re_start.match(line) is not None:
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    dt = []
    for line in lines:
        m = re_entry.match(line)
        if m is not None:
            dt.append(m.group(1))

        if re_end.match(line) is not None:
            break
    else:
        raise ValueError('end of pseudo datatypes table not found')

    if not dt:
        raise ValueError('pseudo datatypes not found')

    return dt
|
603 |
|
def update_consts(filename, constname, content):
    """Replace the ``constname = ( ... )`` definition inside *filename*.

    The first block that starts on a line reading ``constname = (`` and ends
    on a line holding only ``)`` is swapped for the text produced by
    ``format_lines(constname, content)``; the rest of the file is written
    back unchanged.

    :param filename: path of the Python source file to rewrite in place.
    :param constname: name of the module-level constant to replace.
    :param content: sequence of strings forming the new value.
    :raises ValueError: if no matching definition exists in the file (the
        file is not modified in that case).
    """
    with open(filename) as f:
        data = f.read()

    # Line to start/end inserting.  The name is escaped so that it is matched
    # literally even if it contains regex metacharacters.
    re_match = re.compile(
        r'^%s\s*=\s*\($.*?^\s*\)$' % re.escape(constname), re.M | re.S)
    m = re_match.search(data)
    if m is None:
        raise ValueError('Could not find existing definition for %s' %
                         (constname,))

    new_block = format_lines(constname, content)
    data = data[:m.start()] + new_block + data[m.end():]

    with open(filename, 'w') as f:
        f.write(data)
|
620 |
|
621 update_myself() |