ThirdParty/Pygments/pygments/lexers/_postgres_builtins.py

changeset 4172
4f20dba37ab6
parent 3484
645c12de6b0c
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
3 pygments.lexers._postgres_builtins 3 pygments.lexers._postgres_builtins
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 Self-updating data files for PostgreSQL lexer. 6 Self-updating data files for PostgreSQL lexer.
7 7
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 from __future__ import unicode_literals
13 try:
14 str = unicode
15 import urllib2 as request
16 except NameError:
17 import urllib.request as request # __IGNORE_WARNING__
18
19 import re
20
21 # One man's constant is another man's variable.
22 SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
23 KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
24 DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
25
def update_myself():
    """Regenerate the DATATYPES, PSEUDO_TYPES and KEYWORDS lists of this file.

    Downloads the two SGML documents named by DATATYPES_URL and
    KEYWORDS_URL, parses them, and rewrites the three list literals in
    ``__file__`` through update_consts().  Everything is fetched and parsed
    before the first rewrite, so a download or parse error leaves the file
    untouched.
    """
    from contextlib import closing

    # The data type document is scanned twice (types and pseudo-types), so
    # it is read into a list; the response is closed as soon as it is read.
    with closing(fetch(DATATYPES_URL)) as response:
        data_file = list(response)
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    with closing(fetch(KEYWORDS_URL)) as response:
        keywords = parse_keywords(response)

    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
35
def parse_keywords(f):
    """Extract the SQL keywords from the ``keywords.sgml`` document.

    f: file-like object whose ``read()`` returns the whole document, either
       as text or as UTF-8 encoded bytes.

    Returns the list of keyword tokens, sorted.
    Raises ValueError if no keyword row is found.
    """
    data = f.read()
    if isinstance(data, bytes):
        # A text pattern cannot be applied to bytes, so decode first.
        data = data.decode('utf-8')

    # A keyword row is a <token> cell directly followed by another cell.
    kw = [m.group(1) for m in re.finditer(
        r'\s*<entry><token>([^<]+)</token></entry>\s*'
        r'<entry>([^<]+)</entry>', data)]

    if not kw:
        raise ValueError('no keyword found')

    kw.sort()
    return kw
48
def parse_datatypes(f):
    """Collect the data type names from the start of ``datatype.sgml``.

    f: iterable of lines, either text or UTF-8 encoded bytes.

    Only lines containing ``<entry><type>`` that come before the first
    line containing ``<sect1`` are examined.

    Returns the distinct type names as a sorted list (possibly empty).
    """
    dt = set()
    for line in f:
        if isinstance(line, bytes):
            # Substring tests and text patterns below need text, not bytes.
            line = line.decode('utf-8')
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # remove all the tags
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Split on the square brackets and drop the pieces that contain
        # a parenthesis
        for outer in line.split('['):
            for piece in outer.split(']'):
                if "(" in piece:
                    continue
                for name in piece.split(','):
                    # collapse inner runs of whitespace to single spaces
                    name = " ".join(name.split())
                    if name:
                        dt.add(name)

    return sorted(dt)
76
def parse_pseudos(f):
    """Extract the pseudo-type names from ``datatype.sgml``.

    f: iterable of lines, either text or UTF-8 encoded bytes.

    Scans forward to the ``datatype-pseudotypes-table`` table, then
    collects every ``<entry><type>NAME`` row up to the closing ``</table>``.
    The type element may be closed with the short form ``</>`` or with
    ``</type>``.

    Returns the names in document order.
    Raises ValueError if the table start, the table end, or any entry is
    missing.
    """
    dt = []
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    re_entry = re.compile(r'\s*<entry><type>([^<]+)</(?:type)?></entry>')
    re_end = re.compile(r'\s*</table>')

    # One shared iterator: the second loop resumes where the first stopped.
    # Bytes lines are decoded so the text patterns can be applied.
    lines = (raw.decode('utf-8') if isinstance(raw, bytes) else raw
             for raw in f)

    for line in lines:
        if re_start.match(line) is not None:
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    for line in lines:
        m = re_entry.match(line)
        if m is not None:
            dt.append(m.group(1))

        if re_end.match(line) is not None:
            break
    else:
        raise ValueError('end of pseudo datatypes table not found')

    if not dt:
        raise ValueError('pseudo datatypes not found')

    return dt
104
def fetch(url):
    """Open *url* with ``request.urlopen`` and return the response object."""
    response = request.urlopen(url)
    return response
107
def update_consts(filename, constname, content):
    """Rewrite the body of the ``constname = [ ... ]`` list in *filename*.

    filename:  path of the Python source file to modify in place.
    constname: name of the list constant; its opening line must look like
               ``NAME = [`` with nothing after the bracket.
    content:   iterable of items; the ``repr()`` of each becomes one list
               element.

    The lines between the opening line and the first following line that
    holds only ``]`` are replaced.  An empty *content* leaves an empty list.

    Raises ValueError if the opening line is missing or occurs more than
    once, or if no closing ``]`` line follows it.
    """
    with open(filename) as f:
        lines = f.readlines()

    # Line to start/end inserting
    re_start = re.compile(r'^%s\s*=\s*\[\s*$' % re.escape(constname))
    re_end = re.compile(r'^\s*\]\s*$')
    start = [n for n, l in enumerate(lines) if re_start.match(l)]
    if not start:
        raise ValueError("couldn't find line containing '%s = ['" % constname)
    if len(start) > 1:
        raise ValueError("too many lines containing '%s = ['" % constname)
    start = start[0] + 1

    end = next((n for n in range(start, len(lines))
                if re_end.match(lines[n])), None)
    if end is None:
        raise ValueError("couldn't find line containing ']' after %s " % constname)

    # Pack the new content in lines not too long
    new_lines = []
    current = []
    for item in (repr(item) for item in content):
        width = sum(map(len, current)) + 2 * len(current) + len(item) + 4
        # Only wrap when the current line already holds something: an
        # over-long single item must not leave an empty line behind, since
        # that would be written out as a bare "," (a syntax error).
        if current and width > 75:
            new_lines.append(current)
            current = []
        current.append(item)
    if current:
        new_lines.append(current)

    lines[start:end] = [" %s,\n" % ", ".join(items) for items in new_lines]

    with open(filename, 'w') as f:
        f.write(''.join(lines))
141
142 12
143 # Autogenerated: please edit them if you like wasting your time. 13 # Autogenerated: please edit them if you like wasting your time.
144 14
145 KEYWORDS = [ 15 KEYWORDS = (
146 'ABORT', 'ABSOLUTE', 'ACCESS', 'ACTION', 'ADD', 'ADMIN', 'AFTER', 16 'ABORT',
147 'AGGREGATE', 'ALL', 'ALSO', 'ALTER', 'ALWAYS', 'ANALYSE', 'ANALYZE', 17 'ABSOLUTE',
148 'AND', 'ANY', 'ARRAY', 'AS', 'ASC', 'ASSERTION', 'ASSIGNMENT', 18 'ACCESS',
149 'ASYMMETRIC', 'AT', 'ATTRIBUTE', 'AUTHORIZATION', 'BACKWARD', 'BEFORE', 19 'ACTION',
150 'BEGIN', 'BETWEEN', 'BIGINT', 'BINARY', 'BIT', 'BOOLEAN', 'BOTH', 'BY', 20 'ADD',
151 'CACHE', 'CALLED', 'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG', 21 'ADMIN',
152 'CHAIN', 'CHAR', 'CHARACTER', 'CHARACTERISTICS', 'CHECK', 'CHECKPOINT', 22 'AFTER',
153 'CLASS', 'CLOSE', 'CLUSTER', 'COALESCE', 'COLLATE', 'COLLATION', 23 'AGGREGATE',
154 'COLUMN', 'COMMENT', 'COMMENTS', 'COMMIT', 'COMMITTED', 'CONCURRENTLY', 24 'ALL',
155 'CONFIGURATION', 'CONNECTION', 'CONSTRAINT', 'CONSTRAINTS', 'CONTENT', 25 'ALSO',
156 'CONTINUE', 'CONVERSION', 'COPY', 'COST', 'CREATE', 'CROSS', 'CSV', 26 'ALTER',
157 'CURRENT', 'CURRENT_CATALOG', 'CURRENT_DATE', 'CURRENT_ROLE', 27 'ALWAYS',
158 'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 28 'ANALYSE',
159 'CURSOR', 'CYCLE', 'DATA', 'DATABASE', 'DAY', 'DEALLOCATE', 'DEC', 29 'ANALYZE',
160 'DECIMAL', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED', 30 'AND',
161 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DESC', 'DICTIONARY', 31 'ANY',
162 'DISABLE', 'DISCARD', 'DISTINCT', 'DO', 'DOCUMENT', 'DOMAIN', 'DOUBLE', 32 'ARRAY',
163 'DROP', 'EACH', 'ELSE', 'ENABLE', 'ENCODING', 'ENCRYPTED', 'END', 33 'AS',
164 'ENUM', 'ESCAPE', 'EXCEPT', 'EXCLUDE', 'EXCLUDING', 'EXCLUSIVE', 34 'ASC',
165 'EXECUTE', 'EXISTS', 'EXPLAIN', 'EXTENSION', 'EXTERNAL', 'EXTRACT', 35 'ASSERTION',
166 'FALSE', 'FAMILY', 'FETCH', 'FIRST', 'FLOAT', 'FOLLOWING', 'FOR', 36 'ASSIGNMENT',
167 'FORCE', 'FOREIGN', 'FORWARD', 'FREEZE', 'FROM', 'FULL', 'FUNCTION', 37 'ASYMMETRIC',
168 'FUNCTIONS', 'GLOBAL', 'GRANT', 'GRANTED', 'GREATEST', 'GROUP', 38 'AT',
169 'HANDLER', 'HAVING', 'HEADER', 'HOLD', 'HOUR', 'IDENTITY', 'IF', 39 'ATTRIBUTE',
170 'ILIKE', 'IMMEDIATE', 'IMMUTABLE', 'IMPLICIT', 'IN', 'INCLUDING', 40 'AUTHORIZATION',
171 'INCREMENT', 'INDEX', 'INDEXES', 'INHERIT', 'INHERITS', 'INITIALLY', 41 'BACKWARD',
172 'INLINE', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT', 'INSTEAD', 42 'BEFORE',
173 'INT', 'INTEGER', 'INTERSECT', 'INTERVAL', 'INTO', 'INVOKER', 'IS', 43 'BEGIN',
174 'ISNULL', 'ISOLATION', 'JOIN', 'KEY', 'LABEL', 'LANGUAGE', 'LARGE', 44 'BETWEEN',
175 'LAST', 'LC_COLLATE', 'LC_CTYPE', 'LEADING', 'LEAST', 'LEFT', 'LEVEL', 45 'BIGINT',
176 'LIKE', 'LIMIT', 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 46 'BINARY',
177 'LOCALTIMESTAMP', 'LOCATION', 'LOCK', 'MAPPING', 'MATCH', 'MAXVALUE', 47 'BIT',
178 'MINUTE', 'MINVALUE', 'MODE', 'MONTH', 'MOVE', 'NAME', 'NAMES', 48 'BOOLEAN',
179 'NATIONAL', 'NATURAL', 'NCHAR', 'NEXT', 'NO', 'NONE', 'NOT', 'NOTHING', 49 'BOTH',
180 'NOTIFY', 'NOTNULL', 'NOWAIT', 'NULL', 'NULLIF', 'NULLS', 'NUMERIC', 50 'BY',
181 'OBJECT', 'OF', 'OFF', 'OFFSET', 'OIDS', 'ON', 'ONLY', 'OPERATOR', 51 'CACHE',
182 'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OVER', 'OVERLAPS', 52 'CALLED',
183 'OVERLAY', 'OWNED', 'OWNER', 'PARSER', 'PARTIAL', 'PARTITION', 53 'CASCADE',
184 'PASSING', 'PASSWORD', 'PLACING', 'PLANS', 'POSITION', 'PRECEDING', 54 'CASCADED',
185 'PRECISION', 'PREPARE', 'PREPARED', 'PRESERVE', 'PRIMARY', 'PRIOR', 55 'CASE',
186 'PRIVILEGES', 'PROCEDURAL', 'PROCEDURE', 'QUOTE', 'RANGE', 'READ', 56 'CAST',
187 'REAL', 'REASSIGN', 'RECHECK', 'RECURSIVE', 'REF', 'REFERENCES', 57 'CATALOG',
188 'REINDEX', 'RELATIVE', 'RELEASE', 'RENAME', 'REPEATABLE', 'REPLACE', 58 'CHAIN',
189 'REPLICA', 'RESET', 'RESTART', 'RESTRICT', 'RETURNING', 'RETURNS', 59 'CHAR',
190 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROW', 'ROWS', 'RULE', 60 'CHARACTER',
191 'SAVEPOINT', 'SCHEMA', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 61 'CHARACTERISTICS',
192 'SELECT', 'SEQUENCE', 'SEQUENCES', 'SERIALIZABLE', 'SERVER', 'SESSION', 62 'CHECK',
193 'SESSION_USER', 'SET', 'SETOF', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 63 'CHECKPOINT',
194 'SMALLINT', 'SOME', 'STABLE', 'STANDALONE', 'START', 'STATEMENT', 64 'CLASS',
195 'STATISTICS', 'STDIN', 'STDOUT', 'STORAGE', 'STRICT', 'STRIP', 65 'CLOSE',
196 'SUBSTRING', 'SYMMETRIC', 'SYSID', 'SYSTEM', 'TABLE', 'TABLES', 66 'CLUSTER',
197 'TABLESPACE', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TEXT', 'THEN', 'TIME', 67 'COALESCE',
198 'TIMESTAMP', 'TO', 'TRAILING', 'TRANSACTION', 'TREAT', 'TRIGGER', 68 'COLLATE',
199 'TRIM', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNBOUNDED', 69 'COLLATION',
200 'UNCOMMITTED', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 70 'COLUMN',
201 'UNLOGGED', 'UNTIL', 'UPDATE', 'USER', 'USING', 'VACUUM', 'VALID', 71 'COMMENT',
202 'VALIDATE', 'VALIDATOR', 'VALUE', 'VALUES', 'VARCHAR', 'VARIADIC', 72 'COMMENTS',
203 'VARYING', 'VERBOSE', 'VERSION', 'VIEW', 'VOLATILE', 'WHEN', 'WHERE', 73 'COMMIT',
204 'WHITESPACE', 'WINDOW', 'WITH', 'WITHOUT', 'WORK', 'WRAPPER', 'WRITE', 74 'COMMITTED',
205 'XML', 'XMLATTRIBUTES', 'XMLCONCAT', 'XMLELEMENT', 'XMLEXISTS', 75 'CONCURRENTLY',
206 'XMLFOREST', 'XMLPARSE', 'XMLPI', 'XMLROOT', 'XMLSERIALIZE', 'YEAR', 76 'CONFIGURATION',
207 'YES', 'ZONE', 77 'CONNECTION',
208 ] 78 'CONSTRAINT',
209 79 'CONSTRAINTS',
210 DATATYPES = [ 80 'CONTENT',
211 'bigint', 'bigserial', 'bit', 'bit varying', 'bool', 'boolean', 'box', 81 'CONTINUE',
212 'bytea', 'char', 'character', 'character varying', 'cidr', 'circle', 82 'CONVERSION',
213 'date', 'decimal', 'double precision', 'float4', 'float8', 'inet', 83 'COPY',
214 'int', 'int2', 'int4', 'int8', 'integer', 'interval', 'json', 'line', 84 'COST',
215 'lseg', 'macaddr', 'money', 'numeric', 'path', 'point', 'polygon', 85 'CREATE',
216 'real', 'serial', 'serial2', 'serial4', 'serial8', 'smallint', 86 'CROSS',
217 'smallserial', 'text', 'time', 'timestamp', 'timestamptz', 'timetz', 87 'CSV',
218 'tsquery', 'tsvector', 'txid_snapshot', 'uuid', 'varbit', 'varchar', 88 'CURRENT',
219 'with time zone', 'without time zone', 'xml', 89 'CURRENT_CATALOG',
220 ] 90 'CURRENT_DATE',
221 91 'CURRENT_ROLE',
222 PSEUDO_TYPES = [ 92 'CURRENT_SCHEMA',
223 'any', 'anyelement', 'anyarray', 'anynonarray', 'anyenum', 'anyrange', 93 'CURRENT_TIME',
224 'cstring', 'internal', 'language_handler', 'fdw_handler', 'record', 94 'CURRENT_TIMESTAMP',
225 'trigger', 'void', 'opaque', 95 'CURRENT_USER',
226 ] 96 'CURSOR',
97 'CYCLE',
98 'DATA',
99 'DATABASE',
100 'DAY',
101 'DEALLOCATE',
102 'DEC',
103 'DECIMAL',
104 'DECLARE',
105 'DEFAULT',
106 'DEFAULTS',
107 'DEFERRABLE',
108 'DEFERRED',
109 'DEFINER',
110 'DELETE',
111 'DELIMITER',
112 'DELIMITERS',
113 'DESC',
114 'DICTIONARY',
115 'DISABLE',
116 'DISCARD',
117 'DISTINCT',
118 'DO',
119 'DOCUMENT',
120 'DOMAIN',
121 'DOUBLE',
122 'DROP',
123 'EACH',
124 'ELSE',
125 'ENABLE',
126 'ENCODING',
127 'ENCRYPTED',
128 'END',
129 'ENUM',
130 'ESCAPE',
131 'EVENT',
132 'EXCEPT',
133 'EXCLUDE',
134 'EXCLUDING',
135 'EXCLUSIVE',
136 'EXECUTE',
137 'EXISTS',
138 'EXPLAIN',
139 'EXTENSION',
140 'EXTERNAL',
141 'EXTRACT',
142 'FALSE',
143 'FAMILY',
144 'FETCH',
145 'FILTER',
146 'FIRST',
147 'FLOAT',
148 'FOLLOWING',
149 'FOR',
150 'FORCE',
151 'FOREIGN',
152 'FORWARD',
153 'FREEZE',
154 'FROM',
155 'FULL',
156 'FUNCTION',
157 'FUNCTIONS',
158 'GLOBAL',
159 'GRANT',
160 'GRANTED',
161 'GREATEST',
162 'GROUP',
163 'HANDLER',
164 'HAVING',
165 'HEADER',
166 'HOLD',
167 'HOUR',
168 'IDENTITY',
169 'IF',
170 'ILIKE',
171 'IMMEDIATE',
172 'IMMUTABLE',
173 'IMPLICIT',
174 'IN',
175 'INCLUDING',
176 'INCREMENT',
177 'INDEX',
178 'INDEXES',
179 'INHERIT',
180 'INHERITS',
181 'INITIALLY',
182 'INLINE',
183 'INNER',
184 'INOUT',
185 'INPUT',
186 'INSENSITIVE',
187 'INSERT',
188 'INSTEAD',
189 'INT',
190 'INTEGER',
191 'INTERSECT',
192 'INTERVAL',
193 'INTO',
194 'INVOKER',
195 'IS',
196 'ISNULL',
197 'ISOLATION',
198 'JOIN',
199 'KEY',
200 'LABEL',
201 'LANGUAGE',
202 'LARGE',
203 'LAST',
204 'LATERAL',
205 'LC_COLLATE',
206 'LC_CTYPE',
207 'LEADING',
208 'LEAKPROOF',
209 'LEAST',
210 'LEFT',
211 'LEVEL',
212 'LIKE',
213 'LIMIT',
214 'LISTEN',
215 'LOAD',
216 'LOCAL',
217 'LOCALTIME',
218 'LOCALTIMESTAMP',
219 'LOCATION',
220 'LOCK',
221 'MAPPING',
222 'MATCH',
223 'MATERIALIZED',
224 'MAXVALUE',
225 'MINUTE',
226 'MINVALUE',
227 'MODE',
228 'MONTH',
229 'MOVE',
230 'NAME',
231 'NAMES',
232 'NATIONAL',
233 'NATURAL',
234 'NCHAR',
235 'NEXT',
236 'NO',
237 'NONE',
238 'NOT',
239 'NOTHING',
240 'NOTIFY',
241 'NOTNULL',
242 'NOWAIT',
243 'NULL',
244 'NULLIF',
245 'NULLS',
246 'NUMERIC',
247 'OBJECT',
248 'OF',
249 'OFF',
250 'OFFSET',
251 'OIDS',
252 'ON',
253 'ONLY',
254 'OPERATOR',
255 'OPTION',
256 'OPTIONS',
257 'OR',
258 'ORDER',
259 'ORDINALITY',
260 'OUT',
261 'OUTER',
262 'OVER',
263 'OVERLAPS',
264 'OVERLAY',
265 'OWNED',
266 'OWNER',
267 'PARSER',
268 'PARTIAL',
269 'PARTITION',
270 'PASSING',
271 'PASSWORD',
272 'PLACING',
273 'PLANS',
274 'POLICY',
275 'POSITION',
276 'PRECEDING',
277 'PRECISION',
278 'PREPARE',
279 'PREPARED',
280 'PRESERVE',
281 'PRIMARY',
282 'PRIOR',
283 'PRIVILEGES',
284 'PROCEDURAL',
285 'PROCEDURE',
286 'PROGRAM',
287 'QUOTE',
288 'RANGE',
289 'READ',
290 'REAL',
291 'REASSIGN',
292 'RECHECK',
293 'RECURSIVE',
294 'REF',
295 'REFERENCES',
296 'REFRESH',
297 'REINDEX',
298 'RELATIVE',
299 'RELEASE',
300 'RENAME',
301 'REPEATABLE',
302 'REPLACE',
303 'REPLICA',
304 'RESET',
305 'RESTART',
306 'RESTRICT',
307 'RETURNING',
308 'RETURNS',
309 'REVOKE',
310 'RIGHT',
311 'ROLE',
312 'ROLLBACK',
313 'ROW',
314 'ROWS',
315 'RULE',
316 'SAVEPOINT',
317 'SCHEMA',
318 'SCROLL',
319 'SEARCH',
320 'SECOND',
321 'SECURITY',
322 'SELECT',
323 'SEQUENCE',
324 'SEQUENCES',
325 'SERIALIZABLE',
326 'SERVER',
327 'SESSION',
328 'SESSION_USER',
329 'SET',
330 'SETOF',
331 'SHARE',
332 'SHOW',
333 'SIMILAR',
334 'SIMPLE',
335 'SMALLINT',
336 'SNAPSHOT',
337 'SOME',
338 'STABLE',
339 'STANDALONE',
340 'START',
341 'STATEMENT',
342 'STATISTICS',
343 'STDIN',
344 'STDOUT',
345 'STORAGE',
346 'STRICT',
347 'STRIP',
348 'SUBSTRING',
349 'SYMMETRIC',
350 'SYSID',
351 'SYSTEM',
352 'TABLE',
353 'TABLES',
354 'TABLESPACE',
355 'TEMP',
356 'TEMPLATE',
357 'TEMPORARY',
358 'TEXT',
359 'THEN',
360 'TIME',
361 'TIMESTAMP',
362 'TO',
363 'TRAILING',
364 'TRANSACTION',
365 'TREAT',
366 'TRIGGER',
367 'TRIM',
368 'TRUE',
369 'TRUNCATE',
370 'TRUSTED',
371 'TYPE',
372 'TYPES',
373 'UNBOUNDED',
374 'UNCOMMITTED',
375 'UNENCRYPTED',
376 'UNION',
377 'UNIQUE',
378 'UNKNOWN',
379 'UNLISTEN',
380 'UNLOGGED',
381 'UNTIL',
382 'UPDATE',
383 'USER',
384 'USING',
385 'VACUUM',
386 'VALID',
387 'VALIDATE',
388 'VALIDATOR',
389 'VALUE',
390 'VALUES',
391 'VARCHAR',
392 'VARIADIC',
393 'VARYING',
394 'VERBOSE',
395 'VERSION',
396 'VIEW',
397 'VIEWS',
398 'VOLATILE',
399 'WHEN',
400 'WHERE',
401 'WHITESPACE',
402 'WINDOW',
403 'WITH',
404 'WITHIN',
405 'WITHOUT',
406 'WORK',
407 'WRAPPER',
408 'WRITE',
409 'XML',
410 'XMLATTRIBUTES',
411 'XMLCONCAT',
412 'XMLELEMENT',
413 'XMLEXISTS',
414 'XMLFOREST',
415 'XMLPARSE',
416 'XMLPI',
417 'XMLROOT',
418 'XMLSERIALIZE',
419 'YEAR',
420 'YES',
421 'ZONE',
422 )
423
424 DATATYPES = (
425 'bigint',
426 'bigserial',
427 'bit',
428 'bit varying',
429 'bool',
430 'boolean',
431 'box',
432 'bytea',
433 'char',
434 'character',
435 'character varying',
436 'cidr',
437 'circle',
438 'date',
439 'decimal',
440 'double precision',
441 'float4',
442 'float8',
443 'inet',
444 'int',
445 'int2',
446 'int4',
447 'int8',
448 'integer',
449 'interval',
450 'json',
451 'jsonb',
452 'line',
453 'lseg',
454 'macaddr',
455 'money',
456 'numeric',
457 'path',
458 'pg_lsn',
459 'point',
460 'polygon',
461 'real',
462 'serial',
463 'serial2',
464 'serial4',
465 'serial8',
466 'smallint',
467 'smallserial',
468 'text',
469 'time',
470 'timestamp',
471 'timestamptz',
472 'timetz',
473 'tsquery',
474 'tsvector',
475 'txid_snapshot',
476 'uuid',
477 'varbit',
478 'varchar',
479 'with time zone',
480 'without time zone',
481 'xml',
482 )
483
484 PSEUDO_TYPES = (
485 'any',
486 'anyelement',
487 'anyarray',
488 'anynonarray',
489 'anyenum',
490 'anyrange',
491 'cstring',
492 'internal',
493 'language_handler',
494 'fdw_handler',
495 'record',
496 'trigger',
497 'void',
498 'opaque',
499 )
227 500
228 # Remove 'trigger' from types 501 # Remove 'trigger' from types
229 PSEUDO_TYPES = sorted(set(PSEUDO_TYPES) - set(map(str.lower, KEYWORDS))) 502 PSEUDO_TYPES = tuple(sorted(set(PSEUDO_TYPES) - set(map(str.lower, KEYWORDS))))
230 503
231 PLPGSQL_KEYWORDS = [ 504 PLPGSQL_KEYWORDS = (
232 'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT', 505 'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT',
233 'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE', 506 'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE',
234 'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE', 507 'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
235 ] 508 )
236 509
237 if __name__ == '__main__': 510
511 if __name__ == '__main__': # pragma: no cover
512 import re
513 try:
514 from urllib import urlopen
515 except ImportError:
516 from urllib.request import urlopen
517
518 from pygments.util import format_lines
519
520 # One man's constant is another man's variable.
521 SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
522 KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
523 DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
524
def update_myself():
    """Regenerate the DATATYPES, PSEUDO_TYPES and KEYWORDS tuples of this file.

    Downloads the two SGML documents named by DATATYPES_URL and
    KEYWORDS_URL, parses them, and rewrites the three definitions in
    ``__file__`` through update_consts().  Everything is fetched and parsed
    before the first rewrite, so a download or parse error leaves the file
    untouched.
    """
    from contextlib import closing

    # The data type document is scanned twice (types and pseudo-types), so
    # it is read into a list; the response is closed as soon as it is read.
    with closing(urlopen(DATATYPES_URL)) as response:
        data_file = list(response)
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    with closing(urlopen(KEYWORDS_URL)) as response:
        keywords = parse_keywords(response)

    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
534
def parse_keywords(f):
    """Extract the SQL keywords from the ``keywords.sgml`` document.

    f: file-like object whose ``read()`` returns the whole document, either
       as text or as UTF-8 encoded bytes.

    Returns the list of keyword tokens, sorted.
    Raises ValueError if no keyword row is found.
    """
    data = f.read()
    if isinstance(data, bytes):
        # A text pattern cannot be applied to bytes, so decode first.
        data = data.decode('utf-8')

    # A keyword row is a <token> cell directly followed by another cell.
    kw = [m.group(1) for m in re.finditer(
        r'\s*<entry><token>([^<]+)</token></entry>\s*'
        r'<entry>([^<]+)</entry>', data)]

    if not kw:
        raise ValueError('no keyword found')

    kw.sort()
    return kw
547
def parse_datatypes(f):
    """Collect the data type names from the start of ``datatype.sgml``.

    f: iterable of lines, either text or UTF-8 encoded bytes.

    Only lines containing ``<entry><type>`` that come before the first
    line containing ``<sect1`` are examined.

    Returns the distinct type names as a sorted list (possibly empty).
    """
    dt = set()
    for line in f:
        if isinstance(line, bytes):
            # Substring tests and text patterns below need text, not bytes.
            line = line.decode('utf-8')
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # remove all the tags
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Split on the square brackets and drop the pieces that contain
        # a parenthesis
        for outer in line.split('['):
            for piece in outer.split(']'):
                if "(" in piece:
                    continue
                for name in piece.split(','):
                    # collapse inner runs of whitespace to single spaces
                    name = " ".join(name.split())
                    if name:
                        dt.add(name)

    return sorted(dt)
575
def parse_pseudos(f):
    """Extract the pseudo-type names from ``datatype.sgml``.

    f: iterable of lines, either text or UTF-8 encoded bytes.

    Scans forward to the ``datatype-pseudotypes-table`` table, then
    collects every ``<entry><type>NAME`` row up to the closing ``</table>``.
    The type element may be closed with the short form ``</>`` or with
    ``</type>``.

    Returns the names in document order.
    Raises ValueError if the table start, the table end, or any entry is
    missing.
    """
    dt = []
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    re_entry = re.compile(r'\s*<entry><type>([^<]+)</(?:type)?></entry>')
    re_end = re.compile(r'\s*</table>')

    # One shared iterator: the second loop resumes where the first stopped.
    # Bytes lines are decoded so the text patterns can be applied.
    lines = (raw.decode('utf-8') if isinstance(raw, bytes) else raw
             for raw in f)

    for line in lines:
        if re_start.match(line) is not None:
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    for line in lines:
        m = re_entry.match(line)
        if m is not None:
            dt.append(m.group(1))

        if re_end.match(line) is not None:
            break
    else:
        raise ValueError('end of pseudo datatypes table not found')

    if not dt:
        raise ValueError('pseudo datatypes not found')

    return dt
603
def update_consts(filename, constname, content):
    """Replace the ``constname = ( ... )`` definition inside *filename*.

    The whole span from the ``NAME = (`` line through the first following
    line that holds only ``)`` is swapped for the text returned by
    ``format_lines(constname, content)``; the rest of the file is kept.

    Raises ValueError if no such definition is found.
    """
    with open(filename) as src:
        text = src.read()

    # Locate the existing definition, opening line through closing paren
    pattern = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S)
    found = pattern.search(text)
    if found is None:
        raise ValueError('Could not find existing definition for %s' %
                         (constname,))

    replacement = format_lines(constname, content)
    head = text[:found.start()]
    tail = text[found.end():]

    with open(filename, 'w') as dst:
        dst.write(head + replacement + tail)
620
238 update_myself() 621 update_myself()
239

eric ide

mercurial