|
1 """ |
|
2 pygments.lexers._postgres_builtins |
|
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
4 |
|
5 Self-updating data files for PostgreSQL lexer. |
|
6 |
|
7 :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS. |
|
8 :license: BSD, see LICENSE for details. |
|
9 """ |
|
10 |
|
11 import re |
|
12 import urllib.request, urllib.parse, urllib.error |
|
13 |
|
# Where the PostgreSQL documentation sources are downloaded from.  Both page
# URLs are derived from SOURCE_URL, so pointing it at another tree moves
# them along with it.
SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
KEYWORDS_URL = ''.join((SOURCE_URL, '/doc/src/sgml/keywords.sgml'))
DATATYPES_URL = ''.join((SOURCE_URL, '/doc/src/sgml/datatype.sgml'))
|
18 |
|
def update_myself():
    """Refresh the generated constant lists stored in this very file.

    Downloads the pages named by ``DATATYPES_URL`` and ``KEYWORDS_URL``,
    parses them, and rewrites the ``DATATYPES``, ``PSEUDO_TYPES`` and
    ``KEYWORDS`` list literals in ``__file__``.  All parsing happens before
    the first write, so a download or parse error leaves the file untouched.
    """
    # The data type page is scanned twice (types, then pseudo-types), so it
    # is materialised as a list of lines.  The ``with`` closes the download.
    with fetch(DATATYPES_URL) as response:
        data_file = list(response)
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    with fetch(KEYWORDS_URL) as response:
        keywords = parse_keywords(response)

    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
|
28 |
|
def parse_keywords(f):
    """Return the sorted keyword names found in the keywords SGML page.

    :param f: file-like object; ``f.read()`` may return ``str`` or UTF-8
        encoded ``bytes`` (which is what an HTTP response returns).
    :return: list with the text of every matched ``<token>`` cell, sorted.
    :raises ValueError: if not a single keyword entry is found.
    """
    text = f.read()
    if isinstance(text, bytes):
        # The pattern below is a str pattern and cannot be applied to bytes.
        text = text.decode('utf-8')

    # A keyword row is a <token> cell followed by one more <entry> cell;
    # only the token text (group 1) is kept.
    kw = sorted(
        m.group(1) for m in re.finditer(
            r'\s*<entry><token>([^<]+)</token></entry>\s*'
            r'<entry>([^<]+)</entry>', text))

    if not kw:
        raise ValueError('no keyword found')

    return kw
|
41 |
|
def parse_datatypes(f):
    """Return the sorted, de-duplicated type names listed before ``<sect1``.

    Only lines containing ``<entry><type>`` are considered, and scanning
    stops at the first line containing ``<sect1``.

    :param f: iterable of lines, either ``str`` or UTF-8 encoded ``bytes``.
    :return: sorted list of type names with inner whitespace normalised.
    """
    dt = set()
    for line in f:
        if isinstance(line, bytes):
            # Lines read straight from an HTTP response are bytes.
            line = line.decode('utf-8')
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        #   time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # remove all the tags
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Split on the square brackets and drop every piece that holds a
        # parenthesis (what is left of a modifier such as "(p)").
        pieces = [part
                  for chunk in line.split('[')
                  for part in chunk.split(']')
                  if "(" not in part]
        for piece in pieces:
            for name in piece.split(','):
                name = name.strip()
                if name:
                    # Collapse runs of whitespace inside multi-word names.
                    dt.add(" ".join(name.split()))

    return sorted(dt)
|
69 |
|
def parse_pseudos(f):
    """Return the pseudo-type names listed in the pseudo-types table.

    The table starts at the line opening
    ``<table id="datatype-pseudotypes-table">`` and ends at the next
    ``</table>`` line; names are returned in document order.

    :param f: iterable of lines, either ``str`` or UTF-8 encoded ``bytes``.
    :return: list of pseudo-type names.
    :raises ValueError: if the table start or end is not found, or if the
        table holds no type entry.
    """
    dt = []
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    # Accept both the SGML short close tag "</>" and an explicit "</type>".
    re_entry = re.compile(r'\s*<entry><type>([^<]+)</(?:type)?></entry>')
    re_end = re.compile(r'\s*</table>')

    # One shared iterator: the second loop resumes where the first stopped.
    # Bytes lines (as read from an HTTP response) are decoded on the fly.
    lines = (line.decode('utf-8') if isinstance(line, bytes) else line
             for line in f)

    for line in lines:
        if re_start.match(line) is not None:
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    for line in lines:
        m = re_entry.match(line)
        if m is not None:
            dt.append(m.group(1))

        if re_end.match(line) is not None:
            break
    else:
        raise ValueError('end of pseudo datatypes table not found')

    if not dt:
        raise ValueError('pseudo datatypes not found')

    return dt
|
97 |
|
def fetch(url):
    """Download *url* and return its content as an in-memory text stream.

    The body is decoded as UTF-8 so that callers get ``str`` from
    ``read()`` and from line iteration instead of the ``bytes`` a raw
    response yields, and the response is closed before returning.

    :param url: address to download.
    :return: ``io.StringIO`` holding the decoded document.
    """
    import io

    with urllib.request.urlopen(url) as response:
        return io.StringIO(response.read().decode('utf-8'))
|
100 |
|
def update_consts(filename, constname, content):
    """Replace the items of the list literal ``constname`` in ``filename``.

    The file must contain exactly one line of the form ``constname = [``.
    Everything between that line and the next line holding only ``]`` is
    replaced by the ``repr()`` of each item of *content*, several per line.
    With empty *content* the list is simply left empty.

    :param filename: path of the source file, rewritten in place.
    :param constname: name of the list to update.
    :param content: iterable of the new items.
    :raises ValueError: if the opening line is missing or occurs more than
        once, or if no closing ``]`` line follows it.
    """
    with open(filename, encoding='utf-8') as f:
        lines = f.readlines()

    # Line to start/end inserting
    re_start = re.compile(r'^%s\s*=\s*\[\s*$' % re.escape(constname))
    re_end = re.compile(r'^\s*\]\s*$')
    start = [n for n, l in enumerate(lines) if re_start.match(l)]
    if not start:
        raise ValueError("couldn't find line containing '%s = ['" % constname)
    if len(start) > 1:
        raise ValueError("too many lines containing '%s = ['" % constname)
    start = start[0] + 1

    end = [n for n, l in enumerate(lines) if n >= start and re_end.match(l)]
    if not end:
        raise ValueError("couldn't find line containing ']' after %s " % constname)
    end = end[0]

    # Pack the new content in lines not too long.  A row is only closed once
    # it holds at least one item, so neither empty content nor an oversized
    # first item can produce a row consisting of a lone comma.
    # NOTE(review): the budget below reserves 4 columns while the emitted
    # prefix is a single space -- confirm the intended indentation.
    rows = []
    for item in map(repr, content):
        current = rows[-1] if rows else None
        if (current is not None
                and sum(map(len, current)) + 2 * len(current) + len(item) + 4 <= 75):
            current.append(item)
        else:
            rows.append([item])

    lines[start:end] = [" %s,\n" % ", ".join(items) for items in rows]

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(''.join(lines))
|
134 |
|
135 |
|
136 # Autogenerated: please edit them if you like wasting your time. |
|
137 |
|
# Written by update_myself() from the parsed keywords page; the items are
# replaced wholesale on every run.  update_consts() finds the list through
# its opening line and the next line holding only the closing bracket, so
# keep both on lines of their own.
KEYWORDS = [
    'ABORT', 'ABSOLUTE', 'ACCESS', 'ACTION', 'ADD', 'ADMIN', 'AFTER',
    'AGGREGATE', 'ALL', 'ALSO', 'ALTER', 'ALWAYS', 'ANALYSE', 'ANALYZE',
    'AND', 'ANY', 'ARRAY', 'AS', 'ASC', 'ASSERTION', 'ASSIGNMENT',
    'ASYMMETRIC', 'AT', 'ATTRIBUTE', 'AUTHORIZATION', 'BACKWARD', 'BEFORE',
    'BEGIN', 'BETWEEN', 'BIGINT', 'BINARY', 'BIT', 'BOOLEAN', 'BOTH', 'BY',
    'CACHE', 'CALLED', 'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG',
    'CHAIN', 'CHAR', 'CHARACTER', 'CHARACTERISTICS', 'CHECK', 'CHECKPOINT',
    'CLASS', 'CLOSE', 'CLUSTER', 'COALESCE', 'COLLATE', 'COLLATION',
    'COLUMN', 'COMMENT', 'COMMENTS', 'COMMIT', 'COMMITTED', 'CONCURRENTLY',
    'CONFIGURATION', 'CONNECTION', 'CONSTRAINT', 'CONSTRAINTS', 'CONTENT',
    'CONTINUE', 'CONVERSION', 'COPY', 'COST', 'CREATE', 'CROSS', 'CSV',
    'CURRENT', 'CURRENT_CATALOG', 'CURRENT_DATE', 'CURRENT_ROLE',
    'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
    'CURSOR', 'CYCLE', 'DATA', 'DATABASE', 'DAY', 'DEALLOCATE', 'DEC',
    'DECIMAL', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED',
    'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DESC', 'DICTIONARY',
    'DISABLE', 'DISCARD', 'DISTINCT', 'DO', 'DOCUMENT', 'DOMAIN', 'DOUBLE',
    'DROP', 'EACH', 'ELSE', 'ENABLE', 'ENCODING', 'ENCRYPTED', 'END',
    'ENUM', 'ESCAPE', 'EXCEPT', 'EXCLUDE', 'EXCLUDING', 'EXCLUSIVE',
    'EXECUTE', 'EXISTS', 'EXPLAIN', 'EXTENSION', 'EXTERNAL', 'EXTRACT',
    'FALSE', 'FAMILY', 'FETCH', 'FIRST', 'FLOAT', 'FOLLOWING', 'FOR',
    'FORCE', 'FOREIGN', 'FORWARD', 'FREEZE', 'FROM', 'FULL', 'FUNCTION',
    'FUNCTIONS', 'GLOBAL', 'GRANT', 'GRANTED', 'GREATEST', 'GROUP',
    'HANDLER', 'HAVING', 'HEADER', 'HOLD', 'HOUR', 'IDENTITY', 'IF',
    'ILIKE', 'IMMEDIATE', 'IMMUTABLE', 'IMPLICIT', 'IN', 'INCLUDING',
    'INCREMENT', 'INDEX', 'INDEXES', 'INHERIT', 'INHERITS', 'INITIALLY',
    'INLINE', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT', 'INSTEAD',
    'INT', 'INTEGER', 'INTERSECT', 'INTERVAL', 'INTO', 'INVOKER', 'IS',
    'ISNULL', 'ISOLATION', 'JOIN', 'KEY', 'LABEL', 'LANGUAGE', 'LARGE',
    'LAST', 'LC_COLLATE', 'LC_CTYPE', 'LEADING', 'LEAST', 'LEFT', 'LEVEL',
    'LIKE', 'LIMIT', 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME',
    'LOCALTIMESTAMP', 'LOCATION', 'LOCK', 'MAPPING', 'MATCH', 'MAXVALUE',
    'MINUTE', 'MINVALUE', 'MODE', 'MONTH', 'MOVE', 'NAME', 'NAMES',
    'NATIONAL', 'NATURAL', 'NCHAR', 'NEXT', 'NO', 'NONE', 'NOT', 'NOTHING',
    'NOTIFY', 'NOTNULL', 'NOWAIT', 'NULL', 'NULLIF', 'NULLS', 'NUMERIC',
    'OBJECT', 'OF', 'OFF', 'OFFSET', 'OIDS', 'ON', 'ONLY', 'OPERATOR',
    'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OVER', 'OVERLAPS',
    'OVERLAY', 'OWNED', 'OWNER', 'PARSER', 'PARTIAL', 'PARTITION',
    'PASSING', 'PASSWORD', 'PLACING', 'PLANS', 'POSITION', 'PRECEDING',
    'PRECISION', 'PREPARE', 'PREPARED', 'PRESERVE', 'PRIMARY', 'PRIOR',
    'PRIVILEGES', 'PROCEDURAL', 'PROCEDURE', 'QUOTE', 'RANGE', 'READ',
    'REAL', 'REASSIGN', 'RECHECK', 'RECURSIVE', 'REF', 'REFERENCES',
    'REINDEX', 'RELATIVE', 'RELEASE', 'RENAME', 'REPEATABLE', 'REPLACE',
    'REPLICA', 'RESET', 'RESTART', 'RESTRICT', 'RETURNING', 'RETURNS',
    'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROW', 'ROWS', 'RULE',
    'SAVEPOINT', 'SCHEMA', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY',
    'SELECT', 'SEQUENCE', 'SEQUENCES', 'SERIALIZABLE', 'SERVER', 'SESSION',
    'SESSION_USER', 'SET', 'SETOF', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE',
    'SMALLINT', 'SOME', 'STABLE', 'STANDALONE', 'START', 'STATEMENT',
    'STATISTICS', 'STDIN', 'STDOUT', 'STORAGE', 'STRICT', 'STRIP',
    'SUBSTRING', 'SYMMETRIC', 'SYSID', 'SYSTEM', 'TABLE', 'TABLES',
    'TABLESPACE', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TEXT', 'THEN', 'TIME',
    'TIMESTAMP', 'TO', 'TRAILING', 'TRANSACTION', 'TREAT', 'TRIGGER',
    'TRIM', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNBOUNDED',
    'UNCOMMITTED', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN',
    'UNLOGGED', 'UNTIL', 'UPDATE', 'USER', 'USING', 'VACUUM', 'VALID',
    'VALIDATE', 'VALIDATOR', 'VALUE', 'VALUES', 'VARCHAR', 'VARIADIC',
    'VARYING', 'VERBOSE', 'VERSION', 'VIEW', 'VOLATILE', 'WHEN', 'WHERE',
    'WHITESPACE', 'WINDOW', 'WITH', 'WITHOUT', 'WORK', 'WRAPPER', 'WRITE',
    'XML', 'XMLATTRIBUTES', 'XMLCONCAT', 'XMLELEMENT', 'XMLEXISTS',
    'XMLFOREST', 'XMLPARSE', 'XMLPI', 'XMLROOT', 'XMLSERIALIZE', 'YEAR',
    'YES', 'ZONE',
]
|
202 |
|
# Written by update_myself() from the parsed data type page; the items are
# replaced wholesale on every run, so the opening line and the closing
# bracket must each stay on a line of their own.
DATATYPES = [
    'bigint', 'bigserial', 'bit', 'bit varying', 'bool', 'boolean', 'box',
    'bytea', 'char', 'character', 'character varying', 'cidr', 'circle',
    'date', 'decimal', 'double precision', 'float4', 'float8', 'inet',
    'int', 'int2', 'int4', 'int8', 'integer', 'interval', 'line', 'lseg',
    'macaddr', 'money', 'numeric', 'path', 'point', 'polygon', 'real',
    'serial', 'serial4', 'serial8', 'smallint', 'text', 'time', 'timestamp',
    'timestamptz', 'timetz', 'tsquery', 'tsvector', 'txid_snapshot', 'uuid',
    'varbit', 'varchar', 'with time zone', 'without time zone', 'xml',
]
|
213 |
|
# Written by update_myself() from the pseudo-types table, in document order;
# the items are replaced wholesale on every run, so the opening line and the
# closing bracket must each stay on a line of their own.
PSEUDO_TYPES = [
    'any', 'anyarray', 'anyelement', 'anyenum', 'anynonarray', 'cstring',
    'internal', 'language_handler', 'fdw_handler', 'record', 'trigger',
    'void', 'opaque',
]

# Drop every pseudo-type name that equals a lower-cased keyword (with the
# lists in this file that is 'any' and 'trigger') and sort what remains.
PSEUDO_TYPES = sorted(
    set(PSEUDO_TYPES).difference(keyword.lower() for keyword in KEYWORDS))
|
222 |
|
# Additional PL/pgSQL keywords.  No update_consts() call in this file
# targets this list.
PLPGSQL_KEYWORDS = [
    'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT',
    'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE',
    'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
]
|
228 |
|
# Executing this module as a script downloads the upstream pages and
# rewrites the generated lists in this file.
if __name__ == '__main__':
    update_myself()
|
231 |