ThirdParty/Pygments/pygments/lexers/_postgres_builtins.py

changeset 1705
b0fbc9300f2b
child 2426
da76c71624de
equal deleted inserted replaced
1704:02ae6c55b35b 1705:b0fbc9300f2b
1 """
2 pygments.lexers._postgres_builtins
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Self-updating data files for PostgreSQL lexer.
6
7 :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9 """
10
11 import re
12 import urllib.request, urllib.parse, urllib.error
13
14 # One man's constant is another man's variable.
15 SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
16 KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
17 DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
18
def update_myself():
    """Refresh the generated constant lists in this very file.

    Downloads the PostgreSQL data type and keyword documentation sources,
    extracts the names from them and rewrites the ``DATATYPES``,
    ``PSEUDO_TYPES`` and ``KEYWORDS`` lists of this module on disk.
    """
    # The data type page is scanned twice (regular types and pseudo-types),
    # so it is materialised as a list of lines first.
    datatype_lines = list(fetch(DATATYPES_URL))

    regenerated = (
        ('DATATYPES', parse_datatypes(datatype_lines)),
        ('PSEUDO_TYPES', parse_pseudos(datatype_lines)),
        ('KEYWORDS', parse_keywords(fetch(KEYWORDS_URL))),
    )

    # Nothing is written until every download and parse step has succeeded.
    for constname, values in regenerated:
        update_consts(__file__, constname, values)
28
def parse_keywords(f):
    """Return the sorted keyword names found in the keywords document.

    *f* is a file-like object whose ``read()`` returns the whole document
    as text.  A keyword is the ``<token>`` content of a table entry that
    is immediately followed by another ``<entry>`` cell.

    Raises ``ValueError`` when no keyword is found at all.
    """
    entry_re = re.compile(
        r'\s*<entry><token>([^<]+)</token></entry>\s*'
        r'<entry>([^<]+)</entry>')

    found = [match.group(1) for match in entry_re.finditer(f.read())]
    if not found:
        raise ValueError('no keyword found')

    return sorted(found)
41
def parse_datatypes(f):
    """Return the sorted, de-duplicated data type names found in *f*.

    *f* is an iterable of text lines.  Scanning stops at the first line
    containing ``<sect1``; before that, only lines containing
    ``<entry><type>`` are looked at.

    A line such as::

        time [ (<replaceable>p</replaceable>) ] [ without time zone ]

    yields the names ``"time"`` and ``"without time zone"``: markup is
    removed, bracketed fragments holding a parenthesis are discarded and
    the remaining fragments are split on commas.
    """
    dt = set()
    for line in f:
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Remove the placeholders together with their text, then every
        # remaining tag.
        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub("<[^>]+>", "", line)

        # Cut the line at every '[' and ']' and drop the fragments that
        # contain a parenthesis (what is left of the "(p)" placeholders).
        for outer in line.split('['):
            for fragment in outer.split(']'):
                if "(" in fragment:
                    continue
                for name in fragment.split(','):
                    # Collapse whitespace runs; blank pieces are skipped.
                    name = " ".join(name.split())
                    if name:
                        dt.add(name)

    return sorted(dt)
69
def parse_pseudos(f):
    """Return the pseudo-type names listed in the pseudo-types table.

    *f* is an iterable of text lines.  Lines are skipped until the opening
    tag of the table with id ``datatype-pseudotypes-table``; from there,
    every line starting with an ``<entry><type>NAME`` cell contributes
    ``NAME`` until the closing ``</table>`` line.  Names are returned in
    document order.

    Both the SGML short close tag (``</>``) and the explicit ``</type>``
    close tag are accepted after the name.

    Raises ``ValueError`` if the table start is not found, if the table is
    not terminated, or if it contains no type entry.
    """
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    re_entry = re.compile(r'\s*<entry><type>([^<]+)(?:</type>|</>)</entry>')
    re_end = re.compile(r'\s*</table>')

    # A single shared iterator: the second loop resumes right after the
    # line on which the first loop stopped.
    lines = iter(f)
    for line in lines:
        if re_start.match(line) is not None:
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    dt = []
    for line in lines:
        m = re_entry.match(line)
        if m is not None:
            dt.append(m.group(1))

        if re_end.match(line) is not None:
            break
    else:
        raise ValueError('end of pseudo datatypes table not found')

    if not dt:
        raise ValueError('pseudo datatypes not found')

    return dt
97
def fetch(url):
    """Download *url* and return its content as an in-memory text stream.

    The body is read completely, decoded as UTF-8 and wrapped in an
    ``io.StringIO``.  The result can be used like a text file: iterate it
    line by line or call ``read()``.  Returning text rather than the raw
    response matters because the parsers in this module apply ``str``
    patterns to what they read.  The response is closed before returning.

    Raises whatever ``urllib.request.urlopen`` raises for an unreachable
    URL, and ``UnicodeDecodeError`` if the body is not valid UTF-8.
    """
    import io  # local import keeps the module's import block untouched

    with urllib.request.urlopen(url) as response:
        return io.StringIO(response.read().decode('utf-8'))
100
def update_consts(filename, constname, content):
    """Rewrite the list literal assigned to *constname* inside *filename*.

    The file must contain exactly one line of the form ``constname = [``
    and, somewhere after it, a line holding only ``]``.  Everything
    between those two lines is replaced by the ``repr()`` of each item of
    *content*, comma separated and packed into lines of limited width.
    An empty *content* leaves an empty list body.

    Raises ``ValueError`` if the start line is missing or ambiguous, or if
    no closing line follows it.
    """
    with open(filename) as f:
        lines = f.readlines()

    # Lines delimiting the region to replace
    re_start = re.compile(r'^%s\s*=\s*\[\s*$' % constname)
    re_end = re.compile(r'^\s*\]\s*$')

    start = [n for n, line in enumerate(lines) if re_start.match(line)]
    if not start:
        raise ValueError("couldn't find line containing '%s = ['" % constname)
    if len(start) > 1:
        raise ValueError("too many lines containing '%s = ['" % constname)
    start = start[0] + 1

    # First closing-bracket line at or after the start of the list body.
    end = next(
        (n for n in range(start, len(lines)) if re_end.match(lines[n])),
        None)
    if end is None:
        raise ValueError("couldn't find line containing ']' after %s " % constname)

    # Pack the new content in lines not too long.  A new row is opened
    # only when the current one already holds something, so neither an
    # empty content nor an over-long first item leaves an empty row
    # behind (which would be written as a bare "," line).
    rows = [[]]
    for item in map(repr, content):
        row = rows[-1]
        if row and sum(map(len, row)) + 2 * len(row) + len(item) + 4 > 75:
            row = []
            rows.append(row)
        row.append(item)

    lines[start:end] = [" %s,\n" % ", ".join(row) for row in rows if row]

    with open(filename, 'w') as f:
        f.writelines(lines)
134
135
136 # Autogenerated: please edit them if you like wasting your time.
137
# SQL keyword names, upper case, in sorted order.
# update_myself() regenerates this list through
# update_consts(__file__, 'KEYWORDS', ...), which locates the
# "KEYWORDS = [" line and the first following line holding only "]":
# keep both on lines of their own.
KEYWORDS = [
    'ABORT', 'ABSOLUTE', 'ACCESS', 'ACTION', 'ADD', 'ADMIN', 'AFTER',
    'AGGREGATE', 'ALL', 'ALSO', 'ALTER', 'ALWAYS', 'ANALYSE', 'ANALYZE',
    'AND', 'ANY', 'ARRAY', 'AS', 'ASC', 'ASSERTION', 'ASSIGNMENT',
    'ASYMMETRIC', 'AT', 'ATTRIBUTE', 'AUTHORIZATION', 'BACKWARD', 'BEFORE',
    'BEGIN', 'BETWEEN', 'BIGINT', 'BINARY', 'BIT', 'BOOLEAN', 'BOTH', 'BY',
    'CACHE', 'CALLED', 'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG',
    'CHAIN', 'CHAR', 'CHARACTER', 'CHARACTERISTICS', 'CHECK', 'CHECKPOINT',
    'CLASS', 'CLOSE', 'CLUSTER', 'COALESCE', 'COLLATE', 'COLLATION',
    'COLUMN', 'COMMENT', 'COMMENTS', 'COMMIT', 'COMMITTED', 'CONCURRENTLY',
    'CONFIGURATION', 'CONNECTION', 'CONSTRAINT', 'CONSTRAINTS', 'CONTENT',
    'CONTINUE', 'CONVERSION', 'COPY', 'COST', 'CREATE', 'CROSS', 'CSV',
    'CURRENT', 'CURRENT_CATALOG', 'CURRENT_DATE', 'CURRENT_ROLE',
    'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
    'CURSOR', 'CYCLE', 'DATA', 'DATABASE', 'DAY', 'DEALLOCATE', 'DEC',
    'DECIMAL', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED',
    'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DESC', 'DICTIONARY',
    'DISABLE', 'DISCARD', 'DISTINCT', 'DO', 'DOCUMENT', 'DOMAIN', 'DOUBLE',
    'DROP', 'EACH', 'ELSE', 'ENABLE', 'ENCODING', 'ENCRYPTED', 'END',
    'ENUM', 'ESCAPE', 'EXCEPT', 'EXCLUDE', 'EXCLUDING', 'EXCLUSIVE',
    'EXECUTE', 'EXISTS', 'EXPLAIN', 'EXTENSION', 'EXTERNAL', 'EXTRACT',
    'FALSE', 'FAMILY', 'FETCH', 'FIRST', 'FLOAT', 'FOLLOWING', 'FOR',
    'FORCE', 'FOREIGN', 'FORWARD', 'FREEZE', 'FROM', 'FULL', 'FUNCTION',
    'FUNCTIONS', 'GLOBAL', 'GRANT', 'GRANTED', 'GREATEST', 'GROUP',
    'HANDLER', 'HAVING', 'HEADER', 'HOLD', 'HOUR', 'IDENTITY', 'IF',
    'ILIKE', 'IMMEDIATE', 'IMMUTABLE', 'IMPLICIT', 'IN', 'INCLUDING',
    'INCREMENT', 'INDEX', 'INDEXES', 'INHERIT', 'INHERITS', 'INITIALLY',
    'INLINE', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT', 'INSTEAD',
    'INT', 'INTEGER', 'INTERSECT', 'INTERVAL', 'INTO', 'INVOKER', 'IS',
    'ISNULL', 'ISOLATION', 'JOIN', 'KEY', 'LABEL', 'LANGUAGE', 'LARGE',
    'LAST', 'LC_COLLATE', 'LC_CTYPE', 'LEADING', 'LEAST', 'LEFT', 'LEVEL',
    'LIKE', 'LIMIT', 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME',
    'LOCALTIMESTAMP', 'LOCATION', 'LOCK', 'MAPPING', 'MATCH', 'MAXVALUE',
    'MINUTE', 'MINVALUE', 'MODE', 'MONTH', 'MOVE', 'NAME', 'NAMES',
    'NATIONAL', 'NATURAL', 'NCHAR', 'NEXT', 'NO', 'NONE', 'NOT', 'NOTHING',
    'NOTIFY', 'NOTNULL', 'NOWAIT', 'NULL', 'NULLIF', 'NULLS', 'NUMERIC',
    'OBJECT', 'OF', 'OFF', 'OFFSET', 'OIDS', 'ON', 'ONLY', 'OPERATOR',
    'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OVER', 'OVERLAPS',
    'OVERLAY', 'OWNED', 'OWNER', 'PARSER', 'PARTIAL', 'PARTITION',
    'PASSING', 'PASSWORD', 'PLACING', 'PLANS', 'POSITION', 'PRECEDING',
    'PRECISION', 'PREPARE', 'PREPARED', 'PRESERVE', 'PRIMARY', 'PRIOR',
    'PRIVILEGES', 'PROCEDURAL', 'PROCEDURE', 'QUOTE', 'RANGE', 'READ',
    'REAL', 'REASSIGN', 'RECHECK', 'RECURSIVE', 'REF', 'REFERENCES',
    'REINDEX', 'RELATIVE', 'RELEASE', 'RENAME', 'REPEATABLE', 'REPLACE',
    'REPLICA', 'RESET', 'RESTART', 'RESTRICT', 'RETURNING', 'RETURNS',
    'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROW', 'ROWS', 'RULE',
    'SAVEPOINT', 'SCHEMA', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY',
    'SELECT', 'SEQUENCE', 'SEQUENCES', 'SERIALIZABLE', 'SERVER', 'SESSION',
    'SESSION_USER', 'SET', 'SETOF', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE',
    'SMALLINT', 'SOME', 'STABLE', 'STANDALONE', 'START', 'STATEMENT',
    'STATISTICS', 'STDIN', 'STDOUT', 'STORAGE', 'STRICT', 'STRIP',
    'SUBSTRING', 'SYMMETRIC', 'SYSID', 'SYSTEM', 'TABLE', 'TABLES',
    'TABLESPACE', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TEXT', 'THEN', 'TIME',
    'TIMESTAMP', 'TO', 'TRAILING', 'TRANSACTION', 'TREAT', 'TRIGGER',
    'TRIM', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNBOUNDED',
    'UNCOMMITTED', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN',
    'UNLOGGED', 'UNTIL', 'UPDATE', 'USER', 'USING', 'VACUUM', 'VALID',
    'VALIDATE', 'VALIDATOR', 'VALUE', 'VALUES', 'VARCHAR', 'VARIADIC',
    'VARYING', 'VERBOSE', 'VERSION', 'VIEW', 'VOLATILE', 'WHEN', 'WHERE',
    'WHITESPACE', 'WINDOW', 'WITH', 'WITHOUT', 'WORK', 'WRAPPER', 'WRITE',
    'XML', 'XMLATTRIBUTES', 'XMLCONCAT', 'XMLELEMENT', 'XMLEXISTS',
    'XMLFOREST', 'XMLPARSE', 'XMLPI', 'XMLROOT', 'XMLSERIALIZE', 'YEAR',
    'YES', 'ZONE',
]
202
# Data type names, lower case, in sorted order; multi-word fragments such
# as 'without time zone' are separate entries.
# update_myself() regenerates this list through
# update_consts(__file__, 'DATATYPES', ...); keep the "DATATYPES = [" line
# and the closing "]" on lines of their own.
DATATYPES = [
    'bigint', 'bigserial', 'bit', 'bit varying', 'bool', 'boolean', 'box',
    'bytea', 'char', 'character', 'character varying', 'cidr', 'circle',
    'date', 'decimal', 'double precision', 'float4', 'float8', 'inet',
    'int', 'int2', 'int4', 'int8', 'integer', 'interval', 'line', 'lseg',
    'macaddr', 'money', 'numeric', 'path', 'point', 'polygon', 'real',
    'serial', 'serial4', 'serial8', 'smallint', 'text', 'time', 'timestamp',
    'timestamptz', 'timetz', 'tsquery', 'tsvector', 'txid_snapshot', 'uuid',
    'varbit', 'varchar', 'with time zone', 'without time zone', 'xml',
]
213
# Pseudo-type names, unsorted (parse_pseudos() returns them in document
# order).
# update_myself() regenerates this list through
# update_consts(__file__, 'PSEUDO_TYPES', ...); keep the
# "PSEUDO_TYPES = [" line and the closing "]" on lines of their own.
PSEUDO_TYPES = [
    'any', 'anyarray', 'anyelement', 'anyenum', 'anynonarray', 'cstring',
    'internal', 'language_handler', 'fdw_handler', 'record', 'trigger',
    'void', 'opaque',
]
219
# Drop every pseudo-type whose name is also a keyword when compared in
# lower case (this removes 'trigger' and 'any'), then sort what remains.
PSEUDO_TYPES = sorted(
    set(PSEUDO_TYPES).difference(keyword.lower() for keyword in KEYWORDS))
222
# Additional PL/pgSQL keyword names, upper case.
# Unlike the lists above, update_myself() does not rewrite this one.
PLPGSQL_KEYWORDS = [
    'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT',
    'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE',
    'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
]

# Running this module as a script downloads the documentation sources and
# rewrites the generated lists in this file.
if __name__ == '__main__':
    update_myself()
231

eric ide

mercurial