|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.rdf |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for semantic web and RDF query languages and markup. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import RegexLexer, bygroups, default |
|
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ |
|
16 Whitespace, Name, Literal, Comment, Text |
|
17 |
|
18 __all__ = ['SparqlLexer', 'TurtleLexer'] |
|
19 |
|
20 |
|
class SparqlLexer(RegexLexer):
    """
    Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language.

    .. versionadded:: 2.0
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']

    # character group definitions ::
    #
    # Each *_GRP constant is the *inside* of a regex character class, so the
    # groups can be concatenated before being wrapped in brackets below.

    PN_CHARS_BASE_GRP = (u'a-zA-Z'
                         u'\u00c0-\u00d6'
                         u'\u00d8-\u00f6'
                         u'\u00f8-\u02ff'
                         u'\u0370-\u037d'
                         u'\u037f-\u1fff'
                         u'\u200c-\u200d'
                         u'\u2070-\u218f'
                         u'\u2c00-\u2fef'
                         u'\u3001-\ud7ff'
                         u'\uf900-\ufdcf'
                         u'\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    u'\u00b7' +
                    u'\u0300-\u036f' +
                    u'\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The
    # SPARQL grammar (PN_LOCAL_ESC) lists the apostrophe, not the double
    # quote, and does not include the space character.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = u'[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
              u'0-9\u00b7\u0300-\u036f\u203f-\u2040]*'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            # Multi-word alternatives are listed before their one-word
            # prefixes (e.g. ``delete\s+data`` before ``delete``) because the
            # shorter form would otherwise win at the word boundary.
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|values|bindings|load|into|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|with|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|undef|union|not\s+in|in|as|having|to|prefix|base)\b',
             Keyword),
            # 'a' is the only case-sensitive keyword, hence its own rule.
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|uuid|struuid|md5|sha1|'
             r'sha256|sha384|sha512|coalesce|if|strlang|strdt|sameterm|isiri|'
             r'isuri|isblank|isliteral|isnumeric|regex|substr|replace|exists|'
             r'not\s+exists|count|sum|min|max|avg|sample|group_concat|'
             r'separator)\b',
             Name.Function),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # The triple-quoted openers must be tried before the one-quote
            # openers, which would otherwise match their first character.
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # A quote is string content only when it does not start the
            # closing run of three; otherwise the closing delimiter would be
            # swallowed and the string would never terminate.
            (r'(?:[^"\\]|"(?!""))+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as for the triple-double-quoted state.
            (r"(?:[^'\\]|'(?!''))+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String.Escape, 'string-escape'),
        ],
        # Entered right after a backslash; consumes the rest of the escape
        # sequence and returns to the enclosing string state.
        'string-escape': [
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # Entered after the closing quote: optionally consumes a language tag
        # or the datatype marker, then pops both this state and the string
        # state to get back to 'root'.
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }
|
177 |
|
178 |
|
class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    # Applies to every rule below, so the lower-case character classes and
    # the 'E' of the exponent also match their upper/lower-case counterparts.
    flags = re.IGNORECASE

    # Reusable regex fragments, interpolated into the rules with
    # ``'...%(NAME)s...' % patterns``.  Each fragment carries its own
    # capturing group(s), which the ``bygroups`` calls below rely on.
    patterns = {
        'PNAME_NS': r'((?:[a-z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    # PNAME_NS PN_LOCAL (with simplified character range)
    patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'%(PrefixedName)s' % patterns,
             bygroups(Name.Namespace, Name.Tag)),

            # Comment ('*' rather than '+' so that a bare '#' is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # Doubles come first: the decimal rule would otherwise consume
            # the mantissa of e.g. ``1.5E3`` and leave ``E3`` unmatched.
            (r'[+\-]?(?:\d+\.\d*|\.?\d+)E[+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # The triple-quoted openers must be tried before the one-quote
            # openers, which would otherwise match their first character.
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # A quote is string content only when it does not start the
            # closing run of three; otherwise the closing delimiter would be
            # swallowed and the string would never terminate.
            (r'(?:[^"\\]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as for the triple-double-quoted state.
            (r"(?:[^'\\]|'(?!''))+", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # Entered right after a backslash; consumes the escaped character and
        # returns to the enclosing string state.
        'string-escape': [
            (r'.', String, '#pop'),
        ],
        # Entered after the closing quote: optionally consumes a language tag
        # or a datatype annotation, then pops both this state and the string
        # state to get back to 'root'.
        'end-of-string': [
            (r'(@)([a-z]+(?:-[a-z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
            (r'(\^\^)%(PrefixedName)s' % patterns,
             bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }