3 pygments.lexers.rdf |
3 pygments.lexers.rdf |
4 ~~~~~~~~~~~~~~~~~~~ |
4 ~~~~~~~~~~~~~~~~~~~ |
5 |
5 |
6 Lexers for semantic web and RDF query languages and markup. |
6 Lexers for semantic web and RDF query languages and markup. |
7 |
7 |
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. |
9 :license: BSD, see LICENSE for details. |
9 :license: BSD, see LICENSE for details. |
10 """ |
10 """ |
11 |
11 |
12 import re |
12 import re |
13 |
13 |
14 from pygments.lexer import RegexLexer, bygroups, default |
14 from pygments.lexer import RegexLexer, bygroups, default |
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ |
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ |
16 Whitespace, Name, Literal, Comment, Text |
16 Whitespace, Name, Literal, Comment, Text |
17 |
17 |
18 __all__ = ['SparqlLexer', 'TurtleLexer'] |
18 __all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer'] |
19 |
19 |
20 |
20 |
21 class SparqlLexer(RegexLexer): |
21 class SparqlLexer(RegexLexer): |
22 """ |
22 """ |
23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language. |
23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language. |
266 |
266 |
267 default('#pop:2'), |
267 default('#pop:2'), |
268 |
268 |
269 ], |
269 ], |
270 } |
270 } |
|
271 |
|
272 # Turtle and Tera Term macro files share the same file extension |
|
273 # but each has a recognizable and distinct syntax. |
|
def analyse_text(text):
    """Score *text* by looking at the directive it opens with.

    Returns ``0.80`` when the text, after optional leading whitespace,
    starts with ``@base``, ``BASE``, ``@prefix`` or ``PREFIX`` followed by
    a space.  Returns ``None`` when none of these directives is found there.
    """
    directives = ('@base ', 'BASE ', '@prefix ', 'PREFIX ')
    # No re.MULTILINE flag is passed, so ``^`` only anchors at the very
    # beginning of the text; ``\s*`` lets leading blank lines be skipped.
    if any(re.search(r'^\s*%s' % directive, text) for directive in directives):
        return 0.80
    return None
|
278 |
|
279 |
|
class ShExCLexer(RegexLexer):
    """
    Lexer for `ShExC <https://shex.io/shex-semantics/#shexc>`_ shape expressions language syntax.

    The class first assembles the regular-expression fragments for the
    terminal productions (character groups, IRIs, blank node labels,
    prefixed names, numeric exponents) as plain strings, then uses them in
    the ``tokens`` state table.  String literals are lexed in dedicated
    states, one per quoting style, which all finish through the shared
    ``end-of-string`` state so that an optional language tag or ``^^``
    datatype marker directly after the closing quote is recognised.
    """
    name = 'ShExC'
    aliases = ['shexc', 'shex']
    filenames = ['*.shex']
    mimetypes = ['text/shex']

    # character group definitions ::
    #
    # The *_GRP strings are the *contents* of a regex character class (no
    # surrounding brackets) so that they can be concatenated into larger
    # classes further down.

    PN_CHARS_BASE_GRP = (u'a-zA-Z'
                         u'\u00c0-\u00d6'
                         u'\u00d8-\u00f6'
                         u'\u00f8-\u02ff'
                         u'\u0370-\u037d'
                         u'\u037f-\u1fff'
                         u'\u200c-\u200d'
                         u'\u2070-\u218f'
                         u'\u2c00-\u2fef'
                         u'\u3001-\ud7ff'
                         u'\uf900-\ufdcf'
                         u'\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    u'\u00b7' +
                    u'\u0300-\u036f' +
                    u'\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::
    #
    # Complete regex fragments built from the groups above.

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    # ``uXXXX`` / ``UXXXXXXXX`` without the leading backslash; the string
    # states consume the backslash themselves before entering
    # 'string-escape'.
    UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'

    UCHAR = r'\\' + UCHAR_NO_BACKSLASH

    IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever embedded inside an
    # enclosing group (see PN_LOCAL), which keeps the ``|`` scoped.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(base|prefix|start|external|'
             r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
             r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
             r'totaldigits|fractiondigits|'
             r'closed|extra)\b', Keyword),
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'[@|$&=*+?^\-~]', Operator),
            # operator keywords ::
            (r'(?i)(and|or|not)\b', Operator.Word),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # The triple-quoted openers must be tried before the
            # single-character ones, otherwise they would never match.
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Stop at every double quote so that the closing delimiter rule
            # above is tried at that position.  A run of "anything but a
            # backslash" would also consume the closing delimiter and the
            # literal would run on into the text after it.
            (r'[^"\\]+', String),
            (r'\\', String, 'string-escape'),
            # one or two double quotes that are part of the literal's content
            (r'"', String),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same reasoning as in 'triple-double-quoted-string': never
            # consume a quote here, so the closing delimiter stays visible.
            (r"[^'\\]+", String),
            # The backslash is emitted as String, like in the three sibling
            # string states; only the escaped part is String.Escape.
            (r'\\', String, 'string-escape'),
            # one or two single quotes that are part of the literal's content
            (r"'", String),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # Entered right after a backslash inside a string: consume either a
        # u/U hexadecimal escape or any single character, then return to
        # the string state.
        'string-escape': [
            (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # Entered right after a closing quote.  Every branch pops two
        # states: this one and the string state below it.
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }