ThirdParty/Pygments/pygments/lexers/rdf.py

changeset 4697
c2e9bf425554
parent 4172
4f20dba37ab6
child 5072
aab59042fefb
equal deleted inserted replaced
4696:bf4d19a7cade 4697:c2e9bf425554
3 pygments.lexers.rdf 3 pygments.lexers.rdf
4 ~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for semantic web and RDF query languages and markup. 6 Lexers for semantic web and RDF query languages and markup.
7 7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 13
14 from pygments.lexer import RegexLexer, bygroups, default 14 from pygments.lexer import RegexLexer, bygroups, default
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, \ 15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \
16 Whitespace, Name, Literal, Comment, Text 16 Whitespace, Name, Literal, Comment, Text
17 17
18 __all__ = ['SparqlLexer'] 18 __all__ = ['SparqlLexer', 'TurtleLexer']
19 19
20 20
21 class SparqlLexer(RegexLexer): 21 class SparqlLexer(RegexLexer):
22 """ 22 """
23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language. 23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language.
27 name = 'SPARQL' 27 name = 'SPARQL'
28 aliases = ['sparql'] 28 aliases = ['sparql']
29 filenames = ['*.rq', '*.sparql'] 29 filenames = ['*.rq', '*.sparql']
30 mimetypes = ['application/sparql-query'] 30 mimetypes = ['application/sparql-query']
31 31
32 flags = re.IGNORECASE 32 # terminal productions ::
33
34 PN_CHARS_BASE = (u'(?:[a-zA-Z'
35 u'\u00c0-\u00d6'
36 u'\u00d8-\u00f6'
37 u'\u00f8-\u02ff'
38 u'\u0370-\u037d'
39 u'\u037f-\u1fff'
40 u'\u200c-\u200d'
41 u'\u2070-\u218f'
42 u'\u2c00-\u2fef'
43 u'\u3001-\ud7ff'
44 u'\uf900-\ufdcf'
45 u'\ufdf0-\ufffd]|'
46 u'[^\u0000-\uffff]|'
47 u'[\ud800-\udbff][\udc00-\udfff])')
48
49 PN_CHARS_U = '(?:' + PN_CHARS_BASE + '|_)'
50
51 PN_CHARS = ('(?:' + PN_CHARS_U + r'|[\-0-9' +
52 u'\u00b7' +
53 u'\u0300-\u036f' +
54 u'\u203f-\u2040])')
55
56 HEX = '[0-9A-Fa-f]'
57
58 PN_LOCAL_ESC_CHARS = r'[ _~.\-!$&""()*+,;=/?#@%]'
59
60 IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'
61
62 BLANK_NODE_LABEL = '_:(?:' + PN_CHARS_U + '|[0-9])(?:(?:' + PN_CHARS + '|\.)*' + \
63 PN_CHARS + ')?'
64
65 PN_PREFIX = PN_CHARS_BASE + '(?:(?:' + PN_CHARS + '|\.)*' + PN_CHARS + ')?'
66
67 VARNAME = '(?:' + PN_CHARS_U + '|[0-9])(?:' + PN_CHARS_U + \
68 u'|[0-9\u00b7\u0300-\u036f\u203f-\u2040])*'
69
70 PERCENT = '%' + HEX + HEX
71
72 PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS
73
74 PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
75
76 PN_LOCAL = ('(?:(?:' + PN_CHARS_U + '|[:0-9])|' + PLX + ')' +
77 '(?:(?:(?:' + PN_CHARS + '|[.:])|' + PLX + ')*(?:(?:' +
78 PN_CHARS + '|:)|' + PLX + '))?')
79
80 EXPONENT = r'[eE][+-]?\d+'
81
82 # Lexer token definitions ::
33 83
34 tokens = { 84 tokens = {
35 'root': [ 85 'root': [
36 (r'\s+', Whitespace), 86 (r'\s+', Text),
37 (r'(select|construct|describe|ask|where|filter|group\s+by|minus|' 87 # keywords ::
38 r'distinct|reduced|from named|from|order\s+by|limit|' 88 (r'((?i)select|construct|describe|ask|where|filter|group\s+by|minus|'
89 r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
39 r'offset|bindings|load|clear|drop|create|add|move|copy|' 90 r'offset|bindings|load|clear|drop|create|add|move|copy|'
40 r'insert\s+data|delete\s+data|delete\s+where|delete|insert|' 91 r'insert\s+data|delete\s+data|delete\s+where|delete|insert|'
41 r'using named|using|graph|default|named|all|optional|service|' 92 r'using\s+named|using|graph|default|named|all|optional|service|'
42 r'silent|bind|union|not in|in|as|a)', Keyword), 93 r'silent|bind|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
43 (r'(prefix|base)(\s+)([a-z][\w-]*)(\s*)(\:)', 94 (r'(a)\b', Keyword),
44 bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, 95 # IRIs ::
45 Punctuation)), 96 ('(' + IRIREF + ')', Name.Label),
46 (r'\?[a-z_]\w*', Name.Variable), 97 # blank nodes ::
47 (r'<[^>]+>', Name.Label), 98 ('(' + BLANK_NODE_LABEL + ')', Name.Label),
48 (r'([a-z][\w-]*)(\:)([a-z][\w-]*)', 99 # # variables ::
100 ('[?$]' + VARNAME, Name.Variable),
101 # prefixed names ::
102 (r'(' + PN_PREFIX + ')?(\:)(' + PN_LOCAL + ')?',
49 bygroups(Name.Namespace, Punctuation, Name.Tag)), 103 bygroups(Name.Namespace, Punctuation, Name.Tag)),
50 (r'(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|' 104 # function names ::
105 (r'((?i)str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
51 r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|' 106 r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
52 r'contains|strstarts|strends|strbefore|strafter|year|month|day|' 107 r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
53 r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|' 108 r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|'
54 r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|' 109 r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
55 r'isliteral|isnumeric|regex|substr|replace|exists|not exists|' 110 r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
56 r'count|sum|min|max|avg|sample|group_concat|separator)\b', 111 r'count|sum|min|max|avg|sample|group_concat|separator)\b',
57 Name.Function), 112 Name.Function),
58 (r'(true|false)', Literal), 113 # boolean literals ::
59 (r'[+\-]?\d*\.\d+', Number.Float), 114 (r'(true|false)', Keyword.Constant),
60 (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float), 115 # double literals ::
116 (r'[+\-]?(\d+\.\d*' + EXPONENT + '|\.?\d+' + EXPONENT + ')', Number.Float),
117 # decimal literals ::
118 (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
119 # integer literals ::
61 (r'[+\-]?\d+', Number.Integer), 120 (r'[+\-]?\d+', Number.Integer),
62 (r'(\|\||&&|=|\*|\-|\+|/)', Operator), 121 # operators ::
63 (r'[(){}.;,:^]', Punctuation), 122 (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
64 (r'#[^\n]+', Comment), 123 # punctuation characters ::
124 (r'[(){}.;,:^\[\]]', Punctuation),
125 # line comments ::
126 (r'#[^\n]*', Comment),
127 # strings ::
65 (r'"""', String, 'triple-double-quoted-string'), 128 (r'"""', String, 'triple-double-quoted-string'),
66 (r'"', String, 'single-double-quoted-string'), 129 (r'"', String, 'single-double-quoted-string'),
67 (r"'''", String, 'triple-single-quoted-string'), 130 (r"'''", String, 'triple-single-quoted-string'),
68 (r"'", String, 'single-single-quoted-string'), 131 (r"'", String, 'single-single-quoted-string'),
69 ], 132 ],
78 (r'\\', String, 'string-escape'), 141 (r'\\', String, 'string-escape'),
79 ], 142 ],
80 'triple-single-quoted-string': [ 143 'triple-single-quoted-string': [
81 (r"'''", String, 'end-of-string'), 144 (r"'''", String, 'end-of-string'),
82 (r'[^\\]+', String), 145 (r'[^\\]+', String),
83 (r'\\', String, 'string-escape'), 146 (r'\\', String.Escape, 'string-escape'),
84 ], 147 ],
85 'single-single-quoted-string': [ 148 'single-single-quoted-string': [
86 (r"'", String, 'end-of-string'), 149 (r"'", String, 'end-of-string'),
87 (r"[^'\\\n]+", String), 150 (r"[^'\\\n]+", String),
88 (r'\\', String, 'string-escape'), 151 (r'\\', String, 'string-escape'),
89 ], 152 ],
90 'string-escape': [ 153 'string-escape': [
91 (r'.', String, '#pop'), 154 (r'u' + HEX + '{4}', String.Escape, '#pop'),
155 (r'U' + HEX + '{8}', String.Escape, '#pop'),
156 (r'.', String.Escape, '#pop'),
92 ], 157 ],
93 'end-of-string': [ 158 'end-of-string': [
94 (r'(@)([a-z]+(:?-[a-z0-9]+)*)', 159 (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
95 bygroups(Operator, Name.Function), '#pop:2'), 160 bygroups(Operator, Name.Function), '#pop:2'),
96 (r'\^\^', Operator, '#pop:2'), 161 (r'\^\^', Operator, '#pop:2'),
97 default('#pop:2'), 162 default('#pop:2'),
98 ], 163 ],
99 } 164 }
165
166
167 class TurtleLexer(RegexLexer):
168 """
169 Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.
170
171 .. versionadded:: 2.1
172 """
173 name = 'Turtle'
174 aliases = ['turtle']
175 filenames = ['*.ttl']
176 mimetypes = ['text/turtle', 'application/x-turtle']
177
178 flags = re.IGNORECASE
179
180 patterns = {
181 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range
182 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
183 }
184
185 # PNAME_NS PN_LOCAL (with simplified character range)
186 patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns
187
188 tokens = {
189 'root': [
190 (r'\s+', Whitespace),
191
192 # Base / prefix
193 (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
194 bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
195 Punctuation)),
196 (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
197 bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
198 Name.Variable, Whitespace, Punctuation)),
199
200 # The shorthand predicate 'a'
201 (r'(?<=\s)a(?=\s)', Keyword.Type),
202
203 # IRIREF
204 (r'%(IRIREF)s' % patterns, Name.Variable),
205
206 # PrefixedName
207 (r'%(PrefixedName)s' % patterns,
208 bygroups(Name.Namespace, Name.Tag)),
209
210 # Comment
211 (r'#[^\n]+', Comment),
212
213 (r'\b(true|false)\b', Literal),
214 (r'[+\-]?\d*\.\d+', Number.Float),
215 (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float),
216 (r'[+\-]?\d+', Number.Integer),
217 (r'[\[\](){}.;,:^]', Punctuation),
218
219 (r'"""', String, 'triple-double-quoted-string'),
220 (r'"', String, 'single-double-quoted-string'),
221 (r"'''", String, 'triple-single-quoted-string'),
222 (r"'", String, 'single-single-quoted-string'),
223 ],
224 'triple-double-quoted-string': [
225 (r'"""', String, 'end-of-string'),
226 (r'[^\\]+', String),
227 (r'\\', String, 'string-escape'),
228 ],
229 'single-double-quoted-string': [
230 (r'"', String, 'end-of-string'),
231 (r'[^"\\\n]+', String),
232 (r'\\', String, 'string-escape'),
233 ],
234 'triple-single-quoted-string': [
235 (r"'''", String, 'end-of-string'),
236 (r'[^\\]+', String),
237 (r'\\', String, 'string-escape'),
238 ],
239 'single-single-quoted-string': [
240 (r"'", String, 'end-of-string'),
241 (r"[^'\\\n]+", String),
242 (r'\\', String, 'string-escape'),
243 ],
244 'string-escape': [
245 (r'.', String, '#pop'),
246 ],
247 'end-of-string': [
248
249 (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)',
250 bygroups(Operator, Generic.Emph), '#pop:2'),
251
252 (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
253 (r'(\^\^)%(PrefixedName)s' % patterns,
254 bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),
255
256 default('#pop:2'),
257
258 ],
259 }

eric ide

mercurial