3 pygments.lexers.rdf |
3 pygments.lexers.rdf |
4 ~~~~~~~~~~~~~~~~~~~ |
4 ~~~~~~~~~~~~~~~~~~~ |
5 |
5 |
6 Lexers for semantic web and RDF query languages and markup. |
6 Lexers for semantic web and RDF query languages and markup. |
7 |
7 |
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
8 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. |
9 :license: BSD, see LICENSE for details. |
9 :license: BSD, see LICENSE for details. |
10 """ |
10 """ |
11 |
11 |
12 import re |
12 import re |
13 |
13 |
14 from pygments.lexer import RegexLexer, bygroups, default |
14 from pygments.lexer import RegexLexer, bygroups, default |
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, \ |
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ |
16 Whitespace, Name, Literal, Comment, Text |
16 Whitespace, Name, Literal, Comment, Text |
17 |
17 |
18 __all__ = ['SparqlLexer'] |
18 __all__ = ['SparqlLexer', 'TurtleLexer'] |
19 |
19 |
20 |
20 |
21 class SparqlLexer(RegexLexer): |
21 class SparqlLexer(RegexLexer): |
22 """ |
22 """ |
23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language. |
23 Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language. |
27 name = 'SPARQL' |
27 name = 'SPARQL' |
28 aliases = ['sparql'] |
28 aliases = ['sparql'] |
29 filenames = ['*.rq', '*.sparql'] |
29 filenames = ['*.rq', '*.sparql'] |
30 mimetypes = ['application/sparql-query'] |
30 mimetypes = ['application/sparql-query'] |
31 |
31 |
32 flags = re.IGNORECASE |
32 # terminal productions :: |
|
33 |
|
34 PN_CHARS_BASE = (u'(?:[a-zA-Z' |
|
35 u'\u00c0-\u00d6' |
|
36 u'\u00d8-\u00f6' |
|
37 u'\u00f8-\u02ff' |
|
38 u'\u0370-\u037d' |
|
39 u'\u037f-\u1fff' |
|
40 u'\u200c-\u200d' |
|
41 u'\u2070-\u218f' |
|
42 u'\u2c00-\u2fef' |
|
43 u'\u3001-\ud7ff' |
|
44 u'\uf900-\ufdcf' |
|
45 u'\ufdf0-\ufffd]|' |
|
46 u'[^\u0000-\uffff]|' |
|
47 u'[\ud800-\udbff][\udc00-\udfff])') |
|
48 |
|
49 PN_CHARS_U = '(?:' + PN_CHARS_BASE + '|_)' |
|
50 |
|
51 PN_CHARS = ('(?:' + PN_CHARS_U + r'|[\-0-9' + |
|
52 u'\u00b7' + |
|
53 u'\u0300-\u036f' + |
|
54 u'\u203f-\u2040])') |
|
55 |
|
56 HEX = '[0-9A-Fa-f]' |
|
57 |
|
58 PN_LOCAL_ESC_CHARS = r'[ _~.\-!$&""()*+,;=/?#@%]' |
|
59 |
|
60 IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>' |
|
61 |
|
62 BLANK_NODE_LABEL = '_:(?:' + PN_CHARS_U + '|[0-9])(?:(?:' + PN_CHARS + '|\.)*' + \ |
|
63 PN_CHARS + ')?' |
|
64 |
|
65 PN_PREFIX = PN_CHARS_BASE + '(?:(?:' + PN_CHARS + '|\.)*' + PN_CHARS + ')?' |
|
66 |
|
67 VARNAME = '(?:' + PN_CHARS_U + '|[0-9])(?:' + PN_CHARS_U + \ |
|
68 u'|[0-9\u00b7\u0300-\u036f\u203f-\u2040])*' |
|
69 |
|
70 PERCENT = '%' + HEX + HEX |
|
71 |
|
72 PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS |
|
73 |
|
74 PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')' |
|
75 |
|
76 PN_LOCAL = ('(?:(?:' + PN_CHARS_U + '|[:0-9])|' + PLX + ')' + |
|
77 '(?:(?:(?:' + PN_CHARS + '|[.:])|' + PLX + ')*(?:(?:' + |
|
78 PN_CHARS + '|:)|' + PLX + '))?') |
|
79 |
|
80 EXPONENT = r'[eE][+-]?\d+' |
|
81 |
|
82 # Lexer token definitions :: |
33 |
83 |
34 tokens = { |
84 tokens = { |
35 'root': [ |
85 'root': [ |
36 (r'\s+', Whitespace), |
86 (r'\s+', Text), |
37 (r'(select|construct|describe|ask|where|filter|group\s+by|minus|' |
87 # keywords :: |
38 r'distinct|reduced|from named|from|order\s+by|limit|' |
88 (r'((?i)select|construct|describe|ask|where|filter|group\s+by|minus|' |
|
89 r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|' |
39 r'offset|bindings|load|clear|drop|create|add|move|copy|' |
90 r'offset|bindings|load|clear|drop|create|add|move|copy|' |
40 r'insert\s+data|delete\s+data|delete\s+where|delete|insert|' |
91 r'insert\s+data|delete\s+data|delete\s+where|delete|insert|' |
41 r'using named|using|graph|default|named|all|optional|service|' |
92 r'using\s+named|using|graph|default|named|all|optional|service|' |
42 r'silent|bind|union|not in|in|as|a)', Keyword), |
93 r'silent|bind|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword), |
43 (r'(prefix|base)(\s+)([a-z][\w-]*)(\s*)(\:)', |
94 (r'(a)\b', Keyword), |
44 bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, |
95 # IRIs :: |
45 Punctuation)), |
96 ('(' + IRIREF + ')', Name.Label), |
46 (r'\?[a-z_]\w*', Name.Variable), |
97 # blank nodes :: |
47 (r'<[^>]+>', Name.Label), |
98 ('(' + BLANK_NODE_LABEL + ')', Name.Label), |
48 (r'([a-z][\w-]*)(\:)([a-z][\w-]*)', |
99 # variables :: |
|
100 ('[?$]' + VARNAME, Name.Variable), |
|
101 # prefixed names :: |
|
102 (r'(' + PN_PREFIX + ')?(\:)(' + PN_LOCAL + ')?', |
49 bygroups(Name.Namespace, Punctuation, Name.Tag)), |
103 bygroups(Name.Namespace, Punctuation, Name.Tag)), |
50 (r'(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|' |
104 # function names :: |
|
105 (r'((?i)str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|' |
51 r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|' |
106 r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|' |
52 r'contains|strstarts|strends|strbefore|strafter|year|month|day|' |
107 r'contains|strstarts|strends|strbefore|strafter|year|month|day|' |
53 r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|' |
108 r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|' |
54 r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|' |
109 r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|' |
55 r'isliteral|isnumeric|regex|substr|replace|exists|not exists|' |
110 r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|' |
56 r'count|sum|min|max|avg|sample|group_concat|separator)\b', |
111 r'count|sum|min|max|avg|sample|group_concat|separator)\b', |
57 Name.Function), |
112 Name.Function), |
58 (r'(true|false)', Literal), |
113 # boolean literals :: |
59 (r'[+\-]?\d*\.\d+', Number.Float), |
114 (r'(true|false)', Keyword.Constant), |
60 (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float), |
115 # double literals :: |
|
116 (r'[+\-]?(\d+\.\d*' + EXPONENT + '|\.?\d+' + EXPONENT + ')', Number.Float), |
|
117 # decimal literals :: |
|
118 (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float), |
|
119 # integer literals :: |
61 (r'[+\-]?\d+', Number.Integer), |
120 (r'[+\-]?\d+', Number.Integer), |
62 (r'(\|\||&&|=|\*|\-|\+|/)', Operator), |
121 # operators :: |
63 (r'[(){}.;,:^]', Punctuation), |
122 (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator), |
64 (r'#[^\n]+', Comment), |
123 # punctuation characters :: |
|
124 (r'[(){}.;,:^\[\]]', Punctuation), |
|
125 # line comments :: |
|
126 (r'#[^\n]*', Comment), |
|
127 # strings :: |
65 (r'"""', String, 'triple-double-quoted-string'), |
128 (r'"""', String, 'triple-double-quoted-string'), |
66 (r'"', String, 'single-double-quoted-string'), |
129 (r'"', String, 'single-double-quoted-string'), |
67 (r"'''", String, 'triple-single-quoted-string'), |
130 (r"'''", String, 'triple-single-quoted-string'), |
68 (r"'", String, 'single-single-quoted-string'), |
131 (r"'", String, 'single-single-quoted-string'), |
69 ], |
132 ], |
78 (r'\\', String, 'string-escape'), |
141 (r'\\', String, 'string-escape'), |
79 ], |
142 ], |
80 'triple-single-quoted-string': [ |
143 'triple-single-quoted-string': [ |
81 (r"'''", String, 'end-of-string'), |
144 (r"'''", String, 'end-of-string'), |
82 (r'[^\\]+', String), |
145 (r'[^\\]+', String), |
83 (r'\\', String, 'string-escape'), |
146 (r'\\', String.Escape, 'string-escape'), |
84 ], |
147 ], |
85 'single-single-quoted-string': [ |
148 'single-single-quoted-string': [ |
86 (r"'", String, 'end-of-string'), |
149 (r"'", String, 'end-of-string'), |
87 (r"[^'\\\n]+", String), |
150 (r"[^'\\\n]+", String), |
88 (r'\\', String, 'string-escape'), |
151 (r'\\', String, 'string-escape'), |
89 ], |
152 ], |
90 'string-escape': [ |
153 'string-escape': [ |
91 (r'.', String, '#pop'), |
154 (r'u' + HEX + '{4}', String.Escape, '#pop'), |
|
155 (r'U' + HEX + '{8}', String.Escape, '#pop'), |
|
156 (r'.', String.Escape, '#pop'), |
92 ], |
157 ], |
93 'end-of-string': [ |
158 'end-of-string': [ |
94 (r'(@)([a-z]+(:?-[a-z0-9]+)*)', |
159 (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)', |
95 bygroups(Operator, Name.Function), '#pop:2'), |
160 bygroups(Operator, Name.Function), '#pop:2'), |
96 (r'\^\^', Operator, '#pop:2'), |
161 (r'\^\^', Operator, '#pop:2'), |
97 default('#pop:2'), |
162 default('#pop:2'), |
98 ], |
163 ], |
99 } |
164 } |
|
165 |
|
166 |
|
class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    Prefixed names are recognised with a simplified character range
    (a letter followed by word characters or ``-``) rather than the full
    Unicode productions of the Turtle grammar.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    # Every pattern below is compiled case-insensitively, so e.g. ``[a-z]``
    # and the exponent marker ``E`` also match their other-case forms.
    flags = re.IGNORECASE

    # Reusable regex fragments; each one carries its own capture group so it
    # lines up with exactly one ``bygroups`` slot wherever it is interpolated.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    # PNAME_NS PN_LOCAL (with simplified character range)
    patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'%(PrefixedName)s' % patterns,
             bygroups(Name.Namespace, Name.Tag)),

            # Comment ('*' rather than '+' so that a bare '#' is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # The exponent form must be tried before the plain decimal form,
            # otherwise '1.5E3' is split into '1.5' followed by 'E3'.
            (r'[+\-]?\d*(?:\.\d+)?E[+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # Triple-quoted openers come first so '"""' is not read as '"'.
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Consume text but stop in front of the closing '"""'; a lone
            # '"' or '""' inside the string is still part of the content.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same idea as above, for the "'''" delimiter.
            (r"(?:[^\\']|'(?!''))+", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        'string-escape': [
            # The character following a backslash is taken verbatim.
            (r'.', String, '#pop'),
        ],
        'end-of-string': [
            # Entered on top of a string state, hence every exit pops two
            # levels to get back to the state that opened the string.

            # Language tag, e.g. @en or @en-GB
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            # Datatype given as an IRI or as a prefixed name
            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
            (r'(\^\^)%(PrefixedName)s' % patterns,
             bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),

            # Plain literal without any suffix
            default('#pop:2'),

        ],
    }