|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.data |
|
4 ~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for data file format. |
|
7 |
|
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \ |
|
15 include, bygroups, inherit |
|
16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \ |
|
17 Punctuation, Literal |
|
18 |
|
19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonLdLexer'] |
|
20 |
|
21 |
|
class YamlLexerContext(LexerContext):
    """Lexer context carrying the indentation state used by the YAML lexer."""

    def __init__(self, *args, **kwargs):
        # Let the base context initialise itself from whatever it was given.
        super(YamlLexerContext, self).__init__(*args, **kwargs)
        # Explicit indentation of the current block scalar; None when unset.
        self.block_scalar_indent = None
        # Current indentation level and the candidate for the next one.
        self.indent, self.next_indent = -1, 0
        # Previously active indentation levels.
        self.indent_stack = []
|
31 |
|
32 |
|
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    Indentation is tracked on the lexer context (``indent``, ``next_indent``,
    ``indent_stack`` and ``block_scalar_indent``); the callback factories
    defined below read and update those attributes while emitting tokens.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    # NOTE: the functions below take no ``self``.  They are called while the
    # class body is executed (inside the ``tokens`` table) and each returns a
    # ``callback(lexer, match, context)`` generator used as a rule action.

    def something(token_class):
        """Return a callback that emits the match unless it is empty.

        A non-empty match is yielded as ``token_class`` and ``context.pos``
        is moved to the end of the match; an empty match yields nothing and
        leaves ``context.pos`` untouched.
        """
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Return a callback that resets the indentation levels.

        All four indentation attributes of the context are put back to their
        initial values, then the matched text is emitted as ``token_class``.
        """
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Return a callback that saves a possible indentation level.

        With ``start=True`` the length of the match becomes
        ``context.next_indent`` and, on a dedent, saved levels are popped
        from ``context.indent_stack``.  Otherwise the length of the match is
        added to ``context.next_indent``.
        """
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    # dedent: unwind to the closest saved level that is not
                    # deeper than the new indentation
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        # the new indentation lies between two saved levels;
                        # the surplus spaces are reported separately below
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                # surplus indentation is emitted as the Error sub-token
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Return a callback that sets the previously saved indentation level.

        If ``context.next_indent`` is deeper than ``context.indent``, the
        current level is pushed on ``context.indent_stack`` and replaced.
        Unless ``implicit`` is true, the length of the match is then added to
        ``context.next_indent``.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Return a callback that sets an explicit block scalar indentation.

        ``context.block_scalar_indent`` is always cleared first.  When group 1
        of the match (the indentation indicator digit) is present, the level
        becomes the current indentation (at least 0) plus that digit.
        """
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                # empty header: nothing to emit, position is unchanged
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Return a callback that processes an empty line in a block scalar.

        If no block scalar indentation is known, or the line is not longer
        than it, the whole match is emitted as ``indent_token_class``.
        Otherwise the part beyond the block scalar indentation is emitted as
        ``content_token_class``.
        """
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                # split the spaces into indentation and scalar content
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Return a callback that processes indentation in a block scalar.

        The first indented line fixes ``context.block_scalar_indent`` when it
        was not given explicitly.  A line that is not indented deeply enough
        ends the scalar: two states are popped from ``context.stack`` and
        nothing is emitted, so the spaces are left for the outer state.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    # presumably 'block-scalar-content' and the enclosing
                    # 'block-line' state -- see the 'block-nodes' rules
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Return a callback that processes indentation in a plain scalar.

        A line indented no deeper than ``context.indent`` ends the scalar:
        two states are popped from ``context.stack`` and nothing is emitted.
        Otherwise the spaces are emitted as ``token_class``.
        """
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                # the rule using this callback additionally applies '#pop'
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar (pure lookahead: only switches the state)
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+)?'
             r'(?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar (pure lookahead: only switches the state)
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header of a literal or folded scalar (the characters that
        # follow the '|' or '>' indicator)
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize ``text``, creating a YAML context when none is given.

        A fresh ``YamlLexerContext`` starting at position 0 is used unless
        the caller supplies its own ``context``; the actual work is done by
        the parent class.
        """
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
|
431 |
|
432 |
|
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    The state machine mirrors the JSON grammar: ``root`` is a single value,
    a value is either a simple terminal or an object/array, and objects
    switch to ``objectattribute`` after each key.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    # with DOTALL, '.' in the string rules also matches a line break
    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # the float rule comes first so that '1.5' is not split into an
            # integer followed by an error
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            # A string: either a backslash escape or any character other
            # than a backslash or a quote.  The alternatives must not
            # overlap; if a backslash could also be consumed as an ordinary
            # character, an unterminated string containing many backslashes
            # would make the regex engine backtrack exponentially.
            (r'"(\\.|[^\\"])*"', String.Double),
        ],


        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing bracket terminates the entire object, so pop twice
            (r'\}', Punctuation, ('#pop', '#pop')),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            # attribute name; same unambiguous string pattern as above
            (r'"(\\.|[^\\"])*"', Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
|
510 |
|
class JsonLdLexer(JsonLexer):
    """
    Lexer for `JSON-LD <http://json-ld.org/>`_ linked data.

    Behaves like the JSON lexer, except that the JSON-LD keywords are
    recognised when they appear as object keys.

    .. versionadded:: 2.0
    """

    mimetypes = ['application/ld+json']
    filenames = ['*.jsonld']
    aliases = ['jsonld', 'json-ld']
    name = 'JSON-LD'

    tokens = {
        'objectvalue': [
            # JSON-LD keywords take precedence over ordinary attribute names
            (r'"@(context|id|value|language|type|container|'
             r'list|set|reverse|index|base|vocab|graph)"',
             Name.Decorator, 'objectattribute'),
            # everything else is handled by the parent's 'objectvalue' rules
            inherit,
        ],
    }