|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.data |
|
4 ~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for data file formats. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \ |
|
15 include, bygroups, inherit |
|
16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \ |
|
17 Punctuation, Literal, Error |
|
18 |
|
19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer'] |
|
20 |
|
21 |
|
class YamlLexerContext(LexerContext):
    """Lexer context that also carries the YAML indentation state."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        # Indentation of the block scalar being scanned; None until an
        # explicit indicator or the first content line fixes it.
        self.block_scalar_indent = None
        # Candidate indentation measured on the current line.
        self.next_indent = 0
        # Indentation level currently in effect; -1 means none is set yet.
        self.indent = -1
        # Enclosing indentation levels, innermost last.
        self.indent_stack = []
|
31 |
|
32 |
|
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    # The functions below are callback factories evaluated while the class
    # body executes (they take no ``self``).  Each returns a generator
    # callback ``callback(lexer, match, context)`` for use in ``tokens``.
    # A callback must advance ``context.pos`` itself whenever it consumes
    # text.

    def something(token_class):
        """Emit the match as `token_class`, but never an empty token."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                # Empty match: nothing to emit, position stays unchanged.
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Emit the match and reset all indentation state to its defaults."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Record a possible indentation level in ``context.next_indent``.

        With ``start=True`` the match is the leading whitespace of a line:
        the candidate indentation is set to its length and enclosing levels
        deeper than it are popped.  If the candidate then lies strictly
        between two known levels, the surplus spaces are emitted as
        ``token_class.Error``.  Otherwise the match length is added to the
        current candidate.
        """
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    # Dedent: unwind to the nearest level not deeper than
                    # the new indentation.
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        # Misaligned indentation.  ``context.indent`` may be
                        # the -1 "no level" sentinel here; clamp it so it is
                        # never used as a negative slice index.
                        valid = max(context.indent, 0)
                        extra = text[valid:]
                        text = text[:valid]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level.

        The current level is pushed on the stack when the saved candidate is
        deeper.  Unless `implicit` is true, the matched indicator's length is
        added to the candidate for what follows on the line.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar.

        Group 1 of the match, when present, is the indentation indicator
        digit; it is added to the current indentation level (at least 0).
        Without it, the indentation is left as ``None``.
        """
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                # No header characters at all: nothing to emit.
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                context.block_scalar_indent = current_indent + int(increment)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process a whitespace-only line in a block scalar.

        Spaces beyond the known block scalar indentation are emitted as
        content; everything else is emitted as indentation.
        """
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar.

        The first content line fixes the scalar's indentation when no
        explicit indicator was given.  A line that is not indented deeply
        enough ends the scalar: two states are popped and nothing is
        consumed, so the line is lexed again in the state below.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar.

        A continuation line must be indented deeper than the current level;
        otherwise two states are popped and nothing is consumed.
        """
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # key with colon
            (r'([^,:?\[\]{}\n]+)(:)(?=[ ]|$)',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'([^,:?\[\]{}\n]+)(:)(?=[ ]|$)',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Lex `text`, creating a `YamlLexerContext` when none is supplied."""
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
|
437 |
|
438 |
|
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    # a double-quoted string: either a backslash escape (backslash plus any
    # one character) or any character other than a quote or a backslash.
    # The two alternatives never match at the same position, so an
    # unterminated string full of backslashes cannot cause exponential
    # backtracking.
    string_part = r'"(?:\\.|[^"\\])*"'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # floats must be tried before integers, since every float
            # starts with an integer part
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            (string_part, String.Double),
        ],

        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing bracket terminates the entire object, so pop twice
            (r'\}', Punctuation, '#pop:2'),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            (string_part, Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
|
516 |
|
517 |
|
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2
    """

    name = 'JSONBareObject'
    aliases = ['json-object']
    mimetypes = ['application/json-object']
    # no file name patterns are associated with this lexer
    filenames = []

    tokens = {
        # the document starts directly inside an object body
        'root': [
            # a closing brace has no opening counterpart here
            (r'\}', Error),
            include('objectvalue'),
        ],
        # same as the parent's rules, except that '}' is flagged first
        'objectattribute': [
            (r'\}', Error),
            inherit,
        ],
    }
|
540 |
|
541 |
|
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <http://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    tokens = {
        'objectvalue': [
            # the listed "@..." attribute names are tried before the
            # inherited attribute-name rule and get their own token type
            (r'"@(context|id|value|language|type|container|'
             r'list|set|reverse|index|base|vocab|graph)"',
             Name.Decorator, 'objectattribute'),
            inherit,
        ],
    }
|
561 } |