eric6/ThirdParty/Pygments/pygments/lexers/data.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.data
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for data file format.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
15 include, bygroups, inherit
16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \
17 Punctuation, Literal, Error
18
19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
20
21
class YamlLexerContext(LexerContext):
    """Lexer context that additionally tracks YAML indentation state.

    The extra attributes are read and updated by the callbacks defined in
    the YAML lexer while it scans a document.
    """

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        # Explicit or detected indentation of the block scalar being
        # scanned; ``None`` while it is not known.
        self.block_scalar_indent = None
        # Candidate indentation collected for the current line.
        self.next_indent = 0
        # Indentation of the current block; -1 means "no block yet".
        self.indent = -1
        # Indentation levels of the enclosing blocks.
        self.indent_stack = []
31
32
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    # ------------------------------------------------------------------
    # Callback factories.  Each one is called while the class body is
    # evaluated (see ``tokens`` below) and returns a generator callback
    # taking ``(lexer, match, context)``.  The callbacks emit
    # ``(position, token, text)`` triples and advance ``context.pos``.
    # ------------------------------------------------------------------

    def something(token_class):
        """Emit `token_class` for the match unless the match is empty."""
        def callback(lexer, match, context):
            matched = match.group()
            if matched:
                yield match.start(), token_class, matched
                context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Emit `token_class` and clear all indentation bookkeeping."""
        def callback(lexer, match, context):
            # Return the context to the state it has right after creation.
            context.block_scalar_indent = None
            context.next_indent = 0
            context.indent = -1
            context.indent_stack = []
            yield match.start(), token_class, match.group()
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Record the matched spaces as a candidate indentation level.

        With ``start=True`` the match is the leading whitespace of a line:
        the candidate level is reset to its length and enclosing blocks
        are closed while the line is indented less than the current block.
        Otherwise the length of the match is added to the candidate level.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            overflow = ''
            if not start:
                context.next_indent += len(spaces)
            else:
                context.next_indent = len(spaces)
                dedented = context.next_indent < context.indent
                # Leave every block that is indented deeper than this line.
                while context.next_indent < context.indent:
                    context.indent = context.indent_stack.pop()
                if dedented and context.next_indent > context.indent:
                    # The line sits between two known levels: the part
                    # beyond the restored level is reported as an error.
                    cut = context.indent
                    spaces, overflow = spaces[:cut], spaces[cut:]
            begin = match.start()
            if spaces:
                yield begin, token_class, spaces
            if overflow:
                yield begin + len(spaces), token_class.Error, overflow
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Commit the candidate indentation level saved earlier.

        Unless `implicit` is true, the length of the matched text is also
        added to the candidate level for whatever follows on the line.
        """
        def callback(lexer, match, context):
            indicator = match.group()
            if context.next_indent > context.indent:
                # Entering a deeper block: remember the level we came from.
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(indicator)
            yield match.start(), token_class, indicator
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Store the explicit indentation given in a block scalar header.

        Group 1 of the match, when present, is the indentation indicator
        digit; it is interpreted relative to the current block indent.
        """
        def callback(lexer, match, context):
            header = match.group()
            context.block_scalar_indent = None
            if header:
                digit = match.group(1)
                if digit:
                    base = max(context.indent, 0)
                    context.block_scalar_indent = base + int(digit)
                yield match.start(), token_class, header
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Tokenize a spaces-only line found inside a block scalar."""
        def callback(lexer, match, context):
            line = match.group()
            begin = match.start()
            limit = context.block_scalar_indent
            if limit is not None and len(line) > limit:
                # Spaces beyond the scalar's indentation belong to the
                # content rather than to the indentation.
                yield begin, indent_token_class, line[:limit]
                yield begin + limit, content_token_class, line[limit:]
            elif line:
                yield begin, indent_token_class, line
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Tokenize leading spaces of a block scalar line.

        Leaves the block scalar (pops two states) when the line is not
        indented deeply enough; the first sufficiently indented line fixes
        the indentation if the header did not specify one.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            width = len(spaces)
            if context.block_scalar_indent is None:
                finished = width <= max(context.indent, 0)
                if not finished:
                    context.block_scalar_indent = width
            else:
                finished = width < context.block_scalar_indent
            if finished:
                # Nothing is consumed; the line is rescanned by the
                # state underneath the two popped ones.
                for _ in range(2):
                    context.stack.pop()
                return
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Tokenize leading spaces of a continuation line of a plain scalar.

        Pops two states without consuming anything when the line is not
        indented deeper than the current block.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            if len(spaces) <= context.indent:
                for _ in range(2):
                    context.stack.pop()
                return
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    tokens = {
        # top-level state
        'root': [
            # whitespace before a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # one or more line breaks
            (r'\n+', Text),
            # a comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start ('---') and document end ('...') indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # leading spaces of a line that carries content
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # the rest of a line after a directive or a block scalar header
        'ignored-line': [
            # whitespace before a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # a comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # the line break ends this state and the one below it
            (r'\n', Text, '#pop:2'),
        ],

        # arguments of the '%YAML' directive
        'yaml-directive': [
            # separating spaces and the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # arguments of the '%TAG' directive
        'tag-directive': [
            # a tag handle followed by the prefix it stands for
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block collection indicators and the spaces around them
        'indentation': [
            # only spaces remain on the line: nothing to indent
            (r'[ ]*$', something(Text), '#pop:2'),
            # spaces in front of a block collection indicator
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # a block collection indicator ('?', ':' or '-')
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # spaces at the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # the content of an indented line in the block context
        'block-line': [
            # end of the line (possibly followed by a comment)
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # spaces between tokens
            (r'[ ]+', Text),
            # a mapping key together with its colon
            (r'([^,:?\[\]{}\n]+)(:)(?=[ ]|$)',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and block scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors and aliases
        'descriptors': [
            # a verbatim tag of the form '!<...>'
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag of the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor ('&name')
            (r'&[\w-]+', Name.Label),
            # an alias ('*name')
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and block scalars
        'block-nodes': [
            # the ':' indicator of an implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # the indicator of a literal ('|') or folded ('>') scalar
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # openers of flow collections and quoted scalars
        'flow-nodes': [
            # start of a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # start of a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # start of a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # start of a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # rules shared by flow sequences and flow mappings
        'flow-collection': [
            # spaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # the '?', ':' and ',' indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested flow collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence delimited by '[' and ']'
        'flow-sequence': [
            # the shared flow collection rules
            include('flow-collection'),
            # the closing bracket
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping delimited by '{' and '}'
        'flow-mapping': [
            # a mapping key together with its colon
            (r'([^,:?\[\]{}\n]+)(:)(?=[ ]|$)',
             bygroups(Name.Tag, Punctuation)),
            # the shared flow collection rules
            include('flow-collection'),
            # the closing brace
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # the lines making up a literal or folded scalar
        'block-scalar-content': [
            # a line break
            (r'\n', Text),
            # a line consisting of spaces only
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # leading spaces (the callback may leave this state)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # the text of the line
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header following a '|' or '>' indicator
        'block-scalar-header': [
            # indentation indicator first, then the chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag first, then the indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # whitespace handling shared by both kinds of quoted scalars
        'quoted-scalar-whitespaces': [
            # spaces at the start or the end of a line are not content
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # neither are line breaks
            (r'\n+', Text),
            # any other run of spaces belongs to the value
            (r'[ ]+', Name.Variable),
        ],

        # the inside of a single-quoted scalar
        'single-quoted-scalar': [
            # whitespace and line breaks
            include('quoted-scalar-whitespaces'),
            # a doubled quote stands for a literal quote character
            (r'\'\'', String.Escape),
            # ordinary non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # the inside of a double-quoted scalar
        'double-quoted-scalar': [
            # whitespace and line breaks
            include('quoted-scalar-whitespaces'),
            # a backslash followed by a single special character
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # hexadecimal escape codes (\x, \u and \U)
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # ordinary non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the start of a new line in the middle of a block plain scalar
        'plain-scalar-in-block-context-new-line': [
            # a line consisting of spaces only
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # a document start or end indicator terminates the scalar
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # leading spaces (the callback may leave the block line state)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # a following ':' indicator ends the scalar
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # spaces followed by a comment end the scalar
            (r'[ ]+(?=#)', Text, '#pop'),
            # spaces at the end of a line are not content
            (r'[ ]+$', Text),
            # a line break; the scalar may continue on the next line
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # any other run of spaces belongs to the value
            (r'[ ]+', Literal.Scalar.Plain),
            # ordinary non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # a following indicator character ends the scalar
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # spaces followed by a comment end the scalar
            (r'[ ]+(?=#)', Text, '#pop'),
            # spaces at the start or the end of a line are not content
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # neither are line breaks
            (r'\n+', Text),
            # any other run of spaces belongs to the value
            (r'[ ]+', Name.Variable),
            # ordinary non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize `text`, creating a YAML-aware context when none is given."""
        ctx = context if context is not None else YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, ctx)
437
438
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    # A double-quoted string.  A backslash always consumes the character
    # that follows it and can never be matched as an ordinary character.
    # This keeps the pattern unambiguous: the previous form
    # ``"(\\\\|\\"|[^"])*"`` could match the same backslashes in several
    # ways, which made the regex engine backtrack exponentially on an
    # unterminated string containing many backslashes, and it accepted
    # ``"a\"`` as a complete string.
    string_part = r'"(\\.|[^"\\])*"'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # floats are tried before integers so that the integer rule
            # does not consume the leading digits of a float
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            (string_part, String.Double),
        ],


        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing bracket terminates the entire object, so pop twice
            (r'\}', Punctuation, '#pop:2'),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            # the attribute name; its value is lexed in 'objectattribute'
            (string_part, Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
516
517
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2
    """

    name = 'JSONBareObject'
    aliases = ['json-object']
    filenames = []
    mimetypes = ['application/json-object']

    tokens = {
        # The input is lexed as the inside of an object from the very
        # first character on.
        'root': [
            # there is no opening brace, so a closing one is an error
            (r'\}', Error),
            include('objectvalue'),
        ],
        # Same rules as in the parent lexer, except that a closing brace
        # is flagged as an error before the inherited rules are tried.
        'objectattribute': [
            (r'\}', Error),
            inherit,
        ],
    }
540
541
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <http://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    tokens = {
        'objectvalue': [
            # Attribute names that are JSON-LD '@' keywords get their own
            # token type; this rule is tried before the inherited ones.
            (r'"@(context|id|value|language|type|'
             r'container|list|set|reverse|index|'
             r'base|vocab|graph)"',
             Name.Decorator, 'objectattribute'),
            # every other rule comes from the parent lexer
            inherit,
        ],
    }

eric ide

mercurial