ThirdParty/Pygments/pygments/lexers/data.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.data
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for data file format.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
15 include, bygroups, inherit
16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \
17 Punctuation, Literal
18
19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonLdLexer']
20
21
class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        # Same starting values that YamlLexer.reset_indent restores.
        # Explicit/detected indentation of the block scalar being scanned,
        # or None when no such indentation is known.
        self.block_scalar_indent = None
        # Candidate indentation of the line currently being scanned.
        self.next_indent = 0
        # Active indentation level; starts below every real column.
        self.indent = -1
        # Previously active indentation levels, innermost last.
        self.indent_stack = []
31
32
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    # The functions below are not methods: they are called while the class
    # body is evaluated (inside ``tokens``) and each returns a lexer
    # callback taking ``(lexer, match, context)``.

    def something(token_class):
        """Emit the match as `token_class`, but never an empty token."""
        def callback(lexer, match, context):
            matched = match.group()
            if matched:
                yield match.start(), token_class, matched
                context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Emit the match and restore the initial indentation state."""
        def callback(lexer, match, context):
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, match.group()
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Record a candidate indentation level from the matched spaces.

        With `start` true the match is the leading whitespace of a line
        and replaces the candidate level; otherwise its width is added to
        the candidate level.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            overflow = ''
            if not start:
                context.next_indent += len(spaces)
            else:
                context.next_indent = len(spaces)
                if context.next_indent < context.indent:
                    # Dedent: unwind saved levels until one fits.
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        # The line sits between two known levels; the
                        # surplus spaces are reported as an error token.
                        overflow = spaces[context.indent:]
                        spaces = spaces[:context.indent]
            if spaces:
                yield match.start(), token_class, spaces
            if overflow:
                yield match.start()+len(spaces), token_class.Error, overflow
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Promote the saved candidate level to the active indentation.

        Unless `implicit` is true, the width of the matched text is also
        added to the candidate level.
        """
        def callback(lexer, match, context):
            indicator = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(indicator)
            yield match.start(), token_class, indicator
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar.

        Group 1 of the match, when present, is the indentation digit; it is
        added to the active indentation (taken as at least 0).
        """
        def callback(lexer, match, context):
            header = match.group()
            context.block_scalar_indent = None
            if not header:
                return
            digit = match.group(1)
            if digit:
                base = max(context.indent, 0)
                context.block_scalar_indent = base + int(digit)
            yield match.start(), token_class, header
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process a whitespace-only line inside a block scalar.

        Spaces up to the block scalar indentation are emitted as
        `indent_token_class`; any spaces beyond it as `content_token_class`.
        """
        def callback(lexer, match, context):
            line = match.group()
            limit = context.block_scalar_indent
            if limit is not None and len(line) > limit:
                yield match.start(), indent_token_class, line[:limit]
                yield (match.start()+limit,
                       content_token_class, line[limit:])
            elif line:
                yield match.start(), indent_token_class, line
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar.

        Pops two states (leaving the block scalar) when the line is not
        indented deeply enough; otherwise emits the spaces, fixing the
        block scalar indentation on first use if it was not yet known.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            width = len(spaces)
            undetermined = context.block_scalar_indent is None
            if undetermined:
                too_shallow = width <= max(context.indent, 0)
            else:
                too_shallow = width < context.block_scalar_indent
            if too_shallow:
                context.stack.pop()
                context.stack.pop()
                return
            if undetermined:
                context.block_scalar_indent = width
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar.

        Pops two states when the line is not indented beyond the active
        indentation; otherwise emits the spaces.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            if len(spaces) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    tokens = {
        # top-level rules
        'root': [
            # whitespace before a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the %YAML directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start ('---') and document end ('...') indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # leading indentation of a non-empty line
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # rest of the line after a directive or a block scalar header
        'ignored-line': [
            # whitespace before a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the line break ends this state and the one below it
            (r'\n', Text, '#pop:2'),
        ],

        # argument of the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # arguments of the %TAG directive
        'tag-directive': [
            # a tag handle followed by its prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block collection indicators and indentation spaces
        'indentation': [
            # only whitespace left on the line: nothing to indent
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespace preceding a block collection indicator
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # end of the line (possibly before a comment)
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespace separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag: '!<...>'
            (r'!<[\w;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag of the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+)?'
             r'(?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # the ':' ending an implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal ('|') and folded ('>') scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # rules shared by flow sequences and flow mappings
        'flow-collection': [
            # whitespace
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence delimited by '[' and ']'
        'flow-sequence': [
            # shared flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping delimited by '{' and '}'
        'flow-mapping': [
            # shared flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # the lines of a literal or folded scalar
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # a whitespace-only line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (the callback may leave this state)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header following a '|' or '>' block scalar indicator
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and significant whitespace in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespace is ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # any other whitespace is part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # a doubled quote escapes the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # backslash escapes of single special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # hexadecimal escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the start of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # whitespace-only lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (the callback may leave the block line)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends at the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends at whitespace followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespace is ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # any other whitespace is part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends at an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends at a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespace is ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # any other whitespace is part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize `text`, creating a YamlLexerContext when none is given."""
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
431
432
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    # DOTALL lets '.' in `string_part` match a newline as well.
    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    # A double-quoted string.  The two alternatives cannot start on the
    # same character (a backslash pair vs. anything that is neither a quote
    # nor a backslash), so the pattern matches in linear time.  The former
    # pattern r'"(\\\\|\\"|[^"])*"' let [^"] match a backslash too, which
    # made an unterminated string full of backslashes backtrack
    # exponentially.
    string_part = r'"(\\.|[^"\\])*"'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # floats are tried before integers so that the integer rule
            # does not consume only the leading digits of a float
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            (string_part, String.Double),
        ],


        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing brace terminates the entire object, so pop twice
            (r'\}', Punctuation, ('#pop', '#pop')),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            # the attribute name
            (string_part, Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # a json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value
        # (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
510
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <http://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    tokens = {
        # Only the object state is extended; every other state comes from
        # JsonLexer unchanged.
        'objectvalue': [
            # '@'-prefixed keywords used as attribute names get their own
            # token type; this rule is listed ahead of the inherited ones
            (r'"@(context|id|value|language|type|container|list|set|'
             r'reverse|index|base|vocab|graph)"', Name.Decorator,
             'objectattribute'),
            # followed by the rules of JsonLexer's 'objectvalue' state
            inherit,
        ],
    }

eric ide

mercurial