eric6/ThirdParty/Pygments/pygments/lexers/data.py

changeset 7983
54c5cfbb1e29
parent 7701
25f42e208e08
equal deleted inserted replaced
7982:48d210e41c65 7983:54c5cfbb1e29
3 pygments.lexers.data 3 pygments.lexers.data
4 ~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for data file formats. 6 Lexers for data file formats.
7 7
8 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \ 14 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, LexerContext, \
15 include, bygroups, inherit 15 include, bygroups, inherit
16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \ 16 from pygments.token import Text, Comment, Keyword, Name, String, Number, \
17 Punctuation, Literal, Error 17 Punctuation, Literal, Error
18 18
19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer'] 19 __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
434 if context is None: 434 if context is None:
435 context = YamlLexerContext(text, 0) 435 context = YamlLexerContext(text, 0)
436 return super().get_tokens_unprocessed(text, context) 436 return super().get_tokens_unprocessed(text, context)
437 437
438 438
class JsonLexer(Lexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # Character-class sets driving the scanner below. Membership is the
    # only check performed -- no validation of integers, floats, or
    # constants is done. As long as the characters are members of the
    # following sets, the token will be considered valid. For example:
    #
    #     "--1--"    is parsed as an integer
    #     "1...eee"  is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        # Scanner mode flags; at most one "in_*" mode is active at a time
        # (the string mode carries its own escape sub-state).
        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False

        # Index where the token currently being scanned began.
        start = 0

        # Pending tokens whose final type cannot be decided yet. JSON
        # object keys are tokenized differently from string values, but
        # the two cannot be distinguished until punctuation is seen
        # outside the string: a ":" right after the closing quote marks
        # the string as an object key; anything else leaves it a plain
        # string value.
        #
        # Each entry is a tuple:
        #
        #     (start_index, token_type, text)
        #
        # Double-quoted text is queued as String.Double by default; the
        # type is rewritten to Name.Tag if a colon follows the string.
        pending = []

        for pos, ch in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    # Consuming the four hex digits of a \uXXXX escape.
                    if ch in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if ch == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif ch == '\\':
                    in_escape = True

                elif ch == '"':
                    # String closed; type stays provisional until the
                    # next non-string character is seen.
                    pending.append((start, String.Double, text[start:pos + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if ch in self.whitespaces:
                    continue

                if pending:
                    pending.append((start, Text, text[start:pos]))
                else:
                    yield start, Text, text[start:pos]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if ch in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:pos]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if ch in self.integers:
                    continue
                elif ch in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:pos]
                else:
                    yield start, Number.Integer, text[start:pos]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if ch in self.punctuations:
                    continue

                yield start, Punctuation, text[start:pos]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            # A new token starts here.
            start = pos

            if ch == '"':
                in_string = True

            elif ch in self.whitespaces:
                in_whitespace = True

            elif ch in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from pending
                pending.clear()

                in_constant = True

            elif ch in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from pending
                pending.clear()

                in_number = True

            elif ch == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in pending:
                    if _token is Text:
                        yield _start, _token, _text
                    elif _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, Error, _text
                pending.clear()

                in_punctuation = True

            elif ch in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from pending
                pending.clear()

                in_punctuation = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from pending
                pending.clear()

                yield start, Error, ch

        # Yield any remaining text.
        yield from pending
        if in_string:
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Text, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
516 633
517 634
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    # Kept only for backward compatibility: no aliases, filenames or
    # mimetypes are registered, so this lexer is never auto-selected.
    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []
541 651
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <https://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # JSON-LD keywords, pre-wrapped in double quotes so they can be
    # compared directly against the quoted string tokens produced by
    # JsonLexer (which include the surrounding '"' characters).
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        """Re-tag JSON-LD keyword object keys as ``Name.Decorator``.

        Delegates tokenization to :class:`JsonLexer` and rewrites only
        the token type of object keys that match a JSON-LD ``@keyword``;
        all other tokens pass through unchanged.
        """
        # Zero-argument super() -- consistent with the style already used
        # by YamlLexer.get_tokens_unprocessed in this module.
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value

eric ide

mercurial