434 if context is None: |
434 if context is None: |
435 context = YamlLexerContext(text, 0) |
435 context = YamlLexerContext(text, 0) |
436 return super().get_tokens_unprocessed(text, context) |
436 return super().get_tokens_unprocessed(text, context) |
437 |
437 |
438 |
438 |
class JsonLexer(Lexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        # Single-pass character state machine. Exactly one of the
        # ``in_*`` mode flags (or none) is active at a time; ``start``
        # marks where the token currently being accumulated began.
        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    # Inside a \uXXXX escape: count down the four hex
                    # digits (invalid digits just end the escape; no
                    # validation errors are reported).
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    # Defer the token: it may be an object key (Name.Tag)
                    # or a plain value (String.Double); decided later.
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Text, text[start:stop]))
                else:
                    yield start, Text, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    if _token is Text:
                        yield _start, _token, _text
                    elif _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, Error, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            # An unterminated string is an error for the rest of the input.
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Text, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
516 |
633 |
517 |
634 |
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    # Kept only for backward compatibility with code that instantiates
    # this class directly; registration data is emptied so it is no
    # longer selectable by alias, filename, or mimetype (the parent
    # `JsonLexer` now claims 'json-object' / 'application/json-object').
    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []
650 |
541 |
651 |
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <https://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # JSON-LD keywords as they appear in the token stream: quoted and
    # prefixed with '@', e.g. '"@context"'.
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        """Parse JSON-LD data, highlighting JSON-LD keyword keys.

        Delegates tokenization to `JsonLexer` and re-labels object keys
        (Name.Tag) that are JSON-LD keywords as Name.Decorator.
        """
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value