434 if context is None: |
434 if context is None: |
435 context = YamlLexerContext(text, 0) |
435 context = YamlLexerContext(text, 0) |
436 return super().get_tokens_unprocessed(text, context) |
436 return super().get_tokens_unprocessed(text, context) |
437 |
437 |
438 |
438 |
class JsonLexer(Lexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        # Single-pass character state machine. Exactly one of the
        # ``in_*`` mode flags (or none) is active at a time; ``start``
        # marks where the token currently being accumulated began.
        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    # Inside a \uXXXX escape: count down the four hex
                    # digits (invalid digits just end the escape; no
                    # validation errors are reported).
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    # Defer the token: it may be an object key (Name.Tag)
                    # or a plain value (String.Double); decided later.
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Text, text[start:stop]))
                else:
                    yield start, Text, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    if _token is Text:
                        yield _start, _token, _text
                    elif _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, Error, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            # An unterminated string is an error for the rest of the input.
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Text, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
516 |
633 |
517 |
634 |
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    # Kept only for backward compatibility with code that instantiates
    # this class directly; registration data is emptied so it is no
    # longer selectable by alias, filename, or mimetype (the parent
    # `JsonLexer` now claims 'json-object' / 'application/json-object').
    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []
650 |
541 |
651 |
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <https://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # JSON-LD keywords as they appear in the token stream: quoted and
    # prefixed with '@', e.g. '"@context"'.
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        """Parse JSON-LD data, highlighting JSON-LD keyword keys.

        Delegates tokenization to `JsonLexer` and re-labels object keys
        (Name.Tag) that are JSON-LD keywords as Name.Decorator.
        """
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value