eric6/ThirdParty/Pygments/pygments/lexer.py

changeset 7547:21b0534faebc
parent    6942:2602857055c5
child     7701:25f42e208e08

comparing 7546:bf5f777260a6 with 7547:21b0534faebc
@@ -3,25 +3,23 @@
     pygments.lexer
     ~~~~~~~~~~~~~~
 
     Base lexer classes.
 
-    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
-
-from __future__ import print_function
 
 import re
 import sys
 import time
 
 from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
-    make_analysator, text_type, add_metaclass, iteritems, Future, guess_decode
+    make_analysator, Future, guess_decode
 from pygments.regexopt import regex_opt
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
            'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this',
            'default', 'words']
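This hunk removes the last Python 2 compatibility shims: `print_function` is the default in Python 3, and the `text_type` and `iteritems` helpers from pygments.util are no longer needed. A minimal sketch of what those removed helpers amount to on Python 3 (illustrative, not part of the changeset):

    # What the removed pygments.util helpers reduce to on Python 3:
    text_type = str                      # on Python 2 this was `unicode`

    def iteritems(d):
        return d.items()                 # on Python 2: d.iteritems()

The third removed import, `add_metaclass`, is replaced by native syntax in the next hunk.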
@@ -46,12 +44,11 @@
         if 'analyse_text' in d:
             d['analyse_text'] = make_analysator(d['analyse_text'])
         return type.__new__(mcs, name, bases, d)
 
 
-@add_metaclass(LexerMeta)
-class Lexer(object):
+class Lexer(metaclass=LexerMeta):
     """
     Lexer for a specific language.
 
     Basic options recognized:
     ``stripnl``
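The `@add_metaclass(LexerMeta)` decorator was the portable Python 2/3 way to assign a metaclass; Python 3 supports the keyword form directly, as used here and in the matching hunks below. A side-by-side sketch with illustrative names:

    class Meta(type):
        pass

    # portable spelling removed by this changeset:
    #   @add_metaclass(Meta)
    #   class Example(object): ...

    # Python 3-only spelling used from here on:
    class Example(metaclass=Meta):
        pass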
@@ -143,11 +140,11 @@
         is bypassed even if filters are defined.
 
         Also preprocess the text, i.e. expand tabs and strip it if
         wanted and applies registered filters.
         """
-        if not isinstance(text, text_type):
+        if not isinstance(text, str):
             if self.encoding == 'guess':
                 text, _ = guess_decode(text)
             elif self.encoding == 'chardet':
                 try:
                     import chardet
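With `text_type` gone, the input check becomes `isinstance(text, str)`: on Python 3, `str` is the unicode text type, so only `bytes` input falls through to the decoding branches ('guess', 'chardet', or an explicit codec). A reduced sketch of that dispatch, assuming UTF-8 as the explicit-encoding case (function name is illustrative):

    def ensure_text(text, encoding='utf-8'):
        # bytes -> str; str passes through untouched
        if not isinstance(text, str):
            text = text.decode(encoding)
        return text

    assert ensure_text(b'caf\xc3\xa9') == 'café'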
@@ -250,11 +247,11 @@
     Indicates that a state should include rules from another state.
     """
     pass
 
 
-class _inherit(object):
+class _inherit:
     """
     Indicates the a state should inherit from its superclass.
     """
     def __repr__(self):
         return 'inherit'
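For context, `include` (whose docstring opens this hunk) splices another state's rules into a state at the point where it appears. A minimal usage sketch, not part of the changeset, with an illustrative lexer:

    from pygments.lexer import RegexLexer, include
    from pygments.token import Number, Text

    class NumLexer(RegexLexer):          # illustrative name
        tokens = {
            'common': [(r'\d+', Number), (r'\s+', Text)],
            # the rules of 'common' are expanded in place here
            'root': [include('common'), (r'\S+', Text)],
        }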
@@ -273,11 +270,11 @@
     def __init__(self, *args):
         # tuple.__init__ doesn't do anything
         pass
 
 
-class _PseudoMatch(object):
+class _PseudoMatch:
     """
     A pseudo match object constructed from a string.
     """
 
     def __init__(self, start, text):
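`_PseudoMatch` fakes just enough of the `re.Match` interface (`start`, `end`, `group`) for callbacks that receive a fixed substring instead of a real match, e.g. the per-group sub-matches synthesized by `bygroups`. A sketch of its behavior as defined in this module:

    from pygments.lexer import _PseudoMatch

    m = _PseudoMatch(10, 'chunk')
    # behaves like a match covering text[10:15]
    assert m.start() == 10 and m.end() == 15
    assert m.group() == 'chunk'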
@@ -326,15 +323,16 @@
         if ctx:
             ctx.pos = match.end()
     return callback
 
 
-class _This(object):
+class _This:
     """
     Special singleton used for indicating the caller class.
     Used by ``using``.
     """
+
 this = _This()
 
 
 def using(_other, **kwargs):
     """
@@ -534,11 +532,11 @@
         tokens = {}
         inheritable = {}
         for c in cls.__mro__:
             toks = c.__dict__.get('tokens', {})
 
-            for state, items in iteritems(toks):
+            for state, items in toks.items():
                 curitems = tokens.get(state)
                 if curitems is None:
                     # N.b. because this is assigned by reference, sufficiently
                     # deep hierarchies are processed incrementally (e.g. for
                     # A(B), B(C), C(RegexLexer), B will be premodified so X(B)
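This loop walks the class's `__mro__` so that a subclass's `tokens` table is merged with its bases', which is what makes the `inherit` marker work. A sketch of the behavior being implemented, assuming standard RegexLexer definitions (lexer names illustrative):

    from pygments.lexer import RegexLexer, inherit
    from pygments.token import Keyword, Text

    class BaseLexer(RegexLexer):
        tokens = {'root': [(r'base\b', Keyword), (r'\s+', Text)]}

    class ChildLexer(BaseLexer):
        # `inherit` marks where BaseLexer's 'root' rules get spliced in,
        # courtesy of the __mro__ walk above
        tokens = {'root': [(r'child\b', Keyword.Pseudo), inherit]}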
@@ -580,12 +578,11 @@
             cls._tokens = cls.process_tokendef('', cls.get_tokendefs())
 
         return type.__call__(cls, *args, **kwds)
 
 
-@add_metaclass(RegexLexerMeta)
-class RegexLexer(Lexer):
+class RegexLexer(Lexer, metaclass=RegexLexerMeta):
     """
     Base for simple stateful regular expression-based lexers.
     Simplifies the lexing process so that you need only
     provide a list of states and regular expressions.
     """
@@ -637,18 +634,24 @@
                     if new_state is not None:
                         # state transition
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    statestack.pop()
+                                    if len(statestack) > 1:
+                                        statestack.pop()
                                 elif state == '#push':
                                     statestack.append(statestack[-1])
                                 else:
                                     statestack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del statestack[new_state:]
+                            # pop, but keep at least one state on the stack
+                            # (random code leading to unexpected pops should
+                            # not allow exceptions)
+                            if abs(new_state) >= len(statestack):
+                                del statestack[1:]
+                            else:
+                                del statestack[new_state:]
                         elif new_state == '#push':
                             statestack.append(statestack[-1])
                         else:
                             assert False, "wrong state def: %r" % new_state
                         statetokens = tokendefs[statestack[-1]]
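Both new guards serve the same purpose: malformed input must never pop the last state off the stack, because the very next line indexes `statestack[-1]`. Previously, a '#pop' reached while only 'root' was on the stack raised IndexError, and an integer pop deeper than the stack (e.g. `-3` with two states) emptied it entirely; now the bottom state is preserved. A reduced sketch of the two cases:

    stack = ['root']

    # '#pop' with only the root state left: old code raised IndexError
    if len(stack) > 1:
        stack.pop()
    assert stack == ['root']

    # integer pop deeper than the stack: old `del stack[-3:]` emptied it
    stack = ['root', 'string']
    new_state = -3
    if abs(new_state) >= len(stack):
        del stack[1:]                    # keep the bottom state
    else:
        del stack[new_state:]
    assert stack == ['root']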
@@ -668,11 +671,11 @@
                     pos += 1
             except IndexError:
                 break
 
 
-class LexerContext(object):
+class LexerContext:
     """
     A helper object that holds lexer position data.
     """
 
     def __init__(self, text, pos, stack=None, end=None):
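`LexerContext` is the mutable cursor used by ExtendedRegexLexer in the next hunk: per its `__init__`, `stack` defaults to `['root']` and `end` to `len(text)`. Illustrative use:

    from pygments.lexer import LexerContext

    ctx = LexerContext('print(1)\n', 0)
    # ctx.stack == ['root'], ctx.pos == 0, ctx.end == len(ctx.text)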
@@ -722,18 +725,22 @@
                     if new_state is not None:
                         # state transition
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    ctx.stack.pop()
+                                    if len(ctx.stack) > 1:
+                                        ctx.stack.pop()
                                 elif state == '#push':
                                     ctx.stack.append(ctx.stack[-1])
                                 else:
                                     ctx.stack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del ctx.stack[new_state:]
+                            # see RegexLexer for why this check is made
+                            if abs(new_state) >= len(ctx.stack):
+                                del ctx.state[1:]
+                            else:
+                                del ctx.stack[new_state:]
                         elif new_state == '#push':
                             ctx.stack.append(ctx.stack[-1])
                         else:
                             assert False, "wrong state def: %r" % new_state
                         statetokens = tokendefs[ctx.stack[-1]]
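One caveat in this hunk: the inserted line `del ctx.state[1:]` looks like a typo for `del ctx.stack[1:]`. `LexerContext` (above) defines `text`, `pos`, `stack`, and `end`, but no `state` attribute, so an over-deep integer pop in an ExtendedRegexLexer would raise AttributeError instead of being absorbed. The presumably intended line, mirroring the RegexLexer branch:

    # presumed intent, matching the RegexLexer version of this guard
    if abs(new_state) >= len(ctx.stack):
        del ctx.stack[1:]                # keep the bottom state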
@@ -838,12 +845,11 @@
             info[1] += t1 - t0
             return res
         return match_func
 
 
-@add_metaclass(ProfilingRegexLexerMeta)
-class ProfilingRegexLexer(RegexLexer):
+class ProfilingRegexLexer(RegexLexer, metaclass=ProfilingRegexLexerMeta):
     """Drop-in replacement for RegexLexer that does profiling of its regexes."""
 
     _prof_data = []
     _prof_sort_index = 4  # defaults to time per call
 
