eric6/ThirdParty/Pygments/pygments/lexer.py

changeset 7701:25f42e208e08
parent    7547:21b0534faebc
child     7983:54c5cfbb1e29

diff -r a3cf077a8db3 -r 25f42e208e08 eric6/ThirdParty/Pygments/pygments/lexer.py
--- a/eric6/ThirdParty/Pygments/pygments/lexer.py
+++ b/eric6/ThirdParty/Pygments/pygments/lexer.py
@@ -3,11 +3,11 @@
     pygments.lexer
     ~~~~~~~~~~~~~~

     Base lexer classes.

-    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """

 import re
 import sys
@@ -146,14 +146,14 @@
             if self.encoding == 'guess':
                 text, _ = guess_decode(text)
             elif self.encoding == 'chardet':
                 try:
                     import chardet
-                except ImportError:
+                except ImportError as e:
                     raise ImportError('To enable chardet encoding guessing, '
                                       'please install the chardet library '
-                                      'from http://chardet.feedparser.org/')
+                                      'from http://chardet.feedparser.org/') from e
                 # check for BOM first
                 decoded = None
                 for bom, encoding in _encoding_map:
                     if text.startswith(bom):
                         decoded = text[len(bom):].decode(encoding, 'replace')
@@ -164,15 +164,15 @@
                     decoded = text.decode(enc.get('encoding') or 'utf-8',
                                           'replace')
                 text = decoded
             else:
                 text = text.decode(self.encoding)
-                if text.startswith(u'\ufeff'):
-                    text = text[len(u'\ufeff'):]
+                if text.startswith('\ufeff'):
+                    text = text[len('\ufeff'):]
         else:
-            if text.startswith(u'\ufeff'):
-                text = text[len(u'\ufeff'):]
+            if text.startswith('\ufeff'):
+                text = text[len('\ufeff'):]

         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
         if self.stripall:
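For orientation, the context around this hunk decodes raw bytes by checking for a byte-order mark first (the `_encoding_map` table pairs BOM byte sequences with codec names) before falling back to chardet. A minimal, self-contained sketch of that BOM-first strategy, using stdlib `codecs` constants as a hypothetical stand-in for the lexer's own table:

    import codecs

    # (BOM bytes, codec name) pairs -- a hypothetical stand-in for the
    # lexer's _encoding_map; UTF-32 must come before UTF-16 because the
    # UTF-32-LE BOM starts with the UTF-16-LE BOM bytes.
    _bom_map = [
        (codecs.BOM_UTF8, 'utf-8'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
    ]

    def decode_with_bom(data, fallback='utf-8'):
        # Strip a recognised BOM and decode with the matching codec,
        # otherwise decode with the fallback encoding.
        for bom, encoding in _bom_map:
            if data.startswith(bom):
                return data[len(bom):].decode(encoding, 'replace')
        return data.decode(fallback, 'replace')

    assert decode_with_bom(codecs.BOM_UTF8 + b'abc') == 'abc'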
@@ -494,11 +494,11 @@

             try:
                 rex = cls._process_regex(tdef[0], rflags, state)
             except Exception as err:
                 raise ValueError("uncompilable regex %r in state %r of %r: %s" %
-                                 (tdef[0], state, cls, err))
+                                 (tdef[0], state, cls, err)) from err

             token = cls._process_token(tdef[1])

             if len(tdef) == 2:
                 new_state = None
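Both `from` additions in this changeset (the chardet import above and the uncompilable-regex error here) adopt PEP 3134 explicit exception chaining: the caught exception is stored on the new error's `__cause__`, so the traceback reports it as the direct cause of the friendlier message rather than as an unrelated failure that occurred during handling. A minimal sketch of the pattern (`compile_or_explain` is a hypothetical helper, not part of Pygments):

    import re

    def compile_or_explain(pattern):
        try:
            return re.compile(pattern)
        except re.error as err:
            # 'from err' chains the low-level re.error onto the new
            # ValueError, so the traceback reads "The above exception
            # was the direct cause of the following exception".
            raise ValueError('uncompilable regex %r: %s'
                             % (pattern, err)) from err

    try:
        compile_or_explain('(unclosed')
    except ValueError as exc:
        assert isinstance(exc.__cause__, re.error)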
@@ -626,12 +626,11 @@
                 if m:
                     if action is not None:
                         if type(action) is _TokenType:
                             yield pos, action, m.group()
                         else:
-                            for item in action(self, m):
-                                yield item
+                            yield from action(self, m)
                     pos = m.end()
                     if new_state is not None:
                         # state transition
                         if isinstance(new_state, tuple):
                             for state in new_state:
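This hunk, and the matching ones in ExtendedRegexLexer.get_tokens_unprocessed, do_insertions, and ProfilingRegexLexer below, replace a two-line for/yield loop with PEP 380 generator delegation. The two spellings produce the same token stream; `yield from` is shorter and also forwards send()/throw() to the sub-generator, which the explicit loop does not. A self-contained comparison (the helper names are illustrative):

    def relay_old(items):
        # pre-PEP 380 spelling: iterate the sub-iterable, re-yield each item
        for item in items:
            yield item

    def relay_new(items):
        # PEP 380 delegation, as used throughout this changeset
        yield from items

    assert list(relay_old('abc')) == list(relay_new('abc')) == ['a', 'b', 'c']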
@@ -662,11 +661,11 @@
                 try:
                     if text[pos] == '\n':
                         # at EOL, reset state to "root"
                         statestack = ['root']
                         statetokens = tokendefs['root']
-                        yield pos, Text, u'\n'
+                        yield pos, Text, '\n'
                         pos += 1
                         continue
                     yield pos, Error, text[pos]
                     pos += 1
                 except IndexError:
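The `u''` prefixes dropped here and in the remaining hunks are no-ops on Python 3, where every string literal is already Unicode; the prefix exists only for source compatibility with Python 2 (PEP 414). A quick demonstration:

    # On Python 3 the u prefix changes nothing: both forms are str.
    assert u'\n' == '\n'
    assert type(u'\ufeff') is type('\ufeff') is str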
@@ -714,12 +713,11 @@
                     if action is not None:
                         if type(action) is _TokenType:
                             yield ctx.pos, action, m.group()
                             ctx.pos = m.end()
                         else:
-                            for item in action(self, m, ctx):
-                                yield item
+                            yield from action(self, m, ctx)
                             if not new_state:
                                 # altered the state stack?
                                 statetokens = tokendefs[ctx.stack[-1]]
                     # CAUTION: callback must set ctx.pos!
                     if new_state is not None:
@@ -751,11 +749,11 @@
                         break
                     if text[ctx.pos] == '\n':
                         # at EOL, reset state to "root"
                         ctx.stack = ['root']
                         statetokens = tokendefs['root']
-                        yield ctx.pos, Text, u'\n'
+                        yield ctx.pos, Text, '\n'
                         ctx.pos += 1
                         continue
                     yield ctx.pos, Error, text[ctx.pos]
                     ctx.pos += 1
                 except IndexError:
@@ -779,12 +777,11 @@
     insertions = iter(insertions)
     try:
         index, itokens = next(insertions)
     except StopIteration:
         # no insertions
-        for item in tokens:
-            yield item
+        yield from tokens
         return

     realpos = None
     insleft = True

@@ -854,12 +851,11 @@
     _prof_sort_index = 4  # defaults to time per call

     def get_tokens_unprocessed(self, text, stack=('root',)):
         # this needs to be a stack, since using(this) will produce nested calls
         self.__class__._prof_data.append({})
-        for tok in RegexLexer.get_tokens_unprocessed(self, text, stack):
-            yield tok
+        yield from RegexLexer.get_tokens_unprocessed(self, text, stack)
         rawdata = self.__class__._prof_data.pop()
         data = sorted(((s, repr(r).strip('u\'').replace('\\\\', '\\')[:65],
                         n, 1000 * t, 1000 * t / n)
                        for ((s, r), (n, t)) in rawdata.items()),
                       key=lambda x: x[self._prof_sort_index],
