ThirdParty/Pygments/pygments/lexer.py

changeset 2426:da76c71624de
parent    1705:b0fbc9300f2b
child     2525:8b507a9a2d40

diff from 2425:ace8a08028f3 to 2426:da76c71624de
@@ -3,25 +3,25 @@
     pygments.lexer
     ~~~~~~~~~~~~~~
 
     Base lexer classes.
 
-    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
-import re
+import re, itertools
 
 from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
      make_analysator
 import collections
 
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
-           'LexerContext', 'include', 'bygroups', 'using', 'this']
+           'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
 
 
 _encoding_map = [('\xef\xbb\xbf', 'utf-8'),
                  ('\xff\xfe\0\0', 'utf-32'),
                  ('\0\0\xfe\xff', 'utf-32be'),
@@ -71,18 +71,21 @@
     name = None
 
     #: Shortcuts for the lexer
     aliases = []
 
-    #: fn match rules
+    #: File name globs
     filenames = []
 
-    #: fn alias filenames
+    #: Secondary file name globs
     alias_filenames = []
 
-    #: mime types
+    #: MIME types
     mimetypes = []
+
+    #: Priority, should multiple lexers match and no content is provided
+    priority = 0
 
     def __init__(self, **options):
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
         self.stripall = get_bool_opt(options, 'stripall', False)
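The new priority attribute is a tie-breaker for lexer guessing when several lexers claim the same input and content analysis is inconclusive. A minimal sketch of how a caller could use it; pick_lexer and its scoring tuple are illustrative assumptions, not the actual pygments guessing code:

    # Hypothetical helper (not the pygments API): rank candidate lexer
    # classes by their analyse_text() score, then by class priority.
    def pick_lexer(candidates, text):
        def rating(lexer_cls):
            return (lexer_cls.analyse_text(text), lexer_cls.priority)
        return max(candidates, key=rating)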
@@ -160,10 +163,14 @@
                     decoded = str(text, enc.get('encoding') or 'utf-8',
                                   errors='replace')
                 text = decoded
             else:
                 text = text.decode(self.encoding)
+        else:
+            if text.startswith('\ufeff'):
+                text = text[len('\ufeff'):]
+
         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
         if self.stripall:
             text = text.strip()
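The added else-branch strips a leading byte order mark when the caller passes text that is already a unicode string, so the BOM never shows up in the first token. A small illustration of the behaviour being added:

    # A BOM on already-decoded input is now dropped before tokenizing.
    text = '\ufeffimport re'
    if text.startswith('\ufeff'):
        text = text[len('\ufeff'):]
    assert text == 'import re'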
@@ -233,10 +240,20 @@
 class include(str):
     """
     Indicates that a state should include rules from another state.
     """
     pass
+
+
+class _inherit(object):
+    """
+    Indicates that a state should inherit from its superclass.
+    """
+    def __repr__(self):
+        return 'inherit'
+
+inherit = _inherit()
 
 
 class combined(tuple):
     """
     Indicates a state combined from multiple states.
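The inherit singleton is the user-facing half of this changeset: a subclass lexer can override a state and still splice the superclass rules in at a chosen point. A minimal sketch with hypothetical lexer names:

    from pygments.lexer import RegexLexer, inherit
    from pygments.token import Keyword, Name, Text

    class BaseLexer(RegexLexer):
        # Illustrative base lexer.
        tokens = {
            'root': [
                (r'\bif\b', Keyword),
                (r'\w+', Name),
                (r'\s+', Text),
            ],
        }

    class ExtendedLexer(BaseLexer):
        # Overrides 'root' but keeps the parent rules: inherit marks
        # where BaseLexer's 'root' rules are spliced in.
        tokens = {
            'root': [
                (r'\bunless\b', Keyword),
                inherit,
            ],
        }

Without the inherit marker, ExtendedLexer's 'root' would replace the parent state outright.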
@@ -425,10 +442,13 @@
                 # it's a state reference
                 assert tdef != state, "circular state reference %r" % state
                 tokens.extend(cls._process_state(unprocessed, processed,
                                                  str(tdef)))
                 continue
+            if isinstance(tdef, _inherit):
+                # processed already
+                continue
 
             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 
             try:
                 rex = cls._process_regex(tdef[0], rflags)
@@ -452,21 +472,64 @@
         processed = cls._all_tokens[name] = {}
         tokendefs = tokendefs or cls.tokens[name]
         for state in list(tokendefs.keys()):
             cls._process_state(tokendefs, processed, state)
         return processed
+
+    def get_tokendefs(cls):
+        """
+        Merge tokens from superclasses in MRO order, returning a single
+        tokendef dictionary.
+
+        Any state that is not defined by a subclass will be inherited
+        automatically.  States that *are* defined by subclasses will, by
+        default, override that state in the superclass.  If a subclass
+        wishes to inherit definitions from a superclass, it can use the
+        special value "inherit", which will cause the superclass' state
+        definition to be included at that point in the state.
+        """
+        tokens = {}
+        inheritable = {}
+        for c in itertools.chain((cls,), cls.__mro__):
+            toks = c.__dict__.get('tokens', {})
+
+            for state, items in toks.items():
+                curitems = tokens.get(state)
+                if curitems is None:
+                    tokens[state] = items
+                    try:
+                        inherit_ndx = items.index(inherit)
+                    except ValueError:
+                        continue
+                    inheritable[state] = inherit_ndx
+                    continue
+
+                inherit_ndx = inheritable.pop(state, None)
+                if inherit_ndx is None:
+                    continue
+
+                # Replace the "inherit" value with the items
+                curitems[inherit_ndx:inherit_ndx+1] = items
+                try:
+                    new_inh_ndx = items.index(inherit)
+                except ValueError:
+                    pass
+                else:
+                    inheritable[state] = inherit_ndx + new_inh_ndx
+
+        return tokens
 
     def __call__(cls, *args, **kwds):
         """Instantiate cls after preprocessing its token definitions."""
         if '_tokens' not in cls.__dict__:
             cls._all_tokens = {}
             cls._tmpname = 0
             if hasattr(cls, 'token_variants') and cls.token_variants:
                 # don't process yet
                 pass
             else:
-                cls._tokens = cls.process_tokendef('', cls.tokens)
+                cls._tokens = cls.process_tokendef('', cls.get_tokendefs())
 
         return type.__call__(cls, *args, **kwds)
 
 
 class RegexLexer(Lexer, metaclass=RegexLexerMeta):
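get_tokendefs does the actual merging at class-instantiation time: it walks the class's MRO and replaces each inherit marker, in place, with the superclass rules via a slice assignment; the _process_state hunk above then skips any marker that survives because no superclass redefined that state. The core splice, as a standalone illustration:

    # Standalone sketch of the slice-splice in get_tokendefs(): the
    # marker is replaced in place by the parent's rules for the state.
    INHERIT = object()                    # stand-in for pygments' inherit
    child = ['child_rule_1', INHERIT, 'child_rule_2']
    parent = ['parent_rule_1', 'parent_rule_2']
    ndx = child.index(INHERIT)
    child[ndx:ndx + 1] = parent           # the splice removes the marker
    assert child == ['child_rule_1', 'parent_rule_1', 'parent_rule_2',
                     'child_rule_2']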
@@ -540,14 +603,14 @@
                     break
             else:
                 try:
                     if text[pos] == '\n':
                         # at EOL, reset state to "root"
-                        pos += 1
                         statestack = ['root']
                         statetokens = tokendefs['root']
                         yield pos, Text, '\n'
+                        pos += 1
                         continue
                     yield pos, Error, text[pos]
                     pos += 1
                 except IndexError:
                     break
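Reordering the pos increment fixes the offset of the synthetic newline token: it is now yielded at the index of the '\n' itself rather than one character past it. A small check, assuming a hypothetical lexer whose rules never match a newline so the fallback path runs:

    from pygments.lexer import RegexLexer
    from pygments.token import Name

    class TinyLexer(RegexLexer):
        # Deliberately no rule for '\n', forcing the EOL fallback path.
        tokens = {'root': [(r'[a-z]+', Name)]}

    toks = list(TinyLexer().get_tokens_unprocessed('ab\ncd'))
    # With the fix the newline is reported at index 2 (text[2] == '\n');
    # with the old order it was reported at index 3.
    assert toks[1][0] == 2 and toks[1][2] == '\n'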
@@ -602,11 +665,17 @@
                                 statetokens = tokendefs[ctx.stack[-1]]
                         # CAUTION: callback must set ctx.pos!
                     if new_state is not None:
                         # state transition
                         if isinstance(new_state, tuple):
-                            ctx.stack.extend(new_state)
+                            for state in new_state:
+                                if state == '#pop':
+                                    ctx.stack.pop()
+                                elif state == '#push':
+                                    ctx.stack.append(ctx.stack[-1])
+                                else:
+                                    ctx.stack.append(state)
                         elif isinstance(new_state, int):
                             # pop
                             del ctx.stack[new_state:]
                         elif new_state == '#push':
                             ctx.stack.append(ctx.stack[-1])
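With this change a tuple target in ExtendedRegexLexer may mix real state names with '#pop' and '#push', so a single rule can leave one state and enter another. A hypothetical lexer showing the new form (a pop followed by a push, net effect: replace the top of the stack):

    from pygments.lexer import ExtendedRegexLexer
    from pygments.token import Punctuation, Name, Text

    class TupleStateLexer(ExtendedRegexLexer):
        # Hypothetical: on '}', pop 'block' and push 'after' in one rule.
        tokens = {
            'root': [
                (r'\{', Punctuation, 'block'),
                (r'\s+', Text),
            ],
            'block': [
                (r'\}', Punctuation, ('#pop', 'after')),
                (r'\w+', Name),
            ],
            'after': [
                (r';', Punctuation, '#pop'),
            ],
        }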
@@ -618,14 +687,14 @@
                 try:
                     if ctx.pos >= ctx.end:
                         break
                     if text[ctx.pos] == '\n':
                         # at EOL, reset state to "root"
-                        ctx.pos += 1
                         ctx.stack = ['root']
                         statetokens = tokendefs['root']
                         yield ctx.pos, Text, '\n'
+                        ctx.pos += 1
                         continue
                     yield ctx.pos, Error, text[ctx.pos]
                     ctx.pos += 1
                 except IndexError:
                     break
@@ -690,6 +759,5 @@
             try:
                 index, itokens = next(insertions)
             except StopIteration:
                 insleft = False
                 break  # not strictly necessary
-
