    pygments.lexer
    ~~~~~~~~~~~~~~

    Base lexer classes.

    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
11 import re |
11 import re |
12 |
|
13 try: |
|
14 set |
|
15 except NameError: |
|
16 from sets import Set as set |
|
17 |
12 |
18 from pygments.filter import apply_filters, Filter |
13 from pygments.filter import apply_filters, Filter |
19 from pygments.filters import get_filter_by_name |
14 from pygments.filters import get_filter_by_name |
20 from pygments.token import Error, Text, Other, _TokenType |
15 from pygments.token import Error, Text, Other, _TokenType |
21 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ |
16 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ |
22 make_analysator |
17 make_analysator |
|
18 import collections |
23 |
19 |
24 |
20 |
25 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', |
21 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', |
26 'LexerContext', 'include', 'flags', 'bygroups', 'using', 'this'] |
22 'LexerContext', 'include', 'bygroups', 'using', 'this'] |
27 |
23 |
28 |
24 |
29 _default_analyse = staticmethod(lambda x: 0.0) |
25 _default_analyse = staticmethod(lambda x: 0.0) |
30 |
26 |
31 |
27 |
49 ``stripnl`` |
45 ``stripnl`` |
50 Strip leading and trailing newlines from the input (default: True). |
46 Strip leading and trailing newlines from the input (default: True). |
51 ``stripall`` |
47 ``stripall`` |
52 Strip all leading and trailing whitespace from the input |
48 Strip all leading and trailing whitespace from the input |
53 (default: False). |
49 (default: False). |
|
50 ``ensurenl`` |
|
51 Make sure that the input ends with a newline (default: True). This |
|
52 is required for some lexers that consume input linewise. |
|
53 *New in Pygments 1.3.* |
54 ``tabsize`` |
54 ``tabsize`` |
55 If given and greater than 0, expand tabs in the input (default: 0). |
55 If given and greater than 0, expand tabs in the input (default: 0). |
56 ``encoding`` |
56 ``encoding`` |
57 If given, must be an encoding name. This encoding will be used to |
57 If given, must be an encoding name. This encoding will be used to |
58 convert the input string to Unicode, if it is not already a Unicode |
58 convert the input string to Unicode, if it is not already a Unicode |
78 |
78 |
79 def __init__(self, **options): |
79 def __init__(self, **options): |
80 self.options = options |
80 self.options = options |
81 self.stripnl = get_bool_opt(options, 'stripnl', True) |
81 self.stripnl = get_bool_opt(options, 'stripnl', True) |
82 self.stripall = get_bool_opt(options, 'stripall', False) |
82 self.stripall = get_bool_opt(options, 'stripall', False) |
|
83 self.ensurenl = get_bool_opt(options, 'ensurenl', True) |
83 self.tabsize = get_int_opt(options, 'tabsize', 0) |
84 self.tabsize = get_int_opt(options, 'tabsize', 0) |
84 self.encoding = options.get('encoding', 'latin1') |
85 self.encoding = options.get('encoding', 'latin1') |
85 # self.encoding = options.get('inencoding', None) or self.encoding |
86 # self.encoding = options.get('inencoding', None) or self.encoding |
86 self.filters = [] |
87 self.filters = [] |
87 for filter_ in get_list_opt(options, 'filters', ()): |
88 for filter_ in get_list_opt(options, 'filters', ()): |
151 text = text.strip() |
152 text = text.strip() |
152 elif self.stripnl: |
153 elif self.stripnl: |
153 text = text.strip('\n') |
154 text = text.strip('\n') |
154 if self.tabsize > 0: |
155 if self.tabsize > 0: |
155 text = text.expandtabs(self.tabsize) |
156 text = text.expandtabs(self.tabsize) |
156 if not text.endswith('\n'): |
157 if self.ensurenl and not text.endswith('\n'): |
157 text += '\n' |
158 text += '\n' |
158 |
159 |
159 def streamer(): |
160 def streamer(): |
160 for i, t, v in self.get_tokens_unprocessed(text): |
161 for i, t, v in self.get_tokens_unprocessed(text): |
161 yield t, v |
162 yield t, v |
367 rex = re.compile(tdef[0], rflags).match |
368 rex = re.compile(tdef[0], rflags).match |
368 except Exception as err: |
369 except Exception as err: |
369 raise ValueError("uncompilable regex %r in state %r of %r: %s" % |
370 raise ValueError("uncompilable regex %r in state %r of %r: %s" % |
370 (tdef[0], state, cls, err)) |
371 (tdef[0], state, cls, err)) |
371 |
372 |
372 assert type(tdef[1]) is _TokenType or hasattr(tdef[1], '__call__'), \ |
373 assert type(tdef[1]) is _TokenType or isinstance(tdef[1], collections.Callable), \ |
373 'token type must be simple type or callable, not %r' % (tdef[1],) |
374 'token type must be simple type or callable, not %r' % (tdef[1],) |
374 |
375 |
375 if len(tdef) == 2: |
376 if len(tdef) == 2: |
376 new_state = None |
377 new_state = None |
377 else: |
378 else: |
412 return tokens |
413 return tokens |
413 |
414 |
def process_tokendef(cls, name, tokendefs=None):
    """Preprocess and cache the token definitions for *name*.

    Stores a freshly-built ``processed`` mapping in ``cls._all_tokens[name]``,
    fills it by running :meth:`_process_state` for every state in *tokendefs*
    (falling back to ``cls.tokens[name]`` when *tokendefs* is not given), and
    returns the mapping.

    :param name: key under which the processed tokens are cached.
    :param tokendefs: optional raw token definitions; defaults to
        ``cls.tokens[name]`` when falsy.
    :return: the processed state table (also cached on the class).
    """
    processed = cls._all_tokens[name] = {}
    tokendefs = tokendefs or cls.tokens[name]
    # Iterate over a snapshot of the state names.  The corrupted source had a
    # redundant 2to3 artifact ``list(list(tokendefs.keys()))``; a single
    # list() suffices.  The snapshot is kept defensively because
    # _process_state (not fully visible here) may touch the mapping.
    for state in list(tokendefs):
        cls._process_state(tokendefs, processed, state)
    return processed
420 |
421 |
421 def __call__(cls, *args, **kwds): |
422 def __call__(cls, *args, **kwds): |
422 if not hasattr(cls, '_tokens'): |
423 if not hasattr(cls, '_tokens'): |
641 break # not strictly necessary |
642 break # not strictly necessary |
642 yield realpos, t, v[oldi:] |
643 yield realpos, t, v[oldi:] |
643 realpos += len(v) - oldi |
644 realpos += len(v) - oldi |
644 |
645 |
645 # leftover tokens |
646 # leftover tokens |
646 if insleft: |
647 while insleft: |
647 # no normal tokens, set realpos to zero |
648 # no normal tokens, set realpos to zero |
648 realpos = realpos or 0 |
649 realpos = realpos or 0 |
649 for p, t, v in itokens: |
650 for p, t, v in itokens: |
650 yield realpos, t, v |
651 yield realpos, t, v |
651 realpos += len(v) |
652 realpos += len(v) |
|
653 try: |
|
654 index, itokens = next(insertions) |
|
655 except StopIteration: |
|
656 insleft = False |
|
657 break # not strictly necessary |
|
658 |