ThirdParty/Pygments/pygments/scanner.py

changeset 0
de9c2efb9d02
child 684
2f29a0b6e1c7
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.scanner
4 ~~~~~~~~~~~~~~~~
5
6 This library implements a regex-based scanner. Some languages
7 like Pascal are easy to parse but have some keywords that
8 depend on the context. Because of this, it's impossible to lex
9 them using only a regular expression lexer like the
10 `RegexLexer`.
11
12 Have a look at the `DelphiLexer` to get an idea of how to use
13 this scanner.
14
15 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
16 :license: BSD, see LICENSE for details.
17 """
18 import re
19
20
class EndOfText(RuntimeError):
    """
    Signals that a match function was called after the scanner
    had already reached the end of its text.
    """
26
27
class Scanner(object):
    """
    Simple regex based scanner that walks over a fixed text.

    All method patterns are regular expression strings (not
    compiled expressions!). Each pattern is compiled once with the
    scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        self._re_cache = {}

    # Declared with the decorator on purpose: ``property(eos, eos.__doc__)``
    # would pass the docstring as the *setter* (second positional argument),
    # making assignments fail with a confusing TypeError.
    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled regex for `pattern`, compiling it on first use."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            # Flags are applied at compile time, so a cached pattern keeps
            # the flags that were active when it was first seen.
            regex = self._re_cache[pattern] = re.compile(pattern, self.flags)
            return regex

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object, or `None` if it does not match.
        (Doesn't touch pos). Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        Note that ``last`` is overwritten with the current ``match``
        even when the pattern fails; the other fields are only
        updated on success.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # Kept ahead of the match attempt to preserve existing behaviour.
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The pattern used is ``'.'``, so a newline is only consumed
        when the scanner flags include ``re.DOTALL``. The result of
        the underlying scan is not returned.

        :raises EndOfText: if the scanner is already at the end of text
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )

eric ide

mercurial