ThirdParty/Pygments/pygments/scanner.py

changeset 0
de9c2efb9d02
child 684
2f29a0b6e1c7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ThirdParty/Pygments/pygments/scanner.py	Mon Dec 28 16:03:33 2009 +0000
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.scanner
+    ~~~~~~~~~~~~~~~~
+
+    This library implements a regex based scanner. Some languages
+    like Pascal are easy to parse but have some keywords that
+    depend on the context. Because of this it's impossible to lex
+    them just by using a regular expression lexer like the
+    `RegexLexer`.
+
+    Have a look at the `DelphiLexer` to get an idea of how to use
+    this scanner.
+
+    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+import re
+
+
+class EndOfText(RuntimeError):
+    """
+    Raised when a match function is called after the
+    scanner has already reached the end of the text.
+    """
+
+
+class Scanner(object):
+    """
+    Simple scanner; matching at end of text raises `EndOfText`.
+
+    All method patterns are regular expression strings (not
+    compiled expressions!)
+    """
+
+    def __init__(self, text, flags=0):
+        """
+        :param text:    The text which should be scanned
+        :param flags:   default regular expression flags
+        """
+        self.data = text
+        self.data_length = len(text)
+        self.start_pos = 0
+        self.pos = 0
+        self.flags = flags
+        self.last = None
+        self.match = None
+        self._re_cache = {}
+
+    @property
+    def eos(self):
+        """`True` if the scanner reached the end of text."""
+        return self.pos >= self.data_length
+
+    def _match_here(self, pattern):
+        """Match `pattern` at ``pos``, caching the compiled regex."""
+        if self.eos:
+            raise EndOfText()
+        if pattern not in self._re_cache:
+            self._re_cache[pattern] = re.compile(pattern, self.flags)
+        return self._re_cache[pattern].match(self.data, self.pos)
+
+    def check(self, pattern):
+        """
+        Apply `pattern` on the current position and return
+        the match object. (Doesn't touch pos). Use this for
+        lookahead.
+        """
+        return self._match_here(pattern)
+
+    def test(self, pattern):
+        """Apply a pattern on the current position and check
+        if it matches. Doesn't touch pos."""
+        return self.check(pattern) is not None
+
+    def scan(self, pattern):
+        """
+        Scan the text for the given pattern and update pos/match
+        and related fields. The return value is a boolean that
+        indicates if the pattern matched. The matched value is
+        stored on the instance as ``match``, the previous one as
+        ``last`` (updated even when nothing matches). ``start_pos``
+        is the position of the pointer before the pattern was
+        matched, ``pos`` is the end position.
+        """
+        m = self._match_here(pattern)
+        self.last = self.match
+        if m is None:
+            return False
+        self.start_pos = m.start()
+        self.pos = m.end()
+        self.match = m.group()
+        return True
+
+    def get_char(self):
+        """Scan exactly one char ('.', so a newline needs re.DOTALL)."""
+        self.scan('.')
+
+    def __repr__(self):
+        return '<%s %d/%d>' % (
+            self.__class__.__name__,
+            self.pos,
+            self.data_length
+        )

eric ide

mercurial