--- pygments/lexer.py (older version)
+++ pygments/lexer.py (newer version)
@@ -3,25 +3,32 @@
     pygments.lexer
     ~~~~~~~~~~~~~~
 
     Base lexer classes.
 
-    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 import re
 
 from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
      make_analysator
+import collections
 
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
            'LexerContext', 'include', 'bygroups', 'using', 'this']
 
 
+
+_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
+                 ('\xff\xfe\0\0', 'utf-32'),
+                 ('\0\0\xfe\xff', 'utf-32be'),
+                 ('\xff\xfe', 'utf-16'),
+                 ('\xfe\xff', 'utf-16be')]
 
 _default_analyse = staticmethod(lambda x: 0.0)
 
 
 class LexerMeta(type):
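
The new _encoding_map table drives BOM sniffing in Lexer.get_tokens (second hunk below): the first byte-order mark that matches picks the codec. A minimal self-contained sketch of the same idea, using bytes literals and a hypothetical decode_with_bom helper rather than the lexer's own code:

# Longer BOMs first: the 4-byte UTF-32 LE BOM starts with the 2-byte
# UTF-16 LE BOM, so ordering matters and a list is used, not a dict.
BOM_MAP = [(b'\xef\xbb\xbf', 'utf-8'),
           (b'\xff\xfe\x00\x00', 'utf-32'),
           (b'\x00\x00\xfe\xff', 'utf-32be'),
           (b'\xff\xfe', 'utf-16'),
           (b'\xfe\xff', 'utf-16be')]

def decode_with_bom(data, fallback='utf-8'):
    """Decode *data*, preferring whatever codec its leading BOM indicates."""
    for bom, encoding in BOM_MAP:
        if data.startswith(bom):
            # Strip the BOM so it does not show up in the decoded text.
            return data[len(bom):].decode(encoding, errors='replace')
    return data.decode(fallback, errors='replace')

print(decode_with_bom(b'\xef\xbb\xbfhello'))  # -> 'hello'
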
@@ -138,12 +145,23 @@
                     import chardet
                 except ImportError:
                     raise ImportError('To enable chardet encoding guessing, '
                                       'please install the chardet library '
                                       'from http://chardet.feedparser.org/')
-                enc = chardet.detect(text)
-                text = text.decode(enc['encoding'])
+                # check for BOM first
+                decoded = None
+                for bom, encoding in _encoding_map:
+                    if text.startswith(bom):
+                        decoded = str(text[len(bom):], encoding,
+                                      errors='replace')
+                        break
+                # no BOM found, so use chardet
+                if decoded is None:
+                    enc = chardet.detect(text[:1024]) # Guess using first 1KB
+                    decoded = str(text, enc.get('encoding') or 'utf-8',
+                                  errors='replace')
+                text = decoded
             else:
                 text = text.decode(self.encoding)
         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
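
When no BOM is found, the rewritten branch samples only the first kilobyte for chardet.detect and tolerates a failed guess: the result dict's 'encoding' key may be None, hence the `or 'utf-8'` fallback, and errors='replace' keeps a wrong guess from raising. A hedged usage sketch (requires the optional chardet package; the sample text and variable names are made up):

import chardet

data = 'Grüße aus Köln'.encode('latin-1')   # no BOM, and not valid UTF-8
guess = chardet.detect(data[:1024])         # e.g. {'encoding': 'ISO-8859-1', 'confidence': 0.73}
text = data.decode(guess.get('encoding') or 'utf-8', errors='replace')
print(text)   # decoded text; the exact result depends on chardet's guess
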
@@ -270,16 +288,18 @@
             elif type(action) is _TokenType:
                 data = match.group(i + 1)
                 if data:
                     yield match.start(i + 1), action, data
             else:
-                if ctx:
-                    ctx.pos = match.start(i + 1)
-                for item in action(lexer, _PseudoMatch(match.start(i + 1),
-                                   match.group(i + 1)), ctx):
-                    if item:
-                        yield item
+                data = match.group(i + 1)
+                if data is not None:
+                    if ctx:
+                        ctx.pos = match.start(i + 1)
+                    for item in action(lexer, _PseudoMatch(match.start(i + 1),
+                                       data), ctx):
+                        if item:
+                            yield item
         if ctx:
             ctx.pos = match.end()
     return callback
 
 
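The bygroups() change matters for rules with optional groups handled by a callable action: previously a group that did not participate in the match was still wrapped in a _PseudoMatch as None and handed to the callback; now such groups are skipped. A toy sketch of a rule that relies on this, with a hypothetical lexer and callback that are not part of Pygments:

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Name, Text

def shout(lexer, match, ctx=None):
    # Hypothetical callable action: re-emit the matched group upper-cased.
    yield match.start(), Name.Constant, match.group().upper()

class GreetLexer(RegexLexer):
    """Toy lexer: a word, optionally followed by '!suffix'."""
    name = 'GreetDemo'
    tokens = {
        'root': [
            # Group 2 is None when '!suffix' is absent; with the
            # 'data is not None' guard, bygroups() skips the callback
            # instead of handing it a _PseudoMatch around None.
            (r'(\w+)(?:!(\w+))?', bygroups(Name, shout)),
            (r'\s+', Text),
        ],
    }

for pos, tok, value in GreetLexer().get_tokens_unprocessed('hi!there\nbye\n'):
    print(pos, tok, value)
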
351 """Preprocess the regular expression component of a token definition.""" |
371 """Preprocess the regular expression component of a token definition.""" |
352 return re.compile(regex, rflags).match |
372 return re.compile(regex, rflags).match |
353 |
373 |
354 def _process_token(cls, token): |
374 def _process_token(cls, token): |
355 """Preprocess the token component of a token definition.""" |
375 """Preprocess the token component of a token definition.""" |
356 assert type(token) is _TokenType or hasattr(token, '__call__'), \ |
376 assert type(token) is _TokenType or isinstance(token, collections.Callable), \ |
357 'token type must be simple type or callable, not %r' % (token,) |
377 'token type must be simple type or callable, not %r' % (token,) |
358 return token |
378 return token |
359 |
379 |
360 def _process_new_state(cls, new_state, unprocessed, processed): |
380 def _process_new_state(cls, new_state, unprocessed, processed): |
361 """Preprocess the state transition action of a token definition.""" |
381 """Preprocess the state transition action of a token definition.""" |
@@ -435,11 +455,11 @@
             cls._process_state(tokendefs, processed, state)
         return processed
 
     def __call__(cls, *args, **kwds):
         """Instantiate cls after preprocessing its token definitions."""
-        if not hasattr(cls, '_tokens'):
+        if '_tokens' not in cls.__dict__:
             cls._all_tokens = {}
             cls._tmpname = 0
             if hasattr(cls, 'token_variants') and cls.token_variants:
                 # don't process yet
                 pass
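
hasattr(cls, '_tokens') is also true when _tokens is merely inherited from an already-processed parent lexer class, so a subclass defining its own tokens would keep using the parent's compiled table; checking '_tokens' not in cls.__dict__ restricts the test to the class itself. The distinction in isolation:

class ProcessedBase:
    _tokens = {'root': []}    # stands in for a parent lexer whose tokens were compiled

class Subclass(ProcessedBase):
    pass                      # defines nothing of its own

print(hasattr(Subclass, '_tokens'))       # True  -- found via inheritance
print('_tokens' in Subclass.__dict__)     # False -- not set on the subclass itself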