ThirdParty/Pygments/pygments/util.py

changeset 2426
da76c71624de
parent 1705
b0fbc9300f2b
child 2525
8b507a9a2d40
equal deleted inserted replaced
2425:ace8a08028f3 2426:da76c71624de
3 pygments.util 3 pygments.util
4 ~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~
5 5
6 Utility functions. 6 Utility functions.
7 7
8 :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 import sys 13 import sys
203 if m is not None: 203 if m is not None:
204 return True 204 return True
205 rv = tag_re.search(text[:1000]) is not None 205 rv = tag_re.search(text[:1000]) is not None
206 _looks_like_xml_cache[key] = rv 206 _looks_like_xml_cache[key] = rv
207 return rv 207 return rv
208
209 # Python narrow build compatibility
210
def _surrogatepair(c):
    """
    Return the UTF-16 surrogate pair ``(high, low)`` for the code point *c*.

    *c* is expected to be a non-BMP code point (0x10000 or above); no range
    check is performed here.
    """
    # 0xd7c0 == 0xd800 - (0x10000 >> 10): the 0x10000 offset that UTF-16
    # subtracts before splitting is folded into the high-surrogate base.
    return (0xd7c0 + (c >> 10), 0xdc00 + (c & 0x3ff))


def unirange(a, b):
    """
    Returns a regular expression string to match the given non-BMP range.

    *a* and *b* are the inclusive integer code point bounds of the range.

    Raises ValueError if ``b < a`` or if either bound lies inside the BMP
    (below 0x10000).
    """
    if b < a:
        raise ValueError("Bad character range")
    if a < 0x10000 or b < 0x10000:
        raise ValueError("unirange is only defined for non-BMP ranges")

    if sys.maxunicode > 0xffff:
        # wide build
        return '[%s-%s]' % (chr(a), chr(b))

    # narrow build stores surrogates, and the 're' module handles them
    # (incorrectly) as characters.  Since there is still ordering among
    # these characters, expand the range to one that it understands.  Some
    # background in http://bugs.python.org/issue3665 and
    # http://bugs.python.org/issue12749
    #
    # Additionally, the constants below are built with chr() rather than
    # written as literals because jython [which uses the wide path] can't
    # load this file if they are literals.
    ah, al = _surrogatepair(a)
    bh, bl = _surrogatepair(b)
    if ah == bh:
        # Both ends share a high surrogate: one low-surrogate class suffices.
        return '(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))

    low_first, low_last = chr(0xdc00), chr(0xdfff)

    # First high surrogate: from the starting low surrogate to the last one.
    buf = ['%s[%s-%s]' % (chr(ah), chr(al), low_last)]
    # High surrogates strictly between the two ends pair with every low
    # surrogate.  a <= b guarantees ah <= bh, so the gap is bh - ah.
    if bh - ah > 1:
        buf.append('[%s-%s][%s-%s]' %
                   (chr(ah + 1), chr(bh - 1), low_first, low_last))
    # Last high surrogate: from the first low surrogate to the ending one.
    buf.append('%s[%s-%s]' % (chr(bh), low_first, chr(bl)))

    return '(?:' + '|'.join(buf) + ')'
208 253
209 # Python 2/3 compatibility 254 # Python 2/3 compatibility
210 255
211 if sys.version_info < (3,0): 256 if sys.version_info < (3,0):
212 b = bytes = str 257 b = bytes = str

eric ide

mercurial