eric6/ThirdParty/Pygments/pygments/util.py

changeset 7701
25f42e208e08
parent 7547
21b0534faebc
child 7983
54c5cfbb1e29
equal deleted inserted replaced
7700:a3cf077a8db3 7701:25f42e208e08
3 pygments.util 3 pygments.util
4 ~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~
5 5
6 Utility functions. 6 Utility functions.
7 7
8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 import sys 13 import sys
203 rv = tag_re.search(text[:1000]) is not None 203 rv = tag_re.search(text[:1000]) is not None
204 _looks_like_xml_cache[key] = rv 204 _looks_like_xml_cache[key] = rv
205 return rv 205 return rv
206 206
207 207
def surrogatepair(c):
    """Given a unicode character code with length greater than 16 bits,
    return the two 16 bit surrogate pair.

    :param c: integer code point; the arithmetic below only yields a valid
        surrogate pair for values above 0xFFFF (no range check is done).
    :return: tuple ``(high, low)`` of integer surrogate code units.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    #
    # 0xd7c0 is 0xd800 - (0x10000 >> 10): it folds the subtraction of the
    # 0x10000 offset into the base of the high surrogate.
    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))


# Python narrow build compatibility: unirange() refers to the helper by its
# former private name, so keep that name bound to the same function.
_surrogatepair = surrogatepair
217
218
def unirange(a, b):
    """Returns a regular expression string to match the given non-BMP range.

    :param a: first code point of the range (integer, >= 0x10000).
    :param b: last code point of the range (integer, >= a).
    :return: a character class ``[a-b]`` on wide builds, or a non-capturing
        group of surrogate-pair alternatives on narrow builds.
    :raises ValueError: if ``b < a`` or either bound lies inside the BMP.
    """
    if b < a:
        raise ValueError("Bad character range")
    if a < 0x10000 or b < 0x10000:
        raise ValueError("unirange is only defined for non-BMP ranges")

    if sys.maxunicode > 0xffff:
        # wide build
        return u'[%s-%s]' % (chr(a), chr(b))

    # narrow build stores surrogates, and the 're' module handles them
    # (incorrectly) as characters. Since there is still ordering among
    # these characters, expand the range to one that it understands. Some
    # background in http://bugs.python.org/issue3665 and
    # http://bugs.python.org/issue12749
    #
    # Additionally, the lower constants are using chr rather than
    # literals because jython [which uses the wide path] can't load this
    # file if they are literals.
    ah, al = _surrogatepair(a)
    bh, bl = _surrogatepair(b)
    if ah == bh:
        # Both ends share a high surrogate: one low-surrogate range suffices.
        return u'(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))

    # First high surrogate: from the starting low surrogate to the last one.
    buf = [u'%s[%s-%s]' % (chr(ah), chr(al), chr(0xdfff))]
    # High surrogates strictly between the two ends cover every low
    # surrogate. Since a <= b implies ah <= bh, the gap is bh - ah.
    if bh - ah > 1:
        buf.append(u'[%s-%s][%s-%s]' %
                   (chr(ah + 1), chr(bh - 1), chr(0xdc00), chr(0xdfff)))
    # Last high surrogate: from the first low surrogate up to the end.
    buf.append(u'%s[%s-%s]' % (chr(bh), chr(0xdc00), chr(bl)))

    return u'(?:' + u'|'.join(buf) + u')'
255 215
256 216
257 def format_lines(var_name, seq, raw=False, indent_level=0): 217 def format_lines(var_name, seq, raw=False, indent_level=0):
258 """Formats a sequence of strings for output.""" 218 """Formats a sequence of strings for output."""
259 lines = [] 219 lines = []

eric ide

mercurial