ThirdParty/Pygments/pygments/util.py

changeset 2426
da76c71624de
parent 1705
b0fbc9300f2b
child 2525
8b507a9a2d40
--- a/ThirdParty/Pygments/pygments/util.py	Sun Feb 17 19:05:40 2013 +0100
+++ b/ThirdParty/Pygments/pygments/util.py	Sun Feb 17 19:07:15 2013 +0100
@@ -5,7 +5,7 @@
 
     Utility functions.
 
-    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
@@ -206,6 +206,51 @@
         _looks_like_xml_cache[key] = rv
         return rv
 
+# Python narrow build compatibility
+
+def _surrogatepair(c):
+    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
+
+def unirange(a, b):
+    """
+    Returns a regular expression string to match the given non-BMP range.
+    """
+    if b < a:
+        raise ValueError("Bad character range")
+    if a < 0x10000 or b < 0x10000:
+        raise ValueError("unirange is only defined for non-BMP ranges")
+
+    if sys.maxunicode > 0xffff:
+        # wide build
+        return '[%s-%s]' % (chr(a), chr(b))
+    else:
+        # narrow build stores surrogates, and the 're' module handles them
+        # (incorrectly) as characters.  Since there is still ordering among
+        # these characters, expand the range to one that it understands.  Some
+        # background in http://bugs.python.org/issue3665 and
+        # http://bugs.python.org/issue12749
+        #
+        # Additionally, the lower constants are using unichr rather than
+        # literals because jython [which uses the wide path] can't load this
+        # file if they are literals.
+        ah, al = _surrogatepair(a)
+        bh, bl = _surrogatepair(b)
+        if ah == bh:
+            return '(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))
+        else:
+            buf = []
+            buf.append('%s[%s-%s]' %
+                       (chr(ah), chr(al),
+                        ah == bh and chr(bl) or chr(0xdfff)))
+            if ah - bh > 1:
+                buf.append('[%s-%s][%s-%s]' %
+                           chr(ah+1), chr(bh-1), chr(0xdc00), chr(0xdfff))
+            if ah != bh:
+                buf.append('%s[%s-%s]' %
+                           (chr(bh), chr(0xdc00), chr(bl)))
+
+            return '(?:' + '|'.join(buf) + ')'
+
 # Python 2/3 compatibility
 
 if sys.version_info < (3,0):

eric ide

mercurial