eric: comparison ThirdParty/Pygments/pygments/regexopt.py

-:8bc578136279
+:4f20dba37ab6
+# -*- coding: utf-8 -*-
+"""
+pygments.regexopt
+~~~~~~~~~~~~~~~~~
+An algorithm that generates optimized regexes for matching long lists of
+literal strings.
+:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+:license: BSD, see LICENSE for details.
+"""
+import re
+from re import escape
+from os.path import commonprefix
+from itertools import groupby
+from operator import itemgetter
# Characters that must be backslash-escaped inside a regex character class:
# '^', '\\', '-' and ']' are syntactically special there, and a literal '['
# at the start of a class is reported as a "possible nested set"
# (FutureWarning) by the ``re`` module on Python 3.7+, so it is escaped too.
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
# Key function returning the first item of a sequence.
FIRST_ELEMENT = itemgetter(0)


def make_charset(letters):
    """Return a regex character class matching any one of *letters*.

    *letters* is an iterable of strings; they are concatenated and every
    character that is special inside ``[...]`` is backslash-escaped, so the
    result can be embedded in a larger pattern as-is.
    """
    escaped = CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters))
    return '[' + escaped + ']'
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    *strings* must already be sorted: the code relies on an empty string (if
    any) being first and on strings with the same first character being
    adjacent.  *open_paren* is the group opener to wrap the result in
    (e.g. ``'('`` or ``'(?:'``); pass ``''`` to get the bare alternative
    without any enclosing group.

    Returns the pattern as a string; an empty *strings* list yields ``''``.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # The empty string is among the alternatives: everything else
        # becomes one optional group.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # Several one-character strings can be folded into one charset.
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:
            if rest:
                # Longer strings go first so they are tried before the
                # single characters.
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Only one-character strings: still honour the requested group
            # so this branch is consistent with all the others.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # Factor out the prefix shared by all strings and recurse on the
        # remainders.
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # No common prefix; look for a common suffix by comparing the reversed
    # strings.
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # Stripping the suffix can break the ordering, hence the re-sort.
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that share the first string's
    # initial character and the remaining ones, and alternate between them.
    alternatives = (
        regex_opt_inner(list(group), '')
        for _, group in groupby(strings, lambda s: s[0] == first[0])
    )
    return open_paren + '|'.join(alternatives) + close_paren
def regex_opt(strings, prefix='', suffix=''):
    """Build one optimized regex pattern matching any of *strings*.

    The entries of *strings* are taken as literal text, not as regexes; they
    are sorted and regex-escaped before being combined into a single
    parenthesized group.

    *prefix* and *suffix* are placed verbatim before and after that group.
    The result is the pattern as a string.
    """
    ordered = sorted(strings)
    body = regex_opt_inner(ordered, '(')
    return prefix + body + suffix

Mercurial Repositories > eric / file comparison

comparison: ThirdParty/Pygments/pygments/regexopt.py

ThirdParty/Pygments/pygments/regexopt.py