eric6/ThirdParty/Pygments/pygments/regexopt.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
--- a/eric6/ThirdParty/Pygments/pygments/regexopt.py	Tue Apr 20 19:47:39 2021 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.regexopt
-    ~~~~~~~~~~~~~~~~~
-
-    An algorithm that generates optimized regexes for matching long lists of
-    literal strings.
-
-    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-from re import escape
-from os.path import commonprefix
-from itertools import groupby
-from operator import itemgetter
-
-CS_ESCAPE = re.compile(r'[\^\\\-\]]')
-FIRST_ELEMENT = itemgetter(0)
-
-
-def make_charset(letters):
-    return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'
-
-
-def regex_opt_inner(strings, open_paren):
-    """Return a regex that matches any string in the sorted list of strings."""
-    close_paren = open_paren and ')' or ''
-    # print strings, repr(open_paren)
-    if not strings:
-        # print '-> nothing left'
-        return ''
-    first = strings[0]
-    if len(strings) == 1:
-        # print '-> only 1 string'
-        return open_paren + escape(first) + close_paren
-    if not first:
-        # print '-> first string empty'
-        return open_paren + regex_opt_inner(strings[1:], '(?:') \
-            + '?' + close_paren
-    if len(first) == 1:
-        # multiple one-char strings? make a charset
-        oneletter = []
-        rest = []
-        for s in strings:
-            if len(s) == 1:
-                oneletter.append(s)
-            else:
-                rest.append(s)
-        if len(oneletter) > 1:  # do we have more than one oneletter string?
-            if rest:
-                # print '-> 1-character + rest'
-                return open_paren + regex_opt_inner(rest, '') + '|' \
-                    + make_charset(oneletter) + close_paren
-            # print '-> only 1-character'
-            return open_paren + make_charset(oneletter) + close_paren
-    prefix = commonprefix(strings)
-    if prefix:
-        plen = len(prefix)
-        # we have a prefix for all strings
-        # print '-> prefix:', prefix
-        return open_paren + escape(prefix) \
-            + regex_opt_inner([s[plen:] for s in strings], '(?:') \
-            + close_paren
-    # is there a suffix?
-    strings_rev = [s[::-1] for s in strings]
-    suffix = commonprefix(strings_rev)
-    if suffix:
-        slen = len(suffix)
-        # print '-> suffix:', suffix[::-1]
-        return open_paren \
-            + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
-            + escape(suffix[::-1]) + close_paren
-    # recurse on common 1-string prefixes
-    # print '-> last resort'
-    return open_paren + \
-        '|'.join(regex_opt_inner(list(group[1]), '')
-                 for group in groupby(strings, lambda s: s[0] == first[0])) \
-        + close_paren
-
-
-def regex_opt(strings, prefix='', suffix=''):
-    """Return a compiled regex that matches any string in the given list.
-
-    The strings to match must be literal strings, not regexes.  They will be
-    regex-escaped.
-
-    *prefix* and *suffix* are pre- and appended to the final regex.
-    """
-    strings = sorted(strings)
-    return prefix + regex_opt_inner(strings, '(') + suffix

eric ide

mercurial