src/eric7/Utilities/__init__.py

branch
eric7
changeset 10928
46651e194fbe
parent 10718
c9252721680b
child 10941
07cad049002c
child 10944
ff77c9a96a80
--- a/src/eric7/Utilities/__init__.py	Thu Sep 26 09:48:49 2024 +0200
+++ b/src/eric7/Utilities/__init__.py	Thu Sep 26 15:49:36 2024 +0200
@@ -18,8 +18,6 @@
 import sys
 import warnings
 
-from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
-
 import chardet
 
 from PyQt6 import sip
@@ -35,6 +33,14 @@
 
 from eric7 import Preferences
 from eric7.__version__ import Version
+from eric7.EricUtilities import (  # noqa
+    decodeBytes,
+    decodeString,
+    html_encode,
+    html_udecode,
+    html_uencode,
+    readStringFromStream,
+)
 from eric7.EricWidgets.EricApplication import ericApp
 from eric7.SystemUtilities import DesktopUtilities, FileSystemUtilities, OSUtilities
 from eric7.UI.Info import Program
@@ -290,15 +296,15 @@
     @rtype tuple of (str, str)
     """
     with contextlib.suppress(UnicodeError, LookupError):
-        if text.startswith(BOM_UTF8):
+        if text.startswith(codecs.BOM_UTF8):
             # UTF-8 with BOM
-            return str(text[len(BOM_UTF8) :], "utf-8"), "utf-8-bom"
-        elif text.startswith(BOM_UTF16):
+            return str(text[len(codecs.BOM_UTF8) :], "utf-8"), "utf-8-bom"
+        elif text.startswith(codecs.BOM_UTF16):
             # UTF-16 with BOM
-            return str(text[len(BOM_UTF16) :], "utf-16"), "utf-16"
-        elif text.startswith(BOM_UTF32):
+            return str(text[len(codecs.BOM_UTF16) :], "utf-16"), "utf-16"
+        elif text.startswith(codecs.BOM_UTF32):
             # UTF-32 with BOM
-            return str(text[len(BOM_UTF32) :], "utf-32"), "utf-32"
+            return str(text[len(codecs.BOM_UTF32) :], "utf-32"), "utf-32"
         coding = get_codingBytes(text)
         if coding:
             return str(text, coding), coding
@@ -422,7 +428,7 @@
     """
     encoding = None
     if origEncoding == "utf-8-bom":
-        etext, encoding = BOM_UTF8 + text.encode("utf-8"), "utf-8-bom"
+        etext, encoding = codecs.BOM_UTF8 + text.encode("utf-8"), "utf-8-bom"
     else:
         # Try declared coding spec
         coding = get_coding(text)
@@ -470,83 +476,6 @@
     return etext, encoding
 
 
-def decodeString(text):
-    """
-    Function to decode a string containing Unicode encoded characters.
-
-    @param text text containing encoded chars
-    @type str
-    @return decoded text
-    @rtype str
-    """
-    buf = b""
-    index = 0
-    while index < len(text):
-        if text[index] == "\\":
-            qb = QByteArray.fromHex(text[index : index + 4].encode())
-            buf += bytes(qb)
-            index += 4
-        else:
-            buf += codecs.encode(text[index], "utf-8")
-            index += 1
-    buf = buf.replace(b"\x00", b"")
-    return decodeBytes(buf)
-
-
-def decodeBytes(buffer):
-    """
-    Function to decode some byte text into a string.
-
-    @param buffer byte buffer to decode
-    @type bytes
-    @return decoded text
-    @rtype str
-    """
-    # try UTF with BOM
-    with contextlib.suppress(UnicodeError, LookupError):
-        if buffer.startswith(BOM_UTF8):
-            # UTF-8 with BOM
-            return str(buffer[len(BOM_UTF8) :], encoding="utf-8")
-        elif buffer.startswith(BOM_UTF16):
-            # UTF-16 with BOM
-            return str(buffer[len(BOM_UTF16) :], encoding="utf-16")
-        elif buffer.startswith(BOM_UTF32):
-            # UTF-32 with BOM
-            return str(buffer[len(BOM_UTF32) :], encoding="utf-32")
-
-    # try UTF-8
-    with contextlib.suppress(UnicodeError):
-        return str(buffer, encoding="utf-8")
-
-    # try codec detection
-    try:
-        guess = chardet.detect(buffer)
-        if guess and guess["encoding"] is not None:
-            codec = guess["encoding"].lower()
-            return str(buffer, encoding=codec)
-    except (LookupError, UnicodeError):
-        pass
-    except ImportError:
-        pass
-
-    return str(buffer, encoding="utf-8", errors="ignore")
-
-
-def readStringFromStream(stream):
-    """
-    Module function to read a string from the given stream.
-
-    @param stream data stream opened for reading
-    @type QDataStream
-    @return string read from the stream
-    @rtype str
-    """
-    data = stream.readString()
-    if data is None:
-        data = b""
-    return data.decode("utf-8")
-
-
 def normalizeCode(codestring):
     """
     Function to normalize the given code.
@@ -564,120 +493,6 @@
     return codestring
 
 
-_escape = re.compile("[&<>\"'\u0080-\uffff]")
-
-_escape_map = {
-    "&": "&amp;",
-    "<": "&lt;",
-    ">": "&gt;",
-    '"': "&quot;",
-    "'": "&#x27;",
-}
-
-
-def escape_entities(m, escmap=_escape_map):
-    """
-    Function to encode html entities.
-
-    @param m the match object
-    @type re.Match
-    @param escmap the map of entities to encode
-    @type dict
-    @return the converted text
-    @rtype str
-    """
-    char = m.group()
-    text = escmap.get(char)
-    if text is None:
-        text = "&#{0:d};".format(ord(char))
-    return text
-
-
-def html_encode(text, pattern=_escape):
-    """
-    Function to correctly encode a text for html.
-
-    @param text text to be encoded
-    @type str
-    @param pattern search pattern for text to be encoded
-    @type str
-    @return the encoded text
-    @rtype str
-    """
-    if not text:
-        return ""
-    text = pattern.sub(escape_entities, text)
-    return text
-
-
-_uescape = re.compile("[\u0080-\uffff]")
-
-
-def escape_uentities(m):
-    """
-    Function to encode html entities.
-
-    @param m the match object
-    @type re.Match
-    @return the converted text
-    @rtype str
-    """
-    char = m.group()
-    text = "&#{0:d};".format(ord(char))
-    return text
-
-
-def html_uencode(text, pattern=_uescape):
-    """
-    Function to correctly encode a unicode text for html.
-
-    @param text text to be encoded
-    @type str
-    @param pattern search pattern for text to be encoded
-    @type str
-    @return the encoded text
-    @rtype str
-    """
-    if not text:
-        return ""
-    text = pattern.sub(escape_uentities, text)
-    return text
-
-
-_uunescape = re.compile(r"&#\d+;")
-
-
-def unescape_uentities(m):
-    """
-    Function to decode html entities.
-
-    @param m the match object
-    @type re.Match
-    @return the converted text
-    @rtype str
-    """
-    char = m.group()
-    ordinal = int(char[2:-1])
-    return chr(ordinal)
-
-
-def html_udecode(text, pattern=_uunescape):
-    """
-    Function to correctly decode a html text to a unicode text.
-
-    @param text text to be decoded
-    @type str
-    @param pattern search pattern for text to be decoded
-    @type str
-    @return the decoded text
-    @rtype str
-    """
-    if not text:
-        return ""
-    text = pattern.sub(unescape_uentities, text)
-    return text
-
-
 def convertLineEnds(text, eol):
     """
     Function to convert the end of line characters.

eric ide

mercurial