UtilitiesPython2/Tools.py

changeset 805
83ca4d1ff648
child 945
8cd4d08fa9f6
child 1510
e75ecf2bd9dd
diff -r 3465556892de -r 83ca4d1ff648 UtilitiesPython2/Tools.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/UtilitiesPython2/Tools.py	Tue Jan 04 17:37:48 2011 +0100
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+
+"""
+Module implementing tool functions.
+"""
+
+import re
+from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
+
+coding_regexps = [  # (number of leading lines to search, pattern) pairs
+    (2, re.compile(r'''coding[:=]\s*([-\w_.]+)''')),  # "coding:" / "coding=" marker
+    (1, re.compile(r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')),  # XML declaration
+]
+
+def get_coding(text):
+    """
+    Function to determine the coding declared at the start of a text.
+    
+    @param text text to inspect (string)
+    @return lower case coding name or None, if none was found (string)
+    """
+    all_lines = text.splitlines()
+    # each entry gives the number of leading lines to scan and the pattern
+    for line_count, pattern in coding_regexps:
+        for line in all_lines[:line_count]:
+            match = pattern.search(line)
+            if match is not None:
+                return match.group(1).lower()
+    
+    return None
+
+def decode(text):
+    """
+    Function to decode a text using its BOM, coding line or a guess.
+    
+    @param text text to decode (string)
+    @return tuple of decoded text and encoding name (unicode, string)
+    """
+    try:
+        # A BOM is looked for first.  UTF-32 has to be tested before UTF-16
+        # because on little-endian hosts BOM_UTF32 starts with BOM_UTF16 and
+        # UTF-32 data would otherwise be mis-decoded as UTF-16.
+        if text.startswith(BOM_UTF8):
+            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
+        elif text.startswith(BOM_UTF32):
+            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
+        elif text.startswith(BOM_UTF16):
+            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
+        coding = get_coding(text)
+        if coding:
+            return unicode(text, coding), coding
+    except (UnicodeError, LookupError):
+        pass  # bad data or unknown codec name: fall back to guessing
+    
+    # Assume UTF-8
+    try:
+        return unicode(text, 'utf-8'), 'utf-8-guessed'
+    except (UnicodeError, LookupError):
+        pass
+    
+    # Assume Latin-1 (behaviour before 3.7.1)
+    return unicode(text, "latin-1"), 'latin-1-guessed'
+
+def readEncodedFile(filename):
+    """
+    Function to read a file and decode its contents into proper text.
+    @param filename name of the file to read (string)
+    @return tuple of decoded text and encoding (string, string)
+    """
+    f = open(filename)  # NOTE(review): text mode - "rb" may be safer
+    try:
+        return decode(f.read())
+    finally:
+        f.close()
+
+def normalizeCode(codestring):
+    """
+    Function to normalize the given code.
+    
+    @param codestring code to be normalized; unicode is encoded as UTF-8
+        (string or unicode)
+    @return code with LF line ends and, unless empty, a final LF (string)
+    """
+    if isinstance(codestring, unicode):
+        codestring = codestring.encode('utf-8')
+    # CRLF has to be replaced before a lone CR, else it would give two LFs
+    codestring = codestring.replace("\r\n", "\n").replace("\r", "\n")
+    if codestring and not codestring.endswith('\n'):
+        codestring += '\n'
+    return codestring
+    
+#
+# eflag: FileType = Python2

eric ide

mercurial