eric: comparison UtilitiesPython2/Tools.py

-:3465556892de
+:83ca4d1ff648
+# -*- coding: utf-8 -*-
+# Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+"""
+Module implementing tool functions.
+"""
+import re
+from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
# Python style coding declaration, e.g. "# -*- coding: utf-8 -*-"
_python_coding_re = re.compile(r'''coding[:=]\s*([-\w_.]+)''')

# XML declaration, e.g. <?xml version="1.0" encoding="utf-8"?>
_xml_coding_re = re.compile(
    r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')

# Each entry is a tuple of (number of leading lines to search, compiled
# pattern); group 1 of a match is the name of the encoding.
coding_regexps = [
    (2, _python_coding_re),
    (1, _xml_coding_re),
]
def get_coding(text):
    """
    Function to get the coding of a text.

    Every entry of coding_regexps is tried in turn against the number of
    leading lines it specifies; the first match determines the result.

    @param text text to inspect (string)
    @return coding string in lower case or None, if no coding
        declaration was found
    """
    all_lines = text.splitlines()
    for line_limit, pattern in coding_regexps:
        # only the first line_limit lines may carry this kind of declaration
        for candidate in all_lines[:line_limit]:
            match = pattern.search(candidate)
            if match is not None:
                return match.group(1).lower()
    return None
def decode(text):
    """
    Function to decode a text.

    The encoding is determined by trying, in this order, a byte order mark
    (UTF-8, UTF-32, UTF-16), a coding declaration as found by get_coding(),
    UTF-8 and finally Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            # This has to be tested before UTF-16 because the little
            # endian UTF-32 BOM starts with the bytes of the little endian
            # UTF-16 BOM; testing UTF-16 first would claim UTF-32 texts.
            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # The text is not valid for the detected encoding or the declared
        # encoding is unknown; fall back to guessing below.
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    f = open(filename)
    try:
        text = f.read()
    finally:
        # close the file even if reading it raised an exception
        f.close()
    return decode(text)
+def normalizeCode(codestring):
+"""
+Function to normalize the given code.
+@param codestring code to be normalized (string)
+@return normalized code (string)
+"""
+if type(codestring) == type(u""):
+codestring = codestring.encode('utf-8')
+codestring = codestring.replace("\r\n","\n").replace("\r","\n")
+if codestring and codestring[-1] != '\n':
+codestring = codestring + '\n'
+return codestring
+#
+# eflag: FileType = Python2

Mercurial Repositories > eric / file comparison

comparison: UtilitiesPython2/Tools.py

UtilitiesPython2/Tools.py