UtilitiesPython2/Tools.py

changeset 805
83ca4d1ff648
child 945
8cd4d08fa9f6
child 1510
e75ecf2bd9dd
equal deleted inserted replaced
804:3465556892de 805:83ca4d1ff648
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing tool functions.
8 """
9
10 import re
11 from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
12
# Pattern for a coding declaration such as "coding: utf-8" or "coding=utf-8".
_coding_declaration_re = re.compile(r'''coding[:=]\s*([-\w_.]+)''')
# Pattern for the encoding attribute closing an XML declaration.
_xml_encoding_re = re.compile(
    r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')

# Table of (number of leading lines to inspect, compiled pattern) pairs;
# group 1 of each pattern captures the encoding name.
coding_regexps = [
    (2, _coding_declaration_re),
    (1, _xml_encoding_re),
]
17
def get_coding(text):
    """
    Function to determine the coding declared at the start of a text.

    Every entry of coding_regexps is tried in order; for each entry only
    the number of leading lines given by that entry is searched.

    @param text text to inspect (string)
    @return lower case coding string of the first match or None, if no
        coding declaration was found
    """
    all_lines = text.splitlines()
    for line_count, pattern in coding_regexps:
        for candidate in all_lines[:line_count]:
            match = pattern.search(candidate)
            if match is not None:
                return match.group(1).lower()
    return None
34
def decode(text):
    """
    Function to decode a text.

    The encoding is determined in this order: a byte order mark (BOM) at
    the start of the text, a coding declaration found by get_coding(),
    UTF-8 and finally Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    # Imported locally so both byte orders of each BOM are available
    # without relying on the module level imports.
    import codecs

    try:
        if text.startswith(codecs.BOM_UTF8):
            # UTF-8 with BOM
            return text[len(codecs.BOM_UTF8):].decode('utf-8'), 'utf-8-bom'
        elif text.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
            # UTF-32 with BOM
            # This must be tested before UTF-16 because the little endian
            # UTF-32 BOM starts with the little endian UTF-16 BOM. The BOM
            # is left in place so the codec selects the byte order from it.
            return text.decode('utf-32'), 'utf-32'
        elif text.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
            # UTF-16 with BOM (the codec consumes the BOM and selects the
            # byte order accordingly)
            return text.decode('utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return text.decode(coding), coding
    except (UnicodeError, LookupError):
        # Wrong or unknown encoding; fall back to guessing below.
        pass

    # Assume UTF-8
    try:
        return text.decode('utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return text.decode("latin-1"), 'latin-1-guessed'
66
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    # Read in binary mode so decode() receives the bytes exactly as stored;
    # text mode may translate line endings on some platforms, which would
    # corrupt multi-byte encodings like UTF-16. Line endings are therefore
    # returned as they are stored in the file.
    f = open(filename, "rb")
    try:
        text = f.read()
    finally:
        # Close the file even if reading fails.
        f.close()
    return decode(text)
78
def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode input is encoded as UTF-8, all line endings are converted to
    "\\n" and a non-empty result is terminated by a newline.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    # isinstance() also catches subclasses of the unicode type, which an
    # exact type comparison would leave unencoded.
    if isinstance(codestring, type(u"")):
        codestring = codestring.encode('utf-8')
    # From here on codestring is a byte string, so byte literals are used.
    codestring = codestring.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    if codestring and not codestring.endswith(b"\n"):
        codestring = codestring + b"\n"

    return codestring
94
95 #
96 # eflag: FileType = Python2

eric ide

mercurial