UtilitiesPython2/Tools.py

branch
Py2 comp.
changeset 3456
96232974dcdb
parent 3178
f25fc1364c88
parent 3445
bf95eac5ce12
child 3484
645c12de6b0c
equal deleted inserted replaced
3178:f25fc1364c88 3456:96232974dcdb
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2011 - 2014 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing tool functions.
8 """
9
10 import re
11 from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
12
# Each entry pairs the number of leading lines to inspect with the pattern
# whose first group captures the encoding name.
coding_regexps = [
    (2, re.compile(r'''coding[:=]\s*([-\w_.]+)''')),
    (1, re.compile(r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')),
]


def get_coding(text):
    """
    Function to get the coding of a text.

    The patterns of coding_regexps are tried in order, each one only against
    the number of leading lines given in its entry. The first match wins.

    @param text text to inspect (string)
    @return coding string in lower case or None, if no coding declaration
        was found
    """
    all_lines = text.splitlines()
    for line_count, pattern in coding_regexps:
        for candidate in all_lines[:line_count]:
            match = pattern.search(candidate)
            if match:
                return match.group(1).lower()
    return None
35
36
def decode(text):
    """
    Function to decode a text.

    The encoding is determined in this order: a byte order mark at the
    start of the text, a coding declaration found by get_coding(), UTF-8
    and finally Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            # This has to be tested before UTF-16 because the little endian
            # UTF-32 BOM (FF FE 00 00) starts with the UTF-16 BOM (FF FE).
            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # wrong or unknown encoding; fall back to guessing below
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'
68
69
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    # The with statement closes the file even if reading it raises.
    with open(filename) as f:
        text = f.read()
    return decode(text)
81
82
def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode text is encoded to UTF-8, all line ends are converted to '\\n'
    and non-empty code is made to end with a line end.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    # isinstance() also accepts subclasses of the unicode type, which an
    # exact type comparison would leave unencoded.
    if isinstance(codestring, type(u"")):
        codestring = codestring.encode('utf-8')
    # From here on codestring is a byte string; convert Windows line ends
    # before lone carriage returns so that '\r\n' does not become two lines.
    codestring = codestring.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    if codestring and not codestring.endswith(b"\n"):
        codestring = codestring + b"\n"

    return codestring
98
99
def extractLineFlags(line, startComment="#", endComment=""):
    """
    Function to extract flags starting and ending with '__' from a line
    comment.

    The comment is the text following the last occurrence of startComment
    in the line.

    @param line line to extract flags from (string)
    @keyparam startComment string identifying the start of the comment (string)
    @keyparam endComment string identifying the end of a comment (string)
    @return list containing the extracted flags (list of strings)
    """
    pos = line.rfind(startComment)
    if pos < 0:
        # the line contains no comment
        return []

    comment = line[pos + len(startComment):].strip()
    if endComment:
        # Remove the end marker itself, so that a flag directly followed
        # by it (e.g. '__FLAG__*/') is still recognized.
        comment = comment.replace(endComment, "")
    return [word for word in comment.split()
            if word.startswith("__") and word.endswith("__")]
120
121 #
122 # eflag: FileType = Python2

eric ide

mercurial