1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2011 - 2014 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing tool functions. |
|
8 """ |
|
9 |
|
10 import re |
|
11 from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32 |
|
12 |
|
# Pattern for a Python style declaration like "# -*- coding: utf-8 -*-";
# group 1 captures the encoding name.
_python_coding_re = re.compile(r'''coding[:=]\s*([-\w_.]+)''')

# Pattern for an XML declaration like <?xml version="1.0" encoding="utf-8"?>;
# group 1 captures the encoding name.
_xml_coding_re = re.compile(
    r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')

# Each entry is a tuple of the number of leading lines to be searched and the
# compiled pattern to search them with. The entries are tried in order.
coding_regexps = [
    (2, _python_coding_re),
    (1, _xml_coding_re),
]
|
17 |
|
18 |
|
def get_coding(text):
    """
    Function to determine the coding declared inside a text.

    For every entry of coding_regexps the first lines of the text (as many
    as the entry states) are searched with the entry's pattern. The first
    hit determines the result.

    @param text text to inspect (string)
    @return lower case coding string or None, if no declaration was found
    """
    all_lines = text.splitlines()
    for line_limit, pattern in coding_regexps:
        for candidate in all_lines[:line_limit]:
            match = pattern.search(candidate)
            if match is not None:
                return match.group(1).lower()
    return None
|
35 |
|
36 |
|
def decode(text):
    """
    Function to decode a text.

    The encoding is determined in this order: a byte order mark at the
    start of the text, a coding declaration found by get_coding(), UTF-8
    and finally Latin-1, which accepts any byte sequence.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            # This has to be tested before UTF-16 because the little endian
            # UTF-32 BOM starts with the bytes of the UTF-16 BOM; testing
            # UTF-16 first would make this branch unreachable.
            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # Wrong or unknown coding; fall through to the guesses below.
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'
|
68 |
|
69 |
|
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    # The with statement closes the file even if reading raises.
    with open(filename) as f:
        text = f.read()
    return decode(text)
|
81 |
|
82 |
|
def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode input is encoded as UTF-8. All carriage return / line feed
    pairs and single carriage returns are converted to line feeds and a
    final line feed is appended to non-empty code lacking one.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    # type(u"") is the unicode string type; isinstance also accepts
    # subclasses of it.
    if isinstance(codestring, type(u"")):
        codestring = codestring.encode('utf-8')

    # From here on the code is a byte string, so byte literals are used.
    codestring = codestring.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    if codestring and not codestring.endswith(b"\n"):
        codestring += b"\n"

    return codestring
|
98 |
|
99 |
|
def extractLineFlags(line, startComment="#", endComment=""):
    """
    Function to extract flags starting and ending with '__' from a line
    comment.

    The comment is the part of the line following the last occurrence of
    startComment. It is split at whitespace and all parts starting and
    ending with '__' are returned.

    @param line line to extract flags from (string)
    @keyparam startComment string identifying the start of the comment (string)
    @keyparam endComment string identifying the end of a comment (string)
    @return list containing the extracted flags (list of strings)
    """
    pos = line.rfind(startComment)
    if pos < 0:
        # no comment in this line
        return []

    comment = line[pos + len(startComment):].strip()
    if endComment:
        # Remove the comment terminator given by the caller, so that a flag
        # directly followed by it (e.g. '__FLAG__*/') is still recognized.
        comment = comment.replace(endComment, "")
    return [flag for flag in comment.split()
            if flag.startswith("__") and flag.endswith("__")]
|
120 |
|
121 # |
|
122 # eflag: FileType = Python2 |
|