Sun, 27 Feb 2011 11:29:52 +0100
Prepared release of 5.1.0.
805
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
1 | # -*- coding: utf-8 -*- |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
2 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
3 | # Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de> |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
4 | # |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
5 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
6 | """ |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
7 | Module implementing tool functions. |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
8 | """ |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
9 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
10 | import re |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
11 | from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32 |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
12 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
# Table of encoding declaration detectors used by get_coding(). Each entry
# is a tuple of (number of leading lines to inspect, compiled pattern);
# group 1 of a pattern captures the name of the encoding.
coding_regexps = [
    # Python style coding comment (e.g. "# -*- coding: utf-8 -*-"), looked
    # for in the first two lines.
    (2, re.compile(r'''coding[:=]\s*([-\w_.]+)''')),
    # XML declaration, looked for in the first line only. Whitespace and
    # further pseudo attributes (e.g. standalone="yes") are accepted between
    # the encoding value and the closing "?>".
    (1, re.compile(
        r'''<\?xml.*?\bencoding\s*=\s*['"]([-\w_.]+)['"].*?\?>''')),
]
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
17 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def get_coding(text):
    """
    Function to determine the coding declared in a text.

    The entries of coding_regexps are tried in order. Each pattern is
    searched for in the leading lines of the text (as many as the entry
    specifies) and the first match determines the result.

    @param text text to inspect (string)
    @return lower-cased name of the coding (string) or None, if no coding
        declaration was found
    """
    all_lines = text.splitlines()
    for line_count, pattern in coding_regexps:
        for line in all_lines[:line_count]:
            match = pattern.search(line)
            if match is not None:
                return match.group(1).lower()
    return None
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
34 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def decode(text):
    """
    Function to decode a text.

    The encoding is determined in this order: byte order mark, coding
    declaration as found by get_coding(), UTF-8 and finally Latin-1.

    @param text text to decode (string)
    @return tuple of decoded text and name of the encoding used
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            # This must be tested before UTF-16: the little endian UTF-32
            # BOM (FF FE 00 00) starts with the little endian UTF-16 BOM
            # (FF FE), so the reverse order would never reach this branch
            # on little endian machines.
            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # Undecodable data or an unknown coding name is not fatal; fall
        # through to the guessing stages below.
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
66 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    # The context manager closes the file even if reading raises.
    # NOTE(review): the file is opened in the default (text) mode as before;
    # decode() inspects byte order marks, so binary mode may be the better
    # choice -- confirm against the callers before changing it.
    with open(filename) as f:
        text = f.read()
    return decode(text)
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
78 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode input is encoded as UTF-8, all line ends (CR LF and CR) are
    converted to LF and a non-empty result always ends with a line end.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    # type(u"") is the unicode string type; isinstance() also accepts
    # subclasses of it, which an exact type comparison would miss.
    if isinstance(codestring, type(u"")):
        codestring = codestring.encode('utf-8')
    # From here on the code is a byte string; CR LF has to be replaced
    # before the single CR, otherwise it would yield two line ends.
    codestring = codestring.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    if codestring and not codestring.endswith(b"\n"):
        codestring = codestring + b"\n"

    return codestring
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
94 | |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
95 | # |
83ca4d1ff648
Added a tabnanny checker function for Python 2 files.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
96 | # eflag: FileType = Python2 |