Tue, 04 Jan 2011 17:37:48 +0100
Added a tabnanny checker function for Python 2 files.
# -*- coding: utf-8 -*-

# Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing tool functions.
"""

import re
# BOM_UTF16 and BOM_UTF32 (native byte order) are kept importable from this
# module even though the detection below uses the explicit LE/BE variants.
from codecs import (
    BOM_UTF8, BOM_UTF16, BOM_UTF32,
    BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE,
)

# Each entry is (number of leading lines to inspect, compiled pattern).
# The first pattern looks for a "coding:" / "coding=" declaration in the
# first two lines, the second one for the encoding attribute of an XML
# declaration in the first line.
coding_regexps = [
    (2, re.compile(r'''coding[:=]\s*([-\w_.]+)''')),
    (1, re.compile(r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')),
]


def get_coding(text):
    """
    Function to get the coding of a text.

    The patterns of coding_regexps are tried in order; each one is only
    applied to the number of leading lines configured for it.

    @param text text to inspect (string)
    @return coding string in lower case or None, if no coding
        declaration was found
    """
    lines = text.splitlines()
    for line_count, coding_re in coding_regexps:
        for line in lines[:line_count]:
            match = coding_re.search(line)
            if match:
                return match.group(1).lower()
    return None


def decode(text):
    """
    Function to decode a text.

    The encoding is determined by these steps, the first successful one
    wins: byte order mark, coding declaration inside the text, UTF-8,
    Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith((BOM_UTF32_LE, BOM_UTF32_BE)):
            # UTF-32 with BOM
            # This must be tested before UTF-16 because the little endian
            # UTF-32 BOM starts with the little endian UTF-16 BOM. The BOM
            # is left in place so the codec selects the byte order itself.
            return unicode(text, 'utf-32'), 'utf-32'
        elif text.startswith((BOM_UTF16_LE, BOM_UTF16_BE)):
            # UTF-16 with BOM (the codec consumes the BOM, see above)
            return unicode(text, 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # undecodable data or unknown codec name; try the fallbacks below
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'


def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    f = open(filename)
    try:
        # close the file even if reading it fails
        text = f.read()
    finally:
        f.close()
    return decode(text)


def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode input is encoded as UTF-8, all line ends are converted to
    "\\n" and a non-empty result is guaranteed to end with a newline.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    if isinstance(codestring, unicode):
        codestring = codestring.encode('utf-8')
    codestring = codestring.replace("\r\n", "\n").replace("\r", "\n")

    if codestring and not codestring.endswith('\n'):
        codestring = codestring + '\n'

    return codestring

#
# eflag: FileType = Python2