Fri, 09 Sep 2011 11:54:20 +0200

Detlev Offenbach <detlev@die-offenbachs.de>
Fri, 09 Sep 2011 11:54:20 +0200
changeset 1308
parent 945
child 1309

Made the check for line flags like __IGNORE_WARNING__ more flexible and extensible.

# -*- coding: utf-8 -*-

# Copyright (c) 2011 Detlev Offenbach <detlev@die-offenbachs.de>

"""
Module implementing tool functions.
"""

import re
from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32

# Patterns used to find an encoding declaration. Each entry is a tuple of
# (number of leading lines to inspect, compiled pattern); group 1 of a
# match is the name of the encoding.
coding_regexps = [
    (2, re.compile(r'''coding[:=]\s*([-\w_.]+)''')),
    (1, re.compile(r'''<\?xml.*\bencoding\s*=\s*['"]([-\w_.]+)['"]\?>''')),
]

def get_coding(text):
    """
    Function to get the coding of a text.

    Each entry of coding_regexps gives the number of leading lines to
    inspect and the pattern to search them with. The first match found
    determines the result.

    @param text text to inspect (string)
    @return coding string in lower case or None, if no coding declaration
        was found
    """
    lines = text.splitlines()
    for line_count, coding_re in coding_regexps:
        # only the first line_count lines may carry this kind of declaration
        for line in lines[:line_count]:
            match = coding_re.search(line)
            if match:
                return match.group(1).lower()
    return None

def decode(text):
    """
    Function to decode a text.

    The encoding is determined by trying, in this order, a byte order mark,
    a coding declaration found by get_coding(), UTF-8 and finally Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        # On little-endian platforms BOM_UTF32 begins with the bytes of
        # BOM_UTF16, so UTF-32 has to be tested before UTF-16; otherwise
        # the UTF-32 branch can never be reached there.
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            return unicode(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return unicode(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # the detected encoding is unknown or does not fit the data;
        # fall through to the guesses below
        pass

    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass

    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'

def readEncodedFile(filename):
    """
    Function to read a file and decode its contents into proper text.

    @param filename name of the file to read (string)
    @return tuple of decoded text and encoding (string, string)
    """
    f = open(filename)
    try:
        text = f.read()
    finally:
        # release the file handle even if reading fails
        f.close()
    return decode(text)

def normalizeCode(codestring):
    """
    Function to normalize the given code.

    Unicode input is encoded as UTF-8, all line endings are converted to
    line feeds and a trailing line feed is appended to non-empty code that
    lacks one.

    @param codestring code to be normalized (string)
    @return normalized code (string)
    """
    if isinstance(codestring, type(u"")):
        codestring = codestring.encode('utf-8')
    # byte literals match the type of the encoded string
    codestring = codestring.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

    if codestring and not codestring.endswith(b"\n"):
        codestring = codestring + b"\n"
    return codestring

def extractLineFlags(line, startComment="#", endComment=""):
    """
    Function to extract flags starting and ending with '__' from a line
    comment.

    @param line line to extract flags from (string)
    @keyparam startComment string identifying the start of the comment (string)
    @keyparam endComment string identifying the end of a comment (string)
    @return list containing the extracted flags (list of strings)
    """
    flags = []
    # rfind() yields -1 for a line without a comment; rindex() would raise
    # a ValueError instead and make the check below pointless
    pos = line.rfind(startComment)
    if pos >= 0:
        comment = line[pos + len(startComment):].strip()
        if endComment:
            # remove the comment terminator passed in by the caller
            comment = comment.replace(endComment, "")
        flags = [f for f in comment.split()
                 if f.startswith("__") and f.endswith("__")]
    return flags

# eflag: FileType = Python2

eric ide