Sat, 23 Dec 2023 15:48:54 +0100
Updated copyright for 2024.
# -*- coding: utf-8 -*-

# Copyright (c) 2014 - 2024 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the HTML5 to CSS3 converter.
"""

import datetime
import getpass
import os
import random

from bs4 import BeautifulSoup
from PyQt6.QtCore import QObject
from PyQt6.QtWidgets import QDialog

from .Html5ToCss3ConverterParameterDialog import Html5ToCss3ConverterParameterDialog


class Html5ToCss3Converter(QObject):
    """
    Class implementing the HTML5 to CSS3 converter.

    The converter collects the tag names, IDs and classes used in an HTML
    text and generates a CSS3 skeleton containing one rule per tag, ID and
    class combination.
    """

    # Rule templates (selector, "{", line separator, indentation, declaration
    # line, "}", line separator). CssTemplate8 takes an additional leading
    # selector prefix ("#" or "."). CssTemplate7 is not used by this class
    # anymore but is retained as a class attribute for backward compatibility.
    CssTemplate7 = "{0}{1}{2}{3}{4}{5}{6}"
    CssTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}"

    # Declarations one of which is randomly inserted into each generated rule
    # when the user asked for placeholders.
    Placeholders = (
        "margin:0",
        "padding:0",
        "border:0",
        "font-size:100%",
        "font:inherit",
        "vertical-align:baseline",
        "line-height:1",
        "outline:0",
        "font-weight:inherit",
        "font-style:inherit",
        "font-family:inherit",
        "vertical-align:baseline",
    )

    # Tag names for which no tag rule is generated.
    TagsToIgnore = (
        "head",
        "meta",
        "noscript",
        "script",
        "style",
        "link",
        "no-js",
        "title",
        "object",
        "col",
        "colgroup",
        "option",
        "param",
        "audio",
        "basefont",
        "isindex",
        "svg",
        "area",
        "embed",
        "br",
    )

    def __init__(self, html, parent=None):
        """
        Constructor

        @param html HTML text to be converted
        @type str
        @param parent reference to the parent object
        @type QObject
        """
        super().__init__(parent)

        self.__html = html

    def getCss3(self):
        """
        Public method to get the converted CSS3 text.

        The conversion parameters (indentation and placeholder usage) are
        requested from the user via a dialog. If the dialog is not accepted,
        an empty string is returned.

        @return CSS3 text
        @rtype str
        """
        dlg = Html5ToCss3ConverterParameterDialog()
        if dlg.exec() != QDialog.DialogCode.Accepted:
            return ""

        indentation, placeholders = dlg.getData()
        self.__createSoup()

        separator = "/*{0}*/{1}".format("-" * 75, os.linesep)

        # collect the pieces and join them once at the end
        parts = [
            '@charset "utf-8";{0}'.format(os.linesep),
            "/* {0} by {1}*/{2}".format(
                datetime.datetime.now().isoformat(timespec="seconds"),
                getpass.getuser(),
                2 * os.linesep,
            ),
        ]

        # step 1: tags
        ignoredTags = set(self.TagsToIgnore)
        parts.extend(
            self.__cssRule("", tag, indentation, placeholders)
            for tag in self.__getTags()
            if tag not in ignoredTags
        )
        parts.append(separator)

        # step 2: IDs
        # The (tag, id) tuples are unique already. They are deliberately not
        # tracked together with the class tuples, because an ID and a class
        # of the same name on the same tag need separate rules.
        for id_ in self.__getIds():
            parts.append("/* {0} */{1}".format("_".join(id_).lower(), os.linesep))
            parts.append(self.__cssRule("#", id_[1], indentation, placeholders))
        parts.append(separator)

        # step 3: classes
        for class_ in self.__getClasses():
            parts.append(
                "/* {0} */{1}".format("_".join(class_).lower(), os.linesep)
            )
            # multiple classes of an element become a selector list
            parts.append(
                self.__cssRule(
                    ".", ", .".join(class_[1].split()), indentation, placeholders
                )
            )

        return "".join(parts).strip()

    def __cssRule(self, prefix, name, indentation, placeholders):
        """
        Private method to generate the text of one CSS rule.

        @param prefix selector prefix (empty for tags, '#' for IDs and '.'
            for classes)
        @type str
        @param name name to be used as the selector
        @type str
        @param indentation indentation to be put in front of the declaration
        @type str
        @param placeholders flag indicating to insert a randomly chosen
            placeholder declaration
        @type bool
        @return text of the generated CSS rule
        @rtype str
        """
        declaration = (
            random.choice(self.Placeholders)  # secok
            if placeholders
            else ""
        )
        return self.CssTemplate8.format(
            prefix,
            name,
            "{",
            os.linesep,
            indentation,
            declaration + os.linesep,
            "}",
            os.linesep,
        )

    def __createSoup(self):
        """
        Private method to get a BeautifulSoup object with our HTML text.
        """
        # The parser is named explicitly so that the result does not depend
        # on which optional parsers happen to be installed and bs4 does not
        # have to guess one.
        self.__soup = BeautifulSoup(
            BeautifulSoup(self.__html, "html.parser").prettify(), "html.parser"
        )

    def __getTags(self):
        """
        Private method to extract all tags of the HTML text.

        @return sorted list of all tags
        @rtype list of str
        """
        # sorted like the IDs and classes to get a reproducible rule order
        return sorted({t.name for t in self.__soup.find_all(True)})

    def __getClasses(self):
        """
        Private method to extract all classes of the HTML text.

        @return list of tuples containing the tag name and its classes
            as a blank separated string
        @rtype list of tuples of (str, str)
        """
        classes = {
            (t.name, " ".join(t["class"]))
            for t in self.__soup.find_all(True, {"class": True})
        }
        return sorted(classes)

    def __getIds(self):
        """
        Private method to extract all IDs of the HTML text.

        @return list of tuples containing the tag name and its ID
        @rtype list of tuples of (str, str)
        """
        ids = {(t.name, t["id"]) for t in self.__soup.find_all(True, {"id": True})}
        return sorted(ids)