Web Project and Tools: ProjectWeb/Html5ToCss3Converter.py@815847f3d404

# -*- coding: utf-8 -*-

# Copyright (c) 2014 - 2024 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the HTML5 to CSS3 converter.
"""

import datetime
import getpass
import os
import random

from bs4 import BeautifulSoup
from PyQt6.QtCore import QObject
from PyQt6.QtWidgets import QDialog

from .Html5ToCss3ConverterParameterDialog import Html5ToCss3ConverterParameterDialog


class Html5ToCss3Converter(QObject):
    """
    Class implementing the HTML5 to CSS3 converter.

    The converter parses the given HTML text and generates a CSS3 skeleton
    containing one (mostly empty) rule per tag, per ID and per class
    combination found in the document.
    """

    # Templates for a single CSS rule; the fields are concatenated as is.
    # CssTemplate8 is not used by this class anymore but is kept for
    # backward compatibility with external code referencing it.
    CssTemplate7 = "{0}{1}{2}{3}{4}{5}{6}"
    CssTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}"
    # Declarations one of which is randomly inserted into each generated
    # rule, if the user requested placeholders.
    Placeholders = (
        "margin:0",
        "padding:0",
        "border:0",
        "font-size:100%",
        "font:inherit",
        "vertical-align:baseline",
        "line-height:1",
        "outline:0",
        "font-weight:inherit",
        "font-style:inherit",
        "font-family:inherit",
        "vertical-align:baseline",
    )
    # Tags for which no tag rule is generated.
    TagsToIgnore = (
        "head",
        "meta",
        "noscript",
        "script",
        "style",
        "link",
        "no-js",
        "title",
        "object",
        "col",
        "colgroup",
        "option",
        "param",
        "audio",
        "basefont",
        "isindex",
        "svg",
        "area",
        "embed",
        "br",
    )

    def __init__(self, html, parent=None):
        """
        Constructor

        @param html HTML text to be converted
        @type str
        @param parent reference to the parent object
        @type QObject
        """
        super().__init__(parent)

        self.__html = html

    def getCss3(self):
        """
        Public method to get the converted CSS3 text.

        The user is asked for the conversion parameters (indentation and
        use of placeholders) first. If the dialog is not accepted, an empty
        string is returned.

        @return CSS3 text
        @rtype str
        """
        dlg = Html5ToCss3ConverterParameterDialog()
        if dlg.exec() != QDialog.DialogCode.Accepted:
            return ""

        indentation, placeholders = dlg.getData()

        self.__createSoup()

        separator = "/*{0}*/{1}".format("-" * 75, os.linesep)
        parts = [
            '@charset "utf-8";{0}'.format(os.linesep),
            "/* {0} by {1}*/{2}".format(
                datetime.datetime.now().isoformat(timespec="seconds"),
                getpass.getuser(),
                2 * os.linesep,
            ),
        ]

        # step 1: tags
        ignoredTags = set(self.TagsToIgnore)
        parts.extend(
            self.__formatRule(tag, indentation, placeholders)
            for tag in self.__getTags()
            if tag not in ignoredTags
        )
        parts.append(separator)

        # step 2: IDs
        # The (tag, ID) tuples are unique already and must not be mixed
        # with the class tuples of step 3; otherwise an ID would suppress
        # the rule of an identically named class of the same tag.
        for id_ in self.__getIds():
            parts.append("/* {0} */{1}".format("_".join(id_).lower(), os.linesep))
            parts.append(
                self.__formatRule("#{0}".format(id_[1]), indentation, placeholders)
            )
        parts.append(separator)

        # step 3: classes
        for class_ in self.__getClasses():
            parts.append(
                "/* {0} */{1}".format("_".join(class_).lower(), os.linesep)
            )
            # several classes of one element become a selector group
            selector = ".{0}".format(", .".join(class_[1].split()))
            parts.append(self.__formatRule(selector, indentation, placeholders))

        return "".join(parts).strip()

    def __formatRule(self, selector, indentation, placeholders):
        """
        Private method to generate the text of a single CSS rule.

        @param selector selector of the rule
        @type str
        @param indentation indentation string to be used for the rule body
        @type str
        @param placeholders flag indicating to insert a randomly selected
            placeholder declaration into the rule body
        @type bool
        @return text of the CSS rule terminated by a line separator
        @rtype str
        """
        body = (
            random.choice(self.Placeholders) + os.linesep  # secok
            if placeholders
            else os.linesep
        )
        return self.CssTemplate7.format(
            selector, "{", os.linesep, indentation, body, "}", os.linesep
        )

    def __createSoup(self):
        """
        Private method to get a BeautifulSoup object with our HTML text.
        """
        # The parser is given explicitly because letting BeautifulSoup guess
        # it yields results depending on the installed parser packages (and
        # a GuessedAtParserWarning).
        prettyHtml = BeautifulSoup(self.__html, "html.parser").prettify()
        self.__soup = BeautifulSoup(prettyHtml, "html.parser")

    def __getTags(self):
        """
        Private method to extract all tags of the HTML text.

        @return sorted list of all tags without duplicates
        @rtype list of str
        """
        # sorted to get a reproducible order of the generated rules
        return sorted({t.name for t in self.__soup.find_all(True)})

    def __getClasses(self):
        """
        Private method to extract all classes of the HTML text.

        Elements with an empty class attribute are skipped because they
        would result in a rule with an empty selector.

        @return sorted list of tuples containing the tag name and its classes
            as a blank separated string
        @rtype list of tuples of (str, str)
        """
        classes = set()
        for t in self.__soup.find_all(True, {"class": True}):
            classNames = " ".join(t["class"])
            if classNames:
                classes.add((t.name, classNames))
        return sorted(classes)

    def __getIds(self):
        """
        Private method to extract all IDs of the HTML text.

        Elements with an empty ID attribute are skipped because they would
        result in a rule with an empty selector.

        @return sorted list of tuples containing the tag name and its ID
        @rtype list of tuples of (str, str)
        """
        ids = {
            (t.name, t["id"])
            for t in self.__soup.find_all(True, {"id": True})
            if t["id"]
        }
        return sorted(ids)