Wed, 30 Aug 2023 11:58:51 +0200
Corrected some code style and formatting issues and prepared the code for Python 3.12.
# -*- coding: utf-8 -*-

# Copyright (c) 2014 - 2023 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the HTML5 to JavaScript converter.
"""

import datetime
import getpass
import os
import re

from bs4 import BeautifulSoup
from PyQt6.QtCore import QObject
from PyQt6.QtWidgets import QDialog

from .Html5ToJsConverterParameterDialog import Html5ToJsConverterParameterDialog


class Html5ToJsConverter(QObject):
    """
    Class implementing the HTML5 to JavaScript converter.

    The converter collects all IDs and classes used in the given HTML text
    and generates a jQuery based JavaScript skeleton declaring one variable
    per ID and per class.
    """

    # Template used to assemble one variable declaration out of eight parts:
    # indentation, "var ", name, " = ", expression, ";", line end, line end.
    JsTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}"

    # Names of tags for which no JavaScript code shall be generated.
    TagsToIgnore = (
        "head",
        "meta",
        "noscript",
        "script",
        "style",
        "link",
        "no-js",
        "title",
        "object",
        "col",
        "colgroup",
        "option",
        "param",
        "audio",
        "basefont",
        "isindex",
        "svg",
        "area",
        "embed",
        "br",
    )

    def __init__(self, html, parent=None):
        """
        Constructor

        @param html HTML text to be converted
        @type str
        @param parent reference to the parent object
        @type QObject
        """
        super().__init__(parent)

        self.__html = html

    def getJavaScript(self):
        """
        Public method to get the converted JavaScript text.

        The user is asked for the conversion parameters (indentation and
        whether to enclose the code in script tags) via a dialog. If the
        dialog is cancelled, an empty string is returned.

        @return JavaScript text
        @rtype str
        """
        dlg = Html5ToJsConverterParameterDialog()
        if dlg.exec() != QDialog.DialogCode.Accepted:
            return ""

        indentation, scriptTags = dlg.getData()

        self.__createSoup()

        # Entries already handled; shared between the ID and the class step
        # so that an identical (tag, value) pair is only emitted once.
        alreadyDone = set()
        separator = "/*{0}*/{1}".format("-" * 75, os.linesep)

        parts = []
        if scriptTags:
            parts.append("<script>{0}".format(os.linesep))
        parts.append(
            "// {0} by {1}{2}".format(
                datetime.datetime.now().isoformat().split(".")[0],
                getpass.getuser(),
                os.linesep,
            )
        )
        parts.append("$(document).ready(function(){" + os.linesep)

        # step 1: IDs
        parts.append(separator)
        parts.extend(
            self.__declarations(self.__getIds(), "#", indentation, alreadyDone)
        )

        # step 2: classes
        parts.append(separator)
        parts.extend(
            self.__declarations(self.__getClasses(), ".", indentation, alreadyDone)
        )

        parts.append("})")
        if scriptTags:
            parts.append("{0}</script>".format(os.linesep))

        return "".join(parts).strip()

    def __declarations(self, entries, prefix, indentation, alreadyDone):
        """
        Private method to generate the variable declarations for a list of
        IDs or classes.

        Entries belonging to a tag listed in TagsToIgnore and entries already
        contained in alreadyDone are skipped. Each handled entry is added to
        alreadyDone.

        @param entries list of tuples containing the tag name and its ID or
            its classes as a blank separated string
        @type list of tuples of (str, str)
        @param prefix jQuery selector prefix ('#' for IDs, '.' for classes)
        @type str
        @param indentation string to be used to indent the generated lines
        @type str
        @param alreadyDone set of entries that have been handled already
            (extended in place)
        @type set of tuples of (str, str)
        @return list of generated JavaScript text fragments
        @rtype list of str
        """
        fragments = []
        for entry in entries:
            tag, value = entry
            if tag in self.TagsToIgnore or entry in alreadyDone:
                continue
            alreadyDone.add(entry)

            # Derive the variable name; long names are shortened by dropping
            # the vowels before removing all characters invalid in a name.
            name = value.lower()
            if len(value) >= 11:
                name = re.sub("[aeiou]", "", name)
            name = re.sub("[^a-z0-9]", "", name)

            # Several classes of one element must form a compound selector
            # ('.a.b'); '.a b' would select descendants of '.a' instead.
            target = ".".join(value.split()) if prefix == "." else value

            fragments.append(
                "{0}// {1}{2}".format(
                    indentation, prefix.join(entry).lower(), os.linesep
                )
            )
            fragments.append(
                self.JsTemplate8.format(
                    indentation,
                    "var ",
                    name,
                    " = ",
                    '$("{0}{1}").length'.format(prefix, target),
                    ";",
                    os.linesep,
                    os.linesep,
                )
            )
        return fragments

    def __createSoup(self):
        """
        Private method to get a BeautifulSoup object with our HTML text.

        The parser is given explicitly in order to get the same result
        independent of the parser packages installed and to avoid the
        'no parser was explicitly specified' warning of BeautifulSoup.
        """
        prettified = BeautifulSoup(self.__html, "html.parser").prettify()
        self.__soup = BeautifulSoup(prettified, "html.parser")

    def __getClasses(self):
        """
        Private method to extract all classes of the HTML text.

        @return sorted list of unique tuples containing the tag name and its
            classes as a blank separated string
        @rtype list of tuples of (str, str)
        """
        classes = {
            (t.name, " ".join(t["class"]))
            for t in self.__soup.find_all(True, {"class": True})
        }
        return sorted(classes)

    def __getIds(self):
        """
        Private method to extract all IDs of the HTML text.

        @return sorted list of unique tuples containing the tag name and its
            ID
        @rtype list of tuples of (str, str)
        """
        ids = {(t.name, t["id"]) for t in self.__soup.find_all(True, {"id": True})}
        return sorted(ids)