--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ProjectWeb/Html5ToJsConverter.py Wed Dec 31 20:38:35 2014 +0100 @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2014 Detlev Offenbach <detlev@die-offenbachs.de> +# + +""" +Module implementing the HTML5 to JavaScript converter. +""" + +from __future__ import unicode_literals + +import os +import re +import datetime +import getpass + +from PyQt5.QtCore import QObject +from PyQt5.QtWidgets import QDialog + +from .Html5ToJsConverterParameterDialog import \ + Html5ToJsConverterParameterDialog + + +class Html5ToJsConverter(QObject): + """ + Class implementing the HTML5 to JavaScript converter. + """ + JsTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}" + TagsToIgnore = ('head', 'meta', 'noscript', 'script', 'style', 'link', + 'no-js', 'title', 'object', 'col', 'colgroup', 'option', + 'param', 'audio', 'basefont', 'isindex', 'svg', 'area', + 'embed', 'br') + + def __init__(self, html, parent=None): + """ + Constructor + + @param html HTML text to be converted (string) + @param parent reference to the parent object (QObject) + """ + super(Html5ToJsConverter, self).__init__(parent) + + self.__html = html + + def getJavaScript(self): + """ + Public method to get the converted JavaScript text. + + @return JavaScript text (string) + """ + dlg = Html5ToJsConverterParameterDialog() + if dlg.exec_() == QDialog.Accepted: + indentation, scriptTags = dlg.getData() + + self.__createSoup() + + alreadyDone = list(self.TagsToIgnore) + + js = "<script>{0}".format(os.linesep) if scriptTags else "" + js += "// {0} by {1}{2}".format( + datetime.datetime.now().isoformat().split(".")[0], + getpass.getuser(), + os.linesep + ) + js += "$(document).ready(function(){" + os.linesep + + # step 1: IDs + js += "/*{0}*/{1}".format( + "-" * 75, + os.linesep + ) + for id_ in self.__getIds(): + if id_ not in alreadyDone: + js += "{0}// {1}{2}".format( + indentation, + "#".join(id_).lower(), + os.linesep + ) + js += self.JsTemplate8.format( + indentation, + "var ", + re.sub("[^a-z0-9]", "", + id_[1].lower() if len(id_[1]) < 11 else + re.sub("[aeiou]", "", id_[1].lower())), + " = ", + '$("#{0}").length'.format(id_[1]), + ";", + os.linesep, + os.linesep + ) + alreadyDone.append(id_) + + # step 2: classes + js += "/*{0}*/{1}".format( + "-" * 75, + os.linesep + ) + for class_ in self.__getClasses(): + if class_ not in alreadyDone: + js += "{0}// {1}{2}".format( + indentation, + ".".join(class_).lower(), + os.linesep + ) + js += self.JsTemplate8.format( + indentation, + "var ", + re.sub("[^a-z0-9]", "", + class_[1].lower() if len(class_[1]) < 11 else + re.sub("[aeiou]", "", class_[1].lower())), + " = ", + '$(".{0}").length'.format(class_[1]), + ";", + os.linesep, + os.linesep + ) + alreadyDone.append(class_) + + js += "})" + js += "{0}</script>".format(os.linesep) if scriptTags else "" + else: + js = "" + return js.strip() + + def __createSoup(self): + """ + Private method to get a BeaitifulSoup object with our HTML text. + """ + from bs4 import BeautifulSoup + self.__soup = BeautifulSoup(BeautifulSoup(self.__html).prettify()) + + def __getClasses(self): + """ + Private method to extract all classes of the HTML text. + + @return list of tuples containing the tag name and its classes + as a blank separated string (list of tuples of two strings) + """ + classes = [(t.name, " ".join(t["class"])) for t in + self.__soup.find_all(True, {"class": True})] + return sorted(list(set(classes))) + + def __getIds(self): + """ + Private method to extract all IDs of the HTML text. + + @return list of tuples containing the tag name and its ID + (list of tuples of two strings) + """ + ids = [(t.name, t["id"]) for t in + self.__soup.find_all(True, {"id": True})] + return sorted(list(set(ids)))