ProjectWeb/Html5ToJsConverter.py

branch
eric7
changeset 43
2bed42620c99
parent 41
836c696f9565
child 48
17eb790b9a82
equal deleted inserted replaced
42:27f43499da60 43:2bed42620c99
13 import getpass 13 import getpass
14 14
15 from PyQt6.QtCore import QObject 15 from PyQt6.QtCore import QObject
16 from PyQt6.QtWidgets import QDialog 16 from PyQt6.QtWidgets import QDialog
17 17
18 from .Html5ToJsConverterParameterDialog import ( 18 from .Html5ToJsConverterParameterDialog import Html5ToJsConverterParameterDialog
19 Html5ToJsConverterParameterDialog
20 )
21 19
22 20
23 class Html5ToJsConverter(QObject): 21 class Html5ToJsConverter(QObject):
24 """ 22 """
25 Class implementing the HTML5 to JavaScript converter. 23 Class implementing the HTML5 to JavaScript converter.
26 """ 24 """
25
27 JsTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}" 26 JsTemplate8 = "{0}{1}{2}{3}{4}{5}{6}{7}"
28 TagsToIgnore = ('head', 'meta', 'noscript', 'script', 'style', 'link', 27 TagsToIgnore = (
29 'no-js', 'title', 'object', 'col', 'colgroup', 'option', 28 "head",
30 'param', 'audio', 'basefont', 'isindex', 'svg', 'area', 29 "meta",
31 'embed', 'br') 30 "noscript",
32 31 "script",
32 "style",
33 "link",
34 "no-js",
35 "title",
36 "object",
37 "col",
38 "colgroup",
39 "option",
40 "param",
41 "audio",
42 "basefont",
43 "isindex",
44 "svg",
45 "area",
46 "embed",
47 "br",
48 )
49
33 def __init__(self, html, parent=None): 50 def __init__(self, html, parent=None):
34 """ 51 """
35 Constructor 52 Constructor
36 53
37 @param html HTML text to be converted 54 @param html HTML text to be converted
38 @type str 55 @type str
39 @param parent reference to the parent object 56 @param parent reference to the parent object
40 @type QObject 57 @type QObject
41 """ 58 """
42 super().__init__(parent) 59 super().__init__(parent)
43 60
44 self.__html = html 61 self.__html = html
45 62
46 def getJavaScript(self): 63 def getJavaScript(self):
47 """ 64 """
48 Public method to get the converted JavaScript text. 65 Public method to get the converted JavaScript text.
49 66
50 @return JavaScript text 67 @return JavaScript text
51 @rtype str 68 @rtype str
52 """ 69 """
53 dlg = Html5ToJsConverterParameterDialog() 70 dlg = Html5ToJsConverterParameterDialog()
54 if dlg.exec() == QDialog.DialogCode.Accepted: 71 if dlg.exec() == QDialog.DialogCode.Accepted:
55 indentation, scriptTags = dlg.getData() 72 indentation, scriptTags = dlg.getData()
56 73
57 self.__createSoup() 74 self.__createSoup()
58 75
59 alreadyDone = list(self.TagsToIgnore) 76 alreadyDone = list(self.TagsToIgnore)
60 77
61 js = "<script>{0}".format(os.linesep) if scriptTags else "" 78 js = "<script>{0}".format(os.linesep) if scriptTags else ""
62 js += "// {0} by {1}{2}".format( 79 js += "// {0} by {1}{2}".format(
63 datetime.datetime.now().isoformat().split(".")[0], 80 datetime.datetime.now().isoformat().split(".")[0],
64 getpass.getuser(), 81 getpass.getuser(),
65 os.linesep 82 os.linesep,
66 ) 83 )
67 js += "$(document).ready(function(){" + os.linesep 84 js += "$(document).ready(function(){" + os.linesep
68 85
69 # step 1: IDs 86 # step 1: IDs
70 js += "/*{0}*/{1}".format( 87 js += "/*{0}*/{1}".format("-" * 75, os.linesep)
71 "-" * 75,
72 os.linesep
73 )
74 for id_ in self.__getIds(): 88 for id_ in self.__getIds():
75 if id_ not in alreadyDone: 89 if id_ not in alreadyDone:
76 js += "{0}// {1}{2}".format( 90 js += "{0}// {1}{2}".format(
77 indentation, 91 indentation, "#".join(id_).lower(), os.linesep
78 "#".join(id_).lower(),
79 os.linesep
80 ) 92 )
81 js += self.JsTemplate8.format( 93 js += self.JsTemplate8.format(
82 indentation, 94 indentation,
83 "var ", 95 "var ",
84 re.sub("[^a-z0-9]", "", 96 re.sub(
85 id_[1].lower() if len(id_[1]) < 11 else 97 "[^a-z0-9]",
86 re.sub("[aeiou]", "", id_[1].lower())), 98 "",
99 id_[1].lower()
100 if len(id_[1]) < 11
101 else re.sub("[aeiou]", "", id_[1].lower()),
102 ),
87 " = ", 103 " = ",
88 '$("#{0}").length'.format(id_[1]), 104 '$("#{0}").length'.format(id_[1]),
89 ";", 105 ";",
90 os.linesep, 106 os.linesep,
91 os.linesep 107 os.linesep,
92 ) 108 )
93 alreadyDone.append(id_) 109 alreadyDone.append(id_)
94 110
95 # step 2: classes 111 # step 2: classes
96 js += "/*{0}*/{1}".format( 112 js += "/*{0}*/{1}".format("-" * 75, os.linesep)
97 "-" * 75,
98 os.linesep
99 )
100 for class_ in self.__getClasses(): 113 for class_ in self.__getClasses():
101 if class_ not in alreadyDone: 114 if class_ not in alreadyDone:
102 js += "{0}// {1}{2}".format( 115 js += "{0}// {1}{2}".format(
103 indentation, 116 indentation, ".".join(class_).lower(), os.linesep
104 ".".join(class_).lower(),
105 os.linesep
106 ) 117 )
107 js += self.JsTemplate8.format( 118 js += self.JsTemplate8.format(
108 indentation, 119 indentation,
109 "var ", 120 "var ",
110 re.sub("[^a-z0-9]", "", 121 re.sub(
111 class_[1].lower() if len(class_[1]) < 11 else 122 "[^a-z0-9]",
112 re.sub("[aeiou]", "", class_[1].lower())), 123 "",
124 class_[1].lower()
125 if len(class_[1]) < 11
126 else re.sub("[aeiou]", "", class_[1].lower()),
127 ),
113 " = ", 128 " = ",
114 '$(".{0}").length'.format(class_[1]), 129 '$(".{0}").length'.format(class_[1]),
115 ";", 130 ";",
116 os.linesep, 131 os.linesep,
117 os.linesep 132 os.linesep,
118 ) 133 )
119 alreadyDone.append(class_) 134 alreadyDone.append(class_)
120 135
121 js += "})" 136 js += "})"
122 js += "{0}</script>".format(os.linesep) if scriptTags else "" 137 js += "{0}</script>".format(os.linesep) if scriptTags else ""
123 else: 138 else:
124 js = "" 139 js = ""
125 return js.strip() 140 return js.strip()
126 141
127 def __createSoup(self): 142 def __createSoup(self):
128 """ 143 """
129 Private method to get a BeautifulSoup object with our HTML text. 144 Private method to get a BeautifulSoup object with our HTML text.
130 """ 145 """
131 from bs4 import BeautifulSoup 146 from bs4 import BeautifulSoup
147
132 self.__soup = BeautifulSoup(BeautifulSoup(self.__html).prettify()) 148 self.__soup = BeautifulSoup(BeautifulSoup(self.__html).prettify())
133 149
134 def __getClasses(self): 150 def __getClasses(self):
135 """ 151 """
136 Private method to extract all classes of the HTML text. 152 Private method to extract all classes of the HTML text.
137 153
138 @return list of tuples containing the tag name and its classes 154 @return list of tuples containing the tag name and its classes
139 as a blank separated string 155 as a blank separated string
140 @rtype list of tuples of (str, str) 156 @rtype list of tuples of (str, str)
141 """ 157 """
142 classes = [(t.name, " ".join(t["class"])) for t in 158 classes = [
143 self.__soup.find_all(True, {"class": True})] 159 (t.name, " ".join(t["class"]))
160 for t in self.__soup.find_all(True, {"class": True})
161 ]
144 return sorted(set(classes)) 162 return sorted(set(classes))
145 163
146 def __getIds(self): 164 def __getIds(self):
147 """ 165 """
148 Private method to extract all IDs of the HTML text. 166 Private method to extract all IDs of the HTML text.
149 167
150 @return list of tuples containing the tag name and its ID 168 @return list of tuples containing the tag name and its ID
151 @rtype list of tuples of (str, str) 169 @rtype list of tuples of (str, str)
152 """ 170 """
153 ids = [(t.name, t["id"]) for t in 171 ids = [(t.name, t["id"]) for t in self.__soup.find_all(True, {"id": True})]
154 self.__soup.find_all(True, {"id": True})]
155 return sorted(set(ids)) 172 return sorted(set(ids))

eric ide

mercurial