Helpviewer/Bookmarks/BookmarksImporters/HtmlImporter.py

changeset 1732
b140a24e147a
parent 1725
d7a3430f7cbf
child 1965
96f5a76e1845
equal deleted inserted replaced
1731:56cf9c150dbf 1732:b140a24e147a
6 """ 6 """
7 Module implementing an importer for HTML bookmark files. 7 Module implementing an importer for HTML bookmark files.
8 """ 8 """
9 9
10 import os 10 import os
11 import tempfile
12 11
13 from PyQt4.QtCore import QCoreApplication, QXmlStreamReader, QDate, Qt 12 from PyQt4.QtCore import QCoreApplication, QDate, Qt
14 from PyQt4.QtWebKit import QWebPage
15 13
16 from ..BookmarkNode import BookmarkNode 14 from ..BookmarkNode import BookmarkNode
17 from ..XbelReader import XbelReader 15 from ..NsHtmlReader import NsHtmlReader
18 16
19 from .BookmarksImporter import BookmarksImporter 17 from .BookmarksImporter import BookmarksImporter
20 18
21 import UI.PixmapCache 19 import UI.PixmapCache
22
23 ##########################################################################################
24
25 extract_js = r"""
26 function walk() {
27 var parent = arguments[0];
28 var indent = arguments[1];
29
30 var result = "";
31 var children = parent.childNodes;
32 var folderName = "";
33 var folded = "";
34 for (var i = 0; i < children.length; i++) {
35 var object = children.item(i);
36 if (object.nodeName == "HR") {
37 result += indent + "<separator/>\n";
38 }
39 if (object.nodeName == "H3") {
40 folderName = object.innerHTML;
41 folded = object.folded;
42 if (object.folded == undefined)
43 folded = "false";
44 else
45 folded = "true";
46 }
47 if (object.nodeName == "A") {
48 result += indent + "<bookmark href=\"" + encodeURI(object.href).replace(/&/g, '&amp;') + "\">\n";
49 result += indent + indent + "<title>" + object.innerHTML + "</title>\n";
50 result += indent + "</bookmark>\n";
51 }
52
53 var currentIndent = indent;
54 if (object.nodeName == "DL" && folderName != "") {
55 result += indent + "<folder folded=\"" + folded + "\">\n";
56 indent += " ";
57 result += indent + "<title>" + folderName + "</title>\n";
58 }
59 result += walk(object, indent);
60 if (object.nodeName == "DL" && folderName != "") {
61 result += currentIndent + "</folder>\n";
62 }
63 }
64 return result;
65 }
66
67 var xbel = walk(document, " ");
68
69 if (xbel != "") {
70 xbel = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE xbel>\n<xbel version=\"1.0\">\n" + xbel + "</xbel>\n";
71 }
72
73 xbel;
74 """
75
76 ##########################################################################################
77 20
78 21
79 def getImporterInfo(id): 22 def getImporterInfo(id):
80 """ 23 """
81 Module function to get information for the given HTML source id. 24 Module function to get information for the given HTML source id.
142 """ 85 """
143 Public method to get the imported bookmarks. 86 Public method to get the imported bookmarks.
144 87
145 @return imported bookmarks (BookmarkNode) 88 @return imported bookmarks (BookmarkNode)
146 """ 89 """
147 try: 90 reader = NsHtmlReader()
148 f = open(self.__fileName, "r", encoding="utf-8") 91 importRootNode = reader.read(self.__fileName)
149 contents = f.read()
150 f.close()
151 except IOError as err:
152 self._error = True
153 self._errorString = self.trUtf8("File '{0}' cannot be read.\nReason: {1}")\
154 .format(self.__fileName, str(err))
155 return None
156
157 reader = XbelReader()
158 webpage = QWebPage()
159 webpage.mainFrame().setHtml(contents)
160 result = webpage.mainFrame().evaluateJavaScript(extract_js)
161
162 fd, name = tempfile.mkstemp(text=True)
163 f = os.fdopen(fd, "w")
164 f.write(result)
165 f.close()
166 importRootNode = reader.read(name)
167 os.remove(name)
168
169 if reader.error() != QXmlStreamReader.NoError:
170 self._error = True
171 self._errorString = self.trUtf8(
172 """Error when importing bookmarks on line {0}, column {1}:\n{2}""")\
173 .format(reader.lineNumber(),
174 reader.columnNumber(),
175 reader.errorString())
176 return None
177 92
178 importRootNode.setType(BookmarkNode.Folder) 93 importRootNode.setType(BookmarkNode.Folder)
179 if self._id == "html": 94 if self._id == "html":
180 importRootNode.title = self.trUtf8("HTML Import") 95 importRootNode.title = self.trUtf8("HTML Import")
181 else: 96 else:

eric ide

mercurial