Helpviewer/Bookmarks/BookmarksImporters/HtmlImporter.py

changeset 1713
56fdde8a2441
child 1725
d7a3430f7cbf
equal deleted inserted replaced
1709:62fb6a42cd7c 1713:56fdde8a2441
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2012 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing an importer for HTML bookmark files.
8 """
9
10 import os
11 import tempfile
12
13 from PyQt4.QtCore import QCoreApplication, QXmlStreamReader, QDate, Qt
14 from PyQt4.QtWebKit import QWebPage
15
16 from ..BookmarkNode import BookmarkNode
17 from ..XbelReader import XbelReader
18
19 from .BookmarksImporter import BookmarksImporter
20
21 import UI.PixmapCache
22
23 ##########################################################################################
24
# JavaScript that converts a loaded Netscape-style HTML bookmarks page into
# an XBEL document. It is evaluated by HtmlImporter.importedBookmarks() via
# QWebFrame.evaluateJavaScript(); the trailing "xbel;" expression is the
# value handed back to Python.
#
# walk(parent, indent) recurses over all child nodes of 'parent':
#   HR -> emits a <separator/> element
#   H3 -> remembers its innerHTML as the name of the next folder
#   A  -> emits a <bookmark> with the URI-encoded href ('&' escaped as
#         '&amp;') and the link's innerHTML as <title>
#   DL -> wrapped in <folder>...</folder> if an H3 was seen before it among
#         the same siblings
# The XML prolog and the <xbel> root element are only added if the walk
# produced any output; otherwise the script evaluates to an empty string.
#
# NOTE(review): 'folded' is taken from the DOM property object.folded and
# is "true" whenever that property is defined, regardless of its value --
# confirm that the FOLDED attribute of a bookmarks file is actually exposed
# as this property by QtWebKit.
extract_js = r"""
function walk() {
    var parent = arguments[0];
    var indent = arguments[1];

    var result = "";
    var children = parent.childNodes;
    var folderName = "";
    var folded = "";
    for (var i = 0; i < children.length; i++) {
        var object = children.item(i);
        if (object.nodeName == "HR") {
            result += indent + "<separator/>\n";
        }
        if (object.nodeName == "H3") {
            folderName = object.innerHTML;
            folded = object.folded;
            if (object.folded == undefined)
                folded = "false";
            else
                folded = "true";
        }
        if (object.nodeName == "A") {
            result += indent + "<bookmark href=\"" + encodeURI(object.href).replace(/&/g, '&amp;') + "\">\n";
            result += indent + indent + "<title>" + object.innerHTML + "</title>\n";
            result += indent + "</bookmark>\n";
        }

        var currentIndent = indent;
        if (object.nodeName == "DL" && folderName != "") {
            result += indent + "<folder folded=\"" + folded + "\">\n";
            indent += " ";
            result += indent + "<title>" + folderName + "</title>\n";
        }
        result += walk(object, indent);
        if (object.nodeName == "DL" && folderName != "") {
            result += currentIndent + "</folder>\n";
        }
    }
    return result;
}

var xbel = walk(document, " ");

if (xbel != "") {
    xbel = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE xbel>\n<xbel version=\"1.0\">\n" + xbel + "</xbel>\n";
}

xbel;
"""
75
76 ##########################################################################################
77
78
def getImporterInfo(id):
    """
    Module function to get the importer information for an HTML source id.

    @param id source ID; "html" is the only ID handled here (string)
    @return tuple with an icon (QPixmap), readable name (string), name of
        the default bookmarks file (string), an info text (string),
        a prompt (string) and the default directory of the bookmarks file (string)
    @exception ValueError raised for every source ID other than "html"
    """
    # Reject unknown IDs first so the supported case reads straight through.
    if id != "html":
        raise ValueError("Unsupported browser ID given ({0}).".format(id))

    icon = UI.PixmapCache.getPixmap("html.png")
    # The translated name doubles as the file dialog entry; the glob
    # patterns are appended untranslated.
    fileSpec = QCoreApplication.translate("HtmlImporter",
        "HTML Netscape Bookmarks") + " (*.htm *.html)"
    infoText = QCoreApplication.translate("HtmlImporter",
        """You can import bookmarks from any browser that supports HTML """
        """exporting. This file has usually the extension .htm or .html.""")
    prompt = QCoreApplication.translate("HtmlImporter",
        """Please choose the file to begin importing bookmarks.""")

    return (
        icon,
        "HTML Netscape Bookmarks",
        fileSpec,
        infoText,
        prompt,
        "",
    )
102
103
class HtmlImporter(BookmarksImporter):
    """
    Class implementing the HTML bookmarks importer.

    The HTML bookmarks file is loaded into an off-screen QWebPage, converted
    to XBEL by the extract_js script and then parsed with XbelReader.
    """
    def __init__(self, id="", parent=None):
        """
        Constructor

        @param id source ID (string)
        @param parent reference to the parent object (QObject)
        """
        super().__init__(id, parent)

        self.__fileName = ""
        self.__inFile = None

    def setPath(self, path):
        """
        Public method to set the path of the bookmarks file or directory.

        @param path bookmarks file or directory (string)
        """
        self.__fileName = path

    def open(self):
        """
        Public method to open the bookmarks file.

        Only the existence of the file is checked here; it is read by
        importedBookmarks(). On failure the error flag and error string
        of the importer are set.

        @return flag indicating success (boolean)
        """
        if not os.path.exists(self.__fileName):
            self._error = True
            self._errorString = self.trUtf8("File '{0}' does not exist.")\
                .format(self.__fileName)
            return False
        return True

    def importedBookmarks(self):
        """
        Public method to get the imported bookmarks.

        On failure the error flag and error string of the importer are set.

        @return imported bookmarks (BookmarkNode) or None, if the file could
            not be read or the generated XBEL could not be parsed
        """
        try:
            # the context manager closes the file even if read() fails
            with open(self.__fileName, "r") as f:
                contents = f.read()
        except IOError as err:
            self._error = True
            self._errorString = self.trUtf8("File '{0}' cannot be read.\nReason: {1}")\
                .format(self.__fileName, str(err))
            return None

        reader = XbelReader()
        webpage = QWebPage()
        webpage.mainFrame().setHtml(contents)
        result = webpage.mainFrame().evaluateJavaScript(extract_js)

        # XbelReader reads from a file, so the generated XBEL goes through a
        # temporary file, which is removed even if writing or parsing fails.
        fd, name = tempfile.mkstemp(text=True)
        try:
            # extract_js declares encoding="UTF-8" in the XML prolog; write
            # the file accordingly instead of using the locale's encoding.
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(result)
            importRootNode = reader.read(name)
        finally:
            os.remove(name)

        if reader.error() != QXmlStreamReader.NoError:
            self._error = True
            self._errorString = self.trUtf8(
                """Error when importing bookmarks on line {0}, column {1}:\n{2}""")\
                .format(reader.lineNumber(),
                        reader.columnNumber(),
                        reader.errorString())
            return None

        importRootNode.setType(BookmarkNode.Folder)
        if self._id == "html":
            importRootNode.title = self.trUtf8("HTML Import")
        else:
            importRootNode.title = self.trUtf8("Imported {0}")\
                .format(QDate.currentDate().toString(Qt.SystemLocaleShortDate))
        return importRootNode

eric ide

mercurial