WebBrowser/Bookmarks/NsHtmlReader.py

branch
QtWebEngine
changeset 4732
5ac4fc1dfc20
parent 4631
5c1a96925da4
child 5389
9b1c800daff3
equal deleted inserted replaced
4731:67d861d9e492 4732:5ac4fc1dfc20
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2012 - 2016 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing a class to read Netscape HTML bookmark files.
8 """
9
10 from __future__ import unicode_literals
11 try:
12 str = unicode
13 except NameError:
14 pass
15
16 from PyQt5.QtCore import QObject, QIODevice, QFile, QRegExp, Qt, QDateTime
17
18 from .BookmarkNode import BookmarkNode
19
20 import Utilities
21
22
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The input is processed line by line. Each line is matched against a
    set of case insensitive regular expressions recognizing folder
    headers, folder ends, bookmarks, descriptions and separators.
    """
    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super(NsHtmlReader, self).__init__()

        # structural elements; minimal matching keeps the attribute part
        # (capture 1) and the title (capture 2) apart
        self.__folderRx = QRegExp("<DT><H3(.*)>(.*)</H3>", Qt.CaseInsensitive)
        self.__folderRx.setMinimal(True)

        self.__endFolderRx = QRegExp("</DL>", Qt.CaseInsensitive)

        self.__bookmarkRx = QRegExp("<DT><A(.*)>(.*)</A>", Qt.CaseInsensitive)
        self.__bookmarkRx.setMinimal(True)

        self.__descRx = QRegExp("<DD>(.*)", Qt.CaseInsensitive)

        self.__separatorRx = QRegExp("<HR>", Qt.CaseInsensitive)

        # attributes found inside the folder or bookmark tag
        self.__urlRx = QRegExp('HREF="(.*)"', Qt.CaseInsensitive)
        self.__urlRx.setMinimal(True)

        # raw strings keep the backslash of '\d' without relying on
        # Python passing unknown escape sequences through
        self.__addedRx = QRegExp(r'ADD_DATE="(\d*)"', Qt.CaseInsensitive)
        self.__addedRx.setMinimal(True)

        self.__modifiedRx = QRegExp(
            r'LAST_MODIFIED="(\d*)"', Qt.CaseInsensitive)
        self.__modifiedRx.setMinimal(True)

        self.__visitedRx = QRegExp(r'LAST_VISIT="(\d*)"', Qt.CaseInsensitive)
        self.__visitedRx.setMinimal(True)

        self.__foldedRx = QRegExp("FOLDED", Qt.CaseInsensitive)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A device passed in by the caller is read but left open. A file
        given by name is opened and closed by this method. A file that
        does not exist or cannot be opened yields an empty root node.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        if isinstance(fileNameOrDevice, QIODevice):
            return self.__readDevice(fileNameOrDevice)

        f = QFile(fileNameOrDevice)
        if not f.exists() or not f.open(QFile.ReadOnly):
            return BookmarkNode(BookmarkNode.Root)

        try:
            return self.__readDevice(f)
        finally:
            # the file was opened here, so it is released here as well
            f.close()

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a time stamp attribute.

        @param rx regular expression capturing the seconds value as its
            first capture (QRegExp)
        @param arguments attribute part of the tag (string)
        @return extracted time stamp or None, if the attribute is missing
            or has an empty value (QDateTime or None)
        """
        if rx.indexIn(arguments) == -1:
            return None

        # the patterns capture '\d*', so the value may be empty
        seconds = rx.cap(1)
        if not seconds:
            return None

        return QDateTime.fromTime_t(int(seconds))

    def __readDevice(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of the currently open folders; the root always stays at
        # the bottom
        folders = [root]
        lastNode = None

        while not dev.atEnd():
            line = str(dev.readLine(), encoding="utf-8").rstrip()
            if self.__folderRx.indexIn(line) != -1:
                # folder definition
                arguments = self.__folderRx.cap(1)
                name = self.__folderRx.cap(2)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(name)
                node.expanded = self.__foldedRx.indexIn(arguments) == -1
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif self.__endFolderRx.indexIn(line) != -1:
                # end of folder definition
                # The closing tag of the outermost list (or a stray one)
                # must not remove the root; later lines still need a
                # parent in folders[-1].
                if len(folders) > 1:
                    folders.pop()

            elif self.__bookmarkRx.indexIn(line) != -1:
                # bookmark definition
                arguments = self.__bookmarkRx.cap(1)
                name = self.__bookmarkRx.cap(2)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(name)
                if self.__urlRx.indexIn(arguments) != -1:
                    node.url = self.__urlRx.cap(1)
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__timestamp(self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__timestamp(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif self.__descRx.indexIn(line) != -1:
                # description; belongs to the most recent folder or
                # bookmark
                if lastNode:
                    lastNode.desc = Utilities.html_udecode(
                        self.__descRx.cap(1))

            elif self.__separatorRx.indexIn(line) != -1:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

eric ide

mercurial