Helpviewer/Bookmarks/NsHtmlReader.py

changeset 1732
b140a24e147a
child 2302
f29e9405c851
equal deleted inserted replaced
1731:56cf9c150dbf 1732:b140a24e147a
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2012 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing a class to read Netscape HTML bookmark files.
8 """
9
10 from PyQt4.QtCore import QObject, QIODevice, QFile, QRegExp, Qt, QDateTime
11
12 from .BookmarkNode import BookmarkNode
13
14 import Utilities
15
16
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line; each line is matched against a set
    of regular expressions recognizing folder headers, folder ends,
    bookmarks, descriptions and separators.
    """
    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super().__init__()

        # Line level patterns. Minimal (non-greedy) matching keeps the
        # attribute capture from swallowing the title capture.
        self.__folderRx = QRegExp("<DT><H3(.*)>(.*)</H3>", Qt.CaseInsensitive)
        self.__folderRx.setMinimal(True)

        self.__endFolderRx = QRegExp("</DL>", Qt.CaseInsensitive)

        self.__bookmarkRx = QRegExp("<DT><A(.*)>(.*)</A>", Qt.CaseInsensitive)
        self.__bookmarkRx.setMinimal(True)

        self.__descRx = QRegExp("<DD>(.*)", Qt.CaseInsensitive)

        self.__separatorRx = QRegExp("<HR>", Qt.CaseInsensitive)

        # Attribute level patterns; these are applied to the attribute text
        # captured by the folder and bookmark patterns above.
        self.__urlRx = QRegExp('HREF="(.*)"', Qt.CaseInsensitive)
        self.__urlRx.setMinimal(True)

        # Raw strings are used for the patterns containing '\d' because
        # '\d' is not a valid Python string escape sequence.
        self.__addedRx = QRegExp(r'ADD_DATE="(\d*)"', Qt.CaseInsensitive)
        self.__addedRx.setMinimal(True)

        self.__modifiedRx = QRegExp(
            r'LAST_MODIFIED="(\d*)"', Qt.CaseInsensitive)
        self.__modifiedRx.setMinimal(True)

        self.__visitedRx = QRegExp(r'LAST_VISIT="(\d*)"', Qt.CaseInsensitive)
        self.__visitedRx.setMinimal(True)

        self.__foldedRx = QRegExp("FOLDED", Qt.CaseInsensitive)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A device passed in by the caller is read but not closed. A file
        opened by this method is closed again before returning. If the
        file does not exist or cannot be opened, an empty root node is
        returned.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        if isinstance(fileNameOrDevice, QIODevice):
            return self.__parse(fileNameOrDevice)

        f = QFile(fileNameOrDevice)
        if not f.exists() or not f.open(QFile.ReadOnly):
            return BookmarkNode(BookmarkNode.Root)
        try:
            return self.__parse(f)
        finally:
            # the file was opened here, so it is released here as well
            f.close()

    def __parse(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read from (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of the currently open folders; the root always stays at
        # the bottom
        folders = [root]
        # most recently created folder or bookmark; receives a following
        # <DD> description
        lastNode = None

        while not dev.atEnd():
            # Undecodable bytes are replaced instead of aborting the whole
            # import with a UnicodeDecodeError.
            line = str(dev.readLine(), encoding="utf-8",
                       errors="replace").rstrip()

            if self.__folderRx.indexIn(line) != -1:
                # folder definition
                arguments = self.__folderRx.cap(1)
                name = self.__folderRx.cap(2)
                # NOTE(review): the node is presumably attached to the
                # parent by the BookmarkNode constructor - confirm.
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(name)
                node.expanded = self.__foldedRx.indexIn(arguments) == -1
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif self.__endFolderRx.indexIn(line) != -1:
                # End of folder definition. A closing tag without a
                # matching folder header (e.g. the list enclosing the
                # whole file) must not remove the root, otherwise later
                # entries would have no parent.
                if len(folders) > 1:
                    folders.pop()

            elif self.__bookmarkRx.indexIn(line) != -1:
                # bookmark definition
                arguments = self.__bookmarkRx.cap(1)
                name = self.__bookmarkRx.cap(2)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(name)
                if self.__urlRx.indexIn(arguments) != -1:
                    node.url = self.__urlRx.cap(1)
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__timestamp(self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__timestamp(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif self.__descRx.indexIn(line) != -1:
                # Description of the preceding folder or bookmark. The
                # identity test avoids depending on the truth value of a
                # BookmarkNode.
                if lastNode is not None:
                    lastNode.desc = Utilities.html_udecode(
                        self.__descRx.cap(1))

            elif self.__separatorRx.indexIn(line) != -1:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a timestamp attribute.

        @param rx regular expression capturing the digits of the
            timestamp in its first group (QRegExp)
        @param arguments attribute text of the tag (string)
        @return timestamp or None, if the attribute is missing or has an
            empty value (QDateTime or None)
        """
        if rx.indexIn(arguments) == -1:
            return None
        digits = rx.cap(1)
        if not digits:
            # e.g. ADD_DATE="" - the pattern allows zero digits and
            # int("") would raise a ValueError
            return None
        return QDateTime.fromTime_t(int(digits))

eric ide

mercurial