|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2012 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a class to read Netscape HTML bookmark files. |
|
8 """ |
|
9 |
|
10 from PyQt4.QtCore import QObject, QIODevice, QFile, QRegExp, Qt, QDateTime |
|
11 |
|
12 from .BookmarkNode import BookmarkNode |
|
13 |
|
14 import Utilities |
|
15 |
|
16 |
|
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line. Each line is matched against a
    small set of regular expressions recognizing folder headers
    (<DT><H3>), folder ends (</DL>), bookmarks (<DT><A>), descriptions
    (<DD>) and separators (<HR>).
    """
    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super().__init__()

        # line level expressions
        self.__folderRx = QRegExp("<DT><H3(.*)>(.*)</H3>", Qt.CaseInsensitive)
        self.__folderRx.setMinimal(True)

        self.__endFolderRx = QRegExp("</DL>", Qt.CaseInsensitive)

        self.__bookmarkRx = QRegExp("<DT><A(.*)>(.*)</A>", Qt.CaseInsensitive)
        self.__bookmarkRx.setMinimal(True)

        self.__descRx = QRegExp("<DD>(.*)", Qt.CaseInsensitive)

        self.__separatorRx = QRegExp("<HR>", Qt.CaseInsensitive)

        # attribute level expressions (applied to the captured tag arguments)
        self.__urlRx = QRegExp('HREF="(.*)"', Qt.CaseInsensitive)
        self.__urlRx.setMinimal(True)

        # raw strings keep '\d' from being treated as a (invalid) Python
        # string escape; the resulting patterns are unchanged
        self.__addedRx = QRegExp(r'ADD_DATE="(\d*)"', Qt.CaseInsensitive)
        self.__addedRx.setMinimal(True)

        self.__modifiedRx = QRegExp(
            r'LAST_MODIFIED="(\d*)"', Qt.CaseInsensitive)
        self.__modifiedRx.setMinimal(True)

        self.__visitedRx = QRegExp(r'LAST_VISIT="(\d*)"', Qt.CaseInsensitive)
        self.__visitedRx.setMinimal(True)

        self.__foldedRx = QRegExp("FOLDED", Qt.CaseInsensitive)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A file opened by this method is closed again before returning; a
        device passed in by the caller is left open.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode); an empty root
            node is returned, if the file does not exist or cannot be opened
        """
        if isinstance(fileNameOrDevice, QIODevice):
            dev = fileNameOrDevice
            closeWhenDone = False
        else:
            dev = QFile(fileNameOrDevice)
            if not dev.exists() or not dev.open(QFile.ReadOnly):
                return BookmarkNode(BookmarkNode.Root)
            closeWhenDone = True

        try:
            return self.__parse(dev)
        finally:
            # only release what was acquired here
            if closeWhenDone:
                dev.close()

    def __parse(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read from (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        folders = [root]    # stack of open folders, root at the bottom
        lastNode = None     # node a following <DD> description belongs to

        while not dev.atEnd():
            line = str(dev.readLine(), encoding="utf-8").rstrip()

            if self.__folderRx.indexIn(line) != -1:
                # folder definition
                arguments = self.__folderRx.cap(1)
                name = self.__folderRx.cap(2)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(name)
                node.expanded = self.__foldedRx.indexIn(arguments) == -1
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif self.__endFolderRx.indexIn(line) != -1:
                # end of folder definition; the root must stay on the
                # stack because the closing top level </DL> (or a stray
                # one) would otherwise empty it and break folders[-1]
                if len(folders) > 1:
                    folders.pop()

            elif self.__bookmarkRx.indexIn(line) != -1:
                # bookmark definition
                arguments = self.__bookmarkRx.cap(1)
                name = self.__bookmarkRx.cap(2)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(name)
                if self.__urlRx.indexIn(arguments) != -1:
                    node.url = self.__urlRx.cap(1)
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__timestamp(self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__timestamp(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif self.__descRx.indexIn(line) != -1:
                # description of the most recent folder or bookmark
                if lastNode is not None:
                    lastNode.desc = Utilities.html_udecode(
                        self.__descRx.cap(1))

            elif self.__separatorRx.indexIn(line) != -1:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a time stamp attribute from tag arguments.

        @param rx expression capturing the seconds value as group 1 (QRegExp)
        @param arguments attribute text of the tag (string)
        @return time stamp (QDateTime) or None, if the attribute is missing
            or has an empty value
        """
        if rx.indexIn(arguments) == -1:
            return None

        seconds = rx.cap(1)
        if not seconds:
            # the patterns use '\d*', so an attribute like ADD_DATE=""
            # matches with an empty capture; int("") would raise ValueError
            return None

        return QDateTime.fromTime_t(int(seconds))