|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2012 - 2016 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a class to read Netscape HTML bookmark files. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 try: |
|
12 str = unicode |
|
13 except NameError: |
|
14 pass |
|
15 |
|
16 from PyQt5.QtCore import QObject, QIODevice, QFile, QRegExp, Qt, QDateTime |
|
17 |
|
18 from .BookmarkNode import BookmarkNode |
|
19 |
|
20 import Utilities |
|
21 |
|
22 |
|
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line. Each line is checked against a set
    of regular expressions recognizing folder headers, folder ends,
    bookmarks, descriptions and separators.
    """
    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super(NsHtmlReader, self).__init__()

        # <DT><H3 attributes>title</H3> starts a folder
        self.__folderRx = QRegExp("<DT><H3(.*)>(.*)</H3>", Qt.CaseInsensitive)
        self.__folderRx.setMinimal(True)

        # </DL> ends the list of the current folder
        self.__endFolderRx = QRegExp("</DL>", Qt.CaseInsensitive)

        # <DT><A attributes>title</A> defines a bookmark
        self.__bookmarkRx = QRegExp("<DT><A(.*)>(.*)</A>", Qt.CaseInsensitive)
        self.__bookmarkRx.setMinimal(True)

        # <DD>text is the description of the preceding folder or bookmark
        self.__descRx = QRegExp("<DD>(.*)", Qt.CaseInsensitive)

        self.__separatorRx = QRegExp("<HR>", Qt.CaseInsensitive)

        self.__urlRx = QRegExp('HREF="(.*)"', Qt.CaseInsensitive)
        self.__urlRx.setMinimal(True)

        # Raw strings are used for the patterns containing '\d' because
        # '\d' is not a valid Python string escape sequence.
        self.__addedRx = QRegExp(r'ADD_DATE="(\d*)"', Qt.CaseInsensitive)
        self.__addedRx.setMinimal(True)

        self.__modifiedRx = QRegExp(
            r'LAST_MODIFIED="(\d*)"', Qt.CaseInsensitive)
        self.__modifiedRx.setMinimal(True)

        self.__visitedRx = QRegExp(r'LAST_VISIT="(\d*)"', Qt.CaseInsensitive)
        self.__visitedRx.setMinimal(True)

        self.__foldedRx = QRegExp("FOLDED", Qt.CaseInsensitive)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A file given by name is opened for reading and closed again once
        parsing has finished. A device passed in by the caller is read as
        is and is left open.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode); an empty root
            node is returned, if the named file does not exist or cannot
            be opened
        """
        if isinstance(fileNameOrDevice, QIODevice):
            return self.__parse(fileNameOrDevice)

        f = QFile(fileNameOrDevice)
        if not f.exists() or not f.open(QFile.ReadOnly):
            return BookmarkNode(BookmarkNode.Root)
        try:
            return self.__parse(f)
        finally:
            # the file was opened here, so it is closed here as well
            f.close()

    def __parse(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read from (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of the currently open folders; the root always stays on it
        folders = [root]
        # most recent folder or bookmark; receives a following <DD> text
        lastNode = None

        while not dev.atEnd():
            line = str(dev.readLine(), encoding="utf-8").rstrip()
            if self.__folderRx.indexIn(line) != -1:
                # folder definition
                arguments = self.__folderRx.cap(1)
                name = self.__folderRx.cap(2)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(name)
                node.expanded = self.__foldedRx.indexIn(arguments) == -1
                added = self.__extractDateTime(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif self.__endFolderRx.indexIn(line) != -1:
                # end of folder definition
                # The outermost list is closed by a </DL> as well; the root
                # must not be removed by it, otherwise any later entry
                # would fail with an IndexError on folders[-1].
                if len(folders) > 1:
                    folders.pop()

            elif self.__bookmarkRx.indexIn(line) != -1:
                # bookmark definition
                arguments = self.__bookmarkRx.cap(1)
                name = self.__bookmarkRx.cap(2)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(name)
                if self.__urlRx.indexIn(arguments) != -1:
                    node.url = self.__urlRx.cap(1)
                added = self.__extractDateTime(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__extractDateTime(
                    self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__extractDateTime(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif self.__descRx.indexIn(line) != -1:
                # description
                if lastNode:
                    lastNode.desc = Utilities.html_udecode(
                        self.__descRx.cap(1))

            elif self.__separatorRx.indexIn(line) != -1:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

    def __extractDateTime(self, rx, arguments):
        """
        Private method to extract a time stamp attribute of a tag.

        @param rx regular expression whose first capture holds the time
            stamp as seconds since the epoch (QRegExp)
        @param arguments attribute text of the tag (string)
        @return extracted time stamp or None, if the attribute is missing
            or its value cannot be converted (QDateTime or None)
        """
        if rx.indexIn(arguments) == -1:
            return None

        seconds = rx.cap(1)
        if not seconds:
            # '(\d*)' also matches an empty value like ADD_DATE=""
            return None

        try:
            return QDateTime.fromTime_t(int(seconds))
        except (ValueError, OverflowError):
            # a broken time stamp must not abort the whole import
            return None