|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2012 - 2022 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a class to read Netscape HTML bookmark files. |
|
8 """ |
|
9 |
|
10 import re |
|
11 |
|
12 from PyQt6.QtCore import QObject, QIODevice, QFile, QDateTime |
|
13 |
|
14 from .BookmarkNode import BookmarkNode |
|
15 |
|
16 import Utilities |
|
17 |
|
18 |
|
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line with a set of regular expressions;
    no real HTML parsing is performed.
    """
    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super().__init__()

        # patterns identifying the kind of a line
        self.__folderRx = re.compile("<DT><H3(.*?)>(.*?)</H3>", re.IGNORECASE)
        self.__endFolderRx = re.compile("</DL>", re.IGNORECASE)
        self.__bookmarkRx = re.compile("<DT><A(.*?)>(.*?)</A>", re.IGNORECASE)
        self.__descRx = re.compile("<DD>(.*)", re.IGNORECASE)
        self.__separatorRx = re.compile("<HR>", re.IGNORECASE)

        # patterns extracting attributes of a folder or bookmark tag
        self.__urlRx = re.compile('HREF="(.*?)"', re.IGNORECASE)
        self.__addedRx = re.compile(r'ADD_DATE="(\d*?)"', re.IGNORECASE)
        self.__modifiedRx = re.compile(r'LAST_MODIFIED="(\d*?)"',
                                       re.IGNORECASE)
        self.__visitedRx = re.compile(r'LAST_VISIT="(\d*?)"', re.IGNORECASE)
        self.__foldedRx = re.compile("FOLDED", re.IGNORECASE)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A device passed in by the caller is read but not closed. A file
        opened by this method is closed again before returning. If the
        file does not exist or cannot be opened, an empty root node is
        returned.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        if isinstance(fileNameOrDevice, QIODevice):
            return self.__readDevice(fileNameOrDevice)

        f = QFile(fileNameOrDevice)
        if not f.exists() or not f.open(QIODevice.OpenModeFlag.ReadOnly):
            return BookmarkNode(BookmarkNode.Root)

        try:
            return self.__readDevice(f)
        finally:
            # the file was opened here, so it is released here as well
            f.close()

    def __readDevice(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of currently open folders; the last entry is the parent
        # of newly created nodes
        folders = [root]
        # most recently created folder or bookmark; receives the
        # description of a following <DD> line
        lastNode = None

        while not dev.atEnd():
            line = str(dev.readLine(), encoding="utf-8").rstrip()
            # the order of the alternatives defines the precedence if a
            # line matches more than one pattern
            match = (
                self.__folderRx.search(line) or
                self.__endFolderRx.search(line) or
                self.__bookmarkRx.search(line) or
                self.__descRx.search(line) or
                self.__separatorRx.search(line)
            )
            if match is None:
                continue

            if match.re is self.__folderRx:
                # folder definition
                arguments = match.group(1)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(match.group(2))
                node.expanded = self.__foldedRx.search(arguments) is None
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif match.re is self.__endFolderRx:
                # end of folder definition
                # The outermost <DL> has no matching folder entry on the
                # stack, so the root must never be popped. Otherwise any
                # item following that </DL> would raise an IndexError.
                if len(folders) > 1:
                    folders.pop()

            elif match.re is self.__bookmarkRx:
                # bookmark definition
                arguments = match.group(1)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(match.group(2))
                urlMatch = self.__urlRx.search(arguments)
                if urlMatch is not None:
                    node.url = urlMatch.group(1)
                # only overwrite the node's timestamps if the attribute
                # is present and usable
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__timestamp(self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__timestamp(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif match.re is self.__descRx:
                # description
                if lastNode:
                    lastNode.desc = Utilities.html_udecode(
                        match.group(1))

            elif match.re is self.__separatorRx:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a timestamp attribute of a tag.

        @param rx compiled pattern whose first group captures the seconds
            since the epoch (re.Pattern)
        @param arguments attribute text of the tag (string)
        @return extracted timestamp or None, if the attribute is missing,
            empty or cannot be converted (QDateTime or None)
        """
        match = rx.search(arguments)
        if match is None:
            return None

        seconds = match.group(1)
        if not seconds:
            # the patterns accept an empty value (e.g. ADD_DATE=""), for
            # which int() would raise a ValueError
            return None

        try:
            return QDateTime.fromSecsSinceEpoch(int(seconds))
        except (ValueError, OverflowError):
            # NOTE(review): presumably an absurdly long digit string does
            # not fit the integer type expected by Qt - treat it like a
            # missing attribute instead of aborting the import
            return None