src/eric7/WebBrowser/Bookmarks/NsHtmlReader.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 8881
54e42bc2437a
child 9221
bf71ee032bb4
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2012 - 2022 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing a class to read Netscape HTML bookmark files.
8 """
9
10 import re
11
12 from PyQt6.QtCore import QObject, QIODevice, QFile, QDateTime
13
14 from .BookmarkNode import BookmarkNode
15
16 import Utilities
17
18
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line with regular expressions; it is not
    parsed as real HTML. Each line is classified as a folder header
    (<DT><H3 ...>), the end of a folder (</DL>), a bookmark (<DT><A ...>),
    a description (<DD>) or a separator (<HR>). Lines matching none of
    these are ignored.
    """

    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super().__init__()

        # line classification patterns
        self.__folderRx = re.compile("<DT><H3(.*?)>(.*?)</H3>", re.IGNORECASE)
        self.__endFolderRx = re.compile("</DL>", re.IGNORECASE)
        self.__bookmarkRx = re.compile("<DT><A(.*?)>(.*?)</A>", re.IGNORECASE)
        self.__descRx = re.compile("<DD>(.*)", re.IGNORECASE)
        self.__separatorRx = re.compile("<HR>", re.IGNORECASE)

        # attribute patterns applied to the attribute part of a tag
        self.__urlRx = re.compile('HREF="(.*?)"', re.IGNORECASE)
        self.__addedRx = re.compile(r'ADD_DATE="(\d*?)"', re.IGNORECASE)
        self.__modifiedRx = re.compile(r'LAST_MODIFIED="(\d*?)"',
                                       re.IGNORECASE)
        self.__visitedRx = re.compile(r'LAST_VISIT="(\d*?)"', re.IGNORECASE)
        self.__foldedRx = re.compile("FOLDED", re.IGNORECASE)

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        A device passed in by the caller is read as is and left open. A
        file given by name is opened read-only and closed again before
        returning. If the file does not exist or cannot be opened, an
        empty root node is returned.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        if isinstance(fileNameOrDevice, QIODevice):
            return self.__readDevice(fileNameOrDevice)

        f = QFile(fileNameOrDevice)
        if not f.exists() or not f.open(QIODevice.OpenModeFlag.ReadOnly):
            return BookmarkNode(BookmarkNode.Root)

        try:
            return self.__readDevice(f)
        finally:
            # the file was opened here, so it is closed here as well
            f.close()

    def __readDevice(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of currently open folders; the last entry receives new nodes
        folders = [root]
        # most recently created folder or bookmark; target of a <DD> line
        lastNode = None

        while not dev.atEnd():
            # Undecodable bytes are replaced instead of aborting the whole
            # import because of a single badly encoded title.
            line = str(
                dev.readLine(), encoding="utf-8", errors="replace"
            ).rstrip()

            # The order of the tests defines the priority if a line
            # matches more than one pattern.
            match = (
                self.__folderRx.search(line) or
                self.__endFolderRx.search(line) or
                self.__bookmarkRx.search(line) or
                self.__descRx.search(line) or
                self.__separatorRx.search(line)
            )
            if match is None:
                continue

            if match.re is self.__folderRx:
                # folder definition
                arguments = match.group(1)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(match.group(2))
                node.expanded = self.__foldedRx.search(arguments) is None
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif match.re is self.__endFolderRx:
                # End of folder definition. The root must stay on the
                # stack: the outermost </DL> of a well-formed file (or a
                # surplus one in a broken file) would otherwise empty it
                # and make the next folders[-1] raise an IndexError.
                if len(folders) > 1:
                    folders.pop()

            elif match.re is self.__bookmarkRx:
                # bookmark definition
                arguments = match.group(1)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(match.group(2))
                urlMatch = self.__urlRx.search(arguments)
                if urlMatch is not None:
                    node.url = urlMatch.group(1)
                for attribute, rx in (
                    ("added", self.__addedRx),
                    ("modified", self.__modifiedRx),
                    ("visited", self.__visitedRx),
                ):
                    timestamp = self.__timestamp(rx, arguments)
                    if timestamp is not None:
                        setattr(node, attribute, timestamp)
                lastNode = node

            elif match.re is self.__descRx:
                # description of the folder or bookmark read last
                if lastNode is not None:
                    lastNode.desc = Utilities.html_udecode(match.group(1))

            elif match.re is self.__separatorRx:
                # Separator definition. The node is not kept here;
                # presumably BookmarkNode registers itself with the given
                # parent -- verify against BookmarkNode.
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a timestamp attribute of a tag.

        @param rx compiled pattern whose first group captures the seconds
            since the epoch (re.Pattern)
        @param arguments attribute part of the tag (string)
        @return date and time of the attribute or None, if the attribute
            is missing or has an empty value (QDateTime or None)
        """
        match = rx.search(arguments)
        # The patterns accept an empty value (e.g. ADD_DATE=""), which
        # int() would reject with a ValueError.
        if match is None or not match.group(1):
            return None
        return QDateTime.fromSecsSinceEpoch(int(match.group(1)))

eric ide

mercurial