eric6/WebBrowser/Bookmarks/NsHtmlReader.py

changeset 7775:4a1db75550bd
parent 7360:9190402e4505
child 7923:91e843545d9a
equal deleted inserted replaced
7774:9eed155411f0 7775:4a1db75550bd
5 5
6 """ 6 """
7 Module implementing a class to read Netscape HTML bookmark files. 7 Module implementing a class to read Netscape HTML bookmark files.
8 """ 8 """
9 9
10 import re
10 11
11 from PyQt5.QtCore import QObject, QIODevice, QFile, QRegExp, Qt, QDateTime 12 from PyQt5.QtCore import QObject, QIODevice, QFile, QDateTime
12 13
13 from .BookmarkNode import BookmarkNode 14 from .BookmarkNode import BookmarkNode
14 15
15 import Utilities 16 import Utilities
16 17
25 """ 26 """
26 Constructor 27 Constructor
27 """ 28 """
28 super(NsHtmlReader, self).__init__() 29 super(NsHtmlReader, self).__init__()
29 30
30 self.__folderRx = QRegExp("<DT><H3(.*)>(.*)</H3>", Qt.CaseInsensitive) 31 self.__folderRx = re.compile("<DT><H3(.*?)>(.*?)</H3>", re.IGNORECASE)
31 self.__folderRx.setMinimal(True) 32 self.__endFolderRx = re.compile("</DL>", re.IGNORECASE)
32 33 self.__bookmarkRx = re.compile("<DT><A(.*?)>(.*?)</A>", re.IGNORECASE)
33 self.__endFolderRx = QRegExp("</DL>", Qt.CaseInsensitive) 34 self.__descRx = re.compile("<DD>(.*)", re.IGNORECASE)
34 35 self.__separatorRx = re.compile("<HR>", re.IGNORECASE)
35 self.__bookmarkRx = QRegExp("<DT><A(.*)>(.*)</A>", Qt.CaseInsensitive) 36 self.__urlRx = re.compile('HREF="(.*?)"', re.IGNORECASE)
36 self.__bookmarkRx.setMinimal(True) 37 self.__addedRx = re.compile(r'ADD_DATE="(\d*?)"', re.IGNORECASE)
37 38 self.__modifiedRx = re.compile(r'LAST_MODIFIED="(\d*?)"',
38 self.__descRx = QRegExp("<DD>(.*)", Qt.CaseInsensitive) 39 re.IGNORECASE)
39 40 self.__visitedRx = re.compile(r'LAST_VISIT="(\d*?)"', re.IGNORECASE)
40 self.__separatorRx = QRegExp("<HR>", Qt.CaseInsensitive) 41 self.__foldedRx = re.compile("FOLDED", re.IGNORECASE)
41
42 self.__urlRx = QRegExp('HREF="(.*)"', Qt.CaseInsensitive)
43 self.__urlRx.setMinimal(True)
44
45 self.__addedRx = QRegExp(r'ADD_DATE="(\d*)"', Qt.CaseInsensitive)
46 self.__addedRx.setMinimal(True)
47
48 self.__modifiedRx = QRegExp(
49 r'LAST_MODIFIED="(\d*)"', Qt.CaseInsensitive)
50 self.__modifiedRx.setMinimal(True)
51
52 self.__visitedRx = QRegExp(r'LAST_VISIT="(\d*)"', Qt.CaseInsensitive)
53 self.__visitedRx.setMinimal(True)
54
55 self.__foldedRx = QRegExp("FOLDED", Qt.CaseInsensitive)
56 42
57 def read(self, fileNameOrDevice): 43 def read(self, fileNameOrDevice):
58 """ 44 """
59 Public method to read a Netscape HTML bookmark file. 45 Public method to read a Netscape HTML bookmark file.
60 46
77 root = BookmarkNode(BookmarkNode.Root) 63 root = BookmarkNode(BookmarkNode.Root)
78 folders.append(root) 64 folders.append(root)
79 65
80 while not dev.atEnd(): 66 while not dev.atEnd():
81 line = str(dev.readLine(), encoding="utf-8").rstrip() 67 line = str(dev.readLine(), encoding="utf-8").rstrip()
82 if self.__folderRx.indexIn(line) != -1: 68 match = (
69 self.__folderRx.search(line) or
70 self.__endFolderRx.search(line) or
71 self.__bookmarkRx.search(line) or
72 self.__descRx.search(line) or
73 self.__separatorRx.search(line)
74 )
75 if match is None:
76 continue
77
78 if match.re is self.__folderRx:
83 # folder definition 79 # folder definition
84 arguments = self.__folderRx.cap(1) 80 arguments = match.group(1)
85 name = self.__folderRx.cap(2) 81 name = match.group(2)
86 node = BookmarkNode(BookmarkNode.Folder, folders[-1]) 82 node = BookmarkNode(BookmarkNode.Folder, folders[-1])
87 node.title = Utilities.html_udecode(name) 83 node.title = Utilities.html_udecode(name)
88 node.expanded = self.__foldedRx.indexIn(arguments) == -1 84 node.expanded = self.__foldedRx.search(arguments) is None
89 if self.__addedRx.indexIn(arguments) != -1: 85 addedMatch = self.__addedRx.search(arguments)
86 if addedMatch is not None:
90 node.added = QDateTime.fromTime_t( 87 node.added = QDateTime.fromTime_t(
91 int(self.__addedRx.cap(1))) 88 int(addedMatch.group(1)))
92 folders.append(node) 89 folders.append(node)
93 lastNode = node 90 lastNode = node
94 91
95 elif self.__endFolderRx.indexIn(line) != -1: 92 elif match.re is self.__endFolderRx:
96 # end of folder definition 93 # end of folder definition
97 folders.pop() 94 folders.pop()
98 95
99 elif self.__bookmarkRx.indexIn(line) != -1: 96 elif match.re is self.__bookmarkRx:
100 # bookmark definition 97 # bookmark definition
101 arguments = self.__bookmarkRx.cap(1) 98 arguments = match.group(1)
102 name = self.__bookmarkRx.cap(2) 99 name = match.group(2)
103 node = BookmarkNode(BookmarkNode.Bookmark, folders[-1]) 100 node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
104 node.title = Utilities.html_udecode(name) 101 node.title = Utilities.html_udecode(name)
105 if self.__urlRx.indexIn(arguments) != -1: 102 match1 = self.__urlRx.search(arguments)
106 node.url = self.__urlRx.cap(1) 103 if match1 is not None:
107 if self.__addedRx.indexIn(arguments) != -1: 104 node.url = match1.group(1)
105 match1 = self.__addedRx.search(arguments)
106 if match1 is not None:
108 node.added = QDateTime.fromTime_t( 107 node.added = QDateTime.fromTime_t(
109 int(self.__addedRx.cap(1))) 108 int(match1.group(1)))
110 if self.__modifiedRx.indexIn(arguments) != -1: 109 match1 = self.__modifiedRx.search(arguments)
110 if match1 is not None:
111 node.modified = QDateTime.fromTime_t( 111 node.modified = QDateTime.fromTime_t(
112 int(self.__modifiedRx.cap(1))) 112 int(match1.group(1)))
113 if self.__visitedRx.indexIn(arguments) != -1: 113 match1 = self.__visitedRx.search(arguments)
114 if match1 is not None:
114 node.visited = QDateTime.fromTime_t( 115 node.visited = QDateTime.fromTime_t(
115 int(self.__visitedRx.cap(1))) 116 int(match1.group(1)))
116 lastNode = node 117 lastNode = node
117 118
118 elif self.__descRx.indexIn(line) != -1: 119 elif match.re is self.__descRx:
119 # description 120 # description
120 if lastNode: 121 if lastNode:
121 lastNode.desc = Utilities.html_udecode( 122 lastNode.desc = Utilities.html_udecode(
122 self.__descRx.cap(1)) 123 match.group(1))
123 124
124 elif self.__separatorRx.indexIn(line) != -1: 125 elif match.re is self.__separatorRx:
125 # separator definition 126 # separator definition
126 BookmarkNode(BookmarkNode.Separator, folders[-1]) 127 BookmarkNode(BookmarkNode.Separator, folders[-1])
127 128
128 return root 129 return root

eric ide

mercurial