18 |
18 |
class NsHtmlReader(QObject):
    """
    Class implementing a reader object for Netscape HTML bookmark files.

    The file is processed line by line with regular expressions; no real
    HTML parser is used. Lines matching none of the known patterns are
    ignored.
    """

    indentSize = 4

    def __init__(self):
        """
        Constructor
        """
        super().__init__()

        # line level patterns (one bookmark item per line)
        self.__folderRx = re.compile("<DT><H3(.*?)>(.*?)</H3>", re.IGNORECASE)
        self.__endFolderRx = re.compile("</DL>", re.IGNORECASE)
        self.__bookmarkRx = re.compile("<DT><A(.*?)>(.*?)</A>", re.IGNORECASE)
        self.__descRx = re.compile("<DD>(.*)", re.IGNORECASE)
        self.__separatorRx = re.compile("<HR>", re.IGNORECASE)

        # attribute patterns applied to the attribute part of a tag
        self.__urlRx = re.compile('HREF="(.*?)"', re.IGNORECASE)
        self.__addedRx = re.compile(r'ADD_DATE="(\d*?)"', re.IGNORECASE)
        self.__modifiedRx = re.compile(r'LAST_MODIFIED="(\d*?)"', re.IGNORECASE)
        self.__visitedRx = re.compile(r'LAST_VISIT="(\d*?)"', re.IGNORECASE)
        self.__foldedRx = re.compile("FOLDED", re.IGNORECASE)

    def __timestamp(self, rx, arguments):
        """
        Private method to extract a timestamp attribute.

        @param rx compiled pattern whose first group captures the seconds
            since epoch (re.Pattern)
        @param arguments attribute text of the tag (string)
        @return extracted timestamp or None, if the attribute is missing
            or empty (QDateTime or None)
        """
        match = rx.search(arguments)
        # The patterns use "\d*?", so an empty value like ADD_DATE="" does
        # match; int("") would raise ValueError, hence the explicit check.
        if match is None or not match.group(1):
            return None
        return QDateTime.fromSecsSinceEpoch(int(match.group(1)))

    def read(self, fileNameOrDevice):
        """
        Public method to read a Netscape HTML bookmark file.

        @param fileNameOrDevice name of the file to read (string)
            or reference to the device to read (QIODevice)
        @return reference to the root node (BookmarkNode); an empty root
            node is returned, if the named file does not exist or cannot
            be opened
        """
        ownedFile = None
        if isinstance(fileNameOrDevice, QIODevice):
            # a device handed in by the caller stays under its control
            dev = fileNameOrDevice
        else:
            ownedFile = QFile(fileNameOrDevice)
            if not ownedFile.exists() or not ownedFile.open(
                QIODevice.OpenModeFlag.ReadOnly
            ):
                return BookmarkNode(BookmarkNode.Root)
            dev = ownedFile

        try:
            return self.__parse(dev)
        finally:
            # only close what was opened here
            if ownedFile is not None:
                ownedFile.close()

    def __parse(self, dev):
        """
        Private method to build the bookmark tree from an open device.

        @param dev device to read the bookmark data from (QIODevice)
        @return reference to the root node (BookmarkNode)
        """
        root = BookmarkNode(BookmarkNode.Root)
        # stack of currently open folders; the last entry receives new items
        folders = [root]
        # most recently created folder or bookmark; target of a <DD> line
        lastNode = None

        while not dev.atEnd():
            line = str(dev.readLine(), encoding="utf-8").rstrip()
            match = (
                self.__folderRx.search(line)
                or self.__endFolderRx.search(line)
                or self.__bookmarkRx.search(line)
                or self.__descRx.search(line)
                or self.__separatorRx.search(line)
            )
            if match is None:
                continue

            if match.re is self.__folderRx:
                # folder definition
                arguments = match.group(1)
                name = match.group(2)
                node = BookmarkNode(BookmarkNode.Folder, folders[-1])
                node.title = Utilities.html_udecode(name)
                node.expanded = self.__foldedRx.search(arguments) is None
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                folders.append(node)
                lastNode = node

            elif match.re is self.__endFolderRx:
                # end of folder definition
                # The outermost </DL> has no matching folder entry; keep the
                # root on the stack so trailing or unbalanced content cannot
                # cause an IndexError.
                if len(folders) > 1:
                    folders.pop()

            elif match.re is self.__bookmarkRx:
                # bookmark definition
                arguments = match.group(1)
                name = match.group(2)
                node = BookmarkNode(BookmarkNode.Bookmark, folders[-1])
                node.title = Utilities.html_udecode(name)
                urlMatch = self.__urlRx.search(arguments)
                if urlMatch is not None:
                    node.url = urlMatch.group(1)
                added = self.__timestamp(self.__addedRx, arguments)
                if added is not None:
                    node.added = added
                modified = self.__timestamp(self.__modifiedRx, arguments)
                if modified is not None:
                    node.modified = modified
                visited = self.__timestamp(self.__visitedRx, arguments)
                if visited is not None:
                    node.visited = visited
                lastNode = node

            elif match.re is self.__descRx:
                # description of the preceding folder or bookmark
                if lastNode is not None:
                    lastNode.desc = Utilities.html_udecode(match.group(1))

            elif match.re is self.__separatorRx:
                # separator definition
                BookmarkNode(BookmarkNode.Separator, folders[-1])

        return root