eric6/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py

branch
without_py2_and_pyqt4
changeset 7192
a22eee00b052
parent 6942
2602857055c5
child 7229
53054eb5b15a
equal deleted inserted replaced
7191:960850ec284c 7192:a22eee00b052
6 """ 6 """
7 Module implementing an URL representation suitable for Google Safe Browsing. 7 Module implementing an URL representation suitable for Google Safe Browsing.
8 """ 8 """
9 9
10 from __future__ import unicode_literals 10 from __future__ import unicode_literals
11
12 try:
13 import urlparse # Py2
14 import urllib # Py2
15 except ImportError:
16 import urllib.parse as urllib
17 from urllib import parse as urlparse
18 11
19 import re 12 import re
20 import posixpath 13 import posixpath
21 import socket 14 import socket
22 import struct 15 import struct
23 import hashlib 16 import hashlib
17 import urllib.parse
24 18
25 import Preferences 19 import Preferences
26 20
27 21
28 class SafeBrowsingUrl(object): 22 class SafeBrowsingUrl(object):
68 @param u URL string to unescape 62 @param u URL string to unescape
69 @type str 63 @type str
70 @return unescaped URL string 64 @return unescaped URL string
71 @rtype str 65 @rtype str
72 """ 66 """
73 uu = urllib.unquote(u) 67 uu = urllib.parse.unquote(u)
74 if uu == u: 68 if uu == u:
75 return uu 69 return uu
76 else: 70 else:
77 return fullUnescape(uu) 71 return fullUnescape(uu)
78 72
84 @type str 78 @type str
85 @return quoted string 79 @return quoted string
86 @rtype str 80 @rtype str
87 """ 81 """
88 safeChars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~' 82 safeChars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~'
89 return urllib.quote(s, safe=safeChars) 83 return urllib.parse.quote(s, safe=safeChars)
90 84
91 url = self.__url.strip() 85 url = self.__url.strip()
92 url = url.replace('\n', '').replace('\r', '').replace('\t', '') 86 url = url.replace('\n', '').replace('\r', '').replace('\t', '')
93 url = url.split('#', 1)[0] 87 url = url.split('#', 1)[0]
94 if url.startswith('//'): 88 if url.startswith('//'):
95 url = Preferences.getWebBrowser("DefaultScheme")[:-3] + url 89 url = Preferences.getWebBrowser("DefaultScheme")[:-3] + url
96 if len(url.split('://')) <= 1: 90 if len(url.split('://')) <= 1:
97 url = Preferences.getWebBrowser("DefaultScheme") + url 91 url = Preferences.getWebBrowser("DefaultScheme") + url
98 url = quote(fullUnescape(url)) 92 url = quote(fullUnescape(url))
99 urlParts = urlparse.urlsplit(url) 93 urlParts = urllib.parse.urlsplit(url)
100 if not urlParts[0]: 94 if not urlParts[0]:
101 url = Preferences.getWebBrowser("DefaultScheme") + url 95 url = Preferences.getWebBrowser("DefaultScheme") + url
102 urlParts = urlparse.urlsplit(url) 96 urlParts = urllib.parse.urlsplit(url)
103 protocol = urlParts.scheme 97 protocol = urlParts.scheme
104 host = fullUnescape(urlParts.hostname) 98 host = fullUnescape(urlParts.hostname)
105 path = fullUnescape(urlParts.path) 99 path = fullUnescape(urlParts.path)
106 query = urlParts.query 100 query = urlParts.query
107 if not query and '?' not in url: 101 if not query and '?' not in url:
182 curPath = '' 176 curPath = ''
183 for i in range(min(4, len(pathParts))): 177 for i in range(min(4, len(pathParts))):
184 curPath = curPath + pathParts[i] + '/' 178 curPath = curPath + pathParts[i] + '/'
185 yield curPath 179 yield curPath
186 180
187 protocol, addressStr = urllib.splittype(url) 181 protocol, addressStr = urllib.parse.splittype(url)
188 host, path = urllib.splithost(addressStr) 182 host, path = urllib.parse.splithost(addressStr)
189 user, host = urllib.splituser(host) 183 user, host = urllib.parse.splituser(host)
190 host, port = urllib.splitport(host) 184 host, port = urllib.parse.splitport(host)
191 host = host.strip('/') 185 host = host.strip('/')
192 seenPermutations = set() 186 seenPermutations = set()
193 for h in hostPermutations(host): 187 for h in hostPermutations(host):
194 for p in pathPermutations(path): 188 for p in pathPermutations(path):
195 u = '{0}{1}'.format(h, p) 189 u = '{0}{1}'.format(h, p)

eric ide

mercurial