eric7/EricNetwork/EricTldExtractor.py

branch
eric7
changeset 9162
8b75b1668583
parent 9152
8a68afaf1ba2
equal deleted inserted replaced
9161:90939b08da20 9162:8b75b1668583
12 # Copyright (C) 2014 Razi Alavizadeh <s.r.alavizadeh@gmail.com> 12 # Copyright (C) 2014 Razi Alavizadeh <s.r.alavizadeh@gmail.com>
13 # 13 #
14 14
15 import collections 15 import collections
16 import os 16 import os
17 import re 17
18 18 from PyQt6.QtCore import QObject, QUrl, qWarning
19 from PyQt6.QtCore import QObject, QUrl, QFile, QIODevice, qWarning
20 19
21 from EricWidgets import EricMessageBox 20 from EricWidgets import EricMessageBox
22 21
23 22
24 class EricTldHostParts: 23 class EricTldHostParts:
316 @rtype bool 315 @rtype bool
317 """ 316 """
318 # start with a fresh dictionary 317 # start with a fresh dictionary
319 self.__tldDict = collections.defaultdict(list) 318 self.__tldDict = collections.defaultdict(list)
320 319
321 file = QFile(dataFile) 320 seekToEndOfPrivateDomains = False
322 321
323 if not file.open(QIODevice.OpenModeFlag.ReadOnly | 322 try:
324 QIODevice.OpenModeFlag.Text): 323 with open(dataFile, "r", encoding="utf-8") as f:
324 for line in f.readlines():
325 if not line:
326 continue
327
328 if line.startswith("."):
329 line = line[1:]
330
331 if line.startswith("//"):
332 if "===END PRIVATE DOMAINS===" in line:
333 seekToEndOfPrivateDomains = False
334
335 if (
336 not loadPrivateDomains and
337 "===BEGIN PRIVATE DOMAINS===" in line
338 ):
339 seekToEndOfPrivateDomains = True
340
341 continue
342
343 if seekToEndOfPrivateDomains:
344 continue
345
346 # only data up to the first whitespace is used
347 line = line.split(None, 1)[0]
348
349 if "." not in line:
350 self.__tldDict[line].append(line)
351 else:
352 key = line[line.rfind(".") + 1:]
353 self.__tldDict[key].append(line)
354
355 return self.isDataLoaded()
356 except OSError:
325 return False 357 return False
326
327 seekToEndOfPrivateDomains = False
328
329 while not file.atEnd():
330 line = bytes(file.readLine()).decode("utf-8").strip()
331 if not line:
332 continue
333
334 if line.startswith("."):
335 line = line[1:]
336
337 if line.startswith("//"):
338 if "===END PRIVATE DOMAINS===" in line:
339 seekToEndOfPrivateDomains = False
340
341 if (
342 not loadPrivateDomains and
343 "===BEGIN PRIVATE DOMAINS===" in line
344 ):
345 seekToEndOfPrivateDomains = True
346
347 continue
348
349 if seekToEndOfPrivateDomains:
350 continue
351
352 # only data up to the first whitespace is used
353 line = line.split(None, 1)[0]
354
355 if "." not in line:
356 self.__tldDict[line].append(line)
357 else:
358 key = line[line.rfind(".") + 1:]
359 self.__tldDict[key].append(line)
360
361 return self.isDataLoaded()
362 358
363 def __domainHelper(self, host, tldPart): 359 def __domainHelper(self, host, tldPart):
364 """ 360 """
365 Private method to get the domain name without TLD. 361 Private method to get the domain name without TLD.
366 362
430 @type str 426 @type str
431 @return normalized host address 427 @return normalized host address
432 @rtype str 428 @rtype str
433 """ 429 """
434 return host.lower() 430 return host.lower()
435
436 #################################################################
437 ## Methods below are for testing purposes
438 #################################################################
439
440 def test(self):
441 """
442 Public method to execute the tests.
443
444 @return flag indicating the test result
445 @rtype bool
446 """
447 self.__withPrivate = True
448 self.__loadData()
449 if not self.__tldDict:
450 return False
451
452 testDataFileName = ""
453 testDataFileExist = False
454
455 for path in self.__dataSearchPaths:
456 testDataFileName = os.path.abspath(
457 os.path.join(path, "test_psl.txt")
458 )
459 if os.path.exists(testDataFileName):
460 testDataFileExist = True
461 break
462
463 if not testDataFileExist:
464 testFileDownloadLink = (
465 "http://mxr.mozilla.org/mozilla-central/source/netwerk/test/"
466 "unit/data/test_psl.txt?raw=1"
467 )
468 EricMessageBox.information(
469 None,
470 self.tr("TLD Data File not found"),
471 self.tr("""<p>The file 'test_psl.txt' was not found!"""
472 """<br/>You can download it from '<a href="{0}">"""
473 """<b>here</b></a>' to one of the following"""
474 """ paths:</p><ul>{1}</ul>""").format(
475 testFileDownloadLink,
476 "".join(["<li>{0}</li>".format(p)
477 for p in self.__dataSearchPaths]))
478 )
479 return False
480
481 file = QFile(testDataFileName)
482
483 if not file.open(QIODevice.OpenModeFlag.ReadOnly |
484 QIODevice.OpenModeFlag.Text):
485 return False
486
487 testRegExp = re.compile(
488 "checkPublicSuffix\\(('([^']+)'|null), ('([^']+)'|null)\\);")
489 allTestSuccess = True
490
491 while not file.atEnd():
492 line = bytes(file.readLine()).decode("utf-8").strip()
493 if not line or line.startswith("//"):
494 continue
495
496 match = testRegExp.search(line)
497 if match is None:
498 allTestSuccess = False
499 else:
500 hostName, registrableName = match.group(2, 4)
501
502 if not self.__checkPublicSuffix(hostName, registrableName):
503 allTestSuccess = False
504
505 if allTestSuccess:
506 qWarning("EricTldExtractor: Test passed successfully.")
507 else:
508 qWarning("EricTldExtractor: Test finished with some errors!")
509
510 # reset the TLD dictionary
511 self.__tldDict = collections.defaultdict(list)
512
513 return allTestSuccess
514
515 def __checkPublicSuffix(self, host, registrableName):
516 """
517 Private method to test a host name against a registrable name.
518
519 @param host host name to test
520 @type str
521 @param registrableName registrable domain name to test against
522 @type str
523 @return flag indicating the check result
524 @rtype bool
525 """
526 regName = self.registrableDomain(host)
527 if regName != registrableName:
528 qWarning(
529 "EricTldExtractor Test Error: hostName: {0}\n"
530 " Correct registrableName: {1}\n"
531 " Calculated registrableName: {2}".format(
532 host, registrableName, regName))
533 return False
534
535 return True
536 431
537 432
538 _TLDExtractor = None 433 _TLDExtractor = None
539 434
540 435

eric ide

mercurial