316 @rtype bool |
315 @rtype bool |
317 """ |
316 """ |
318 # start with a fresh dictionary |
317 # start with a fresh dictionary |
319 self.__tldDict = collections.defaultdict(list) |
318 self.__tldDict = collections.defaultdict(list) |
320 |
319 |
321 file = QFile(dataFile) |
320 seekToEndOfPrivateDomains = False |
322 |
321 |
323 if not file.open(QIODevice.OpenModeFlag.ReadOnly | |
322 try: |
324 QIODevice.OpenModeFlag.Text): |
323 with open(dataFile, "r", encoding="utf-8") as f: |
|
324 for line in f.readlines(): |
|
325 if not line: |
|
326 continue |
|
327 |
|
328 if line.startswith("."): |
|
329 line = line[1:] |
|
330 |
|
331 if line.startswith("//"): |
|
332 if "===END PRIVATE DOMAINS===" in line: |
|
333 seekToEndOfPrivateDomains = False |
|
334 |
|
335 if ( |
|
336 not loadPrivateDomains and |
|
337 "===BEGIN PRIVATE DOMAINS===" in line |
|
338 ): |
|
339 seekToEndOfPrivateDomains = True |
|
340 |
|
341 continue |
|
342 |
|
343 if seekToEndOfPrivateDomains: |
|
344 continue |
|
345 |
|
346 # only data up to the first whitespace is used |
|
347 line = line.split(None, 1)[0] |
|
348 |
|
349 if "." not in line: |
|
350 self.__tldDict[line].append(line) |
|
351 else: |
|
352 key = line[line.rfind(".") + 1:] |
|
353 self.__tldDict[key].append(line) |
|
354 |
|
355 return self.isDataLoaded() |
|
356 except OSError: |
325 return False |
357 return False |
326 |
|
327 seekToEndOfPrivateDomains = False |
|
328 |
|
329 while not file.atEnd(): |
|
330 line = bytes(file.readLine()).decode("utf-8").strip() |
|
331 if not line: |
|
332 continue |
|
333 |
|
334 if line.startswith("."): |
|
335 line = line[1:] |
|
336 |
|
337 if line.startswith("//"): |
|
338 if "===END PRIVATE DOMAINS===" in line: |
|
339 seekToEndOfPrivateDomains = False |
|
340 |
|
341 if ( |
|
342 not loadPrivateDomains and |
|
343 "===BEGIN PRIVATE DOMAINS===" in line |
|
344 ): |
|
345 seekToEndOfPrivateDomains = True |
|
346 |
|
347 continue |
|
348 |
|
349 if seekToEndOfPrivateDomains: |
|
350 continue |
|
351 |
|
352 # only data up to the first whitespace is used |
|
353 line = line.split(None, 1)[0] |
|
354 |
|
355 if "." not in line: |
|
356 self.__tldDict[line].append(line) |
|
357 else: |
|
358 key = line[line.rfind(".") + 1:] |
|
359 self.__tldDict[key].append(line) |
|
360 |
|
361 return self.isDataLoaded() |
|
362 |
358 |
363 def __domainHelper(self, host, tldPart): |
359 def __domainHelper(self, host, tldPart): |
364 """ |
360 """ |
365 Private method to get the domain name without TLD. |
361 Private method to get the domain name without TLD. |
366 |
362 |
430 @type str |
426 @type str |
431 @return normalized host address |
427 @return normalized host address |
432 @rtype str |
428 @rtype str |
433 """ |
429 """ |
434 return host.lower() |
430 return host.lower() |
435 |
|
436 ################################################################# |
|
437 ## Methods below are for testing purposes |
|
438 ################################################################# |
|
439 |
|
def test(self):
    """
    Public method to execute the tests.

    The method switches on the private domains flag, reloads the TLD data
    and then checks every 'checkPublicSuffix(...)' line of the first
    'test_psl.txt' file found in the data search paths. Empty lines and
    lines starting with '//' are skipped; a line that does not match the
    expected pattern counts as a failure. If the test data file cannot be
    found, an information dialog listing the search paths is shown.

    @return flag indicating the test result (False as well, if no TLD
        data is loaded or the test data file is missing or unreadable)
    @rtype bool
    """
    self.__withPrivate = True
    self.__loadData()
    if not self.__tldDict:
        return False

    testDataFileName = ""
    testDataFileExist = False

    # the first search path containing the test data file wins
    for path in self.__dataSearchPaths:
        testDataFileName = os.path.abspath(
            os.path.join(path, "test_psl.txt")
        )
        if os.path.exists(testDataFileName):
            testDataFileExist = True
            break

    if not testDataFileExist:
        testFileDownloadLink = (
            "http://mxr.mozilla.org/mozilla-central/source/netwerk/test/"
            "unit/data/test_psl.txt?raw=1"
        )
        EricMessageBox.information(
            None,
            self.tr("TLD Data File not found"),
            self.tr("""<p>The file 'test_psl.txt' was not found!"""
                    """<br/>You can download it from '<a href="{0}">"""
                    """<b>here</b></a>' to one of the following"""
                    """ paths:</p><ul>{1}</ul>""").format(
                testFileDownloadLink,
                "".join("<li>{0}</li>".format(p)
                        for p in self.__dataSearchPaths))
        )
        return False

    # group 2 is the quoted host name, group 4 the quoted registrable
    # name; both are None for a literal 'null' argument
    testRegExp = re.compile(
        r"checkPublicSuffix\(('([^']+)'|null), ('([^']+)'|null)\);")
    allTestSuccess = True

    try:
        # the context manager guarantees that the file gets closed again
        with open(testDataFileName, "r", encoding="utf-8") as testFile:
            for rawLine in testFile:
                line = rawLine.strip()
                if not line or line.startswith("//"):
                    continue

                match = testRegExp.search(line)
                if match is None:
                    allTestSuccess = False
                    continue

                hostName, registrableName = match.group(2, 4)
                if not self.__checkPublicSuffix(hostName,
                                                registrableName):
                    allTestSuccess = False
    except OSError:
        # the test data file could not be opened or read
        return False

    if allTestSuccess:
        qWarning("EricTldExtractor: Test passed successfully.")
    else:
        qWarning("EricTldExtractor: Test finished with some errors!")

    # reset the TLD dictionary
    self.__tldDict = collections.defaultdict(list)

    return allTestSuccess
|
514 |
|
def __checkPublicSuffix(self, host, registrableName):
    """
    Private method to compare the registrable domain calculated for a
    host name with an expected one.

    A warning containing the host name, the expected and the calculated
    registrable name is issued, if both names differ.

    @param host host name to be checked
    @type str
    @param registrableName expected registrable domain name
    @type str
    @return flag indicating that the calculated name is equal to the
        expected one
    @rtype bool
    """
    calculatedName = self.registrableDomain(host)
    if calculatedName == registrableName:
        return True

    # report the mismatch before signaling the failure
    message = (
        "EricTldExtractor Test Error: hostName: {0}\n"
        " Correct registrableName: {1}\n"
        " Calculated registrableName: {2}"
    ).format(host, registrableName, calculatedName)
    qWarning(message)
    return False
|
536 |
431 |
537 |
432 |
538 _TLDExtractor = None |
433 _TLDExtractor = None |
539 |
434 |
540 |
435 |