WebBrowser/AdBlock/AdBlockMatcher.py

Thu, 10 Jan 2019 14:18:48 +0100

author
Detlev Offenbach <detlev@die-offenbachs.de>
date
Thu, 10 Jan 2019 14:18:48 +0100
changeset 6645
ad476851d7e0
parent 6048
82ad8ec9548c
permissions
-rw-r--r--

Updated copyright for 2019.

# -*- coding: utf-8 -*-

# Copyright (c) 2017 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the AdBlock matcher.
"""

from __future__ import unicode_literals

from PyQt5.QtCore import QObject

from .AdBlockSearchTree import AdBlockSearchTree
from .AdBlockRule import AdBlockRule, AdBlockRuleOption


class AdBlockMatcher(QObject):
    """
    Class implementing the AdBlock matcher.
    """
    def __init__(self, manager):
        """
        Constructor
        
        @param manager reference to the AdBlock manager object
        @type AdBlockManager
        """
        super(AdBlockMatcher, self).__init__(manager)
        
        self.__manager = manager
        
        self.__createdRules = []
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []
        
        self.__elementHidingRules = ""
        self.__networkBlockTree = AdBlockSearchTree()
        self.__networkExceptionTree = AdBlockSearchTree()
    
    def match(self, request, urlDomain, urlString):
        """
        Public method to match a request.
        
        @param request URL request to be matched
        @type QWebEngineUrlRequestInfo
        @param urlDomain domain of the URL
        @type str
        @param urlString requested URL as a lowercase string
        @type str
        @return reference to the matched rule
        @rtype AdBlockRule
        """
        # exception rules
        if self.__networkExceptionTree.find(request, urlDomain, urlString):
            return None
        
        for rule in self.__networkExceptionRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return None
        
        # block rules
        rule = self.__networkBlockTree.find(request, urlDomain, urlString)
        if rule:
            return rule
        
        for rule in self.__networkBlockRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return rule
        
        return None
    
    def adBlockDisabledForUrl(self, url):
        """
        Public method to check, if AdBlock is disabled for the given URL.
        
        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        for rule in self.__documentRules:
            if rule.urlMatch(url):
                return True
        
        return False
    
    def elemHideDisabledForUrl(self, url):
        """
        Public method to check, if element hiding is disabled for the given
        URL.
        
        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        if self.adBlockDisabledForUrl(url):
            return True
        
        for rule in self.__elemhideRules:
            if rule.urlMatch(url):
                return True
        
        return False
    
    def elementHidingRules(self):
        """
        Public method to get the element hiding rules.
        
        @return element hiding rules
        @rtype str
        """
        return self.__elementHidingRules
    
    def elementHidingRulesForDomain(self, domain):
        """
        Public method to get the element hiding rules for the given domain.
        
        @param domain domain name
        @type str
        @return element hiding rules
        @rtype str
        """
        rules = ""
        addedRulesCount = 0
        
        for rule in self.__domainRestrictedCssRules:
            if not rule.matchDomain(domain):
                continue
            
            if addedRulesCount == 1000:
                rules += rule.cssSelector()
                rules += "{display:none !important;}\n"
                addedRulesCount = 0
            else:
                rules += rule.cssSelector() + ","
                addedRulesCount += 1
        
        if addedRulesCount != 0:
            rules = rules[:-1]
            rules += "{display:none !important;}\n"
        
        return rules
    
    def update(self):
        """
        Public slot to update the internal state.
        """
        self.clear()
        
        cssRulesDict = {}
        exceptionCssRules = []
        
        for subscription in self.__manager.subscriptions():
            if subscription.isEnabled():
                for rule in subscription.allRules():
                    # Don't add internally disabled rules to the cache
                    if rule.isInternalDisabled():
                        continue
                    
                    if rule.isCSSRule():
                        # Only enabled CSS rules are added to the cache because
                        # there is no enabled/disabled check on match. They are
                        # directly embedded to pages.
                        if not rule.isEnabled():
                            continue
                        
                        if rule.isException():
                            exceptionCssRules.append(rule)
                        else:
                            cssRulesDict[rule.cssSelector()] = rule
                    elif rule.isDocument():
                        self.__documentRules.append(rule)
                    elif rule.isElementHiding():
                        self.__elemhideRules.append(rule)
                    elif rule.isException():
                        if not self.__networkExceptionTree.add(rule):
                            self.__networkBlockRules.append(rule)
                    else:
                        if not self.__networkBlockTree.add(rule):
                            self.__networkBlockRules.append(rule)
        
        for rule in exceptionCssRules:
            try:
                originalRule = cssRulesDict[rule.cssSelector()]
            except KeyError:
                # If there is no such selector, the exception does nothing.
                continue
            
            copiedRule = AdBlockRule()
            copiedRule.copyFrom(originalRule)
            copiedRule.setOption(AdBlockRuleOption.DomainRestrictedOption)
            copiedRule.addBlockedDomains(rule.allowedDomains())
            
            cssRulesDict[rule.cssSelector()] = copiedRule
            self.__createdRules.append(copiedRule)
        
        # Excessive amount of selectors for one CSS rule is not what the
        # rendering engine likes. So split them up by 1.000 selectors.
        hidingRulesCount = 0
        for key in cssRulesDict:
            rule = cssRulesDict[key]
            
            if rule.isDomainRestricted():
                self.__domainRestrictedCssRules.append(rule)
            elif hidingRulesCount == 1000:
                self.__elementHidingRules += rule.cssSelector()
                self.__elementHidingRules += "{display:none !important;} "
                hidingRulesCount = 0
            else:
                self.__elementHidingRules += rule.cssSelector() + ","
                hidingRulesCount += 1
        
        if hidingRulesCount != 0:
            self.__elementHidingRules = self.__elementHidingRules[:-1]
            self.__elementHidingRules += "{display:none !important;} "
    
    def clear(self):
        """
        Public slot to clear the internal structures.
        """
        self.__createdRules = []
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []
        
        self.__elementHidingRules = ""
        self.__networkBlockTree.clear()
        self.__networkExceptionTree.clear()

eric ide

mercurial