eric6/PipInterface/PipPackagesWidget.py

changeset 8085
f6db8b3ecea9
parent 8056
6e89221ff9dd
child 8090
c53117374255
--- a/eric6/PipInterface/PipPackagesWidget.py	Sun Feb 07 17:34:51 2021 +0100
+++ b/eric6/PipInterface/PipPackagesWidget.py	Mon Feb 08 18:01:02 2021 +0100
@@ -9,20 +9,19 @@
 
 import textwrap
 import os
-import re
+import html.parser
 
-from PyQt5.QtCore import pyqtSlot, Qt
+from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QUrlQuery
+from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest
 from PyQt5.QtWidgets import (
     QWidget, QToolButton, QApplication, QHeaderView, QTreeWidgetItem,
-    QInputDialog, QMenu, QDialog
+    QMenu, QDialog
 )
 
 from E5Gui.E5Application import e5App
 from E5Gui import E5MessageBox
 from E5Gui.E5OverrideCursor import E5OverrideCursor
 
-from E5Network.E5XmlRpcClient import E5XmlRpcClient
-
 from .Ui_PipPackagesWidget import Ui_PipPackagesWidget
 
 import UI.PixmapCache
@@ -30,6 +29,114 @@
 import Preferences
 
 
+class PypiSearchResultsParser(html.parser.HTMLParser):
+    """
+    Class implementing the parser for the PyPI search result page.
+    """
+    ClassPrefix = "package-snippet__"
+    
+    def __init__(self, data):
+        """
+        Constructor
+        
+        @param data data to be parsed
+        @type str
+        """
+        super(PypiSearchResultsParser, self).__init__()
+        self.__results = []
+        self.__activeClass = None
+        self.feed(data)
+    
+    def __getClass(self, attrs):
+        """
+        Private method to extract the class attribute out of the list of
+        attributes.
+        
+        @param attrs list of tag attributes as (name, value) tuples
+        @type list of tuple of (str, str)
+        @return value of the 'class' attribute or None
+        @rtype str
+        """
+        for name, value in attrs:
+            if name == "class":
+                return value
+        
+        return None
+    
+    def __getDate(self, attrs):
+        """
+        Private method to extract the datetime attribute out of the list of
+        attributes and process it.
+        
+        @param attrs list of tag attributes as (name, value) tuples
+        @type list of tuple of (str, str)
+        @return date part of the 'datetime' attribute or None
+        @rtype str
+        """
+        for name, value in attrs:
+            if name == "datetime":
+                return value.split("T")[0]
+        
+        return None
+    
+    def handle_starttag(self, tag, attrs):
+        """
+        Public method to process the start tag.
+        
+        @param tag tag name (all lowercase)
+        @type str
+        @param attrs list of tag attributes as (name, value) tuples
+        @type list of tuple of (str, str)
+        """
+        if tag == "a" and self.__getClass(attrs) == "package-snippet":
+            self.__results.append({})
+        
+        if tag in ("span", "p"):
+            tagClass = self.__getClass(attrs)
+            if tagClass in (
+                "package-snippet__name", "package-snippet__description",
+                "package-snippet__version", "package-snippet__released",
+            ):
+                self.__activeClass = tagClass
+            else:
+                self.__activeClass = None
+        elif tag == "time":
+            attributeName = self.__activeClass.replace(self.ClassPrefix, "")
+            self.__results[-1][attributeName] = self.__getDate(attrs)
+            self.__activeClass = None
+        else:
+            self.__activeClass = None
+    
+    def handle_data(self, data):
+        """
+        Public method to process arbitrary data.
+        
+        @param data data to be processed
+        @type str
+        """
+        if self.__activeClass is not None:
+            attributeName = self.__activeClass.replace(self.ClassPrefix, "")
+            self.__results[-1][attributeName] = data
+    
+    def handle_endtag(self, tag):
+        """
+        Public method to process the end tag.
+        
+        @param tag tag name (all lowercase)
+        @type str
+        """
+        self.__activeClass = None
+    
+    def getResults(self):
+        """
+        Public method to get the extracted search results.
+        
+        @return extracted result data
+        @rtype list of dict
+        """
+        return self.__results
+
+
 class PipPackagesWidget(QWidget, Ui_PipPackagesWidget):
     """
     Class implementing the pip packages management widget.
@@ -39,15 +146,10 @@
     ShowProcessEntryPointsMode = 2
     ShowProcessFilesListMode = 3
     
-    SearchStopwords = {
-        "a", "and", "are", "as", "at", "be", "but", "by",
-        "for", "if", "in", "into", "is", "it",
-        "no", "not", "of", "on", "or", "such",
-        "that", "the", "their", "then", "there", "these",
-        "they", "this", "to", "was", "will",
-    }
     SearchVersionRole = Qt.UserRole + 1
     
+    SearchUrl = "https://pypi.org/search/"
+    
     def __init__(self, pip, parent=None):
         """
         Constructor
@@ -82,7 +184,6 @@
         self.showDetailsButton.setIcon(UI.PixmapCache.getIcon("info"))
         
         self.__pip = pip
-        self.__client = E5XmlRpcClient(self.__pip.getIndexUrlXml(), self)
         
         self.packagesList.header().setSortIndicator(0, Qt.AscendingOrder)
         
@@ -126,6 +227,8 @@
         self.__queryName = []
         self.__querySummary = []
         
+        self.__replies = []
+        
         self.__packageDetailsDialog = None
     
     def __populateEnvironments(self):
@@ -544,8 +647,7 @@
         Private method to update the state of the search button.
         """
         self.searchButton.setEnabled(
-            (bool(self.searchEditName.text()) or
-             bool(self.searchEditSummary.text())) and
+            bool(self.searchEditName.text()) and
             self.__isPipAvailable()
         )
     
@@ -582,30 +684,7 @@
         Private slot initiating a search via a press of the Return key.
         """
         if (
-            (bool(self.searchEditName.text()) or
-             bool(self.searchEditSummary.text())) and
-            self.__isPipAvailable()
-        ):
-            self.__search()
-    
-    @pyqtSlot(str)
-    def on_searchEditSummary_textChanged(self, txt):
-        """
-        Private slot handling a change of the search term.
-        
-        @param txt search term
-        @type str
-        """
-        self.__updateSearchButton()
-    
-    @pyqtSlot()
-    def on_searchEditSummary_returnPressed(self):
-        """
-        Private slot initiating a search via a press of the Return key.
-        """
-        if (
-            (bool(self.searchEditName.text()) or
-             bool(self.searchEditSummary.text())) and
+            bool(self.searchEditName.text()) and
             self.__isPipAvailable()
         ):
             self.__search()
@@ -626,84 +705,89 @@
     
     def __search(self):
         """
-        Private method to perform the search.
+        Private method to perform the search by calling the PyPI search URL.
         """
-        # TODO: change search to use web scraping to get rid of XML-RPC
-        #       see thonny for how to do it
         self.searchResultList.clear()
         self.searchInfoLabel.clear()
         
         self.searchButton.setEnabled(False)
         
-        self.__queryName = [
-            term for term in self.searchEditName.text().strip().split()
-            if term not in self.SearchStopwords
-        ]
-        self.__querySummary = [
-            term for term in self.searchEditSummary.text().strip().split()
-            if term not in self.SearchStopwords
-        ]
-        self.__client.call(
-            "search",
-            ({"name": self.__queryName,
-              "summary": self.__querySummary},
-             self.searchTermCombineComboBox.currentText()),
-            self.__processSearchResult,
-            self.__searchError
-        )
+        searchTerm = self.searchEditName.text().strip()
+        searchTerm = bytes(QUrl.toPercentEncoding(searchTerm)).decode()
+        urlQuery = QUrlQuery()
+        urlQuery.addQueryItem("q", searchTerm)
+        url = QUrl(self.SearchUrl)
+        url.setQuery(urlQuery)
+        
+        request = QNetworkRequest(QUrl(url))
+        request.setAttribute(QNetworkRequest.CacheLoadControlAttribute,
+                             QNetworkRequest.AlwaysNetwork)
+        reply = self.__pip.getNetworkAccessManager().get(request)
+        reply.finished.connect(
+            lambda: self.__searchResponse(reply))
+        self.__replies.append(reply)
     
-    def __processSearchResult(self, data):
+    def __searchResponse(self, reply):
         """
-        Private method to process the search result data from PyPI.
+        Private method to extract the search result data from the response.
         
-        @param data result data with hits in the first element
-        @type tuple
+        @param reply reference to the reply object containing the data
+        @type QNetworkReply
         """
-        if data:
-            packages = self.__transformHits(data[0])
-            if packages:
-                self.searchInfoLabel.setText(
-                    self.tr("%n package(s) found.", "", len(packages)))
-                wrapper = textwrap.TextWrapper(width=80)
-                count = 0
-                total = 0
-                for package in packages:
-                    itm = QTreeWidgetItem(
-                        self.searchResultList, [
-                            package['name'].strip(),
-                            "{0:4d}".format(package['score']),
-                            "\n".join([
-                                wrapper.fill(line) for line in
-                                package['summary'].strip().splitlines()
-                            ])
-                        ])
-                    itm.setData(0, self.SearchVersionRole, package['version'])
-                    count += 1
-                    total += 1
-                    if count == 100:
-                        count = 0
-                        QApplication.processEvents()
+        if reply in self.__replies:
+            self.__replies.remove(reply)
+        
+        urlQuery = QUrlQuery(reply.url())
+        searchTerm = urlQuery.queryItemValue("q")
+        
+        if reply.error() != QNetworkReply.NoError:
+            E5MessageBox.warning(
+                None,
+                self.tr("Search PyPI"),
+                self.tr(
+                    "<p>Received an error while searching for <b>{0}</b>.</p>"
+                    "<p>Error: {1}</p>"
+                ).format(searchTerm, reply.errorString())
+            )
+            reply.deleteLater()
+            return
+        
+        data = bytes(reply.readAll()).decode()
+        reply.deleteLater()
+        
+        results = PypiSearchResultsParser(data).getResults()
+        if results:
+            if len(results) < 20:
+                msg = self.tr("%n package(s) found.", "", len(results))
             else:
-                E5MessageBox.warning(
-                    self,
-                    self.tr("Search PyPI"),
-                    self.tr("""<p>The package search did not return"""
-                            """ anything.</p>"""))
-                self.searchInfoLabel.setText(
-                    self.tr("""<p>The package search did not return"""
-                            """ anything.</p>"""))
+                msg = self.tr("Showing first 20 packages found.")
+            self.searchInfoLabel.setText(msg)
         else:
             E5MessageBox.warning(
                 self,
                 self.tr("Search PyPI"),
-                self.tr("""<p>The package search did not return anything."""
-                        """</p>"""))
+                self.tr("""<p>There were no results for <b>{0}</b>.</p>"""))
             self.searchInfoLabel.setText(
-                self.tr("""<p>The package search did not return anything."""
-                        """</p>"""))
+                self.tr("""<p>There were no results for <b>{0}</b>.</p>"""))
+        
+        wrapper = textwrap.TextWrapper(width=80)
+        for result in results:
+            try:
+                description = "\n".join([
+                    wrapper.fill(line) for line in
+                    result['description'].strip().splitlines()
+                ])
+            except KeyError:
+                description = ""
+            itm = QTreeWidgetItem(
+                self.searchResultList, [
+                    result['name'].strip(),
+                    result["released"].strip(),
+                    description,
+                ])
+            itm.setData(0, self.SearchVersionRole, result['version'])
         
         header = self.searchResultList.header()
-        self.searchResultList.sortItems(1, Qt.DescendingOrder)
         header.setStretchLastSection(False)
         header.resizeSections(QHeaderView.ResizeToContents)
         headerSize = 0
@@ -723,87 +807,6 @@
         
         self.searchEditName.setFocus(Qt.OtherFocusReason)
     
-    def __searchError(self, errorCode, errorString):
-        """
-        Private method handling a search error.
-        
-        @param errorCode code of the error
-        @type int
-        @param errorString error message
-        @type str
-        """
-        self.__finishSearch()
-        E5MessageBox.warning(
-            self,
-            self.tr("Search PyPI"),
-            self.tr("""<p>The package search failed.</p><p>Reason: {0}</p>""")
-            .format(errorString))
-        self.searchInfoLabel.setText(self.tr("Error: {0}").format(errorString))
-    
-    def __transformHits(self, hits):
-        """
-        Private method to convert the list returned from pypi into a
-        packages list.
-        
-        @param hits list returned from pypi
-        @type list of dict
-        @return list of packages
-        @rtype list of dict
-        """
-        # we only include the record with the highest score
-        packages = {}
-        for hit in hits:
-            name = hit['name'].strip()
-            summary = (hit['summary'] or "").strip()
-            version = hit['version'].strip()
-            score = self.__score(name, summary)
-            # cleanup the summary
-            if summary in ["UNKNOWN", "."]:
-                summary = ""
-
-            if name not in packages:
-                packages[name] = {
-                    'name': name,
-                    'summary': summary,
-                    'version': [version.strip()],
-                    'score': score}
-            else:
-                if score > packages[name]['score']:
-                    packages[name]['score'] = score
-                    packages[name]['summary'] = summary
-                packages[name]['version'].append(version.strip())
-
-        return list(packages.values())
-    
-    def __score(self, name, summary):
-        """
-        Private method to calculate some score for a search result.
-        
-        @param name name of the returned package
-        @type str
-        @param summary summary text for the package
-        @type str
-        @return score value
-        @rtype int
-        """
-        score = 0
-        for queryTerm in self.__queryName:
-            if queryTerm.lower() in name.lower():
-                score += 4
-                if queryTerm.lower() == name.lower():
-                    score += 4
-            
-        for queryTerm in self.__querySummary:
-            if queryTerm.lower() in summary.lower():
-                if re.search(r'\b{0}\b'.format(re.escape(queryTerm)),
-                             summary, re.IGNORECASE) is not None:
-                    # word match gets even higher score
-                    score += 2
-                else:
-                    score += 1
-        
-        return score
-    
     @pyqtSlot()
     def on_installButton_clicked(self):
         """
@@ -866,20 +869,7 @@
         if not item:
             item = self.searchResultList.selectedItems()[0]
         
-        packageVersions = item.data(0, self.SearchVersionRole)
-        if len(packageVersions) == 1:
-            packageVersion = packageVersions[0]
-        elif len(packageVersions) == 0:
-            packageVersion = ""
-        else:
-            packageVersion, ok = QInputDialog.getItem(
-                self,
-                self.tr("Show Package Details"),
-                self.tr("Select the package version:"),
-                packageVersions,
-                0, False)
-            if not ok:
-                return
+        packageVersion = item.data(0, self.SearchVersionRole)
         packageName = item.text(0)
         
         self.__showPackageDetails(packageName, packageVersion)

eric ide

mercurial