diff -r 7742e0b96629 -r f6db8b3ecea9 eric6/PipInterface/PipPackagesWidget.py --- a/eric6/PipInterface/PipPackagesWidget.py Sun Feb 07 17:34:51 2021 +0100 +++ b/eric6/PipInterface/PipPackagesWidget.py Mon Feb 08 18:01:02 2021 +0100 @@ -9,20 +9,19 @@ import textwrap import os -import re +import html.parser -from PyQt5.QtCore import pyqtSlot, Qt +from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QUrlQuery +from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest from PyQt5.QtWidgets import ( QWidget, QToolButton, QApplication, QHeaderView, QTreeWidgetItem, - QInputDialog, QMenu, QDialog + QMenu, QDialog ) from E5Gui.E5Application import e5App from E5Gui import E5MessageBox from E5Gui.E5OverrideCursor import E5OverrideCursor -from E5Network.E5XmlRpcClient import E5XmlRpcClient - from .Ui_PipPackagesWidget import Ui_PipPackagesWidget import UI.PixmapCache @@ -30,6 +29,114 @@ import Preferences +class PypiSearchResultsParser(html.parser.HTMLParser): + """ + Class implementing the parser for the PyPI search result page. + """ + ClassPrefix = "package-snippet__" + + def __init__(self, data): + """ + Constructor + + @param data data to be parsed + @type str + """ + super(PypiSearchResultsParser, self).__init__() + self.__results = [] + self.__activeClass = None + self.feed(data) + + def __getClass(self, attrs): + """ + Private method to extract the class attribute out of the list of + attributes. + + @param attrs list of tag attributes as (name, value) tuples + @type list of tuple of (str, str) + @return value of the 'class' attribute or None + @rtype str + """ + for name, value in attrs: + if name == "class": + return value + + return None + + def __getDate(self, attrs): + """ + Private method to extract the datetime attribute out of the list of + attributes and process it. + + @param attrs list of tag attributes as (name, value) tuples + @type list of tuple of (str, str) + @return value of the 'class' attribute or None + @rtype str + """ + for name, value in attrs: + if name == "datetime": + return value.split("T")[0] + + return None + + def handle_starttag(self, tag, attrs): + """ + Public method to process the start tag. + + @param tag tag name (all lowercase) + @type str + @param attrs list of tag attributes as (name, value) tuples + @type list of tuple of (str, str) + """ + if tag == "a" and self.__getClass(attrs) == "package-snippet": + self.__results.append({}) + + if tag in ("span", "p"): + tagClass = self.__getClass(attrs) + if tagClass in ( + "package-snippet__name", "package-snippet__description", + "package-snippet__version", "package-snippet__released", + ): + self.__activeClass = tagClass + else: + self.__activeClass = None + elif tag == "time": + attributeName = self.__activeClass.replace(self.ClassPrefix, "") + self.__results[-1][attributeName] = self.__getDate(attrs) + self.__activeClass = None + else: + self.__activeClass = None + + def handle_data(self, data): + """ + Public method process arbitrary data. + + @param data data to be processed + @type str + """ + if self.__activeClass is not None: + attributeName = self.__activeClass.replace(self.ClassPrefix, "") + self.__results[-1][attributeName] = data + + def handle_endtag(self, tag): + """ + Public method to process the end tag. + + @param tag tag name (all lowercase) + @type str + """ + self.__activeClass = None + + def getResults(self): + """ + Public method to get the extracted search results. + + @return extracted result data + @rtype list of dict + """ + return self.__results + + class PipPackagesWidget(QWidget, Ui_PipPackagesWidget): """ Class implementing the pip packages management widget. @@ -39,15 +146,10 @@ ShowProcessEntryPointsMode = 2 ShowProcessFilesListMode = 3 - SearchStopwords = { - "a", "and", "are", "as", "at", "be", "but", "by", - "for", "if", "in", "into", "is", "it", - "no", "not", "of", "on", "or", "such", - "that", "the", "their", "then", "there", "these", - "they", "this", "to", "was", "will", - } SearchVersionRole = Qt.UserRole + 1 + SearchUrl = "https://pypi.org/search/" + def __init__(self, pip, parent=None): """ Constructor @@ -82,7 +184,6 @@ self.showDetailsButton.setIcon(UI.PixmapCache.getIcon("info")) self.__pip = pip - self.__client = E5XmlRpcClient(self.__pip.getIndexUrlXml(), self) self.packagesList.header().setSortIndicator(0, Qt.AscendingOrder) @@ -126,6 +227,8 @@ self.__queryName = [] self.__querySummary = [] + self.__replies = [] + self.__packageDetailsDialog = None def __populateEnvironments(self): @@ -544,8 +647,7 @@ Private method to update the state of the search button. """ self.searchButton.setEnabled( - (bool(self.searchEditName.text()) or - bool(self.searchEditSummary.text())) and + bool(self.searchEditName.text()) and self.__isPipAvailable() ) @@ -582,30 +684,7 @@ Private slot initiating a search via a press of the Return key. """ if ( - (bool(self.searchEditName.text()) or - bool(self.searchEditSummary.text())) and - self.__isPipAvailable() - ): - self.__search() - - @pyqtSlot(str) - def on_searchEditSummary_textChanged(self, txt): - """ - Private slot handling a change of the search term. - - @param txt search term - @type str - """ - self.__updateSearchButton() - - @pyqtSlot() - def on_searchEditSummary_returnPressed(self): - """ - Private slot initiating a search via a press of the Return key. - """ - if ( - (bool(self.searchEditName.text()) or - bool(self.searchEditSummary.text())) and + bool(self.searchEditName.text()) and self.__isPipAvailable() ): self.__search() @@ -626,84 +705,89 @@ def __search(self): """ - Private method to perform the search. + Private method to perform the search by calling the PyPI search URL. """ - # TODO: change search to use web scraping to get rid of XML-RPC - # see thonny for how to do it self.searchResultList.clear() self.searchInfoLabel.clear() self.searchButton.setEnabled(False) - self.__queryName = [ - term for term in self.searchEditName.text().strip().split() - if term not in self.SearchStopwords - ] - self.__querySummary = [ - term for term in self.searchEditSummary.text().strip().split() - if term not in self.SearchStopwords - ] - self.__client.call( - "search", - ({"name": self.__queryName, - "summary": self.__querySummary}, - self.searchTermCombineComboBox.currentText()), - self.__processSearchResult, - self.__searchError - ) + searchTerm = self.searchEditName.text().strip() + searchTerm = bytes(QUrl.toPercentEncoding(searchTerm)).decode() + urlQuery = QUrlQuery() + urlQuery.addQueryItem("q", searchTerm) + url = QUrl(self.SearchUrl) + url.setQuery(urlQuery) + + request = QNetworkRequest(QUrl(url)) + request.setAttribute(QNetworkRequest.CacheLoadControlAttribute, + QNetworkRequest.AlwaysNetwork) + reply = self.__pip.getNetworkAccessManager().get(request) + reply.finished.connect( + lambda: self.__searchResponse(reply)) + self.__replies.append(reply) - def __processSearchResult(self, data): + def __searchResponse(self, reply): """ - Private method to process the search result data from PyPI. + Private method to extract the search result data from the response. - @param data result data with hits in the first element - @type tuple + @param reply reference to the reply object containing the data + @type QNetworkReply """ - if data: - packages = self.__transformHits(data[0]) - if packages: - self.searchInfoLabel.setText( - self.tr("%n package(s) found.", "", len(packages))) - wrapper = textwrap.TextWrapper(width=80) - count = 0 - total = 0 - for package in packages: - itm = QTreeWidgetItem( - self.searchResultList, [ - package['name'].strip(), - "{0:4d}".format(package['score']), - "\n".join([ - wrapper.fill(line) for line in - package['summary'].strip().splitlines() - ]) - ]) - itm.setData(0, self.SearchVersionRole, package['version']) - count += 1 - total += 1 - if count == 100: - count = 0 - QApplication.processEvents() + if reply in self.__replies: + self.__replies.remove(reply) + + urlQuery = QUrlQuery(reply.url()) + searchTerm = urlQuery.queryItemValue("q") + + if reply.error() != QNetworkReply.NoError: + E5MessageBox.warning( + None, + self.tr("Search PyPI"), + self.tr( + "<p>Received an error while searching for <b>{0}</b>.</p>" + "<p>Error: {1}</p>" + ).format(searchTerm, reply.errorString()) + ) + reply.deleteLater() + return + + data = bytes(reply.readAll()).decode() + reply.deleteLater() + + results = PypiSearchResultsParser(data).getResults() + if results: + if len(results) < 20: + msg = self.tr("%n package(s) found.", "", len(results)) else: - E5MessageBox.warning( - self, - self.tr("Search PyPI"), - self.tr("""<p>The package search did not return""" - """ anything.</p>""")) - self.searchInfoLabel.setText( - self.tr("""<p>The package search did not return""" - """ anything.</p>""")) + msg = self.tr("Showing first 20 packages found.") + self.searchInfoLabel.setText(msg) else: E5MessageBox.warning( self, self.tr("Search PyPI"), - self.tr("""<p>The package search did not return anything.""" - """</p>""")) + self.tr("""<p>There were no results for <b>{0}</b>.</p>""")) self.searchInfoLabel.setText( - self.tr("""<p>The package search did not return anything.""" - """</p>""")) + self.tr("""<p>There were no results for <b>{0}</b>.</p>""")) + + wrapper = textwrap.TextWrapper(width=80) + for result in results: + try: + description = "\n".join([ + wrapper.fill(line) for line in + result['description'].strip().splitlines() + ]) + except KeyError: + description = "" + itm = QTreeWidgetItem( + self.searchResultList, [ + result['name'].strip(), + result["released"].strip(), + description, + ]) + itm.setData(0, self.SearchVersionRole, result['version']) header = self.searchResultList.header() - self.searchResultList.sortItems(1, Qt.DescendingOrder) header.setStretchLastSection(False) header.resizeSections(QHeaderView.ResizeToContents) headerSize = 0 @@ -723,87 +807,6 @@ self.searchEditName.setFocus(Qt.OtherFocusReason) - def __searchError(self, errorCode, errorString): - """ - Private method handling a search error. - - @param errorCode code of the error - @type int - @param errorString error message - @type str - """ - self.__finishSearch() - E5MessageBox.warning( - self, - self.tr("Search PyPI"), - self.tr("""<p>The package search failed.</p><p>Reason: {0}</p>""") - .format(errorString)) - self.searchInfoLabel.setText(self.tr("Error: {0}").format(errorString)) - - def __transformHits(self, hits): - """ - Private method to convert the list returned from pypi into a - packages list. - - @param hits list returned from pypi - @type list of dict - @return list of packages - @rtype list of dict - """ - # we only include the record with the highest score - packages = {} - for hit in hits: - name = hit['name'].strip() - summary = (hit['summary'] or "").strip() - version = hit['version'].strip() - score = self.__score(name, summary) - # cleanup the summary - if summary in ["UNKNOWN", "."]: - summary = "" - - if name not in packages: - packages[name] = { - 'name': name, - 'summary': summary, - 'version': [version.strip()], - 'score': score} - else: - if score > packages[name]['score']: - packages[name]['score'] = score - packages[name]['summary'] = summary - packages[name]['version'].append(version.strip()) - - return list(packages.values()) - - def __score(self, name, summary): - """ - Private method to calculate some score for a search result. - - @param name name of the returned package - @type str - @param summary summary text for the package - @type str - @return score value - @rtype int - """ - score = 0 - for queryTerm in self.__queryName: - if queryTerm.lower() in name.lower(): - score += 4 - if queryTerm.lower() == name.lower(): - score += 4 - - for queryTerm in self.__querySummary: - if queryTerm.lower() in summary.lower(): - if re.search(r'\b{0}\b'.format(re.escape(queryTerm)), - summary, re.IGNORECASE) is not None: - # word match gets even higher score - score += 2 - else: - score += 1 - - return score - @pyqtSlot() def on_installButton_clicked(self): """ @@ -866,20 +869,7 @@ if not item: item = self.searchResultList.selectedItems()[0] - packageVersions = item.data(0, self.SearchVersionRole) - if len(packageVersions) == 1: - packageVersion = packageVersions[0] - elif len(packageVersions) == 0: - packageVersion = "" - else: - packageVersion, ok = QInputDialog.getItem( - self, - self.tr("Show Package Details"), - self.tr("Select the package version:"), - packageVersions, - 0, False) - if not ok: - return + packageVersion = item.data(0, self.SearchVersionRole) packageName = item.text(0) self.__showPackageDetails(packageName, packageVersion)