在PyQt4中,使用QtWebKit中的QWebView渲染页面,得到原始的html,代码如下:
# PyQt4 approach: load the page in a QWebView, wait on a local event loop
# until the view reports that loading has finished, then read the HTML from
# the page's main frame.
pp = QApplication([])
webview = QWebView()
loop = QEventLoop()
# Leave the local event loop as soon as loadFinished is emitted.
webview.loadFinished.connect(loop.quit)
# NOTE: `url` is not defined in this snippet; it must be set beforehand.
webview.load(QUrl(url))
loop.exec_()
# Read the HTML of the page's main frame.
html = webview.page().mainFrame().toHtml()
但是 PyQt5 用 QWebEngineView 取代了上述的 QWebView,我该如何用 QWebEngineView 像上面那样获得页面的源 HTML 呢?
如下所示,我的代码可以显示网页,但我该怎么得到页面的 HTML 呢?
# PyQt5 attempt from the question: the page is loaded and shown, but no HTML
# is read back anywhere in this snippet.
url = 'http://example.python-scraping.com/dynamic'
# html = download(url)
app = QApplication([])
webview = QWebEngineView()
loop = QEventLoop()
# NOTE: the loadFinished -> loop.quit hookup below is commented out, so
# nothing in this snippet asks the local event loop to stop.
# webview.loadFinished.connect(loop.quit)
webview.load(QUrl(url))
webview.show()
loop.exec_()
已解决,代码如下:
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import QWebEngineView
import requests
import lxml.html
class Render(QWebEngineView):
    """QWebEngineView subclass that loads a URL and captures the page's HTML.

    Constructing an instance blocks: it starts loading ``url``, runs the Qt
    application event loop, and returns only after the HTML has been handed
    to :meth:`callable`.  The result is then available as ``self.html``.
    The view is never shown.
    """

    def __init__(self, url):
        """Load ``url`` and block until its HTML is stored in ``self.html``.

        Args:
            url: Address of the page to load; it is wrapped in ``QUrl``.
        """
        # Qt allows only one application object per process.  Reuse an
        # existing one (for example from an earlier Render instance) and
        # create a new one only when none exists yet.  The application must
        # exist before the widget base class is initialised.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        super().__init__()
        # HTML of the loaded page; stays empty until callable() is invoked.
        self.html = ''
        self.loadFinished.connect(self._loadFinished)
        self.load(QUrl(url))
        # Blocks here until callable() asks the application to quit.
        self.app.exec_()

    def _loadFinished(self):
        """Request the page's HTML once the view reports loading finished.

        The HTML is not returned here; it is delivered to :meth:`callable`.
        """
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the delivered HTML and stop the application event loop.

        Args:
            data: The page HTML passed in by ``toHtml``.
        """
        self.html = data
        self.app.quit()
if __name__ == '__main__':
    # Render the dynamic demo page, then print the text content of the
    # element with id "result" found in the captured HTML.
    page_url = 'http://example.python-scraping.com/dynamic'
    renderer = Render(page_url)
    document = lxml.html.fromstring(renderer.html)
    matches = document.cssselect('#result')
    print(matches[0].text_content())
你可以参考下这篇文章:pyqt5 开发浏览器 (pyqt5 QWebEngineView开发浏览器)