How to "Render" HTML with PyQt5's QWebEngineView
Solution 1:
This topic was discussed at length in the following thread: https://riverbankcomputing.com/pipermail/pyqt/2015-January/035324.html
The new QWebEngine interface takes account of the fact that the underlying Chromium engine is asynchronous. As such we have to turn an asynchronous API into a synchronous one.
Here's how that looks:
def render(source_html):
    """Fully render HTML, JavaScript and all.

    Loads ``source_html`` into a ``QWebEngineView`` that is never shown,
    pumps the Qt event loop until the page's HTML has been delivered, and
    returns that HTML.

    Args:
        source_html: HTML source text handed to ``QWebEngineView.setHtml``.

    Returns:
        The string passed to the ``toHtml`` callback once loading finished.
    """
    import sys

    from PyQt5.QtCore import QEventLoop
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """Web view that blocks in ``__init__`` until ``self.html`` is set."""

        def __init__(self, html):
            self.html = None
            # Qt allows only one QApplication per process, so reuse an
            # existing instance instead of failing on a repeated call.
            self.app = QApplication.instance() or QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.setHtml(html)
            # toHtml() is asynchronous: keep processing events (blocking
            # until more arrive) until the callback has stored the result.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _callable(self, data):
            """Receive the page HTML from ``toHtml`` and store it."""
            self.html = data

        def _loadFinished(self, result):
            """Request the page HTML once the load has finished."""
            self.page().toHtml(self._callable)

    return Render(source_html).html
import requests
# Usage example: download a page and render the downloaded markup.
# NOTE(review): dummy_url is not defined in this snippet -- set it to the
# URL of the page to fetch before running.
sample_html = requests.get(dummy_url).text
print(render(sample_html))
Solution 2:
The answer by Six & Veehmot is great, but I found out that for my purpose it was not sufficient, as it did not expand the dropdown elements of the page that I wanted to scrape. A slight modification fixed this:
def render(url):
    """Fully render HTML, JavaScript and all.

    Loads ``url`` directly in a ``QWebEngineView`` that is never shown,
    pumps the Qt event loop until the page's HTML has been delivered, and
    returns that HTML.

    Args:
        url: Address of the page, passed to ``QUrl`` and then ``load``.

    Returns:
        The string passed to the ``toHtml`` callback once loading finished.
    """
    import sys

    from PyQt5.QtCore import QEventLoop, QUrl
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """Web view that blocks in ``__init__`` until ``self.html`` is set."""

        def __init__(self, url):
            self.html = None
            # Qt allows only one QApplication per process, so reuse an
            # existing instance instead of failing on a repeated call.
            self.app = QApplication.instance() or QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.load(QUrl(url))
            # toHtml() is asynchronous: keep processing events (blocking
            # until more arrive) until the callback has stored the result.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _callable(self, data):
            """Receive the page HTML from ``toHtml`` and store it."""
            self.html = data

        def _loadFinished(self, result):
            """Request the page HTML once the load has finished."""
            self.page().toHtml(self._callable)

    return Render(url).html
# Usage example. NOTE(review): dummy_url is not defined in this snippet --
# set it to the URL of the page to render before running.
print(render(dummy_url))
Solution 3:
As you pointed out, Qt 5.4 relies on async calls. It's not necessary to use the loop (as seen in your answer), since your only mistake was to call `quit` before the `toHtml` call finishes.
def render(source_html):
    """Fully render HTML, JavaScript and all.

    Loads ``source_html`` into a ``QWebEngineView`` that is never shown,
    runs the application event loop until the page's HTML has been
    delivered, and returns that HTML.

    Args:
        source_html: HTML source text handed to ``QWebEngineView.setHtml``.

    Returns:
        The string passed to the ``toHtml`` callback once loading finished.
    """
    import sys

    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """Web view whose ``__init__`` returns once ``self.html`` is stored."""

        def __init__(self, html):
            self.html = None
            # Qt allows only one QApplication per process, so reuse an
            # existing instance instead of failing on a repeated call.
            self.app = QApplication.instance() or QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.setHtml(html)
            # Blocks here until callable() calls quit().
            self.app.exec_()

        def _loadFinished(self, result):
            # This is an async call, you need to wait for this
            # to be called before closing the app
            self.page().toHtml(self.callable)

        def callable(self, data):
            """Receive the page HTML from ``toHtml`` and stop the event loop."""
            self.html = data
            # Data has been stored, it's safe to quit the app
            self.app.quit()

    return Render(source_html).html
import requests
# Usage example: download a page and render the downloaded markup.
# NOTE(review): dummy_url is not defined in this snippet -- set it to the
# URL of the page to fetch before running.
sample_html = requests.get(dummy_url).text
print(render(sample_html))
Solution 4:
It's not entirely clear to me what you mean by "render". I understand it to mean, "display the HTML accordingly on the screen." The following does just that.
# main.py
import os
import sys

from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
class Browser(QtWebEngineWidgets.QWebEngineView):
    """Web view that loads a small hard-coded HTML document on creation."""

    def __init__(self):
        super().__init__()
        html = """
<!DOCTYPE html>
<html>
<head>
<title>Example</title>
<meta charset="utf-8" />
</head>
<body>
<script>alert('Running some Javascript');</script>
<h1>Hello world!</h1>
<p>Goodbye, cruel world...</p>
</body>
</html>
"""
        # With QWebEnginePage.setHtml, the html is loaded immediately.
        # baseUrl is used to resolve relative URLs in the document.
        # For whatever reason, it seems like the baseUrl resolves to
        # the parent of the path, not the baseUrl itself. As a
        # workaround, either append a dummy directory to the base url
        # or start all relative paths in the html with the current
        # directory.
        # https://doc-snapshots.qt.io/qtforpython-5.15/PySide2/QtWebEngineWidgets/QWebEnginePage.html#PySide2.QtWebEngineWidgets.PySide2.QtWebEngineWidgets.QWebEnginePage.setHtml
        here = os.path.dirname(os.path.abspath(__file__))
        base_path = os.path.join(os.path.dirname(here), 'dummy')
        # fromLocalFile builds a well-formed file:// URL on every platform;
        # prepending 'file:///' by hand yields 'file:////...' for POSIX
        # absolute paths, which already start with '/'.
        self.url = QtCore.QUrl.fromLocalFile(base_path)
        self.page().setHtml(html, baseUrl=self.url)
class MainWindow(QtWidgets.QMainWindow):
    """Main window that hosts a ``Browser`` and reports its load status."""

    def __init__(self):
        super().__init__()
        self.init_widgets()
        self.init_layout()

    def init_widgets(self):
        """Create the browser view and subscribe to its ``loadFinished``."""
        self.browser = Browser()
        self.browser.loadFinished.connect(self.load_finished)

    def init_layout(self):
        """Place the browser in a vertical layout as the central widget."""
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.browser)
        centralWidget = QtWidgets.QWidget()
        centralWidget.setLayout(layout)
        self.setCentralWidget(centralWidget)

    def load_finished(self, status):
        """Show a message box reporting the ``loadFinished`` status value.

        Args:
            status: Value emitted by the browser's ``loadFinished`` signal.
        """
        # NOTE(review): presumably stored on self so the box is not
        # garbage-collected while it is displayed -- confirm.
        self.msg = QtWidgets.QMessageBox()
        self.msg.setIcon(QtWidgets.QMessageBox.Information)
        self.msg.setWindowTitle('Load Status')
        self.msg.setText(f"It is {status} that the page loaded.")
        self.msg.show()
if __name__ == '__main__':
    # Create the application, show the main window, and propagate the
    # event loop's exit code to the shell.
    app = QtWidgets.QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    sys.exit(app.exec_())
The `setHtml` method takes a string, so when the HTML lives in a file, the file's contents must be read in first.
Post a Comment for "How To "render" Html With Pyqt5's Qwebengineview"