I'm trying to parse webpages generated by js with qtwebkit, I found an example of how to get page source:
import sys
from PySide.QtGui import *
from PySide.QtCore import *
from PySide.QtWebKit import *
class Render(QWebPage):
    """Load a URL in a QtWebKit page and block until loading has finished.

    After construction, ``frame`` is the page's main frame (so
    ``frame.toHtml()`` returns the rendered source) and ``ok`` is the value
    delivered by the ``loadFinished`` signal.
    """

    def __init__(self, url):
        # Reuse an existing application object if there is one, so that
        # several pages can be rendered one after another in one process.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebPage.__init__(self)
        self.frame = None
        self.ok = False
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Run the event loop until _loadFinished() quits it.
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the outcome of the load and leave the event loop."""
        self.ok = result
        self.frame = self.mainFrame()
        self.app.quit()


url = 'http://www.thesite.gov/search'
r = Render(url)
html = r.frame.toHtml()
But I don't know how to make it work in threads.
So, how can I do this, and if it's not possible, is there another fast way to get webpages generated by JS?
Given Qt's asynchronous nature, the QtWebKit methods are non-blocking as well, so there is no point in running them in threads. You can start them in parallel like this:
from functools import partial
from PySide.QtCore import QUrl
from PySide.QtGui import QApplication
from PySide.QtWebKit import QWebView, QWebSettings
TARGET_URLS = (
'http://stackoverflow.com',
'http://github.com',
'http://bitbucket.org',
'http://news.ycombinator.com',
'http://slashdot.org',
'http://www.reddit.com',
'http://www.dzone.com',
'http://www.ideone.com',
'http://jsfiddle.net',
)
class Crawler(object):
    """Load several URLs at once, one web view per URL, and collect the HTML.

    ``results`` maps each frame's URL to its HTML. ``browsers`` maps a
    browser id to a ``(web_view, finished)`` pair.
    """

    def __init__(self, app):
        self.app = app
        self.results = dict()
        self.browsers = dict()

    def _load_finished(self, browser_id, ok):
        """Store the page of ``browser_id``; quit the app once all are done."""
        # A single pre-formatted argument prints the same text on
        # Python 2 and Python 3.
        print('%s %s' % (ok, browser_id))
        web_view, _flag = self.browsers[browser_id]
        self.browsers[browser_id] = (web_view, True)
        frame = web_view.page().mainFrame()
        self.results[frame.url()] = frame.toHtml()
        # The result has been captured: drop the connection and stop the view.
        web_view.loadFinished.disconnect()
        web_view.stop()
        if all(finished for _view, finished in self.browsers.values()):
            print('all finished')
            self.app.quit()

    def start(self, urls):
        """Create one web view (image loading disabled) per URL and load it."""
        for browser_id, url in enumerate(urls):
            web_view = QWebView()
            web_view.settings().setAttribute(QWebSettings.AutoLoadImages,
                                             False)
            # Register the view before load() so the finished handler can
            # always look it up.
            self.browsers[browser_id] = (web_view, False)
            loaded = partial(self._load_finished, browser_id)
            web_view.loadFinished.connect(loaded)
            web_view.load(QUrl(url))
if __name__ == '__main__':
    app = QApplication([])
    crawler = Crawler(app)
    crawler.start(TARGET_URLS)
    # Runs the Qt event loop; returns once the application is asked to quit.
    app.exec_()
    # A single pre-formatted argument prints the same text on
    # Python 2 and Python 3.
    print('got: %s' % (crawler.results.keys(),))
Related
So, I was building a python web browser with pyqt5, I followed programming hero's tutorial but when I change the link of my webpage, the app just crashes and I have this error:
Code here:
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtCore import *
class Main(QMainWindow):
    """Minimal browser window: a web view plus a navigation toolbar."""

    def __init__(self) -> None:
        super(Main, self).__init__()
        self.showMaximized()

        self.browser = QWebEngineView()
        self.browser.setUrl(QUrl('https://google.com'))
        self.setCentralWidget(self.browser)

        self.navbar = QToolBar()
        self.addToolBar(self.navbar)

        self.btn_back = QAction('Back', self)
        self.btn_back.triggered.connect(self.browser.back)
        self.navbar.addAction(self.btn_back)

        self.btn_forward = QAction('Forward', self)
        self.btn_forward.triggered.connect(self.browser.forward)
        self.navbar.addAction(self.btn_forward)

        self.btn_reload = QAction('Reload', self)
        self.btn_reload.triggered.connect(self.browser.reload)
        self.navbar.addAction(self.btn_reload)

        self.home = QAction('🏠', self)
        self.home.triggered.connect(self.urlhome)
        self.navbar.addAction(self.home)

        # Address bar: Enter navigates, and page changes update the text.
        self.bar = QLineEdit()
        self.bar.returnPressed.connect(self.navigate_to)
        self.navbar.addWidget(self.bar)
        self.browser.urlChanged.connect(self.nurl)

    def urlhome(self):
        """Go back to the start page."""
        self.browser.setUrl(QUrl('https://google.com'))

    def navigate_to(self):
        """Load the address typed into the address bar."""
        # QLineEdit.text() returns a str, but setUrl() requires a QUrl;
        # passing the str straight through raises a TypeError.
        # fromUserInput() also accepts input without a scheme ("example.com").
        self.browser.setUrl(QUrl.fromUserInput(self.bar.text()))

    def nurl(self, q):
        """Show the URL ``q`` of the current page in the address bar."""
        self.bar.setText(q.toString())
# Create the application object, build the browser window and enter the
# Qt event loop.
app = QApplication(sys.argv)
app.setApplicationName('Private browser ^^')
# Main.__init__ calls showMaximized() itself, so no show() call is needed.
window = Main()
app.exec_()
def navigate_to(self):
url = self.bar.text()
# NOTE(review): url is a plain str here, whereas every other setUrl() call
# in this class passes a QUrl.
self.browser.setUrl(url)
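The only explanation is that self.bar.text() returns a str, while setUrl() expects a different type: a QUrl, as in the other setUrl() calls in the class. Wrap the text before passing it, e.g. self.browser.setUrl(QUrl(url)).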
this code opens web site with Qt browser. If internet connection is lost I want to give Information message with QMessageBox:
import sys
from PyQt5.QtCore import *
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication, QWidget, QMainWindow, QLabel
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtGui import QGuiApplication as App
from PyQt5.QtGui import QPixmap, QWindow
from PyQt5 import QtNetwork, QtCore, QtWidgets
import urllib
from urllib.request import urlopen
import threading
import time
# Window that loads a web page and starts two worker threads: one polls the
# internet connection, the other reloads the page or shows a message box
# depending on the result.
class WebApp(QMainWindow):
def __init__(self):
self.is_connected = None
self.is_msgshow = True
# NOTE(review): these attributes and the QMessageBox are created before
# super().__init__() is called.
self.msg = QtWidgets.QMessageBox()
super().__init__()
self.title = "OZI"
# NOTE(review): both threads are started before self.web is assigned below,
# and neither is a daemon thread.
self.t_internet = threading.Thread(target=self.is_internet)
self.t_internet.start()
self.t_refreshpage = threading.Thread(target=self.refresh_page)
self.t_refreshpage.start()
self.web = QWebEngineView()
self.web.window().setWindowTitle(self.title)
self.web.load(QUrl("http://www.google.com"))
self.web.showFullScreen()
# Runs in the t_internet thread: every 5 seconds tries to open google.com and
# stores the outcome in self.is_connected.
def is_internet(self):
"""
Query internet using python
:return:
"""
while True:
time.sleep(5)
try:
urlopen("http://www.google.com", timeout=1)
self.is_connected = True
except urllib.error.URLError as Error:
print(Error)
self.is_connected = False
print(self.is_connected)
# Runs in the t_refreshpage thread: polls self.is_connected every 0.1 s.
# NOTE(review): self.is_connected is written by is_internet() in another
# thread without any locking, and the widget calls below are made from this
# worker thread rather than from the thread that created the widgets.
def refresh_page(self):
while True:
time.sleep(.1)
if self.is_connected == False:
time.sleep(5)
if self.is_connected == True:
self.web.page().action(QWebEnginePage.Reload).trigger()
else:
if self.is_msgshow == True:
print('testtt')
# NOTE(review): is_msgshow is cleared only after information() returns, and
# nothing in this class sets it back to True afterwards.
# NOTE(review): presumably information() is the static QMessageBox helper
# that opens its own dialog rather than showing self.msg - confirm.
self.msg.information(None, 'INFO', 'PLEASE CHECK YOUR INTERNET CONNECTION!!!')
self.is_msgshow = False
self.msg.close()
else:
pass
# Script entry point: create the application and the window, then run the
# event loop.
if __name__ == '__main__':
app = QApplication(sys.argv)
ex = WebApp()
sys.exit(app.exec_())
However, I couldn't succeed at
if self.is_msgshow == True:
print('testtt')
this part while internet connection is lost my code opens many MessageBox. I guess my fault is controlling self.is_msgshow flag.
Beyond the problem you are pointing out, your code has other errors that are more dangerous than the one indicated: you should not access the GUI from another thread, and you should not access the same variable from two threads without protecting it with a mutex, semaphore or similar. For example, you are calling self.web.page().action(QWebEnginePage.Reload).trigger() and self.msg.information(None, 'INFO', 'PLEASE CHECK YOUR INTERNET CONNECTION!!!') from the secondary thread, and the variable "is_connected" is accessed by several threads.
Another error is that the method QMessageBox::information() is static and creates a new object that is not directly accessible; instead you must use the object "self.msg" through its own methods to display the necessary information.
Considering the above, I have created a class that is only responsible for checking the status of the connection and emitting a signal when it changes. To send information between threads in Qt you can use signals or QMetaObject::invokeMethod(), in addition to the classic mutexes and semaphores.
Then it is only necessary to analyze the value of the status sent to the GUI to implement the logic:
import sys
import threading
import time
import urllib
from urllib.request import urlopen
from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets
class ConnectivityManager(QtCore.QObject):
    """Poll a URL from a background thread and report connectivity changes.

    ``statusChanged`` is emitted with the new state whenever the outcome of
    a check differs from the previously stored one.
    """

    statusChanged = QtCore.pyqtSignal(bool)

    def __init__(self, *, timeout=4000, parent=None):
        super().__init__(parent)
        self._status = False
        self._timeout = timeout

    def start(self):
        """Start the polling loop in a daemon thread."""
        threading.Thread(target=self._check, daemon=True).start()

    # Must be a real decorator: _update_status() emits ``self.status`` and
    # relies on it evaluating to the bool, not to a bound method.
    @QtCore.pyqtProperty(bool, notify=statusChanged)
    def status(self):
        """Last stored connectivity state."""
        return self._status

    # Must be a real decorator: _check() calls this by name through
    # QMetaObject.invokeMethod(), which needs a registered slot.
    @QtCore.pyqtSlot(bool)
    def _update_status(self, status):
        """Store ``status`` and emit ``statusChanged`` if it changed."""
        if self._status != status:
            self._status = status
            self.statusChanged.emit(self.status)

    def _check(self):
        """Worker loop: probe the network, report the result, sleep 5 s."""
        while True:
            try:
                # Only reachability matters; close the response right away.
                with urlopen("http://www.google.com", timeout=1):
                    status = True
            except OSError:
                # URLError is an OSError subclass; so are the socket
                # timeouts that would otherwise end this thread.
                status = False
            # Queue the call instead of invoking _update_status() directly
            # from this worker thread.
            QtCore.QMetaObject.invokeMethod(
                self,
                "_update_status",
                QtCore.Qt.QueuedConnection,
                QtCore.Q_ARG(bool, status),
            )
            time.sleep(5)
class WebApp(QtWidgets.QMainWindow):
    """Main window that shows a web page and reacts to connectivity changes."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("OZI")

        self.web = QtWebEngineWidgets.QWebEngineView()
        self.setCentralWidget(self.web)
        self.web.load(QtCore.QUrl("http://www.google.com"))

        # Build the message box before monitoring starts, so that
        # on_status_changed() always finds it.
        self.msg = QtWidgets.QMessageBox()
        self.msg.setWindowTitle("INFO")
        self.msg.setText("PLEASE CHECK YOUR INTERNET CONNECTION!!!")

        self.connectivity_manager = ConnectivityManager()
        self.connectivity_manager.statusChanged.connect(self.on_status_changed)
        self.connectivity_manager.start()

    @QtCore.pyqtSlot(bool)
    def on_status_changed(self, status):
        """Reload the page when connected; otherwise show the warning box."""
        if status:
            self.msg.hide()
            self.statusBar().showMessage("Connected")
            self.web.page().action(QtWebEngineWidgets.QWebEnginePage.Reload).trigger()
        else:
            self.statusBar().showMessage("Disconnected")
            self.msg.show()


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    ex = WebApp()
    ex.showFullScreen()
    sys.exit(app.exec_())
On the other hand, the same logic can be implemented using QtNetwork without the need for threads:
import sys
from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets
class ConnectivityManager(QtCore.QObject):
    """Check connectivity periodically with QtNetwork, without threads.

    ``statusChanged`` is emitted with the new state whenever the outcome of
    a check differs from the previously stored one.
    """

    statusChanged = QtCore.pyqtSignal(bool)

    def __init__(self, *, timeout=4000, parent=None):
        super().__init__(parent)
        self._status = False
        self._timeout = timeout

        self.manager = QtNetwork.QNetworkAccessManager(self)
        # Single-shot watchdog: if it fires before the reply finishes,
        # verify_status() treats the request as timed out.
        self._timer = QtCore.QTimer(
            singleShot=True, interval=self._timeout, timeout=self.verify_status
        )

    def start(self):
        """Schedule the first check as soon as the event loop runs."""
        QtCore.QTimer.singleShot(0, self._check)

    # Must be a real decorator: _update_status() emits ``self.status`` and
    # relies on it evaluating to the bool, not to a bound method.
    @QtCore.pyqtProperty(bool, notify=statusChanged)
    def status(self):
        """Last stored connectivity state."""
        return self._status

    @QtCore.pyqtSlot(bool)
    def _update_status(self, status):
        """Store ``status`` and emit ``statusChanged`` if it changed."""
        if self._status != status:
            self._status = status
            self.statusChanged.emit(self.status)

    def _check(self):
        """Send one request and arm the watchdog timer."""
        url = QtCore.QUrl("https://www.google.com/")
        req = QtNetwork.QNetworkRequest(url)
        self._reply = self.manager.get(req)
        self._reply.finished.connect(self.verify_status)
        self._timer.start()

    @QtCore.pyqtSlot()
    def verify_status(self):
        """Evaluate the current request, then schedule the next check.

        Called either by the reply's ``finished`` signal (the timer is still
        active) or by the timer itself (the request took too long).
        """
        if self._timer.isActive():
            # The reply arrived in time.
            self._timer.stop()
            if self._reply.error() == QtNetwork.QNetworkReply.NoError:
                v = self._reply.attribute(
                    QtNetwork.QNetworkRequest.HttpStatusCodeAttribute
                )
                # Guard against a missing status code before comparing.
                if v is not None and 200 <= v < 300:
                    self._update_status(True)
                else:
                    print("error", "code error: {}".format(v))
                    self._update_status(False)
            else:
                print("error", self._reply.errorString())
                self._update_status(False)
        else:
            # The timer fired first. Disconnect before aborting so that the
            # abort does not call this slot a second time.
            self._reply.finished.disconnect(self.verify_status)
            self._reply.abort()
            print("Timeout")
            self._update_status(False)
        QtCore.QTimer.singleShot(5000, self._check)
        self._reply.deleteLater()
class WebApp(QtWidgets.QMainWindow):
    """Main window that shows a web page and reacts to connectivity changes."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("OZI")

        self.web = QtWebEngineWidgets.QWebEngineView()
        self.setCentralWidget(self.web)
        self.web.load(QtCore.QUrl("http://www.google.com"))

        # Build the message box before monitoring starts, so that
        # on_status_changed() always finds it.
        self.msg = QtWidgets.QMessageBox()
        self.msg.setWindowTitle("INFO")
        self.msg.setText("PLEASE CHECK YOUR INTERNET CONNECTION!!!")

        self.connectivity_manager = ConnectivityManager()
        self.connectivity_manager.statusChanged.connect(self.on_status_changed)
        self.connectivity_manager.start()

    @QtCore.pyqtSlot(bool)
    def on_status_changed(self, status):
        """Reload the page when connected; otherwise show the warning box."""
        if status:
            self.msg.hide()
            self.statusBar().showMessage("Connected")
            self.web.page().action(QtWebEngineWidgets.QWebEnginePage.Reload).trigger()
        else:
            self.statusBar().showMessage("Disconnected")
            self.msg.show()


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    ex = WebApp()
    ex.showFullScreen()
    sys.exit(app.exec_())
I do not know the pyqt5 at all but guessing by flow of your code, here:
if self.is_msgshow == True:
print('testtt')
self.msg.information(None, 'INFO', 'PLEASE CHECK YOUR INTERNET CONNECTION!!!')
self.is_msgshow = False
self.msg.close()
It seems the self.msg.information() call is expected to be synchronous. If so, then for as long as the dialog remains open, is_msgshow is still True, because you only change it once the dialog has been closed. That is your bug: if a new event occurs in the meantime, nothing stops another dialog from being shown. The fix is simple: move self.is_msgshow = False so that it is the very first thing done in that code block:
if self.is_msgshow:
self.is_msgshow = False
print('testtt')
self.msg.information(None, 'INFO', 'PLEASE CHECK YOUR INTERNET CONNECTION!!!')
self.msg.close()
Additional note, you may want to reset it back to True once connectivity is restored otherwise you won't show a thing next time network is down.
The QMessageBox class provides a modal dialog for informing the user or for asking the user a question and receiving an answer.
https://doc.qt.io/qtforpython/PySide2/QtWidgets/QMessageBox.html
working on simple GUI project. I've got some code from online,and found out how to connect the IP-webcam app, but the question is how do I use this code in my PyQt4 GUI so that the visual of the camera will be shown in the scroll-area widget.
This is the code i used:
import urllib
import cv2
import numpy as np
url = 'http://192.168.0.100:8080/shot.jpg'

while True:
    # Fetch one snapshot. A new connection is opened on every iteration,
    # so always close it.
    imgResp = urllib.urlopen(url)
    try:
        data = imgResp.read()
    finally:
        imgResp.close()
    imgNp = np.array(bytearray(data), dtype=np.uint8)
    img = cv2.imdecode(imgNp, -1)
    # imdecode() returns None when the bytes cannot be decoded; skip such
    # frames instead of handing None to imshow().
    if img is not None:
        # all the opencv processing is done here
        cv2.imshow('test', img)
    if cv2.waitKey(10) == ord('q'):
        break

# Close the preview window before the script ends.
cv2.destroyAllWindows()
As @furas points out, a possible option is to use numpy and cv2 to convert the image to a QPixmap and display it in a QLabel, running this in a loop so that it looks like streaming.
But instead of getting complicated with all of the above, the simplest thing is to use QtNetwork to get the bytes and convert it directly to QPixmap and send it through signals:
from PyQt4 import QtCore, QtGui, QtNetwork
class IPWebcam(QtCore.QObject):
    """Repeatedly download an image URL and publish each frame as a QPixmap.

    ``pixmapChanged`` is emitted for every response that decodes to an image.
    """

    pixmapChanged = QtCore.pyqtSignal(QtGui.QPixmap)

    def __init__(self, url, parent=None):
        super(IPWebcam, self).__init__(parent)
        self._url = url
        self.m_manager = QtNetwork.QNetworkAccessManager(self)
        self.m_manager.finished.connect(self._on_finished)
        self.m_stopped = True

    def start(self):
        """Begin fetching frames."""
        self.m_stopped = False
        self._launch_request()

    def stop(self):
        """Stop issuing new requests once the current one has finished."""
        self.m_stopped = True

    def _launch_request(self):
        """Send one GET request for the configured URL."""
        request = QtNetwork.QNetworkRequest(QtCore.QUrl(self._url))
        self.m_manager.get(request)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def _on_finished(self, reply):
        """Turn a finished reply into a pixmap and request the next frame."""
        ba = reply.readAll()
        # One reply is created per frame; release each after reading it so
        # they do not pile up for the lifetime of the manager.
        reply.deleteLater()
        pixmap = QtGui.QPixmap()
        if pixmap.loadFromData(ba):
            self.pixmapChanged.emit(pixmap)
        if not self.m_stopped:
            self._launch_request()
class Widget(QtGui.QWidget):
    """Window with a label showing the camera image and a Start/Stop button."""

    def __init__(self, parent=None):
        super(Widget, self).__init__(parent)
        self.m_label = QtGui.QLabel()
        self.m_button = QtGui.QPushButton(
            "Start", clicked=self.onClicked, checkable=True
        )
        lay = QtGui.QVBoxLayout(self)
        lay.addWidget(self.m_label)
        lay.addWidget(self.m_button)
        self.resize(640, 480)

        url = "http://192.168.0.100:8080/shot.jpg"
        self.m_webcam = IPWebcam(url, self)
        # Every new frame goes straight to the label.
        self.m_webcam.pixmapChanged.connect(self.m_label.setPixmap)

    @QtCore.pyqtSlot(bool)
    def onClicked(self, checked):
        """Start or stop the camera to match the button's checked state."""
        if checked:
            self.m_button.setText("Stop")
            self.m_webcam.start()
        else:
            self.m_button.setText("Start")
            self.m_webcam.stop()


if __name__ == "__main__":
    import sys

    app = QtGui.QApplication(sys.argv)
    w = Widget()
    w.show()
    sys.exit(app.exec_())
I am using the below code to take screenshot from a webpage. But the problem is I am getting blank screenshot for certain websites. Obviously the reason is the page size is not fitting. Can some one please help me to fix this code so that it works for all webpages.
import sys
import time
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
class Screenshot(QWebView):
    """Render a web page off-screen and save it as an image file."""

    def __init__(self):
        # Reuse an existing application object if there is one.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebView.__init__(self)
        self._loaded = False
        self._load_ok = False
        self.loadFinished.connect(self._loadFinished)

    def capture(self, url, output_file):
        """Load ``url`` and save a full-page image to ``output_file``.

        Returns True if the image was written. Returns False if the page did
        not load, reported an empty size, or could not be saved.
        """
        self._load_ok = False
        self.load(QUrl(url))
        finished = self.wait_load()
        # set to webpage size
        frame = self.page().mainFrame()
        size = frame.contentsSize()
        if not (finished and self._load_ok) or size.isEmpty():
            # Rendering now would only produce a blank or null image.
            print('nothing to render for', url)
            return False
        self.page().setViewportSize(size)
        # render image
        image = QImage(self.page().viewportSize(), QImage.Format_ARGB32)
        # A new QImage holds uninitialised data; start from opaque white so
        # areas the page does not paint are well defined.
        image.fill(0xFFFFFFFF)
        painter = QPainter(image)
        frame.render(painter)
        painter.end()
        print('saving', output_file)
        return image.save(output_file)

    def wait_load(self, delay=0, timeout=30):
        """Process events until the page has loaded or ``timeout`` s pass.

        Returns True if ``loadFinished`` arrived in time.
        """
        deadline = time.time() + timeout
        # process app events until page loaded
        while not self._loaded and time.time() < deadline:
            self.app.processEvents()
            time.sleep(delay)
        finished = self._loaded
        self._loaded = False
        return finished

    def _loadFinished(self, result):
        """Remember that loading ended and whether it succeeded."""
        self._load_ok = result
        self._loaded = True


s = Screenshot()
s.capture('https://docs.python.org/2/library/', 'website3.png')
I have tried to modify like below from a similar thread:
PyQt: QImage() returns a 'Null'-Image. I have made a change according to the post but cant make it work.
import sys
import time
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
class Screenshot(QWebView):
    """Render a web page off-screen and save it as an image file."""

    def __init__(self):
        # Reuse an existing application object if there is one.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebView.__init__(self)
        self._loaded = False
        self._load_ok = False
        self.loadFinished.connect(self._loadFinished)

    def capture(self, url, output_file):
        """Load ``url`` and save a full-page image to ``output_file``.

        Returns True if the image was written. Returns False if the page did
        not load, reported an empty size, or could not be saved.
        """
        self._load_ok = False
        self.load(QUrl(url))
        finished = self.wait_load()
        # set to webpage size
        frame = self.page().mainFrame()
        size = frame.contentsSize()
        if not (finished and self._load_ok) or size.isEmpty():
            # Rendering now would only produce a blank or null image.
            print('nothing to render for', url)
            return False
        self.page().setViewportSize(size)
        # render image
        image = QImage(self.page().viewportSize(), QImage.Format_ARGB32)
        # A new QImage holds uninitialised data; start from opaque white so
        # areas the page does not paint are well defined.
        image.fill(0xFFFFFFFF)
        painter = QPainter(image)
        frame.render(painter)
        painter.end()
        print('saving', output_file)
        return image.save(output_file)

    def wait_load(self, delay=0, timeout=30):
        """Wait for the page to load, then for a non-empty contents size.

        Returns True if ``loadFinished`` arrived within ``timeout`` seconds.
        """
        # First wait for loadFinished. The contents size alone is checked
        # right after load() is called, before any content has arrived.
        deadline = time.time() + timeout
        while not self._loaded and time.time() < deadline:
            self.app.processEvents()
            time.sleep(delay)
        frame = self.page().mainFrame()
        if frame.contentsSize().isEmpty():
            print('ContentsSize = (w: {}, h: {})'.format(
                frame.contentsSize().width(), frame.contentsSize().height()))
        count = 0  # used so we're not starting an infinite loop
        while frame.contentsSize().isEmpty() and count < 5:
            count += 1
            self.app.processEvents()
            time.sleep(1)
        finished = self._loaded
        self._loaded = False
        return finished

    def _loadFinished(self, result):
        """Remember that loading ended and whether it succeeded."""
        self._load_ok = result
        self._loaded = True


s = Screenshot()
s.capture('https://stackoverflow.com/', 'website4.png')
This is a PySide GUI. I have created two panels in two different .py files, main.py and sub.py, and each panel displays a web browser (a QWebView). Currently, when the user presses the button in main.py, that browser is redirected to a page, e.g. "www.google.com", and the user has to click the button in sub.py to be redirected to e.g. "www.facebook.com"; they work as independent functions.
I would like to ask whether there is a way to link the two, so that when the user presses the button in main.py both web browsers change together.
Yes, you can have multiple items triggered by the same connection.
// One signal may be connected to any number of slots; both slots run when
// the button is clicked.
QObject::connect(myButton, SIGNAL(clicked()),
                 this, SLOT(launchGoogleSiteOnBrowserA()));
QObject::connect(myButton, SIGNAL(clicked()),
                 pointerToOtherClass, SLOT(launchFacebookSiteOnBrowserB()));
http://qt-project.org/doc/qt-4.8/signalsandslots.html
EDIT: Following another answer about using signals and slots in PyQt...
https://stackoverflow.com/a/7618282/999943
Here is a way to do it in PyQt:
widget.pyw
from PyQt4 import QtCore, QtGui
from mybrowser import Browser
class Widget(QtGui.QWidget):
    """Two browsers side by side under one button that navigates both."""

    def __init__(self):
        super(Widget, self).__init__()
        self.myButton = QtGui.QPushButton('Open Facebook and Google')
        self.myHLayout = QtGui.QHBoxLayout()
        self.myVLayout = QtGui.QVBoxLayout()
        self.myVLayout.addWidget(self.myButton)

        url = QtCore.QUrl('http://www.yahoo.com')
        self.browserLHS = Browser(url)
        self.browserRHS = Browser(url)
        self.myHLayout.addWidget(self.browserLHS)
        self.myHLayout.addWidget(self.browserRHS)

        # New-style connection, matching the signal syntax Browser uses.
        self.myButton.clicked.connect(self.changePageOnBothBrowsers)

        self.myVLayout.addLayout(self.myHLayout)
        self.setLayout(self.myVLayout)

    def changePageOnBothBrowsers(self):
        """Send the left browser to Google and the right one to Facebook."""
        self.browserLHS.load(QtCore.QUrl.fromUserInput('google.com'))
        self.browserRHS.load(QtCore.QUrl.fromUserInput('facebook.com'))


if __name__ == '__main__':
    import sys

    app = QtGui.QApplication(sys.argv)
    widget = Widget()
    widget.show()
    sys.exit(app.exec_())
mybrowser.pyw
from PyQt4 import QtCore, QtGui, QtNetwork, QtWebKit
import jquery_rc
class Browser(QtWebKit.QWebView):
    """Web view with an address field that runs a bundled script after loads."""

    def __init__(self, url):
        super(Browser, self).__init__()
        self.progress = 0

        # Read the script from the Qt resource system; fall back to an
        # empty script when the resource cannot be opened.
        self.jQuery = ''
        script_file = QtCore.QFile(":/jquery.min.js")
        open_mode = QtCore.QIODevice.ReadOnly | QtCore.QFile.Text
        if script_file.open(open_mode):
            stream = QtCore.QTextStream(script_file)
            self.jQuery = stream.readAll()
            script_file.close()

        QtNetwork.QNetworkProxyFactory.setUseSystemConfiguration(True)
        self.load(url)

        self.loadFinished.connect(self.adjustLocation)
        self.titleChanged.connect(self.adjustTitle)
        self.loadProgress.connect(self.setProgress)
        self.loadFinished.connect(self.finishLoading)

        self.locationEdit = QtGui.QLineEdit(self)
        vertical_policy = self.locationEdit.sizePolicy().verticalPolicy()
        self.locationEdit.setSizePolicy(QtGui.QSizePolicy.Expanding,
                                        vertical_policy)
        self.locationEdit.returnPressed.connect(self.changeLocation)

    def adjustLocation(self):
        """Show the current URL in the address field."""
        current = self.url().toString()
        self.locationEdit.setText(current)

    def changeLocation(self):
        """Navigate to the address typed into the address field."""
        typed = self.locationEdit.text()
        self.load(QtCore.QUrl.fromUserInput(typed))
        self.setFocus()

    def adjustTitle(self):
        """Set the window title, adding the percentage while a load runs."""
        if self.progress <= 0 or self.progress >= 100:
            self.setWindowTitle(self.title())
            return
        self.setWindowTitle("%s (%s%%)" % (self.title(), self.progress))

    def setProgress(self, p):
        """Store the load progress ``p`` and refresh the title."""
        self.progress = p
        self.adjustTitle()

    def finishLoading(self):
        """Mark the load as complete and evaluate the script in the page."""
        self.progress = 100
        self.adjustTitle()
        main_frame = self.page().mainFrame()
        main_frame.evaluateJavaScript(self.jQuery)
#if __name__ == '__main__':
#
# import sys
#
# app = QtGui.QApplication(sys.argv)
#
# if len(sys.argv) > 1:
# url = QtCore.QUrl(sys.argv[1])
# else:
# url = QtCore.QUrl('http://www.google.com/ncr')
#
# browser = Browser(url)
# browser.show()
#
# sys.exit(app.exec_())
Hope that helps.