I would like to know the best way to do a url redirect in pyqt webview. The following is an extract of my code. I originally did it with web.load(QtCore.QUrl(url)) however I receive the following warning:
content-type missing in HTTP POST, defaulting to application/x-www-form-urlencoded
Use QNetworkRequest::setHeader() to fix this problem. When I try use the QNetworkRequest class nothing happens.
class qt:
def __init__(self):
self.app=QApplication(sys.argv)
def q(self, meth, url):
t=meth
url=url
self.web=QWebView()
self.web.load(QUrl(url))
self.web.loadFinished.connect(t)
self.web.show()
self.app.exec_()
class run:
def __init__(self,url):
self.n=qt()
self.n.q(self.method,url)
def method(self):
nurl="https://www.newurl.com"
#self.n.web.load(QtCore.QUrl(nurl))
req = QtNetwork.QNetworkRequest()
req.setRawHeader("content-type", "application/x-www-form-urlencoded")
self.nam = QtNetwork.QNetworkAccessManager()
self.nam.get(req)
if __name__ == '__main__':
url='https://www.oldurl.com/'
i=run(url)
I have tried unsuccesfully to save fetched API data to sqlite database in a Flask app. I have used requests.get() to extract external API data to dataframe. The function "extract_to_df_race" works when i test it in Jupyter Notebook. I have placed try-except statements to print error messages to console. Since, there were no error messages logged in console, I initally presume that the data has been successfully fetched and save to database. However, upon checking the database, none of the records have been saved.
I have used a custom Flask command to execute the 'historical_records' function to one-off load the database.
Are there any better methods of debugging that i could try?
app/api/log.py
from app import app
from app.models import Race, db
from app.utils import *
import click
#app.cli.command()
def historical_records():
seasons = [2015]
races_round = range(1,5)
df_races = extract_to_df_race('results', seasons, races_round)
save_races_to_db(df_races, db)
def save_races_to_db(df_races, db):
for idx,row in df_races.iterrows():
r = Race()
r.url = df_races.loc[idx,"url"]
r.season = df_races.loc[idx,"season"]
r.raceName = df_races.loc[idx,"raceName"]
db.session.add(r)
try:
db.session.commit()
except Exception as e:
db.session.rollback()
eprint(str(e))
To execute historical_records function from virtual environment, i ran "export FLASK_APP=app/api/log.py", then "flask historical_records"
app/utils.py
from __future__ import print_function
import requests
import json
import pandas as pd
import datetime
import sys
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
def extract_to_df_race(results_type, seasons, races_round):
df_races = pd.DataFrame()
if results_type == 'results':
for s in seasons:
for r in races_round:
try:
response = requests.get(API_URL)
response.raise_for_status()
dictionary = response.content
dictionary = json.loads(dictionary)
races = transform_func(dictionary, s, r)
df_races = pd.concat([df_races, races])
except requests.exceptions.HTTPError as err:
eprint(err)
sys.exit(1)
return df_races
Races model
class Race(db.Model, Serializer):
__tablename__ = 'races'
raceId = db.Column(db.Integer, primary_key=True)
url = db.Column(db.String(50), unique=True)
season = db.Column(db.Integer)
raceName = db.Column(db.String(50))
def __init__(self, **kwargs):
super(Race, self).__init__(**kwargs)
I would like to upload multiple files using a thread. This way the files can upload in the background and not make the user wait.
Here is my simplified code:
In app.py:
from file_upload import upload_process
from flask import request
#app.route('/complete', methods=['POST'])
def complete():
id = 5 #for simplified example
upload_process(id) #My thread
...
return render_template('complete.html')
In file_upload.py
from threading import Thread
from flask import request
def upload_process(id):
thr = Thread(target = upload_files, args = [id])
thr.start()
def upload_files(id):
file_1= request.files['file_1']
file_2= request.files['file_2']
file_3= request.files['file_3']
newFiles = FileStorage(id= id, file_1 = file_1.read(), file_2 =
file_2.read(), file_3 = file_3.read())
db.session.add(newFiles)
db.session.commit()
I get the error:
RuntimeError: Working outside of request context.
This typically means that you attempted to use functionality that needed an active HTTP request. Consult the documentation on testing for information about how to avoid this problem.
How would I get the request to work within the upload_files function.
(Without threading the files upload correctly.)
I have an application with many threads. One of them is flask, which is used to implement (auxiliary) API. It's used with low load and never exposed to the Internet, so build-in flask web server is perfectly fine.
My current code looks like this:
class API:
# ... all other stuff here, skipped
def run():
app = flask.Flask('API')
#app.route('/cmd1')
def cmd1():
self.cmd1()
#app.route('/cmd2')
def cmd2()
self.cmd2()
app.run()
I feel I done it wrong, because all docs says 'create flask app at module level'. But I don't want to do this - it messes up with my tests, and API is a small part of the larger application, which has own structure and conventions (each 'application' is a separate class running in one or more threads).
How can I use Flask inside class?
Although this works it doesn't feel compliant with the Flask style guide. If you need to wrap a Flask application inside your project, create a separate class to your needs and add functions that should be executed
from flask import Flask, Response
class EndpointAction(object):
def __init__(self, action):
self.action = action
self.response = Response(status=200, headers={})
def __call__(self, *args):
self.action()
return self.response
class FlaskAppWrapper(object):
app = None
def __init__(self, name):
self.app = Flask(name)
def run(self):
self.app.run()
def add_endpoint(self, endpoint=None, endpoint_name=None, handler=None):
self.app.add_url_rule(endpoint, endpoint_name, EndpointAction(handler))
def action():
# Execute anything
a = FlaskAppWrapper('wrap')
a.add_endpoint(endpoint='/ad', endpoint_name='ad', handler=action)
a.run()
Some things to note here:
EndpointAction is supposed to be a wrapper that will execute your function and generate an empty 200 response. If you want you can edit the functionality
The endpoint handler can be anything that has a __call__ method defined
The endpoint name should be unique as it represents a view name
Adding endpoints after the application is not possible as the thread will block once the application starts. You can enable it by running the application on a separate thread but changing the URL map on the fly is not advised, neither thread safe
So I just came across the library Flask-Classful
which was really simple comparatively
To create a simple web app inside a class is this:
from flask import Flask
from flask_classful import FlaskView
app = Flask(__name__)
class TestView(FlaskView):
def index(self):
# http://localhost:5000/
return "<h1>This is my indexpage</h1>"
TestView.register(app,route_base = '/')
if __name__ == '__main__':
app.run(debug=True)
Handling multiple route and dynamic route is also simple
class TestView(FlaskView):
def index(self):
# http://localhost:5000/
return "<h1>This is my indexpage</h1>"
def secondpage(self):
# http://localhost:5000/secondpage
return "<h1>This is my second</h1>"
def thirdpage(self,name):
# dynamic route
# http://localhost:5000/thirdpage/sometext
return "<h1>This is my third page <br> welcome"+name+"</h1>"
TestView.register(app,route_base = '/')
Adding own route name with a different method that is also possible
from flask_classful import FlaskView,route
class TestView(FlaskView):
def index(self):
# http://localhost:5000/
return "<h1>This is my indexpage</h1>"
#route('/diffrentname')
def bsicname(self):
# customized route
# http://localhost:5000/diffrentname
return "<h1>This is my custom route</h1>"
TestView.register(app,route_base = '/')
This gives the potential to create separate class and handlers for a separate dependent and independent process and just import them as a package to run on the main file or wrapper file
from package import Classname
Classname.register(app,route_base = '/')
which is really simple and object-oriented
To complete Kostas Pelelis's answer, because I had some difficulty to find the why the Response wasn't directly using the Action returned value.
Here is another version of FLASK class without decorators :
class EndpointAction(object):
def __init__(self, action):
self.action = action
def __call__(self, *args):
# Perform the action
answer = self.action()
# Create the answer (bundle it in a correctly formatted HTTP answer)
self.response = flask.Response(answer, status=200, headers={})
# Send it
return self.response
class FlaskAppWrapper(object):
def add_all_endpoints(self):
# Add root endpoint
self.add_endpoint(endpoint="/", endpoint_name="/", handler=self.action)
# Add action endpoints
self.add_endpoint(endpoint="/add_X", endpoint_name="/add_X", handler=self.add_X)
# you can add more ...
def add_endpoint(self, endpoint=None, endpoint_name=None, handler=None):
self.app.add_url_rule(endpoint, endpoint_name, EndpointAction(handler))
# You can also add options here : "... , methods=['POST'], ... "
# ==================== ------ API Calls ------- ====================
def action(self):
# Dummy action
return "action" # String that will be returned and display on the webpage
# Test it with curl 127.0.0.1:5000
def add_X(self):
# Dummy action
return "add_X"
# Test it with curl 127.0.0.1:5000/add_X
Here is an example of mixing class and routing that seems reasonable to me. See also https://github.com/WolfgangFahl/pyFlaskBootstrap4/issues/2 (where i am a committer)
This design has been criticized so in the project there are some improvements to this code.
'''
Created on 27.07.2020
#author: wf
'''
from flask import Flask
from frontend.WikiCMS import Frontend
from flask import render_template
import os
class AppWrap:
def __init__(self, host='0.0.0.0',port=8251,debug=False):
self.debug=debug
self.port=port
self.host=host
scriptdir=os.path.dirname(os.path.abspath(__file__))
self.app = Flask(__name__,template_folder=scriptdir+'/../templates')
self.frontend=None
def wrap(self,route):
if self.frontend is None:
raise Exception("frontend is not initialized")
content,error=self.frontend.getContent(route);
return render_template('index.html',content=content,error=error)
def run(self):
self.app.run(debug=self.debug,port=self.port,host=self.host)
pass
def initFrontend(self,wikiId):
frontend=Frontend(wikiId)
frontend.open()
appWrap=AppWrap()
app=appWrap.app
#app.route('/', defaults={'path': ''})
#app.route('/<path:route>')
def wrap(route):
return appWrap.wrap(route)
if __name__ == '__main__':
appWrap.run()
A sidenote/addition to #Kostas Pelelis Answer (Sorry can't comment yet):
For all of you who wonder how to integrate the methods of the endpoint route: have a look at the function description for app.add_url_rule.
As stated there you can use the "methods" parameter to change the default "GET" method.
Kostas Pelelis code changed to a "POST" type method would look like this:
(Example with methods integrated + Endpoint-class that returns whatever your action-function returns [a html for example]
from flask import Flask, Response, render_template
class EndpointAction(object):
def __init__(self, action):
self.action = action
self.response = Response(status=200, headers={})
def __call__(self, *args):
response = self.action()
if response != None:
return response
else
return self.response
class FlaskAppWrapper(object):
app = None
def __init__(self, name):
self.app = Flask(name)
def run(self):
self.app.run()
def add_endpoint(self, endpoint=None, endpoint_name=None, handler=None, t_methods=None):
self.app.add_url_rule(endpoint, endpoint_name, EndpointAction(handler), methods=t_methods)
def action():
# Execute anything
print('i did something')
def returning_action():
# Returning for example an index hello world page
return render_template('index.html')
a = FlaskAppWrapper('wrap')
a.add_endpoint(endpoint='/ad', endpoint_name='ad', handler=action, req_methods=['POST'])
#just a little addition for handling of a returning actionhandler method
#-> i added another endpoint but for a returning method
a.add_endpoint(endpoint='/', endpoint_name='index_page', handler=returning_action, req_methods=['GET']
a.run()
While the templates/index.html could look like this (note render_templates expects a templates-folder in the same location as your py-file with specified htmls in it):
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Index Page</title>
</head>
<body>
<h1>Hello World!</h1>
</body>
</html>
This index page addition is called when the index route 'ip-address-of-the-webapp/' is visited (via usual browser visit -> GET request).
*Edit: to show how it would look like if your action-methods had params (for example from a route param) here an updated version of the endpoint class and the action class
class EndpointAction(object):
def __init__(self, action):
self.action = action
self.response = Response(status=200, headers={})
def __call__(self, *args, **kwargs):
response = self.action(**kwargs)
if response != None:
return response
else
return self.response
def param_action(param):
# Execute something (print param)
print(f'i did {param}')
[...]
a.add_endpoint(endpoint='/<param>', endpoint_name='parametric_action', handler=param_action, req_methods=['GET']
[...]
Using scrapy I faced a problem of javascript rendered pages. For the site Forum Franchise for example the link http://www.idee-franchise.com/forum/viewtopic.php?f=3&t=69, trying to scrap the source html I couldn't retrieve any posts because they seem to be "appended" after the page is being rendered (Probably through javascript).
So i was looking on the net for a solution to this problem, and i came across https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/ .
I am completely new to PYPQ, but was hoping to take a shortcut and copy paste some code.
This worked perfectly for when i tried to scrap a single page. But then when i implemented this in scrapy i get the following error :
QObject::connect: Cannot connect (null)::configurationAdded(QNetworkConfiguration) to QNetworkConfigurationManager::configurationAdded(QNetworkConfiguration)
QObject::connect: Cannot connect (null)::configurationRemoved(QNetworkConfiguration) to QNetworkConfigurationManager::configurationRemoved(QNetworkConfiguration)
QObject::connect: Cannot connect (null)::configurationChanged(QNetworkConfiguration) to QNetworkConfigurationManager::configurationChanged(QNetworkConfiguration)
QObject::connect: Cannot connect (null)::onlineStateChanged(bool) to QNetworkConfigurationManager::onlineStateChanged(bool)
QObject::connect: Cannot connect (null)::configurationUpdateComplete() to QNetworkConfigurationManager::updateCompleted()
If i scrap a single page, then no error occurs, but when i set crawler to recursive mode, then right at the second link i get an error that python.exe stopped working and the above error.
I will searching for what this could be, and somewhere i read a QApplication object should only be initiated once.
Could someone please tell me what should be the proper implementation?
The Spider
# -*- coding: utf-8 -*-
import scrapy
import sys, traceback
from bs4 import BeautifulSoup as bs
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from crawler.items import ThreadItem, PostItem
from crawler.utils import utils
class IdeefranchiseSpider(CrawlSpider):
name = "ideefranchise"
allowed_domains = ["idee-franchise.com"]
start_urls = (
'http://www.idee-franchise.com/forum/',
# 'http://www.idee-franchise.com/forum/viewtopic.php?f=3&t=69',
)
rules = [
Rule(LinkExtractor(allow='/forum/'), callback='parse_thread', follow=True)
]
def parse_thread(self, response):
print "Parsing Thread", response.url
thread = ThreadItem()
thread['url'] = response.url
thread['domain'] = self.allowed_domains[0]
thread['title'] = self.get_thread_title(response)
thread['forumname'] = self.get_thread_forum_name(response)
thread['posts'] = self.get_thread_posts(response)
yield thread
# paginate if possible
next_page = response.css('fieldset.display-options > a::attr("href")')
if next_page:
url = response.urljoin(next_page[0].extract())
yield scrapy.Request(url, self.parse_thread)
def get_thread_posts(self, response):
# using PYQTRenderor to reload page. I think this is where the problem
# occurs, when i initiate the PYQTPageRenderor object.
soup = bs(unicode(utils.PYQTPageRenderor(response.url).get_html()))
# sleep so that PYQT can render page
# time.sleep(5)
# comments
posts = []
for item in soup.select("div.post.bg2") + soup.select("div.post.bg1"):
try:
post = PostItem()
post['profile'] = item.select("p.author > strong > a")[0].get_text()
details = item.select('dl.postprofile > dd')
post['date'] = details[2].get_text()
post['content'] = item.select('div.content')[0].get_text()
# appending the comment
posts.append(post)
except:
e = sys.exc_info()[0]
self.logger.critical("ERROR GET_THREAD_POSTS %s", e)
traceback.print_exc(file=sys.stdout)
return posts
The PYPQ implementation
import sys
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
class PYQTPageRenderor(object):
def __init__(self, url):
self.url = url
def get_html(self):
r = Render(self.url)
return unicode(r.frame.toHtml())
The proper implementation, if you want to do it yourself, would be to create a downlader middleware that uses PyQt to process a request. It will be instantiated once by Scrapy.
Should not be that complicated, just
Create QTDownloader class in the middleware.py file of your project
The constructor should create the QApplication object.
The process_request method should do the url loading, and HTML fetching. Note that you return a Response object with the HTML string.
You might do appropriate clean-up in a _cleanup method of your class.
Finally, activate your middleware by adding it to the DOWNLOADER_MIDDLEWARES variable of the settings.py file of your project.
If you don't want to write your own solution, you could use an existing middleware that uses Selenium to do the downloading, like scrapy-webdriver. If you don't want to have a visible browser, you can instruct it to use PhantomJS.
EDIT1:
So the proper way to do it, as pointed out by Rejected is to use a download handler. The idea is similar, but the downloading should happen in a download_request method, and it should be enabled by adding it to DOWNLOAD_HANDLERS. Take a look to the WebdriverDownloadHandler for an example.