flask + gevent pywsgi blocking on pandas and built-in io - python

Calling the built-in open() or pandas.read_csv() seems to block all other requests to my Flask + gevent pywsgi web server, despite monkey.patch_all(). Do I need to call special gevent I/O functions to make them non-blocking?
from gevent import monkey, pywsgi, sleep
monkey.patch_all()

import pandas as pd
from flask import Flask

app = Flask(__name__)
FILENAME = 'c:/temp/testcsv.csv'

@app.route('/')
def fastresult():
    return 'this should return immediately'

@app.route('/non_blocking_sleep')
def non_blocking_sleep():
    sleep(10)
    return 'using gevent.sleep does NOT block other requests, as expected'

@app.route('/readcsv')
def readcsv():
    """
    this blocks any other request until the read completes
    """
    df = pd.read_csv(FILENAME)
    return df.info()

@app.route('/openfile')
def openfile():
    """
    this blocks any other request until the read completes
    """
    with open(FILENAME, 'r') as file:
        res = file.readlines()
    return res[:1000]

http_server = pywsgi.WSGIServer(('', 5000), app)
http_server.serve_forever()
Tested on Anaconda 5.3 (python 3.7) 64 bit on both Windows and Linux.
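For what it's worth, ordinary disk reads and pandas' C-level CSV parsing are not made cooperative by monkey.patch_all() (regular file I/O and CPU-bound work never yield to the gevent hub), so one commonly suggested workaround is to push the blocking call onto gevent's thread pool. A minimal sketch, reusing app and FILENAME from the snippet above; the route name and return value are illustrative, not taken from the original code:
import gevent
import pandas as pd

@app.route('/readcsv_offloaded')
def readcsv_offloaded():
    # Run the blocking read in gevent's thread pool; only this greenlet waits,
    # while the hub keeps serving other requests in the meantime.
    df = gevent.get_hub().threadpool.apply(pd.read_csv, (FILENAME,))
    return str(df.shape)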

Related

How Do You Thread an External Hanging API Call in Flask?

Getting the specifics out of the way, I'm writing an open source P2P social network over IPFS and Flask -- I know, it's been done. I'm choosing Flask because PyInstaller can put it in an exe file.
I am attempting to update my IPNS every 10 minutes to publish all status updates I've added to the network during those 10 minutes. The cron function of the setup class (in library.py) is where that updater function lives. At first, I threaded the cron function from __init__ of setup. The server hung. Then I moved the threading call over to app.before_first_request. The server still hangs.
https://pastebin.com/bXHTuH83 (main.py)
from flask import Flask, jsonify
from library import *

#=========================TO BE DELETED=========================================
def pretty(json):
    json = dumps(loads(json), indent=4, sort_keys=True)
    return json
#===============================================================================

app = Flask(__name__)
GANN = setup()

@app.before_first_request
def cron_job():
    Thread(target=GANN.cron())

@app.route("/")
def home():
    return "Hello World!!!"

if __name__ == "__main__":
    app.run(port="80", debug=True, threaded=True)
https://pastebin.com/W5P8Tpvd (library.py)
from threading import Thread
from time import time, sleep
import urllib.request
from json import loads, dumps

def api(*argv, **kwargs):
    url = "http://127.0.0.1:5001/api/v0/"
    for arg in argv:
        arg = arg.replace(" ", "/")
        if arg[:-1] != "/":
            arg += "/"
        url += arg
    url = url[0:-1]
    if kwargs:
        url += "?"
        for val in kwargs:
            url = url + val + "=" + kwargs[val] + "&"
        url = url[0:-1]
    print(url)
    try:
        with urllib.request.urlopen(url, timeout=300) as response:
            return response.read()
    except:
        return b"""{"ERROR": "CANNOT CONNECT TO IPFS!"}"""

class setup():
    def __init__(self):
        api("files", "mkdir", arg="/GANN", parents="True")
        self.root_hash = ""

    def update_root(self):
        try:
            for entry in loads(api("files", "ls", l="True").decode())["Entries"]:
                if entry["Name"] == "GANN":
                    self.root_hash = entry["Hash"]
        except:
            return """{"ERROR": "CANNOT FIND ROOT DIRECTORY"}"""

    def publish_root(self):
        api("name", "publish", arg=self.root_hash)

    def cron(self):
        while True:
            print("CRON Thread Started!")
            self.update_root()
            self.publish_root()
            sleep(600)
I have searched the web for a couple of days and have yet to find a threading technique that will split off from the main process without stopping the server from taking other requests. I believe I'm on a single-stream connection, as IPFS blocks connections to every other device in my home when it's started. It takes a couple of minutes for the CLI IPNS update to go through, so I set urllib's timeout to 300 seconds.
Well, I think the threading code is not correct.
@app.before_first_request
def cron_job():
    Thread(target=GANN.cron())
Here you created a Thread object. The target argument must be a callable, but you already called your method right there, so the right way would be
Thread(target=GANN.cron)
so the thread can call the cron function later. Having said that, the Thread must also be started, so that it will call the target function you gave it. So it must be like
thread_cron = Thread(target=GANN.cron)
thread_cron.start()
Since you called GANN.cron() directly, the method started executing right there and your app hung!
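Putting the two fixes together, the hook would look roughly like this (a minimal sketch; daemon=True is an assumption I'm adding so the background loop doesn't keep the process alive on shutdown, it is not part of the original answer):
from threading import Thread

@app.before_first_request
def cron_job():
    # Pass the method itself (no parentheses) and actually start the thread.
    thread_cron = Thread(target=GANN.cron, daemon=True)
    thread_cron.start()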

Python Tornado - How to Implement Long-Polling Server to Read from a Queue

I'm trying to build a web server to collect "commands" via AJAX and then distribute the commands to clients via long-polling.
The goal is that someone POSTs some data to /add-command.
Another client implements a long-polling client hitting /poll waiting for a command to execute.
I think a queue is the right data structure to use to hold commands waiting for attention. I'd like the commands to essentially be distributed immediately to any long-polling client but held if no client is currently polling.
Here's my python script.
import os
import time
import tornado.httpserver
import tornado.ioloop
import tornado.web
import tornado.gen
import Queue
import multiprocessing.pool
import mysql.connector
import urlparse
import uuid
import json

_commandQueue = Queue.Queue()
_commandPollInterval = 0.2
_commandPollTimeout = 10

class HomeHandler(tornado.web.RequestHandler):
    def get(self):
        self.render("home.htm")

class AddCommandHandler(tornado.web.RequestHandler):
    def post(self):
        d = urlparse.parse_qs(self.request.body)
        _commandQueue.put(d)
        self.write(str(True))

class PollHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        self.write("start")
        d = 1
        d = yield self.getCommand()
        self.write(str(d))
        self.write("end")
        self.finish()

    @tornado.gen.coroutine
    def getCommand(self):
        start = time.time()
        while (time.time() - start) < _commandPollTimeout * 1000:
            if not _commandQueue.empty:
                return _commandQueue.get()
            else:
                time.sleep(_commandPollInterval)
        return None

def main():
    application = tornado.web.Application(
        [
            (r"/", HomeHandler),
            (r"/add-command", AddCommandHandler),
            (r"/poll", PollHandler),
        ],
        debug=True,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
    )
    tornado.httpserver.HTTPServer(application).listen(int(os.environ.get("PORT", 5000)))
    tornado.ioloop.IOLoop.instance().start()

if __name__ == "__main__":
    main()
The AddCommandHandler works fine to put items in the _commandQueue.
The PollHandler request just times out. If I call PollHandler, it seems to lock the _commandQueue and I can't put to or get from it.
I suspect I need to join the queue, but I can't seem to find the right time to do that in the code.
UPDATE -- Here's my final code thanks to the answers
import os
import time
import datetime
import tornado.httpserver
import tornado.ioloop
import tornado.web
import tornado.gen
import tornado.queues
import urlparse
import json

_commandQueue = tornado.queues.Queue()
_commandPollInterval = 0.2
_commandPollTimeout = 10

class HomeHandler(tornado.web.RequestHandler):
    def get(self):
        self.render("home.htm")

class AddCommandHandler(tornado.web.RequestHandler):
    def get(self):
        cmd = urlparse.parse_qs(self.request.body)
        _commandQueue.put(cmd)
        self.write(str(cmd))

    def post(self):
        cmd = urlparse.parse_qs(self.request.body)
        _commandQueue.put(cmd)
        self.write(str(cmd))

class PollHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        cmd = yield self.getCommand()
        self.write(str(cmd))

    @tornado.gen.coroutine
    def getCommand(self):
        try:
            cmd = yield _commandQueue.get(
                timeout=datetime.timedelta(seconds=_commandPollTimeout)
            )
            raise tornado.gen.Return(cmd)
        except tornado.gen.TimeoutError:
            raise tornado.gen.Return()

def main():
    application = tornado.web.Application(
        [
            (r"/", HomeHandler),
            (r"/add-command", AddCommandHandler),
            (r"/poll", PollHandler),
        ],
        debug=True,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
    )
    tornado.httpserver.HTTPServer(application).listen(int(os.environ.get("PORT", 5000)))
    tornado.ioloop.IOLoop.instance().start()

if __name__ == "__main__":
    main()
In the async model you should avoid blocking operations; time.sleep is the problem in your code. Moreover, I think the best way is to use Tornado's async-aware queue, tornado.queues.Queue, and its asynchronous get:
import datetime
import tornado.gen
import tornado.queues

_commandQueue = tornado.queues.Queue()

# ...rest of the code ...

@tornado.gen.coroutine
def getCommand(self):
    try:
        # wait for a queue item; if none arrives within the timeout, an exception is raised
        cmd = yield _commandQueue.get(
            timeout=datetime.timedelta(seconds=_commandPollTimeout)
        )
        raise tornado.gen.Return(cmd)
    except tornado.gen.TimeoutError:
        raise tornado.gen.Return(None)
Note: the tornado.queues module is available since Tornado 4.x; if you use an older version, the Toro library will help.
You can NOT use time.sleep in the handler, since it blocks the whole IOLoop and stops it reading from the input stream: time.sleep(_commandPollInterval). What you should use instead is yield tornado.gen.sleep(_commandPollInterval).
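For completeness, here is a minimal sketch of what the original getCommand loop might look like with that change applied. It keeps the stdlib Queue from the question and only swaps the sleep; it also quietly fixes the missing empty() call parentheses and the timeout units from the original snippet, so treat it as illustrative rather than as part of the answer:
@tornado.gen.coroutine
def getCommand(self):
    start = time.time()
    while (time.time() - start) < _commandPollTimeout:   # timeout is already in seconds
        if not _commandQueue.empty():
            raise tornado.gen.Return(_commandQueue.get())
        # non-blocking sleep: yields control back to the IOLoop between polls
        yield tornado.gen.sleep(_commandPollInterval)
    raise tornado.gen.Return(None)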

Streaming a response doesn't work with Flask-Restful

I have a scenario where I want to show the output of a long-running script through a Flask API. I followed an example given for Flask and it works: I get the dmesg stream in my browser.
import subprocess
import time
from flask import Flask, Response

app = Flask(__name__)

@app.route('/yield')
def index():
    def inner():
        proc = subprocess.Popen(
            ['dmesg'],          # call something with a lot of output so we can see it
            shell=True,
            stdout=subprocess.PIPE
        )
        for line in iter(proc.stdout.readline, ''):
            time.sleep(1)       # don't need this, it just shows the text streaming
            yield line.rstrip() + '<br/>\n'
    return Response(inner(), mimetype='text/html')  # text/html is required for most browsers to show this
The thing is, I have been using Flask-Restful for a long time, so I want to do the streaming with it. I tried it and it's not working.
import subprocess
import time
from flask import Response
from flask_restful import Resource

class CatalogStrings(Resource):
    def get(self):
        return Response(inner(), mimetype='text/html')

def inner():
    proc = subprocess.Popen(
        ['dmesg'],          # call something with a lot of output so we can see it
        shell=True,
        stdout=subprocess.PIPE
    )
    for line in iter(proc.stdout.readline, ''):
        time.sleep(1)       # don't need this, it just shows the text streaming
        yield line.rstrip() + '<br/>\n'
Please help

socketio.emit() doesn't work when interacting using Popen on Windows in a Thread

I think a quick code snippet is better to explain my problem, so please have a look at this:
from flask import Flask
from flask.ext.socketio import SocketIO
from threading import Thread
import subprocess
import threading
from eventlet.green.subprocess import Popen

app = Flask(__name__)
socketio = SocketIO(app)

def get_tasks_and_emit():
    instance = Popen(["tasklist"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1)
    lines_iterator = iter(instance.stdout.readline, b"")
    data = ""
    for line in lines_iterator:
        data += line.decode("utf8")
    socketio.emit("loaded", data)
    print("::: DEBUG - returned tasks with thread")

@app.route("/")
def index():
    html = "<!DOCTYPE html>"
    html += "<script src=https://code.jquery.com/jquery-2.2.0.min.js></script>"
    html += "<script src=https://cdn.socket.io/socket.io-1.4.5.js></script>"
    html += "<script>"
    html += "var socket = io.connect(window.location.origin);"
    html += "socket.on('loaded', function(data) {alert(data);});"
    html += "function load_tasks_threaded() {$.get('/tasks_threaded');}"
    html += "function load_tasks_nonthreaded() {$.get('/tasks');}"
    html += "</script>"
    html += "<button onclick='load_tasks_nonthreaded()'>Load Tasks</button>"
    html += "<button onclick='load_tasks_threaded()'>Load Tasks (Threaded)</button>"
    return html

@app.route("/tasks")
def tasks():
    get_tasks_and_emit()
    print("::: DEBUG - returned tasks without thread")
    return ""

@app.route("/tasks_threaded")
def tasks_threaded():
    threading.Thread(target=get_tasks_and_emit).start()
    return ""

if __name__ == "__main__":
    socketio.run(app, port=7000, debug=True)
I am running this code on Windows using eventlet; if I don't use eventlet, everything is fine (but of course much slower due to the Werkzeug threading mode). (And I just checked, and it's not working on Linux either.)
I hope someone can point me in the right direction. (My Python version is 3.5.1, by the way.)
I found the problem. Apparently you have to monkey patch the threading module, so I added
import eventlet
eventlet.monkey_patch(thread=True)
and then I also had a problem with long-running programs. I had the same problem as the guy in this Stack Overflow post:
Using Popen in a thread blocks every incoming Flask-SocketIO request
So I added
eventlet.sleep()
to the for loop that processes the pipes.
EDIT:
As temoto pointed out, alternatively one can also just use the threading module from eventlet.green like this:
from eventlet.green import threading
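Put together, the worker from the question might look roughly like this with both fixes applied. This is only a sketch; socketio is assumed to be the Flask-SocketIO instance from the question, and everything else is taken from the code above:
import eventlet
eventlet.monkey_patch(thread=True)  # patch the threading module before anything else uses it

import subprocess
from eventlet.green.subprocess import Popen

def get_tasks_and_emit():
    instance = Popen(["tasklist"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1)
    data = ""
    for line in iter(instance.stdout.readline, b""):
        data += line.decode("utf8")
        eventlet.sleep()  # yield to the event loop so other requests keep being served
    socketio.emit("loaded", data)  # socketio: the Flask-SocketIO instance from the question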

Twisted API for Couchbase not working with Python Tornado

I'm trying to run a Tornado server with Couchbase 4.0 Developer preview.
import tornado.web
import tornado.httpserver
import tornado.options
import tornado.ioloop
import tornado.websocket
import tornado.httpclient
from tornado import gen
import os.path
from tornado.options import define, options, parse_command_line
import time
#from couchbase.bucket import Bucket
from twisted.internet import reactor
from txcouchbase.bucket import Bucket
from couchbase.n1ql import N1QLQuery, N1QLError
from pprint import pprint

server = "x.x.x.x"
bucketname = "zips"
Connection = "couchbase://" + server + "/" + bucketname
bkt = Bucket(Connection)

class IndexHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        print "entered"
        query = "SELECT * FROM `zips` where pincode= '632014'"
        q = N1QLQuery(query)
        #self.bkt = bkt
        t0 = time.time()
        res = bkt.n1qlQueryAll(q)
        res.addCallback(self.on_ok)
        reactor.run()
        t1 = time.time()
        print t1 - t0
        self.write("Hello World")

    def on_ok(self, response):
        print "LOl"
        for each in res:
            print each
        reactor.stop()
        self.finish()

handlers = [
    (r'/', IndexHandler),
]

if __name__ == "__main__":
    parse_command_line()
    # template path should be given here only, unlike handlers
    app = tornado.web.Application(handlers, template_path=os.path.join(os.path.dirname(__file__), "templates"),
                                  static_path=os.path.join(os.path.dirname(__file__), "static"),
                                  cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", debug=True)
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(8888, address='0.0.0.0')
    tornado.ioloop.IOLoop.instance().start()
After I run this, for some reason the callback function is never called. I could not find any proper documentation for this, and had to go through the source code to write this. I'm still confused as I'm new to asynchronous programming. Can someone please tell me where I'm going wrong and if there is a better way of doing this?
In asynchronous programming, you only want to start an event loop (like IOLoop.start() or reactor.run()) once, at the top of your program. You're calling IOLoop.start(), so instead of calling reactor.run() you want to tell Twisted to use the Tornado IOLoop as its reactor. Before the import of reactor, do
import tornado.platform.twisted
tornado.platform.twisted.install()
from twisted.internet import reactor
See http://www.tornadoweb.org/en/stable/twisted.html#twisted-on-tornado for more.
Once you've done this, you can call twisted libraries without having to start and stop the reactor.
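As an illustration of what that change implies for the handler above, here is a rough sketch. It simply removes the reactor.run()/reactor.stop() calls from the question's code and iterates the callback argument instead of the undefined res; the txcouchbase calls are otherwise kept exactly as the asker wrote them, so treat them as unverified:
import tornado.platform.twisted
tornado.platform.twisted.install()      # must run before "from twisted.internet import reactor"
from twisted.internet import reactor    # now backed by the Tornado IOLoop

class IndexHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        q = N1QLQuery("SELECT * FROM `zips` where pincode= '632014'")
        d = bkt.n1qlQueryAll(q)         # returns a Deferred; no reactor.run() needed
        d.addCallback(self.on_ok)

    def on_ok(self, response):
        for each in response:
            print(each)
        self.finish()                   # no reactor.stop() either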
