Twisted API for Couchbase not working with Python Tornado

I'm trying to run a Tornado server with Couchbase 4.0 Developer preview.
import tornado.web
import tornado.httpserver
import tornado.options
import tornado.ioloop
import tornado.websocket
import tornado.httpclient
from tornado import gen
import os.path
from tornado.options import define, options, parse_command_line
import time
#from couchbase.bucket import Bucket
from twisted.internet import reactor
from txcouchbase.bucket import Bucket
from couchbase.n1ql import N1QLQuery, N1QLError
from pprint import pprint
server = "x.x.x.x"
bucketname = "zips"
Connection = "couchbase://" + server + "/" + bucketname
bkt = Bucket(Connection)
class IndexHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        print "entered"
        query = "SELECT * FROM `zips` where pincode= '632014'"
        q = N1QLQuery(query)
        #self.bkt = bkt
        t0 = time.time()
        res = bkt.n1qlQueryAll(q)
        res.addCallback(self.on_ok)
        reactor.run()
        t1 = time.time()
        print t1 - t0
        self.write("Hello World")

    def on_ok(self, response):
        print "LOl"
        for each in response:
            print each
        reactor.stop()
        self.finish()
handlers = [
    (r'/', IndexHandler),
]

if __name__ == "__main__":
    parse_command_line()
    # template path should be given here only, unlike handlers
    app = tornado.web.Application(
        handlers,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
        cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
        debug=True,
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(8888, address='0.0.0.0')
    tornado.ioloop.IOLoop.instance().start()
After I run this, for some reason the callback function is never called. I could not find any proper documentation for this and had to go through the source code to write it. I'm still confused, as I'm new to asynchronous programming. Can someone please tell me where I'm going wrong, and whether there is a better way of doing this?

In asynchronous programming, you only want to start an event loop (like IOLoop.start() or reactor.run()) once, at the top of your program. You're calling IOLoop.start(), so instead of calling reactor.run() you want to tell Twisted to use the Tornado IOLoop as its reactor. Before the import of reactor, do
import tornado.platform.twisted
tornado.platform.twisted.install()
from twisted.internet import reactor
See http://www.tornadoweb.org/en/stable/twisted.html#twisted-on-tornado for more.
Once you've done this, you can call twisted libraries without having to start and stop the reactor.
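For illustration, a minimal sketch of the corrected handler (reusing the bucket, query, and handler names from the question; this is not code from the original answer):

import tornado.platform.twisted
tornado.platform.twisted.install()    # must run before importing the reactor
from twisted.internet import reactor  # now backed by Tornado's IOLoop

import tornado.web
from txcouchbase.bucket import Bucket
from couchbase.n1ql import N1QLQuery

bkt = Bucket("couchbase://x.x.x.x/zips")

class IndexHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        d = bkt.n1qlQueryAll(N1QLQuery(
            "SELECT * FROM `zips` where pincode= '632014'"))
        # no reactor.run() here; the IOLoop started in main drives the Deferred
        d.addCallback(self.on_ok)

    def on_ok(self, rows):
        for row in rows:
            self.write(str(row))
        # no reactor.stop() either; just finish this request
        self.finish()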

Related

Django.db.utils.OperationalError: (2013, 'Lost connection to MySQL server during query')

I'm using APScheduler to call a task every 2 hours; the task reads data from a MySQL DB.
But after MySQL's default wait_timeout of 28800s, it always raises django.db.utils.OperationalError: (2013, 'Lost connection to MySQL server during query').
According to the "MySQL server has gone away" doc, I think it's a child-process issue,
but I still can't solve the problem.
main.py
import sys, os
import django
import logging
import datetime
import argparse

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, 'src'))
sys.path.append(os.path.join(BASE_DIR, 'data_model'))

if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-d', '--execute_dir', type=str,
                    help='exe_dir',
                    default=BASE_DIR)
    args = ap.parse_args()
    sys.path.append(os.path.join(args.execute_dir, "conf"))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_settings")
    django.setup()

    from auto_management_v2 import auto_manage
    from baseApscheduler import baseScheduler

    scheduler = baseScheduler.scheduler
    scheduler.add_job(
        func=auto_manage,
        trigger='interval',
        hours=2,
        start_date=(datetime.datetime.now() + datetime.timedelta(seconds=20)).strftime("%Y-%m-%d %H:%M:%S"),
        id='auto_manage',
        jobstore='default',
        replace_existing=True)
    scheduler.start()
baseApscheduler.py
import logging

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.jobstores.memory import MemoryJobStore
from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
from apscheduler.events import EVENT_JOB_ERROR, EVENT_JOB_EXECUTED
from django.conf import settings
from utils import sendEmail

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename=settings.AUTO_MANAGEMENT_FILE_NAME,
    filemode='a')

def event_listener(event):
    if event.exception:
        print("task failed!")
        info = '''
        time: {},
        task_name: {},
        fail_reason: {}
        '''.format(str(event.scheduled_run_time), str(event.job_id), str(event.exception))
        sendEmail(message=info, subject='task failed')
    else:
        print(event.job_id + " task succeeded!")

class BaseScheduler():
    def __init__(self):
        self.executors = {
            'default': ThreadPoolExecutor(10),
            'processPool': ProcessPoolExecutor(3)
        }
        self.jobstores = {
            'default': MemoryJobStore(),
        }
        self.scheduler = BlockingScheduler(
            jobstores=self.jobstores, executors=self.executors)
        self.scheduler._logger = logging
        self.scheduler.add_listener(
            event_listener,
            EVENT_JOB_ERROR | EVENT_JOB_EXECUTED)

baseScheduler = BaseScheduler()
auto_management_v2.py
import logging
import datetime
import json
import requests
import urllib
import asyncio

import django.db  # needed for django.db.connections below
from django.db.utils import OperationalError
from django.conf import settings

django.db.connections.close_all()  # try to close old connections here
from data_model.models import SmbAmmFilter, SmbAmmRule, SmbAmmCampaignFbRule, SmbAmmRuleCampaign

info_logger = logging.getLogger('auto_manage_info')

class AutoManagementServiceV2(object):
    def __init__(self):
        self.plan_lists = SmbAmmRuleCampaign.objects.filter(status=1)
        self.chunk_size = 50

    def run(self):
        new_loop = asyncio.new_event_loop()
        self.iter_plans = (
            self.plan_lists[i:i + self.chunk_size] for i in range(0, len(self.plan_lists), self.chunk_size)
        )
        asyncio.set_event_loop(new_loop)
        for chunk_plans in self.iter_plans:
            task_list = [
                asyncio.ensure_future(self._handle_campaign_data(campaign_plan)) for campaign_plan in chunk_plans
            ]
            new_loop.run_until_complete(asyncio.gather(*task_list))

def auto_manage():
    AutoManagementServiceV2().run()

if __name__ == '__main__':
    auto_manage()
As the code above shows, I thought I had closed the MySQL connection in my task and hoped it would reconnect, but it always fails.
How can I reconnect to the DB correctly every time my task runs?
Any commentary is very welcome. Many thanks.
The problem was solved by adding django.db.connections.close_all() inside the task auto_manage itself. The module-level close_all() only runs once, at import time; closing inside the task forces Django to open a fresh connection on every run:
def auto_manage():
    django.db.connections.close_all()
    AutoManagementServiceV2().run()
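As an aside (not from the original thread): Django also provides django.db.close_old_connections(), which only discards connections that are broken or past their CONN_MAX_AGE. A sketch of the same fix using it:

import django.db

def auto_manage():
    # discard connections broken by MySQL's wait_timeout (or past CONN_MAX_AGE)
    # so the ORM opens a fresh one for this run
    django.db.close_old_connections()
    AutoManagementServiceV2().run()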

flask + gevent pywsgi blocking on pandas and built-in io

Calling the built-in open() or pandas.read_csv() seems to block all other requests to my Flask + gevent pywsgi web server, despite monkey.patch_all(). Do I need to call special gevent IO functions to make them non-blocking?
from gevent import monkey, pywsgi, sleep
monkey.patch_all()
import pandas as pd
from flask import Flask

app = Flask(__name__)
FILENAME = 'c:/temp/testcsv.csv'

@app.route('/')
def fastresult():
    return 'this should return immediately'

@app.route('/non_blocking_sleep')
def non_blocking_sleep():
    sleep(10)
    return 'using gevent.sleep does NOT block other requests, as expected'

@app.route('/readcsv')
def readcsv():
    """
    this blocks any other request before the read completes
    """
    df = pd.read_csv(FILENAME)
    return df.info()

@app.route('/openfile')
def openfile():
    """
    this blocks any other request before the read completes
    """
    with open(FILENAME, 'r') as file:
        res = file.readlines()
    return res[:1000]

http_server = pywsgi.WSGIServer(('', 5000), app)
http_server.serve_forever()
Tested on Anaconda 5.3 (Python 3.7, 64-bit) on both Windows and Linux.
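(Aside, not from the thread: gevent's monkey patching does not make disk I/O on regular files cooperative, so a common workaround is to push the blocking call onto gevent's built-in threadpool. A sketch, with an invented route name for illustration:)

import gevent

@app.route('/readcsv_threaded')  # hypothetical route, for illustration only
def readcsv_threaded():
    # threadpool.apply blocks only this greenlet; the hub keeps serving others
    df = gevent.get_hub().threadpool.apply(pd.read_csv, (FILENAME,))
    return str(df.shape)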

Asynchronous Function Call

I would like to learn how to call a function asynchronously in Python3. I think Tornado can do this. Currently, my code is returning nothing on the command line:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

async def count(end):
    """Print message when start equals end."""
    start = 0
    while True:
        if start == end:
            print('start = {0}, end = {1}'.format(start, end))
            break
        start = start + 1

def main():
    # Start counting.
    yield count(1000000000)
    # This should print while count is running.
    print('Count is running. Async!')

if __name__ == '__main__':
    main()
Thanks
To call an async function, you need to provide an event loop to handle it. If you have a Tornado app, it provides such a loop, which allows you to make your handlers asynchronous:
from tornado.web import Application, RequestHandler, url
from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop

async def do_something_asynchronous():
    # e.g. call another service, read from a database, etc.
    return {'something': 'something'}

class YourAsyncHandler(RequestHandler):
    async def get(self):
        payload = await do_something_asynchronous()
        self.write(payload)

application = Application([
    url(r'/your_url', YourAsyncHandler, name='your_url')
])

http_server = HTTPServer(application)
http_server.listen(8000, address='0.0.0.0')
IOLoop.instance().start()
Outside of a Tornado app you can get the event loop from any number of providers, including the built-in asyncio library:
import asyncio

event_loop = asyncio.get_event_loop()
try:
    event_loop.run_until_complete(do_something_asynchronous())
finally:
    event_loop.close()
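Applied to the original snippet, a sketch (not from the original answer) that runs the count alongside other work; note a CPU-bound loop never yields to the event loop on its own, so an explicit await asyncio.sleep(0) is added:

import asyncio

async def count(end):
    start = 0
    while start != end:
        start += 1
        if start % 1000000 == 0:
            await asyncio.sleep(0)  # yield to the event loop periodically
    print('start = {0}, end = {1}'.format(start, end))

async def main():
    task = asyncio.ensure_future(count(10000000))
    # this prints without waiting for count to finish
    print('Count is running. Async!')
    await task

asyncio.get_event_loop().run_until_complete(main())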

Python Tornado - How to Implement Long-Polling Server to Read from a Queue

I'm trying to build a web server to collect "commands" via AJAX and then distribute the commands to clients via long-polling.
The goal is that someone POSTs some data to /add-command.
Another client implements a long-polling client hitting /poll waiting for a command to execute.
I think a queue is the right data structure to use to hold commands waiting for attention. I'd like the commands to essentially be distributed immediately to any long-polling client but held if no client is currently polling.
Here's my python script.
import os
import time
import tornado.httpserver
import tornado.ioloop
import tornado.web
import tornado.gen
import Queue
import multiprocessing.pool
import mysql.connector
import urlparse
import uuid
import json
_commandQueue = Queue.Queue()
_commandPollInterval = 0.2
_commandPollTimeout = 10
class HomeHandler(tornado.web.RequestHandler):
    def get(self):
        self.render("home.htm")

class AddCommandHandler(tornado.web.RequestHandler):
    def post(self):
        d = urlparse.parse_qs(self.request.body)
        _commandQueue.put(d)
        self.write(str(True))

class PollHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        self.write("start")
        d = 1
        d = yield self.getCommand()
        self.write(str(d))
        self.write("end")
        self.finish()

    @tornado.gen.coroutine
    def getCommand(self):
        start = time.time()
        while (time.time() - start) < _commandPollTimeout * 1000:
            if not _commandQueue.empty:
                return _commandQueue.get()
            else:
                time.sleep(_commandPollInterval)
        return None
def main():
    application = tornado.web.Application(
        [
            (r"/", HomeHandler),
            (r"/add-command", AddCommandHandler),
            (r"/poll", PollHandler),
        ],
        debug=True,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
    )
    tornado.httpserver.HTTPServer(application).listen(int(os.environ.get("PORT", 5000)))
    tornado.ioloop.IOLoop.instance().start()

if __name__ == "__main__":
    main()
The AddCommandHandler works fine to put items in the _commandQueue.
The PollHandler request just times out. If I call the PollHandler, it seems to lock the _commandQueue and I can't put or get from it.
I suspect I need to join the queue, but I can't seem to find the right time to do that in the code.
UPDATE -- Here's my final code, thanks to the answers:
import os
import time
import datetime
import tornado.httpserver
import tornado.ioloop
import tornado.web
import tornado.gen
import tornado.queues
import urlparse
import json
_commandQueue = tornado.queues.Queue()
_commandPollInterval = 0.2
_commandPollTimeout = 10
class HomeHandler(tornado.web.RequestHandler):
    def get(self):
        self.render("home.htm")

class AddCommandHandler(tornado.web.RequestHandler):
    def get(self):
        cmd = urlparse.parse_qs(self.request.body)
        _commandQueue.put(cmd)
        self.write(str(cmd))

    def post(self):
        cmd = urlparse.parse_qs(self.request.body)
        _commandQueue.put(cmd)
        self.write(str(cmd))

class PollHandler(tornado.web.RequestHandler):
    @tornado.gen.coroutine
    def get(self):
        cmd = yield self.getCommand()
        self.write(str(cmd))

    @tornado.gen.coroutine
    def getCommand(self):
        try:
            cmd = yield _commandQueue.get(
                timeout=datetime.timedelta(seconds=_commandPollTimeout)
            )
            raise tornado.gen.Return(cmd)
        except tornado.gen.TimeoutError:
            raise tornado.gen.Return()
def main():
    application = tornado.web.Application(
        [
            (r"/", HomeHandler),
            (r"/add-command", AddCommandHandler),
            (r"/poll", PollHandler),
        ],
        debug=True,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
    )
    tornado.httpserver.HTTPServer(application).listen(int(os.environ.get("PORT", 5000)))
    tornado.ioloop.IOLoop.instance().start()

if __name__ == "__main__":
    main()
In the async model you should avoid blocking operations, and time.sleep is the culprit in your code. Moreover, I think the best approach is to use Tornado's async queue, tornado.queues.Queue, with its asynchronous get:
import datetime

import tornado.gen
import tornado.queues

_commandQueue = tornado.queues.Queue()

# ...rest of the code...

    @tornado.gen.coroutine
    def getCommand(self):
        try:
            # wait for a queue item; raises TimeoutError if none arrives in time
            cmd = yield _commandQueue.get(
                timeout=datetime.timedelta(seconds=_commandPollTimeout)
            )
            raise tornado.gen.Return(cmd)
        except tornado.gen.TimeoutError:
            raise tornado.gen.Return(None)
Note: the tornado.queues module is available since Tornado 4.x; if you are using an older version, the Toro library provides the same functionality.
You can NOT use time.sleep in the handler, since it blocks the whole IOLoop: time.sleep(_commandPollInterval) stalls every request. What you should use instead is yield gen.sleep(_commandPollInterval).
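Applied to the original getCommand, a sketch of that non-blocking polling loop, keeping the stdlib queue (this also corrects the original's missing parentheses on Queue.empty() and its seconds-vs-milliseconds timeout comparison):

    @tornado.gen.coroutine
    def getCommand(self):
        start = time.time()
        while (time.time() - start) < _commandPollTimeout:
            if not _commandQueue.empty():
                raise tornado.gen.Return(_commandQueue.get())
            # yields control to the IOLoop instead of blocking it
            yield tornado.gen.sleep(_commandPollInterval)
        raise tornado.gen.Return(None)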

python what is the import for threading?

I want to run some python code every 120 seconds.
I tried this:
class AppServerSvc:
    def f(self):
        # call f() again in 120 seconds
        spider = FantasySerieaSpider()
        settings = get_project_settings()
        crawler = Crawler(settings)
        crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
        crawler.configure()
        crawler.crawl(spider)
        crawler.start()
        log.start()
        reactor.run()  # the script will block here until the spider_closed signal is sent
        threading.Timer(120, f).start()

if __name__ == '__main__':
    AppServerSvc().f()
I got a "threading is not defined" error.
These are my imports:
import pythoncom
import win32serviceutil
import win32service
import win32event
import servicemanager
import socket
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from FantasySeriea.spiders.spider import FantasySerieaSpider
from scrapy.utils.project import get_project_settings
from threading import Thread
rather than (or in addition to?):
from threading import Thread
you want:
import threading
You're using threading.Timer in your code, but you're importing only Thread from threading and putting it into the current namespace. What you want is to import the whole module:
import threading
If you are still using Thread, make sure to replace Thread with threading.Thread. Also, you are in a class, so you need to prefix f with self. to refer to the class member:
threading.Timer(120, self.f).start()
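Putting both fixes together, a minimal sketch of the rescheduling pattern (the crawl body elided):

import threading

class AppServerSvc:
    def f(self):
        # ... run the spider as above ...
        # reschedule using the bound method self.f, not the bare name f
        threading.Timer(120, self.f).start()

if __name__ == '__main__':
    AppServerSvc().f()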
