Using djcelery to scrape a website and populate django models - python

I'm having trouble getting celery working with django. I want to use celery to scrape a website and update some django models every 20 minutes.
I created a task file in my app directory that has an update class:
class Update(PeriodicTask):
run_every=datetime.timedelta(minutes=20)
def run(self, **kwargs):
#update models
The class correctly updates my models if I run it from the command line:
if __name__ == '__main__':
Update().run()
My celery config in settings.py looks like this:
CELERY_RESULT_BACKEND = "database"
BROKER_HOST = 'localhost'
BROKER_PORT = 5672
BROKER_USER = 'Broker'
BROKER_PASSWORD = '*password*'
BROKER_VHOST = 'broker_vhost'
But when I run manage.py celeryd -v 2 I get connection errors:
[2010-12-29 09:28:15,150: ERROR/MainProcess] CarrotListener: Connection Error: [Errno 111] Connection refused. Trying again in 10 seconds...
What am I missing?
Update:
I found django-kombu, which looked pretty good because it uses my existing database. I've installed django-kombu and kombu, but now I get the following error when running manage.py celeryd -v 2.
Traceback (most recent call last):
File "manage.py", line 11, in <module>
execute_manager(settings)
File "<webapp_path>/lib/python2.6/django/core/management/__init__.py", line 438, in execute_manager
utility.execute()
File "<webapp_path>/lib/python2.6/django/core/management/__init__.py", line 379, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "<webapp_path>/lib/python2.6/django/core/management/base.py", line 191, in run_from_argv
self.execute(*args, **options.__dict__)
File "<webapp_path>/lib/python2.6/django/core/management/base.py", line 220, in execute
output = self.handle(*args, **options)
File "<webapp_path>/lib/python2.6/django_celery-2.1.4-py2.6.egg/djcelery/management/commands/celeryd.py", line 20, in handle
worker.run(*args, **options)
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/bin/celeryd.py", line 83, in run
from celery.apps.worker import Worker
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/apps/worker.py", line 15, in <module>
from celery.task import discard_all
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/task/__init__.py", line 7, in <module>
from celery.execute import apply_async
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/execute/__init__.py", line 7, in <module>
from celery.result import AsyncResult, EagerResult
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/result.py", line 9, in <module>
from celery.backends import default_backend
File "<webapp_path>/lib/python2.6/celery-2.1.4-py2.6.egg/celery/backends/__init__.py", line 51, in <module>
default_backend = DefaultBackend()
TypeError: __init__() takes exactly 2 arguments (1 given)

Doesn't look like you have a broker installed/running (RabbitMQ?)

I had the same issue, and the problem was that I had the import path wrong.
Probably, you import task as
from celery import task.
While you should
from celery.task import task

Related

Failed to load application: No module named udpecho

I am trying to start a socket connection on an Amazon AMI server. I have some existing code for this written in Python, but I am a newbie to Python and am unable to get it running. When I try to start the program with the command below, I run into issues — please take a look:
from twisted.application import internet, service
from udpecho import Echo
application = service.Application("echo")
echoService = internet.UDPServer(7401, Echo())
echoService.setServiceParent(application)
but its showing error
Traceback (most recent call last):
File "/usr/lib64/python2.6/dist-packages/twisted/application/app.py", line 694, in run
runApp(config)
File "/usr/lib64/python2.6/dist-packages/twisted/scripts/twistd.py", line 23, in runApp
_SomeApplicationRunner(config).run()
File "/usr/lib64/python2.6/dist-packages/twisted/application/app.py", line 411, in run
self.application = self.createOrGetApplication()
File "/usr/lib64/python2.6/dist-packages/twisted/application/app.py", line 494, in createOrGetApplication
application = getApplication(self.config, passphrase)
--- <exception caught here> ---
File "/usr/lib64/python2.6/dist-packages/twisted/application/app.py", line 505, in getApplication
application = service.loadApplication(filename, style, passphrase)
File "/usr/lib64/python2.6/dist-packages/twisted/application/service.py", line 390, in loadApplication
application = sob.loadValueFromFile(filename, 'application', passphrase)
File "/usr/lib64/python2.6/dist-packages/twisted/persisted/sob.py", line 215, in loadValueFromFile
exec fileObj in d, d
File "udp_server.tac", line 4, in <module>
from udpecho import Echo
exceptions.ImportError: No module named udpecho
Failed to load application: No module named udpecho
What should I do for this to run?
Any working solution would be appreciated.

Does anybody have ssl working with the Python Cassandra driver and eventlet?

I am stuck getting ssl to work with the Python Cassandra driver and eventlet.
We are using Python 3.4, eventlet 18.3 and Cassandra driver 3.0.0. Eventlet without ssl and ssl without eventlet both work.
Has anybody got the combination of ssl, cassandra and eventlet to work with Python? If so, what versions?
A code example may be too much to ask for, but would be very helpful.
Followup Feb 18 2016: Sorry for being so terse. Here is some code, all with Python3:
First, the simplest possible Cassandra client. No eventlet, no ssl. It works:
from cassandra.cluster import Cluster
cluster = Cluster(contact_points=['<ip>'],
connection_class=None)
session = cluster.connect('<keyspace>')
print("OK, session:", session)
Next, eventlet. No threads, so eventlet is pointless here. But it works:
from cassandra.cluster import Cluster
from cassandra.io.eventletreactor import EventletConnection
cluster = Cluster(contact_points=['<ip>'],
connection_class=EventletConnection)
session = cluster.connect('<keyspace>')
print("OK, session:", session)
Next, ssl, no eventlet. This also works:
import ssl
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
cluster = Cluster(contact_points=['<ip>'],
connection_class=None,
ssl_options=dict(ca_certs='<certfile>',
cert_reqs=ssl.CERT_REQUIRED,
ssl_version=ssl.PROTOCOL_TLSv1),
auth_provider=PlainTextAuthProvider(username='<user>',
password='<pass>'))
session = cluster.connect('<keyspace>')
print("OK, session:", session)
Finally, ssl and eventlet. This fails:
import ssl
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.io.eventletreactor import EventletConnection
cluster = Cluster(contact_points=['<ip>'],
connection_class=EventletConnection,
ssl_options=dict(ca_certs='<certfile>',
cert_reqs=ssl.CERT_REQUIRED,
ssl_version=ssl.PROTOCOL_TLSv1),
auth_provider=PlainTextAuthProvider(username='<user>',
password='<pass>'))
session = cluster.connect('<keyspace>')
print("OK, session:", session)
The backtrace shows that we are using eventlet.green.ssl:
Traceback (most recent call last):
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/hubs/poll.py", line 115, in wait
listener.cb(fileno)
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/green/select.py", line 55, in on_read
current.switch(([original], [], []))
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/greenthread.py", line 214, in main
result = function(*args, **kwargs)
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/cassandra/io/eventletreactor.py", line 98, in <lambda>
self._read_watcher = eventlet.spawn(lambda: self.handle_read())
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/cassandra/io/eventletreactor.py", line 153, in handle_read
buf = self._socket.recv(self.in_buffer_size)
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/green/ssl.py", line 198, in recv
read = self.read(buflen)
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/green/ssl.py", line 138, in read
super(GreenSSLSocket, self).read, *args, **kwargs)
File "/home/jk/eventlet/venv3/lib/python3.5/site-packages/eventlet/green/ssl.py", line 112, in _call_trampolining
return func(*a, **kw)
File "/home/jk/python-org/dst/python-3.5.1/lib/python3.5/ssl.py", line 786, in read
return self._sslobj.read(len, buffer)
TypeError: must be read-write bytes-like object, not None
Removing descriptor: 5
Traceback (most recent call last):
File "so4.py", line 13, in <module>
session = cluster.connect('<keyspace>')
File "cassandra/cluster.py", line 824, in cassandra.cluster.Cluster.connect (cassandra/cluster.c:11354)
File "cassandra/cluster.py", line 850, in cassandra.cluster.Cluster.connect (cassandra/cluster.c:11176)
File "cassandra/cluster.py", line 844, in cassandra.cluster.Cluster.connect (cassandra/cluster.c:11056)
File "cassandra/cluster.py", line 2041, in cassandra.cluster.ControlConnection.connect (cassandra/cluster.c:36224)
File "cassandra/cluster.py", line 2076, in cassandra.cluster.ControlConnection._reconnect_internal (cassandra/cluster.c:37080)
cassandra.cluster.NoHostAvailable: ('Unable to connect to any servers', {'<ip>': OperationTimedOut('errors=Timed out creating connection (5 seconds), last_host=None',)})
Again, no actual threads. But our real system does use them.
I didn't see any difference if I included eventlet.monkey_patch() in the setup.
The real system does.
This seems to be caused by the cassandra-drivers eventlet code using select.select() on a ssl socket. This causes problems as described here: select and ssl in python
This issue is fixed in https://github.com/datastax/python-driver/pull/485

Python Streamhandler over ftp doesn't work after second import

I have the following problem:
I wrote a FTPHandler(StreamHandler), which connects via 'transport=paramiko.Transport(...)' and 'transport.connect(...)' to a server and opens a sftp connection with 'SFTPClient.from_transport(...)'.
I am importing this handler in a module named 'JUS_Logger.py', which is my module for logging. This 'JUS_Logger' is imported by another class, 'JUS_Reader'.
The problem is, that if I start 'JUS_Reader', the transport is being initialized, but the Connection fails. There is no exception, the program only hangs. If I kill it, I get the stacktrace
CTraceback (most recent call last):
File "./JUS_Reader.py", line 24, in <module>
from JUS_Logger import logger
File "/<home>/.../JUS_Logger.py", line 74, in <module>
ftpHandler=FTPHandler(ftpOut,10)
File "/<home>/FTPHandler.py", line 21, in __init__
self.transport.connect(username=ftpOut['user'].decode('base64'),password=ftpOut['passwd'].decode('base64'))
File "/usr/lib/python2.7/dist-packages/paramiko/transport.py", line 1004, in connect
self.auth_password(username, password)
File "/usr/lib/python2.7/dist-packages/paramiko/transport.py", line 1165, in auth_password
return self.auth_handler.wait_for_response(my_event)
File "/usr/lib/python2.7/dist-packages/paramiko/auth_handler.py", line 158, in wait_for_response
event.wait(0.1)
File "/usr/lib/python2.7/threading.py", line 403, in wait
self.__cond.wait(timeout)
File "/usr/lib/python2.7/threading.py", line 262, in wait
_sleep(delay)
However, if I run 'JUS_Logger.py' by itself, everything works: the transport's connection is established and the SFTPClient connects as well.
Any ideas? Or further questions?

Celery tasks not works with gevent

When I use celery + gevent for tasks that use the subprocess module, I get the following stacktrace:
Traceback (most recent call last):
File "/home/venv/admin/lib/python2.7/site-packages/celery/task/trace.py", line 228, in trace_task
R = retval = fun(*args, **kwargs)
File "/home/venv/admin/lib/python2.7/site-packages/celery/task/trace.py", line 415, in __protected_call__
return self.run(*args, **kwargs)
File "/home/webapp/admin/webadmin/apps/loggingquarantine/tasks.py", line 107, in release_mail_task
res = call_external_script(popen_obj.communicate)
File "/home/webapp/admin/webadmin/apps/core/helpers.py", line 42, in call_external_script
return func_to_call(*args, **kwargs)
File "/usr/lib64/python2.7/subprocess.py", line 740, in communicate
return self._communicate(input)
File "/usr/lib64/python2.7/subprocess.py", line 1257, in _communicate
stdout, stderr = self._communicate_with_poll(input)
File "/usr/lib64/python2.7/subprocess.py", line 1287, in _communicate_with_poll
poller = select.poll()
AttributeError: 'module' object has no attribute 'poll'
My manage.py looks following (doing monkeypatch there):
#!/usr/bin/env python
from gevent import monkey
import sys
import os
if __name__ == "__main__":
if not 'celery' in sys.argv:
monkey.patch_all()
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "webadmin.settings")
from django.core.management import execute_from_command_line
sys.path.append(".")
execute_from_command_line(sys.argv)
Is there a reason why the celery tasks act as if they weren't patched properly?
p.s. The strange thing is that my local setup on macOS works fine, while I get these exceptions under CentOS (all package versions are the same, and the init and config scripts too).
There's no simulation for poll in gevent so monkey.patch_all removes polling mechanisms that gevent.select does not simulate: poll, epoll, kqueue, kevent. See gevent.monkey – Make the standard library cooperative.

celery 3.0.12 TypeError: run() got multiple values for keyword argument 'detach'

I want to run periodic tasks using celery and celerybeat. I am using
celery 3.0.12, django-celery 3.0.11, kombu 2.5.3, billiard 2.7.3.19, Python 2.6 and Django 1.4.2. Can anyone please tell me where it's going wrong, or whether I'm missing any part of the configuration?
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/usr/lib/python2.6/site-packages/django/core/management/__init__.py", line 443, in execute_from_command_line
utility.execute()
File "/usr/lib/python2.6/site-packages/django/core/management/__init__.py", line 382, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/usr/lib/python2.6/site-packages/django_celery-3.0.11-py2.6.egg/djcelery/management/base.py", line 74, in run_from_argv
return super(CeleryCommand, self).run_from_argv(argv)
File "/usr/lib/python2.6/site-packages/django/core/management/base.py", line 196, in run_from_argv
self.execute(*args, **options.__dict__)
File "/usr/lib/python2.6/site-packages/django_celery-3.0.11-py2.6.egg/djcelery/management/base.py", line 67, in execute
super(CeleryCommand, self).execute(*args, **options)
File "/usr/lib/python2.6/site-packages/django/core/management/base.py", line 232, in execute
output = self.handle(*args, **options)
File "/usr/lib/python2.6/site-packages/django_celery-3.0.11-py2.6.egg/djcelery/management/commands/celerybeat.py", line 24, in handle
beat.run(*args, **options)
TypeError: run() got multiple values for keyword argument 'detach'
TIA:)
update:
task.py
from celery.utils.log import get_task_logger
import celery
from celery.task.schedules import crontab
from celery.task import periodic_task
#periodic_task(run_every=crontab(hour='*', minute='10', day_of_week='*'))
def spam_task():
"""
am fetching the user's spam and sending mail to spam users
using django EmailMultiAlternatives
"""
#periodic_task(run_every=crontab(hour='*', minute='10', day_of_week='*'))
def notify():
"""
fetching the activities and notifying to users by sending mail
"""
settings.py
CELERY_IMPORTS = ("myapp.tasks", )
CELERY_ENABLE_UTC = True
CELERY_TIMEZONE = 'America/Chicago'
import djcelery
djcelery.setup_loader()
BROKER_URL = 'django://'
BROKER_HOST = 'localhost'
BROKER_PORT = 5672
BROKER_USER = 'guest'
BROKER_PASSWORD = 'guest'
BROKER_VHOST = '/'
CELERY_EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'
CELERY_EMAIL_TASK_CONFIG = {
'queue' : 'email',
'rate_limit' : '10/m',
'name': 'djcelery_email_send',
'ignore_result': True }

Categories