I have a RabbitMQ message broker and a remote Celery worker. It works fine, but about every five minutes I get this error:
[2014-01-06 14:02:27,247: WARNING/MainProcess] consumer: Connection to broker lost. Trying to re-establish the connection...
Traceback (most recent call last):
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/consumer.py", line 270, in start
blueprint.start(self)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/consumer.py", line 786, in start
c.loop(*c.loop_args())
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/loops.py", line 72, in asynloop
next(loop)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/async/hub.py", line 333, in create_loop
cb(*cbargs)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/transport/base.py", line 156, in on_readable
reader(loop)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/transport/base.py", line 141, in _read
drain_events(timeout=0)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/amqp/connection.py", line 282, in drain_events
chanmap, None, timeout=timeout,
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/amqp/connection.py", line 345, in _wait_multiple
channel, method_sig, args, content = read_timeout(timeout)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/amqp/connection.py", line 316, in read_timeout
return self.method_reader.read_method()
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/amqp/method_framing.py", line 195, in read_method
raise m
IOError: Socket closed
[2014-01-06 14:02:27,308: ERROR/MainProcess] Unrecoverable error: ValueError('I/O operation on closed epoll fd',)
Traceback (most recent call last):
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/__init__.py", line 206, in start
self.blueprint.start(self)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/bootsteps.py", line 373, in start
return self.obj.start()
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/consumer.py", line 270, in start
blueprint.start(self)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/consumer.py", line 468, in start
c.connection = c.connect()
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/celery/worker/consumer.py", line 369, in connect
conn.transport.register_with_event_loop(conn.connection, self.hub)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/transport/pyamqp.py", line 124, in register_with_event_loop
loop.add_reader(connection.sock, self.on_readable, connection, loop)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/async/hub.py", line 214, in add_reader
return self.add(fds, callback, READ | ERR, args)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/async/hub.py", line 165, in add
self.poller.register(fd, flags)
File "/usr/local/ABCD/venv/local/lib/python2.7/site-packages/kombu/utils/eventio.py", line 78, in register
self._epoll.register(fd, events)
ValueError: I/O operation on closed epoll fd
This is the init script I use to start the Celery daemon:
# description "Celery worker using sync broker"
console log
start on runlevel [2345]
stop on runlevel [!2345]
setuid yoyo_login
setgid yoyo_login
script
chdir /usr/local/ABCD/abcdegg
exec /usr/local/ABCD/venv/bin/celery worker -n ABCD_sync.%h -A proj.sync_celery -Q sync_queue -l info --autoscale=10,3 --autoreload --without-gossip --without-mingle --without-heartbeat
end script
respawn
Any idea why this error keeps happening every few minutes?
The worker does not seem to be the issue here; rather, RabbitMQ appears to be closing the connection the worker consumes from. Check the RabbitMQ and queue settings themselves. Is there perhaps a proxy in the middle?
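One hedged suggestion on top of that: the worker above is started with --without-heartbeat, so nothing keeps an idle AMQP connection alive, and RabbitMQ or any proxy/load balancer in between may silently drop it. A minimal sketch of enabling heartbeats in a Celery 3.x config (the broker URL and values are illustrative assumptions, not your real ones):

# celeryconfig.py (Celery 3.x uppercase setting names)
BROKER_URL = 'amqp://user:password@rabbit-host:5672//'  # placeholder URL

# Send AMQP heartbeats so an idle connection is not silently dropped
# by RabbitMQ or by a proxy in the middle.
BROKER_HEARTBEAT = 30             # seconds between heartbeats (assumed value)
BROKER_HEARTBEAT_CHECKRATE = 2.0  # check the connection 2x per interval

# Treat a dropped broker connection as retryable instead of fatal.
BROKER_CONNECTION_RETRY = True
BROKER_CONNECTION_MAX_RETRIES = None  # None = retry forever

If you try this, also drop --without-heartbeat from the worker command line, since that flag disables heartbeats regardless of the config.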
Version: redis-py=3.1.0 and redis=3.2.10
Platform: Python 2.7.5 / CentOS Linux release 7.4.1708 (Core)
Infrastructure:
two machines (worker1, worker2) running celery worker services with default concurrency (=8).
one dedicated machine (redis1) running the redis server.
Issue:
After the workers have been running for some time, a worker on machine1 suddenly dies due to a RuntimeError raised after losing its pubsub connection.
machine1.worker.log
[2019-02-01 13:43:39,477: CRITICAL/MainProcess] Unrecoverable error: RuntimeError(u'pubsub connection not set: did you forget to call subscribe() or psubscribe()?',)
Traceback (most recent call last):
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/worker.py", line 205, in start
self.blueprint.start(self)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/bootsteps.py", line 119, in start
step.start(parent)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/bootsteps.py", line 369, in start
return self.obj.start()
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/consumer.py", line 322, in start
blueprint.start(self)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/bootsteps.py", line 119, in start
step.start(parent)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/consumer.py", line 598, in start
c.loop(*c.loop_args())
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/loops.py", line 91, in asynloop
next(loop)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/asynchronous/hub.py", line 354, in create_loop
cb(*cbargs)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/redis.py", line 1047, in on_readable
self.cycle.on_readable(fileno)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/redis.py", line 344, in on_readable
chan.handlers[type]()
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/redis.py", line 674, in _receive
ret.append(self._receive_one(c))
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/redis.py", line 685, in _receive_one
response = c.parse_response()
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/redis/client.py", line 3032, in parse_response
'pubsub connection not set: '
RuntimeError: pubsub connection not set: did you forget to call subscribe() or psubscribe()?
At the same time, I spotted that the worker running on machine2 was suffering because it could not connect to redis. Eventually it managed to recover, reconnect to redis, and receive the queued tasks.
machine2.worker.log
[2019-02-01 14:43:41,722: WARNING/MainProcess] consumer: Connection to broker lost. Trying to re-establish the connection...
Traceback (most recent call last):
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/consumer.py", line 322, in start
blueprint.start(self)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/bootsteps.py", line 119, in start
step.start(parent)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/mingle.py", line 40, in start
self.sync(c)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/mingle.py", line 44, in sync
replies = self.send_hello(c)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/worker/consumer/mingle.py", line 57, in send_hello
replies = inspect.hello(c.hostname, our_revoked._data) or {}
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/app/control.py", line 143, in hello
return self._request('hello', from_node=from_node, revoked=revoked)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/app/control.py", line 95, in _request
timeout=self.timeout, reply=True,
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/celery/app/control.py", line 454, in broadcast
limit, callback, channel=channel,
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/pidbox.py", line 315, in _broadcast
serializer=serializer)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/pidbox.py", line 290, in _publish
serializer=serializer,
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/messaging.py", line 181, in publish
exchange_name, declare,
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/messaging.py", line 203, in _publish
mandatory=mandatory, immediate=immediate,
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/virtual/base.py", line 605, in basic_publish
message, exchange, routing_key, **kwargs
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/virtual/exchange.py", line 151, in deliver
exchange, message, routing_key, **kwargs)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/kombu/transport/redis.py", line 781, in _put_fanout
dumps(message),
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/redis/client.py", line 2716, in publish
return self.execute_command('PUBLISH', channel, message)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/redis/client.py", line 775, in execute_command
return self.parse_response(connection, command_name, **options)
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/redis/client.py", line 789, in parse_response
response = connection.read_response()
File "/opt/c1/cip-middleware/webapp/virtualenv/lib/python2.7/site-packages/redis/connection.py", line 636, in read_response
raise e
ConnectionError: Error while reading from socket: (u'Connection closed by server.',)
[2019-02-01 14:44:50,237: INFO/MainProcess] Received task: c1_cip_middleware.tasks.validate_purchases.run_purchases_validation[a411dd90-ab50-4101-becb-90adda3663a2]
Questions:
1. Under what circumstances is this RuntimeError raised, such that the worker gets into the "unrecoverable" stage and must be stopped?
2. What could be the root cause of this issue, especially given that one worker managed to recover while the other one simply died?
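Not an answer to the root cause, but a mitigation sketch that may keep the worker from reaching the unrecoverable state: make the redis transport time out and probe dead sockets, and keep retrying the connection. Setting names are the Celery 4.x lowercase style; whether each transport option reaches redis-py depends on your exact kombu version, so treat these as assumptions to verify:

# celeryconfig.py
broker_url = 'redis://redis1:6379/0'  # hostname taken from the setup above

broker_transport_options = {
    'socket_timeout': 30,          # fail fast on a dead socket
    'socket_connect_timeout': 10,  # bound connection attempts
    'socket_keepalive': True,      # let the OS probe idle TCP connections
}

# Treat a dropped broker connection as retryable, not fatal.
broker_connection_retry = True
broker_connection_max_retries = None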
I have a Django app in production working together with Celery and Amazon SQS. Every day in my celery logs I see this SSL error:
[ERROR/MainProcess] Empty body: SQSError: 599 gnutls_handshake() failed: An unexpected TLS packet was received.
which is followed by this error while trying to reconnect to the broker:
[2016-12-14 16:06:28,917: WARNING/MainProcess] consumer: Connection to broker lost. Trying to re-establish the connection...
Traceback (most recent call last):
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/celery/worker/consumer/consumer.py", line 318, in start
blueprint.start(self)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/celery/bootsteps.py", line 119, in start
step.start(parent)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/celery/worker/consumer/consumer.py", line 584, in start
c.loop(*c.loop_args())
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/celery/worker/loops.py", line 88, in asynloop
next(loop)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/hub.py", line 284, in create_loop
poll_timeout = fire_timers(propagate=propagate) if scheduled else 1
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/hub.py", line 137, in fire_timers
entry()
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/timer.py", line 68, in __call__
return self.fun(*self.args, **self.kwargs)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/timer.py", line 127, in _reschedules
return fun(*args, **kwargs)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/http/curl.py", line 108, in _timeout_check
self._process_pending_requests()
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/http/curl.py", line 132, in _process_pending_requests
self._process(curl, errno, reason)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/http/curl.py", line 178, in _process
buffer=buffer, effective_url=effective_url, error=error,
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/promises.py", line 146, in __call__
svpending(*ca, **ck)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/promises.py", line 139, in __call__
return self.throw()
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/promises.py", line 136, in __call__
retval = fun(*final_args, **final_kwargs)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/funtools.py", line 100, in _transback
return callback(ret)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/promises.py", line 139, in __call__
return self.throw()
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/promises.py", line 136, in __call__
retval = fun(*final_args, **final_kwargs)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/funtools.py", line 98, in _transback
callback.throw()
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/vine/funtools.py", line 96, in _transback
ret = filter_(*args + (ret,), **kwargs)
File "/home/ubuntu/virtualenvs/env/lib/python3.5/site-packages/kombu/async/aws/connection.py", line 269, in _on_list_ready
raise self._for_status(response, body)
boto.exception.SQSError: SQSError: 599 gnutls_handshake() failed: An unexpected TLS packet was received.
Sometimes the queues crash after this message and I have to restart my Celery workers. In general I am not sure whether Celery tasks are still running and executing correctly after I get this error.
What is the best way to solve this issue?
It looks like I have found the answer to my question. The problem was in the chain kombu -> pycurl -> curl -> GnuTLS: the GnuTLS library keeps dropping the SSL connections. The only solution I found was to force libcurl to work with OpenSSL instead of GnuTLS.
Here is my step-by-step guide, "How to reinstall curl".
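In short, the approach looks like this (a condensed sketch, assuming Ubuntu/Debian package names; follow the full guide for your distro):

# 1. Swap the GnuTLS flavour of libcurl for the OpenSSL one.
sudo apt-get remove libcurl4-gnutls-dev
sudo apt-get install libcurl4-openssl-dev

# 2. Rebuild pycurl in the virtualenv against the new libcurl.
pip uninstall -y pycurl
export PYCURL_SSL_LIBRARY=openssl
pip install --no-cache-dir pycurl

# 3. Verify which SSL backend libcurl reports now.
python -c "import pycurl; print(pycurl.version)"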
Sorry, guys, but my bounty goes back to me! ;-)
I have 3 machines with celery workers and rabbitmq as a broker; one worker is running with the beat flag, and all of this is managed by supervisor. Sometimes celery dies with the error below.
The error appears only on the beat worker, but when it appears, the workers on all machines die.
(celery==3.1.12, kombu==3.0.20)
[2014-07-05 08:37:04,297: INFO/MainProcess] Connected to amqp://user:**@192.168.15.106:5672//
[2014-07-05 08:37:04,311: ERROR/Beat] Process Beat
Traceback (most recent call last):
File "/var/projects/env/local/lib/python2.7/site-packages/billiard/process.py", line 292, in _bootstrap
self.run()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 527, in run
self.service.start(embedded_process=True)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 453, in start
humanize_seconds(self.scheduler.max_interval))
File "/var/projects/env/local/lib/python2.7/site-packages/kombu/utils/__init__.py", line 322, in __get__
value = obj.__dict__[self.__name__] = self.__get(obj)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 491, in scheduler
return self.get_scheduler()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 486, in get_scheduler
lazy=lazy)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/utils/imports.py", line 53, in instantiate
return symbol_by_name(name)(*args, **kwargs)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 357, in __init__
Scheduler.__init__(self, *args, **kwargs)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 184, in __init__
self.setup_schedule()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 376, in setup_schedule
self._store['entries']
File "/usr/lib/python2.7/shelve.py", line 121, in __getitem__
f = StringIO(self.dict[key])
File "/usr/lib/python2.7/bsddb/__init__.py", line 270, in __getitem__
return _DeadlockWrap(lambda: self.db[key]) # self.db[key]
File "/usr/lib/python2.7/bsddb/dbutils.py", line 68, in DeadlockWrap
return function(*_args, **_kwargs)
File "/usr/lib/python2.7/bsddb/__init__.py", line 270, in <lambda>
return _DeadlockWrap(lambda: self.db[key]) # self.db[key]
DBPageNotFoundError: (-30985, 'DB_PAGE_NOTFOUND: Requested page not found')
I've run into this issue and the cause was a corrupted db file (usually named "celerybeat-schedule").
The solution is to delete the existing db file and restart the process.
Relevant: bsddb.db.DBPageNotFoundError
https://mail.python.org/pipermail/python-list/2009-October/554552.html
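For example (a minimal sketch; the schedule file path and the supervisor program name are placeholders for whatever your setup uses):

supervisorctl stop celery-beat          # placeholder program name
rm /var/projects/celerybeat-schedule    # delete the corrupted schedule db (assumed path)
supervisorctl start celery-beat         # beat recreates the file on startup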
I had to remove some temp files in the /tmp directory, named celeryd-<NAME_OF_WORKER>-state and celeryd-<NAME_OF_WORKER>-state-renamed. After removing those, I was able to restart my affected worker.
I have a celery setup with django & redis.
When I run celery from my user with a command like celery multi start 123_work -A 123 --pidfile="/var/log/celery/%n.pid" --logfile="/var/log/celery/%n.log" --workdir="/data/ports/dj_dois" --loglevel=INFO, jobs work fine, but if I run celery via celeryd or supervisor, some jobs give me this error:
[2015-12-28 09:10:59,229: ERROR/MainProcess] Unrecoverable error: UnpicklingError('NEWOBJ class argument has NULL tp_new',)
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/celery/worker/__init__.py", line 206, in start
self.blueprint.start(self)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 374, in start
return self.obj.start()
File "/usr/local/lib/python3.4/dist-packages/celery/worker/consumer.py", line 278, in start
blueprint.start(self)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/lib/python3.4/dist-packages/celery/worker/consumer.py", line 821, in start
c.loop(*c.loop_args())
File "/usr/local/lib/python3.4/dist-packages/celery/worker/loops.py", line 76, in asynloop
next(loop)
File "/usr/local/lib/python3.4/dist-packages/kombu/async/hub.py", line 328, in create_loop
next(cb)
File "/usr/local/lib/python3.4/dist-packages/celery/concurrency/asynpool.py", line 258, in _recv_message
message = load(bufv)
_pickle.UnpicklingError: NEWOBJ class argument has NULL tp_new
[2015-12-28 09:10:59,317: ERROR/MainProcess] Task db_select_task[dd5af67d-6bbe-49bb-8f13-59d0a0a9717b] raised unexpected: WorkerLostError('Worker exited prematurely: exitcode 0.',)
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/celery/worker/__init__.py", line 206, in start
self.blueprint.start(self)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 374, in start
return self.obj.start()
File "/usr/local/lib/python3.4/dist-packages/celery/worker/consumer.py", line 278, in start
blueprint.start(self)
File "/usr/local/lib/python3.4/dist-packages/celery/bootsteps.py", line 123, in start
step.start(parent)
File "/usr/local/lib/python3.4/dist-packages/celery/worker/consumer.py", line 821, in start
c.loop(*c.loop_args())
File "/usr/local/lib/python3.4/dist-packages/celery/worker/loops.py", line 76, in asynloop
next(loop)
File "/usr/local/lib/python3.4/dist-packages/kombu/async/hub.py", line 328, in create_loop
next(cb)
File "/usr/local/lib/python3.4/dist-packages/celery/concurrency/asynpool.py", line 258, in _recv_message
message = load(bufv)
_pickle.UnpicklingError: NEWOBJ class argument has NULL tp_new
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/billiard/pool.py", line 1175, in mark_as_worker_lost
human_status(exitcode)),
billiard.exceptions.WorkerLostError: Worker exited prematurely: exitcode 0.
My celery version:
software -> celery:3.1.19 (Cipater) kombu:3.0.32 py:3.4.2
billiard:3.3.0.22 py-amqp:1.4.8
platform -> system:Linux arch:64bit, ELF imp:CPython
loader -> celery.loaders.default.Loader
settings -> transport:amqp results:disabled
Python - 3.4
Django - 1.8.7
Redis server v=2.8.17
Example of a job that gives me the error:
from celery import shared_task
import cx_Oracle
from sqlalchemy import pool  # assumption: `pool` here is SQLAlchemy's pool module

@shared_task(name='db_select_task')
def db_select_task(arg1, arg2):
    conn_pool = pool.manage(cx_Oracle)
    db = conn_pool.connect("user/pass@db")  # credentials/DSN elided in the original
    try:
        cursor = db.cursor()
        ports = {}
        t = tech  # 'tech' is defined elsewhere in the original module
        cursor.execute("sql")  # placeholder query from the original
        data = cursor.fetchall()
    except Exception:
        return 'Error: with db'
    finally:
        cursor.close()
        db.close()
    return data
The problem was with the Oracle paths for the celeryd daemon. Just add additional exports to the celeryd config, as sketched below.
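Concretely, something like the following in the celeryd config (a sketch; the Oracle paths are assumptions, so copy the values from the shell where the manual run works):

# /etc/default/celeryd
export ORACLE_HOME=/usr/lib/oracle/11.2/client64          # assumed client path
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:$LD_LIBRARY_PATH  # so cx_Oracle finds libclntsh

Without these exports the daemonized worker cannot find the Oracle client libraries that cx_Oracle loads, while an interactive shell that has them exported works fine.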