Twint scraping: ClientPayloadError: Response payload is not completed - python

While I was scraping tweets for a certain hashtag using twint, I received the error below. Can anyone explain why this error occurs and how I can fix it? Tweets up to a certain date were scraped, but the tweets from before that day could not be scraped because of this error.
Thank you in advance for your help!
---------------------------------------------------------------------------
ClientPayloadError Traceback (most recent call last)
<ipython-input-8-f28f8e9aab1e> in <module>
----> 1 twint.run.Search(c)
~/.local/lib/python3.8/site-packages/twint/run.py in Search(config, callback)
408 config.Followers = False
409 config.Profile = False
--> 410 run(config, callback)
411 if config.Pandas_au:
412 storage.panda._autoget("tweet")
~/.local/lib/python3.8/site-packages/twint/run.py in run(config, callback)
327 raise
328
--> 329 get_event_loop().run_until_complete(Twint(config).main(callback))
330
331
~/opt/anaconda3/lib/python3.8/asyncio/base_events.py in run_until_complete(self, future)
614 raise RuntimeError('Event loop stopped before Future completed.')
615
--> 616 return future.result()
617
618 def stop(self):
~/.local/lib/python3.8/site-packages/twint/run.py in main(self, callback)
233 task.add_done_callback(callback)
234
--> 235 await task
236
237 async def run(self):
~/.local/lib/python3.8/site-packages/twint/run.py in run(self)
284 elif self.config.TwitterSearch:
285 logme.debug(__name__ + ':Twint:main:twitter-search')
--> 286 await self.tweets()
287 else:
288 logme.debug(__name__ + ':Twint:main:no-more-tweets')
~/.local/lib/python3.8/site-packages/twint/run.py in tweets(self)
215
216 async def tweets(self):
--> 217 await self.Feed()
218 # TODO : need to take care of this later
219 if self.config.Location:
~/.local/lib/python3.8/site-packages/twint/run.py in Feed(self)
60 # this will receive a JSON string, parse it into a `dict` and do the required stuff
61 try:
---> 62 response = await get.RequestUrl(self.config, self.init)
63 except TokenExpiryException as e:
64 logme.debug(__name__ + 'Twint:Feed:' + str(e))
~/.local/lib/python3.8/site-packages/twint/get.py in RequestUrl(config, init)
133 _serialQuery = _url
134
--> 135 response = await Request(_url, params=params, connector=_connector, headers=_headers)
136
137 if config.Debug:
~/.local/lib/python3.8/site-packages/twint/get.py in Request(_url, connector, params, headers)
159 logme.debug(__name__ + ':Request:Connector')
160 async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
--> 161 return await Response(session, _url, params)
162
163
~/.local/lib/python3.8/site-packages/twint/get.py in Response(session, _url, params)
166 with timeout(120):
167 async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
--> 168 resp = await response.text()
169 if response.status == 429: # 429 implies Too many requests i.e. Rate Limit Exceeded
170 raise TokenExpiryException(loads(resp)['errors'][0]['message'])
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in text(self, encoding, errors)
1074 """Read response payload and decode."""
1075 if self._body is None:
-> 1076 await self.read()
1077
1078 if encoding is None:
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in read(self)
1030 if self._body is None:
1031 try:
-> 1032 self._body = await self.content.read()
1033 for trace in self._traces:
1034 await trace.send_response_chunk_received(
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in read(self, n)
368 blocks = []
369 while True:
--> 370 block = await self.readany()
371 if not block:
372 break
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in readany(self)
390 # without feeding any data
391 while not self._buffer and not self._eof:
--> 392 await self._wait("readany")
393
394 return self._read_nowait(-1)
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in _wait(self, func_name)
304 if self._timer:
305 with self._timer:
--> 306 await waiter
307 else:
308 await waiter
ClientPayloadError: Response payload is not completed

Not sure if you found a solution to your problem, but I thought I would add this here for anyone looking in the future:
https://github.com/twintproject/twint/issues/1099
Essentially, the link above suggests using a try/except block to capture the error and try again; see the sketch below if that works for your code.
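A minimal sketch of that retry approach; the hashtag and the retry limit are placeholders, and we catch aiohttp's ClientPayloadError since that is what propagates out of twint in the traceback above:

import twint
from aiohttp import ClientPayloadError

c = twint.Config()
c.Search = "#example"  # placeholder hashtag

for attempt in range(5):  # arbitrary retry limit
    try:
        twint.run.Search(c)
        break  # search finished without the payload error
    except ClientPayloadError:
        continue  # try the same search again, as the issue above suggests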
I have also found that twint works better on Python 3.6, which might help!
Good luck :)

Related

How to prevent RateLimitError with exchangelib in Python

I'm pulling all the emails from some 8 different postboxes daily via exchangelib. It worked fine the whole week, but now the code seems to be throttled by the Exchange server, as the error below gets thrown while it's trying to grab the first email. I want to learn how to handle this so I don't get throttled anymore. I have already implemented one retry policy:
credentials = Credentials(username='username', password='password')
config = Configuration(retry_policy=FaultTolerance(max_wait=600), credentials=credentials)
For that I'm using the following code:
while True:
    try:
        for shared_postbox in tqdm(shared_postboxes):
            account = Account(shared_postbox, credentials=credentials, config=config, autodiscover=True)
            top_folder = account.root
            email_folders = [f for f in top_folder.walk() if isinstance(f, Messages)]
            for folder in tqdm(email_folders):
                # added item_class in filter and removed order by
                # for m in folder.all().only('text_body', 'datetime_received', "sender").filter(datetime_received__range=(start_of_month, end_of_month), sender__exists=True):
                # when a since statement is needed
                for m in folder.all().only('text_body', 'datetime_received', "sender").filter(datetime_received__gt=midnight, sender__exists=True):
                    try:
                        senderdomain = ExtractingDomain(m.sender.email_address)
                        senderdomains.append(senderdomain)
                    except:
                        print("could not extract domain")
                    else:
                        if senderdomain in domains_of_interest:
                            postboxname = account.identity.primary_smtp_address
                            body = m.text_body
                            emails.append(body)
                            sender.append(senderdomain)
                            postbox.append(postboxname)
                            received.append(m.datetime_received)
                        # else:
                        #     print("not in domains of interest")
            account.protocol.close()
    except RateLimitError as e:
        time.sleep(60)
The following error is what I get:
RateLimitError Traceback (most recent call last)
Input In [4], in <cell line: 77>()
81 account = Account(shared_postbox, credentials=credentials, config = config, autodiscover=True)
---> 82 top_folder = account.root
83 email_folders = [f for f in top_folder.walk() if isinstance(f, Messages)]
File ~\.conda\envs\python383\lib\site-packages\cached_property.py:74, in threaded_cached_property.__get__(self, obj, cls)
72 except KeyError:
73 # if not, do the calculation and release the lock
---> 74 return obj_dict.setdefault(name, self.func(obj))
File ~\.conda\envs\python383\lib\site-packages\exchangelib\account.py:349, in Account.root(self)
347 @threaded_cached_property
348 def root(self):
--> 349 return Root.get_distinguished(account=self)
File ~\.conda\envs\python383\lib\site-packages\exchangelib\folders\roots.py:114, in RootOfHierarchy.get_distinguished(cls, account)
113 try:
--> 114 return cls.resolve(
115 account=account, folder=cls(account=account, name=cls.DISTINGUISHED_FOLDER_ID, is_distinguished=True)
116 )
117 except MISSING_FOLDER_ERRORS:
File ~\.conda\envs\python383\lib\site-packages\exchangelib\folders\base.py:512, in BaseFolder.resolve(cls, account, folder)
509 @classmethod
510 def resolve(cls, account, folder):
511 # Resolve a single folder
--> 512 folders = list(FolderCollection(account=account, folders=[folder]).resolve())
513 if not folders:
File ~\.conda\envs\python383\lib\site-packages\exchangelib\folders\collections.py:335, in FolderCollection.resolve(self)
334 additional_fields = self.get_folder_fields(target_cls=self._get_target_cls())
--> 335 yield from self.__class__(account=self.account, folders=resolveable_folders).get_folders(
336 additional_fields=additional_fields
337 )
File ~\.conda\envs\python383\lib\site-packages\exchangelib\folders\collections.py:403, in FolderCollection.get_folders(self, additional_fields)
399 additional_fields.update(
400 (FieldPath(field=BaseFolder.get_field_by_fieldname(f)) for f in self.REQUIRED_FOLDER_FIELDS)
401 )
--> 403 yield from GetFolder(account=self.account).call(
404 folders=self.folders,
405 additional_fields=additional_fields,
406 shape=ID_ONLY,
407 )
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\get_folder.py:43, in GetFolder._elems_to_objs(self, elems)
42 def _elems_to_objs(self, elems):
---> 43 for folder, elem in zip(self.folders, elems):
44 if isinstance(elem, Exception):
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\common.py:246, in EWSService._chunked_get_elements(self, payload_func, items, **kwargs)
245 log.debug("Processing chunk %s containing %s items", i, len(chunk))
--> 246 yield from self._get_elements(payload=payload_func(chunk, **kwargs))
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\common.py:266, in EWSService._get_elements(self, payload)
263 try:
264 # Create a generator over the response elements so exceptions in response elements are also raised
265 # here and can be handled.
--> 266 yield from self._response_generator(payload=payload)
267 return
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\common.py:228, in EWSService._response_generator(self, payload)
223 """Send the payload to the server, and return the response.
224
225 :param payload: payload as an XML object
226 :return: the response, as XML objects
227 """
--> 228 response = self._get_response_xml(payload=payload)
229 if self.supports_paging:
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\common.py:343, in EWSService._get_response_xml(self, payload, **parse_opts)
342 log.debug("Trying API version %s", api_version)
--> 343 r = self._get_response(payload=payload, api_version=api_version)
344 if self.streaming:
345 # Let 'requests' decode raw data automatically
File ~\.conda\envs\python383\lib\site-packages\exchangelib\services\common.py:298, in EWSService._get_response(self, payload, api_version)
297 session = self.protocol.get_session()
--> 298 r, session = post_ratelimited(
299 protocol=self.protocol,
300 session=session,
301 url=self.protocol.service_endpoint,
302 headers=self._extra_headers(session),
303 data=wrap(
304 content=payload,
305 api_version=api_version,
306 account_to_impersonate=self._account_to_impersonate,
307 timezone=self._timezone,
308 ),
309 stream=self.streaming,
310 timeout=self.timeout or self.protocol.TIMEOUT,
311 )
312 self._handle_response_cookies(session)
File ~\.conda\envs\python383\lib\site-packages\exchangelib\util.py:880, in post_ratelimited(protocol, session, url, headers, data, allow_redirects, stream, timeout)
879 total_wait = time.monotonic() - t_start
--> 880 if protocol.retry_policy.may_retry_on_error(response=r, wait=total_wait):
881 r.close() # Release memory
File ~\.conda\envs\python383\lib\site-packages\exchangelib\protocol.py:780, in FaultTolerance.may_retry_on_error(self, response, wait)
778 if wait > self.max_wait:
779 # We lost patience. Session is cleaned up in outer loop
--> 780 raise RateLimitError(
781 "Max timeout reached", url=response.url, status_code=response.status_code, total_wait=wait
782 )
783 if response.status_code == 401:
784 # EWS sometimes throws 401's when it wants us to throttle connections. OK to retry.
RateLimitError: Max timeout reached (gave up after 634.031 seconds. URL https://outlook.office365.com/EWS/Exchange.asmx returned status code 401)
When I looked into it, I saw that exchangelib has a function to handle the throttling policy, but I don't know how to use it. Could the function
def post_ratelimited(protocol, session, url, headers, data, stream=False, timeout=None)
help me in this case? I found it in their documentation.
You defined a policy that tells exchangelib to retry for up to 600 seconds. The code threw an exception after waiting for more than 600 seconds. That's how it's supposed to work.
If you want the code to retry for a longer period, then increase the max_wait value.
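For example, a sketch that allows retries for up to an hour instead (3600 is an arbitrary value; tune it to your tolerance):

from exchangelib import Credentials, Configuration, FaultTolerance

credentials = Credentials(username='username', password='password')
# retry with back-off for up to one hour before raising RateLimitError
config = Configuration(retry_policy=FaultTolerance(max_wait=3600), credentials=credentials)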
A guide to EWS throttling and how to handle it is here: https://learn.microsoft.com/en-us/exchange/client-developer/exchange-web-services/ews-throttling-in-exchange

How to prevent TimeoutError on asyncio when making large numbers of API calls

I'm new to the asyncio module; until recently I used requests for the task I'm about to describe.
I'm trying to scale a script that works well with up to 120 calls using requests. However, with requests being single-threaded, it would take forever to make 1000 API calls (which is what I'm trying to achieve). This is where I found asyncio, which makes asynchronous requests.
This is a script I put together for asyncio with the help of this article.
url = 'https://api.url.com/search?api_key=api_key&api_params=multiple_params'
queries = ['online slots', 'metaverse', 'sports betting', 'basketball odds', 'soccer odds', 'online poker', 'best casinos in germany', 'barbecue grills', 'outdoor pizza ovens']
results = []

def get_tasks(session):
    tasks = []
    for q in queries:
        tasks.append(asyncio.create_task(session.get(url.format(q), ssl=False)))
    return tasks

timeout = ClientTimeout(total=500)

async def get_queries():
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = get_tasks(session)
        responses = await asyncio.gather(*tasks)
        for response in responses:
            results.append(await response.json())

asyncio.run(get_queries())
It seems to work fine in most instances, but it times out on many occasions, such as when I'm using the German queries and when it is making more than 500 API calls.
Below is what I keep getting back. As you can see in the script, I've added a client timeout.
---------------------------------------------------------------------------
TimeoutError Traceback (most recent call last)
<ipython-input-4-8c48df090394> in <module>
33 results.append(await response.json())
34
---> 35 asyncio.run(get_queries())
/opt/anaconda3/lib/python3.8/site-packages/nest_asyncio.py in run(future, debug)
30 loop = asyncio.get_event_loop()
31 loop.set_debug(debug)
---> 32 return loop.run_until_complete(future)
33
34 if sys.version_info >= (3, 6, 0):
/opt/anaconda3/lib/python3.8/site-packages/nest_asyncio.py in run_until_complete(self, future)
68 raise RuntimeError(
69 'Event loop stopped before Future completed.')
---> 70 return f.result()
71
72 def _run_once(self):
/opt/anaconda3/lib/python3.8/asyncio/futures.py in result(self)
176 self.__log_traceback = False
177 if self._exception is not None:
--> 178 raise self._exception
179 return self._result
180
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __step(***failed resolving arguments***)
278 # We use the `send` method directly, because coroutines
279 # don't have `__iter__` and `__next__` methods.
--> 280 result = coro.send(None)
281 else:
282 result = coro.throw(exc)
<ipython-input-4-8c48df090394> in get_queries()
29 async with aiohttp.ClientSession(timeout=timeout) as session:
30 tasks = get_tasks(session)
---> 31 responses = await asyncio.gather(*tasks)
32 for response in responses:
33 results.append(await response.json())
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __wakeup(self, future)
347 def __wakeup(self, future):
348 try:
--> 349 future.result()
350 except BaseException as exc:
351 # This may also be a cancellation.
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __step(***failed resolving arguments***)
280 result = coro.send(None)
281 else:
--> 282 result = coro.throw(exc)
283 except StopIteration as exc:
284 if self._must_cancel:
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client.py in throw(self, arg)
1123
1124 def throw(self, arg: BaseException) -> None: # type: ignore[arg-type,override]
-> 1125 self._coro.throw(arg)
1126
1127 def close(self) -> None:
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client.py in _request(self, method, str_or_url, params, data, json, cookies, headers, skip_auto_headers, auth, allow_redirects, max_redirects, compress, chunked, expect100, raise_for_status, read_until_eof, proxy, proxy_auth, timeout, verify_ssl, fingerprint, ssl_context, ssl, proxy_headers, trace_request_ctx, read_bufsize)
557 resp = await req.send(conn)
558 try:
--> 559 await resp.start(conn)
560 except BaseException:
561 resp.close()
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in start(self, connection)
911 if self._continue is not None:
912 set_result(self._continue, True)
--> 913 self._continue = None
914
915 # payload eof handler
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/helpers.py in __exit__(self, exc_type, exc_val, exc_tb)
719
720 if exc_type is asyncio.CancelledError and self._cancelled:
--> 721 raise asyncio.TimeoutError from None
722 return None
723
TimeoutError:
Can anyone help me figure out what I'm doing wrong, and how to avoid timeouts for large numbers of API calls with asyncio?
Much appreciated!
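One pattern that often helps with bursts like this (a sketch, not from the original thread; the limit of 50 and the URL shape are placeholders) is to cap the number of in-flight requests with an asyncio.Semaphore, so hundreds of simultaneous requests don't starve each other past the client timeout:

import asyncio
import aiohttp
from aiohttp import ClientTimeout

url = 'https://api.url.com/search?api_key=api_key&q={}'  # placeholder URL with a {} slot for the query
queries = ['online slots', 'metaverse']  # same idea as the list above
timeout = ClientTimeout(total=500)

async def fetch(session, semaphore, q):
    async with semaphore:  # wait for a free slot before sending the request
        async with session.get(url.format(q), ssl=False) as response:
            return await response.json()

async def get_queries():
    semaphore = asyncio.Semaphore(50)  # at most 50 requests in flight; created inside the running loop
    async with aiohttp.ClientSession(timeout=timeout) as session:
        return await asyncio.gather(*(fetch(session, semaphore, q) for q in queries))

results = asyncio.run(get_queries())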

When terminating a celery chain conditionally, it does not return the data

I have a chain of tasks and want to terminate it conditionally. I am following the steps in https://stackoverflow.com/a/21106596/243031, but after that we do not get the output.
I have tasks like this:
from __future__ import absolute_import, unicode_literals
from .celery import app

@app.task(bind=True)
def add(self, x, y):
    if (x + y) % 2 == 0:
        self.request.callbacks[:] = []
    return x + y
This means: when the sum is even and the task is part of a chain, stop that chain.
But it gives an error.
In [13]: ~(add.s(2, 2) | add.s(3))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-13-0fe85c5d0e22> in <module>
----> 1 ~(add.s(2, 2) | add.s(3))
~/virtualenv/lib/python3.7/site-packages/celery/canvas.py in __invert__(self)
479
480 def __invert__(self):
--> 481 return self.apply_async().get()
482
483 def __reduce__(self):
~/virtualenv/lib/python3.7/site-packages/celery/result.py in get(self, timeout, propagate, interval, no_ack, follow_parents, callback, on_message, on_interval, disable_sync_subtasks, EXCEPTION_STATES, PROPAGATE_STATES)
228 propagate=propagate,
229 callback=callback,
--> 230 on_message=on_message,
231 )
232 wait = get # deprecated alias to :meth:`get`.
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in wait_for_pending(self, result, callback, propagate, **kwargs)
197 callback=None, propagate=True, **kwargs):
198 self._ensure_not_eager()
--> 199 for _ in self._wait_for_pending(result, **kwargs):
200 pass
201 return result.maybe_throw(callback=callback, propagate=propagate)
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in _wait_for_pending(self, result, timeout, on_interval, on_message, **kwargs)
265 for _ in self.drain_events_until(
266 result.on_ready, timeout=timeout,
--> 267 on_interval=on_interval):
268 yield
269 sleep(0)
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in drain_events_until(self, p, timeout, interval, on_interval, wait)
56 pass
57 if on_interval:
---> 58 on_interval()
59 if p.ready: # got event on the wanted channel.
60 break
~/virtualenv/lib/python3.7/site-packages/vine/promises.py in __call__(self, *args, **kwargs)
158 self.value = (ca, ck) = (retval,), {}
159 except Exception:
--> 160 return self.throw()
161 else:
162 self.value = (ca, ck) = final_args, final_kwargs
~/virtualenv/lib/python3.7/site-packages/vine/promises.py in __call__(self, *args, **kwargs)
155 ck = {}
156 else:
--> 157 retval = fun(*final_args, **final_kwargs)
158 self.value = (ca, ck) = (retval,), {}
159 except Exception:
~/virtualenv/lib/python3.7/site-packages/celery/result.py in _maybe_reraise_parent_error(self)
234 def _maybe_reraise_parent_error(self):
235 for node in reversed(list(self._parents())):
--> 236 node.maybe_throw()
237
238 def _parents(self):
~/virtualenv/lib/python3.7/site-packages/celery/result.py in maybe_throw(self, propagate, callback)
333 cache['status'], cache['result'], cache.get('traceback'))
334 if state in states.PROPAGATE_STATES and propagate:
--> 335 self.throw(value, self._to_remote_traceback(tb))
336 if callback is not None:
337 callback(self.id, value)
~/virtualenv/lib/python3.7/site-packages/celery/result.py in throw(self, *args, **kwargs)
326
327 def throw(self, *args, **kwargs):
--> 328 self.on_ready.throw(*args, **kwargs)
329
330 def maybe_throw(self, propagate=True, callback=None):
~/virtualenv/lib/python3.7/site-packages/vine/promises.py in throw(self, exc, tb, propagate)
232 if tb is None and (exc is None or exc is current_exc):
233 raise
--> 234 reraise(type(exc), exc, tb)
235
236 @property
~/virtualenv/lib/python3.7/site-packages/vine/utils.py in reraise(tp, value, tb)
28 if value.__traceback__ is not tb:
29 raise value.with_traceback(tb)
---> 30 raise value
TypeError: 'NoneType' object does not support item assignment
I tried self.request.chain = None and self.request.chain[:] = []:
@app.task(bind=True)
def add(self, x, y):
    if self.request.chain and (x + y) % 2 == 0:
        self.request.chain = None
    return x + y
The logs show that it returns the data:
[2021-03-24 22:11:48,795: WARNING/MainProcess] Substantial drift from scrpc_worker@458cd596aed3 may mean clocks are out of sync. Current drift is 14400 seconds. [orig: 2021-03-24 22:11:48.795402 recv: 2021-03-25 02:11:48.796579]
[2021-03-24 22:11:52,227: INFO/MainProcess] Events of group {task} enabled by remote.
[2021-03-24 22:11:57,853: INFO/MainProcess] Received task: myprj.tasks.add[8aaae68f-d5ca-4c0a-8f2e-f1c7b5916e29]
[2021-03-24 22:11:57,867: INFO/ForkPoolWorker-8] Task myprj.tasks.add[8aaae68f-d5ca-4c0a-8f2e-f1c7b5916e29] succeeded in 0.01066690299999884s: 4
but then it waits on a socket, and when we press Ctrl+C, it gives the traceback below.
In [1]: from myprj.tasks import add
In [2]: ~(add.s(2, 2) | add.s(3))
^C---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-2-0fe85c5d0e22> in <module>
----> 1 ~(add.s(2, 2) | add.s(3))
~/virtualenv/lib/python3.7/site-packages/celery/canvas.py in __invert__(self)
479
480 def __invert__(self):
--> 481 return self.apply_async().get()
482
483 def __reduce__(self):
~/virtualenv/lib/python3.7/site-packages/celery/result.py in get(self, timeout, propagate, interval, no_ack, follow_parents, callback, on_message, on_interval, disable_sync_subtasks, EXCEPTION_STATES, PROPAGATE_STATES)
228 propagate=propagate,
229 callback=callback,
--> 230 on_message=on_message,
231 )
232 wait = get # deprecated alias to :meth:`get`.
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in wait_for_pending(self, result, callback, propagate, **kwargs)
197 callback=None, propagate=True, **kwargs):
198 self._ensure_not_eager()
--> 199 for _ in self._wait_for_pending(result, **kwargs):
200 pass
201 return result.maybe_throw(callback=callback, propagate=propagate)
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in _wait_for_pending(self, result, timeout, on_interval, on_message, **kwargs)
265 for _ in self.drain_events_until(
266 result.on_ready, timeout=timeout,
--> 267 on_interval=on_interval):
268 yield
269 sleep(0)
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in drain_events_until(self, p, timeout, interval, on_interval, wait)
52 raise socket.timeout()
53 try:
---> 54 yield self.wait_for(p, wait, timeout=interval)
55 except socket.timeout:
56 pass
~/virtualenv/lib/python3.7/site-packages/celery/backends/asynchronous.py in wait_for(self, p, wait, timeout)
61
62 def wait_for(self, p, wait, timeout=None):
---> 63 wait(timeout=timeout)
64
65
~/virtualenv/lib/python3.7/site-packages/celery/backends/redis.py in drain_events(self, timeout)
149 if self._pubsub:
150 with self.reconnect_on_error():
--> 151 message = self._pubsub.get_message(timeout=timeout)
152 if message and message['type'] == 'message':
153 self.on_state_change(self._decode_result(message['data']), message)
~/virtualenv/lib/python3.7/site-packages/redis/client.py in get_message(self, ignore_subscribe_messages, timeout)
3615 number.
3616 """
-> 3617 response = self.parse_response(block=False, timeout=timeout)
3618 if response:
3619 return self.handle_message(response, ignore_subscribe_messages)
~/virtualenv/lib/python3.7/site-packages/redis/client.py in parse_response(self, block, timeout)
3501 self.check_health()
3502
-> 3503 if not block and not conn.can_read(timeout=timeout):
3504 return None
3505 response = self._execute(conn, conn.read_response)
~/virtualenv/lib/python3.7/site-packages/redis/connection.py in can_read(self, timeout)
732 self.connect()
733 sock = self._sock
--> 734 return self._parser.can_read(timeout)
735
736 def read_response(self):
~/virtualenv/lib/python3.7/site-packages/redis/connection.py in can_read(self, timeout)
319
320 def can_read(self, timeout):
--> 321 return self._buffer and self._buffer.can_read(timeout)
322
323 def read_response(self):
~/virtualenv/lib/python3.7/site-packages/redis/connection.py in can_read(self, timeout)
229 return bool(self.length) or \
230 self._read_from_socket(timeout=timeout,
--> 231 raise_on_timeout=False)
232
233 def read(self, length):
~/virtualenv/lib/python3.7/site-packages/redis/connection.py in _read_from_socket(self, length, timeout, raise_on_timeout)
196 sock.settimeout(timeout)
197 while True:
--> 198 data = recv(self._sock, socket_read_size)
199 # an empty string indicates the server shutdown the socket
200 if isinstance(data, bytes) and len(data) == 0:
~/virtualenv/lib/python3.7/site-packages/redis/_compat.py in recv(sock, *args, **kwargs)
70 else: # Python 3.5 and above automatically retry EINTR
71 def recv(sock, *args, **kwargs):
---> 72 return sock.recv(*args, **kwargs)
73
74 def recv_into(sock, *args, **kwargs):
KeyboardInterrupt:
First of all, is terminating the chain a good option, or should we add a condition to the next task in the chain so that, under some condition, it doesn't process and just returns what it received?
If we have to terminate the chain, what is the best option, one where you still get the output of the partial chain?
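For the first alternative, here is a sketch (not from the question; the 'stopped' key is a placeholder convention) where each task short-circuits instead of killing the chain, so the partial output still comes back as the final result:

from __future__ import absolute_import, unicode_literals
from .celery import app

@app.task(bind=True)
def add(self, x, y):
    # If an upstream task already decided to stop, pass its result through.
    if isinstance(x, dict) and x.get('stopped'):
        return x
    total = x + y
    if total % 2 == 0:
        # Mark the chain as stopped but keep the partial output.
        return {'stopped': True, 'value': total}
    return total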

Got 504 Deadline Exceeded in Jupyter Notebook (Python) with BigQuery

I am trying to get the result of a Google BigQuery query into a pandas DataFrame (in a Jupyter notebook).
But every time I try to run the query I get a DeadlineExceeded: 504 Deadline Exceeded.
This happens not only for queries in my own BQ project but also for other projects.
I have tried a lot of options to run the query, like those in here: https://cloud.google.com/bigquery/docs/bigquery-storage-python-pandas
Does anyone have an idea how to fix this?
Query:
%load_ext google.cloud.bigquery
%%bigquery tax_forms --use_bqstorage_api
SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
---------------------------------------------------------------------------
_MultiThreadedRendezvous Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
149 prefetch_first = getattr(callable_, "_prefetch_first_result_", True)
--> 150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in __init__(self, wrapped, prefetch_first_result)
72 if prefetch_first_result:
---> 73 self._stored_first_result = six.next(self._wrapped)
74 except TypeError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in __next__(self)
415 def __next__(self):
--> 416 return self._next()
417
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in _next(self)
705 elif self._state.code is not None:
--> 706 raise self
707
_MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "{"created":"#1597838569.388000000","description":"Error received from peer ipv4:172.217.168.202:443","file":"src/core/lib/surface/call.cc","file_line":1062,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>
The above exception was the direct cause of the following exception:
DeadlineExceeded Traceback (most recent call last)
<ipython-input-2-4fdaec7219df> in <module>
----> 1 get_ipython().run_cell_magic('bigquery', 'tax_forms --use_bqstorage_api', 'SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`\n')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2357 with self.builtin_trap:
2358 args = (magic_arg_s, cell)
-> 2359 result = fn(*args, **kwargs)
2360 return result
2361
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\magics.py in _cell_magic(line, query)
589 )
590 else:
--> 591 result = query_job.to_dataframe(bqstorage_client=bqstorage_client)
592
593 if args.destination_var:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\job.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
3381 progress_bar_type=progress_bar_type,
3382 create_bqstorage_client=create_bqstorage_client,
-> 3383 date_as_object=date_as_object,
3384 )
3385
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
1726 progress_bar_type=progress_bar_type,
1727 bqstorage_client=bqstorage_client,
-> 1728 create_bqstorage_client=create_bqstorage_client,
1729 )
1730
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_arrow(self, progress_bar_type, bqstorage_client, create_bqstorage_client)
1544 record_batches = []
1545 for record_batch in self._to_arrow_iterable(
-> 1546 bqstorage_client=bqstorage_client
1547 ):
1548 record_batches.append(record_batch)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in _to_page_iterable(self, bqstorage_download, tabledata_list_download, bqstorage_client)
1433 ):
1434 if bqstorage_client is not None:
-> 1435 for item in bqstorage_download():
1436 yield item
1437 return
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage(project_id, table, bqstorage_client, preserve_order, selected_fields, page_to_item)
723 # Call result() on any finished threads to raise any
724 # exceptions encountered.
--> 725 future.result()
726
727 try:
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage_stream(download_state, bqstorage_client, session, stream, worker_queue, page_to_item)
591 rowstream = bqstorage_client.read_rows(position).rows(session)
592 else:
--> 593 rowstream = bqstorage_client.read_rows(stream.name).rows(session)
594
595 for page in rowstream.pages:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\client.py in read_rows(self, name, offset, retry, timeout, metadata)
120 retry=retry,
121 timeout=timeout,
--> 122 metadata=metadata,
123 )
124 return reader.ReadRowsStream(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\gapic\big_query_read_client.py in read_rows(self, read_stream, offset, retry, timeout, metadata)
370
371 return self._inner_api_calls["read_rows"](
--> 372 request, retry=retry, timeout=timeout, metadata=metadata
373 )
374
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\gapic_v1\method.py in __call__(self, *args, **kwargs)
143 kwargs["metadata"] = metadata
144
--> 145 return wrapped_func(*args, **kwargs)
146
147
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_wrapped_func(*args, **kwargs)
284 sleep_generator,
285 self._deadline,
--> 286 on_error=on_error,
287 )
288
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
182 for sleep in sleep_generator:
183 try:
--> 184 return target()
185
186 # pylint: disable=broad-except
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\timeout.py in func_with_timeout(*args, **kwargs)
212 """Wrapped function that adds timeout."""
213 kwargs["timeout"] = next(timeouts)
--> 214 return func(*args, **kwargs)
215
216 return func_with_timeout
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
--> 152 six.raise_from(exceptions.from_grpc_error(exc), exc)
153
154 return error_remapped_callable
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
DeadlineExceeded: 504 Deadline Exceeded
Let me know if you need more information. Thanks in advance.
Rutger
It turned out to be a conflict between a Conda package and a pip package.
I resolved it by reinstalling all the packages.
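For anyone comparing approaches: the cell magic above corresponds roughly to this plain client-library call (a sketch, assuming google-cloud-bigquery is installed and default credentials are configured):

from google.cloud import bigquery

client = bigquery.Client()
sql = 'SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`'
# to_dataframe() uses the BigQuery Storage API when it is available
tax_forms = client.query(sql).to_dataframe()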

Pymongo KeyError: '$err' MongoDB Atlas

I'm following this introductory tutorial: https://www.mongodb.com/blog/post/getting-started-with-python-and-mongodb. I can connect to the cluster fine with the mongo shell, but not with PyMongo (Python 3.6.1, PyMongo 3.4.0). PyMongo works fine with a local MongoDB. What is the problem? Below is the exception I get:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-22-1c9d47341338> in <module>()
----> 1 server_status_result = db.command('serverStatus')
2 pprint(server_status_result)
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/database.py in command(self, command, value, check, allowable_errors, read_preference, codec_options, **kwargs)
489 """
490 client = self.__client
--> 491 with client._socket_for_reads(read_preference) as (sock_info, slave_ok):
492 return self._command(sock_info, command, slave_ok, value,
493 check, allowable_errors, read_preference,
/usr/lib/python3.6/contextlib.py in __enter__(self)
80 def __enter__(self):
81 try:
---> 82 return next(self.gen)
83 except StopIteration:
84 raise RuntimeError("generator didn't yield") from None
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/mongo_client.py in _socket_for_reads(self, read_preference)
857 topology = self._get_topology()
858 single = topology.description.topology_type == TOPOLOGY_TYPE.Single
--> 859 with self._get_socket(read_preference) as sock_info:
860 slave_ok = (single and not sock_info.is_mongos) or (
861 preference != ReadPreference.PRIMARY)
/usr/lib/python3.6/contextlib.py in __enter__(self)
80 def __enter__(self):
81 try:
---> 82 return next(self.gen)
83 except StopIteration:
84 raise RuntimeError("generator didn't yield") from None
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/mongo_client.py in _get_socket(self, selector)
823 server = self._get_topology().select_server(selector)
824 try:
--> 825 with server.get_socket(self.__all_credentials) as sock_info:
826 yield sock_info
827 except NetworkTimeout:
/usr/lib/python3.6/contextlib.py in __enter__(self)
80 def __enter__(self):
81 try:
---> 82 return next(self.gen)
83 except StopIteration:
84 raise RuntimeError("generator didn't yield") from None
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/server.py in get_socket(self, all_credentials, checkout)
166 @contextlib.contextmanager
167 def get_socket(self, all_credentials, checkout=False):
--> 168 with self.pool.get_socket(all_credentials, checkout) as sock_info:
169 yield sock_info
170
/usr/lib/python3.6/contextlib.py in __enter__(self)
80 def __enter__(self):
81 try:
---> 82 return next(self.gen)
83 except StopIteration:
84 raise RuntimeError("generator didn't yield") from None
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/pool.py in get_socket(self, all_credentials, checkout)
790 sock_info = self._get_socket_no_auth()
791 try:
--> 792 sock_info.check_auth(all_credentials)
793 yield sock_info
794 except:
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/pool.py in check_auth(self, all_credentials)
510
511 for credentials in cached - authset:
--> 512 auth.authenticate(credentials, self)
513 self.authset.add(credentials)
514
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/auth.py in authenticate(credentials, sock_info)
468 mechanism = credentials.mechanism
469 auth_func = _AUTH_MAP.get(mechanism)
--> 470 auth_func(credentials, sock_info)
471
472
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/auth.py in _authenticate_default(credentials, sock_info)
448 def _authenticate_default(credentials, sock_info):
449 if sock_info.max_wire_version >= 3:
--> 450 return _authenticate_scram_sha1(credentials, sock_info)
451 else:
452 return _authenticate_mongo_cr(credentials, sock_info)
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/auth.py in _authenticate_scram_sha1(credentials, sock_info)
227 ('conversationId', res['conversationId']),
228 ('payload', Binary(client_final))])
--> 229 res = sock_info.command(source, cmd)
230
231 parsed = _parse_scram_response(res['payload'])
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/pool.py in command(self, dbname, spec, slave_ok, read_preference, codec_options, check, allowable_errors, check_keys, read_concern, write_concern, parse_write_concern_error, collation)
422 # Catch socket.error, KeyboardInterrupt, etc. and close ourselves.
423 except BaseException as error:
--> 424 self._raise_connection_failure(error)
425
426 def send_message(self, message, max_doc_size):
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/pool.py in _raise_connection_failure(self, error)
550 _raise_connection_failure(self.address, error)
551 else:
--> 552 raise error
553
554 def __eq__(self, other):
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/pool.py in command(self, dbname, spec, slave_ok, read_preference, codec_options, check, allowable_errors, check_keys, read_concern, write_concern, parse_write_concern_error, collation)
417 read_concern,
418 parse_write_concern_error=parse_write_concern_error,
--> 419 collation=collation)
420 except OperationFailure:
421 raise
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/network.py in command(sock, dbname, spec, slave_ok, is_mongos, read_preference, codec_options, check, allowable_errors, address, check_keys, listeners, max_bson_size, read_concern, parse_write_concern_error, collation)
108 response = receive_message(sock, 1, request_id)
109 unpacked = helpers._unpack_response(
--> 110 response, codec_options=codec_options)
111
112 response_doc = unpacked['data'][0]
/home/tim/.virtualenvs/main/lib/python3.6/site-packages/pymongo/helpers.py in _unpack_response(response, cursor_id, codec_options)
126 # Fake the ok field if it doesn't exist.
127 error_object.setdefault("ok", 0)
--> 128 if error_object["$err"].startswith("not master"):
129 raise NotMasterError(error_object["$err"], error_object)
130 elif error_object.get("code") == 50:
KeyError: '$err'
I believe this is an Atlas bug; I've reported it to the team. The bug: if you fail to log in to Atlas because your username or password is incorrect, it replies in a way that makes PyMongo throw a KeyError instead of the proper OperationFailure("auth failed").
PyMongo does work with Atlas, however, if you properly format your connection string with your username and password. Make sure your username and password are URL-quoted. Substitute your username and password into this Python code:
from urllib.parse import quote_plus  # Python 3; on Python 2 it's: from urllib import quote_plus
print(quote_plus('MY USERNAME'))
print(quote_plus('MY PASSWORD'))
Take the output and put it into the connection string Atlas gave you, e.g. if your username is jesse@example.com and your password is "foo:bar", put that in the first part of the string, and get the rest of the string from the Atlas control panel for your account:
mongodb://jesse%40example.com:foo%3Abar@cluster0-shard-00-00-abc.mongodb.net:27017,cluster0-shard-00-01-abc.mongodb.net:27017,cluster0-shard-00-02-abc.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin
Note how "jesse@example.com" has become "jesse%40example.com", and "foo:bar" has become "foo%3Abar".
