unable to download images as bytes from telegram - python

I am trying to download images from Telegram as bytes or base64 instead of saving them as image files. I don't know if there is a way to do it. I went through the documentation and tried the code below, but it did not work.
import io

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from telethon import TelegramClient, errors
from telethon.errors import FileReferenceExpiredError

channels = ['channel_name']

async with TelegramClient(username, api_id, api_hash) as client:
    ## looping through channels in telegram
    for channel in channels:
        try:
            async with client.takeout() as takeout:
                async for message in takeout.iter_messages(channel, wait_time=0):
                    try:
                        # await client.download_media(message.media, temp_folder+channel+str(message.id))
                        print(message.media)
                        data = await client.download_file(
                            message.media,
                            bytes,
                            progress_callback=callback
                        )
                        image = mpimg.imread(io.BytesIO(data), format='JPEG')
                        plt.imshow(image)
                    except FileReferenceExpiredError:
                        print('A FileReferenceExpiredError exception occurred')
        except errors.TakeoutInitDelayError as e:
            print('Must wait', e.seconds, 'before takeout')
It prints the message first and then shows this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
D:\TEMP\hkay\Temp\ipykernel_79040\2965379116.py in <module>
53 # await client.download_media(message.media, temp_folder+channel+str(message.id))
54 print(message.media)
---> 55 data = await client.download_file(
56 message,
57 bytes,
~\AppData\Roaming\Python\Python39\site-packages\telethon\client\downloads.py in download_file(self, input_location, file, part_size_kb, file_size, progress_callback, dc_id, key, iv)
490 print(data[:16])
491 """
--> 492 return await self._download_file(
493 input_location,
494 file,
~\AppData\Roaming\Python\Python39\site-packages\telethon\client\downloads.py in _download_file(self, input_location, file, part_size_kb, file_size, progress_callback, dc_id, key, iv, msg_data)
547
548 if progress_callback:
--> 549 r = progress_callback(f.tell(), file_size)
550 if inspect.isawaitable(r):
551 await r
D:\TEMP\hkay\Temp\ipykernel_79040\1493750919.py in callback(current, total, channel_name)
33 def callback(current, total, channel_name=channel):
34 print('Downloaded from',channel,'-', 'message_id:', message.id, ' ', current, 'out of', total,
---> 35 'bytes: {:.2%}'.format(current / total))
36
37 # Connect to API with Client
TypeError: unsupported operand type(s) for /: 'int' and 'NoneType'
Edit 1:
After removing the progress_callback, here is what I am seeing.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Roaming\Python\Python39\site-packages\telethon\utils.py in _get_file_info(location)
800 try:
--> 801 if location.SUBCLASS_OF_ID == 0x1523d462:
802 return _FileInfo(None, location, None) # crc32(b'InputFileLocation'):
AttributeError: 'NoneType' object has no attribute 'SUBCLASS_OF_ID'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
D:\TEMP\hkhan\Temp\ipykernel_79040\1631659142.py in <module>
53 #await client.download_media(message.media, temp_folder+channel+str(message.id))
54
---> 55 data = await client.download_file(message.media
56 ,bytes
57 )
~\AppData\Roaming\Python\Python39\site-packages\telethon\client\downloads.py in download_file(self, input_location, file, part_size_kb, file_size, progress_callback, dc_id, key, iv)
490 print(data[:16])
491 """
--> 492 return await self._download_file(
493 input_location,
494 file,
~\AppData\Roaming\Python\Python39\site-packages\telethon\client\downloads.py in _download_file(self, input_location, file, part_size_kb, file_size, progress_callback, dc_id, key, iv, msg_data)
538
539 try:
--> 540 async for chunk in self._iter_download(
541 input_location, request_size=part_size, dc_id=dc_id, msg_data=msg_data):
542 if iv and key:
~\AppData\Roaming\Python\Python39\site-packages\telethon\client\downloads.py in _iter_download(self, file, offset, stride, limit, chunk_size, request_size, file_size, dc_id, msg_data)
680 msg_data: tuple = None
681 ):
--> 682 info = utils._get_file_info(file)
683 if info.dc_id is not None:
684 dc_id = info.dc_id
~\AppData\Roaming\Python\Python39\site-packages\telethon\utils.py in _get_file_info(location)
802 return _FileInfo(None, location, None) # crc32(b'InputFileLocation'):
803 except AttributeError:
--> 804 _raise_cast_fail(location, 'InputFileLocation')
805
806 if isinstance(location, types.Message):
~\AppData\Roaming\Python\Python39\site-packages\telethon\utils.py in _raise_cast_fail(entity, target)
136
137 def _raise_cast_fail(entity, target):
--> 138 raise TypeError('Cannot cast {} to any kind of {}.'.format(
139 type(entity).__name__, target))
140
TypeError: Cannot cast NoneType to any kind of InputFileLocation.

The problem is in your progress_callback: total is None because the maximum file size is not known. Simply removing the callback, if you don't need it, should work. Otherwise, check that total is not None inside the callback, or explicitly pass a file size to the download method.
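For illustration, a None-safe version of the callback from the traceback (a minimal sketch; channel and message come from the question's surrounding loop):

def callback(current, total, channel_name=channel):
    # total may be None when Telegram does not report a file size,
    # so only compute a percentage when it is known
    if total:
        print('Downloaded from', channel_name, '-', 'message_id:', message.id,
              current, 'out of', total, 'bytes: {:.2%}'.format(current / total))
    else:
        print('Downloaded from', channel_name, '-', 'message_id:', message.id,
              current, 'bytes (total size unknown)')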

Related

How to serialize metpy (pint) units for use with dask distributed?

How can I properly serialize metpy units (based on pint) to work with dask distributed? As far as I understand, dask distributed automatically pickles data for ease of transfer, but fails to pickle the metpy units object, which is necessary for the computation. Error produced: TypeError: cannot pickle 'weakref' object. MWE below.
import metpy.calc as mpcalc
from metpy.units import units
from dask.distributed import Client, LocalCluster

def calculate_dewpoint(vapor_pressure):
    dewpoint = mpcalc.dewpoint(vapor_pressure * units('hPa'))
    return dewpoint

cluster = LocalCluster()
client = Client(cluster)

## works
vapor_pressure = 5
dp = calculate_dewpoint(vapor_pressure)
print(dp)

## doesn't work
vapor_pressure = 5
dp_future = client.submit(calculate_dewpoint, vapor_pressure)
dp = dp_future.result()
EDIT: Added full traceback.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/worker.py in dumps_function(func)
4271 with _cache_lock:
-> 4272 result = cache_dumps[func]
4273 except KeyError:
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/utils.py in __getitem__(self, key)
1362 def __getitem__(self, key):
-> 1363 value = super().__getitem__(key)
1364 self.data.move_to_end(key)
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/collections/__init__.py in __getitem__(self, key)
1009 return self.__class__.__missing__(self, key)
-> 1010 raise KeyError(key)
1011 def __setitem__(self, key, item): self.data[key] = item
KeyError: <function calculate_dewpoint at 0x2ad5e010f0d0>
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/protocol/pickle.py in dumps(x, buffer_callback, protocol)
52 buffers.clear()
---> 53 result = cloudpickle.dumps(x, **dump_kwargs)
54 elif not _always_use_pickle_for(x) and b"__main__" in result:
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dumps(obj, protocol, buffer_callback)
72 )
---> 73 cp.dump(obj)
74 return file.getvalue()
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dump(self, obj)
601 try:
--> 602 return Pickler.dump(self, obj)
603 except RuntimeError as e:
TypeError: cannot pickle 'weakref' object
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/glade/scratch/cbecker/ipykernel_272346/952144406.py in <module>
20 ## doesn't work
21 vapor_pressure = 5
---> 22 dp_future = client.submit(calculate_dewpoint, vapor_pressure)
23 dp = dp_future.result()
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/client.py in submit(self, func, key, workers, resources, retries, priority, fifo_timeout, allow_other_workers, actor, actors, pure, *args, **kwargs)
1577 dsk = {skey: (func,) + tuple(args)}
1578
-> 1579 futures = self._graph_to_futures(
1580 dsk,
1581 [skey],
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/client.py in _graph_to_futures(self, dsk, keys, workers, allow_other_workers, priority, user_priority, resources, retries, fifo_timeout, actors)
2628 # Pack the high level graph before sending it to the scheduler
2629 keyset = set(keys)
-> 2630 dsk = dsk.__dask_distributed_pack__(self, keyset, annotations)
2631
2632 # Create futures before sending graph (helps avoid contention)
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/dask/highlevelgraph.py in __dask_distributed_pack__(self, client, client_keys, annotations)
1074 "__module__": layer.__module__,
1075 "__name__": type(layer).__name__,
-> 1076 "state": layer.__dask_distributed_pack__(
1077 self.get_all_external_keys(),
1078 self.key_dependencies,
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/dask/highlevelgraph.py in __dask_distributed_pack__(self, all_hlg_keys, known_key_dependencies, client, client_keys)
432 for k, v in dsk.items()
433 }
--> 434 dsk = toolz.valmap(dumps_task, dsk)
435 return {"dsk": dsk, "dependencies": dependencies}
436
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/worker.py in dumps_task(task)
4308 return d
4309 elif not any(map(_maybe_complex, task[1:])):
-> 4310 return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
4311 return to_serialize(task)
4312
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/worker.py in dumps_function(func)
4272 result = cache_dumps[func]
4273 except KeyError:
-> 4274 result = pickle.dumps(func, protocol=4)
4275 if len(result) < 100000:
4276 with _cache_lock:
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/distributed/protocol/pickle.py in dumps(x, buffer_callback, protocol)
58 try:
59 buffers.clear()
---> 60 result = cloudpickle.dumps(x, **dump_kwargs)
61 except Exception as e:
62 logger.info("Failed to serialize %s. Exception: %s", x, e)
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dumps(obj, protocol, buffer_callback)
71 file, protocol=protocol, buffer_callback=buffer_callback
72 )
---> 73 cp.dump(obj)
74 return file.getvalue()
75
/glade/work/cbecker/miniconda3/envs/risk/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dump(self, obj)
600 def dump(self, obj):
601 try:
--> 602 return Pickler.dump(self, obj)
603 except RuntimeError as e:
604 if "recursion" in e.args[0]:
TypeError: cannot pickle 'weakref' object
So there's an issue where (I think) it's trying to serialize the unit registry or units and transfer them between processes. To work around this, try moving the import of units inside the function (though this might cause some other problems):
def calculate_dewpoint(vapor_pressure):
    from metpy.units import units
    return mpcalc.dewpoint(vapor_pressure * units('hPa'))
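With that change, the failing call from the MWE should work as written, since the import now happens on the worker and cloudpickle no longer has to serialize the units registry:

# same call as in the MWE above
dp_future = client.submit(calculate_dewpoint, vapor_pressure)
dp = dp_future.result()
print(dp)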

py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example on neo4j desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following line:
import os
import py2neo

default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
I have tried the following attempts to create the neo4j database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)

#2
from py2neo import Graph
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True)

#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True, name="StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on this fix, but got the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!
I was able to resolve this with the following syntax:
graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
For the newer versions of py2neo, graph.begin takes readonly=False instead of autocommit=True, but in any case, I now have this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!
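One thing worth checking (not from the original thread): the neo4j:// scheme makes py2neo attempt cluster routing, which a single Neo4j Desktop instance does not serve, and that can surface as "Cannot connect to any known routers". A sketch of the plain-Bolt variant, with placeholder credentials:

from py2neo import Graph

# bolt:// talks to a single server directly, with no routing table lookup
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

# auto-commit form of the cleanup query from the question
graph.run("MATCH (n) DETACH DELETE n")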

How to prevent TimeoutError on asyncio when making large amounts of API calls

I'm new to the asyncio module. Until recently I used requests for this kind of task.
I'm trying to scale a script that works well with up to 120 calls using requests. However, since requests is synchronous, it would take forever to make 1000 API calls (which is what I'm trying to achieve). This is where I found asyncio, which makes the requests asynchronously.
This is the script I put together for asyncio with the help of this article.
import asyncio
import aiohttp
from aiohttp import ClientTimeout

url = 'https://api.url.com/search?api_key=api_key&api_params=multiple_params'
queries = ['online slots', 'metaverse', 'sports betting', 'basketball odds', 'soccer odds', 'online poker', 'best casinos in germany', 'barbecue grills', 'outdoor pizza ovens']
results = []

def get_tasks(session):
    tasks = []
    for q in queries:
        tasks.append(asyncio.create_task(session.get(url.format(q), ssl=False)))
    return tasks

timeout = ClientTimeout(total=500)

async def get_queries():
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = get_tasks(session)
        responses = await asyncio.gather(*tasks)
        for response in responses:
            results.append(await response.json())

asyncio.run(get_queries())
It seems to work fine in most instances, but it times out on many occasions: when I'm using the German queries, and when it makes more than 500 API calls.
Below is what I keep getting back. As you can see in the script, I've already added a client timeout.
---------------------------------------------------------------------------
TimeoutError Traceback (most recent call last)
<ipython-input-4-8c48df090394> in <module>
33 results.append(await response.json())
34
---> 35 asyncio.run(get_queries())
/opt/anaconda3/lib/python3.8/site-packages/nest_asyncio.py in run(future, debug)
30 loop = asyncio.get_event_loop()
31 loop.set_debug(debug)
---> 32 return loop.run_until_complete(future)
33
34 if sys.version_info >= (3, 6, 0):
/opt/anaconda3/lib/python3.8/site-packages/nest_asyncio.py in run_until_complete(self, future)
68 raise RuntimeError(
69 'Event loop stopped before Future completed.')
---> 70 return f.result()
71
72 def _run_once(self):
/opt/anaconda3/lib/python3.8/asyncio/futures.py in result(self)
176 self.__log_traceback = False
177 if self._exception is not None:
--> 178 raise self._exception
179 return self._result
180
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __step(***failed resolving arguments***)
278 # We use the `send` method directly, because coroutines
279 # don't have `__iter__` and `__next__` methods.
--> 280 result = coro.send(None)
281 else:
282 result = coro.throw(exc)
<ipython-input-4-8c48df090394> in get_queries()
29 async with aiohttp.ClientSession(timeout=timeout) as session:
30 tasks = get_tasks(session)
---> 31 responses = await asyncio.gather(*tasks)
32 for response in responses:
33 results.append(await response.json())
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __wakeup(self, future)
347 def __wakeup(self, future):
348 try:
--> 349 future.result()
350 except BaseException as exc:
351 # This may also be a cancellation.
/opt/anaconda3/lib/python3.8/asyncio/tasks.py in __step(***failed resolving arguments***)
280 result = coro.send(None)
281 else:
--> 282 result = coro.throw(exc)
283 except StopIteration as exc:
284 if self._must_cancel:
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client.py in throw(self, arg)
1123
1124 def throw(self, arg: BaseException) -> None: # type: ignore[arg-type,override]
-> 1125 self._coro.throw(arg)
1126
1127 def close(self) -> None:
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client.py in _request(self, method, str_or_url, params, data, json, cookies, headers, skip_auto_headers, auth, allow_redirects, max_redirects, compress, chunked, expect100, raise_for_status, read_until_eof, proxy, proxy_auth, timeout, verify_ssl, fingerprint, ssl_context, ssl, proxy_headers, trace_request_ctx, read_bufsize)
557 resp = await req.send(conn)
558 try:
--> 559 await resp.start(conn)
560 except BaseException:
561 resp.close()
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in start(self, connection)
911 if self._continue is not None:
912 set_result(self._continue, True)
--> 913 self._continue = None
914
915 # payload eof handler
/opt/anaconda3/lib/python3.8/site-packages/aiohttp/helpers.py in __exit__(self, exc_type, exc_val, exc_tb)
719
720 if exc_type is asyncio.CancelledError and self._cancelled:
--> 721 raise asyncio.TimeoutError from None
722 return None
723
TimeoutError:
Can anyone help me figure out what I'm doing wrong, and how to avoid timeouts for large numbers of API calls with asyncio?
Much appreciated!
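A common mitigation for timeouts at this scale (not from the original thread) is to cap how many requests are in flight at once, so slow responses don't pile up behind the single total timeout. A minimal sketch using asyncio.Semaphore, reusing the question's url and queries; the limit of 50 is an assumption to tune against the API's rate limits:

import asyncio
import aiohttp

async def fetch(session, sem, q):
    # the semaphore blocks here once the limit is reached, so slow
    # responses don't accumulate and trip the overall client timeout
    async with sem:
        async with session.get(url.format(q), ssl=False) as response:
            return await response.json()

async def get_queries():
    sem = asyncio.Semaphore(50)  # at most 50 requests in flight (assumed limit)
    timeout = aiohttp.ClientTimeout(total=500)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        return await asyncio.gather(*(fetch(session, sem, q) for q in queries))

results = asyncio.run(get_queries())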

Twint scraping: ClientPayloadError: Response payload is not completed

While I was scraping tweets about a certain hashtag using twint, I received the error below. Can anyone explain why this error occurs and how I can fix the problem? Tweets up to a certain date were scraped; the tweets before that day, however, could not be scraped because of this error.
Thank you for your help already!
---------------------------------------------------------------------------
ClientPayloadError Traceback (most recent call last)
<ipython-input-8-f28f8e9aab1e> in <module>
----> 1 twint.run.Search(c)
~/.local/lib/python3.8/site-packages/twint/run.py in Search(config, callback)
408 config.Followers = False
409 config.Profile = False
--> 410 run(config, callback)
411 if config.Pandas_au:
412 storage.panda._autoget("tweet")
~/.local/lib/python3.8/site-packages/twint/run.py in run(config, callback)
327 raise
328
--> 329 get_event_loop().run_until_complete(Twint(config).main(callback))
330
331
~/opt/anaconda3/lib/python3.8/asyncio/base_events.py in run_until_complete(self, future)
614 raise RuntimeError('Event loop stopped before Future completed.')
615
--> 616 return future.result()
617
618 def stop(self):
~/.local/lib/python3.8/site-packages/twint/run.py in main(self, callback)
233 task.add_done_callback(callback)
234
--> 235 await task
236
237 async def run(self):
~/.local/lib/python3.8/site-packages/twint/run.py in run(self)
284 elif self.config.TwitterSearch:
285 logme.debug(__name__ + ':Twint:main:twitter-search')
--> 286 await self.tweets()
287 else:
288 logme.debug(__name__ + ':Twint:main:no-more-tweets')
~/.local/lib/python3.8/site-packages/twint/run.py in tweets(self)
215
216 async def tweets(self):
--> 217 await self.Feed()
218 # TODO : need to take care of this later
219 if self.config.Location:
~/.local/lib/python3.8/site-packages/twint/run.py in Feed(self)
60 # this will receive a JSON string, parse it into a `dict` and do the required stuff
61 try:
---> 62 response = await get.RequestUrl(self.config, self.init)
63 except TokenExpiryException as e:
64 logme.debug(__name__ + 'Twint:Feed:' + str(e))
~/.local/lib/python3.8/site-packages/twint/get.py in RequestUrl(config, init)
133 _serialQuery = _url
134
--> 135 response = await Request(_url, params=params, connector=_connector, headers=_headers)
136
137 if config.Debug:
~/.local/lib/python3.8/site-packages/twint/get.py in Request(_url, connector, params, headers)
159 logme.debug(__name__ + ':Request:Connector')
160 async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
--> 161 return await Response(session, _url, params)
162
163
~/.local/lib/python3.8/site-packages/twint/get.py in Response(session, _url, params)
166 with timeout(120):
167 async with session.get(_url, ssl=True, params=params, proxy=httpproxy) as response:
--> 168 resp = await response.text()
169 if response.status == 429: # 429 implies Too many requests i.e. Rate Limit Exceeded
170 raise TokenExpiryException(loads(resp)['errors'][0]['message'])
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in text(self, encoding, errors)
1074 """Read response payload and decode."""
1075 if self._body is None:
-> 1076 await self.read()
1077
1078 if encoding is None:
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/client_reqrep.py in read(self)
1030 if self._body is None:
1031 try:
-> 1032 self._body = await self.content.read()
1033 for trace in self._traces:
1034 await trace.send_response_chunk_received(
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in read(self, n)
368 blocks = []
369 while True:
--> 370 block = await self.readany()
371 if not block:
372 break
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in readany(self)
390 # without feeding any data
391 while not self._buffer and not self._eof:
--> 392 await self._wait("readany")
393
394 return self._read_nowait(-1)
~/opt/anaconda3/lib/python3.8/site-packages/aiohttp/streams.py in _wait(self, func_name)
304 if self._timer:
305 with self._timer:
--> 306 await waiter
307 else:
308 await waiter
ClientPayloadError: Response payload is not completed
Not sure if you found a solution to your problem, but I thought I would add this here for anyone looking in the future:
https://github.com/twintproject/twint/issues/1099
Essentially, the linked issue suggests using a try/except block to capture the error and try again, if that works for your code.
I have also found that twint works better on Python 3.6, which might help!
Good luck :)
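For illustration, a minimal retry sketch along the lines the linked issue suggests; the retry count of 3 is an assumption, and c is the twint config object from the question:

import twint
from aiohttp import ClientPayloadError

# retry the search a few times when the payload error interrupts it
for attempt in range(3):
    try:
        twint.run.Search(c)
        break
    except ClientPayloadError:
        print('ClientPayloadError on attempt', attempt + 1, '- retrying...')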

How can I load sklearn data in Jupyter Python 3?

Hey, I have a very short question. I need to load data for my machine learning course, but it does not work for me and I have no idea why. I'm using Jupyter with Python 3.
My code:
from sklearn.datasets import fetch_covtype
forest = fetch_covtype()
For my friend it works fine under the same conditions. I already tried to update sklearn with pip install -U scikit-learn, but it did not solve the problem. I hope somebody can help me.
It creates the following error:
UnboundLocalError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
126 try:
--> 127 X, y
128 except NameError:
UnboundLocalError: local variable 'X' referenced before assignment
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-fb303a92b6ca> in <module>
----> 1 forest =fetch_covtype()
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
127 X, y
128 except NameError:
--> 129 X, y = _refresh_cache([samples_path, targets_path], 9)
130 # TODO: Revert to the following two lines in v0.23
131 # X = joblib.load(samples_path)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in _refresh_cache(files, compress)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in <listcomp>(.0)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load(filename, mmap_mode)
603 return load_compatibility(fobj)
604
--> 605 obj = _unpickle(fobj, filename, mmap_mode)
606
607 return obj
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
527 obj = None
528 try:
--> 529 obj = unpickler.load()
530 if unpickler.compat_mode:
531 warnings.warn("The file '%s' has been generated with a "
/opt/conda/lib/python3.7/pickle.py in load(self)
1083 raise EOFError
1084 assert isinstance(key, bytes_types)
-> 1085 dispatch[key[0]](self)
1086 except _Stop as stopinst:
1087 return stopinst.value
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load_build(self)
353 if isinstance(array_wrapper, NDArrayWrapper):
354 self.compat_mode = True
--> 355 self.stack.append(array_wrapper.read(self))
356
357 # Be careful to register our new method.
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read(self, unpickler)
196 array = self.read_mmap(unpickler)
197 else:
--> 198 array = self.read_array(unpickler)
199
200 # Manage array subclass case
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read_array(self, unpickler)
147 read_size = int(read_count * self.dtype.itemsize)
148 data = _read_bytes(unpickler.file_handle,
--> 149 read_size, "array data")
150 array[i:i + read_count] = \
151 unpickler.np.frombuffer(data, dtype=self.dtype,
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle_utils.py in _read_bytes(fp, size, error_template)
241 if len(data) != size:
242 msg = "EOF: reading %s, expected %d bytes got %d"
--> 243 raise ValueError(msg % (error_template, size, len(data)))
244 else:
245 return data
ValueError: EOF: reading array data, expected 262144 bytes got 209661
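The final ValueError ("expected 262144 bytes got 209661") typically means the cached copy of the dataset under scikit-learn's data_home is truncated, e.g. from an interrupted download. A possible fix, as a sketch (not from the original thread; note this clears all locally cached sklearn datasets, not just covtype):

import shutil
from sklearn.datasets import fetch_covtype, get_data_home

# remove the (possibly truncated) cache, then force a fresh download
shutil.rmtree(get_data_home(), ignore_errors=True)
forest = fetch_covtype(download_if_missing=True)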
