This is the sample code I am using. I wanted to quickly parse the pages and enter the resulting data into the database. However, after adding one line, my code started to run significantly slower. I understand this is related to the database work, but I don't understand how to fix it.
If you have any other suggestions for speeding up this code, I would be grateful for your help.
import asyncio

import aiohttp
import asyncpg

from settings import URI, SQL, URLS


class Singleton(type):
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class DBManager(metaclass=Singleton):
    """Class for interacting with a Postgres database."""

    def __init__(self, dsn) -> None:
        """The constructor takes the database DSN."""
        self.dsn = dsn
        self.pool = None

    async def connect(self):
        self.pool = await asyncpg.create_pool(dsn=self.dsn)

    async def insert_data(self, person: str, address: str):
        async with self.pool.acquire() as connection:
            return await connection.execute(SQL, person, address)


db = DBManager(URI)


async def check_address(url, session):
    async with session.get(url) as result:
        try:
            data = await result.json()
            person = 'adult' if data['age'] >= 21 else 'child'
            address = data['address']
            await db.insert_data(person, address)
            print(address, person)
        except Exception as e:
            print(e)


async def bound_fetch(sem, url, session):
    async with sem:
        return await check_address(url, session)


async def main():
    await db.connect()
    sem = asyncio.Semaphore(50)
    tasks = []
    async with aiohttp.ClientSession() as session:
        for url in URLS:
            tasks.append(asyncio.ensure_future(bound_fetch(sem, url, session)))
        await asyncio.gather(*tasks)


if __name__ == '__main__':
    asyncio.run(main())
Awaiting a task is slow by definition: await suspends your coroutine until the result is ready. When you don't actually need to wait, simply carrying on without blocking is generally preferable.
As you mention, this line is the slow one: await db.insert_data(person, address). It blocks until the insert has finished, even though you never use its result. Rather than awaiting it, schedule it with asyncio.create_task() and let the insert proceed in the background. (Simply dropping the await is not enough: a coroutine that is never awaited or wrapped in a task never runs at all.)
You mention asyncio; the Python documentation gives a good overview of it here.
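A minimal sketch of that change, assuming the surrounding code stays as posted. The insert_tasks list is an addition of this sketch: references to the tasks are kept so they are not garbage-collected, and main() can gather them before exiting:

insert_tasks = []

async def check_address(url, session):
    async with session.get(url) as result:
        try:
            data = await result.json()
            person = 'adult' if data['age'] >= 21 else 'child'
            address = data['address']
            # Fire and forget: schedule the insert instead of blocking on it.
            insert_tasks.append(asyncio.create_task(db.insert_data(person, address)))
            print(address, person)
        except Exception as e:
            print(e)

# At the end of main(), once the fetch tasks are done:
#     await asyncio.gather(*insert_tasks)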
I tested the following code with Ncat. It sends only a single message; after that it sends nothing, raises no exception, and never reads anything.
I have no idea what could be going on: there is no exception, and no data seems to be sent.
import asyncio

loop = asyncio.get_event_loop()


class Client:
    def __init__(self):
        self.writer = None
        self.reader = None

    async def connect(self, address, port):
        reader, writer = await asyncio.open_connection(address, port)
        self.reader = reader
        self.writer = writer
        print("connected!")

    async def send(self, message):
        print("writing " + message)
        self.writer.write((message + '\n').encode())
        await self.writer.drain()

    async def receive(self):
        print("receiving")
        message = (self.reader.readuntil('\n'.encode())).decode()
        return message

    async def read_loop(self):
        while True:
            incoming = await self.receive()
            print("remote: " + incoming)


async def main():
    client = Client()
    await client.connect("127.0.0.1", 31416)
    loop.create_task(client.read_loop())
    while True:
        text = input("message: ")
        await client.send(text)


loop.run_until_complete(main())
I haven't tested your code, but a few things immediately jump out.
First, avoid using the low-level asyncio APIs loop.create_task and loop.run_until_complete; those are intended for framework developers. Use asyncio.create_task and asyncio.run instead.
Then, you are getting no exception (or result) because you are not keeping any reference to the task you create. Always keep a reference to your tasks (e.g. assign them to a variable or add them to a list/set); otherwise they can be garbage-collected while still running.
Last, you are missing an await in the receive method: StreamReader.readuntil is a coroutine function, so you have to await it.
class Client:
    ...

    async def receive(self):
        print('receiving...')
        message = await self.reader.readuntil('\n'.encode())
        return message.decode()

    ...
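Putting the three points together, the entry point could look like this sketch (the rest of Client stays as posted; note that the blocking input() call from the original will still stall the event loop while it waits for keyboard input):

async def main():
    client = Client()
    await client.connect("127.0.0.1", 31416)
    # Keep a reference so the task is not garbage-collected mid-flight.
    reader_task = asyncio.create_task(client.read_loop())
    while True:
        text = input("message: ")  # still blocks the event loop
        await client.send(text)

asyncio.run(main())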
I am building my own async Python package and have run into a problem.
This is my code:
import typing as tp

from aiohttp import ClientSession

BASE_URL = 'https://example.com/api'  # placeholder; defined elsewhere in the package


class Client:
    """
    Async client for making requests
    """

    def __init__(self, base_url: str = BASE_URL) -> None:
        self.base_url = base_url
        self.session = ClientSession()

    async def get(self, method: str, *args: tp.Any, **kwargs: tp.Any) -> tp.Any:
        async with self.session.get(f'{self.base_url}/{method}', *args, **kwargs) as response:
            data = await response.json()
            return data
When I try to use something like this:
await client.get()
I get
RuntimeError: Timeout context manager should be used inside a task
I suppose the reason for this error is that ClientSession() is called outside of a coroutine. But I hope somebody knows a way to re-use the ClientSession.
I have already read other similar questions, but they do not fit my situation.
You can initialize (and cache) the session when needed:
class Client:
    """
    Async client for making requests
    """

    def __init__(self, base_url: str = BASE_URL) -> None:
        self.base_url = base_url
        self.session = None

    async def get(self, method: str, *args: tp.Any, **kwargs: tp.Any) -> tp.Any:
        if not self.session:
            self.session = ClientSession()
        async with self.session.get(f'{self.base_url}/{method}', *args, **kwargs) as response:
            data = await response.json()
            return data
Depending on how you use the Client you can also use a class attribute for the session object.
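With the lazy initialization above, usage could look like this hypothetical example (the 'users' method name is just an illustration):

import asyncio

async def main():
    client = Client()
    # The session is created here, inside a running event loop.
    data = await client.get('users')
    print(data)

asyncio.run(main())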
Update:
ClientSession creation should be protected against race conditions with a mutex (asyncio.Lock), so that two concurrent calls cannot both create a session:

_session_mutex = asyncio.Lock()

async def __create_session_if_required(self):
    if self.session is None:
        async with self._session_mutex:
            if self.session is None:
                self.session = aiohttp.ClientSession()
                # should be closed if not a Singleton class: "await session.close()"

async def get(self, method: str, *args: tp.Any, **kwargs: tp.Any) -> tp.Any:
    await self.__create_session_if_required()
    async with self.session.get(f'{self.base_url}/{method}', *args, **kwargs) as response:
        data = await response.json()
        return data
I'm making a Python module for interacting with an API. I'd like it to be fast, so I chose asyncio and aiohttp. I'm quite new to async programming and I'm not sure how to reuse the same session for every request. I'd also like to spare my end-users the hassle of creating the event loop, etc. I came up with this class for my base client:
import asyncio

import aiohttp


class BaseClient:
    API_BASE_URL = "dummyURL"
    API_VERSION = 3

    async def __aenter__(self):
        self._session = aiohttp.ClientSession(raise_for_status=True)
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self._session.close()
        # remove the next line when aiohttp 4.0 is released
        await asyncio.sleep(0.250)

    async def _get(self, endpoint: str):
        url = f"{self.API_BASE_URL}/{endpoint}/?v={self.API_VERSION}"
        async with self._session.get(url) as resp:
            json_body = await resp.json()
            return json_body

    async def list_forums(self):
        endpoint = "forums"
        return await self._get(endpoint)


async def main():
    async with BaseClient() as client:
        forums = await client.list_forums()
        print(forums)


asyncio.run(main())
Is that the right way to reuse the same session? Is it possible to refactor BaseClient in such a way that my end-users would only have to do the following:
client = BaseClient()
forums = client.list_forums()
Thanks for your help.
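One possible direction for the second question, as a sketch only: wrap each call in asyncio.run() behind a blocking facade, at the cost of opening a fresh session and event loop per call. The SyncClient name is invented for illustration:

class SyncClient(BaseClient):
    """Blocking facade for users who don't want to touch asyncio."""

    def list_forums(self):
        return asyncio.run(self._session_scoped(super().list_forums))

    async def _session_scoped(self, coro_func):
        # Open and close the aiohttp session around a single call.
        async with self:
            return await coro_func()

client = SyncClient()
forums = client.list_forums()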
I have this basic exchange monitor script. I'm trying to create one thread per symbol, apart from the main thread, which handles other work, and have them listen to public Gemini websocket endpoints. The first thread runs and prints exchange data to the console, but the second one never does; I had expected to see data from both printed at approximately the same time. I've tried the threading library instead of asyncio and ran into the same situation.
I realize my two public API MarketWebsocket classes could be combined to be cleaner; I'm still trying to work out a way to easily add other symbols to the list. Thanks for any nudges in the right direction!
import asyncio

from websockets import connect

symbols_to_watch = [
    "BTCUSD",
    "ETHUSD"
]


class BTCMarketWebsocket:
    disable = False

    async def __aenter__(self):
        symbol = symbols_to_watch[0]
        self._conn = connect("wss://api.gemini.com/v1/marketdata/{}".format(symbol))
        self.websocket = await self._conn.__aenter__()
        return self

    async def __aexit__(self, *args, **kwargs):
        await self._conn.__aexit__(*args, **kwargs)

    async def receive(self):
        return await self.websocket.recv()


class ETHMarketWebsocket:
    disable = False

    async def __aenter__(self):
        symbol = symbols_to_watch[1]
        self._conn = connect("wss://api.gemini.com/v1/marketdata/{}".format(symbol))
        self.websocket = await self._conn.__aenter__()
        return self

    async def __aexit__(self, *args, **kwargs):
        await self._conn.__aexit__(*args, **kwargs)

    async def receive(self):
        return await self.websocket.recv()


async def btcMarketWebsocket():
    async with BTCMarketWebsocket() as btc:
        while not btc.disable:
            print(await btc.receive())


async def ethMarketWebsocket():
    async with ETHMarketWebsocket() as eth:
        while not eth.disable:
            print(await eth.receive())


if __name__ == '__main__':
    asyncio.run(btcMarketWebsocket())  # runs forever, so the next line is never reached
    asyncio.run(ethMarketWebsocket())
You can do
async def multiple_tasks():
    tasks = []
    tasks.append(btcMarketWebsocket())
    tasks.append(ethMarketWebsocket())
    await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(multiple_tasks())
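On the side note about combining the two classes: since they differ only in the symbol, a single parameterized class can replace both. A sketch under that assumption (MarketWebsocket, watch_symbol, and watch_all are invented names):

class MarketWebsocket:
    disable = False

    def __init__(self, symbol):
        self.symbol = symbol

    async def __aenter__(self):
        self._conn = connect("wss://api.gemini.com/v1/marketdata/{}".format(self.symbol))
        self.websocket = await self._conn.__aenter__()
        return self

    async def __aexit__(self, *args, **kwargs):
        await self._conn.__aexit__(*args, **kwargs)

    async def receive(self):
        return await self.websocket.recv()


async def watch_symbol(symbol):
    async with MarketWebsocket(symbol) as ws:
        while not ws.disable:
            print(await ws.receive())


async def watch_all():
    # One coroutine per symbol, all in the same event loop.
    await asyncio.gather(*(watch_symbol(s) for s in symbols_to_watch))

if __name__ == '__main__':
    asyncio.run(watch_all())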
I have a script that checks the status code of a couple hundred thousand supplied websites, and I was trying to integrate a semaphore into the flow to speed up processing. The problem is that whenever I integrate a semaphore, I just get a list populated with None objects, and I'm not entirely sure why.
I have mostly been copying code from other sources, as I don't fully grok asynchronous programming yet. When I debug, it seems like I should be getting results out of the function, but something goes wrong when I gather the results. I've tried juggling my looping, gathering, ensuring futures, etc., but nothing returns a list of things that work.
import asyncio

import aiohttp


async def fetch(session, url):
    try:
        async with session.head(url, allow_redirects=True) as resp:
            return url, resp.real_url, resp.status, resp.reason
    except Exception as e:
        return url, None, e, 'Error'


async def bound_fetch(sem, session, url):
    async with sem:
        await fetch(session, url)


async def run(urls):
    timeout = 15
    tasks = []
    sem = asyncio.Semaphore(100)
    conn = aiohttp.TCPConnector(limit=64, ssl=False)
    async with aiohttp.ClientSession(connector=conn) as session:
        for url in urls:
            task = asyncio.wait_for(bound_fetch(sem, session, url), timeout)
            tasks.append(task)
        responses = await asyncio.gather(*tasks)
        # responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks))]
        return responses


urls = ['https://google.com', 'https://yahoo.com']
loop = asyncio.ProactorEventLoop()
data = loop.run_until_complete(run(urls))
I've commented out the progress bar component, but that implementation returns the desired results when there is no semaphore.
Any help would be greatly appreciated. I am furiously reading up on asynchronous programming, but I can't wrap my mind around it yet.
You should explicitly return the results of the coroutines you await. Without a return statement, bound_fetch implicitly returns None, which is exactly why gather hands you a list of None objects.
Replace this code...
async def bound_fetch(sem, session, url):
    async with sem:
        await fetch(session, url)
... with this:
async def bound_fetch(sem, session, url):
    async with sem:
        return await fetch(session, url)
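A tiny self-contained illustration of the underlying rule (the function names here are invented for the demo):

import asyncio

async def no_return():
    await asyncio.sleep(0)  # does work, but returns nothing -> None

async def with_return():
    await asyncio.sleep(0)
    return 'ok'

async def main():
    print(await asyncio.gather(no_return(), with_return()))  # [None, 'ok']

asyncio.run(main())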