fastapi/uvicorn: prevent ungzipping with httpx.AsyncClient - python

I'm working on a reverse proxy based on FastAPI. I want to transparently forward data requested via AsyncClient, but I have a problem with gzipped pages. Can you help me prevent the default ungzipping of resp.content in this example?
@app.get("/{path:path}")
async def _get(path: str, request: Request):
    url = await my_proxy_logic(path, request)
    async with httpx.AsyncClient() as client:
        req = client.build_request("GET", url)
        resp = await client.send(req, stream=False)
        return Response(status_code=resp.status_code, headers=resp.headers, content=resp.content)

It is possible to extract the undecoded data from an httpx response only in streaming mode (stream=True or httpx.stream). In the example below, I collect the entire response using aiter_raw() and return it from the path operation. Keep in mind that the entire response is loaded into memory; if you want to avoid that, use FastAPI's StreamingResponse, as sketched after the example.
import httpx
from fastapi import FastAPI, Request, Response

app = FastAPI()

@app.get("/pass")
async def root(request: Request):
    async with httpx.AsyncClient() as client:
        req = client.build_request('GET', 'http://httpbin.org/gzip')
        resp = await client.send(req, stream=True)
        return Response(status_code=resp.status_code, headers=resp.headers,
                        content=b"".join([part async for part in resp.aiter_raw()]))
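Here is a minimal sketch of the StreamingResponse variant mentioned above, assuming a module-level client (the client must outlive the request handler while the body is still streaming) and a hypothetical "/pass-stream" route; resp.aclose() is scheduled as a background task so the upstream response is released once the stream has been consumed:

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from starlette.background import BackgroundTask

app = FastAPI()
client = httpx.AsyncClient()  # created once; must outlive each response

@app.get("/pass-stream")
async def root(request: Request):
    req = client.build_request('GET', 'http://httpbin.org/gzip')
    resp = await client.send(req, stream=True)
    # aiter_raw() yields the raw, still-compressed bytes without decoding
    return StreamingResponse(resp.aiter_raw(),
                             status_code=resp.status_code,
                             headers=resp.headers,
                             background=BackgroundTask(resp.aclose))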

Related

How can I send a file with aiohttp?

This is my code:
payload = {'text': input_text,
           'question_info': '',
           'include_intonation': 1,
           'stress_version': stress_version,
           'include_fluency': 1,
           'include_ielts_subscore': 1}
files = [
    ('user_audio_file', open(saved_file_path, 'rb'))
]
headers = {}
form = aiohttp.FormData()
for key, value in payload.items():
    form.add_field(key, value)
form.add_field('user_audio_file', open(saved_file_path, 'rb'))
async with aiohttp.ClientSession() as session:
    async with session.post(url, data=form) as response:
        response_json = await response.json()
I want to send a file with aiohttp to a URL, but I get this exception:
'Can not serialize value type: <class \'int\'> headers: {} value: 1'
With the requests library I do it like this:
response = request(
    "POST", url, headers=headers, data=payload, files=files)
response_json = response.json()
but I decided to use aiohttp because it should be async. Please help me with this.
Thanks.
You need to serialize the payload data, e.g. using data=b'form':
async with aiohttp.ClientSession() as session:
    async with session.post(url, data=b'form') as response:
        response_json = await response.json()
By default, the session uses Python's standard json module for serialization, but it is possible to use a different serializer: ClientSession accepts a json_serialize parameter. Then you don't need to explicitly serialize your payload.
import ujson

async with aiohttp.ClientSession(
        json_serialize=ujson.dumps) as session:
    async with session.post(url, data=form) as response:
        response_json = await response.json()
        ...
Warning: above code is not tested.
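Note that, as far as I know, json_serialize only takes effect when the payload is passed via the json= parameter rather than data=; a small sketch under that assumption, reusing the question's url and payload:

import ujson
import aiohttp

async with aiohttp.ClientSession(json_serialize=ujson.dumps) as session:
    # payload is serialized with ujson.dumps because it is passed via json=
    async with session.post(url, json=payload) as response:
        response_json = await response.json()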
Update
I tried setting up a local HTTP server and uploading JSON. I am getting past your error and am able to upload data. Are you serializing the form data using b'form'?
As per this GitHub issue discussion, we need asyncio to control the async event loop and execute async/await through a function.
Here's the relevant code:
import asyncio
import aiohttp

async def uploadForm():
    async with aiohttp.ClientSession() as session:
        # Converting form to binary payload using b'form'
        async with session.post(url, data=b'form') as response:
            response_json = await response.json(content_type='text/html')
            print(response_json)

def main():
    loop = asyncio.get_event_loop()
    loop.run_until_complete(uploadForm())
    loop.close()

if __name__ == '__main__':
    main()
Hope this helps you.
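As an aside, the exception in the question ('Can not serialize value type: <class 'int'> ...') typically means that aiohttp's FormData received a non-string field value; form fields must be str, bytes, or file-like. A sketch of that fix, reusing the question's payload and file:

form = aiohttp.FormData()
for key, value in payload.items():
    # FormData fields must be str/bytes/file-like, so cast the ints to str
    form.add_field(key, str(value))
form.add_field('user_audio_file', open(saved_file_path, 'rb'))

async with aiohttp.ClientSession() as session:
    async with session.post(url, data=form) as response:
        response_json = await response.json()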

What is the fastest way to post a million requests with a URL, headers, and body?

I have a static URL, headers, and data.
Is it possible to make a million POST requests simultaneously with Python?
This is the file.py:
import json
import requests

url = "https://abcd.com"
headers = "headers"
body = "body"

resp = requests.post(url, headers=headers, data=body)
json_resp = json.loads(resp.content)["data"]
print(json_resp)
You might want to use a Python load-testing tool for that, such as Locust:
https://locust.io/
Your file would look like:
from locust import HttpUser, task, between

class QuickstartUser(HttpUser):
    @task
    def task_name(self):
        self.client.post(url, headers=headers, data=body)
You can then feed it to Locust like this:
locust --headless --users <number_of_user> -f <your_file.py>
You can do this in several ways; the best method is async work. A second option is ThreadPoolExecutor, which I do not particularly recommend.
Here's an example of the async approach:
# modified fetch function with semaphore
import asyncio
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        print("{}:{} with delay {}".format(date, response.url, delay))
        return await response.read()

async def bound_fetch(sem, url, session):
    # Getter function with semaphore.
    async with sem:
        await fetch(url, session)

async def run(r):
    url = "http://localhost:8080/{}"
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(1000)
    # Create a client session that ensures we don't open a new connection
    # per request.
    async with ClientSession() as session:
        for i in range(r):
            # pass Semaphore and session to every GET request
            task = asyncio.ensure_future(bound_fetch(sem, url.format(i), session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses

number = 10000
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(number))
loop.run_until_complete(future)
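Since the question is about POST requests with a static URL, headers, and body, here is a hedged adaptation of the same semaphore pattern using the modern asyncio.run() entry point; the URL and body are placeholders from the question, and the headers dict is an assumption:

import asyncio
import aiohttp

URL = "https://abcd.com"                        # placeholder from the question
HEADERS = {"Content-Type": "application/json"}  # assumption: real headers go here
BODY = b"body"                                  # placeholder from the question

async def post_one(sem, session):
    # The semaphore caps how many requests are in flight at once.
    async with sem:
        async with session.post(URL, headers=HEADERS, data=BODY) as resp:
            return await resp.read()

async def post_many(n):
    sem = asyncio.Semaphore(1000)
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(post_one(sem, session) for _ in range(n)))

asyncio.run(post_many(1_000_000))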

aiohttp: multiple requests to same URL return authentication error, but the URL is correct

I am using the code below to make 599 asynchronous requests to the Strava API.
For some reason the response I get for each of them is
{"message":"Authorization Error","errors":[{"resource":"Application","field":"","code":"invalid"}]}
This is the type of error you typically get when your access_token query string parameter is invalid.
But in this case the token is 100% correct: the URL returns the correct response when simply copy-pasted into the browser.
What might be the reason for the error, and how can I fix it? Might the aiohttp session somehow be messing up the authentication procedure?
Note: for privacy reasons, the token in the code below is fake.
import aiohttp
import asyncio

async def fetch(session, url):
    async with session.get(url) as response:
        print(await response.text())

async def main():
    urls = ['''https://www.strava.com/api/v3/activities/
280816027?include_all_efforts=true&
access_token=11111111'''] * 599
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.ensure_future(fetch(session, url))
            for url in urls
        ]
        await asyncio.gather(*tasks)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
You shouldn't use a multiline string as the URL, because it keeps all the whitespace, and as a result you request the wrong URL.
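A sketch of the fix: build the URL on a single line, or use implicit string concatenation if you want to keep the visual split (no newlines or indentation leak into the URL):

urls = ['https://www.strava.com/api/v3/activities/280816027'
        '?include_all_efforts=true&access_token=11111111'] * 599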

Session reusing in aiohttp

I'm trying to reuse the HTTP session, as the aiohttp docs advise:
Don't create a session per request. Most likely you need a session per application which performs all requests altogether.
But the usual pattern I use with the requests lib doesn't work:
def __init__(self):
    self.session = aiohttp.ClientSession()

async def get_u(self, id):
    async with self.session.get('url') as resp:
        json_resp = await resp.json()
        return json_resp.get('data', {})
Then when I try
await client.get_u(1)
I get the error
RuntimeError: Timeout context manager should be used inside a task
No workarounds with async_timeout helped.
Another way does work:
async def get_u(self, id):
    async with aiohttp.ClientSession() as session:
        with async_timeout.timeout(3):
            async with session.get('url') as resp:
                json_resp = await resp.json()
                return json_resp.get('data', {})
But this amounts to creating a session per request.
So my question: how do I properly reuse an aiohttp session?
UPD: a minimal working example. A Sanic application with the following view:
import aiohttp
from sanic.views import HTTPMethodView

class Client:
    def __init__(self):
        self.session = aiohttp.ClientSession()
        self.url = 'https://jsonplaceholder.typicode.com/todos/1'

    async def get(self):
        async with self.session.get(self.url) as resp:
            json_resp = await resp.json()
            return json_resp

client = Client()

class ExView(HTTPMethodView):
    async def get(self, request):
        todo = await client.get()
        print(todo)
I had the same error. The solution for me was initializing the client within an async function, e.g.:
class SearchClient(object):
    def __init__(self, search_url: str, api_key: str):
        self.search_url = search_url
        self.api_key = api_key
        self.session = None

    async def _get(self, url, attempt=1):
        # Create the session lazily, inside a coroutine, i.e. while the
        # event loop is already running.
        if self.session is None:
            self.session = aiohttp.ClientSession(raise_for_status=True)
        headers = {
            'Content-Type': 'application/json',
            'api-key': self.api_key
        }
        logger.info("Running Search: {}".format(url))
        try:
            with timeout(60):
                async with self.session.get(url, headers=headers) as response:
                    results = await response.json()
                    return results
        except asyncio.TimeoutError:
            # assumption: the original snippet was cut off here; re-raise (or retry)
            raise
For example, you can create the ClientSession on app start (using the on_startup signal, https://docs.aiohttp.org/en/stable/web_advanced.html#signals), store it in your app (an aiohttp application has a dict interface for exactly this, https://aiohttp.readthedocs.io/en/stable/faq.html#id4), and access the session through request.app['YOUR_CLIENT_SESSION'] inside a request handler. A sketch of this approach follows.
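A minimal sketch of that approach for an aiohttp web application; the 'client_session' key and the example URL are arbitrary choices for illustration:

import aiohttp
from aiohttp import web

async def init_session(app):
    # runs on startup, inside the running event loop
    app['client_session'] = aiohttp.ClientSession()

async def close_session(app):
    await app['client_session'].close()

async def handler(request):
    session = request.app['client_session']  # reuse the shared session
    async with session.get('https://jsonplaceholder.typicode.com/todos/1') as resp:
        return web.json_response(await resp.json())

app = web.Application()
app.on_startup.append(init_session)
app.on_cleanup.append(close_session)
app.router.add_get('/', handler)
# web.run_app(app)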

Using Python 3 and aiohttp to find the URL after a redirect when it times out

I'm currently trying to audit a large number of redirect URL handles to make sure that their destinations are still valid.
I'm using aiohttp to go through the large volume in order to produce a report.
try:
    with aiohttp.Timeout(timeout):
        async with session.get(url) as resp:
            return {"Handle URL": url,
                    "Status Code": resp.status,
                    "Redirects": resp.url != url,
                    "Resolving URL": resp.url,
                    "Success": resp.status == 200,
                    "Message": ""}
except asyncio.TimeoutError:
    return {"Handle URL": url,
            "Success": False,
            "Message": "Handle server timed out. >{} seconds".format(timeout)}
For the most part, this has been fine for identifying which URL redirects no longer lead to a valid URL. However, I'd really like to know the final address where it times out.
Any ideas?
The only way to do it is to disable redirects with allow_redirects=False and perform the redirections manually.
async with aiohttp.ClientSession() as session:
    async with session.get(URL, allow_redirects=False) as response:
        Location = str(response).split("Location': \'")[1].split("\'")[0]
        return Location
I don't think it is necessary to parse that string for a Location anymore. Here is a small example.
A local Flask server with a redirect:
from flask import Flask, redirect

app = Flask(__name__)

@app.route('/')
def hello_world():
    return 'Hello World!'

@app.route('/redirect')
def redir():
    return redirect('/')

if __name__ == '__main__':
    app.run()
aiohttp request to that redirect:
# coding: utf-8
import asyncio
import aiohttp

async def fetch(URL):
    async with aiohttp.ClientSession() as session:
        async with session.get(URL, allow_redirects=False) as response:
            print(response.url, response.real_url, 'location' in str(response).lower())
        async with session.get(URL, allow_redirects=True) as response:
            print(response.url, response.real_url, 'location' in str(response).lower())

local_url = "http://127.0.0.1:5000/redirect"

async def main():
    await fetch(local_url)

loop = asyncio.new_event_loop()
loop.run_until_complete(main())
prints:
http://127.0.0.1:5000/redirect http://127.0.0.1:5000/redirect True
http://127.0.0.1:5000/ http://127.0.0.1:5000/ False
According to the docs, the difference between url and real_url is that real_url is the unmodified URL of the original request, with the URL fragment not stripped.
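Combining both answers with the original timeout requirement, here is a hedged sketch that follows redirects manually so that, when a timeout fires, the last URL attempted is known. It assumes Python 3.11+ for asyncio.timeout(); on older versions async_timeout.timeout() can be used instead:

import asyncio
from urllib.parse import urljoin
import aiohttp

async def resolve(session, url, seconds=10, max_hops=10):
    current = url
    try:
        async with asyncio.timeout(seconds):  # Python 3.11+
            for _ in range(max_hops):
                async with session.get(current, allow_redirects=False) as resp:
                    if resp.status in (301, 302, 303, 307, 308):
                        # Location may be relative, so join it against the current URL
                        current = urljoin(current, resp.headers['Location'])
                        continue
                    return {"Final URL": current, "Status Code": resp.status,
                            "Success": resp.status == 200, "Message": ""}
            return {"Final URL": current, "Success": False,
                    "Message": "Too many redirects"}
    except asyncio.TimeoutError:
        return {"Final URL": current, "Success": False,
                "Message": "Timed out while fetching {}".format(current)}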
