Displaying a file (image) from S3 via Flask & BytesIO - python

I am attempting to pull a file from AWS S3, using Boto3, directly into a BytesIO object. This will eventually be used to manipulate the downloaded data, but for now I'm just trying to give that file directly to a user via Flask. As I understand it, the code below should work, but it does not. The browser simply displays nothing (and shows that only a few bytes of data were downloaded).
(In this example, my sample file is a png)
from flask import Flask, send_from_directory, abort, Response, send_file, make_response
import boto3, botocore
import os
import io
# AWS credentials are read from the environment (KeyError if unset);
# rstrip() drops any trailing whitespace from the values.
AWS_ACCESS_KEY = os.environ['AWS_ACCESS_KEY'].rstrip()
AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY'].rstrip()
# Bucket that the route below reads objects from.
S3_BUCKET = "static1"
# NOTE(review): static_url_path is the URL prefix for Flask's static files,
# not a filesystem directory -- confirm '/tmp' is the intended prefix.
app = Flask(__name__, static_url_path='/tmp')
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    """Download the S3 object named by ``path`` into memory and send it back.

    Args:
        path: URL path captured by the route; used unchanged as the S3 key.

    Returns:
        The response built by ``send_file``, using the object's stored
        ``ContentType`` as the mimetype.
    """
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    # NOTE: conf is created but never passed to download_fileobj below.
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    # NOTE: the buffer is not rewound after the download, so send_file starts
    # reading from the end of the data -- this is the empty response described
    # in the question. Left as posted on purpose.
    return send_file(file, mimetype=metadata['ContentType'])
if __name__ == '__main__':
    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # interactive debugger on every interface -- development use only.
    app.run(debug=True,port=3000,host='0.0.0.0')
If I modify that core routine to write the BytesIO object to disk, then read it back into a new BytesIO object - it works fine. As below:
def catch_all(path):
    """Variant that round-trips the download through ``/tmp/test1.png``.

    The object is downloaded into a BytesIO, written to disk, read back into
    a second BytesIO and that second buffer is sent to the client.

    Args:
        path: URL path captured by the route; used unchanged as the S3 key.

    Returns:
        The response built by ``send_file`` from the re-read buffer.
    """
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    print(file.getvalue())
    # Context managers close both handles; the original second `fh.close`
    # lacked parentheses and therefore never closed the file.
    with open("/tmp/test1.png", "wb") as fh:
        fh.write(file.getvalue())
    with open("/tmp/test1.png", "rb") as fh:
        # A BytesIO built from initial bytes starts at position 0.
        f2 = io.BytesIO(fh.read())
    print(f2.getvalue())
    return send_file(f2, mimetype=metadata['ContentType'])
Going around in circles with this for a few days, It's clear that I'm missing something and I'm not sure what. The script is being run inside a Python 3.8 docker container with the latest copies of boto3/flask/etc.

Rewinding your BytesIO object should do the trick, with file.seek(0) just before send_file(...).
For the record, I'm not sure your boto3/botocore calls are "best practices"; to try your use case I ended up with:
from boto3.session import Session
# One boto3 Session holding the credentials and region; KEY_ID, ACCESS_KEY and
# REGION_NAME are expected to be defined elsewhere.
session = Session(
aws_access_key_id=KEY_ID, aws_secret_access_key=ACCESS_KEY, region_name=REGION_NAME
)
# Resource-level S3 handle used by the view below.
s3 = session.resource("s3")
@base_bp.route("/test-stuff")
def test_stuff():
    """Download one S3 object into memory and send it to the client.

    Returns:
        The response built by ``send_file``, using the S3 object's
        ``content_type`` as the mimetype.
    """
    a_file = io.BytesIO()
    s3_object = s3.Object(BUCKET, PATH)
    s3_object.download_fileobj(a_file)
    # Rewind: after the download the position is at the end of the data.
    a_file.seek(0)
    return send_file(a_file, mimetype=s3_object.content_type)
It works when reading the file from disk because you instantiate your BytesIO with the full content of the file, so it is fully populated and still at "position 0".

Related

Flask send_file() working with pdf but not with html

I'm having trouble with a flask + azure app. I have some files saved on the storage (pdfs and htmls) and I need to return these files when I invoke the get_file_safe endpoint. This method takes a file_id parameter and accesses the database, goes to blob azure, creates a temporary file and returns that file. When I pass codes that refer to PDF files, it works perfectly and the file is displayed on the screen. When the code matches an HTML file the answer is blank. Does anyone have any idea what it might be? Thank you very much ! (Note: When I used GCP it worked but I had to migrate, so I put here that it is azure).
from flask import Flask, flash, jsonify, session, redirect, url_for, escape, request, render_template, session, send_file
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__, ContentSettings
def get_file_safe():
    """Return the stored file identified by the ``file_id`` query parameter.

    Looks up the blob link and mimetype in the database, downloads the blob
    from Azure into a temporary file and hands that file to ``send_file``.
    ``cursor``, ``connect_str`` and ``tempfile`` must be provided by the
    surrounding module.

    Returns:
        The response built by ``send_file`` for the temporary file.
    """
    #login and security stuff (...) Logic goes here ->>>
    file_id = request.args.get('file_id')
    # Parameters are passed as a one-element tuple; `(file_id)` without the
    # trailing comma is just a parenthesised string.
    cursor.execute(
        """SELECT link, mimetype from TABLE where id = %s """, (file_id,))
    rows = cursor.fetchall()
    link = rows[0][0]
    mimetype = rows[0][1]
    filename = link.split("/")[-1]
    print("Filename{}".format(filename))
    print("Mimetype {}".format(mimetype))
    # google cloud version, commented
    #client = storage.Client()
    #bucket = client.get_bucket('BUCKET_NAME')
    #blob = bucket.blob(link)
    #with tempfile.NamedTemporaryFile() as temp:
    # blob.download_to_filename(temp.name)
    # return send_file(temp.name, attachment_filename=filename)
    # azure version
    bucket_name = 'BUCKET-NAME'
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    blob_client = blob_service_client.get_blob_client(container=bucket_name, blob=link)
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(blob_client.download_blob().readall())
        # NOTE(review): the bytes written above may still sit in the file
        # object's buffer when send_file opens temp.name by path; a
        # temp.flush() here is probably needed and may be why short HTML
        # files come back blank -- TODO confirm. Left as posted.
        #return send_file(temp.name, attachment_filename=filename, mimetype=mimetype)
        return send_file(temp.name, download_name=filename)
As you mentioned that only HTML files fail, I tried reading an HTML blob into a temporary file and displaying it in the browser.
With with tempfile.NamedTemporaryFile() as temp: I also got a blank page.
Then I tried with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
and wrote the data as a string; with that I was able to get the page.
Can you try with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f: for your HTML files?
from azure.storage.blob import BlobServiceClient
import tempfile
import webbrowser
# Demo script: download an HTML blob and open it in the browser from a
# temporary file, once in a way that shows a blank page and once in a way
# that shows the content.
blob_service_client = BlobServiceClient.from_connection_string("Connection String ")
# Initialise container
blob_container_client = blob_service_client.get_container_client("test")
# Get blob
blob_client = blob_container_client.get_blob_client("test.html")
print("downloaded the blob ")
# Download (named blob_bytes so the builtin `str` is not shadowed)
blob_bytes = blob_client.download_blob().readall()
print(blob_bytes)
print(blob_bytes.decode("utf-8"))
# Getting the Blank Page
with tempfile.NamedTemporaryFile() as temp:
    url = 'file://' + temp.name
    temp.write(blob_client.download_blob().readall())
    #temp.write(blob_bytes)
    webbrowser.open(url)
# Getting page
html = blob_bytes.decode("utf-8")
with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
    url = 'file://' + f.name
    f.write(html)
# Opened after the with-block: the file is closed (so fully written) and,
# because of delete=False, still present on disk.
webbrowser.open(url)
Here is the OUTPUT how it looks

Downloading multiple files in Flask

I am trying to provide the client side the option of downloading some files in Flask. There can be multiple files or a single file available for the user/client to download.
However I am not able to understand how to provide the user the option to download multiple files.
Here is what I have tried so far:
@app.route('/download_files')
def download():
    """Collect files in ``dir1/dir2`` whose stem is in ``names`` and send them.

    ``names`` must be provided by the surrounding module.
    """
    count=0
    download_list=[]
    for path in pathlib.Path("dir1/dir2").iterdir():
        if path.is_file():
            for i in names:
                if pathlib.PurePosixPath(path).stem == i:
                    count += 1
                    download_list.append(path)
    # NOTE: a list is handed to send_file here; this is the call the question
    # reports as not working, kept exactly as posted.
    return send_file(download_list, as_attachment=True, mimetype="text/plain", download_name="Downloaded Files", attachment_filename="Generated Files")
This does not work properly even with a single file. The file type I am trying to download is text file with the extension .sql .
Will I somehow have to zip multiple files and then provide the download option? Please guide with my available options.
In order to offer several files together as a download, you only have the option of compressing them in an archive.
In my example, all files that match the specified pattern are listed and compressed in a zip archive. This is written to the memory and sent by the server.
from flask import Flask
from flask import send_file
from glob import glob
from io import BytesIO
from zipfile import ZipFile
import os
app = Flask(__name__)
@app.route('/download')
def download():
    """Zip every ``*.sql`` file in ``dir1/dir2`` in memory and send the archive.

    Returns:
        The response built by ``send_file``: an attachment named
        ``archive.zip``.
    """
    target = 'dir1/dir2'
    stream = BytesIO()
    with ZipFile(stream, 'w') as zf:
        for file in glob(os.path.join(target, '*.sql')):
            # Store each file under its base name only, without directories.
            zf.write(file, os.path.basename(file))
    # Rewind after the archive is closed so send_file reads from the start.
    stream.seek(0)
    return send_file(
        stream,
        as_attachment=True,
        download_name='archive.zip'
    )
You haven't provided a code sample showing where you actually get this file or these files. A minimal working example would look like this:
from flask import Flask, request
app = Flask(__name__)
@app.route('/download_files', methods=['POST'])
def download():
    """Read the uploaded file(s) from a multipart POST request.

    Returns:
        An empty ``204`` response; the uploads are only read, not processed.
    """
    file = request.files['file'] # for one file
    files = request.files.getlist("file[]") # if there're multiple files provided
    # A Flask view has to return a response; the original returned nothing.
    return '', 204
if __name__ == "__main__":
    # Start Flask's development server with its default settings.
    app.run()
After that, your file variable will be a werkzeug.datastructures.FileStorage object and your files variable will be a list of such objects.
And to download all these files you can check this question.

Download PDF file using pdfkit and FastAPI

I am going to create an API that converts an HTML page to a PDF file. I made it using pdfkit and FastAPI. However, it saves the file to my local disk. After I serve this API online, how could users download this PDF file to their computer?
from typing import Optional
from fastapi import FastAPI
import pdfkit
app = FastAPI()
@app.post("/htmltopdf/{url}")
def convert_url(url:str):
    """Render the page at ``url`` to ``converted.pdf`` on the server's disk.

    Args:
        url: Address of the page to convert, taken from the request path.

    Returns:
        None; the PDF is only written to disk, nothing is sent to the client.
    """
    pdfkit.from_url(url, 'converted.pdf')
Returning a FileResponse solved my problem. Thanks to @Paul H and @clmno.
The code below is a working example of returning a PDF file for download with FastAPI.
from typing import Optional
from fastapi import FastAPI
from starlette.responses import FileResponse
import pdfkit
app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")
@app.get("/")
def read_root():
    """Render a fixed URL to ``file.pdf`` and return it as a PDF response.

    Returns:
        A ``FileResponse`` for ``file.pdf`` with media type
        ``application/pdf`` and the filename ``ticket.pdf``.
    """
    # Every request writes to the same "file.pdf" path before serving it.
    pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview","file.pdf", configuration=config)
    return FileResponse(
        "file.pdf",
        media_type="application/pdf",
        filename="ticket.pdf")
**2)** Another way is to use a temporary file. To get the PDF into a variable instead of a file, pass False instead of an output path:
from typing import Optional
from fastapi import FastAPI
from starlette.responses import FileResponse
import tempfile
import pdfkit
app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")
@app.get("/")
def read_root():
    """Render a fixed URL to PDF bytes and serve them from a temporary file.

    Returns:
        A ``FileResponse`` for the temporary ``.pdf`` file with media type
        ``application/pdf`` and the filename ``ticket.pdf``.
    """
    # Passing False as the output path makes from_url return the PDF content
    # instead of writing it to a file.
    pdf = pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview",False, configuration=config)
    with tempfile.NamedTemporaryFile(mode="w+b", suffix=".pdf", delete=False) as TPDF:
        TPDF.write(pdf)
    # Returned after the with-block so the file is closed and fully written.
    # NOTE: delete=False means the temporary file is never removed here.
    return FileResponse(
        TPDF.name,
        media_type="application/pdf",
        filename="ticket.pdf")
Once you get the bytes of the PDF file, you can simply return a custom Response, specifying the content, headers and media_type. Thus, no need for saving the file to the disk or generating temporary files, as suggested by another answer. Similar to this answer, you can set the Content-Disposition header to let the browser know whether the PDF file should be viewed or downloaded.
Example
from fastapi import Response
@app.get('/')
def main():
    """Render a page to PDF in memory and return it as a download.

    Returns:
        A ``Response`` carrying the PDF content with media type
        ``application/pdf`` and an ``attachment`` Content-Disposition.
    """
    # No output path is given, so the PDF content is returned, not written.
    pdf = pdfkit.from_url('http://google.com', configuration=config)
    headers = {'Content-Disposition': 'attachment; filename="out.pdf"'}
    return Response(pdf, headers=headers, media_type='application/pdf')
To have the PDF file viewed in the browser instead of downloaded, use:
headers = {'Content-Disposition': 'inline; filename="out.pdf"'}
See this answer on how to install and use pdfkit.

How to serve an image from google cloud storage using python flask

I'm currently working on a project running flask on the Appengine standard environment, and I'm attempting to serve an image that has been uploaded onto Google Cloud Storage on my project's default Appengine storage bucket.
This is the routing code I currently have:
# main.py
from google.appengine.api import images
from flask import Flask, send_file
app = Flask(__name__)
...
@app.route("/sample_route")
def sample_handler():
    """Try to serve a Cloud Storage photo through the App Engine images API."""
    myphoto = images.Image(filename="/gs/myappname.appspot.com/mysamplefolder/photo.jpg")
    # NOTE: this call raises the AttributeError quoted in the question
    # ('Image' object has no attribute 'read'); kept as posted.
    return send_file(myphoto)
...
However, I am getting an AttributeError: 'Image' object has no attribute 'read' error.
The question is, how do I serve an image sourced from google cloud storage using an arbitrary route using python and flask?
EDIT:
I am actually trying to serve an image that I have uploaded to the default Cloud Storage Bucket in my app engine project.
I've also tried to serve the image using the following code without success:
# main.py
from google.appengine.api import images
from flask import Flask, send_file
app = Flask(__name__)
...
@app.route("/sample_route")
def sample_handler():
    """Read a photo from Cloud Storage and pass its raw bytes to ``send_file``."""
    import cloudstorage as gcs
    gcs_file = gcs.open("/mybucketname/mysamplefolder/photo.jpg")
    try:
        img = gcs_file.read()
    finally:
        # Close the handle even when the read fails.
        gcs_file.close()
    # NOTE: the raw bytes are passed straight to send_file; this is the
    # attempt the question reports as unsuccessful, kept as posted.
    return send_file(img, mimetype='image/jpeg')
...
I've used the GoogleAppEngineCloudStorageClient Python library and loaded images with code similar to the following example:
from google.appengine.api import app_identity
import cloudstorage
from flask import Flask, send_file
import io, os
app = Flask(__name__)
# ...
@app.route('/imagetest')
def test_image():
    """Read ``mytestimage.jpg`` from Cloud Storage and send it as a JPEG.

    Returns:
        The response built by ``send_file`` from an in-memory copy of the
        file, with mimetype ``image/jpeg``.
    """
    # Use BUCKET_NAME or the project default bucket.
    BUCKET_NAME = '/' + os.environ.get('MY_BUCKET_NAME',
                                       app_identity.get_default_gcs_bucket_name())
    filename = 'mytestimage.jpg'
    # Joined with '/' explicitly: this is an object path, not an OS path.
    file = '/'.join((BUCKET_NAME, filename))
    gcs_file = cloudstorage.open(file)
    try:
        contents = gcs_file.read()
    finally:
        # Close the handle even when the read fails.
        gcs_file.close()
    # Wrap the bytes in a file-like object positioned at the start.
    return send_file(io.BytesIO(contents),
                     mimetype='image/jpeg')
Using google-cloud-storage==1.6.0
from flask import current_app as app, send_file, abort
from google.cloud import storage
import tempfile
@app.route('/blobproxy/<filename>', methods=['GET'])
def get(filename):
    """Download blob ``filename`` from the bucket and send it to the client.

    Args:
        filename: Blob name captured from the URL.

    Returns:
        The response built by ``send_file`` for the temporary copy; an empty
        ``filename`` aborts with HTTP 400.
    """
    if not filename:
        abort(400)
    client = storage.Client()
    bucket = client.get_bucket('yourbucketname')
    blob = bucket.blob(filename)
    with tempfile.NamedTemporaryFile() as temp:
        blob.download_to_filename(temp.name)
        # NOTE: Flask 2.0 renamed attachment_filename to download_name; the
        # keyword is kept as in the answer, which targets an older Flask.
        return send_file(temp.name, attachment_filename=filename)
I recommend looking at the docs for the path or string converters for your route, and NamedTemporaryFile defaults to delete=True so no residue.
flask also figures out the mimetype if you give it a file name, as is the case here.
Are you trying to accomplish something like this:
@app.route("/sample_route")
def sample_handler():
    """Fetch an image over HTTP and send it to the client as a JPEG.

    NOTE: written for Python 2 (``urllib2`` and ``StringIO`` modules).
    """
    import urllib2
    import StringIO
    # "{image url}" is a placeholder for the absolute URL of the image.
    request = urllib2.Request("{image url}")
    img = StringIO.StringIO(urllib2.urlopen(request).read())
    return send_file(img, mimetype='image/jpeg') # display in browser
or
return send_file(img) # download file
The image url is needed, not a relative path. You could just do a redirect to the image url, but they would get a 301.

Flask to return image stored in database

My images are stored in a MongoDB, and I'd like to return them to the client, here is how the code is like:
@app.route("/images/<int:pid>.jpg")
def getImage(pid):
    """Return the stored image for ``pid`` (the lookup is elided in the question).

    Args:
        pid: Integer image id captured from the URL.
    """
    # get image binary from MongoDB, which is bson.Binary type
    return image_binary
However, it seems that I can't return binary directly in Flask? My idea so far:
Return the base64 of the image binary. The problem is that IE<8 doesn't support this.
Create a temporary file then return it with send_file.
Are there better solutions?
Create a response object with the data and then set the content type header. Set the content disposition header to attachment if you want the browser to save the file instead of displaying it.
@app.route('/images/<int:pid>.jpg')
def get_image(pid):
    """Return the image for ``pid`` as a JPEG attachment.

    Args:
        pid: Integer image id captured from the URL.

    Returns:
        A response whose body is the image bytes, with Content-Type
        ``image/jpeg`` and an attachment Content-Disposition named
        ``<pid>.jpg``.
    """
    image_binary = read_image(pid)
    response = make_response(image_binary)
    response.headers.set('Content-Type', 'image/jpeg')
    # The 'attachment' disposition asks the browser to save, not display.
    response.headers.set(
        'Content-Disposition', 'attachment', filename='%s.jpg' % pid)
    return response
Relevant: werkzeug.Headers and flask.Response
You can pass a file-like object and the header arguments to send_file to let it set up the complete response. Use io.BytesIO for binary data:
return send_file(
io.BytesIO(image_binary),
mimetype='image/jpeg',
as_attachment=True,
download_name='%s.jpg' % pid)
Prior to Flask 2.0, download_name was called attachment_filename.
Just wanted to confirm that dav1d's second suggestion is correct - I tested this (where obj.logo is a mongoengine ImageField), works fine for me:
import io
from flask import current_app as app
from flask import send_file
from myproject import Obj
@app.route('/logo.png')
def logo():
    """Serves the logo image.

    Returns:
        The response built by ``send_file`` from the bytes of ``obj.logo``,
        named ``logo.png`` with mimetype ``image/png``.
    """
    obj = Obj.objects.get(title='Logo')
    return send_file(
        # Wrap the stored bytes in a file-like object positioned at the start.
        io.BytesIO(obj.logo.read()),
        download_name='logo.png',
        mimetype='image/png'
    )
Easier than manually creating a Response object and settings its headers.
Prior to Flask 2.0, download_name was called attachment_filename.
Suppose I already have the path of the stored image. The code below sends that image.
from flask import send_file
@app.route('/get_image')
def get_image():
    """Send the image stored at ``uploads/123.jpg``.

    Returns:
        The response built by ``send_file`` with mimetype ``image/jpeg``.
    """
    # Forward slash instead of the original backslash: works on Windows too
    # and does not break on other platforms.
    filename = 'uploads/123.jpg'
    # 'image/jpeg' is the registered media type; 'image/jpg' is not.
    return send_file(filename, mimetype='image/jpeg')
uploads is my folder name where my image with 123.jpg is present.
[PS: The uploads folder should be in the same directory as your script file]
Hope it helps.
The following worked for me (for Python 3.7.3):
import io
import base64
# import flask
from PIL import Image
def get_encoded_img(image_path):
    """Load an image, re-encode it as PNG and return it as base64 text.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The PNG bytes encoded with ``base64.encodebytes`` and decoded to an
        ASCII ``str``.
    """
    img_byte_arr = io.BytesIO()
    # The context manager closes the image file once the PNG has been written
    # to the buffer; the original never closed it.
    with Image.open(image_path, mode='r') as img:
        img.save(img_byte_arr, format='PNG')
    my_encoded_img = base64.encodebytes(img_byte_arr.getvalue()).decode('ascii')
    return my_encoded_img
...
# your api code
...
img_path = 'assets/test.png'
img = get_encoded_img(img_path)
# prepare the response: data
response_data = {"key1": value1, "key2": value2, "image": img}
# return flask.jsonify(response_data )

Categories