I am going to create an API that converts an HTML page to a PDF file. I made it using pdfkit and FastAPI. However, it saves the file to my local disk. After I serve this API online, how could users download this PDF file to their computer?
from typing import Optional
from fastapi import FastAPI
import pdfkit
app = FastAPI()


@app.post("/htmltopdf/{url}")
def convert_url(url: str):
    """Render the page at *url* to ``converted.pdf``.

    The PDF is written to a relative path on the server only; nothing is
    returned to the client.
    """
    pdfkit.from_url(url, 'converted.pdf')
Returning a FileResponse solved my problem. Thanks to @Paul H and @clmno.
Below is a working example of returning a PDF file for download with FastAPI.
from typing import Optional
from fastapi import FastAPI
from starlette.responses import FileResponse
import pdfkit
app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")


@app.get("/")
def read_root():
    """Render the preview page to ``file.pdf`` and send it as ``ticket.pdf``.

    NOTE(review): every request writes the same ``file.pdf``, so concurrent
    requests can overwrite each other's output.
    """
    pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview", "file.pdf", configuration=config)
    return FileResponse(
        "file.pdf",
        media_type="application/pdf",
        filename="ticket.pdf",
    )
**2)** This is another way, using temporary files. To get the PDF as bytes in a variable instead of writing it to disk, pass False instead of the output path:
from typing import Optional
from fastapi import FastAPI
from starlette.responses import FileResponse
import tempfile
import pdfkit
app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")


@app.get("/")
def read_root():
    """Render the preview page to a temporary PDF and send it as ``ticket.pdf``.

    Passing ``False`` as the output path makes pdfkit hand back the PDF bytes
    instead of writing a file; they are spooled to a temporary file so that
    ``FileResponse`` can serve them.
    """
    import os

    from starlette.background import BackgroundTask

    pdf = pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview", False, configuration=config)
    # delete=False keeps the file on disk after the handle is closed, so it
    # still exists when the response is streamed.
    with tempfile.NamedTemporaryFile(mode="w+b", suffix=".pdf", delete=False) as tmp_pdf:
        tmp_pdf.write(pdf)
    return FileResponse(
        tmp_pdf.name,
        media_type="application/pdf",
        filename="ticket.pdf",
        # Remove the temporary file once the response has been sent;
        # otherwise one file is leaked per request.
        background=BackgroundTask(os.remove, tmp_pdf.name),
    )
Once you get the bytes of the PDF file, you can simply return a custom Response, specifying the content, headers and media_type. Thus, no need for saving the file to the disk or generating temporary files, as suggested by another answer. Similar to this answer, you can set the Content-Disposition header to let the browser know whether the PDF file should be viewed or downloaded.
Example
from fastapi import Response
@app.get('/')
def main():
    """Render a page to PDF in memory and return it as a downloadable attachment."""
    # No output path is given, so the PDF comes back as bytes and nothing is
    # written to disk.
    pdf = pdfkit.from_url('http://google.com', configuration=config)
    headers = {'Content-Disposition': 'attachment; filename="out.pdf"'}
    return Response(pdf, headers=headers, media_type='application/pdf')
To have the PDF file viewed in the browser instead of downloaded, use:
# "inline" asks the browser to display the PDF rather than save it.
headers = {'Content-Disposition': 'inline; filename="out.pdf"'}
See this answer on how to install and use pdfkit.
Related
I am trying to generate a list of tools with prices. The prices will be present in an excel file. How do I go about so that the excel file is read at first and I will just use pandas data frame to perform search and get the price. Note I don't want to read the excel file again. I tried adding
PRICE_FILE=pd.read_excel("./static/assets/tool_price.xlsx") in config.py so that it gets read when the app is initialized but I am having some error. This might be incorrect, I am not very sure.
I am using a flask web app with sqlite as my database (flask-sqlalchemy)
Here's how my main and config files look:
main.py
import os
from flask import Flask
from database import db
from config import LocalDevelopmentConfig
from flask_restful import Resource, Api
def create_app():
    """Build the Flask app, load the development config and bind the database.

    Returns:
        The configured ``Flask`` instance, with an application context pushed.
    """
    app = Flask(__name__, template_folder="templates")
    app.config.from_object(LocalDevelopmentConfig)
    db.init_app(app)
    # Presumably pushed so that modules imported afterwards can use the app
    # and the database without opening their own context.
    app.app_context().push()
    return app
app = create_app()

# Imported only after ``app`` exists (the controllers presumably register
# their routes on it).
from controllers import *

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
config.py
import os
import pandas as pd
basedir = os.path.abspath(os.path.dirname(__file__))
class Config:
    """Base settings; environment-specific subclasses override these values."""

    DEBUG = False
    SQLITE_DB_DIR = None
    SQLALCHEMY_DATABASE_URI = None
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    # Placeholder for the price table; subclasses assign the loaded data.
    PRICE_FILE = None
class LocalDevelopmentConfig(Config):
    """Settings for local development: SQLite database next to this module, debug on."""

    SQLITE_DB_DIR = os.path.join(basedir)
    SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(SQLITE_DB_DIR, "toolcodes.sqlite3")
    # Read once, when this class body is executed at import time.
    # NOTE(review): the path is relative to the current working directory,
    # not to ``basedir``, so loading fails if the app is started from
    # another directory.
    PRICE_FILE = pd.read_excel("./static/assets/tool_price.xlsx")
    DEBUG = True
I also have controllers.py which handles the GET and POST requests.
This is the structure of my project:
enter image description here
According to documentation for pandas, there is a "pandas.read_excel()" function which you can use to read excel files.
The file can be read using the file name as string or an open file object:
pd.read_excel('tmp.xlsx', index_col=0)
or
pd.read_excel(open('tmp.xlsx', 'rb'), sheet_name='Sheet3')
Note: Reading the Excel file in the config.py file is a bad idea. What you should do is create a route that expects a POST request, then save the file in a local directory.
Once the file is saved, you can pass the location of the file to the pd.read_excel() function to perform some logic. An example is:
from flask import Flask, render_template, request, url_for
import pandas as pd
import requests

app = Flask(__name__)


@app.route("/read/excel", methods=['POST', 'GET'])
def read_excel_files():
    """Download a price workbook on POST, load it with pandas, then render the page.

    GET requests skip the download and only render the template.
    """
    if request.method == "POST":
        # URL we are requesting the Excel file from
        url = 'https://www.somelocation.com/prices.xlsx'
        response = requests.get(url, allow_redirects=True)
        # Save the file to some location (example: "./some/path/prices.xlsx")
        file_location = "./some/path/prices.xlsx"
        with open(file_location, 'wb') as excel_out:
            excel_out.write(response.content)
        # Then read the Excel file with pandas
        pandas_file = pd.read_excel(file_location, index_col=0)
        # Or, if you are using flask url_for (left as a comment because
        # ``excel_file_name`` is not defined in this example):
        # excel_file = pd.read_excel(url_for('static', filename='documents/' + excel_file_name), index_col=0)
    return render_template("readExcel.html", title="Read Excel Files")
Once you have successfully read the file with pandas, you can perform some logic on the data. You can also use Flask-WTF (WTForms) to get the Excel file from the user and pass it to the pd.read_excel() function.
I am trying to provide the client side the option of downloading some files in Flask. There can be multiple files or a single file available for the user/client to download.
However I am not able to understand how to provide the user the option to download multiple files.
Here is what I have tried so far:
@app.route('/download_files')
def download():
    """Collect the files in ``dir1/dir2`` whose stem appears in ``names`` and send them.

    NOTE(review): ``send_file`` is given the whole list here, but it serves
    one path or file object per call; several files have to be bundled
    (e.g. into a zip archive) before they can be sent.
    """
    count = 0
    download_list = []
    for path in pathlib.Path("dir1/dir2").iterdir():
        if path.is_file():
            for i in names:
                if pathlib.PurePosixPath(path).stem == i:
                    count += 1
                    download_list.append(path)
    return send_file(download_list, as_attachment=True, mimetype="text/plain", download_name="Downloaded Files", attachment_filename="Generated Files")
This does not work properly even with a single file. The file type I am trying to download is text file with the extension .sql .
Will I somehow have to zip multiple files and then provide the download option? Please guide with my available options.
In order to offer several files together as a download, you only have the option of compressing them in an archive.
In my example, all files that match the specified pattern are listed and compressed in a zip archive. This is written to the memory and sent by the server.
from flask import Flask
from flask import send_file
from glob import glob
from io import BytesIO
from zipfile import ZipFile
import os
app = Flask(__name__)


@app.route('/download')
def download():
    """Zip every ``*.sql`` file in ``dir1/dir2`` in memory and send the archive."""
    target = 'dir1/dir2'
    stream = BytesIO()
    with ZipFile(stream, 'w') as zf:
        for file in glob(os.path.join(target, '*.sql')):
            # Store each entry under its bare file name, without the directory part.
            zf.write(file, os.path.basename(file))
    # Rewind so send_file reads the archive from its first byte.
    stream.seek(0)
    return send_file(
        stream,
        as_attachment=True,
        download_name='archive.zip',
    )
You haven't provided a code sample where you actually getting these files or this file. Minimum working example would be like this:
from flask import Flask, request
app = Flask(__name__)


@app.route('/download_files', methods=['POST'])
def download():
    """Show how uploaded files are read from a POST request."""
    file = request.files['file']  # for one file
    files = request.files.getlist("file[]")  # if there're multiple files provided
    # A Flask view must return a response; this example has nothing to send
    # back, so answer with an empty 204.
    return '', 204


if __name__ == "__main__":
    app.run()
After that, your file variable will be a werkzeug.datastructures.FileStorage object, and your files variable will be a list of such objects.
And to download all these files you can check this question.
I see the functions for uploading in an API, but I don't see how to download. Am I missing something? I want to create an API for a file download site. Is there a different API I should be using?
from typing import List
from fastapi import FastAPI, Query
app = FastAPI()
PATH = "some/path"


@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    """List the folders and files found under ``PATH`` plus the ``q`` segments.

    Returns:
        A dict with ``folders``, ``files`` and the received ``path_vars``.
    """
    # ``q`` is None when the request has no ``q`` values: list PATH itself.
    segments = q or []
    # Join with exactly one separator whether or not PATH ends in "/".
    # NOTE(review): segments come straight from the query string; ".."
    # entries can escape PATH.
    entry = "/".join([PATH.rstrip("/"), *segments]) + "/"
    dirs = os.listdir(entry)
    return {
        "folders": [val for val in dirs if os.path.isdir(entry + val)],
        "files": [val for val in dirs if os.path.isfile(entry + val)],
        "path_vars": q,
    }
Here is the sample bit of code for python to fetch files and dirs for a path, you can return the path as a list with a new entry in a loop to go deeper into a file tree. Passing a file name should trigger a download function, but I cant seem to get a download func going.
This worked For me
from starlette.responses import FileResponse
# Inside the route handler: send the file at file_location, offered to the client as file_name.
return FileResponse(file_location, media_type='application/octet-stream',filename=file_name)
This will download the file with filename
Since we're talking about FastAPI, the proper way to return a file response is covered in their documentation, code snippet below:
from fastapi import FastAPI
from fastapi.responses import FileResponse
file_path = "large-video-file.mp4"
app = FastAPI()


@app.get("/")
def main():
    """Send the video file as a download named after ``file_path``."""
    # MP4 video is "video/mp4"; "text/mp4" is not a registered media type.
    return FileResponse(path=file_path, filename=file_path, media_type='video/mp4')
FastAPI uses Starlette's FileResponse class so there are two ways to import FileResponse on your API code. But of course importing from FastAPI would be a better choice. You can follow the approach below to enable your API endpoints support file download.
Do not forget to add aiofiles to your dependency list. A basic requirements.txt file should look like (versions of modules might change in time, version 0.63.0 of fastapi strictly use starlette 0.13.6)
uvicorn==0.13.4
fastapi==0.63.0
starlette==0.13.6
aiofiles==0.6.0
And the API code
import os
from fastapi import FastAPI
from fastapi.responses import FileResponse
app = FastAPI()


@app.get("/")
async def main():
    """Send a file from the current working directory as a binary download."""
    file_name = "FILE NAME"
    # DEPENDS ON WHERE YOUR FILE LOCATES
    file_path = os.path.join(os.getcwd(), file_name)
    return FileResponse(path=file_path, media_type='application/octet-stream', filename=file_name)
I figured it out,
from starlette.responses import FileResponse
@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    """Serve the file at ``PATH`` plus the ``q`` segments, or list that directory.

    Returns:
        The result of ``download()`` when the target is a file; otherwise a
        dict with ``folders``, ``files`` and the received ``path_vars``.
    """
    # Join with exactly one separator whether or not PATH ends in "/".
    # NOTE(review): segments come straight from the query string; ".."
    # entries can escape PATH.
    entry = "/".join([PATH.rstrip("/"), *q]) if q else PATH
    if os.path.isfile(entry):
        return download(entry)
    dirs = os.listdir(entry + "/")
    return {
        "folders": [val for val in dirs if os.path.isdir(entry + "/" + val)],
        "files": [val for val in dirs if os.path.isfile(entry + "/" + val)],
        "path_vars": q,
    }
def download(file_path):
    """Return a ``FileResponse`` for *file_path*, or ``None`` if it is not a file."""
    if not os.path.isfile(file_path):
        return None
    # Passing ``filename`` adds a Content-Disposition header, so the browser
    # saves the file under its own name instead of a generic "download".
    return FileResponse(file_path, filename=os.path.basename(file_path))
I added this part
from starlette.responses import FileResponse
if os.path.isfile(entry):
return download(entry)
This allows you to host static files. But for some reason all files download as "download" plus the extension. If you know how to preserve the original file name, let me know.
from fastapi import FastAPI
from fastapi.responses import FileResponse
import uvicorn
import os
app = FastAPI()


@app.get("/download-file")
def download_file(file_name: str):
    """Send ``<file_name>.xlsx`` from the Excel folder as a download.

    NOTE(review): ``file_name`` comes from the query string and is placed
    into the path unchecked.
    """
    folder_path = r"C:\Users\HP\Desktop\excel files"
    # Serve and name the download with the same ".xlsx" name; before, the
    # download name was missing the extension.
    served_name = f'{file_name}.xlsx'
    file_location = f'{folder_path}{os.sep}{served_name}'
    return FileResponse(file_location, media_type='application/octet-stream', filename=served_name)


uvicorn.run(app, port=9105)
I am attempting to pull a file from AWS S3, using Boto3, directly into a BytesIO object. This will eventually be used to manipulate the downloaded data but for now I'm just trying to give that file directly to a user via Flask. As I understand everything the below should work, but does not. The browser simply displays nothing (and shows only downloaded a few bytes of data).
(In this example, my sample file is a png)
from flask import Flask, send_from_directory, abort, Response, send_file, make_response
import boto3, botocore
import os
import io
AWS_ACCESS_KEY = os.environ['AWS_ACCESS_KEY'].rstrip()
AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY'].rstrip()
S3_BUCKET = "static1"

app = Flask(__name__, static_url_path='/tmp')


@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    """Fetch the S3 object at key *path* into memory and send it to the client.

    NOTE(review): after ``download_fileobj`` the stream position is at the
    end of the written data and it is not rewound before ``send_file``.
    """
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    # Created but never passed to download_fileobj below.
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    return send_file(file, mimetype=metadata['ContentType'])


if __name__ == '__main__':
    app.run(debug=True, port=3000, host='0.0.0.0')
If I modify that core routine to write the BytesIO object to disk, then read it back into a new BytesIO object - it works fine. As below:
def catch_all(path):
    """Fetch the S3 object at key *path*, round-trip it through disk and send it.

    The bytes are written to ``/tmp/test1.png`` and read back into a fresh
    ``BytesIO``, which starts at position 0.
    """
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    # Created but never passed to download_fileobj below.
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    print(file.getvalue())
    with open("/tmp/test1.png", "wb") as fh:
        fh.write(file.getvalue())
    # ``with`` closes the handle; the original ``fh.close`` lacked the call
    # parentheses and never closed the file.
    with open("/tmp/test1.png", "rb") as fh:
        f2 = io.BytesIO(fh.read())
    print(f2.getvalue())
    return send_file(f2, mimetype=metadata['ContentType'])
Going around in circles with this for a few days, It's clear that I'm missing something and I'm not sure what. The script is being run inside a Python 3.8 docker container with the latest copies of boto3/flask/etc.
Rewinding your BytesIO object should do the trick, with file.seek(0) just before send_file(...).
For the record I'm not sure your boto3/botocore calls are "best practices", to try your usecase I ended up with:
from boto3.session import Session
session = Session(
    aws_access_key_id=KEY_ID, aws_secret_access_key=ACCESS_KEY, region_name=REGION_NAME
)
s3 = session.resource("s3")


@base_bp.route("/test-stuff")
def test_stuff():
    """Download one S3 object into memory and send it with its stored content type."""
    a_file = io.BytesIO()
    s3_object = s3.Object(BUCKET, PATH)
    s3_object.download_fileobj(a_file)
    # Rewind: the download leaves the position at the end of the data.
    a_file.seek(0)
    return send_file(a_file, mimetype=s3_object.content_type)
It works when reading the file from disk because you instantiate your BytesIO with the full content of the file, so it is fully populated and still at "position 0".
I'm currently working on a project running flask on the Appengine standard environment, and I'm attempting to serve an image that has been uploaded onto Google Cloud Storage on my project's default Appengine storage bucket.
This is the routing code I currently have:
# main.py
from google.appengine.api import images
from flask import Flask, send_file
app = Flask(__name__)
...
@app.route("/sample_route")
def sample_handler():
    """Try to serve a Cloud Storage photo through the App Engine images API.

    NOTE(review): ``send_file`` is given the ``images.Image`` object itself,
    which is neither a path nor a readable file object.
    """
    myphoto = images.Image(filename="/gs/myappname.appspot.com/mysamplefolder/photo.jpg")
    return send_file(myphoto)
...
However, I am getting an AttributeError: 'Image' object has no attribute 'read' error.
The question is, how do I serve an image sourced from google cloud storage using an arbitrary route using python and flask?
EDIT:
I am actually trying to serve an image that I have uploaded to the default Cloud Storage Bucket in my app engine project.
I've also tried to serve the image using the following code without success:
# main.py
from google.appengine.api import images
from flask import Flask, send_file
app = Flask(__name__)
...
@app.route("/sample_route")
def sample_handler():
    """Read a photo from Cloud Storage and try to send its contents.

    NOTE(review): ``send_file`` receives the raw contents rather than a path
    or a file object.
    """
    import cloudstorage as gcs
    gcs_file = gcs.open("/mybucketname/mysamplefolder/photo.jpg")
    img = gcs_file.read()
    gcs_file.close()
    return send_file(img, mimetype='image/jpeg')
...
I've used the GoogleAppEngineCloudStorageClient Python library and loaded images with code similar to the following example:
from google.appengine.api import app_identity
import cloudstorage
from flask import Flask, send_file
import io, os
app = Flask(__name__)

# ...


@app.route('/imagetest')
def test_image():
    """Read ``mytestimage.jpg`` from Cloud Storage and send it as a JPEG."""
    # Use MY_BUCKET_NAME or the project default bucket.
    bucket_path = '/' + os.environ.get('MY_BUCKET_NAME',
                                       app_identity.get_default_gcs_bucket_name())
    filename = 'mytestimage.jpg'
    object_path = os.path.join(bucket_path, filename)
    gcs_file = cloudstorage.open(object_path)
    try:
        contents = gcs_file.read()
    finally:
        # Close the handle even when the read fails.
        gcs_file.close()
    # Wrap the bytes in a file object, which is what send_file expects.
    return send_file(io.BytesIO(contents),
                     mimetype='image/jpeg')
Using google-cloud-storage==1.6.0
from flask import current_app as app, send_file, abort
from google.cloud import storage
import tempfile
@app.route('/blobproxy/<filename>', methods=['GET'])
def get(filename):
    """Download the blob named *filename* to a temporary file and send it.

    Aborts with 400 when *filename* is empty.
    """
    if not filename:
        abort(400)
    client = storage.Client()
    bucket = client.get_bucket('yourbucketname')
    blob = bucket.blob(filename)
    with tempfile.NamedTemporaryFile() as temp:
        blob.download_to_filename(temp.name)
        # Must run inside the block: the temporary file is deleted when the
        # block exits.
        return send_file(temp.name, attachment_filename=filename)
I recommend looking at the docs for the path or string converters for your route, and NamedTemporaryFile defaults to delete=True so no residue.
flask also figures out the mimetype if you give it a file name, as is the case here.
Are you trying to accomplish something like this:
@app.route("/sample_route")
def sample_handler():
    """Fetch an image over HTTP and send it from an in-memory buffer."""
    # urllib2 and StringIO are Python 2 modules.
    import urllib2
    import StringIO
    request = urllib2.Request("{image url}")
    img = StringIO.StringIO(urllib2.urlopen(request).read())
    return send_file(img, mimetype='image/jpeg')  # display in browser
    # or, to download the file instead, return this:
    # return send_file(img)
The image url is needed, not a relative path. You could just do a redirect to the image url, but they would get a 301.