I see the functions for uploading in an API, but I don't see how to download. Am I missing something? I want to create an API for a file download site. Is there a different API I should be using?
import os
from typing import List

from fastapi import FastAPI, Query

app = FastAPI()
PATH = "some/path"

@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    entry = PATH + "/".join(query_items["q"]) + "/"
    dirs = os.listdir(entry)
    results["folders"] = [val for val in dirs if os.path.isdir(entry + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + val)]
    results["path_vars"] = query_items["q"]
    return results
Here is a sample bit of Python code to fetch the files and directories for a path; you can return the path as a list and append a new entry in a loop to go deeper into the file tree. Passing a file name should trigger a download function, but I can't seem to get a download function going.
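For reference, FastAPI collects repeated query parameters into the List[str] parameter, so the endpoint above is driven like this (hypothetical folder names):

# GET /shows/          -> q is None (root listing)
# GET /shows/?q=a      -> q == ["a"], i.e. the subfolder "a"
# GET /shows/?q=a&q=b  -> q == ["a", "b"], i.e. the subpath "a/b"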
This worked for me:

from starlette.responses import FileResponse

return FileResponse(file_location, media_type='application/octet-stream', filename=file_name)

This will download the file with the given filename.
Since we're talking about FastAPI, the proper way to return a file response is covered in their documentation; a code snippet is below:
from fastapi import FastAPI
from fastapi.responses import FileResponse

file_path = "large-video-file.mp4"
app = FastAPI()

@app.get("/")
def main():
    return FileResponse(path=file_path, filename=file_path, media_type='video/mp4')
FastAPI uses Starlette's FileResponse class, so there are two ways to import FileResponse in your API code; importing it from FastAPI is the more conventional choice. You can follow the approach below to let your API endpoints support file downloads.
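Both imports refer to the same underlying class, which a quick sketch can confirm:

from starlette.responses import FileResponse as StarletteFileResponse
from fastapi.responses import FileResponse

# fastapi.responses simply re-exports Starlette's response classes,
# so the two names point at the very same object:
assert FileResponse is StarletteFileResponse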
Do not forget to add aiofiles to your dependency list; FileResponse relies on it in these versions. A basic requirements.txt file should look like the following (module versions change over time; fastapi 0.63.0 strictly pins starlette 0.13.6):
uvicorn==0.13.4
fastapi==0.63.0
starlette==0.13.6
aiofiles==0.6.0
And the API code:

import os

from fastapi import FastAPI
from fastapi.responses import FileResponse

app = FastAPI()

@app.get("/")
async def main():
    file_name = "FILE NAME"
    # DEPENDS ON WHERE YOUR FILE IS LOCATED
    file_path = os.getcwd() + "/" + file_name
    return FileResponse(path=file_path, media_type='application/octet-stream', filename=file_name)
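A minimal client-side sketch to test it, assuming the app is served locally with uvicorn on the default port 8000 and "FILE NAME" points at a real file:

import requests

# The saved name comes from the Content-Disposition header
# that FileResponse(filename=...) sets on the response.
r = requests.get("http://127.0.0.1:8000/")
r.raise_for_status()
with open("downloaded_file", "wb") as f:
    f.write(r.content)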
I figured it out,
from starlette.responses import FileResponse

@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    if query_items["q"]:
        entry = PATH + "/".join(query_items["q"])
    else:
        entry = PATH
    if os.path.isfile(entry):
        return download(entry)
    dirs = os.listdir(entry + "/")
    results["folders"] = [val for val in dirs if os.path.isdir(entry + "/" + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + "/" + val)]
    results["path_vars"] = query_items["q"]
    return results

def download(file_path):
    """
    Download file for given path.
    """
    if os.path.isfile(file_path):
        return FileResponse(file_path)
    # return FileResponse(path=file_path)
    return None
I added this part:

from starlette.responses import FileResponse

if os.path.isfile(entry):
    return download(entry)
This allows you to host static files. But for some reason all files download as "download.extension". If you know how to keep the original file name, let me know.
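A likely fix, in line with the other answers here: FileResponse's filename argument sets the Content-Disposition header, which is what the browser uses to name the saved file. A sketch of the download helper above with that change:

import os
from starlette.responses import FileResponse

def download(file_path):
    if os.path.isfile(file_path):
        # filename= sets Content-Disposition, so the browser saves the
        # file under its original name instead of "download".
        return FileResponse(file_path, filename=os.path.basename(file_path))
    return None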
from fastapi import FastAPI
from fastapi.responses import FileResponse
import uvicorn
import os

app = FastAPI()

@app.get("/download-file")
def download_file(file_name: str):
    folder_path = r"C:\Users\HP\Desktop\excel files"
    file_location = f'{folder_path}{os.sep}{file_name}.xlsx'  # os.sep separates the path components with a backslash
    return FileResponse(file_location, media_type='application/octet-stream', filename=file_name)

uvicorn.run(app, port=9105)
I have created an app, generated client credentials, and trusted the app for my SharePoint Online site.
I have created a file test.txt and placed it under https://company.sharepoint.com/sites/testsite/Shared%20Documents/General/test.txt
Additionally, I have installed the latest version of the module
pip freeze | grep Office
Office365-REST-Python-Client==2.3.11
import os
import tempfile

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext

class SharePoint:
    def __init__(self):
        # Configs.SITE_URL = 'https://company.sharepoint.com/sites/testsite/'
        context_auth = AuthenticationContext(Configs.SITE_URL)
        context_auth.acquire_token_for_app(client_id=Configs.OAUTH_CLIENT_ID, client_secret=Configs.OAUTH_CLIENT_SECRET)
        self.ctx = ClientContext(Configs.SITE_URL, context_auth)

    def download_files(self):
        file_url = "/sites/testsite/Shared%20Documents/General/test.txt"
        download_path = os.path.join(tempfile.mkdtemp(), os.path.basename(file_url))
        print(download_path)
        with open(download_path, "wb") as local_file:
            file = self.ctx.web.get_file_by_server_relative_url(file_url).download(local_file).execute_query()
        print("[Ok] file has been downloaded into: {0}".format(download_path))

if __name__ == '__main__':
    s = SharePoint()
    s.download_files()
However, it throws an error, and I am not able to get my head around it.
office365.runtime.client_request_exception.ClientRequestException: ('-2130575338, Microsoft.SharePoint.SPException', 'The file /sites/testsite/Shared%20Documents/General/test.txt does not exist.', "404 Client Error: Not Found for url: https://company.sharepoint.com/sites/testsite/_api/Web/getFileByServerRelativeUrl('%2Fsites%2Ftestsite%2FShared%2520Documents%2FGeneral%2Ftest.txt')?$select=ServerRelativePath")
You seem to be basing this off of the example shown here.
I was having similar issues at first, until I made all function inputs absolute paths, inclusive of URL scheme and site. This just removes a lot of room for error.
My current script is similar to this:
from urllib.parse import urlparse

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext

def download_file(local_absolute_path: str, global_absolute_path: str, client_context: ClientContext) -> None:
    print(f"The file {global_absolute_path} is being prepared for download.")
    # urlparse().path strips the scheme and host, leaving the server-relative URL
    download_location = urlparse(global_absolute_path).path
    file_to_download = client_context.web.get_file_by_server_relative_url(download_location)
    with open(local_absolute_path, "wb") as local_file:
        file_to_download.download_session(local_file).execute_query()
    print(f"──► Download successful. The file has been saved as {local_absolute_path}\n")
Note that self.ctx in your code is equivalent to client_context in mine.
I recommend writing a bunch of helper functions to convert the paths back and forth between the absolute, relative, and file-name forms. The ones I currently use can be found below:
import os
from urllib.parse import urlparse

class PathHandler(object):
    def __init__(self, absolute_path: str) -> None:
        self.absolute_path = absolute_path

    def get_filename_from_absolute(self) -> str:
        parsed_url = urlparse(self.absolute_path)
        return os.path.basename(parsed_url.path)

    def get_relative_from_absolute(self) -> str:
        parsed_url = urlparse(self.absolute_path)
        return parsed_url.path

    def get_parent_folder_from_absolute(self) -> str:
        parsed_url = urlparse(self.absolute_path)
        return os.path.dirname(parsed_url.path)

    def get_scheme_and_root_from_absolute(self) -> str:
        parsed_url = urlparse(self.absolute_path)
        return f"{parsed_url.scheme}://{parsed_url.netloc}"

    def convert_to_absolute_local(self, local_root: str, global_root: str) -> str:
        return local_root + os.sep + self.absolute_path[len(global_root):].replace("/", os.sep)

    def convert_to_absolute_global(self, local_root: str, global_root: str) -> str:
        return global_root + "/" + self.absolute_path[len(local_root):].replace(os.sep, "/")
As a newbie in Azure,
I am following the Microsoft Azure Functions tutorial page
https://learn.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-cloud-backup?tabs=python
and the GitHub page
https://github.com/Azure/azure-functions-durable-python/tree/master/samples/fan_in_fan_out .
**HttpStart code**
import logging
import json

import azure.functions as func
import azure.durable_functions as df

async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    client = df.DurableOrchestrationClient(starter)
    payload: str = json.loads(req.get_body().decode())  # Load JSON post request data
    instance_id = await client.start_new(req.route_params["functionName"], client_input=payload)
    logging.info(f"Started orchestration with ID = '{instance_id}'.")
    return client.create_check_status_response(req, instance_id)
**E2_BackupSiteContent**
import azure.functions as func
import azure.durable_functions as df

def orchestrator_function(context: df.DurableOrchestrationContext):
    root_directory: str = context.get_input()
    if not root_directory:
        raise Exception("A directory path is required as input")
    files = yield context.call_activity("E2_GetFileList", root_directory)
    tasks = []
    for file in files:
        tasks.append(context.call_activity("E2_CopyFileToBlob", file))
    results = yield context.task_all(tasks)
    total_bytes = sum(results)
    return total_bytes

main = df.Orchestrator.create(orchestrator_function)
**E2_CopyFileToBlob**
import os
import pathlib

from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import ResourceExistsError

connect_str = os.getenv('AzureWebJobsStorage')

def main(filePath: str) -> str:
    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # Create a unique name for the container
    container_name = "backups"
    # Create the container if it does not exist
    try:
        blob_service_client.create_container(container_name)
    except ResourceExistsError:
        pass
    # Create a blob client using the local file name as the name for the blob
    parent_dir, fname = pathlib.Path(filePath).parts[-2:]  # Get last two path components
    blob_name = parent_dir + "_" + fname
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    # Count bytes in file
    byte_count = os.path.getsize(filePath)
    # Upload the created file
    with open(filePath, "rb") as data:
        blob_client.upload_blob(data)
    return byte_count
**E2_GetFileList**
import os
from os.path import dirname
from typing import List

def main(rootDirectory: str) -> List[str]:
    all_file_paths = []
    # We walk the file system
    for path, _, files in os.walk(rootDirectory):
        # We copy the code for activities and orchestrators
        if "E2_" in path:
            # For each file, we add their full-path to the list
            for name in files:
                if name == "__init__.py" or name == "function.json":
                    file_path = os.path.join(path, name)
                    all_file_paths.append(file_path)
    return all_file_paths
When I trigger the function over HTTP with the Postman app,
POST http://localhost:7071/api/orchestrators/E2_BackupSiteContent?req="D:\Tmp"
I get the following error messages:
[2021-11-12T02:13:42.432Z] Worker process started and initialized.
[2021-11-12T02:13:46.489Z] Host lock lease acquired by instance ID '000000000000000000000000AE48769C'.
[2021-11-12T02:13:52.529Z] Executing 'Functions.HttpStart' (Reason='This function was programmatically called via the host APIs.', Id=748996d0-1f84-4597-86ea-768467eb36e3)
[2021-11-12T02:13:52.560Z] Executed 'Functions.HttpStart' (Failed, Id=748996d0-1f84-4597-86ea-768467eb36e3, Duration=5433ms)
[2021-11-12T02:13:52.562Z] System.Private.CoreLib: Exception while executing function: Functions.HttpStart. Microsoft.Azure.WebJobs.Host: Exception binding parameter 'req'. Microsoft.AspNetCore.Server.Kestrel.Core: Reading the request body timed out due to data arriving too slowly. See MinRequestBodyDataRate.
What should I do to solve this problem?
(I tested with Linux and Windows.)
--Added--
(Postman capture: screenshot of the request omitted.)
Instead of passing the directory in the query string of the URL, you should pass the path in the HTTP request body in the Postman tool. The Microsoft doc page itself shows how to do it; see the Run the sample section.
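Since HttpStart does json.loads(req.get_body().decode()), the body has to be a JSON value, e.g. the JSON string "D:\Tmp". A minimal sketch of the equivalent request in Python, assuming the local host and port from the log above:

import requests

resp = requests.post(
    "http://localhost:7071/api/orchestrators/E2_BackupSiteContent",
    # json= serializes the Python string to the JSON body "D:\\Tmp",
    # matching json.loads(req.get_body().decode()) in HttpStart
    json="D:\\Tmp",
)
print(resp.json())  # the check-status payload from create_check_status_response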
I am going to create an API that converts an HTML page to a PDF file. I made it using pdfkit and FastAPI. However, it saves the file to my local disk. After I serve this API online, how can users download the PDF file to their computer?
from typing import Optional

from fastapi import FastAPI
import pdfkit

app = FastAPI()

@app.post("/htmltopdf/{url}")
def convert_url(url: str):
    pdfkit.from_url(url, 'converted.pdf')
Returning a FileResponse solved my problem. Thanks to @Paul H and @clmno.
The code below is a working example of returning a PDF file for download with FastAPI.
from typing import Optional

from fastapi import FastAPI
from starlette.responses import FileResponse
import pdfkit

app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")

@app.get("/")
def read_root():
    pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview", "file.pdf", configuration=config)
    return FileResponse(
        "file.pdf",
        media_type="application/pdf",
        filename="ticket.pdf")
**2)** This is another way, using tempfiles. To get the PDF into a variable instead of a file on disk, just pass False instead of a path:
from typing import Optional

from fastapi import FastAPI
from starlette.responses import FileResponse
import tempfile
import pdfkit

app = FastAPI()
config = pdfkit.configuration(wkhtmltopdf=r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe")

@app.get("/")
def read_root():
    pdf = pdfkit.from_url("https://nakhal.expo.com.tr/nakhal/preview", False, configuration=config)
    with tempfile.NamedTemporaryFile(mode="w+b", suffix=".pdf", delete=False) as TPDF:
        TPDF.write(pdf)
        return FileResponse(
            TPDF.name,
            media_type="application/pdf",
            filename="ticket.pdf")
Once you get the bytes of the PDF file, you can simply return a custom Response, specifying the content, headers and media_type. Thus, there is no need to save the file to disk or generate temporary files, as suggested by another answer. Similar to this answer, you can set the Content-Disposition header to let the browser know whether the PDF file should be viewed or downloaded.
Example
from fastapi import Response

@app.get('/')
def main():
    pdf = pdfkit.from_url('http://google.com', configuration=config)
    headers = {'Content-Disposition': 'attachment; filename="out.pdf"'}
    return Response(pdf, headers=headers, media_type='application/pdf')
To have the PDF file viewed in the browser instead of downloaded, use:
headers = {'Content-Disposition': 'inline; filename="out.pdf"'}
See this answer on how to install and use pdfkit.
I am attempting to pull a file from AWS S3, using Boto3, directly into a BytesIO object. This will eventually be used to manipulate the downloaded data, but for now I'm just trying to serve that file directly to a user via Flask. As I understand it, the code below should work, but it does not: the browser simply displays nothing (and shows that only a few bytes of data were downloaded).
(In this example, my sample file is a png)
from flask import Flask, send_from_directory, abort, Response, send_file, make_response
import boto3, botocore
import os
import io

AWS_ACCESS_KEY = os.environ['AWS_ACCESS_KEY'].rstrip()
AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY'].rstrip()
S3_BUCKET = "static1"

app = Flask(__name__, static_url_path='/tmp')

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    return send_file(file, mimetype=metadata['ContentType'])

if __name__ == '__main__':
    app.run(debug=True, port=3000, host='0.0.0.0')
If I modify that core routine to write the BytesIO object to disk, then read it back into a new BytesIO object - it works fine. As below:
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    print(file.getvalue())
    fh = open("/tmp/test1.png", "wb")
    fh.write(file.getvalue())
    fh.close()
    fh = open("/tmp/test1.png", "rb")
    f2 = io.BytesIO(fh.read())
    fh.close()
    print(f2.getvalue())
    return send_file(f2, mimetype=metadata['ContentType'])
I've been going around in circles with this for a few days; it's clear that I'm missing something, but I'm not sure what. The script is being run inside a Python 3.8 Docker container with the latest copies of boto3/flask/etc.
Rewinding your BytesIO object should do the trick: call file.seek(0) just before send_file(...).
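Applied to the route from the question (reusing its setup and names), a sketch of the fixed handler:

import io

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
    data = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    s3.download_fileobj(S3_BUCKET, path, data)
    data.seek(0)  # rewind: download_fileobj leaves the stream position at the end
    return send_file(data, mimetype=metadata['ContentType'])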
For the record, I'm not sure your boto3/botocore calls are "best practices"; to try your use case I ended up with:
import io

from boto3.session import Session
from flask import send_file

session = Session(
    aws_access_key_id=KEY_ID, aws_secret_access_key=ACCESS_KEY, region_name=REGION_NAME
)
s3 = session.resource("s3")

@base_bp.route("/test-stuff")
def test_stuff():
    a_file = io.BytesIO()
    s3_object = s3.Object(BUCKET, PATH)
    s3_object.download_fileobj(a_file)
    a_file.seek(0)
    return send_file(a_file, mimetype=s3_object.content_type)
It works when reading the file from disk because you instantiate your BytesIO with the full content of the file, so it is fully populated and the stream position is still at 0.
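A tiny self-contained demonstration of that position difference:

import io

buf = io.BytesIO()
buf.write(b"some bytes")
print(buf.tell())  # 10 -- writing leaves the position at the end, so a read returns nothing

buf2 = io.BytesIO(b"some bytes")
print(buf2.tell())  # 0 -- constructing from bytes leaves the position at the start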
I'm currently working on a project running Flask on the App Engine standard environment, and I'm attempting to serve an image that has been uploaded to Google Cloud Storage in my project's default App Engine storage bucket.
This is the routing code I currently have:
# main.py
from google.appengine.api import images
from flask import Flask, send_file

app = Flask(__name__)
...

@app.route("/sample_route")
def sample_handler():
    myphoto = images.Image(filename="/gs/myappname.appspot.com/mysamplefolder/photo.jpg")
    return send_file(myphoto)
...
However, I am getting an AttributeError: 'Image' object has no attribute 'read'.
The question is: how do I serve an image sourced from Google Cloud Storage on an arbitrary route using Python and Flask?
EDIT:
I am actually trying to serve an image that I have uploaded to the default Cloud Storage bucket in my App Engine project.
I've also tried to serve the image using the following code without success:
# main.py
from google.appengine.api import images
from flask import Flask, send_file

app = Flask(__name__)
...

@app.route("/sample_route")
def sample_handler():
    import cloudstorage as gcs
    gcs_file = gcs.open("/mybucketname/mysamplefolder/photo.jpg")
    img = gcs_file.read()
    gcs_file.close()
    return send_file(img, mimetype='image/jpeg')
...
I've used the GoogleAppEngineCloudStorageClient Python library and loaded images with code similar to the following example:
from google.appengine.api import app_identity
import cloudstorage
from flask import Flask, send_file
import io, os

app = Flask(__name__)
# ...

@app.route('/imagetest')
def test_image():
    # Use BUCKET_NAME or the project default bucket.
    BUCKET_NAME = '/' + os.environ.get('MY_BUCKET_NAME',
                                       app_identity.get_default_gcs_bucket_name())
    filename = 'mytestimage.jpg'
    file = os.path.join(BUCKET_NAME, filename)
    gcs_file = cloudstorage.open(file)
    contents = gcs_file.read()
    gcs_file.close()
    return send_file(io.BytesIO(contents),
                     mimetype='image/jpeg')
Using google-cloud-storage==1.6.0
from flask import current_app as app, send_file, abort
from google.cloud import storage
import tempfile

@app.route('/blobproxy/<filename>', methods=['GET'])
def get(filename):
    if filename:
        client = storage.Client()
        bucket = client.get_bucket('yourbucketname')
        blob = bucket.blob(filename)
        with tempfile.NamedTemporaryFile() as temp:
            blob.download_to_filename(temp.name)
            return send_file(temp.name, attachment_filename=filename)
    else:
        abort(400)
I recommend looking at the docs for the path or string converters for your route. NamedTemporaryFile defaults to delete=True, so no residue is left behind.
Flask also figures out the mimetype if you give it a file name, as is the case here.
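One caveat: attachment_filename was renamed in Flask 2.0, so on newer versions the return line above would become:

return send_file(temp.name, download_name=filename)  # Flask >= 2.0 renamed attachment_filename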
Are you trying to accomplish something like this:
#app.route("/sample_route")
def sample_handler():
import urllib2
import StringIO
request = urllib2.Request("{image url}")
img = StringIO.StringIO(urllib2.urlopen(request).read())
return send_file(img, mimetype='image/jpeg') # display in browser
or
return send_file(img) # download file
The full image URL is needed, not a relative path. You could also just redirect to the image URL, but then the client gets a redirect status (e.g. 301/302) rather than the image served from your route.
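A minimal sketch of that redirect variant; the public object URL shown is hypothetical, built from the bucket and path in the question:

from flask import Flask, redirect

app = Flask(__name__)

@app.route("/sample_route")
def sample_handler():
    # Redirect to the object's public URL; Flask's redirect defaults to a 302.
    return redirect("https://storage.googleapis.com/mybucketname/mysamplefolder/photo.jpg")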