I'm having trouble with a Flask + Azure app. I have some files saved in storage (PDFs and HTMLs), and I need to return them when the get_file_safe endpoint is invoked. The endpoint takes a file_id parameter, looks up the record in the database, fetches the blob from Azure, writes it to a temporary file, and returns that file. When I pass IDs that refer to PDF files it works perfectly and the file is displayed in the browser, but when the ID matches an HTML file the response is blank. Does anyone have any idea what it might be? Thank you very much! (Note: this worked when I used GCP, but I had to migrate, which is why the code below targets Azure.)
from flask import Flask, flash, jsonify, session, redirect, url_for, escape, request, render_template, send_file
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__, ContentSettings
import tempfile

def get_file_safe():
    # login and security stuff (...) logic goes here ->>>
    file_id = request.args.get('file_id')
    cursor.execute(
        """SELECT link, mimetype from TABLE where id = %s """, (file_id,))
    rows = cursor.fetchall()
    link = rows[0][0]
    mimetype = rows[0][1]
    filename = link.split("/")[-1]
    print("Filename {}".format(filename))
    print("Mimetype {}".format(mimetype))

    # Google Cloud version, commented out
    #client = storage.Client()
    #bucket = client.get_bucket('BUCKET_NAME')
    #blob = bucket.blob(link)
    #with tempfile.NamedTemporaryFile() as temp:
    #    blob.download_to_filename(temp.name)
    #    return send_file(temp.name, attachment_filename=filename)

    # Azure version
    bucket_name = 'BUCKET-NAME'
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    blob_client = blob_service_client.get_blob_client(container=bucket_name, blob=link)
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(blob_client.download_blob().readall())
        #return send_file(temp.name, attachment_filename=filename, mimetype=mimetype)
        return send_file(temp.name, download_name=filename)
As you mentioned, only the HTML files fail to display, so I tried downloading an HTML file into a temporary file and displaying it in the browser.
With tempfile.NamedTemporaryFile() as temp: I also get a blank page.
I then tried with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f: and wrote the data as a string, and the page displays correctly.
Can you try tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') for the HTML files?
from azure.storage.blob import BlobServiceClient
import tempfile
import webbrowser

blob_service_client = BlobServiceClient.from_connection_string("Connection String")

# Initialise container
blob_container_client = blob_service_client.get_container_client("test")

# Get blob
blob_client = blob_container_client.get_blob_client("test.html")
print("downloaded the blob")

# Download
data = blob_client.download_blob().readall()
print(data)
print(data.decode("utf-8"))

# Getting the blank page
with tempfile.NamedTemporaryFile() as temp:
    url = 'file://' + temp.name
    temp.write(blob_client.download_blob().readall())
    #temp.write(data)
    webbrowser.open(url)

# Getting the page
html = data.decode("utf-8")
with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
    url = 'file://' + f.name
    f.write(html)
    webbrowser.open(url)
Here is how the output looks: the HTML page opens in the browser.
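Not a confirmed fix, but here is a minimal sketch of how the same idea might be carried back into the original Flask endpoint. It assumes blob_service_client, bucket_name, link, filename and mimetype are set up exactly as in the question; the suffix is derived from the filename so the temporary file keeps its .html or .pdf extension, and flush() makes sure the bytes are on disk before send_file opens the path.

import os
import tempfile

# inside get_file_safe(), after blob_client has been created as in the question
suffix = os.path.splitext(filename)[1] or '.html'  # keep the original extension
with tempfile.NamedTemporaryFile(suffix=suffix) as temp:
    temp.write(blob_client.download_blob().readall())
    temp.flush()  # ensure the content is written before send_file reads the file by path
    return send_file(temp.name, download_name=filename, mimetype=mimetype)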
What I have: I have a Flask web app deployed to Heroku, consisting of a single web process, app.py. Here it is:
#importation
from flask import Flask, render_template, current_app, send_file, request, json
import os

#working functions
#json write
def json_write(dictionary):
    with open("./json/info.json", "w+") as f:
        json.dump(dictionary, f, indent=4)

#make file name
def make_file_name(name):
    filename = "tube4u_"
    for t in str(name):
        if t.isalnum():
            filename += t
    filename += ".mp4"
    return filename

#application initialisation
app = Flask(__name__)

#home
@app.route("/")
def home():
    return render_template("index.html")

#processor
@app.route("/process/", methods=["GET"])
def process():
    #get url
    url = request.args["url"]
    #import & initialisation
    from pytube import YouTube
    import pickle
    json_dict = {}
    try:
        yt = YouTube(url)
    except:
        return "<h1>Invalid URL</h1>"
    all_videos = yt.streams.filter(type="video", progressive=True)
    json_dict["title"] = yt.title
    json_dict["thumbnail"] = yt.thumbnail_url
    json_dict["name"] = make_file_name(yt.title)
    with open("./pickle/ytobj.pkl", "wb") as f:
        pickle.dump(all_videos, f)
    #videos with itag
    json_dict["videos"] = [{"itag": item.itag, "res": item.resolution} for item in all_videos]
    json_write(json_dict)
    return render_template("menu.html")

#download
@app.route("/download/", methods=["GET"])
def download():
    import pickle
    itag = int(request.args["itag"])
    with open("./json/info.json") as f:
        json_dict = json.load(f)
    with open("./pickle/ytobj.pkl", "rb") as f:
        all_videos = pickle.load(f)
    video = all_videos.get_by_itag(itag)
    video.download(output_path="./video", filename=f"{json_dict['name']}")
    return render_template("thank.html")

#return video
@app.route("/video/", methods=["GET"])
def video():
    filename = request.args["filename"]
    return send_file(f"./video/{filename}", as_attachment=True)

#return json
@app.route("/json")
def fetchjson():
    with open("./json/info.json") as f:
        content = json.load(f)
    return content

#get name
@app.route("/name")
def fetchname():
    with open("./json/info.json") as f:
        content = json.load(f)
    return content

@app.route("/list")
def listall():
    return f"{os.listdir('./video')}"

#running the app
if __name__ == "__main__":
    app.run(debug=True)
How it works: whenever someone enters a URL and clicks Go, the app creates a JSON file named info.json. Once everything is retrieved properly, it performs its tasks with the given URL by reading from that file.
My problem:
Now the problem is: if I make a request, the app creates info.json from my given URL. If someone else makes a request at the same time and enters their own URL, the server overwrites the JSON file with that client's input, loses my information, and my task ends up being performed with the other client's URL. It's really weird.
How can I fix this? For example, is there a way to create info.json in a separate path for each client and delete it once the work is done?
There are a lot of ways to handle this, in my view:
When the server gets a client request, check whether the file already exists; if it does, add a timestamp (or something else unique) to the filename so the file is not overwritten.
Ask the user for a file name, add a timestamp to it, and save under that name.
You could also use a database to store each client's data; for example, create a login system, give every user an id, and store each user's data in the database accordingly.
And so on...
As you can see, there are many ways to solve this; one of them is sketched below.
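For example, here is a minimal sketch of the first idea using per-request file names. The uuid-based naming and the json_read/json_delete helpers are mine, not part of the original app:

import json
import os
import uuid

def json_write(dictionary):
    # one file per request instead of a shared ./json/info.json
    token = uuid.uuid4().hex
    path = f"./json/info_{token}.json"
    with open(path, "w+") as f:
        json.dump(dictionary, f, indent=4)
    return token  # hand this back to the client, e.g. embedded in the rendered page or URL

def json_read(token):
    with open(f"./json/info_{token}.json") as f:
        return json.load(f)

def json_delete(token):
    os.remove(f"./json/info_{token}.json")

The /download/ and /json routes would then take the token as a query parameter, read their own file, and delete it once the work is done.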
I'm able to upload a file to Google Cloud Storage, but the problem is that it goes to the default bucket where my static files live: GS_BUCKET_NAME='static-files'.
I'd like to keep uploading the static files to the 'static-files' bucket, but I would also like to upload the user files to a different bucket: 'user-upload-files'.
How can I do this in Django 3.2.7 with Python 3.9.7?
For reference, right now I'm doing:
from django.core.files.storage import default_storage
file = default_storage.open(filename, 'w')
file.write('testing')
file.close()
One option is to skip default_storage and use the google-cloud-storage client directly, pointed at the bucket you want:

import base64
import random
import string
import os

letters = string.ascii_lowercase
random_string = ''.join(random.choice(letters) for i in range(10))
env = os.environ.get("_ENVIROMENT", "development")
filename = f'test_upload_file_{env}_{random_string}.txt'

encoded_text = 'S2FtaWwgd2FzIGhlcmU='
decoded_plaintext = base64.b64decode(encoded_text)

# Upload to the default GS_BUCKET_NAME location
# from django.core.files.storage import default_storage
# file = default_storage.open(filename, 'w')
# file.write(decoded_plaintext)
# file.close()

# Write the payload to a local file first
f = open(filename, "wb")
f.write(decoded_plaintext)
f.close()

# Upload that file to a different bucket with the google-cloud-storage client
from google.cloud import storage

project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", None)
bucket_name = os.environ.get("GS_BUCKET_NAME_FILE_UPLOADS", None)

client = storage.Client(project=project_id)
bucket = client.get_bucket(bucket_name)

filename_on_gcp = filename
blob = bucket.blob(filename_on_gcp)
with open(filename, "rb") as my_file:
    blob.upload_from_file(my_file)
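If you would rather stay behind Django's storage API, django-storages (which the GS_BUCKET_NAME setting suggests you are already using) can also be pointed at a second bucket by creating another storage instance. A rough sketch, assuming the 'user-upload-files' bucket exists and your default credentials can write to it:

from storages.backends.gcloud import GoogleCloudStorage

# separate storage object for user uploads; static files keep using the default storage
user_upload_storage = GoogleCloudStorage(bucket_name='user-upload-files')

f = user_upload_storage.open('some_user_file.txt', 'w')
f.write('testing')
f.close()

You can also pass this object to a FileField via its storage argument so user uploads land in that bucket automatically.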
I need to download a PDF from a blob container in Azure as a download stream (StorageStreamDownloader) and open it in both pdfplumber and pdfminer.
I developed all the requirements loading the PDFs as files, but I can't manage to receive a download stream (StorageStreamDownloader) and open it successfully.
I was opening the PDFs like this:
pdf = pdfplumber.open(pdfpath)        # for pdfplumber
fp = open('Pdf/' + fileGlob, 'rb')    # for pdfminer
parser = PDFParser(fp)
document = PDFDocument(parser)
However, I need to be able to work from a download stream. Here is the code snippet that downloads the PDF as a file:
blob_client = container.get_blob_client(remote_file)
with open(local_file_path, "wb") as local_file:
    download_stream = blob_client.download_blob()
    local_file.write(download_stream.readall())
I tried several options, even using a temp file, with no luck.
Any ideas?
download_blob() downloads the blob into a StorageStreamDownloader class, and this class has a download_to_stream method; with it you can get the blob as a stream.
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from io import BytesIO
import PyPDF2

filename = "test.pdf"
container_name = "test"

blob_service_client = BlobServiceClient.from_connection_string("connection string")
container_client = blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(filename)

streamdownloader = blob_client.download_blob()
stream = BytesIO()
streamdownloader.download_to_stream(stream)

fileReader = PyPDF2.PdfFileReader(stream)
print(fileReader.numPages)
And this is my result: it prints the PDF's page count.
It seems download_to_stream() is now deprecated; readinto() should be used instead.
from azure.storage.blob import BlobClient

conn_string = ''
container_name = ''
blob_name = ''

blob_obj = BlobClient.from_connection_string(
    conn_str=conn_string, container_name=container_name,
    blob_name=blob_name
)

with open(blob_name, 'wb') as f:
    b = blob_obj.download_blob()
    b.readinto(f)
This will create a file in the working directory with the data that was downloaded.
Simply add readall() to download_blob(), which will read the data as bytes, and write those bytes to the file:

from azure.storage.blob import BlobClient

conn_string = ''
container_name = ''
blob_name = ''

blob_obj = BlobClient.from_connection_string(conn_string, container_name, blob_name)

with open(blob_name, 'wb') as f:
    data = blob_obj.download_blob().readall()
    f.write(data)
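Since the question was specifically about opening the download in pdfplumber and pdfminer without going through a local file, here is a rough sketch of that last step, assuming a blob_client set up as in the first answer. Both libraries accept a file-like object, so wrapping the downloaded bytes in a BytesIO is enough:

from io import BytesIO

import pdfplumber
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument

# download the blob fully into memory
stream = BytesIO(blob_client.download_blob().readall())

# pdfplumber accepts a file-like object
with pdfplumber.open(stream) as pdf:
    print(len(pdf.pages))

# pdfminer: rewind first, then parse the same buffer
stream.seek(0)
parser = PDFParser(stream)
document = PDFDocument(parser)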
I am attempting to pull a file from AWS S3, using Boto3, directly into a BytesIO object. This will eventually be used to manipulate the downloaded data, but for now I'm just trying to serve that file directly to a user via Flask. As I understand it, the code below should work, but it does not: the browser simply displays nothing (and shows that only a few bytes of data were downloaded).
(In this example, my sample file is a PNG.)
from flask import Flask, send_from_directory, abort, Response, send_file, make_response
import boto3, botocore
import os
import io

AWS_ACCESS_KEY = os.environ['AWS_ACCESS_KEY'].rstrip()
AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY'].rstrip()
S3_BUCKET = "static1"

app = Flask(__name__, static_url_path='/tmp')

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    return send_file(file, mimetype=metadata['ContentType'])

if __name__ == '__main__':
    app.run(debug=True, port=3000, host='0.0.0.0')
If I modify that core routine to write the BytesIO object to disk and then read it back into a new BytesIO object, it works fine, as below:
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY,)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    conf = boto3.s3.transfer.TransferConfig(use_threads=False)
    s3.download_fileobj(S3_BUCKET, path, file)
    print(file.getvalue())
    fh = open("/tmp/test1.png", "wb")
    fh.write(file.getvalue())
    fh.close()
    fh = open("/tmp/test1.png", "rb")
    f2 = io.BytesIO(fh.read())
    fh.close()
    print(f2.getvalue())
    return send_file(f2, mimetype=metadata['ContentType'])
I've been going around in circles with this for a few days; it's clear that I'm missing something, but I'm not sure what. The script is being run inside a Python 3.8 Docker container with the latest copies of boto3, Flask, etc.
Rewinding your BytesIO object should do the trick: call file.seek(0) just before send_file(...).
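Applied to the route from the question, that would look roughly like this (a sketch reusing the question's variables; only the seek(0) line is new):

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
    file = io.BytesIO()
    metadata = s3.head_object(Bucket=S3_BUCKET, Key=path)
    s3.download_fileobj(S3_BUCKET, path, file)
    file.seek(0)  # rewind: download_fileobj leaves the position at the end of the buffer
    return send_file(file, mimetype=metadata['ContentType'])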
For the record, I'm not sure your boto3/botocore calls follow best practices; to reproduce your use case I ended up with:
from boto3.session import Session

session = Session(
    aws_access_key_id=KEY_ID, aws_secret_access_key=ACCESS_KEY, region_name=REGION_NAME
)
s3 = session.resource("s3")

@base_bp.route("/test-stuff")
def test_stuff():
    a_file = io.BytesIO()
    s3_object = s3.Object(BUCKET, PATH)
    s3_object.download_fileobj(a_file)
    a_file.seek(0)
    return send_file(a_file, mimetype=s3_object.content_type)
It works when reading the file from disk because you instantiate your BytesIO with the full content of the file, so it is fully populated and its position is still at 0.