Allow user to download zip file generated on AWS - python

Here is how I generate a ZIP and download it from the server; it works well in local development.
import zipfile

doc = get_object_or_404(Document, id=id_obj)
filepath = doc.file.path
filename = os.path.basename(doc.file.name)
directory = os.path.dirname(filepath)

xzip = zipfile.ZipFile(os.path.join(directory, "%s.zip" % filename), "w")
xzip.write(filepath, filename)
xzip.close()

zip_file = open(xzip.filename, 'rb')
response = HttpResponse(zip_file, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename="%s.zip"' % os.path.splitext(filename)[0]
return response
All my static & media files are uploaded to AWS in production, so I changed things a little bit:
# filepath becomes
filepath = settings.MEDIA_ROOT + "/" + doc.file.name
But when I try to download it, it gives me [Errno 2] No such file or directory with the link:
https://bucket_name.s3.amazonaws.com/media/public/files/file.pdf.zip
The settings.MEDIA_ROOT is:
AWS_ACCESS_KEY_ID = config('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = config('AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = 'bucket_name'
AWS_S3_CUSTOM_DOMAIN = '%s.s3.amazonaws.com' % AWS_STORAGE_BUCKET_NAME
AWS_PUBLIC_MEDIA_LOCATION = 'media/public'
MEDIA_ROOT = "https://%s/%s/" % (AWS_S3_CUSTOM_DOMAIN, AWS_PUBLIC_MEDIA_LOCATION)
doc.file.path gives me the error 'This backend doesn't support absolute paths', which is why I changed it to MEDIA_ROOT + doc.file.name.
How can I download the generated zip file from AWS?

The file exists on S3, not the local file system. When you call those os.path.* functions the code is trying to find the file on the local file system. It's giving you that error because that S3 URL you are giving it as the path can't be mapped to anything on the local file system.
Why don't you let S3 serve the file directly to the end-user's browser by simply returning a redirect response with the URL of the S3 file, instead of trying to read the file and return its contents in the response?
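A minimal sketch of that approach, assuming the Document model from the question and that django-storages exposes the S3 URL as doc.file.url (the view name here is just illustrative):

from django.shortcuts import get_object_or_404, redirect

def download_document(request, id_obj):
    doc = get_object_or_404(Document, id=id_obj)
    # doc.file.url is the S3 URL built by the storage backend, so the
    # browser fetches the file straight from S3 instead of from Django.
    return redirect(doc.file.url)

Note this serves the stored file itself, not a freshly generated zip; if a zip is really required, it would have to be created and uploaded to S3 (or built in memory) first.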

Related

Upload all files in a directory to minIO

I would like to know if there is a way to upload all the files contained in a folder to minIO, or if there is a method already implemented.
Uploading one file is very simple, but I can't find a way to upload several files that are inside a local directory.
You can try this out:
import os
import glob

def upload_local_directory_to_minio(local_path: str, bucket_name: str):
    assert os.path.isdir(local_path)

    for local_file in glob.glob(local_path + '/**'):
        local_file = local_file.replace(os.sep, "/")  # normalize path separators
        if not os.path.isfile(local_file):
            # recurse into subdirectories
            upload_local_directory_to_minio(local_file, bucket_name)
        else:
            # object key relative to the directory being uploaded
            remote_path = os.path.join(local_file[1 + len(local_path):])
            remote_path = remote_path.replace(os.sep, "/")
            minioClient.fput_object(bucket_name, remote_path, local_file)
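A minimal usage sketch, assuming the minio Python client and placeholder endpoint/credentials (minioClient is the global the function above expects):

from minio import Minio

# placeholder endpoint and credentials - replace with your own
minioClient = Minio(
    "localhost:9000",
    access_key="YOUR_ACCESS_KEY",
    secret_key="YOUR_SECRET_KEY",
    secure=False,
)

upload_local_directory_to_minio("/path/to/local/folder", "my-bucket")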

Specify file download location using PyDrive?

I'm currently trying to download a file from Google Drive using PyDrive, but am only able to download the file to the same location as my Python program. Is there a way to specify the file's download location? This is how I am downloading files currently.
if file1['title'] == file_name:
    file2 = drive.CreateFile({'id': file1['id']})
    print('Downloading file %s from Google Drive' % file2['title'])
    file2.GetContentFile(file_name)  # Save Drive file as a local file
Try the following:
if file1['title'] == file_name:
    location = "Path/where/you/want/to/save/"
    """
    If you are on Linux and want to save it to Documents, try:
    location = "~/Documents/"
    The last forward slash, i.e. '/', is important.
    """
    full_path = location + file_name
    file2 = drive.CreateFile({'id': file1['id']})
    print('Downloading file %s from Google Drive' % file2['title'])
    file2.GetContentFile(full_path)
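A slightly more defensive variant (my own suggestion, not part of the original answer) that expands ~ and joins the path instead of concatenating strings:

import os

if file1['title'] == file_name:
    # expanduser turns "~" into the real home directory;
    # os.path.join avoids missing or doubled slashes.
    full_path = os.path.join(os.path.expanduser("~/Documents"), file_name)
    file2 = drive.CreateFile({'id': file1['id']})
    print('Downloading file %s from Google Drive' % file2['title'])
    file2.GetContentFile(full_path)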

How to avoid saving subfolders when saving a file?

The function creates a folder and saves a file into it. Then the folder is packed into a rar archive and sent to the user, and the newly created folder and archive are deleted from the server afterwards.
code.py
new_file_name = self.generate_file_name(rfi, vendor, current_scoring_round)
path_to_temp_folder = os.path.dirname(BASE_DIR)

if not os.path.exists(f'{path_to_temp_folder}/temp_folder'):
    pathlib.Path(f'{path_to_temp_folder}/temp_folder').mkdir(parents=True, exist_ok=True)

wb.save(f'{path_to_temp_folder}/temp_folder/{new_file_name}')  # save xlsx file from openpyxl library

archive = self.generate_zip_name(rfi)  # generate name for archive
to_rar = f'{path_to_temp_folder}/temp_folder'
patoolib.create_archive(archive, (to_rar,))  # patoolib - .rar converter
to_download = f'{path_to_temp_folder}/{archive}'

if os.path.exists(to_download):
    try:
        with open(to_download, 'rb') as fh:
            response = HttpResponse(fh.read(), content_type='application/vnd.rar')
            response['Content-Disposition'] = 'attachment; filename="{}"'.format(archive)
            return response
    finally:
        shutil.rmtree(to_rar, ignore_errors=True)
        default_storage.delete(to_download)
Everything works, but the problem is that the downloaded archive contains subfolders - the full path to the saved file.
Expected result:
folder.rar
    file.xlsx

Actual result:
folder.rar
    /home
        /y700
            /projects
                file.xlsx
The documentation for patool is minimal. It certainly seems to suggest that this should be possible by passing the path to the file in the create_archive command. I've tried this though, and it appears not.
So the only option, probably, is to change the working directory to the location of the test.xlsx file:
import patoolib
import os

new_file_name = self.generate_file_name(rfi, vendor, current_scoring_round)
path_to_temp_folder = os.path.dirname(BASE_DIR)

if not os.path.exists(f'{path_to_temp_folder}/temp_folder'):
    pathlib.Path(f'{path_to_temp_folder}/temp_folder').mkdir(parents=True, exist_ok=True)

wb.save(f'{path_to_temp_folder}/temp_folder/{new_file_name}')  # save xlsx file from openpyxl library

archive = self.generate_zip_name(rfi)  # generate name for archive
to_rar = f'{path_to_temp_folder}/temp_folder'
to_download = f'{path_to_temp_folder}/{archive}'

cwd = os.getcwd()
os.chdir(to_rar)  # archive from inside the temp folder so only the bare file name is stored
patoolib.create_archive(to_download, (new_file_name,))  # patoolib - .rar converter
os.chdir(cwd)

if os.path.exists(to_download):
    try:
        with open(to_download, 'rb') as fh:
            response = HttpResponse(fh.read(), content_type='application/vnd.rar')
            response['Content-Disposition'] = 'attachment; filename="{}"'.format(archive)
            return response
    finally:
        shutil.rmtree(to_rar, ignore_errors=True)
        default_storage.delete(to_download)
This works on my system, for example, and I get a single file in the archive (using tar, because I don't have rar installed):
import patoolib
import os

cwd = os.getcwd()
os.chdir('foo/bar/baz/qux/')
patoolib.create_archive(cwd + '/foo.tar.gz', ('test.txt',))
os.chdir(cwd)
Note that you should really use os.path.join rather than concatenating strings, but this was just a quick & dirty test.
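For reference, a tidier version of that quick test using os.path.join (same idea; the directory and file names are placeholders):

import os
import patoolib

cwd = os.getcwd()
os.chdir('foo/bar/baz/qux/')  # directory that contains test.txt
# build the absolute archive path without manual '/' concatenation
patoolib.create_archive(os.path.join(cwd, 'foo.tar.gz'), ('test.txt',))
os.chdir(cwd)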

Download S3 Files with Boto

I am trying to set up an app where users can download their files stored in an S3 bucket. I am able to set up my bucket and get the correct file, but it won't download, giving me this error: No such file or directory: 'media/user_1/imageName.jpg'. Any idea why? This seems like a relatively easy problem, but I can't quite seem to get it. I can delete an image properly, so it is able to identify the correct image.
Here's my views.py
def download(request, project_id=None):
    conn = S3Connection('AWS_BUCKET_KEY', 'AWS_SECRET_KEY')
    b = Bucket(conn, 'BUCKET_NAME')
    k = Key(b)

    instance = get_object_or_404(Project, id=project_id)
    k.key = 'media/' + str(instance.image)
    k.get_contents_to_filename(str(k.key))
    return redirect("/dashboard/")
The problem is that you are downloading to a local directory that doesn't exist (media/user_1). You need to either:
1. Create the directory on the local machine first
2. Just use the filename rather than a full path
3. Use the full path, but replace slashes (/) with another character -- this will ensure uniqueness of the filename without having to create directories
The last option could be achieved via:
k.get_contents_to_filename(str(k.key).replace('/', '_'))
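Alternatively, a sketch of the first option - creating the local directory before downloading - assuming the same boto Key object from the question:

import os

local_path = str(k.key)  # e.g. 'media/user_1/imageName.jpg'
# make sure the local directory exists before boto tries to write into it
os.makedirs(os.path.dirname(local_path), exist_ok=True)
k.get_contents_to_filename(local_path)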
See also: Boto3 to download all files from a S3 Bucket
Downloading files using boto3 is very simple; configure your AWS credentials at the system level before using this code.
import boto3

client = boto3.client('s3')

# if your bucket name is mybucket and the file path is test/abc.txt,
# then Bucket='mybucket' and Prefix='test'
resp = client.list_objects_v2(Bucket="<your bucket name>", Prefix="<prefix of the s3 folder>")

for obj in resp['Contents']:
    key = obj['Key']

    # to read the s3 file contents as a string
    response = client.get_object(Bucket="<your bucket name>", Key=key)
    print(response['Body'].read().decode('utf-8'))

    # to download the file locally
    client.download_file('<your bucket name>', key, key.replace('test', ''))
The replace strips the prefix so the file is saved locally under just its file name; without it, boto3 would try to save it as 'test/abc.txt'.
import os
import boto3
import json

s3 = boto3.resource('s3',
                    aws_access_key_id="AKIAxxxxxxxxxxxxJWB",
                    aws_secret_access_key="LV0+vsaxxxxxxxxxxxxxxxxxxxxxry0/LjxZkN")
my_bucket = s3.Bucket('s3testing')

# download files into the current directory
for s3_object in my_bucket.objects.all():
    # Need to split s3_object.key into path and file name, else it will give a "file not found" error.
    path, filename = os.path.split(s3_object.key)
    my_bucket.download_file(s3_object.key, filename)
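If you would rather keep the S3 folder structure locally instead of flattening it, a sketch (assuming the same my_bucket resource as above) is to recreate each key's directory first:

import os

for s3_object in my_bucket.objects.all():
    if s3_object.key.endswith('/'):
        continue  # skip "folder" placeholder keys
    # recreate the key's directory structure locally, then download into it
    path, filename = os.path.split(s3_object.key)
    if path:
        os.makedirs(path, exist_ok=True)
    my_bucket.download_file(s3_object.key, s3_object.key)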

Uploading a File to GCS using boto in python

I'm trying to upload files from my local system to GCS using boto in the cloud. After a file gets uploaded, I get an error which says "The MD5 you specified in Content-MD5 or x-goog-hash did not match what we computed." Below is my code.
def upload():
    bucket_name = 'bucketname'
    bucket = conn.get_bucket(bucket_name)
    fpic = Key(bucket)

    d = 'E:/Eclipse/workspace/Files'
    for filename in os.listdir(d):
        contents = d + '/' + filename
        fpic.key = 'my-files' + filename
        fpic.set_contents_from_filename(contents, {}, replace=True)
There is also another way to upload files from local to GCS using boto; see the link below [1]. Try that; it should work for you without any error.
[1] https://cloud.google.com/storage/docs/gspythonlibrary#credentials
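For reference, a minimal sketch of the pattern that documentation describes, using boto's storage_uri with the gcs-oauth2-boto-plugin (credentials come from your .boto configuration; the bucket and paths are placeholders):

import os
import boto
import gcs_oauth2_boto_plugin  # registers the 'gs' auth handler

bucket_name = 'bucketname'
d = 'E:/Eclipse/workspace/Files'

for filename in os.listdir(d):
    # build a gs:// destination URI and upload the local file to it
    dst_uri = boto.storage_uri(bucket_name + '/my-files/' + filename, 'gs')
    dst_uri.new_key().set_contents_from_filename(os.path.join(d, filename))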
