Download multiple files from S3 django - python

Here is the link I have used (Download files from Amazon S3 with Django). Using this, I'm able to download a single file.
Code:
s3_template_path = queryset.values('file')
filename = 'test.pdf'
conn = boto.connect_s3('<aws access key>', '<aws secret key>')
bucket = conn.get_bucket('your_bucket')
s3_file_path = bucket.get_key(s3_template_path)
response_headers = {
    'response-content-type': 'application/force-download',
    'response-content-disposition': 'attachment; filename="%s"' % filename
}
url = s3_file_path.generate_url(60, 'GET',
                                response_headers=response_headers,
                                force_http=True)
return HttpResponseRedirect(url)
I need to download multiple files from S3; a zip would be better. Can the method above be modified for this? If not, please suggest another method.

Okay, here is a possible solution: it basically downloads each file, zips them into a folder, and then returns the zip to the user.
Not sure if s3_template_path is the same for each file, but change this if necessary.
# python 3
import io
import os
import zipfile

import boto
import requests
from django.http import HttpResponse

file_names = ['test.pdf', 'test2.pdf', 'test3.pdf']

# set up the in-memory zip archive
zip_subdir = "download_folder"
zip_filename = zip_subdir + ".zip"
byte_stream = io.BytesIO()
zf = zipfile.ZipFile(byte_stream, "w")

for filename in file_names:
    s3_template_path = queryset.values('file')
    conn = boto.connect_s3('<aws access key>', '<aws secret key>')
    bucket = conn.get_bucket('your_bucket')
    s3_file_path = bucket.get_key(s3_template_path)
    response_headers = {
        'response-content-type': 'application/force-download',
        'response-content-disposition': 'attachment; filename="%s"' % filename
    }
    url = s3_file_path.generate_url(60, 'GET',
                                    response_headers=response_headers,
                                    force_http=True)
    # download the file
    file_response = requests.get(url)
    if file_response.status_code == 200:
        # save a local copy of the file
        with open(filename, 'wb') as f1:
            f1.write(file_response.content)
        # add the local copy to the zip archive
        fdir, fname = os.path.split(filename)
        zip_path = os.path.join(zip_subdir, fname)
        zf.write(filename, zip_path)

# close the zip archive and return it as the response
zf.close()
response = HttpResponse(byte_stream.getvalue(), content_type="application/x-zip-compressed")
response['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
return response
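If writing temporary copies to disk is not desirable (for example on a read-only or ephemeral filesystem), the downloaded bytes can go straight into the archive with ZipFile.writestr instead. A minimal sketch, assuming a hypothetical presigned_urls dict that maps each file name to a presigned GET URL generated the same way as above:

import io
import zipfile

import requests

def build_zip_in_memory(presigned_urls):
    # presigned_urls: {file name -> presigned GET URL}, e.g. from generate_url() above
    byte_stream = io.BytesIO()
    with zipfile.ZipFile(byte_stream, "w", zipfile.ZIP_DEFLATED) as zf:
        for fname, url in presigned_urls.items():
            resp = requests.get(url)
            if resp.status_code == 200:
                # write the bytes directly into the archive; no temporary file on disk
                zf.writestr(fname, resp.content)
    byte_stream.seek(0)
    return byte_stream

The returned buffer can then be passed to HttpResponse exactly as in the snippet above.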

Related

How to commit files from a S3 bucket to codecommit repository in a python lambda function

I need to commit and push files from a bucket in S3 to a CodeCommit repository, programmatically, using a Python Lambda function.
I am using the boto3 library: first I get and unzip the zip file from the bucket, then I loop over each file and call put_file.
The problem is that put_file generates as many commits as there are files, but I only need one commit, because I have to send a single notification to CodeBuild.
My Lambda code:
import base64
import io
import zipfile

import boto3
import botocore

# assumes these are defined earlier in the handler / at module level:
# s3 = boto3.client('s3')
# client = boto3.client('codecommit')
# result = []

file_key = event['Records'][0]['s3']['object']['key']
obj = s3.get_object(Bucket=bucket_name, Key=file_key)
body_dec = base64.b64decode(obj['Body'].read())
memory_file = io.BytesIO(body_dec)
with zipfile.ZipFile(memory_file, 'r') as zf:
    files = zf.namelist()
    for individualFile in files:
        data = zf.read(individualFile)
        # get parentCommitId for the new push
        parentCommitId = ""
        try:
            response = client.get_branch(
                repositoryName='test-codecommit',
                branchName='master'
            )
            parentCommitId = response['branch']['commitId']
        except botocore.exceptions.ClientError as error:
            print(error.response['Error'])
        try:
            if not parentCommitId:
                # parentCommitId = None
                response = client.put_file(
                    repositoryName='test-codecommit',
                    branchName='master',
                    fileContent=data,
                    filePath=individualFile,
                    commitMessage='tag1',
                    name='Javier',
                    email='jramirezneira#gmail.com'
                )
            else:
                response = client.put_file(
                    repositoryName='test-codecommit',
                    branchName='master',
                    fileContent=data,
                    filePath=individualFile,
                    # fileMode='EXECUTABLE'|'NORMAL'|'SYMLINK',
                    parentCommitId=parentCommitId,
                    commitMessage='tag1',
                    name='Javier',
                    email='jramirezneira#gmail.com'
                )
            result.append({'file': individualFile, 'Message': 'Added to Codecommit'})
        except botocore.exceptions.ClientError as error:
            print(error.response['Error'])
            result.append({'file': individualFile, 'Message': error.response['Error']['Message']})
I would appreciate your help or suggestions.
Instead of using put_file, you can use create_commit, which takes multiple files in its putFiles parameter. I was able to do it using this code:
import os

import boto3

def create_codecommit_repo_commit(repo_name, branch_name, code_folder):
    client = boto3.client('codecommit')
    parent_folder = os.path.join(code_folder, repo_name)
    putFilesList = []
    for (root, folders, files) in os.walk(parent_folder):
        for file in files:
            file_path = os.path.join(root, file)
            with open(file_path, mode='r+b') as file_obj:
                file_content = file_obj.read()
            putFileEntry = {'filePath': str(file_path).replace(parent_folder, ''),
                            'fileContent': file_content}
            putFilesList.append(putFileEntry)
    response = client.create_commit(repositoryName=repo_name, branchName=branch_name, putFiles=putFilesList)
    return response
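Applied to the Lambda in the question, the putFiles list can be built from the zip entries instead of a local folder, so the whole archive lands in a single commit. A rough sketch under that assumption (repository and branch names mirror the question; commit_zip_contents is a hypothetical helper):

import io
import zipfile

import boto3

def commit_zip_contents(codecommit, zip_bytes, repo='test-codecommit', branch='master'):
    # build one putFiles entry per file inside the zip
    put_files = []
    with zipfile.ZipFile(io.BytesIO(zip_bytes), 'r') as zf:
        for name in zf.namelist():
            if name.endswith('/'):
                continue  # skip directory entries
            put_files.append({'filePath': name, 'fileContent': zf.read(name)})

    kwargs = dict(repositoryName=repo, branchName=branch,
                  putFiles=put_files, commitMessage='tag1')
    # an existing branch needs its current head as the parent commit
    try:
        head = codecommit.get_branch(repositoryName=repo, branchName=branch)
        kwargs['parentCommitId'] = head['branch']['commitId']
    except codecommit.exceptions.BranchDoesNotExistException:
        pass  # first commit on a new branch has no parent
    return codecommit.create_commit(**kwargs)

Calling commit_zip_contents(client, body_dec) in place of the per-file put_file loop should produce a single commit, and therefore a single notification to CodeBuild.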

How to create zipfile in S3 with Boto3 Python?

I'm trying to create a zipfile from several files from a subfolder in an S3 bucket, then save that zipfile in another subfolder in the same bucket.
I can create zipfiles from my S3 subfolders when running my Flask application locally, but not on Heroku, since it doesn't persist anything on disk.
I was going over this example, but it seems dated and uses local files.
https://www.botreetechnologies.com/blog/create-and-download-zip-file-in-django-via-amazon-s3
Here is a snippet of my code I'm working with.
from flask import Response
import io, boto3, zipfile, os

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                            aws_secret_access_key = AWS_SECRET_ACCESS_KEY)
s3 = aws_session.resource("s3")
s3 = boto3.client("s3", region_name = "some region")  # overwrites the resource above
s3_resource = boto3.resource("s3")

blog_folder = "blog_1"

paginator = s3.get_paginator("list_objects")
file_list = [page for page in paginator.paginate(Bucket=AWS_STORAGE_BUCKET_NAME)
             .search("Contents[?Size >`0`][]")
             if blog_folder in page["Key"]]

byte = io.BytesIO()  # in-memory buffer for the zip
zf = zipfile.ZipFile(byte, "w")
zipped_files = []
zip_filename = "download_files.zip"
for key in file_list:
    file_name = key["Key"].split("/")[-1]
    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)
    file_obj = my_bucket.Object(key["Key"]).get()
    zipped_files.append(file_obj["Body"].read())
Any idea how I can solve this? It's much more convenient for a user to be able to download a zipfile rather than individual files.
Any help is very much appreciated.
Python's in-memory zip library is perfect for this. Here's an example from one of my projects:
import io
import zipfile

zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zipper:
    infile_object = s3.get_object(Bucket=bucket, Key=object_key)
    infile_content = infile_object['Body'].read()
    zipper.writestr(file_name, infile_content)

s3.put_object(Bucket=bucket, Key=PREFIX + zip_name, Body=zip_buffer.getvalue())
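Adapted to the question's code, the same buffer can be filled in a loop over file_list and then uploaded to the "zipped" subfolder. A sketch, assuming the s3 client, AWS_STORAGE_BUCKET_NAME and file_list from the question, and an illustrative destination key under blog_1/zipped/:

import io
import zipfile

def zip_s3_objects_to_s3(s3, bucket, keys, dest_key):
    # stream each listed object into an in-memory zip, then upload the zip itself
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipper:
        for key in keys:
            obj = s3.get_object(Bucket=bucket, Key=key)
            # store each file under its bare name inside the archive
            zipper.writestr(key.split("/")[-1], obj["Body"].read())
    zip_buffer.seek(0)
    s3.put_object(Bucket=bucket, Key=dest_key, Body=zip_buffer.getvalue())

# e.g. zip_s3_objects_to_s3(s3, AWS_STORAGE_BUCKET_NAME,
#                           [k["Key"] for k in file_list],
#                           "blog_1/zipped/download_files.zip")

Nothing here touches the local filesystem, which sidesteps the Heroku limitation mentioned in the question.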
So I managed to get it to work in my Heroku Flask app. Hope it helps anyone who is struggling.
PS: the subfolder is blog_folder, so the structure is:
Bucket/blog_folder/resources
Bucket/blog_folder/zipped
import tempfile, zipfile, os, shutil, boto3
from flask import send_from_directory

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"

def make_zipfile(output_filename, source_dir):
    relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
        for root, dirs, files in os.walk(source_dir):
            # add directory (needed for empty dirs)
            zip.write(root, os.path.relpath(root, relroot))
            for file in files:
                filename = os.path.join(root, file)
                if os.path.isfile(filename):  # regular files only
                    arcname = os.path.join(os.path.relpath(root, relroot), file)
                    zip.write(filename, arcname)

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                            aws_secret_access_key = AWS_SECRET_ACCESS_KEY)
s3 = aws_session.resource("s3")

current_path = os.getcwd()
temp = tempfile.TemporaryDirectory(suffix="_tmp", prefix="basic_", dir=current_path)
### AT TOP OF YOUR APP.PY file ^^^^^^^^^^

# REGION_NAME, BLOG_FOLDER, Blog and db are app-specific names defined elsewhere
# @app_blog.route("/download_blog_res_zipfile/<int:blog_id>", methods = ["GET", "POST"])
def download_blog_res_zipfile(blog_id):
    current_path = os.getcwd()
    blog = Blog.query.filter_by(id = blog_id).first()
    print(blog)
    print("DOWNLOAD COUNT!!!")
    print(blog.download_count)
    blog.download_count += 1
    db.session.commit()

    # clear out any previously zipped folders
    del_folders = os.listdir(os.getcwd() + "/BLOG_ZIPPED_FOLDER")
    for folder in del_folders:
        zipp_path = os.getcwd() + "/BLOG_ZIPPED_FOLDER/" + folder
        print(folder)
        print("DELETING ZIPPING!")
        shutil.rmtree(zipp_path)

    temp_zipp = tempfile.TemporaryDirectory(suffix="_tmp", prefix="zipping_",
                                            dir=current_path + "/BLOG_ZIPPED_FOLDER")

    s3 = boto3.client("s3", region_name = REGION_NAME)
    s3_resource = boto3.resource("s3")
    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)
    paginator = s3.get_paginator("list_objects")

    folder = "blogs/blog_{}/resources".format(blog.id)
    file_list = [page for page in paginator.paginate(Bucket = AWS_STORAGE_BUCKET_NAME)
                 .search("Contents[?Size >`0`][]")
                 if folder in page["Key"]]

    # download each resource locally, then zip the folder
    for key in file_list:
        file_name = key["Key"].split("/")[-1]
        print(file_name)
        file_obj = my_bucket.Object(key["Key"]).get()["Body"]
        with open(os.getcwd() + "/" + BLOG_FOLDER + "/" + file_name, "wb") as w:
            w.write(file_obj.read())

    make_zipfile(temp_zipp.name + "/blog_res_{}.zip".format(blog_id),
                 current_path + "/" + BLOG_FOLDER)

    # remove the local copies once they have been zipped
    try:
        for key in file_list:
            file_name = key["Key"].split("/")[-1]
            file_path = current_path + "/" + BLOG_FOLDER + "/" + file_name
            os.remove(file_path)
        print("TRYY!!")
        print("REMOVED!!!")
    except:
        for key in file_list:
            file_name = key["Key"].split("/")[-1]
            file_path = current_path + "/" + BLOG_FOLDER + "/" + file_name
            os.remove(file_path)
        print("EXCEPT!!!")
        print("REMOVED!!!")

    return send_from_directory(temp_zipp.name, "blog_res_{}.zip".format(blog_id),
                               as_attachment = True)

Download list of images in S3 with boto3 and python

I have two lists, of URLs and file names, and I'd like to download each URL and store it in my S3 bucket. But how do I do this with lists?
My url list:
gm_new = ['https://img.com/30.jpg', 'https://img.com/3.jpg']
My file name list:
ccv_name = ['30.jpg', '3.jpg']
My function:
import boto3
import requests

def dl_imgs():
    s3 = boto3.resource("s3")
    if gm_new is not None:
        req_img = requests.get(gm_new, stream=True)
        file_obj = req_img.raw
        req_data = file_obj.read()
        ccv_name_path = "images/" + ccv_name + ""
        # upload to S3
        s3.Bucket(_BUCKET_NAME_IMG).put_object(
            Key=ccv_name_path, Body=req_data, ContentType="image/jpeg", ACL="public-read")

dl_imgs()
Iterate over the URL list and the file name list in parallel, and process item by item:
for url, file_name in zip(gm_new, ccv_name):
    <download file>
    <upload to s3>
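A concrete version of that loop, reusing the body of dl_imgs from the question (gm_new, ccv_name and _BUCKET_NAME_IMG are the question's own names), might look like this:

import boto3
import requests

def dl_imgs(urls, names, bucket_name):
    s3 = boto3.resource("s3")
    for url, file_name in zip(urls, names):
        # download one image
        req_img = requests.get(url, stream=True)
        req_data = req_img.raw.read()
        # upload it to S3 under images/<file_name>
        s3.Bucket(bucket_name).put_object(
            Key="images/" + file_name, Body=req_data,
            ContentType="image/jpeg", ACL="public-read")

dl_imgs(gm_new, ccv_name, _BUCKET_NAME_IMG)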

How to download Amazon S3 files on to local machine in folder using python and boto3?

I am trying to download files from Amazon S3 to a predefined folder on the local machine. This is the code and it runs, but when a file is saved it ends up with the last part of the path ('sample_file') glued onto its name. How should I correct this?
import boto3
import os

S3_Object = boto3.client('s3', aws_access_key_id='##', aws_secret_access_key='##')
BUCKET_NAME = '##'
filename2 = []
Key2 = []
bucket = S3_Object.list_objects(Bucket=BUCKET_NAME)['Contents']
download_path = target_file_path = os.path.join('..', 'data', 'lz', 'test_sample', 'sample_file')
for key in bucket:
    path, filename = os.path.split(key['Key'])
    filename2.append(filename)
    Key2.append(key['Key'])
for f in Key2:
    if f.endswith('.csv'):
        print(f)
        file_name = str(f.rsplit('/', 1)[-1])
        print(file_name)
        if not os.path.exists(download_path):
            os.makedirs(download_path)
        else:
            S3_Object.download_file(BUCKET_NAME, f, download_path + file_name)
print("success")
Here is my test code.
import boto3
import os

s3 = boto3.resource('s3')
bucket = 'your bucket'
response = s3.Bucket(bucket).objects.all()
# If you want to search only a specific path of the bucket:
# response = s3.Bucket(bucket).objects.filter(Prefix='path')
path = 'your path'
if not os.path.exists(path):
    os.makedirs(path)
for item in response:
    filename = item.key.rsplit('/', 1)[-1]
    if filename.endswith('.csv'):
        s3.Object(bucket, item.key).download_file(path + filename)
        print("success")
I have tested this code and it gives a correct name.
So what is wrong in yours?
I think there is a missing / in your path.
print(os.path.join('..', 'data', 'lz', 'test_sample', 'sample_file'))
The code gives the result:
../data/lz/test_sample/sample_file
So in the step below,
S3_Object.download_file(BUCKET_NAME, f, download_path + file_name)
download_path + file_name produces the wrong local path; it should be:
S3_Object.download_file(BUCKET_NAME, f, download_path + '/' + file_name)
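An alternative that avoids the separator problem entirely is to build the local path with os.path.join; a minimal sketch reusing the question's variables:

import os

# joins the directory and file name with the correct separator,
# e.g. ../data/lz/test_sample/sample_file/foo.csv
local_path = os.path.join(download_path, file_name)
S3_Object.download_file(BUCKET_NAME, f, local_path)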
The following function downloads the files recursively.
Directories are created locally only if they contain files.
import boto3
import os

def download_dir(client, resource, dist, local='/tmp', bucket='your_bucket'):
    paginator = client.get_paginator('list_objects')
    for result in paginator.paginate(Bucket=bucket, Delimiter='/', Prefix=dist):
        if result.get('CommonPrefixes') is not None:
            for subdir in result.get('CommonPrefixes'):
                download_dir(client, resource, subdir.get('Prefix'), local, bucket)
        for file in result.get('Contents', []):
            dest_pathname = os.path.join(local, file.get('Key'))
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
            resource.meta.client.download_file(bucket, file.get('Key'), dest_pathname)
The function is called like this:
def _start():
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    download_dir(client, resource, 'clientconf/', '/tmp', bucket='my-bucket')

Upload file to S3 folder using python boto

I am trying to upload files from a local directory to a folder within an S3 bucket. I am able to upload files to the bucket itself, but not to a folder inside it.
Could anyone help? What am I doing wrong here?
Here is the code:
import os
import sys
import boto3
import fnmatch
import pprint
import re
import hashlib

SOURCE_DIR = '/home/user/Downloads/tracks/'
BUCKET_NAME = 'mybucket'
S3_FOLDER = 'mybucket/folder1/'

client = boto3.client('s3')
s3 = boto3.resource('s3')
bucket = s3.Bucket(BUCKET_NAME)  # needed by get_etag() below

def get_md5(filename):
    f = open(filename, 'rb')
    m = hashlib.md5()
    while True:
        data = f.read(10240)
        if len(data) == 0:
            break
        m.update(data)
    return m.hexdigest()

def get_etag(filebase, filepath):
    for item in bucket.objects.all():
        keyfile = S3_FOLDER + filebase
        if keyfile == item.key:
            md5 = get_md5(filepath)
            etag = item.e_tag.strip('"').strip("'")
            if etag != md5:
                print(filebase + ": " + md5 + " != " + etag)
                return files_to_upload.append(filepath)
        else:
            return files_to_upload.append(filepath)

files_to_upload = []
for root, dirnames, filenames in os.walk(SOURCE_DIR):
    for filename in filenames:
        filepath = os.path.join(root, filename)
        get_etag(filename, filepath)

for f in files_to_upload:
    client.put_object(Bucket=BUCKET_NAME, Key=f)
Folders don't really exist in S3; you can prefix the file name (object key) with something that looks like a folder path.
It's not entirely clear to me what your code is doing with the file paths, but it needs to be changed to something like this:
for f in files_to_upload:
    key = "my/s3/folder/name/" + f
    with open(f, 'rb') as body:
        client.put_object(Bucket=BUCKET_NAME, Key=key, Body=body)
Note: You weren't passing a Body parameter, so I think your code was just creating empty objects in S3.
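A variant of the same idea, sketched with boto3's upload_file and keys derived from the path relative to SOURCE_DIR (the folder1/ prefix is illustrative; SOURCE_DIR, BUCKET_NAME and files_to_upload come from the question):

import os

import boto3

client = boto3.client('s3')

def upload_with_prefix(local_path, bucket, prefix="folder1/"):
    # e.g. /home/user/Downloads/tracks/album/track.mp3 -> folder1/album/track.mp3
    key = prefix + os.path.relpath(local_path, SOURCE_DIR).replace(os.sep, "/")
    client.upload_file(local_path, bucket, key)

for f in files_to_upload:
    upload_with_prefix(f, BUCKET_NAME)

upload_file streams the file contents for you, so there is no need to read the file into memory or pass Body explicitly.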
