Download multiple files from S3 django - python

Here is the link I have used (Download files from Amazon S3 with Django). Using this, I'm able to download a single file.
Code:
s3_template_path = queryset.values('file')
filename = 'test.pdf'
conn = boto.connect_s3('<aws access key>', '<aws secret key>')
bucket = conn.get_bucket('your_bucket')
s3_file_path = bucket.get_key(s3_template_path)
response_headers = {
    'response-content-type': 'application/force-download',
    'response-content-disposition': 'attachment; filename="%s"' % filename
}
url = s3_file_path.generate_url(60, 'GET',
                                response_headers=response_headers,
                                force_http=True)
return HttpResponseRedirect(url)
I need to download multiple files from S3; a zip would be better. Can the method above be modified for this? If not, please suggest another method.

Okay, here is a possible solution: it basically downloads each file, zips them into a folder, and then returns the zip to the user.
Not sure if s3_template_path is the same for each file, but change this if necessary.
# python 3
import io
import os
import zipfile

import boto
import requests
from django.http import HttpResponse

file_names = ['test.pdf', 'test2.pdf', 'test3.pdf']

# set up the in-memory zip archive
zip_subdir = "download_folder"
zip_filename = zip_subdir + ".zip"
byte_stream = io.BytesIO()
zf = zipfile.ZipFile(byte_stream, "w")

for filename in file_names:
    s3_template_path = queryset.values('file')
    conn = boto.connect_s3('<aws access key>', '<aws secret key>')
    bucket = conn.get_bucket('your_bucket')
    s3_file_path = bucket.get_key(s3_template_path)
    response_headers = {
        'response-content-type': 'application/force-download',
        'response-content-disposition': 'attachment; filename="%s"' % filename
    }
    url = s3_file_path.generate_url(60, 'GET',
                                    response_headers=response_headers,
                                    force_http=True)
    # download the file
    file_response = requests.get(url)
    if file_response.status_code == 200:
        # save a local copy of the file
        with open(filename, 'wb') as f1:
            f1.write(file_response.content)
        # add the local copy to the zip archive
        fdir, fname = os.path.split(filename)
        zip_path = os.path.join(zip_subdir, fname)
        zf.write(filename, zip_path)

# close the zip archive and return it as the response
zf.close()
response = HttpResponse(byte_stream.getvalue(), content_type="application/x-zip-compressed")
response['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
return response
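If writing temporary copies to disk is not desirable (for example on a read-only or ephemeral filesystem), the downloaded bytes can go straight into the archive with ZipFile.writestr instead. A minimal sketch, assuming a hypothetical presigned_urls dict that maps each file name to a presigned GET URL generated the same way as above:

import io
import zipfile

import requests

def build_zip_in_memory(presigned_urls):
    # presigned_urls: {file name -> presigned GET URL}, e.g. from generate_url() above
    byte_stream = io.BytesIO()
    with zipfile.ZipFile(byte_stream, "w", zipfile.ZIP_DEFLATED) as zf:
        for fname, url in presigned_urls.items():
            resp = requests.get(url)
            if resp.status_code == 200:
                # write the bytes directly into the archive; no temporary file on disk
                zf.writestr(fname, resp.content)
    byte_stream.seek(0)
    return byte_stream

The returned buffer can then be passed to HttpResponse exactly as in the snippet above.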

Related

How to commit files from a S3 bucket to codecommit repository in a python lambda function

I need to commit and push files from a bucket in S3 to a CodeCommit repository, programmatically, using a Python Lambda function.
I am using the boto3 library: first I get and unzip the zip file from the bucket, then I loop over each file and call put_file.
The problem is that put_file generates as many commits as there are files, but I only need one commit, because I have to send a single notification to CodeBuild.
My Lambda code:
import base64
import io
import zipfile

import boto3
import botocore

# assumes these are defined earlier in the handler / at module level:
# s3 = boto3.client('s3')
# client = boto3.client('codecommit')
# result = []

file_key = event['Records'][0]['s3']['object']['key']
obj = s3.get_object(Bucket=bucket_name, Key=file_key)
body_dec = base64.b64decode(obj['Body'].read())
memory_file = io.BytesIO(body_dec)
with zipfile.ZipFile(memory_file, 'r') as zf:
    files = zf.namelist()
    for individualFile in files:
        data = zf.read(individualFile)
        # get parentCommitId for the new push
        parentCommitId = ""
        try:
            response = client.get_branch(
                repositoryName='test-codecommit',
                branchName='master'
            )
            parentCommitId = response['branch']['commitId']
        except botocore.exceptions.ClientError as error:
            print(error.response['Error'])
        try:
            if not parentCommitId:
                # parentCommitId = None
                response = client.put_file(
                    repositoryName='test-codecommit',
                    branchName='master',
                    fileContent=data,
                    filePath=individualFile,
                    commitMessage='tag1',
                    name='Javier',
                    email='jramirezneira#gmail.com'
                )
            else:
                response = client.put_file(
                    repositoryName='test-codecommit',
                    branchName='master',
                    fileContent=data,
                    filePath=individualFile,
                    # fileMode='EXECUTABLE'|'NORMAL'|'SYMLINK',
                    parentCommitId=parentCommitId,
                    commitMessage='tag1',
                    name='Javier',
                    email='jramirezneira#gmail.com'
                )
            result.append({'file': individualFile, 'Message': 'Added to Codecommit'})
        except botocore.exceptions.ClientError as error:
            print(error.response['Error'])
            result.append({'file': individualFile, 'Message': error.response['Error']['Message']})
I would appreciate your help or suggestions.
Instead of using put_file, you can use create_commit, which takes multiple files in its putFiles parameter. I was able to do it using this code:
import os

import boto3

def create_codecommit_repo_commit(repo_name, branch_name, code_folder):
    client = boto3.client('codecommit')
    parent_folder = os.path.join(code_folder, repo_name)
    putFilesList = []
    for (root, folders, files) in os.walk(parent_folder):
        for file in files:
            file_path = os.path.join(root, file)
            with open(file_path, mode='r+b') as file_obj:
                file_content = file_obj.read()
            putFileEntry = {'filePath': str(file_path).replace(parent_folder, ''),
                            'fileContent': file_content}
            putFilesList.append(putFileEntry)
    response = client.create_commit(repositoryName=repo_name, branchName=branch_name, putFiles=putFilesList)
    return response
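Applied to the Lambda in the question, the putFiles list can be built from the zip entries instead of a local folder, so the whole archive lands in a single commit. A rough sketch under that assumption (repository and branch names mirror the question; commit_zip_contents is a hypothetical helper):

import io
import zipfile

import boto3

def commit_zip_contents(codecommit, zip_bytes, repo='test-codecommit', branch='master'):
    # build one putFiles entry per file inside the zip
    put_files = []
    with zipfile.ZipFile(io.BytesIO(zip_bytes), 'r') as zf:
        for name in zf.namelist():
            if name.endswith('/'):
                continue  # skip directory entries
            put_files.append({'filePath': name, 'fileContent': zf.read(name)})

    kwargs = dict(repositoryName=repo, branchName=branch,
                  putFiles=put_files, commitMessage='tag1')
    # an existing branch needs its current head as the parent commit
    try:
        head = codecommit.get_branch(repositoryName=repo, branchName=branch)
        kwargs['parentCommitId'] = head['branch']['commitId']
    except codecommit.exceptions.BranchDoesNotExistException:
        pass  # first commit on a new branch has no parent
    return codecommit.create_commit(**kwargs)

Calling commit_zip_contents(client, body_dec) in place of the per-file put_file loop should produce a single commit, and therefore a single notification to CodeBuild.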

How to create zipfile in S3 with Boto3 Python?

I'm trying to create a zipfile from several files from a subfolder in an S3 bucket, then save that zipfile in another subfolder in the same bucket.
I can create zipfiles from my S3 subfolders when running my Flask application locally, but not on Heroku, since it doesn't persist anything on disk.
I was going over this example, but it seems dated and uses local files.
https://www.botreetechnologies.com/blog/create-and-download-zip-file-in-django-via-amazon-s3
Here is a snippet of my code I'm working with.
from flask import Response
import io, boto3, zipfile, os

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                            aws_secret_access_key = AWS_SECRET_ACCESS_KEY)
s3 = aws_session.resource("s3")
s3 = boto3.client("s3", region_name = "some region")  # overwrites the resource above
s3_resource = boto3.resource("s3")

blog_folder = "blog_1"

paginator = s3.get_paginator("list_objects")
file_list = [page for page in paginator.paginate(Bucket=AWS_STORAGE_BUCKET_NAME)
             .search("Contents[?Size >`0`][]")
             if blog_folder in page["Key"]]

byte = io.BytesIO()  # in-memory buffer for the zip
zf = zipfile.ZipFile(byte, "w")
zipped_files = []
zip_filename = "download_files.zip"
for key in file_list:
    file_name = key["Key"].split("/")[-1]
    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)
    file_obj = my_bucket.Object(key["Key"]).get()
    zipped_files.append(file_obj["Body"].read())
Any idea how I can solve this? It's much more convenient for a user to be able to download a zipfile rather than individual files.
Any help is very much appreciated.
Python's in-memory zip library is perfect for this. Here's an example from one of my projects:
import io
import zipfile

zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zipper:
    infile_object = s3.get_object(Bucket=bucket, Key=object_key)
    infile_content = infile_object['Body'].read()
    zipper.writestr(file_name, infile_content)

s3.put_object(Bucket=bucket, Key=PREFIX + zip_name, Body=zip_buffer.getvalue())
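Adapted to the question's code, the same buffer can be filled in a loop over file_list and then uploaded to the "zipped" subfolder. A sketch, assuming the s3 client, AWS_STORAGE_BUCKET_NAME and file_list from the question, and an illustrative destination key under blog_1/zipped/:

import io
import zipfile

def zip_s3_objects_to_s3(s3, bucket, keys, dest_key):
    # stream each listed object into an in-memory zip, then upload the zip itself
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipper:
        for key in keys:
            obj = s3.get_object(Bucket=bucket, Key=key)
            # store each file under its bare name inside the archive
            zipper.writestr(key.split("/")[-1], obj["Body"].read())
    zip_buffer.seek(0)
    s3.put_object(Bucket=bucket, Key=dest_key, Body=zip_buffer.getvalue())

# e.g. zip_s3_objects_to_s3(s3, AWS_STORAGE_BUCKET_NAME,
#                           [k["Key"] for k in file_list],
#                           "blog_1/zipped/download_files.zip")

Nothing here touches the local filesystem, which sidesteps the Heroku limitation mentioned in the question.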
So I managed to get it to work in my Heroku Flask app. Hope it helps anyone who is struggling.
PS: the subfolder is blog_folder, so the structure is:
Bucket/blog_folder/resources
Bucket/blog_folder/zipped
import tempfile, zipfile, os, shutil, boto3
from flask import send_from_directory

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"

def make_zipfile(output_filename, source_dir):
    relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
        for root, dirs, files in os.walk(source_dir):
            # add directory (needed for empty dirs)
            zip.write(root, os.path.relpath(root, relroot))
            for file in files:
                filename = os.path.join(root, file)
                if os.path.isfile(filename):  # regular files only
                    arcname = os.path.join(os.path.relpath(root, relroot), file)
                    zip.write(filename, arcname)

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                            aws_secret_access_key = AWS_SECRET_ACCESS_KEY)
s3 = aws_session.resource("s3")

current_path = os.getcwd()
temp = tempfile.TemporaryDirectory(suffix="_tmp", prefix="basic_", dir=current_path)
### AT TOP OF YOUR APP.PY file ^^^^^^^^^^

# REGION_NAME, BLOG_FOLDER, Blog and db are app-specific names defined elsewhere
# @app_blog.route("/download_blog_res_zipfile/<int:blog_id>", methods = ["GET", "POST"])
def download_blog_res_zipfile(blog_id):
    current_path = os.getcwd()
    blog = Blog.query.filter_by(id = blog_id).first()
    print(blog)
    print("DOWNLOAD COUNT!!!")
    print(blog.download_count)
    blog.download_count += 1
    db.session.commit()

    # clear out any previously zipped folders
    del_folders = os.listdir(os.getcwd() + "/BLOG_ZIPPED_FOLDER")
    for folder in del_folders:
        zipp_path = os.getcwd() + "/BLOG_ZIPPED_FOLDER/" + folder
        print(folder)
        print("DELETING ZIPPING!")
        shutil.rmtree(zipp_path)

    temp_zipp = tempfile.TemporaryDirectory(suffix="_tmp", prefix="zipping_",
                                            dir=current_path + "/BLOG_ZIPPED_FOLDER")

    s3 = boto3.client("s3", region_name = REGION_NAME)
    s3_resource = boto3.resource("s3")
    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)
    paginator = s3.get_paginator("list_objects")

    folder = "blogs/blog_{}/resources".format(blog.id)
    file_list = [page for page in paginator.paginate(Bucket = AWS_STORAGE_BUCKET_NAME)
                 .search("Contents[?Size >`0`][]")
                 if folder in page["Key"]]

    # download each resource locally, then zip the folder
    for key in file_list:
        file_name = key["Key"].split("/")[-1]
        print(file_name)
        file_obj = my_bucket.Object(key["Key"]).get()["Body"]
        with open(os.getcwd() + "/" + BLOG_FOLDER + "/" + file_name, "wb") as w:
            w.write(file_obj.read())

    make_zipfile(temp_zipp.name + "/blog_res_{}.zip".format(blog_id),
                 current_path + "/" + BLOG_FOLDER)

    # remove the local copies once they have been zipped
    try:
        for key in file_list:
            file_name = key["Key"].split("/")[-1]
            file_path = current_path + "/" + BLOG_FOLDER + "/" + file_name
            os.remove(file_path)
        print("TRYY!!")
        print("REMOVED!!!")
    except:
        for key in file_list:
            file_name = key["Key"].split("/")[-1]
            file_path = current_path + "/" + BLOG_FOLDER + "/" + file_name
            os.remove(file_path)
        print("EXCEPT!!!")
        print("REMOVED!!!")

    return send_from_directory(temp_zipp.name, "blog_res_{}.zip".format(blog_id),
                               as_attachment = True)

Download list of images in S3 with boto3 and python

I have two lists, of URLs and file names, and I'd like to download each URL and store it in my S3 bucket. But how do I do this with lists?
My url list:
gm_new = ['https://img.com/30.jpg', 'https://img.com/3.jpg']
My file name list:
ccv_name = ['30.jpg', '3.jpg']
My function:
import boto3
import requests

def dl_imgs():
    s3 = boto3.resource("s3")
    if gm_new is not None:
        req_img = requests.get(gm_new, stream=True)
        file_obj = req_img.raw
        req_data = file_obj.read()
        ccv_name_path = "images/" + ccv_name + ""
        # upload to S3
        s3.Bucket(_BUCKET_NAME_IMG).put_object(
            Key=ccv_name_path, Body=req_data, ContentType="image/jpeg", ACL="public-read")

dl_imgs()
Iterate over the URL list and the file name list in parallel, and process item by item:
for url, file_name in zip(gm_new, ccv_name):
    <download file>
    <upload to s3>
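A concrete version of that loop, reusing the body of dl_imgs from the question (gm_new, ccv_name and _BUCKET_NAME_IMG are the question's own names), might look like this:

import boto3
import requests

def dl_imgs(urls, names, bucket_name):
    s3 = boto3.resource("s3")
    for url, file_name in zip(urls, names):
        # download one image
        req_img = requests.get(url, stream=True)
        req_data = req_img.raw.read()
        # upload it to S3 under images/<file_name>
        s3.Bucket(bucket_name).put_object(
            Key="images/" + file_name, Body=req_data,
            ContentType="image/jpeg", ACL="public-read")

dl_imgs(gm_new, ccv_name, _BUCKET_NAME_IMG)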

How to download Amazon S3 files on to local machine in folder using python and boto3?

I am trying to download files from Amazon S3 to a predefined folder on the local machine. This is the code and it runs, but when a file is saved it ends up with the last part of the path ('sample_file') glued onto its name. How should I correct this?
import boto3
import os

S3_Object = boto3.client('s3', aws_access_key_id='##', aws_secret_access_key='##')
BUCKET_NAME = '##'
filename2 = []
Key2 = []
bucket = S3_Object.list_objects(Bucket=BUCKET_NAME)['Contents']
download_path = target_file_path = os.path.join('..', 'data', 'lz', 'test_sample', 'sample_file')
for key in bucket:
    path, filename = os.path.split(key['Key'])
    filename2.append(filename)
    Key2.append(key['Key'])
for f in Key2:
    if f.endswith('.csv'):
        print(f)
        file_name = str(f.rsplit('/', 1)[-1])
        print(file_name)
        if not os.path.exists(download_path):
            os.makedirs(download_path)
        else:
            S3_Object.download_file(BUCKET_NAME, f, download_path + file_name)
print("success")
Here is my test code.
import boto3
import os

s3 = boto3.resource('s3')
bucket = 'your bucket'
response = s3.Bucket(bucket).objects.all()
# If you want to search only a specific path of the bucket:
# response = s3.Bucket(bucket).objects.filter(Prefix='path')
path = 'your path'
if not os.path.exists(path):
    os.makedirs(path)
for item in response:
    filename = item.key.rsplit('/', 1)[-1]
    if filename.endswith('.csv'):
        s3.Object(bucket, item.key).download_file(path + filename)
        print("success")
I have tested this code and it gives a correct name.
So what is wrong in yours?
I think there is a missing / in your path.
print(os.path.join('..', 'data', 'lz', 'test_sample', 'sample_file'))
The code gives the result:
../data/lz/test_sample/sample_file
So in the step below,
S3_Object.download_file(BUCKET_NAME, f, download_path + file_name)
download_path + file_name produces the wrong local path; it should be:
S3_Object.download_file(BUCKET_NAME, f, download_path + '/' + file_name)
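An alternative that avoids the separator problem entirely is to build the local path with os.path.join; a minimal sketch reusing the question's variables:

import os

# joins the directory and file name with the correct separator,
# e.g. ../data/lz/test_sample/sample_file/foo.csv
local_path = os.path.join(download_path, file_name)
S3_Object.download_file(BUCKET_NAME, f, local_path)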
The following function downloads the files recursively.
Directories are created locally only if they contain files.
import boto3
import os

def download_dir(client, resource, dist, local='/tmp', bucket='your_bucket'):
    paginator = client.get_paginator('list_objects')
    for result in paginator.paginate(Bucket=bucket, Delimiter='/', Prefix=dist):
        if result.get('CommonPrefixes') is not None:
            for subdir in result.get('CommonPrefixes'):
                download_dir(client, resource, subdir.get('Prefix'), local, bucket)
        for file in result.get('Contents', []):
            dest_pathname = os.path.join(local, file.get('Key'))
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
            resource.meta.client.download_file(bucket, file.get('Key'), dest_pathname)
The function is called like this:
def _start():
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    download_dir(client, resource, 'clientconf/', '/tmp', bucket='my-bucket')

Upload file to S3 folder using python boto

I am trying to upload files from a local directory to a folder within an S3 bucket. I am able to upload files to the bucket itself, but not to a folder inside it.
Could anyone help? What am I doing wrong here?
Here is the code:
import os
import sys
import boto3
import fnmatch
import pprint
import re
import hashlib

SOURCE_DIR = '/home/user/Downloads/tracks/'
BUCKET_NAME = 'mybucket'
S3_FOLDER = 'mybucket/folder1/'

client = boto3.client('s3')
s3 = boto3.resource('s3')
bucket = s3.Bucket(BUCKET_NAME)  # needed by get_etag() below

def get_md5(filename):
    f = open(filename, 'rb')
    m = hashlib.md5()
    while True:
        data = f.read(10240)
        if len(data) == 0:
            break
        m.update(data)
    return m.hexdigest()

def get_etag(filebase, filepath):
    for item in bucket.objects.all():
        keyfile = S3_FOLDER + filebase
        if keyfile == item.key:
            md5 = get_md5(filepath)
            etag = item.e_tag.strip('"').strip("'")
            if etag != md5:
                print(filebase + ": " + md5 + " != " + etag)
                return files_to_upload.append(filepath)
        else:
            return files_to_upload.append(filepath)

files_to_upload = []
for root, dirnames, filenames in os.walk(SOURCE_DIR):
    for filename in filenames:
        filepath = os.path.join(root, filename)
        get_etag(filename, filepath)

for f in files_to_upload:
    client.put_object(Bucket=BUCKET_NAME, Key=f)
Folders don't really exist in S3; you can prefix the file name (object key) with something that looks like a folder path.
It's not entirely clear to me what your code is doing with the file paths, but it needs to be changed to something like this:
for f in files_to_upload:
    key = "my/s3/folder/name/" + f
    with open(f, 'rb') as body:
        client.put_object(Bucket=BUCKET_NAME, Key=key, Body=body)
Note: You weren't passing a Body parameter, so I think your code was just creating empty objects in S3.
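A variant of the same idea, sketched with boto3's upload_file and keys derived from the path relative to SOURCE_DIR (the folder1/ prefix is illustrative; SOURCE_DIR, BUCKET_NAME and files_to_upload come from the question):

import os

import boto3

client = boto3.client('s3')

def upload_with_prefix(local_path, bucket, prefix="folder1/"):
    # e.g. /home/user/Downloads/tracks/album/track.mp3 -> folder1/album/track.mp3
    key = prefix + os.path.relpath(local_path, SOURCE_DIR).replace(os.sep, "/")
    client.upload_file(local_path, bucket, key)

for f in files_to_upload:
    upload_with_prefix(f, BUCKET_NAME)

upload_file streams the file contents for you, so there is no need to read the file into memory or pass Body explicitly.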
