Azure Durable Functions: HTTP trigger error - Python

As a newbie in Azure,
I am following the Microsoft Azure Functions tutorial page
https://learn.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-cloud-backup?tabs=python
and the GitHub sample page
https://github.com/Azure/azure-functions-durable-python/tree/master/samples/fan_in_fan_out .
**HttpStart code**
import logging
import json
import azure.functions as func
import azure.durable_functions as df

async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    client = df.DurableOrchestrationClient(starter)
    payload: str = json.loads(req.get_body().decode())  # Load JSON post request data
    instance_id = await client.start_new(req.route_params["functionName"], client_input=payload)
    logging.info(f"Started orchestration with ID = '{instance_id}'.")
    return client.create_check_status_response(req, instance_id)
**E2_BackupSiteContent**
import azure.functions as func
import azure.durable_functions as df

def orchestrator_function(context: df.DurableOrchestrationContext):
    root_directory: str = context.get_input()

    if not root_directory:
        raise Exception("A directory path is required as input")

    files = yield context.call_activity("E2_GetFileList", root_directory)
    tasks = []
    for file in files:
        tasks.append(context.call_activity("E2_CopyFileToBlob", file))

    results = yield context.task_all(tasks)
    total_bytes = sum(results)
    return total_bytes

main = df.Orchestrator.create(orchestrator_function)
**E2_CopyFileToBlob**
import os
import pathlib
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import ResourceExistsError

connect_str = os.getenv('AzureWebJobsStorage')

def main(filePath: str) -> str:
    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # Create a unique name for the container
    container_name = "backups"

    # Create the container if it does not exist
    try:
        blob_service_client.create_container(container_name)
    except ResourceExistsError:
        pass

    # Create a blob client using the local file name as the name for the blob
    parent_dir, fname = pathlib.Path(filePath).parts[-2:]  # Get last two path components
    blob_name = parent_dir + "_" + fname
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

    # Count bytes in file
    byte_count = os.path.getsize(filePath)

    # Upload the created file
    with open(filePath, "rb") as data:
        blob_client.upload_blob(data)

    return byte_count
**E2_GetFileList**
import os
from os.path import dirname
from typing import List

def main(rootDirectory: str) -> List[str]:
    all_file_paths = []
    # We walk the file system
    for path, _, files in os.walk(rootDirectory):
        # We copy the code for activities and orchestrators
        if "E2_" in path:
            # For each file, we add their full-path to the list
            for name in files:
                if name == "__init__.py" or name == "function.json":
                    file_path = os.path.join(path, name)
                    all_file_paths.append(file_path)

    return all_file_paths
When I trigger the function over HTTP with the Postman app,
POST http://localhost:7071/api/orchestrators/E2_BackupSiteContent?req="D:\Tmp"
I get the following error messages.
[2021-11-12T02:13:42.432Z] Worker process started and initialized.
[2021-11-12T02:13:46.489Z] Host lock lease acquired by instance ID '000000000000000000000000AE48769C'.
[2021-11-12T02:13:52.529Z] Executing 'Functions.HttpStart' (Reason='This function was programmatically called via the host APIs.', Id=748996d0-1f84-4597-86ea-768467eb36e3)
[2021-11-12T02:13:52.560Z] Executed 'Functions.HttpStart' (Failed, Id=748996d0-1f84-4597-86ea-768467eb36e3, Duration=5433ms)
[2021-11-12T02:13:52.562Z] System.Private.CoreLib: Exception while executing function: Functions.HttpStart. Microsoft.Azure.WebJobs.Host: Exception binding parameter 'req'. Microsoft.AspNetCore.Server.Kestrel.Core: Reading the request body timed out due to data arriving too slowly. See MinRequestBodyDataRate.
What should I do to solve this problem?
(I tested on both Linux and Windows.)
--Added--
Postman capture (screenshot)

Instead of passing the directory in the query string of the URL, you should pass the path in the HTTP request body in the Postman tool. The Microsoft doc page itself shows how to do it; see the "Run the sample" section.
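For illustration, a minimal sketch of sending the directory path as the JSON request body from Python with the requests library (the host/port and route come from the log and URL above; in Postman the equivalent is a raw JSON body containing the quoted path):

import requests

# The orchestrator name goes in the route; the directory path goes in the JSON body,
# which HttpStart reads via req.get_body() and forwards to the orchestrator as input.
url = "http://localhost:7071/api/orchestrators/E2_BackupSiteContent"
resp = requests.post(url, json="D:\\Tmp")  # body becomes the JSON string "D:\Tmp"
print(resp.status_code)
print(resp.json())  # includes statusQueryGetUri and the other management URLs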

Related

How to download all blobs from a container where the blobs are in a sub-directory style, using Python

The code below will download a particular blob, given the blob name:
import constants
import os
import tempfile
from azure.storage.blob import BlobServiceClient

temp_dir = tempfile.TemporaryDirectory()
print(temp_dir.name)
Local_path = os.path.join(temp_dir.name, constants.BLOB_NAME)

class AzureBlob:
    def __init__(self, CONNECTION_STRING, BLOB_CONTAINER,
                 BLOB_PATH, BLOB_NAME):
        # Store the settings first so the helper methods below can use them
        self.CONNECTION_STRING = CONNECTION_STRING
        self.BLOB_CONTAINER = BLOB_CONTAINER
        self.BLOB_PATH = BLOB_PATH
        self.BLOB_NAME = BLOB_NAME
        self.blob_service_client = self.activate_blob_service()
        self.container_client = self.initialize_container()

    # Initialize a BlobServiceClient object
    def activate_blob_service(self):
        self.blob_service_client = BlobServiceClient.from_connection_string(self.CONNECTION_STRING)
        # print(self.CONNECTION_STRING)
        return self.blob_service_client

    # Initialize a container client from its name
    def initialize_container(self):
        self.container_client = self.blob_service_client.get_container_client(self.BLOB_CONTAINER)
        # print(self.container_client)
        return self.container_client

    # Download blob to local path
    def download_file(self):
        with open(Local_path, 'wb+') as f:
            f.write(self.container_client.download_blob(os.path.join(self.BLOB_PATH, self.BLOB_NAME)).readall())
        return Local_path

# AzureBlob().download_file()
a = AzureBlob(constants.CONNECTION_STRING, constants.BLOB_CONTAINER,
              constants.BLOB_PATH, constants.BLOB_NAME)
What I am actually trying to achieve is to download all blobs from a container where the blobs sit in a sub-directory. I will provide the directory path, and I need everything inside that directory to be downloaded.
To achieve the above requirement, you can try the workaround below to download all the files from your container:
# download_blobs.py
# Python program to bulk download blob files from azure storage
# Uses latest python SDK() for Azure blob storage
# Requires python 3.6 or above
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient

# IMPORTANT: Replace connection string with your storage account connection string
# Usually starts with DefaultEndpointsProtocol=https;...
MY_CONNECTION_STRING = "REPLACE_THIS"

# Replace with blob container
MY_BLOB_CONTAINER = "myimages"

# Replace with the local folder where you want files to be downloaded
LOCAL_BLOB_PATH = "REPLACE_THIS"

class AzureBlobFileDownloader:
    def __init__(self):
        print("Initializing AzureBlobFileDownloader")

        # Initialize the connection to Azure storage account
        self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
        self.my_container = self.blob_service_client.get_container_client(MY_BLOB_CONTAINER)

    def save_blob(self, file_name, file_content):
        # Get full path to the file
        download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)

        # for nested blobs, create local path as well!
        os.makedirs(os.path.dirname(download_file_path), exist_ok=True)

        with open(download_file_path, "wb") as file:
            file.write(file_content)

    def download_all_blobs_in_container(self):
        my_blobs = self.my_container.list_blobs()
        for blob in my_blobs:
            print(blob.name)
            file_content = self.my_container.get_blob_client(blob).download_blob().readall()
            self.save_blob(blob.name, file_content)

# Initialize class and download files
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()
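If you only need the blobs under a particular virtual directory rather than the whole container (as the question asks), a minimal sketch, assuming the same settings and downloader class as above, is to pass the directory path as a prefix via name_starts_with:

# Download only blobs whose names start with a given virtual-directory prefix
prefix = "some/sub/directory/"  # assumption: replace with your directory path inside the container
downloader = AzureBlobFileDownloader()
for blob in downloader.my_container.list_blobs(name_starts_with=prefix):
    content = downloader.my_container.get_blob_client(blob).download_blob().readall()
    downloader.save_blob(blob.name, content)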
For more information, please refer to this blog post and SO thread.

Airflow: how to download PDF files from S3 bucket into Airflow

I tried the code below but I am getting the error "Unable to locate credentials".
import os
import boto3

def download():
    bucket = 'bucketname'
    key = 'path and filename'
    s3_resource = boto3.resource('s3')
    my_bucket = s3_resource.Bucket(bucket)
    objects = my_bucket.objects.filter(Prefix=key)
    for obj in objects:
        path, filename = os.path.split(obj.key)
        my_bucket.download_file(obj.key, filename)
You'll need to define the AWS connection and use the download_fileobj function via the S3Hook.
I didn't test it, but it should be something like:
from tempfile import NamedTemporaryFile
from airflow.providers.amazon.aws.hooks.s3 import S3Hook

hook = S3Hook('my_aws_conn')
key_object = hook.get_key('your_path')
with NamedTemporaryFile("wb") as f:
    key_object.download_fileobj(Fileobj=f)
    f.flush()
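To sketch how this could be wired into a DAG (the connection id my_aws_conn, bucket, prefix and local directory are assumptions; the credentials come from the Airflow connection rather than the environment, which is what avoids the "Unable to locate credentials" error):

import os
from datetime import datetime
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook

def download_pdfs(bucket: str, prefix: str, local_dir: str):
    hook = S3Hook('my_aws_conn')  # assumption: an AWS connection with this id exists in Airflow
    for key in hook.list_keys(bucket_name=bucket, prefix=prefix):
        if key.endswith(".pdf"):
            obj = hook.get_key(key, bucket_name=bucket)
            local_path = os.path.join(local_dir, os.path.basename(key))
            with open(local_path, "wb") as f:
                obj.download_fileobj(Fileobj=f)

with DAG("download_pdfs_from_s3", start_date=datetime(2021, 1, 1),
         schedule_interval=None, catchup=False) as dag:
    PythonOperator(
        task_id="download_pdfs",
        python_callable=download_pdfs,
        op_kwargs={"bucket": "bucketname", "prefix": "path/", "local_dir": "/tmp"},
    )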

Download file using fastapi

I see the functions for uploading in an API, but I don't see how to download. Am I missing something? I want to create an API for a file download site. Is there a different API I should be using?
import os
from typing import List
from fastapi import FastAPI, Query

app = FastAPI()

PATH = "some/path"

@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    entry = PATH + "/".join(query_items["q"]) + "/"
    dirs = os.listdir(entry)
    results["folders"] = [val for val in dirs if os.path.isdir(entry + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + val)]
    results["path_vars"] = query_items["q"]
    return results
Here is the sample bit of code for Python to fetch files and dirs for a path; you can return the path as a list with a new entry in a loop to go deeper into a file tree. Passing a file name should trigger a download function, but I can't seem to get a download function going.
This worked for me:
from starlette.responses import FileResponse

return FileResponse(file_location, media_type='application/octet-stream', filename=file_name)

This will download the file with the given filename.
Since we're talking about FastAPI, the proper way to return a file response is covered in their documentation, code snippet below:
from fastapi import FastAPI
from fastapi.responses import FileResponse

file_path = "large-video-file.mp4"
app = FastAPI()

@app.get("/")
def main():
    return FileResponse(path=file_path, filename=file_path, media_type='text/mp4')
FastAPI uses Starlette's FileResponse class, so there are two ways to import FileResponse in your API code. But of course importing from FastAPI would be a better choice. You can follow the approach below to enable your API endpoints to support file download.
Do not forget to add aiofiles to your dependency list. A basic requirements.txt file should look like the following (versions of modules might change over time; version 0.63.0 of fastapi strictly uses starlette 0.13.6):
uvicorn==0.13.4
fastapi==0.63.0
starlette==0.13.6
aiofiles==0.6.0
And the API code:
import os
from fastapi import FastAPI
from fastapi.responses import FileResponse

app = FastAPI()

@app.get("/")
async def main():
    file_name = "FILE NAME"
    # DEPENDS ON WHERE YOUR FILE LOCATES
    file_path = os.getcwd() + "/" + file_name
    return FileResponse(path=file_path, media_type='application/octet-stream', filename=file_name)
I figured it out:
from starlette.responses import FileResponse

@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    if query_items["q"]:
        entry = PATH + "/".join(query_items["q"])
    else:
        entry = PATH

    if os.path.isfile(entry):
        return download(entry)

    dirs = os.listdir(entry + "/")
    results["folders"] = [
        val for val in dirs if os.path.isdir(entry + "/" + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + "/" + val)]
    results["path_vars"] = query_items["q"]
    return results

def download(file_path):
    """
    Download file for given path.
    """
    if os.path.isfile(file_path):
        return FileResponse(file_path)
    # return FileResponse(path=file_path)
    return None
I added this part:
from starlette.responses import FileResponse

if os.path.isfile(entry):
    return download(entry)

This allows you to host a static file. But for some reason all files download as "download" .extension. If you know how to ensure the original file name, let me know.
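One likely fix for the "download" .extension issue, sketched under the assumption that download() returns FileResponse(file_path) as above: pass the filename argument so the Content-Disposition header carries the original name.

import os
from starlette.responses import FileResponse

def download(file_path):
    if os.path.isfile(file_path):
        # filename sets Content-Disposition, so the browser keeps the original name
        return FileResponse(file_path, filename=os.path.basename(file_path))
    return None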
from fastapi import FastAPI
from fastapi.responses import FileResponse
import uvicorn
import os

app = FastAPI()

@app.get("/download-file")
def download_file(file_name: str):
    folder_path = r"C:\Users\HP\Desktop\excel files"
    file_location = f'{folder_path}{os.sep}{file_name}.xlsx'  # os.sep is used to separate with a \
    return FileResponse(file_location, media_type='application/octet-stream', filename=file_name)

uvicorn.run(app, port=9105)
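For illustration, a small sketch of calling this endpoint from Python with requests (the port and folder come from the answer above; the workbook name "report" is an assumption):

import requests

# Ask the API for "report.xlsx" and save the response body locally
resp = requests.get("http://localhost:9105/download-file", params={"file_name": "report"})
resp.raise_for_status()
with open("report.xlsx", "wb") as f:
    f.write(resp.content)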

Disabling OCR when uploading to Google Drive via API

I was running into an issue with unoconv where it would periodically just hang indefinitely when trying to convert random documents to PDFs, so I wrote a small Python script to upload documents to GDrive and download them again as PDFs to work around this issue.
The problem I've run into is that Google Drive automatically tries to OCR images that get uploaded, and I don't want that to happen, but I've thus far been unable to find documentation on how to disable the OCR.
One thing I did notice: I'm using the create function from v3 of the API; in the v2 API, there is an insert function that takes an OCR flag. Is this possible with the v3 API?
Here is my code:
from __future__ import print_function
import httplib2
import magic
import io
import sys
import argparse
import subprocess as sp
from apiclient import discovery
from oauth2client.service_account import ServiceAccountCredentials
from httplib2 import Http
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
from settings import *

"""
This script exists to mask unoconv for JUST pdf conversion. If it gets flags for anything else, it will fallback on unoconv.
Otherwise, it uploads the document to google drive, download it as a pdf, and then delete the file out of the drive.
"""

MIMETYPE_MAPPING = {
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "application/vnd.google-apps.document",
    "application/rtf": "application/vnd.google-apps.document",
    "text/richtext": "application/vnd.google-apps.document",
    "text/plain": "application/vnd.google-apps.document",
    "text/html": "application/vnd.google-apps.document",
    "application/vnd.oasis.opendocument.text": "application/vnd.google-apps.document",
    "application/x-iwork-pages-sffpages": "application/vnd.google-apps.document",
    "application/msword": "application/vnd.google-apps.document",
    "application/vnd.ms-excel": "application/vnd.google-apps.spreadsheets",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "application/vnd.google-apps.spreadsheets",
    "text/csv": "application/vnd.google-apps.spreadsheets",
    "text/tab-separated-values": "application/vnd.google-apps.spreadsheets",
    "application/vnd.oasis.opendocument.spreadsheets": "application/vnd.google-apps.spreadsheets",
    "application/vnd.oasis.opendocument.spreadsheet": "application/vnd.google-apps.spreadsheets",
    "application/vnd.ms-powerpoint": "application/vnd.google-apps.presentation",
    "application/vnd.openxmlformats-officedocument.presentationml.presentationml": "application/vnd.google-apps.presentation",
    "application/vnd.oasis.opendocument.presentation": "application/vnd.google-apps.presentation",
    "image/png": "application/vnd.google-apps.document",
    "image/x-citrix-png": "application/vnd.google-apps.document",
    "image/x-png": "application/vnd.google-apps.document",
    "image/jpeg": "application/vnd.google-apps.document",
    "image/x-citrix-jpeg": "application/vnd.google-apps.document",
    "image/gif": "application/vnd.google-apps.document",
    "image/bmp": "application/vnd.google-apps.document",
    "application/pdf": "application/vnd.google-apps.document",
}

SERVICE = None

def get_service():
    """
    Establishes the connection to the google drive APIs.
    """
    global SERVICE
    if SERVICE is None:
        credentials = ServiceAccountCredentials.from_json(JSON_KEY)
        http = http_auth = credentials.authorize(Http())
        SERVICE = discovery.build('drive', 'v3', http=http_auth)
    return SERVICE

def drive_upload(fp, fn):
    """
    Uploads the file found at fp to root of google drive account as a google doc with name fn
    Returns the id of the new file
    """
    mimetype = magic.from_file(fp, mime=True)
    drive_service = get_service()
    file_metadata = {
        'name': fn,
        'mimeType': MIMETYPE_MAPPING.get(mimetype, 'application/vnd.google-apps.document'),
    }
    media = MediaFileUpload(fp,
                            mimetype=mimetype,
                            resumable=True)
    import inspect
    print(inspect.getargspec(drive_service.files().create)[0])
    file = drive_service.files().create(body=file_metadata,
                                        media_body=media,
                                        fields='id').execute()
    return file.get('id')

def download_pdf(file_id, dlp):
    """
    Downloads file from google drive specified by file_id to the filepath in dlp
    Will download file as pdf
    """
    drive_service = get_service()
    request = drive_service.files().export_media(fileId=file_id,
                                                 mimeType='application/pdf')
    resp = request.execute()
    f = open(dlp, 'w')
    f.write(resp)
    f.close()

def convert_to_pdf(inputf, outputf):
    """
    Converts input file to pdf located at output file and cleans up file from google drive
    """
    fid = drive_upload(inputf, inputf.split('/')[-1])
    download_pdf(fid, outputf)
    # Now delete the file from drive
    service = get_service()
    service.files().delete(fileId=fid).execute()

def pass_through():
    """
    Calls unoconv with same args that were passed to this script
    """
    print("PASSING THROUGH", file=sys.stderr)
    cmd = PATH_TO_UNOCONV + " " + " ".join(sys.argv[1:])
    child = sp.Popen(cmd.split(), stdout=sp.PIPE, stderr=sp.PIPE)
    stdout, stderr = child.communicate()
    print(stdout, end='')
    print(stderr, file=sys.stderr, end='')
    sys.exit(child.returncode)

class ArgParse(argparse.ArgumentParser):
    """
    This subclass of ArgumentParser exists to change the default behaviour of the exit function.
    If the exit function is called with a status other than 0 (usually because unsupported flags are used),
    a call is made to pass_through to let unoconv handle this call.
    """
    def exit(self, status=0, message=None):
        if status != 0:
            pass_through()
        else:
            return super(ArgParse, self).exit(status=status, message=message)

if __name__ == '__main__':
    parser = ArgParse(description="Wrapper for unoconv that farms pdf conversions to google drive, using any args other than the supplied will cause it to fallback on unoconv")
    parser.add_argument('-f', metavar='format', help='Desired ouput format')
    parser.add_argument('-o', metavar='output_file', help='Path to output file')
    parser.add_argument('fname', metavar='inputf', type=str, nargs=1, help='Path to file to convert')
    args = parser.parse_args()
    fmt = args.f
    output_file = args.o
    input_file = args.fname[0]
    if fmt.upper() == "PDF":
        try:
            convert_to_pdf(input_file, output_file)
        except:
            pass_through()
    else:
        # if we aren't converting the file to a PDF, let unoconv handle it
        pass_through()

How to upload a file to directory in S3 bucket using boto

I want to copy a file to an S3 bucket using Python.
For example: I have a bucket named "test", and in the bucket I have two folders named "dump" and "input". Now I want to copy a file from a local directory to the S3 "dump" folder using Python... Can anyone help me?
NOTE: This answer uses boto. See the other answer that uses boto3, which is newer.
Try this...
import boto
import boto.s3
import sys
from boto.s3.key import Key

AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

bucket_name = AWS_ACCESS_KEY_ID.lower() + '-dump'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID,
                       AWS_SECRET_ACCESS_KEY)

bucket = conn.create_bucket(bucket_name,
                            location=boto.s3.connection.Location.DEFAULT)

testfile = "replace this with an actual filename"
print 'Uploading %s to Amazon S3 bucket %s' % \
    (testfile, bucket_name)

def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()

k = Key(bucket)
k.key = 'my test file'
k.set_contents_from_filename(testfile,
                             cb=percent_cb, num_cb=10)
[UPDATE]
I am not a pythonist, so thanks for the heads up about the import statements.
Also, I'd not recommend placing credentials inside your own source code. If you are running this inside AWS use IAM Credentials with Instance Profiles (http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html), and to keep the same behaviour in your Dev/Test environment, use something like Hologram from AdRoll (https://github.com/AdRoll/hologram)
import boto3
s3 = boto3.resource('s3')
BUCKET = "test"
s3.Bucket(BUCKET).upload_file("your/local/file", "dump/file")
No need to make it that complicated:
s3_connection = boto.connect_s3()
bucket = s3_connection.get_bucket('your bucket name')
key = boto.s3.key.Key(bucket, 'some_file.zip')
with open('some_file.zip') as f:
    key.send_file(f)
Upload a file to S3 within a session with credentials.
import boto3
session = boto3.Session(
    aws_access_key_id='AWS_ACCESS_KEY_ID',
    aws_secret_access_key='AWS_SECRET_ACCESS_KEY',
)
s3 = session.resource('s3')
# Filename - File to upload
# Bucket - Bucket to upload to (the top level directory under AWS S3)
# Key - S3 object name (can contain subdirectories). If not specified then file_name is used
s3.meta.client.upload_file(Filename='input_file_path', Bucket='bucket_name', Key='s3_output_key')
I used this and it is very simple to implement
import tinys3
conn = tinys3.Connection('S3_ACCESS_KEY','S3_SECRET_KEY',tls=True)
f = open('some_file.zip','rb')
conn.upload('some_file.zip',f,'my_bucket')
https://www.smore.com/labs/tinys3/
from boto3.s3.transfer import S3Transfer
import boto3
#have all the variables populated which are required below
client = boto3.client('s3', aws_access_key_id=access_key,aws_secret_access_key=secret_key)
transfer = S3Transfer(client)
transfer.upload_file(filepath, bucket_name, folder_name+"/"+filename)
This is a three-liner. Just follow the instructions in the boto3 documentation.
import boto3
s3 = boto3.resource(service_name = 's3')
s3.meta.client.upload_file(Filename = 'C:/foo/bar/baz.filetype', Bucket = 'yourbucketname', Key = 'baz.filetype')
Some important arguments are:
Parameters:
Filename (str) -- The path to the file to upload.
Bucket (str) -- The name of the bucket to upload to.
Key (str) -- The name of the key that you want to assign to your file in your S3 bucket. This could be the same as the name of the file or a different name of your choice, but the filetype should remain the same.
Note: I assume that you have saved your credentials in a ~/.aws folder, as suggested in the best configuration practices in the boto3 documentation.
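For reference, a minimal sketch of that shared credentials file (~/.aws/credentials); the placeholder values are assumptions:

[default]
aws_access_key_id = YOUR_ACCESS_KEY_ID
aws_secret_access_key = YOUR_SECRET_ACCESS_KEY

With this in place, boto3.client('s3') and boto3.resource('s3') pick up the credentials automatically, with no keys in the source code.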
This will also work:
import os
import boto
import boto.s3.connection
from boto.s3.key import Key

try:
    conn = boto.s3.connect_to_region('us-east-1',
                                     aws_access_key_id='AWS-Access-Key',
                                     aws_secret_access_key='AWS-Secrete-Key',
                                     # host='s3-website-us-east-1.amazonaws.com',
                                     # is_secure=True,  # uncomment if you are not using ssl
                                     calling_format=boto.s3.connection.OrdinaryCallingFormat(),
                                     )
    bucket = conn.get_bucket('YourBucketName')
    key_name = 'FileToUpload'
    path = 'images/holiday'  # Directory under which the file should get uploaded
    full_key_name = os.path.join(path, key_name)
    k = bucket.new_key(full_key_name)
    k.set_contents_from_filename(key_name)
except Exception as e:
    print(str(e))
    print("error")
Using boto3
import logging
import boto3
from botocore.exceptions import ClientError

def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True
For more:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
import boto
from boto.s3.key import Key

AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
END_POINT = ''  # eg. us-east-1
S3_HOST = ''  # eg. s3.us-east-1.amazonaws.com
BUCKET_NAME = 'test'
FILENAME = 'upload.txt'
UPLOADED_FILENAME = 'dumps/upload.txt'
# include folders in file path. If it doesn't exist, it will be created

s3 = boto.s3.connect_to_region(END_POINT,
                               aws_access_key_id=AWS_ACCESS_KEY_ID,
                               aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                               host=S3_HOST)

bucket = s3.get_bucket(BUCKET_NAME)
k = Key(bucket)
k.key = UPLOADED_FILENAME
k.set_contents_from_filename(FILENAME)
For an example of uploading a whole folder, see the following code and the S3 folder picture:
import boto
import boto.s3
import boto.s3.connection
import os.path
import sys

# Fill in info on data to upload
# destination bucket name
bucket_name = 'willie20181121'
# source directory
sourceDir = '/home/willie/Desktop/x/'  # Linux Path
# destination directory name (on s3)
destDir = '/test1/'  # S3 Path
# max size in bytes before uploading in parts. between 1 and 5 GB recommended
MAX_SIZE = 20 * 1000 * 1000
# size of parts when uploading in parts
PART_SIZE = 6 * 1000 * 1000

access_key = 'MPBVAQ*******IT****'
secret_key = '11t63yDV***********HgUcgMOSN*****'

conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    host='******.org.tw',
    is_secure=False,  # uncomment if you are not using ssl
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.create_bucket(bucket_name,
                            location=boto.s3.connection.Location.DEFAULT)

uploadFileNames = []
for (sourceDir, dirname, filename) in os.walk(sourceDir):
    uploadFileNames.extend(filename)
    break

def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()

for filename in uploadFileNames:
    sourcepath = os.path.join(sourceDir + filename)
    destpath = os.path.join(destDir, filename)
    print('Uploading %s to Amazon S3 bucket %s' %
          (sourcepath, bucket_name))

    filesize = os.path.getsize(sourcepath)
    if filesize > MAX_SIZE:
        print("multipart upload")
        mp = bucket.initiate_multipart_upload(destpath)
        fp = open(sourcepath, 'rb')
        fp_num = 0
        while (fp.tell() < filesize):
            fp_num += 1
            print("uploading part %i" % fp_num)
            mp.upload_part_from_file(fp, fp_num, cb=percent_cb, num_cb=10, size=PART_SIZE)
        mp.complete_upload()
    else:
        print("singlepart upload")
        k = boto.s3.key.Key(bucket)
        k.key = destpath
        k.set_contents_from_filename(sourcepath,
                                     cb=percent_cb, num_cb=10)
PS: For more details, see the reference URL.
If you have the AWS command line interface installed on your system, you can make use of Python's subprocess library.
For example:
import subprocess

def copy_file_to_s3(source: str, target: str, bucket: str):
    subprocess.run(["aws", "s3", "cp", source, f"s3://{bucket}/{target}"])
Similarly, you can use that logic for all sorts of AWS client operations like downloading or listing files. It is also possible to get return values. This way there is no need to import boto3. I guess its use is not intended that way, but in practice I find it quite convenient. This way you also get the status of the upload displayed in your console - for example:
Completed 3.5 GiB/3.5 GiB (242.8 MiB/s) with 1 file(s) remaining
To modify the method to your wishes, I recommend having a look at the subprocess reference as well as the AWS CLI reference.
Note: This is a copy of my answer to a similar question.
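As a sketch of the "get return values" point above (the bucket and file names are assumptions): subprocess.run can capture the CLI output and expose the exit code.

import subprocess

def copy_file_to_s3_checked(source: str, target: str, bucket: str) -> bool:
    # capture_output collects stdout/stderr; returncode tells us whether the aws cli succeeded
    result = subprocess.run(
        ["aws", "s3", "cp", source, f"s3://{bucket}/{target}"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print(result.stderr)
        return False
    print(result.stdout)
    return True

# copy_file_to_s3_checked("local.txt", "dump/local.txt", "test")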
I have something that seems to me to have a bit more structure:
import boto3
from pprint import pprint
from botocore.exceptions import NoCredentialsError

class S3(object):
    BUCKET = "test"
    connection = None

    def __init__(self):
        try:
            vars = get_s3_credentials("aws")
            # placeholder credential strings; supply your own keys here
            self.connection = boto3.resource('s3', aws_access_key_id='aws_access_key_id',
                                             aws_secret_access_key='aws_secret_access_key')
        except(Exception) as error:
            print(error)
            self.connection = None

    def upload_file(self, file_to_upload_path, file_name):
        if file_to_upload_path is None or file_name is None:
            return False
        try:
            pprint(file_to_upload_path)
            file_name = "your-folder-inside-s3/{0}".format(file_name)
            self.connection.Bucket(self.BUCKET).upload_file(file_to_upload_path,
                                                            file_name)
            print("Upload Successful")
            return True
        except FileNotFoundError:
            print("The file was not found")
            return False
        except NoCredentialsError:
            print("Credentials not available")
            return False
There are three important variables here: the BUCKET const, the file_to_upload_path and the file_name.
BUCKET: the name of your S3 bucket
file_to_upload_path: the path of the file you want to upload
file_name: the resulting file name and path in your bucket (this is where you add folders or whatever)
There are many ways, but you can reuse this code in another script like this:
import S3

def some_function():
    S3.S3().upload_file(path_to_file, final_file_name)
You should mention the content type as well, to avoid file access issues.
import os
import boto3

image = 'fly.png'
s3_filestore_path = 'images/fly.png'
filename, file_extension = os.path.splitext(image)
content_type_dict = {".png": "image/png", ".html": "text/html",
                     ".css": "text/css", ".js": "application/javascript",
                     ".jpg": "image/jpeg", ".gif": "image/gif",
                     ".jpeg": "image/jpeg"}
content_type = content_type_dict[file_extension]

s3 = boto3.client('s3', config=boto3.session.Config(signature_version='s3v4'),
                  region_name='ap-south-1',
                  aws_access_key_id=S3_KEY,
                  aws_secret_access_key=S3_SECRET)
with open(image, 'rb') as body:  # upload the file contents, not the file name string
    s3.put_object(Body=body, Bucket=S3_BUCKET, Key=s3_filestore_path, ContentType=content_type)
xmlstr = etree.tostring(listings, encoding='utf8', method='xml')
conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    # host = '<bucketName>.s3.amazonaws.com',
    host='bycket.s3.amazonaws.com',
    # is_secure=False,  # uncomment if you are not using ssl
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
conn.auth_region_name = 'us-west-1'

bucket = conn.get_bucket('resources', validate=False)
key = bucket.get_key('filename.txt')
key.set_contents_from_string("SAMPLE TEXT")
key.set_canned_acl('public-read')
A lot of the existing answers here are pretty complex. A simple approach is to use cloudpathlib, which wraps boto3.
First, be sure to be authenticated properly with a ~/.aws/credentials file or environment variables set. See more options in the cloudpathlib docs.
This is how you would upload a file:
from pathlib import Path
from cloudpathlib import CloudPath

# write a local file that we will upload:
Path("test_file.txt").write_text("hello")
#> 5

# upload that file to S3
CloudPath("s3://mybucket/testsfile.txt").upload_from("test_file.txt")
#> S3Path('s3://mybucket/testsfile.txt')

# read it back from s3
CloudPath("s3://mybucket/testsfile.txt").read_text()
#> 'hello'
Note that you could write to the cloud path directly using the normal write_text, write_bytes, or open methods as well.
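For instance, a small sketch of writing directly (same assumed bucket as above):

from cloudpathlib import CloudPath

# skips the local file entirely; cloudpathlib uploads on write
CloudPath("s3://mybucket/hello.txt").write_text("hello")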
I modified your example slightly, dropping some imports and the progress to get what I needed for a boto example.
import boto.s3
from boto.s3.key import Key
AWS_ACCESS_KEY_ID = 'your-access-key-id'
AWS_SECRET_ACCESS_KEY = 'your-secret-access-key'
bucket_name = AWS_ACCESS_KEY_ID.lower() + '-form13'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
bucket = conn.create_bucket(bucket_name, location=boto.s3.connection.Location.DEFAULT)
filename = 'embedding.csv'
k = Key(bucket)
k.key = filename
k.set_contents_from_filename(filename)
Here's a boto3 example as well:
import boto3
ACCESS_KEY = 'your-access-key'
SECRET_KEY = 'your-secret-key'
file_name='embedding.csv'
object_name=file_name
bucket_name = ACCESS_KEY.lower() + '-form13'
s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY)
s3.create_bucket(Bucket=bucket_name)
s3.upload_file(file_name, bucket_name, object_name)
