Get filename from S3 bucket path - python

I am getting the last modified file from an S3 bucket using the code below:
import boto3
import urllib.parse
import json
import botocore.session as bc
import time
from time import mktime
from datetime import datetime

print('Loading function')

def lambda_handler(event, context):
    s3_client = boto3.client("s3")
    list_of_s3_objs = s3_client.list_objects_v2(Bucket="mybucket", Prefix="folder/sub/")
    # Returns a bunch of json
    contents = list_of_s3_objs["Contents"]
    # get last modified
    sorted_contents = sorted(list_of_s3_objs['Contents'], key=lambda d: d['LastModified'], reverse=True)
    print(sorted_contents[0].get('Key'))
This prints the last modified file from the path 'mybucket/folder/sub' correctly. The output is:
folder/sub/2023-02-03_myfile.csv
How do I extract just the filename '2023-02-03_myfile.csv' from this path?

Split on / and get the last element:
sorted_contents[0].get('Key').split("/")[-1]

Splitting on "/" will help you with this:
S3_Key.split("/")[-1]
Sample code:
import os

folder_path = "folder/sub/2023-02-03_myfile.csv"

# Option 1: split on "/" and take the last element
last_name = folder_path.split("/")[-1]

# Option 2: os.path.basename returns the same thing for this kind of path
file_name = os.path.basename(folder_path)

print(last_name)
OUTPUT: 2023-02-03_myfile.csv
Ref: https://www.cloudkaramchari.com/blog/restart-all-aws-ecs-services-using-lambda/
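Since S3 keys always use forward slashes regardless of the operating system, another option (a small sketch of my own, not from the answers above) is pathlib.PurePosixPath, which treats the key as a POSIX-style path:
from pathlib import PurePosixPath

key = "folder/sub/2023-02-03_myfile.csv"
# .name returns everything after the last "/" in the key
print(PurePosixPath(key).name)  # 2023-02-03_myfile.csv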

Related

How to upload to S3 using boto3

I want to upload my logs to my bucket. I have never used Python or boto3 before. This is my code:
import os
import datetime as dt
import boto3

x = dt.datetime.now()
date = x.strftime("%Y%m%d")
bucket = 'mybucket'
dir_path = "/log"
s3 = boto3.client('s3')

def log():
    global dir_path
    for (dir_path, dir, files) in os.walk(dir_path):
        for file in files:
            if date in file:
                file_path = os.path.join(dir_path, file)
                print file_path

file_name = (log())
key = (log())
res = s3.upload_file(file_name, bucket, key)
and this is the result:
log1
log2
log3
log4
Traceback (most recent call last):
  File "test2.py", line 21, in <module>
    res = s3.upload_file(file_name, bucket, key)
  File "/home/user/.local/lib/python2.7/site-packages/boto3/s3/transfer.py", line 273, in upload_file
    extra_args=ExtraArgs, callback=Callback)
  File "/home/user/.local/lib/python2.7/site-packages/boto3/s3/transfer.py", line 273, in upload_file
    raise ValueError('Filename must be a string')
ValueError: Filename must be a string
I have 4 log files. Please help me, how can I fix it?
Your log() function only prints the file paths and returns None, which is why upload_file complains that "Filename must be a string". Since you need to upload more than one file, and you stated that uploading one log works, you could do the following, which goes through the directory listing as per your original intention and, for each file that satisfies the criteria (date in file), yields the file path back to the calling loop.
import os
import datetime as dt
import boto3

x = dt.datetime.now()
date = x.strftime("%Y%m%d")
bucket = 'mybucket'
dir_path = "/log"
s3 = boto3.client('s3')

def log(in_path):
    for (dir_path, dir, files) in os.walk(in_path):
        for file in files:
            if date in file:
                yield os.path.join(dir_path, file)

for file_name in log(dir_path):
    res = s3.upload_file(file_name, bucket, file_name)
Please note that if you need to keep track of the results,
then you could make a change like so:
...

results = {}
for file_name in log(dir_path):
    results[file_name] = s3.upload_file(file_name, bucket, file_name)
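One caveat, not covered in the original answer: upload_file returns None on success and raises an exception on failure, so the results dict above will only ever hold None. A rough sketch for tracking outcomes instead might look like this:
import boto3.exceptions

results = {}
for file_name in log(dir_path):
    try:
        # upload_file raises on failure rather than returning a status
        s3.upload_file(file_name, bucket, file_name)
        results[file_name] = "ok"
    except boto3.exceptions.S3UploadFailedError as exc:
        results[file_name] = str(exc)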
It was simple. This is my final code, thanks.
import os
import datetime as dt
import boto3
import socket

x = dt.datetime.now()
date = x.strftime("%Y%m%d")
bucket = 'mybucket'
dir_path = "/log"
s3 = boto3.client('s3')

def log(in_path):
    for (dir_path, dir, files) in os.walk(in_path):
        for file in files:
            if date in file:
                yield os.path.join(dir_path, file)

for file_name in log(dir_path):
    key = socket.gethostname() + '/' + file_name
    res = s3.upload_file(file_name, bucket, key)

How to extract the elements from csv to json in S3

I need to find the CSV files in the folder,
list all the files inside the folder, and
convert the files to JSON and save them in the same bucket.
The CSV files look like the one below; there are many such files:
emp_id,Name,Company
10,Aka,TCS
11,VeI,TCS
My code is below:
import boto3
import pandas as pd

def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket('testfolder')
    for file in my_bucket.objects.all():
        print(file.key)
    for csv_f in file.key:
        with open(f'{csv_f.replace(".csv", ".json")}', "w") as f:
            pd.read_csv(csv_f).to_json(f, orient='index')
I am not able to save back to the bucket; if you remove the bucket name, it saves to the local folder. How do I save the output back to the bucket?
You can check the following code:
from io import StringIO
import boto3
import pandas as pd

s3 = boto3.resource('s3')

def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    input_bucket = 'bucket-with-csv-file-44244'
    my_bucket = s3.Bucket(input_bucket)
    for file in my_bucket.objects.all():
        if file.key.endswith(".csv"):
            csv_f = f"s3://{input_bucket}/{file.key}"
            print(csv_f)
            json_file = file.key.replace(".csv", ".json")
            print(json_file)
            json_buffer = StringIO()
            df = pd.read_csv(csv_f)
            df.to_json(json_buffer, orient='index')
            s3.Object(input_bucket, json_file).put(Body=json_buffer.getvalue())
Your lambda layer will need to have:
fsspec
pandas
s3fs
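If you would rather not add fsspec and s3fs to the layer, one alternative sketch (my assumption, not part of the original answer) is to read the object body with boto3 itself and feed it to pandas through an in-memory buffer:
from io import StringIO
import boto3
import pandas as pd

s3 = boto3.resource('s3')
input_bucket = 'bucket-with-csv-file-44244'  # same bucket name as above

for obj in s3.Bucket(input_bucket).objects.all():
    if obj.key.endswith(".csv"):
        # download the CSV body and parse it without s3fs
        body = obj.get()['Body'].read().decode('utf-8')
        df = pd.read_csv(StringIO(body))
        json_buffer = StringIO()
        df.to_json(json_buffer, orient='index')
        s3.Object(input_bucket, obj.key.replace(".csv", ".json")).put(Body=json_buffer.getvalue())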

Uploading local images to the Microsoft Cognitive Face API

Error Screenshot
import sys
import os, time
import cognitive_face as CF
import global_variables as global_var
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

Key = global_var.key
CF.Key.set(Key)

BASE_URL = global_var.BASE_URL  # Replace with your regional Base URL
CF.BaseUrl.set(BASE_URL)

def get_person_id():
    person_id = ''
    extractId = str(sys.argv[1])[-2:]
    connect = sqlite3.connect("Face-DataBase")
    c = connect.cursor()
    cmd = "SELECT * FROM Students WHERE ID = " + extractId
    c.execute(cmd)
    row = c.fetchone()
    person_id = row[3]
    connect.close()
    return person_id

if len(sys.argv) is not 1:
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if filename.endswith(".jpg"):
            print(filename)
            imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
            imgurl = imgurl[3:]
            print("imageurl = {}".format(imgurl))
            res = CF.face.detect(imgurl)
            if len(res) != 1:
                print("No face detected in image")
            else:
                res = CF.person.add_face(imgurl, global_var.personGroupId, person_id)
                print(res)
                time.sleep(6)
else:
    print("supply attributes please from dataset folder")
I think the images should be converted to a byte array, but I don't know how to do that. The local images have to be uploaded to the Cognitive API. I have tried many ways but cannot solve the error.
imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
The line above is where the error occurs.
Welcome to Stack Overflow, @arun.
First of all, as per here, the API you're using is deprecated, and you should switch to this one instead.
Second, the new API has a method called detect_with_stream (ref here) that makes the request to the Face Recognition endpoint using a byte stream instead of a URL (it uses different request headers than the URL-based method). This method accepts a stream of bytes containing your image. I've worked with another Cognitive Services API that performs text recognition, so I've faced this same choice between sending an image URL and the image byte stream. You can generate a byte stream from the file as follows:
image_data = open(image_path, "rb").read()
The variable image_data can be passed to the method.
Edit: Instructions on how to use the new API with the image bytestream
First, install the following pip package:
pip install azure-cognitiveservices-vision-face
Then, you can try this approach.
import sys
import os, time
import global_variables as global_var
from azure.cognitiveservices.vision.face import FaceClient
from msrest.authentication import CognitiveServicesCredentials
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

KEY = global_var.key
ENDPOINT = global_var.endpoint
face_client = FaceClient(ENDPOINT, CognitiveServicesCredentials(KEY))

def get_person_id():
    person_id = ''
    extractId = str(sys.argv[1])[-2:]
    connect = sqlite3.connect("Face-DataBase")
    c = connect.cursor()
    cmd = "SELECT * FROM Students WHERE ID = " + extractId
    c.execute(cmd)
    row = c.fetchone()
    person_id = row[3]
    connect.close()
    return person_id

if len(sys.argv) is not 1:
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if filename.endswith(".jpg"):
            print(filename)
            img_data = open(filename, "rb").read()
            res = face_client.face.detect_with_stream(img_data)
            if not res:
                print('No face detected from image {}'.format(filename))
                continue
            res = face_client.person_group_person.add_face_from_stream(global_var.personGroupId, person_id, img_data)
            print(res)
            time.sleep(6)
else:
    print("supply attributes please from dataset folder")
Edit 2: Notes on traversing all the files in a directory
Ok @arun, your current problem stems from the fact that you're using os.listdir, which only lists the filenames, so you don't have their paths. The quickest solution would be to open each image inside the loop with:
img_data = open(os.path.join(imageFolder, filename), "rb").read()
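As an aside (not part of the original answer), pathlib can give you full paths directly, which sidesteps the os.listdir issue entirely; a small sketch, assuming the same imageFolder variable as above:
from pathlib import Path

# Path.glob yields full paths, so no manual os.path.join is needed
for image_path in Path(imageFolder).glob("*.jpg"):
    img_data = image_path.read_bytes()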

How can I use AWS Lambda to write a file to S3 (Python)?

I have tried to use a Lambda function to write a file to S3. The test shows "succeeded", but nothing appeared in my S3 bucket. What happened? Can anyone give me some advice or solutions? Thanks a lot. Here's my code:
import json
import boto3

def lambda_handler(event, context):
    string = "dfghj"
    file_name = "hello.txt"
    lambda_path = "/tmp/" + file_name
    s3_path = "/100001/20180223/" + file_name
    with open(lambda_path, 'w+') as file:
        file.write(string)
        file.close()
    s3 = boto3.resource('s3')
    s3.meta.client.upload_file(lambda_path, 's3bucket', s3_path)
I've had success streaming data to S3; it has to be encoded to do this:
import boto3

def lambda_handler(event, context):
    string = "dfghj"
    encoded_string = string.encode("utf-8")

    bucket_name = "s3bucket"
    file_name = "hello.txt"
    s3_path = "100001/20180223/" + file_name

    s3 = boto3.resource("s3")
    s3.Bucket(bucket_name).put_object(Key=s3_path, Body=encoded_string)
If the data is in a file, you can read this file and send it up:
with open(filename) as f:
    string = f.read()

encoded_string = string.encode("utf-8")
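For binary or large files, a variation on the same idea (a sketch, reusing the bucket_name, s3_path, and filename from above) is to hand the open file object straight to S3 instead of reading and encoding it in memory first, since put_object accepts a file-like object as the Body:
import boto3

s3 = boto3.resource("s3")

# Body accepts a seekable file-like object, so no manual read()/encode() is needed
with open(filename, "rb") as f:
    s3.Bucket(bucket_name).put_object(Key=s3_path, Body=f)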
My response is very similar to Tim B's, but the most important part is:
1. Go to S3 and create the bucket you want to write to.
2. Follow the steps below, otherwise your Lambda will fail due to permission/access issues. I've copied and pasted the linked content here for you too, just in case they change the URL or move it to some other page.
a. Open the roles page in the IAM console.
b. Choose Create role.
c. Create a role with the following properties:
- Trusted entity – AWS Lambda.
- Permissions – AWSLambdaExecute.
- Role name – lambda-s3-role.
The AWSLambdaExecute policy has the permissions that the function needs to manage objects in Amazon S3 and write logs to CloudWatch Logs.
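If you prefer to script the role creation rather than click through the console, a rough boto3 sketch of steps a-c might look like this (the role name and policy follow the values listed above; adjust to taste):
import json
import boto3

iam = boto3.client('iam')

# Trust policy so the Lambda service can assume the role
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "lambda.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }],
}

iam.create_role(RoleName="lambda-s3-role",
                AssumeRolePolicyDocument=json.dumps(trust_policy))
iam.attach_role_policy(RoleName="lambda-s3-role",
                       PolicyArn="arn:aws:iam::aws:policy/AWSLambdaExecute")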
Copy and paste this into your Lambda Python function:
import json, boto3, os, sys, uuid
from urllib.parse import unquote_plus

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    some_text = "test"
    # put the bucket name you created in step 1 here
    bucket_name = "my_buck_name"
    file_name = "my_test_file.csv"
    lambda_path = "/tmp/" + file_name
    s3_path = "output/" + file_name
    os.system('echo testing... >' + lambda_path)
    s3 = boto3.resource("s3")
    # upload under the s3_path key so the object lands in the output/ prefix
    s3.meta.client.upload_file(lambda_path, bucket_name, s3_path)
    return {
        'statusCode': 200,
        'body': json.dumps('file is created in:' + s3_path)
    }
from os import path
import json, boto3, sys, uuid
import requests

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    bucket_name = "mybucket"
    url = "https://i.imgur.com/ExdKOOz.png"
    # download the image and derive an object key from the URL
    response = requests.get(url)
    filename = get_filename(url)
    img = response.content
    s3 = boto3.resource("s3")
    s3.Bucket(bucket_name).put_object(Key=filename, Body=img)
    return {'statusCode': 200, 'body': json.dumps('file is created in:')}

def get_filename(url):
    # strip the fragment, query string, and scheme, then take the last path component
    fragment_removed = url.split("#")[0]
    query_string_removed = fragment_removed.split("?")[0]
    scheme_removed = query_string_removed.split("://")[-1].split(":")[-1]
    if scheme_removed.find("/") == -1:
        return ""
    return path.basename(scheme_removed)
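As an alternative to the manual splitting in get_filename, urllib.parse can do the URL dissection; a hedged sketch of the same idea:
from os import path
from urllib.parse import urlparse

def get_filename(url):
    # urlparse separates out the scheme, query string, and fragment for us
    return path.basename(urlparse(url).path)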

select multiple files using glob in python

I have to select, in one pass, all the files in a directory that have y2001, y2002, or y2003 in the middle of the filename. How can I do that?
import glob
files = glob.glob('*y2001*.jpg')
You can do it with
import glob
files = glob.glob('*y200[123]*.jpg')
for further reference see http://docs.python.org/2/library/glob.html
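If the years ever stop forming a neat character class (say y2001, y2004, and y2010, hypothetical values here), you can still stay with glob by combining one pattern per year; a small sketch:
import glob

# one glob pattern per year, then merge the results
patterns = ['*y2001*.jpg', '*y2004*.jpg', '*y2010*.jpg']
files = [f for pattern in patterns for f in glob.glob(pattern)]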
Here is an overkill method for solving your problem.
import os
import re
import functools

def validate_file(validators, file_path):
    return any(re.search(validator, file_path) for validator in validators)

def get_matching_files_in_dir(directory, validator, append_dir=True):
    for file_path in os.listdir(directory):
        if validator(file_path):
            yield os.path.join(directory, file_path) if append_dir else file_path

# define your needs:
matching_patterns = ['y2001', 'y2002', 'y2003']
validator = functools.partial(validate_file, matching_patterns)

# usage
list(get_matching_files_in_dir('YOUR DIR', validator))
An Example:
>>> matching_patterns = ['README']
>>> validator = functools.partial(validate_file, matching_patterns)
>>> print list(get_matching_files_in_dir('C:\\python27', validator))
['C:\\python27\\README.txt']
