Update Text File Using Lambda - python

I want to be able to update a text file whenever I upload an image to the s3 bucket. This text file will contain on each line the results of Amazon Rekognition. However, the code I've written isn't working properly
bucket_name = "update-my-text-file"
rekognition = boto3.client('rekognition')
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
def handle_image(key):
response = rekognition.detect_labels(
Image={
'S3Object': {
'Bucket': bucket_name,
'Name': key
}
}
)
return response
def lambda_handler(event, context):
file_name = 'results.txt'
object = s3.Object(bucket_name, 'tmp/results.txt')
cli = boto3.client('s3')
response = cli.get_object(Bucket=bucket_name, Key='tmp/results.txt')
data = response['Body'].read()
print('the data is ' + data)
key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key'].encode('utf8'))
response = handle_image(key)
print('the response is: ' + response)
object.put(Body=data + '/n' + response)

You might find it easier to download the file like this:
import boto3
s3_client = boto3.client('s3')
s3_client.download_file('mybucket', 'hello.txt', '/tmp/hello.txt')
Then you can read/write the local file however you wish. Then, upload again with:
s3_client.upload_file('/tmp/hello.txt', 'mybucket', 'hello.txt')

Related

issue with Cloud Function python

** i have a google cloud function which needs to connect to url and get data in the form of csv files and store in one bucket. this is what written in python code .
when i test the function its compiling successfully but its not working at all. when i checked the log its giving the eblwo mentioned error.
favt_LnT_acn_blackline_data_pull_func43jttmffma0g Invalid constructor input for AccessSecretVersionRequest: 'projects/gcp-favt-acn-rpt-dev/secrets/blackline_api_key/versions/latest'
please find the code and suggest.
Thanks,
Vithal
**
'
import base64
import logging
import requests
#import pandas as pd
#from pandas import json_normalize
import json
import os
import datetime
from datetime import datetime as dt
import pytz
from google.cloud import storage
from google.cloud import secretmanager
def delete_and_upload_blob(landing_bucket_name,
source_file_name,
landing_blob_name,
retention_bucket_name,
file_retention_flag,
retn_file_suffix,
rpt_last_run_file):
storage_client = storage.Client()
bucket = storage_client.bucket(landing_bucket_name)
blob = bucket.blob(landing_blob_name)
rpt_last_run_blob = bucket.blob('some.csv')
retention_bucket = storage_client.bucket(retention_bucket_name)
if blob.exists(storage_client):
#Delete the old file
blob.delete()
print('File {} is deleted from Cloud Storage before
Upload'.format(landing_blob_name))
else:
print('No Such File Exists in Storage Bucket to Delete. So,
proceeding with Upload')
#Upload new one
blob.upload_from_filename(source_file_name)
print("File {} uploaded to Bucket {} With Name
{}.".format(source_file_name, bucket, landing_blob_name))
if file_retention_flag == 'Y':
#Copy the last file of the day to retention bucket
new_file_name = retn_file_suffix + '_' + landing_blob_name
blob_copy = bucket.copy_blob(blob, retention_bucket,
new_file_name)
print('File {} is copied to Retention Bucket
{}'.format(new_file_name, retention_bucket))
if rpt_last_run_blob.exists(storage_client):
#Delete the old file
rpt_last_run_blob.delete()
print('File {} is deleted from Cloud Storage before
Upload'.format(rpt_last_run_blob))
else:
print('No Such File Exists in Storage Bucket to Delete. So,
proceeding with Upload')
#Upload new one
rpt_last_run_blob.upload_from_filename(rpt_last_run_file)
print("File {} uploaded to Bucket {} With Name
{}.".format(rpt_last_run_file, bucket,
'Reports_Latest_Run_time.csv'))
def api_request():
et = pytz.timezone("US/Eastern")
current_et_time = dt.now().astimezone(et)
print('Current ET Time:', current_et_time)
pt = pytz.timezone("US/Pacific")
ut = pytz.timezone("UTC")
blackline_base_url = "https://....com"
blackline_sts_url = blackline_base_url + "/authorize/connect/token"
project_id = 'gcp-favt-acn-dev'
secret_id = '###_api_key'
secret_client = secretmanager.SecretManagerServiceClient()
secret_name =
secret_client.secret_version_path(project_id,secret_id,'latest')
secret_resp = secret_client.access_secret_version(secret_name)
api_key = secret_resp.payload.data.decode('UTF-8')
grant_type = 'password'
scope = '####'
username = '####'
payload = 'grant_type='+grant_type+'&scope='+scope+
'&username='+username+'&password='+api_key
sts_headers = { 'Authorization': 'Basic dXBzOk5KXXx2VENsSiEtRw==',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie':
'BLSIAPPEN=!bpJj4AOTHPcaqipWtDI6FrozN629M9xYLA/
sbM1DWVH+jjuY5fgHVMACha2rIapXRoB7CcqnlaHgBw=='}
response = requests.request("POST", ###_sts_url, headers =
sts_headers, data = payload)
if response.ok:
sts_response = response.json()
access_token = sts_response['access_token']
print(access_token)
blackline_rpt_submit_url = ##_base_url + '/api/queryruns'
rpt_payload = ''
blackline_rpt_api_headers =
{'Authorization': 'Bearer {}'.format(access_token), 'Content-Type':
'text/plain'}
rpt_resp = requests.request("GET", blackline_rpt_submit_url, headers
= blackline_rpt_api_headers, data = rpt_payload)
print(rpt_resp.text)
jl = json.loads(rpt_resp.text)
reports_list = []
rprts_filename = "tmp_rprts.csv"
rprts_full_path = os.path.join("/tmp",rprts_filename)
with open(rprts_full_path, 'w') as f:
f.write('ReportName,ReportLastRunTime'+'\n')
hrs = -2
hrs_to_subtract = datetime.timedelta(hours=hrs)
two_hrs_ago_time = current_et_time + hrs_to_subtract
#print(two_hrs_ago_time)#latest_rpt_check_time)
frmtd_curr_time = two_hrs_ago_time.strftime('%Y-%m-%d %H:%M:%S')
latest_rpt_check_time =
dt.strptime(frmtd_curr_time,'%Y-%m-%d %H:%M:%S')
print("Latest Report Check Time:", latest_rpt_check_time)
for each in jl:
strpd_time = dt.strptime(each['endTime'][0:19],'%Y-%m-
%dT%H:%M:%S')
#print(strpd_time)
pt_localize = pt.localize(strpd_time)
#print(pt_localize)
et_time = pt_localize.astimezone(et)
#print(et_time)
frmtd_et_time = et_time.strftime('%Y-%m-%d %H:%M:%S')
#print(frmtd_et_time)
cnvrted_endTime = dt.strptime(frmtd_et_time,'%Y-%m-%d %H:%M:%S')
#print("Report LastRun EndTime:", cnvrted_endTime)
ut_time = pt_localize.astimezone(ut)
frmtd_ut_time = ut_time.strftime('%Y-%m-%d %H:%M:%S')
if cnvrted_endTime > latest_rpt_check_time:
reports_list.append({each['name']:each['exportUrls'][0]
["url"]})
rpt_last_run = each['name']+','+frmtd_ut_time
print(rpt_last_run)
with open(rprts_full_path, 'a') as f:
f.write(rpt_last_run+'\n')
retn_file_suffix = each['endTime'][0:10]
#print(retn_file_suffix)
rpt_run_hr = cnvrted_endTime.hour
#print(rpt_run_hr)
#############
print(reports_list)
for report in reports_list:
for k in report:
print(report[k])
report_fetch_url = blackline_base_url + '/' + report[k]
print('Report Fetch URL: {}'.format(report_fetch_url))
filename = "temp_file.csv"
full_path = os.path.join("/tmp",filename)
rpt_data = requests.request("GET", report_fetch_url, headers
= blackline_rpt_api_headers)
print(rpt_data.text)
with open(full_path,'wb') as tmp_file:
tmp_file.write(rpt_data.content)
#Upload it to Cloud Storage
landing_bucket_name = "####_dev_landing_bkt" #CHANGE ME
source_file_name = os.path.join(full_path)
rpt_last_run_file = os.path.join(rprts_full_path)
landing_blob_name = '##.csv' #CHANGE ME
retention_bucket_name = '####_dev_retention_bkt'
print('file retention check')
if (rpt_run_hr >= 22):
file_retention_flag = 'Y'
else:
file_retention_flag = 'N'
print(file_retention_flag)
delete_and_upload_blob(landing_bucket_name,
source_file_name,
landing_blob_name,
retention_bucket_name,
file_retention_flag,
retn_file_suffix,
rpt_last_run_file)
#Remove the temp file after it is uploaded to Cloud Storage to
avoid OOM issues with the Cloud Function.
os.remove(full_path)
#Remove the tmp file after upload
os.remove(rprts_full_path)
#def pacific_to_eastern_conversion(pacific_time, eastern_time):
def main(event,context):
try:
if 'data' in event:
name = base64.b64decode(event['data']).decode('utf-8')
else:
name = 'World'
print('Hello{}',format(name))
api_request()
except Exception as e:
logging.error(e)' enter code here
The approach you are using will work for Cloud Run but won't work for Cloud functions.
To make use of secrets in Google cloud functions, following are the steps:
Make sure that the function's runtime service account must be granted access to the secret. To use Secret Manager with Cloud Functions, assign the roles/secretmanager.secretAccessor role to the service account associated with your function.
Make the secret accessible to the function. This can be done using either the Google Cloud Console or the gcloud command-line tool.
I exposed the secret as an environment variable(with name set to "api_key") and accessed them in the code as stated below:
import os
api_key = os.environ.get('api_key')
I hope this answers your question.
Your cloud functions service account haven't access to Secret manager. Grant your Cloud Functions service account on the secret, or on the project (not recommended).
If you don't set a custom service account on your Cloud Functions (which is also not a good practice), the App Engine default service account is used. Here the pattern <ProjectID>#appspot.gserviceaccount.com

Listing objects from each and every bucket present in my s3

I have 5 buckets in my S3. I have to list objects for every bucket present in my s3 by python script. I am writing script something like this :
import boto3
def lambda_handler(event, context):
s3 = boto3.client('s3')
response = s3.list_buckets()
print('Existing buckets:')
for bucket in response['Buckets']:
for obj in bucket.object.all(['bucket']):
response = obj.get(
Key, StorageClass, Size)
print(response)
You can check the following code:
import boto3
s3 = boto3.client('s3')
s3r = boto3.resource('s3')
def lambda_handler(event, context):
response = s3.list_buckets()
for bucket_info in response['Buckets']:
bucket = s3r.Bucket(bucket_info['Name'])
print('Existing buckets:', bucket_info['Name'])
for object in bucket.objects.all():
print(' - ', object.key)

Cannot upload s3 files to another region (clients bucket) despite successful response

This is my code. I am trying to copy a directory from one bucket to another. I am seeing everything is positive, but files are not appearing in the clients bucket.
import boto3
ACCESS_KEY = 'access_key'
SECRET_KEY = 'secret_key'
REGION_NAME = 'US_EAST_1'
source_bucket = 'source_bucket'
#Make sure you provide / in the end
source_prefix = 'source_prefix'
target_bucket = 'target-bucket'
target_prefix = 'target-prefix'
client = boto3.client('s3')
session_src = boto3.session.Session()
source_s3_r = session_src.resource('s3')
def get_s3_keys(bucket, prefix):
keys = []
response = client.list_objects_v2(Bucket=bucket,Prefix=prefix,MaxKeys=100)
for obj in response['Contents']:
keys.append(obj['Key'])
return keys
session_dest = boto3.session.Session(aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
dest_s3_r = session_dest.resource('s3')
# create a reference to source image
old_obj = source_s3_r.Object(source_bucket, source_prefix)
# create a reference for destination image
new_obj = dest_s3_r.Object(target_bucket, target_prefix)
keys = get_s3_keys(source_bucket, source_prefix)
responses = []
# upload the image to destination S3 object
for filename in keys:
print("Transferring file {}, {}".format(source_bucket,filename))
old_obj = source_s3_r.Object(source_bucket, filename)
response = new_obj.put(Body=old_obj.get()['Body'].read())
response_code = response['ResponseMetadata']['HTTPStatusCode']
responses.append(response_code)
print("File transfer response {}".format(response_code))
distinct_response = list(set(responses))
if len(distinct_response) > 1 or distinct_response[0] != 200:
print("File could not be transfered to krux bucket. Exiting now")
exit(1)
else:
print("File transfer to krux bucket successful")
I am getting a successful response code of 200 but the file is not transferred across.
Srinivas, Try this
I used S3 Resource object, try equivalent S3 Client if you want...
bucket= s3.Bucket(bucket_name) #from_bucket
for osi in bucket.objects.all():
print(osi)
copy_source={
'Bucket': bucket.name,
'Key': osi.key
}
s3.Bucket('to_bucket').copy(copy_source, osi.key)
Hope it helps..
r0ck

AWS Athena python connection S3 error when trying retrieve object using execution ID

When trying to read the S3 object that is CSV the response is the execution ID of the AWS Athena query:
def run_query(query, database, s3_output):
client = boto3.client('athena')
response = client.start_query_execution(
QueryString=query,
QueryExecutionContext={
'Database': database
},
ResultConfiguration={
'OutputLocation': s3_output,
}
)
print('Execution ID: ' + response['QueryExecutionId'])
return response
response = run_query(query1, db, s3_output)
result = get_exec_status(response)
print(result)
s3_resource = boto3.resource('s3')
s3_client = boto3.client('s3')
def read_s3(path):
path = path.replace("s3://", "")
bucket, key = path.split('/', 1)
s3_client.copy_object(Bucket=bucket, CopySource=path, Key=".csv")
s3_client.delete_object(Bucket=bucket, Key=key)
read_s3("s3://"+ response + ".csv")
Error:
File "athena_connect.py", line 67, in <module>
read_s3("s3://"+ response + ".csv")
File "athena_connect.py", line 64, in read_s3
s3_client.copy_object(Bucket=bucket, CopySource=path, Key=".csv")
botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when calling the CopyObject operation: The specified key does not exist.
But, when
response ='somekey'
this code is working fine. What might be wrong?
The error is:
The specified key does not exist
This means the program is trying to read a non-existent object in Amazon S3.
This line:
read_s3("s3://"+ response + ".csv")
is expecting response to be a string that contains the Key to the file.
However, response is used earlier as a dictionary:
print('Execution ID: ' + response['QueryExecutionId'])
Therefore, it might be better to use:
read_s3("s3://"+ response['QueryExecutionId'] + ".csv")
success = False
while not success and exec_id:
result = get_exec_status(exec_id, config)
if result == 'SUCCEEDED':
success = True
print(result)
break
add this it will work fine

How to check S3 bucket have tags or not

I tried to check the existing s3 buckets have tags or not, if bucket not have tags, will add the tags, i tried below code
for region in region_list:
s3 = boto3.resource('s3', region)
s3_client = boto3.client('s3', region)
for bucket in s3.buckets.all():
s3_bucket = bucket
s3_bucket_name = s3_bucket.name
response = s3_client.get_bucket_tagging(Bucket=s3_bucket_name)
tagset = response['TagSet']
if len(response['TagSet'])==0:
print "s3 bucket not have tags, adding tags"
else:
pass
but getting below error
Traceback (most recent call last):
File "C:\Python27\ec2info.py", line 235, in <module>
response = s3_client.get_bucket_tagging(Bucket=s3_bucket_name)
File "C:\Python27\lib\site-packages\botocore\client.py", line 314, in
_api_call
return self._make_api_call(operation_name, kwargs)
File "C:\Python27\lib\site-packages\botocore\client.py", line 612, in
_make_api_call
raise error_class(parsed_response, operation_name)
ClientError: An error occurred (NoSuchTagSet) when calling the
GetBucketTagging operation: The TagSet does not exist
where i am doing wrong here, what is the correct way of checking s3 bucket have tags or not
Thanks in advance for your help
Because get_bucket_tagging throws NoSuchTagSet when there are no tags. Catch the exception and create tags. Also, do not loop through regions, you will get all buckets irrespective of the region endpoint you connect to.
See: NoSuchTagSet when calling the GetBucketTagging operation
from botocore.exceptions import ClientError
for bucket in s3.buckets.all():
s3_bucket = bucket
s3_bucket_name = s3_bucket.name
try:
response = s3_client.get_bucket_tagging(Bucket=s3_bucket_name)
#print response
#tagset = response['TagSet']
except ClientError:
print s3_bucket_name, "does not have tags, adding tags"
To expand on the correct answer of helloV, catch the correct exception as following:
s3_client = boto3.client('s3')
bucket_name = 'mybucket'
try:
response = s3_client.get_bucket_tagging(Bucket=bucket_name)
tags = response["TagSet"]
except ClientError as e:
if e.response['Error']['Code'] == 'NoSuchTagSet':
tags = {}
else:
raise e
here is the complete code how you will do it
import boto3
from botocore.exceptions import ClientError
s3 = boto3.client('s3')
s3_re = boto3.resource('s3')
for bucket in s3_re.buckets.all():
s3_bucket = bucket
s3_bucket_name = s3_bucket.name
bucket_tagging = s3_re.BucketTagging(s3_bucket_name)
try:
response = s3.get_bucket_tagging(Bucket=s3_bucket_name)
except ClientError:
print (bucket+ ",does not have tags, add tag")
print("give key : ")
inp_key = input()
print("give value : ")
inp_val = input()
response = bucket_tagging.put(
Tagging={
'TagSet': [
{
'Key': inp_key,
'Value': inp_val
},
]
}
)
Hope this code helps to keep track of your s3 tags
#s3 Buckets
import boto3
from botocore.exceptions import ClientError
s3_client = boto3.client('s3')
dict_of_s3_buckets = s3_client.list_buckets()
list_of_s3_buckets= [each['Name'] for each in dict_of_s3_buckets['Buckets']]
i=0
s3_bucket_tag_status={}
while i<len(list_of_s3_buckets):
s3_bucket_name = list_of_s3_buckets[i]
try:
response = s3_client.get_bucket_tagging(Bucket=s3_bucket_name)
tags = response['TagSet']
s3_bucket_tag_status[s3_bucket_name]=tags
except ClientError:
#print(s3_bucket_name, "does not have tags")
no_tags='does not have tags'
s3_bucket_tag_status[s3_bucket_name]=no_tags
i+=1
#changing to pandas dataframe (if required)
import pandas as pd
s3_bucket_tags= pd.DataFrame.from_dict(s3_bucket_tag_status,orient='index').reset_index().rename(columns={'index':'bucketName',0:'Tags'})

Categories