Yesterday my code was working and inserting my CSV into DynamoDB. Today it cannot identify the bucket_name, and the event that was visible in the CloudWatch logs yesterday while uploading is no longer appearing today.
import boto3

s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')

def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    #bucket_name = event['query']['Records'][0]['s3']['bucket']['name']
    print(bucket_name)
    s3_file_name = event['Records'][0]['s3']['object']['key']
    resp = s3_client.get_object(Bucket=bucket_name, Key=s3_file_name)
    data = resp['Body'].read().decode('utf-8')
    employees = data.split("\n")
    table = dynamodb.Table('employees')
    for emp in employees:
        emp_data = emp.split(',')
        print(emp_data)
        try:
            table.put_item(
                Item={
                    "emp_id": emp_data[0],
                    "Name": emp_data[1],
                    "Company": emp_data[2]
                }
            )
        except Exception as e:
            print('endof file')
    return 'files saved to Dynamodb'
Today I got the error below:
Response:
{
"errorMessage": "'Records'",
"errorType": "KeyError",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 7, in lambda_handler\n bucket_name = event['Records'][0]['s3']['bucket']['name']\n"
]
}
The error means that event does not contain Records.
To check this and protect against the error you can do the following:
def lambda_handler(event, context):
    if 'Records' not in event:
        # execute some operations that you want
        # in case there are no Records
        # in the event
        return

    # continue processing Records if
    # they are available
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    # the rest of your code
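If the KeyError only shows up when you invoke the function from the Lambda console, the test event you are sending is probably not an S3 notification; real uploads deliver a payload with a Records list. For reference, a trimmed S3 put event looks roughly like this (the bucket and object names here are placeholders):

sample_event = {
    "Records": [
        {
            "eventSource": "aws:s3",
            "eventName": "ObjectCreated:Put",
            "s3": {
                "bucket": {"name": "my-example-bucket"},
                "object": {"key": "employees.csv"}
            }
        }
    ]
}

# The handler's lookups then resolve as:
# sample_event['Records'][0]['s3']['bucket']['name']  -> 'my-example-bucket'
# sample_event['Records'][0]['s3']['object']['key']   -> 'employees.csv'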
I am new to coding, so you may find several silly mistakes in my Python code; please bear with me.
I need to write a Python script that loads data in any file format, such as Avro, Parquet, or PNG, from a GCS bucket into BigQuery.
I have tried nested if and elif blocks, but it does not seem to work.
I am using the code below with Python 3.7:
import json
import logging
import os
import traceback
from datetime import datetime

from google.api_core import retry
from google.cloud import bigquery
from google.cloud import storage

# Get ENV variables
BQ_PROJECT_ID = os.getenv('bq_project')
BQ_DATASET_ID = os.getenv('bq_dataset')
SOURCE_LANDING_BUCKET = os.getenv('source_bucket')
DESTINATION_BUCKET = os.getenv('destination_bucket')

# Cloud Storage client
CS = storage.Client()
# BigQuery client
BQ = bigquery.Client()


def bq_load(data, context):
    '''This function is executed whenever a file is added to the Cloud Storage landing bucket'''
    file_name = data['name']
    table_name = file_name.split(".")[0]
    file_extension = str(file_name.split(".")[1])

    # Check the file extension
    if file_extension.lower() == "avro":
        message = 'Perform bq load with file movement, file : \'%s\'' % (file_name)
        logging.info(message)
        _perform_bq_load_file_movement(table_name, file_name)
    elif file_extension.lower() == "png":
        message = 'Perform file movement only, file : \'%s\'' % (file_name)
        logging.info(message)
        source_bucket_name = SOURCE_LANDING_BUCKET
        destination_bucket_name = DESTINATION_BUCKET
        _move_file(file_name, source_bucket_name, destination_bucket_name)
    else:
        message = 'Not supported file format, file : \'%s\'' % (file_name)
        logging.info(message)


def _perform_bq_load_file_movement(table_name, file_name):
    '''This function will load the bq table and perform the file movement'''
    # TODO(developer): Set table_id to the ID of the table to create.
    table_id = "%s.%s.%s" % (BQ_PROJECT_ID, BQ_DATASET_ID, table_name)
    message = 'Table_id : \'%s\'' % (table_id)
    logging.info(message)

    if _if_tbl_exists(table_id):
        destination_table = BQ.get_table(table_id)
        num_rows_added = destination_table.num_rows
        if job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
            source_format=bigquery.SourceFormat.AVRO,
        )
        uri = 'gs://%s/%s' % (SOURCE_LANDING_BUCKET, file_name)
        try:
            load_job = BQ.load_table_from_uri(
                uri, table_id, job_config=job_config
            )
            if job_config = bigquery.LoadJobConfig(
                write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
                source_format=bigquery.SourceFormat.PARQUET,
            )
            uri = 'gs://%s/%s' % (SOURCE_LANDING_BUCKET, file_name)
            try:
                load_job = BQ.load_table_from_uri(
                    uri, table_id, job_config=job_config
                )
            else:
                message = 'issue with the file, file : \'%s\'' % (file_name)
                logging.info(message)
    else:
        message = 'table does not exist, file : \'%s\'' % (file_name)
        logging.info(message)


def _move_file(file_name, source_bucket_name, destination_bucket_name):
    '''This function performs the file movement'''
    source_bucket = CS.get_bucket(source_bucket_name)
    source_blob = source_bucket.blob(file_name)
    destination_bucket = CS.get_bucket(destination_bucket_name)

    source_bucket.copy_blob(source_blob, destination_bucket, file_name)
    source_blob.delete()

    logging.info('File \'%s\' moved from \'%s\' to \'%s\'',
                 file_name,
                 source_bucket_name,
                 destination_bucket_name)


def _if_tbl_exists(table_ref):
    '''This function checks whether the BigQuery table is present or not'''
    from google.cloud.exceptions import NotFound
    try:
        BQ.get_table(table_ref)
        return True
    except NotFound:
        return False


class BigQueryError(Exception):
    '''Exception raised whenever a BigQuery error happened'''

    def __init__(self, errors):
        super().__init__(self._format(errors))
        self.errors = errors

    def _format(self, errors):
        err = []
        for error in errors:
            err.extend(error['errors'])
        return json.dumps(err)
I tried using nested if and elif blocks, but I keep getting stuck on syntax and indentation errors; no Google search has helped me so far.
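For what it's worth, the syntax errors come from the two if job_config = ... lines (= is assignment, which is not allowed inside an if condition) and from the try: blocks that have no matching except or finally. A minimal sketch of how that branch could be written instead, assuming the imports, globals (BQ, BQ_PROJECT_ID, BQ_DATASET_ID, SOURCE_LANDING_BUCKET, DESTINATION_BUCKET) and helpers (_if_tbl_exists, _move_file) from the code above, and picking the source format from a dict rather than nested ifs:

SOURCE_FORMATS = {
    "avro": bigquery.SourceFormat.AVRO,
    "parquet": bigquery.SourceFormat.PARQUET,
}

def _perform_bq_load_file_movement(table_name, file_name):
    '''Load a supported file into BigQuery, then move it to the destination bucket.'''
    table_id = "%s.%s.%s" % (BQ_PROJECT_ID, BQ_DATASET_ID, table_name)
    file_extension = file_name.split(".")[-1].lower()

    if not _if_tbl_exists(table_id):
        logging.info("table does not exist, file : '%s'", file_name)
        return

    source_format = SOURCE_FORMATS.get(file_extension)
    if source_format is None:
        logging.info("issue with the file, file : '%s'", file_name)
        return

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        source_format=source_format,
    )
    uri = "gs://%s/%s" % (SOURCE_LANDING_BUCKET, file_name)
    load_job = BQ.load_table_from_uri(uri, table_id, job_config=job_config)
    load_job.result()  # wait for the load job to finish (raises on failure)
    logging.info("Loaded '%s' into '%s'", file_name, table_id)
    _move_file(file_name, SOURCE_LANDING_BUCKET, DESTINATION_BUCKET)

The caller (bq_load) would then route both .avro and .parquet files to this one function instead of branching on each format separately.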
I have the Lambda function code below, which transfers objects from an S3 bucket to an AWS RDS database.
import json
import boto3
import pymysql

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    bucket_name = event["bucket"]
    s3_file_name = event["object"]
    resp = s3_client.get_object(Bucket=bucket_name, Key=s3_file_name)
    data = resp['Body']

    rds_endpoint = ""
    username = #username for RDS Mysql
    password = # RDS Mysql password
    db_name = # RDS MySQL DB name

    conn = None
    try:
        conn = pymysql.connect(host=rds_endpoint, user=username, password=password, database=db_name)
    except pymysql.MySQLError as e:
        print("ERROR: Unexpected error: Could not connect to MySQL instance.")

    try:
        cur = conn.cursor()
        cur.execute(#db stuff)
        conn.commit()
    except Exception as e:
        print(e)
        return 'Table not created!'

    with conn.cursor() as cur:
        try:
            cur.execute(#db stuff)
            conn.commit()
            output = cur.execute()
        except:
            output = ("Entry not inputted! Error!")

    print("Deleting the csv file from s3 bucket")

    return {
        'statusCode': 200,
        'body': 'Successfully uploaded!'
    }
The code above works fine with this test event:
{
"bucket": "python-bucket",
"object": "bobmarley.mp3"
}
However, when I try to adapt it to the S3 trigger by changing these lines as shown in this tutorial (https://www.data-stats.com/s3-data-ingestion-to-rds-through-lambda/):
bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
s3_file_name = event["Records"][0]["s3"]["object"]["key"]
I get this error:
[ERROR] TypeError: list indices must be integers or slices, not str
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 7, in lambda_handler
bucket_name = event["Records"]["s3"]["bucket"]["name"]
I am trying to read an S3 object (a CSV) whose key is the execution ID of an AWS Athena query:
def run_query(query, database, s3_output):
    client = boto3.client('athena')
    response = client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': s3_output,
        }
    )
    print('Execution ID: ' + response['QueryExecutionId'])
    return response

response = run_query(query1, db, s3_output)
result = get_exec_status(response)
print(result)

s3_resource = boto3.resource('s3')
s3_client = boto3.client('s3')

def read_s3(path):
    path = path.replace("s3://", "")
    bucket, key = path.split('/', 1)
    s3_client.copy_object(Bucket=bucket, CopySource=path, Key=".csv")
    s3_client.delete_object(Bucket=bucket, Key=key)

read_s3("s3://" + response + ".csv")
Error:
File "athena_connect.py", line 67, in <module>
read_s3("s3://"+ response + ".csv")
File "athena_connect.py", line 64, in read_s3
s3_client.copy_object(Bucket=bucket, CopySource=path, Key=".csv")
botocore.errorfactory.NoSuchKey: An error occurred (NoSuchKey) when calling the CopyObject operation: The specified key does not exist.
But when I hard-code
response = 'somekey'
the code works fine. What might be wrong?
The error is:
The specified key does not exist
This means the program is trying to read a non-existent object in Amazon S3.
This line:
read_s3("s3://"+ response + ".csv")
is expecting response to be a string that contains the Key to the file.
However, response is used earlier as a dictionary:
print('Execution ID: ' + response['QueryExecutionId'])
Therefore, it might be better to use:
read_s3("s3://"+ response['QueryExecutionId'] + ".csv")
success = False
while not success and exec_id:
    result = get_exec_status(exec_id, config)
    if result == 'SUCCEEDED':
        success = True
        print(result)
        break

Add this polling loop, so the code waits until the query has finished, and it will work fine.
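get_exec_status is not shown in the question; a minimal sketch of such a helper using boto3's Athena get_query_execution call (the config parameter is kept only to match the loop above and is unused here):

import time
import boto3

def get_exec_status(exec_id, config=None):
    '''Return the current state of an Athena query execution, e.g. SUCCEEDED.'''
    athena = boto3.client('athena')
    resp = athena.get_query_execution(QueryExecutionId=exec_id)
    return resp['QueryExecution']['Status']['State']

# Usage, assuming `response` is the dict returned by run_query():
# exec_id = response['QueryExecutionId']
# while get_exec_status(exec_id) in ('QUEUED', 'RUNNING'):
#     time.sleep(2)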
How to skip the first row when reading the object using the get_object API
import os
import boto3
import json
import logging

def lambda_handler(event, context):
    # Fetch the bucket name and the file
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Generate record in DynamoDB
    try:
        # Declare S3 and DynamoDB Boto3 clients
        s3_client = boto3.client('s3')
        dynamodb = boto3.resource('dynamodb')

        # Read the object using the get_object API
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        rows = obj['Body'].read().decode("utf-8").split('\n')

        tableName = os.environ['DB_TABLE_NAME']
        table = dynamodb.Table(tableName)
        logging.info("TableName: " + tableName)

        # Need the client just to access the exception
        dynamodb_client = boto3.client('dynamodb')

        try:
            # Write the CSV file to the DynamoDB table
            with table.batch_writer() as batch:
                for row in rows:
                    batch.put_item(Item={
                        'x': row.split(',')[0],
                        'c': row.split(',')[1],
                        'w': row.split(',')[2],
                        'f': row.split(',')[3]
                    })
            print('Finished Inserting into TableName: ' + tableName)
        except dynamodb_client.exceptions.ResourceNotFoundException as tableNotFoundEx:
            return ('ERROR: Unable to locate DynamoDB table: ', tableName)
    except KeyError as dynamoDBKeyError:
        msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
        print(dynamoDBKeyError)
        return msg
The code above reads the CSV and inserts it into DynamoDB. The issue is that the header row (the column names) also gets inserted into the table. How do I skip the first row and start parsing from the second row? next() doesn't work for me.
Perhaps not the best solution but this should do the trick:
import os
import boto3
import json
import logging

def lambda_handler(event, context):
    # Fetch the bucket name and the file
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Generate record in DynamoDB
    try:
        # Declare S3 and DynamoDB Boto3 clients
        s3_client = boto3.client('s3')
        dynamodb = boto3.resource('dynamodb')

        # Read the object using the get_object API
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        rows = obj['Body'].read().decode("utf-8").split('\n')

        tableName = os.environ['DB_TABLE_NAME']
        table = dynamodb.Table(tableName)
        logging.info("TableName: " + tableName)

        # Need the client just to access the exception
        dynamodb_client = boto3.client('dynamodb')

        try:
            first = True
            # Write the CSV file to the DynamoDB table
            with table.batch_writer() as batch:
                for row in rows:
                    if first:
                        # Skip the header row
                        first = False
                    else:
                        batch.put_item(Item={
                            'x': row.split(',')[0],
                            'c': row.split(',')[1],
                            'w': row.split(',')[2],
                            'f': row.split(',')[3]
                        })
            print('Finished Inserting into TableName: ' + tableName)
        except dynamodb_client.exceptions.ResourceNotFoundException as tableNotFoundEx:
            return ('ERROR: Unable to locate DynamoDB table: ', tableName)
    except KeyError as dynamoDBKeyError:
        msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
        print(dynamoDBKeyError)
        return msg
It would probably be better to use a for i in range(1, len(rows)) loop, but the above required the fewest changes to the code.
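A slightly smaller change is to slice the header off before the loop, sketched here against the same rows, table, and column layout as the code above:

with table.batch_writer() as batch:
    for row in rows[1:]:      # rows[1:] skips the header row
        if not row.strip():   # also skip any trailing blank line
            continue
        fields = row.split(',')
        batch.put_item(Item={
            'x': fields[0],
            'c': fields[1],
            'w': fields[2],
            'f': fields[3]
        })

For CSV files that may contain quoted commas, the standard csv module would be safer than str.split.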
I want to be able to update a text file whenever I upload an image to the S3 bucket. The text file will contain the results of Amazon Rekognition on each line. However, the code I've written isn't working properly:
bucket_name = "update-my-text-file"
rekognition = boto3.client('rekognition')
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
def handle_image(key):
response = rekognition.detect_labels(
Image={
'S3Object': {
'Bucket': bucket_name,
'Name': key
}
}
)
return response
def lambda_handler(event, context):
file_name = 'results.txt'
object = s3.Object(bucket_name, 'tmp/results.txt')
cli = boto3.client('s3')
response = cli.get_object(Bucket=bucket_name, Key='tmp/results.txt')
data = response['Body'].read()
print('the data is ' + data)
key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key'].encode('utf8'))
response = handle_image(key)
print('the response is: ' + response)
object.put(Body=data + '/n' + response)
You might find it easier to download the file like this:
import boto3
s3_client = boto3.client('s3')
s3_client.download_file('mybucket', 'hello.txt', '/tmp/hello.txt')
You can then read and modify the local file however you wish, and upload it again with:
s3_client.upload_file('/tmp/hello.txt', 'mybucket', 'hello.txt')
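Putting the two together with the handler from the question, a rough sketch (the bucket name and results key follow the question; how the labels are formatted into each line is just an assumption):

import json
import urllib.parse
import boto3

s3_client = boto3.client('s3')
rekognition = boto3.client('rekognition')

BUCKET_NAME = 'update-my-text-file'
RESULTS_KEY = 'tmp/results.txt'
LOCAL_PATH = '/tmp/results.txt'

def lambda_handler(event, context):
    # Key of the image that triggered this invocation
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'])

    # Download the current results file to the Lambda's local /tmp storage
    s3_client.download_file(BUCKET_NAME, RESULTS_KEY, LOCAL_PATH)

    # Run Rekognition on the uploaded image
    response = rekognition.detect_labels(
        Image={'S3Object': {'Bucket': BUCKET_NAME, 'Name': key}}
    )
    labels = [label['Name'] for label in response['Labels']]

    # Append one line for this image, then upload the file back
    with open(LOCAL_PATH, 'a') as f:
        f.write(key + ': ' + json.dumps(labels) + '\n')
    s3_client.upload_file(LOCAL_PATH, BUCKET_NAME, RESULTS_KEY)

If the S3 trigger covers the whole bucket, uploading results.txt will invoke the function again, so it is worth restricting the trigger to image suffixes (or skipping keys under tmp/).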