Python AWS Lambda function URL invokes twice

I seem to have a problem with my lambda function. My function invokes only once when I test it on AWS. However, when I invoke the lambda function through the function URL it executes twice! Has anyone experienced this before?
import json
import boto3

dynamodb = boto3.resource('dynamodb')
ddbClient = boto3.client('dynamodb')

def LatestDBrow():
    ...
    return row

#Invoke lambda function
def lambda_handler(event, context):
    scratchPadlist = []
    scratchPadlist = LatestDBrow()
    #print(scratchPadlist)

    #Put TS row into variables
    inBatchTrial = scratchPadlist[0]
    inNoOfUsersProcessed = scratchPadlist[1]
    inl1Alpha = scratchPadlist[2]
    inl1Beta = scratchPadlist[3]

    #Update wanted values
    inBatchTrial += 1
    inl1Alpha = 1
    inl1Beta = 1

    TableColNames = ['Batch Trial', 'No. of users processed', 'L1 Alpha',
                     'L1 Beta', 'L1 N', 'L1 Rewards',
                     'L1 Click Pct', 'L2 Alpha', 'L2 Beta',
                     'L2 N', 'L2 Rewards', 'L2 Click Pct',
                     'Next Page Shown']

    TSlist = [inBatchTrial, inNoOfUsersProcessed, inl1Alpha,
              inl1Beta, inl1N, inl1Rewards,
              inl1ClickPct, inl2Alpha, inl2Beta,
              inl2N, inl2Rewards, inl2ClickPct,
              innextPageShown]

    dbDict = {TableColNames[i]: str(TSlist[i]) for i in range(len(TableColNames))}

    table1 = dynamodb.Table("TestDB")
    table1.put_item(Item=dbDict)
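A common cause of this behaviour is that the second invocation is not a retry at all: when the function URL is opened in a browser, the browser usually issues a second request for /favicon.ico, which hits the same URL and runs the handler again. Below is a minimal sketch of how you might confirm and guard against that; the early return for the favicon path is an assumption about where the duplicate comes from, and the logged values should tell you for sure.

def lambda_handler(event, context):
    # Function URLs deliver the HTTP API v2.0 event format; log what actually arrived
    path = event.get('rawPath', '')
    print("Invoked with path:", path, "request id:", context.aws_request_id)

    # If the duplicate is the browser fetching /favicon.ico, skip it before touching DynamoDB
    if path == '/favicon.ico':
        return {"statusCode": 204}

    # ... rest of the handler as above ...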

Related

Python / Boto3 / How can I get a tag list with pagination?

I'd like to get the list of RDS instances and their tags using boto3, without the 100-record limit.
This is the code for getting the list of RDS instances and their tags:
client = boto3.client('rds')
instances = client.describe_db_instances()['DBInstances']

for i in instances:
    db_instance_name = i['DBInstanceIdentifier']
    arn = i['DBInstanceArn']
    tags = client.list_tags_for_resource(ResourceName=arn)
    for item in tags['TagList']:
        if item['Key'] == 'Name':
            print(db_instance_name, item['Value'])
And this is the code for pagination.
def all_rds_instances(page_size=20):
    client = session.client('rds')
    marker = ""
    pool = []
    while True:
        for instance in pool:
            yield instance
        if marker is None:
            break
        result = client.describe_db_instances(MaxRecords=page_size, Marker=marker)
        marker = result.get("Marker")
        pool = result.get("DBInstances")
How can I combine these two pieces of code?
You just need to change the for loop to iterate over your all_rds_instances generator.
Your script would look like:
import boto3

client = boto3.client('rds')

def all_rds_instances(page_size=20):
    marker = ''
    pool = []
    while True:
        for instance in pool:
            yield instance
        if marker is None:
            break
        result = client.describe_db_instances(MaxRecords=page_size, Marker=marker)
        marker = result.get('Marker')
        pool = result.get('DBInstances')

for i in all_rds_instances():
    db_instance_name = i['DBInstanceIdentifier']
    arn = i['DBInstanceArn']
    tags = client.list_tags_for_resource(ResourceName=arn)
    for item in tags['TagList']:
        if item['Key'] == 'Name':
            print(db_instance_name, item['Value'])
When you use the yield keyword your function becomes a generator, and it works the same way as any iterable in Python.
There is a good answer about how generators work here.
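If you prefer not to manage the marker yourself, boto3 also exposes a built-in paginator for describe_db_instances. A minimal sketch of the same loop using it (PageSize=20 is just an illustrative value):

import boto3

client = boto3.client('rds')

# The paginator handles the Marker bookkeeping internally
paginator = client.get_paginator('describe_db_instances')
for page in paginator.paginate(PaginationConfig={'PageSize': 20}):
    for i in page['DBInstances']:
        arn = i['DBInstanceArn']
        tags = client.list_tags_for_resource(ResourceName=arn)
        for item in tags['TagList']:
            if item['Key'] == 'Name':
                print(i['DBInstanceIdentifier'], item['Value'])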

How to query DynamoDB with a Lambda function with multiple filters passed from an API

I am creating a Lambda function with DynamoDB to list items and trying to pass parameters to filter the data. Without any filter I am able to get the data with the scan method, but when passing a filter I get an error. Below is the code I am trying:
from __future__ import print_function
import json
import boto3
from boto3.dynamodb.conditions import Key, Attr

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('table-all')

def scan_table_allpages(self, table_name, filter_key=None, filter_value=None):
    table = self.dynamodb_resource.Table(table)
    if filter_key and filter_value:
        filtering_exp = Key(filter_key).eq(filter_value)
        response = table.scan(FilterExpression=filtering_exp)
    else:
        response = table.scan()
    items = response['Items']
    while True:
        print(len(response['Items']))
        if response.get('LastEvaluatedKey'):
            response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
            items += response['Items']
        else:
            break
    return items

# def lambda_handler(event, context):
#     print(table.creation_date_time)
#     response = table.get_items(
#         Key={
#             'Country':event['pathParameters']['USA']
#         }
#     )
#     #response = table.query()
#     #print(response)
#     return response

scan_table_allpages(self.table, filter_key="Country", filter_value='USA')
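No answer is recorded here, but a few issues stand out in the snippet: the stray self parameter, Table(table) referencing an undefined name instead of table_name, and the FilterExpression being dropped on the follow-up scan calls for later pages. A minimal corrected sketch under those assumptions (the table and attribute names are the ones from the question, and Attr is used for the non-key filter):

import boto3
from boto3.dynamodb.conditions import Attr

dynamodb = boto3.resource('dynamodb')

def scan_table_allpages(table_name, filter_key=None, filter_value=None):
    table = dynamodb.Table(table_name)
    scan_kwargs = {}
    if filter_key and filter_value:
        # Keep the filter in the kwargs so it is applied to every page, not just the first
        scan_kwargs['FilterExpression'] = Attr(filter_key).eq(filter_value)

    response = table.scan(**scan_kwargs)
    items = response['Items']
    while response.get('LastEvaluatedKey'):
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'], **scan_kwargs)
        items += response['Items']
    return items

def lambda_handler(event, context):
    # Hard-coded filter values from the question; in practice pull them from the API event
    return scan_table_allpages('table-all', filter_key='Country', filter_value='USA')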

main() takes 0 positional arguments but 2 were given

I have the following code
import datetime
import json
import urllib.request
from bs4 import BeautifulSoup as soup  # assumed: soup() refers to BeautifulSoup from bs4
from google.cloud import bigquery

client = bigquery.Client()
dataset_id = 'dataset'  # replace with your dataset ID
table_id = 'table'      # replace with your table ID
table_ref = client.dataset(dataset_id).table(table_id)
table = client.get_table(table_ref)  # API request
rows_to_insert = []

bq = bigquery.Client(project='project-id')
query = """SELECT Url FROM `project-id.dataset.urltable`"""
query_job = bq.query(query)
data = query_job.result()
rows = list(data)

def main():
    for row in rows:
        URL = urllib.request.urlopen(row[0])
        soup_page = soup(URL, features="lxml")
        try:
            data = json.loads(soup_page.find_all('script', type='application/ld+json')[1].text)
        except:
            data = 'unknown'
        try:
            price_ruw = data['offers']['price']
            shopprice = price_ruw.replace(',', '.')
        except:
            price = 0
        try:
            ean = data['gtin13']
            ean = str(ean)
        except:
            ean = 'unknown'
        try:
            title_ruw1 = data['name']
            title_ruw = title_ruw1
            tile_trim = title_ruw[:750]
            title = tile_trim.replace("'", "")
        except:
            title = "unknown"
        try:
            reviews = data['aggregateRating']['reviewCount']
        except:
            reviews = 0
        try:
            score = (float(data['aggregateRating']['ratingValue']) * 2)
        except:
            score = 0
        datenow = (datetime.datetime.now())
        shoplink = row[0]
        rows_to_insert.append([shoplink, ean, title, reviews, score, shopprice, datenow])
    client.insert_rows(table, rows_to_insert)  # API request

main()
Testing this code in Google Cloud platform gives
Error: function crashed. Details:
main() takes 0 positional arguments but 2 were given
However, deploying this code does not give an error; only the scheduled run fails, repeatedly giving the error shown above.
For deploying I use the following command (which works):
gcloud functions deploy <function> --entry-point main \
    --runtime python37 --trigger-resource <name> \
    --trigger-event google.pubsub.topic.publish --timeout 540s
It's not clear how you're triggering this function, but it seems like a "Background Function", which means that it needs to take two arguments, even if they're unused:
def main(data, context):
    ...
See https://cloud.google.com/functions/docs/concepts/events-triggers for more information.
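Applied to the code in the question, only the signature of main needs to change; a minimal sketch (the Pub/Sub payload is simply ignored, and the body is the same loop as above):

def main(data, context):
    # data: the Pub/Sub message that triggered the function (unused here)
    # context: metadata about the event, e.g. event_id and timestamp (unused here)
    for row in rows:
        URL = urllib.request.urlopen(row[0])
        soup_page = soup(URL, features="lxml")
        # ... same parsing logic as above ...
    client.insert_rows(table, rows_to_insert)  # API request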

AWS Lambda - How do I convert my code to work in AWS?

I'm struggling to get a Lambda function working. I have a Python script that accesses the Twitter API, pulls information, and exports it to an Excel sheet. I'm trying to move this script over to AWS Lambda and I'm having a lot of trouble.
What I've done so far: created an AWS account, set up an S3 bucket, and poked around trying to get things to work.
The main area I'm struggling with is how to turn a Python script that I run from my local CLI into Lambda-capable code. I'm not sure I understand how the lambda_handler function works, what the event and context arguments actually mean (despite watching half a dozen tutorial videos), or how to integrate my existing functions into the lambda_handler. I'm hoping someone can help me get some clarity!
Code that I'm using to pull twitter data (just a sample):
import time
import datetime
import keys
import pandas as pd
from twython import Twython, TwythonError
import pymysql

def lambda_handler(event, context):
    def oauth_authenticate():
        twitter_oauth = Twython(keys.APP_KEY, keys.APP_SECRET, oauth_version=2)
        ACCESS_TOKEN = twitter_oauth.obtain_access_token()
        twitter = Twython(keys.APP_KEY, access_token=ACCESS_TOKEN)
        return twitter

    def get_username():
        """
        Prompts for the screen name of the targeted account
        """
        username = input("Enter the Twitter screenname you'd like information on. Do not include '#':")
        return username

    def get_user_followers(username):
        """
        Returns data on all accounts following the targeted user.
        WARNING: The number of followers can be huge, and the data isn't very valuable
        """
        #username = get_username()
        #import pdb; pdb.set_trace()
        twitter = oauth_authenticate()
        datestamp = str(datetime.datetime.now().strftime("%Y-%m-%d"))
        target = twitter.lookup_user(screen_name=username)
        for y in target:
            target_id = y['id_str']
        next_cursor = -1
        index = 0
        followersdata = {}
        while next_cursor:
            try:
                get_followers = twitter.get_followers_list(screen_name=username,
                                                           count=200,
                                                           cursor=next_cursor)
                for x in get_followers['users']:
                    followersdata[index] = {}
                    followersdata[index]['screen_name'] = x['screen_name']
                    followersdata[index]['id_str'] = x['id_str']
                    followersdata[index]['name'] = x['name']
                    followersdata[index]['description'] = x['description']
                    followersdata[index]['date_checked'] = datestamp
                    followersdata[index]['targeted_account_id'] = target_id
                    index = index + 1
                next_cursor = get_followers["next_cursor"]
            except TwythonError as e:
                print(e)
                remainder = (float(twitter.get_lastfunction_header(header='x-rate-limit-reset'))
                             - time.time()) + 1
                print("Rate limit exceeded. Waiting for:", remainder/60, "minutes")
                print("Current Time is:", time.strftime("%I:%M:%S"))
                del twitter
                time.sleep(remainder)
                twitter = oauth_authenticate()
                continue
        followersDF = pd.DataFrame.from_dict(followersdata, orient="index")
        followersDF.to_excel("%s-%s-follower list.xlsx" % (username, datestamp),
                             index=False, encoding='utf-8')
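A minimal sketch of one way this could be adapted to Lambda, assuming the helper functions above are moved to module level and get_user_followers is changed to return the DataFrame instead of writing the file itself; the bucket name and the 'username' event field below are placeholders, not anything from the question:

import boto3

s3 = boto3.client('s3')

def lambda_handler(event, context):
    # Lambda cannot prompt with input(), so take the screen name from the event payload
    username = event['username']
    datestamp = datetime.datetime.now().strftime("%Y-%m-%d")

    followersDF = get_user_followers(username)  # assumes the helper now returns the DataFrame

    # Only /tmp is writable inside Lambda, so write the Excel file there...
    local_path = "/tmp/%s-%s-follower-list.xlsx" % (username, datestamp)
    followersDF.to_excel(local_path, index=False)

    # ...and persist it to S3 before the execution environment is recycled
    s3.upload_file(local_path, "my-follower-bucket", "%s-%s-follower-list.xlsx" % (username, datestamp))
    return {"statusCode": 200, "body": "uploaded %s" % local_path}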

AWS Lambda: Unable to import module 'lambda_function': No module named boto.ec2.autoscale

Following is the Lambda function I wrote that gets the list of Auto Scaling groups and prints them.
import json
import boto3
import boto.ec2.autoscale

role = "arn:aws:iam::XXXXXXXXXX:role/lambda-autoshutdown-role"
regions = ["eu-central-1"]
autoscaling = boto3.client('autoscaling')

class App(object):
    def __init__(self, RoleArn):
        self.RoleArn = RoleArn
        if self.RoleArn != "local":
            sts_client = boto3.client('sts')
            self.sts = sts_client.assume_role(
                RoleArn=self.RoleArn,
                RoleSessionName="lambda_poweroff")["Credentials"]

    def get_resource(self, region="eu-central-1"):
        if self.RoleArn == "local":
            return boto3.resource(region_name=region)
        else:
            return boto.ec2.autoscale.connect_to_region(
                region_name=region,
                aws_access_key_id=self.sts['AccessKeyId'],
                aws_secret_access_key=self.sts['SecretAccessKey'],)

def lambda_handler(event, context):
    a = App(role)
    for region in regions:
        asgs = a.get_resource(region)
        # locate all running instances
        #autoscaling_groups_to_suspend = []
        #for i in asgs:
        #    print asgs[i]
        print('[%s]' % ', '.join(map(str, asgs)))
This function uses boto.ec2.autoscale.connect_to_region to connect and return the object.
But when I try to deploy it on AWS, I get the following error:
Unable to import module 'lambda_function': No module named boto.ec2.autoscale
It seems like the module boto.ec2.autoscale is not being loaded by AWS.
Any idea what might be wrong here?
For someone looking for an answer, the following piece of Lambda code gets the list of all ASGs and then suspends them (except the ones that match the regex):
import json
import re
import boto3

regions = ["eu-central-1"]
autoscaling = boto3.client('autoscaling')

def lambda_handler(event, context):
    response = autoscaling.describe_auto_scaling_groups(MaxRecords=100)
    #print(response)
    #print(response['AutoScalingGroups'][0]['AutoScalingGroupName'])
    autoscaling_group_to_suspend = []
    for doc in response['AutoScalingGroups']:
        response_parsed = doc['AutoScalingGroupName']
        autoscaling_group_to_suspend.append(response_parsed)
    #print(autoscaling_group_to_suspend)
    regex = re.compile(r'es-data-asg|consul|influxdb|vault|es-master')
    filtered = [i for i in autoscaling_group_to_suspend if not regex.search(i)]
    print(filtered)
    if len(filtered) > 0:
        for x in filtered:
            autoscaling.suspend_processes(AutoScalingGroupName=x)
I am trying to do the same thing with S3. I need boto.s3.connect_to_region(), but I get the same error. Packaging the boto module with the Lambda deployment might solve the issue. Otherwise, we might have to use boto3.client and parse the JSON response to get the appropriate values.
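For the S3 case mentioned in that comment, the boto3 client (which ships with the Lambda runtime) can replace boto.s3.connect_to_region; a minimal sketch, assuming the goal is just to list the buckets in the account:

import boto3

def lambda_handler(event, context):
    # boto3 is preinstalled in the Lambda runtime, so there is no legacy boto package to bundle
    s3 = boto3.client('s3', region_name='eu-central-1')
    for bucket in s3.list_buckets()['Buckets']:
        print(bucket['Name'])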
