I am getting this error when trying to send an SNS email via a Lambda function:
"errorMessage": "Connect timeout on endpoint URL: \"https://sns.us-west-1.amazonaws.com/\"",
"errorType": "ConnectTimeoutError"
I have all the policies set up, with SNS full access attached to the role tied to the function. Here is the full function:
import json
import psycopg2
import boto3
import time
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import sys
import logging
import os
import csv
import smtplib
from base64 import b64decode
#bucket = 's3://data-lake-020192/'
credential = {
'dbname' : 'main',
'host_url' : 'test.us-west-1.redshift.amazonaws.com',
'port' : '5439',
'user' : '####',
'password' : '########'
}
redshift_role = {
'dev': 'arn:aws:lambda:us-west-1:##########:function:test_function'
}
def lambda_handler(event, context):
    ## S3 CONNECTIVITY ##
    s3 = boto3.resource('s3')
    #client = boto3.client('s3')
    # TODO implement
    conn_string = "dbname='{}' port='{}' user='{}' password='{}' host='{}'"\
        .format(credential['dbname'], credential['port'], credential['user'], credential['password'], credential['host_url'])
    sql_query = """with
tbl as (
select
case
when (sa.parentid like '001i0000023STBY%' or sa.ultimate_parent_account__c like '001i0000023STBY%') --Parent OR Ultimate Parent is <Department of Defense>
then sa.id
else
coalesce(sa.ultimate_parent_account__c, sa.parentid, sa.id) end as cust_id,
(select name from salesforce.account where id=cust_id) as cust_name,
sa.name as acct_name,
sa.id as acct_id,
sa.parentid,
(select name from salesforce.account where id=sa.parentid) as par_name,
(select name from salesforce.account where id=sa.ultimate_parent_account__c) as ult_par_name,
so.id as opp_id,
so.name as opp_name,
so.stagename as stg_name,
so.type as opp_type,
so.Manager_Commit__c as mgr_commit,
so.renewal_risk__c as opp_risk,
so.isclosed as cls
from
salesforce.opportunity so
join
salesforce.account sa on
so.accountid = sa.id
join salesforce.user su on
so.ownerid = su.id
join salesforce.opportunitylineitem sol on
so.id = sol.opportunityid
join salesforce.product2 sp on
sol.product2id = sp.id
join salesforce.customasset__c sca on
so.id = sca.opportunity__c
where
so.isdeleted = false
and sa.isdeleted = false
and sol.isdeleted = false
)
select * from
(select
tbl.acct_name as acct,
'[' || 'Link' || '](' || concat('https://vectranetworks.lightning.force.com/', tbl.opp_id) || ')' as opp_link,
tbl.ca_name,
tbl.ca_pr_name,
tbl.ca_mode,
date(tbl.ca_last_seen) as ca_last_seen,
tbl.ca_sw_version,
tbl.ca_tot_hosts,
tbl.ca_active_hosts,
tbl.ca_x95_hosts_tot,
tbl.ca_traffic,
tbl.ca_uiconfig
from
tbl
where
tbl.stg_name like 'Closed Won%'
and tbl.arr is not null
group by
tbl.acct_name,
tbl.opp_id,
tbl.ca_name,
tbl.ca_pr_name,
tbl.ca_mode,
tbl.ca_last_seen,
tbl.ca_sw_version,
tbl.ca_tot_hosts,
tbl.ca_active_hosts,
tbl.ca_x95_hosts_tot,
tbl.ca_traffic,
tbl.ca_uiconfig) df
WHERE ca_last_seen >= DATEADD(MONTH, -3, GETDATE())
limit 5"""
    con = psycopg2.connect(conn_string)
    client2 = boto3.client('sns')
    with con.cursor() as cur:
        # Enter the query that you want to execute
        cur.execute(sql_query)
        for row in cur:
            df = pd.DataFrame.from_records(cur.fetchall(), columns = [desc[0] for desc in cur.description])
            df['Time_Stamp'] = pd.to_datetime('now', utc=True)
            df['ca_active_hosts'] = df['ca_active_hosts'].astype('Int64', errors='ignore')
            df['ca_active_hosts'].fillna(0, inplace=True)
            #print(df.iloc[0])
            #if (df.iloc[0]['ca_active_hosts'].notna()):
            if (df['ca_active_hosts'] >= 0).all():
                print('the file is present, going to send notification')
                response = client2.publish(
                    TopicArn = 'arn:aws:sns:us-west-1:##########:email-data-lake',
                    Message = 'Warning User active_hosts is ' +str(df['Time_Stamp']),
                    Subject = 'User Warning')
            else:
                print('the file is not present')
    #cur.close()
Is there anything else in the code or connection I need to change? I feel I have exhausted everything I can find online, being new to SNS.
I imagine that your Lambda function does not have any internet connectivity.
A connect timeout therefore indicates that the network interface associated with your Lambda function is unable to reach the SNS service.
To fix this, create a VPC interface endpoint for sns.us-west-1.amazonaws.com in the same subnet as the Lambda's network interface.
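For reference, here is a minimal boto3 sketch for creating such an endpoint (the VPC, subnet, and security group IDs below are placeholders; use the ones attached to the Lambda's network interface, or create the endpoint from the VPC console instead):
import boto3

ec2 = boto3.client("ec2", region_name="us-west-1")

# Placeholder IDs -- substitute the VPC, subnet(s) and security group
# actually used by the Lambda function's network interface.
response = ec2.create_vpc_endpoint(
    VpcEndpointType="Interface",
    VpcId="vpc-0123456789abcdef0",
    ServiceName="com.amazonaws.us-west-1.sns",
    SubnetIds=["subnet-0123456789abcdef0"],
    SecurityGroupIds=["sg-0123456789abcdef0"],
    PrivateDnsEnabled=True,  # lets sns.us-west-1.amazonaws.com resolve to the endpoint privately
)
print(response["VpcEndpoint"]["VpcEndpointId"])
Note that the endpoint's security group must allow inbound HTTPS (port 443) from the Lambda function, otherwise the publish call will still time out.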
I am trying to unit test my Lambda code, which runs an Athena query using awswrangler.
As moto has no support for awswrangler, I am stuck and don't know how to mock it.
import awswrangler as wr
import boto3
def athena_query(dbtable, contact_id, athena_output, session):
    query = """
        SELECT
            *
        FROM
            :dbtable;
        WHERE
            contactid=:contactid;
    """
    output = wr.athena.read_sql_query(
        query,
        params = {
            "contactid": f"'{contact_id}'",
            "dbtable": f"{dbtable}"
        },
        s3_output = athena_output,
        boto3_session = session
    )
    results = output.head().loc[0]
    return results
response = athena_query("table_name", "123", "s3://bucket", boto3.session.Session())
I referenced the awswrangler GitHub issues, and while trying some of the tests provided in the link, the code hits the AWS service instead of running locally.
Here is an example implementation for this function using moto and pytest.
First I would correct your function according to the parameters awswrangler requires in its current version (2.16.1).
import awswrangler as wr
import boto3
def athena_query(database, dbtable, contact_id, athena_output, session):
    query = """
        SELECT
            *
        FROM
            :dbtable;
        WHERE
            contactid=:contactid;
    """
    output = wr.athena.read_sql_query(
        query,
        database,
        params = {
            "contactid": f"'{contact_id}'",
            "dbtable": f"{dbtable}"
        },
        s3_output = athena_output,
        boto3_session = session
    )
    results = output.head().loc[0]
    return results
Then, in a test/conftest.py file, I would declare the necessary mocked objects:
import os
import boto3
import pytest
import moto
TEST_BUCKET_NAME = "my_bucket"
REGION = "us-east-1"
DATABASE_NAME = "test_db"
TABLE_NAME = "test_table"
TABLE_DDL = f"""CREATE EXTERNAL TABLE IF NOT EXISTS
{DATABASE_NAME}.{TABLE_NAME} (
a string,
b string,
contactid string
) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
'separatorChar' = ',',
'quoteChar' = '\"',
'escapeChar' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://{TEST_BUCKET_NAME}/input/';"""
@pytest.fixture
def aws_credentials():
    """Mocked AWS Credentials for moto."""
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SECURITY_TOKEN"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"

@pytest.fixture
def s3_client(aws_credentials):
    with moto.mock_s3():
        conn = boto3.client("s3", region_name=REGION)
        yield conn

@pytest.fixture
def athena_client(aws_credentials):
    with moto.mock_athena():
        conn = boto3.client("athena", region_name=REGION)
        yield conn

@pytest.fixture
def s3_bucket(s3_client):
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            'LocationConstraint': 'eu-west-1'
        }
    )
    yield boto3.resource('s3').Bucket(TEST_BUCKET_NAME)

@pytest.fixture
def athena_table(athena_client, s3_bucket):
    # create database
    _ = athena_client.start_query_execution(
        QueryString=f"create database {DATABASE_NAME}",
        ResultConfiguration={"OutputLocation": f"s3://{TEST_BUCKET_NAME}/queries/"}
    )
    # create table
    _ = athena_client.start_query_execution(
        QueryString=TABLE_DDL,
        ResultConfiguration={"OutputLocation": f"s3://{TEST_BUCKET_NAME}/queries/"}
    )
Then I would define a test of the function in a separate test/athena_test.py file. This uses mocker to mock the awswrangler response to the query, but you could build more advanced tests on top of the mock objects created in conftest.py:
import boto3
import pandas as pd
from conftest import TEST_BUCKET_NAME, DATABASE_NAME, TABLE_NAME
# import your function to test here
def test_athena_query(s3_bucket, athena_table, mocker):
    def mock_response(*args, **kwargs):
        return pd.DataFrame.from_dict({"a": [1, 2], "b": [3, 4], "contactid": [123, 123]})

    # mocking
    mock_wr_call = mocker.patch('awswrangler.athena.read_sql_query')
    mock_wr_call.side_effect = mock_response
    response = athena_query(DATABASE_NAME, TABLE_NAME, "123", f"s3://{TEST_BUCKET_NAME}/queries/", boto3.session.Session())
    assert response.shape[0] == 2
Resources:
https://aws-data-wrangler.readthedocs.io/en/stable/stubs/awswrangler.athena.read_sql_query.html
I am working in a Google Cloud Function with the intention of putting the results in a dataframe and then porting all of that into BigQuery. My function deployed without error, but when looking at the associated BigQuery table I see no data. Below is a view of my code:
# general setup, common imports
import json, requests, time, urllib.parse
import pandas as pd
from pandas import DataFrame
import datetime
import io
import os
from google.cloud import bigquery
from google.cloud.bigquery.client import Client
def crux_data():
    # Read the URLs for auditing
    url_list = open('pagespeedlist', 'r')
    url_list.read()
    results = []
    for x in url_list:
        url = x[0]
        pagespeed_results = urllib.request.urlopen('https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url={}&strategy=mobile&key=API_KEY'\
            .format(url)).read().decode('UTF-8')
        pagespeed_results_json = json.loads(pagespeed_results)
        add_date = datetime.date.today()
        largest_contentful_paint = pagespeed_results_json['lighthouseResult']['audits']['largest-contentful-paint']['displayValue'].replace(u'\xa0', u'') # Largest Contenful Paint
        first_input_delay = str(round(pagespeed_results_json['loadingExperience']['metrics']['FIRST_INPUT_DELAY_MS']['distributions'][2]['proportion'] * 1000, 1)) + 'ms' # First Input Delay
        cumulative_layout_shift = pagespeed_results_json['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue'] # CLS
        crux_lcp = pagespeed_results_json['loadingExperience']['metrics']['LARGEST_CONTENTFUL_PAINT_MS']['category'] # Largest Contenful Paint Score
        crux_fid = pagespeed_results_json['loadingExperience']['metrics']['FIRST_INPUT_DELAY_MS']['category'] # First Input Delay Score
        crux_cls = pagespeed_results_json['loadingExperience']['metrics']['CUMULATIVE_LAYOUT_SHIFT_SCORE']['category'] # CLS Score
        result_url = [url,date,largest_contentful_paint,first_input_delay,cumulative_layout_shift,lcp_score,fid_score,cls_score]
        results.append(result_url)
    #Convert to dataframe
    results_csv = DataFrame (results,columns=['URL','DATE','LCP','FID','CLS','LCP_SCORE','FID_SCORE','CLS_SCORE'])
    # Construct a BigQuery client object.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'credentials.json'
    client = Client()
    # TODO(developer): Set table_id to the ID of the table to create.
    table_id = "db.datatable.dataLoc"
    job_config = bigquery.LoadJobConfig()
    job = client.load_table_from_dataframe(
        results_csv, table_id, job_config=job_config
    ) # Make an API request.
    job.result() # Wait for the job to complete.
    table = client.get_table(table_id) # Make an API request.
    print(
        "Loaded {} rows and {} columns to {}".format(
            table.num_rows, len(table.schema), table_id
        )
    )
I do see the proper schema in the BigQuery table, but no actual data. Is there something I am missing when loading a dataframe to BigQuery?
Any help is much appreciated!
Although I've been happily running this script for the best part of a year, I recently upgraded to macOS Catalina and reinstalled exchangelib. Now I get an error with item_id:
'Message' object has no attribute 'item_id'
Here's my code; I would love to know what I'm doing wrong, please 🙏 TIA (p.s. forgive any convoluted coding...)
from exchangelib import DELEGATE, Account, Credentials, Message, \
EWSDateTime, EWSTimeZone, Configuration
from exchangelib.util import PrettyXmlHandler
import logging
logging.basicConfig(level=logging.DEBUG, handlers=[PrettyXmlHandler()])
from datetime import datetime, timedelta
import monthdelta as md
import sqlite3
import pandas as pd
import pm_ews_module as pem
__DBPATH__ = "/Users/patrickstacey/CODE/JUMPY_CODE/dev/data/test_data_tbase11_002"
__CONFIGFILE__ = '/Users/patrickstacey/CODE/JUMPY_CODE/dev/config/jumpyConfig.csv'
__OUTLOOK_EMAIL__ = 'bspks@lunet.lboro.ac.uk'
_PRIMARY_SMTP_ADDRESS_ = 'bspks@lunet.lboro.ac.uk'
__OUTLOOK_PASSWORD__ = '****'
def connect_to_EWS(__OUTLOOK_EMAIL__, __OUTLOOK_PASSWORD__, _PRIMARY_SMTP_ADDRESS_):
    creds = Credentials(__OUTLOOK_EMAIL__,__OUTLOOK_PASSWORD__)
    config = Configuration(server='outlook.office365.com/EWS/Exchange.asmx', \
        credentials=creds)
    return Account(
        primary_smtp_address=_PRIMARY_SMTP_ADDRESS_,
        autodiscover=False,
        config = config,
        access_type=DELEGATE
    )
last_analysis = pem.determine_start_date_required(__OUTLOOK_EMAIL__)
if last_analysis == "no records":
    df = pd.read_csv(__CONFIGFILE__)
    retrodays = df['detail'].where(df['item'] == "demo_user_days_retro").dropna().values
    retrodays = int(retrodays)
    last_analysis = None
    last_analysis = datetime.today() - timedelta(days=retrodays)
(year,month,day,hour,mins,secs) = pem.unpackDateElements(str(last_analysis))
tz = EWSTimeZone.timezone('Europe/London')
last_analysis = tz.localize(EWSDateTime(year, month, day, hour, mins, secs))
account = connect_to_EWS(__OUTLOOK_EMAIL__, __OUTLOOK_PASSWORD__, __OUTLOOK_EMAIL__)
for item in account.inbox.filter(datetime_received__gt=last_analysis):
    if type(item) == Message:
        try:
            db = sqlite3.connect(__DBPATH__)
            cursor = db.cursor()
            cursor.execute("INSERT INTO escores_log(email, datetime, subject, body, emailtype, pos_threshold, item_id, status, sender) VALUES(?,?,?,?,?,?,?,?,?)", (__OUTLOOK_EMAIL__, str(item.datetime_received), pem.deEmojify(item.subject), item.text_body, "received", 0.5, item.item_id, 0, item.sender.email_address))
            print("Inserted an email from ",item.sender.email_address," about ",item.subject," on ",str(item.datetime_received))
            db.commit()
            db.close()
        except Exception as e:
            print ("Exception found: "+str(e))
            pass
item_id was renamed to id in version 1.12.0, and the old name was finally removed in 2.0.0. See the notes in the CHANGELOG: https://github.com/ecederstrand/exchangelib/blob/master/CHANGELOG.md#200
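In practice that just means changing the attribute access, for example (a minimal sketch; the getattr fallback is only useful if the script has to run against both old and new exchangelib versions):
# exchangelib >= 1.12 exposes the Exchange item ID as `item.id`
unique_id = item.id

# defensive variant that works on both old and new versions
unique_id = getattr(item, "id", None) or getattr(item, "item_id", None)
In the INSERT statement above, item.item_id would simply become item.id.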
Looks as though item_id is now called id; I took an educated guess, so the script works again. I look forward to hearing any other views on this. With thanks.
The script below is giving an "Invalid Syntax" error at $gt; could someone help me with this? I am trying to retrieve records inserted in the last hour.
import pymongo
import sys
from datetime import datetime
from datetime import timedelta
from pymongo import MongoClient
# establish connectivity to Mongodb via ssl using pymongo module
#args = sys.argv
host = 'mongo-db-prd'
uname = 'superuser'
passwrd = 'Hayyo'
#print (args)
port = "27017"
print(uname)
print(passwrd)
uri = 'mongodb://' + uname + ":" + passwrd + "@" + host + ":" + port + '/?authSource=admin'
client = MongoClient(uri, ssl=True, ssl_ca_certs='./files/rds-combined-ca-bundle.pem')
# This will create hl7feeds docdb
print("connected client")
db = client.feeds # This command will create a DB
print(client.list_database_names()) # This command will print list of DBs
print(client.list_database_names()) # This command will print list of DBs
mycol = db[ "feeds_100"] # This command will create a collection in DB
docins=mycol.insert_one({"name" : "test"}) # This will insert a document in collection
dblist = client.list_database_names()
print(client.list_database_names())
# Lets create collections on docdb for all tenants
tlist1 = ["feeds_104","feeds_105","feeds_106"]
for each_val in tlist1:
    print(each_val)
    countvalue = db.getCollection('each_val').find({"row_created_date":{"$gt":new date(date.now() - 1*60*60 * 1000)}}).count();
    print(countvalue)
You have used JavaScript code inside Python:
countvalue = db.getCollection('each_val').find({"row_created_date":{"$gt":new date(date.now() - 1*60*60 * 1000)}}).count();
Correct it like this (note that getCollection is also a mongo shell method; in pymongo use get_collection or dictionary-style access):
from datetime import datetime, timedelta
...
countvalue = db.get_collection(each_val).find({"row_created_date": {"$gt": datetime.utcnow() - timedelta(hours=1)}}).count()
Also, here is your code reformatted, using pymongo's get_collection, for better readability:
import sys
from datetime import datetime, timedelta
from pymongo import MongoClient
# establish connectivity to Mongodb via ssl using pymongo module
# args = sys.argv
host = "mongo-db-prd"
uname = "superuser"
passwrd = "Hayyo"
# print (args)
port = "27017"
print(uname)
print(passwrd)
uri = (
"mongodb://" + uname + ":" + passwrd + "#" + host + ":" + port + "/?authSource=admin"
)
client = MongoClient(uri, ssl=True, ssl_ca_certs="./files/rds-combined-ca-bundle.pem")
# This will create hl7feeds docdb
print("connected client")
db = client.feeds # This command will create a DB
print(client.list_database_names()) # This command will print list of DBs
mycol = db["feeds_100"] # This command will create a collection in DB
docins = mycol.insert_one({"name": "test"}) # This will insert a document in collection
dblist = client.list_database_names()
print(client.list_database_names())
# Lets create collections on docdb for all tenants
tlist1 = ["feeds_104", "feeds_105", "feeds_106"]
for each_val in tlist1:
    print(each_val)
    countvalue = (
        db.get_collection(each_val)
        .find({"row_created_date": {"$gt": datetime.utcnow() - timedelta(hours=1)}})
        .count()
    )
    print(countvalue)
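As a side note, Cursor.count() is deprecated (and removed in pymongo 4.x); the same count can be obtained with count_documents, sketched here against the same collection and filter:
countvalue = db.get_collection(each_val).count_documents(
    {"row_created_date": {"$gt": datetime.utcnow() - timedelta(hours=1)}}
)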
P.S. Please look at f-strings to better format your URI.
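For example, the connection string above could be written as:
uri = f"mongodb://{uname}:{passwrd}@{host}:{port}/?authSource=admin"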
I have a Python function to list findings and I am getting the error "'client' object has no attribute 'list_findings'". requirements.txt includes google-cloud-securitycenter and the BigQuery library.
import os
import json
from google.cloud import bigquery
def test_list_all_findings(request):
    if request.method != 'POST':
        return abort(405)
    request_json = request.get_json()
    # [START list_all_findings]
    from google.cloud import securitycenter
    # Create a client.
    client = securitycenter.SecurityCenterClient()
    client = bigquery.Client()
    cuid = request_json['cuid']
    organization_id = request_json['organization_id']
    # organization_id is the numeric ID of the organization. e.g.:
    organization_id = organization_id
    org_name = "organizations/{org_id}".format(org_id=organization_id)
    # The "sources/-" suffix lists findings across all sources. You
    # also use a specific source_name instead.
    all_sources = "{org_name}/sources/-".format(org_name=org_name)
    finding_result_iterator = client.list_findings(all_sources)
    job_config = bigquery.CopyJobConfig()
    job_config.write_disposition = "WRITE_TRUNCATE"
    destination_table_id = "gce-kubernetes.onboard_gcp.cc_data_billing_"+cuid
    blob = destination_table_id.blob("findings.json")
    f=open("/tmp/findings.json", "a+")
    for i, finding_result in enumerate(finding_result_iterator):
        s = "{}) 'name': {}, resource: {}, destination_table_id: {}".format(
            i, finding_result.finding.name, finding_result.finding.resource_name, destination_table_id)
        print(s)
        f.write(str(finding_result))
        f.write(",\n")
    f.close()
    blob.upload_from_filename('/tmp/findings.json')
    os.remove("/tmp/findings.json")
    # [END list_all_findings]