I am trying to unit test my Lambda code, which runs an Athena query using awswrangler. Since moto has no support for awswrangler, I am stuck and don't know how to mock it.
import awswrangler as wr
import boto3

def athena_query(dbtable, contact_id, athena_output, session):
    query = """
        SELECT
            *
        FROM
            :dbtable;
        WHERE
            contactid=:contactid;
    """
    output = wr.athena.read_sql_query(
        query,
        params={
            "contactid": f"'{contact_id}'",
            "dbtable": f"{dbtable}"
        },
        s3_output=athena_output,
        boto3_session=session
    )
    results = output.head().loc[0]
    return results

response = athena_query("table_name", "123", "s3://bucket", boto3.session.Session())
I referenced the awswrangler GitHub issue, and while trying some of the tests provided in that link, my code hits the AWS service instead of running locally.
Here is an example implementation for this function using moto and pytest.
First, I would correct your function according to the parameters awswrangler requires in its current version (2.16.1).
import awswrangler as wr
import boto3

def athena_query(database, dbtable, contact_id, athena_output, session):
    query = """
        SELECT
            *
        FROM
            :dbtable;
        WHERE
            contactid=:contactid;
    """
    output = wr.athena.read_sql_query(
        query,
        database,
        params={
            "contactid": f"'{contact_id}'",
            "dbtable": f"{dbtable}"
        },
        s3_output=athena_output,
        boto3_session=session
    )
    results = output.head().loc[0]
    return results
Then, in a test/conftest.py file, I would declare the necessary mocked objects:
import os

import boto3
import moto
import pytest

TEST_BUCKET_NAME = "my_bucket"
REGION = "us-east-1"
DATABASE_NAME = "test_db"
TABLE_NAME = "test_table"
TABLE_DDL = f"""CREATE EXTERNAL TABLE IF NOT EXISTS
{DATABASE_NAME}.{TABLE_NAME} (
    a string,
    b string,
    contactid string
) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    'separatorChar' = ',',
    'quoteChar' = '\"',
    'escapeChar' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://{TEST_BUCKET_NAME}/input/';"""


@pytest.fixture
def aws_credentials():
    """Mocked AWS Credentials for moto."""
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SECURITY_TOKEN"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"


@pytest.fixture
def s3_client(aws_credentials):
    with moto.mock_s3():
        conn = boto3.client("s3", region_name=REGION)
        yield conn


@pytest.fixture
def athena_client(aws_credentials):
    with moto.mock_athena():
        conn = boto3.client("athena", region_name=REGION)
        yield conn


@pytest.fixture
def s3_bucket(s3_client):
    # REGION is us-east-1, so no CreateBucketConfiguration is needed (or allowed)
    s3_client.create_bucket(Bucket=TEST_BUCKET_NAME)
    yield boto3.resource("s3").Bucket(TEST_BUCKET_NAME)


@pytest.fixture
def athena_table(athena_client, s3_bucket):
    # create database
    _ = athena_client.start_query_execution(
        QueryString=f"create database {DATABASE_NAME}",
        ResultConfiguration={"OutputLocation": f"s3://{TEST_BUCKET_NAME}/queries/"}
    )
    # create table
    _ = athena_client.start_query_execution(
        QueryString=TABLE_DDL,
        ResultConfiguration={"OutputLocation": f"s3://{TEST_BUCKET_NAME}/queries/"}
    )
Then I would define a test of the function in a separate test/athena_test.py file. This one uses mocker to stub the awswrangler response to the query, but you could write more advanced tests against the mocked objects created in conftest.py:
import boto3
import pandas as pd

from conftest import TEST_BUCKET_NAME, DATABASE_NAME, TABLE_NAME
# import your function to test here


def test_athena_query(s3_bucket, athena_table, mocker):
    def mock_response(*args, **kwargs):
        return pd.DataFrame.from_dict({"a": [1, 2], "b": [3, 4], "contactid": [123, 123]})

    # patch awswrangler where it is defined; this is also what wr.athena.read_sql_query
    # resolves to inside your module
    mock_wr_call = mocker.patch("awswrangler.athena.read_sql_query")
    mock_wr_call.side_effect = mock_response

    response = athena_query(DATABASE_NAME, TABLE_NAME, "123", f"s3://{TEST_BUCKET_NAME}/queries/", boto3.session.Session())

    # athena_query returns the first row as a pandas Series
    assert response["contactid"] == 123
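For slightly more advanced testing you can combine the mock with the fixtures from conftest.py, for example to seed the fake bucket with input data and to verify how the wrangler call was made. A minimal sketch (the object key, CSV content, and assertions are illustrative, not part of the original answer):

def test_athena_query_with_seeded_bucket(s3_bucket, athena_table, mocker):
    # put a fake input file into the mocked bucket (key and content are made up)
    s3_bucket.put_object(Key="input/data.csv", Body=b"a,b,contactid\n1,3,123\n2,4,123\n")

    mock_wr_call = mocker.patch("awswrangler.athena.read_sql_query")
    mock_wr_call.return_value = pd.DataFrame({"a": [1, 2], "b": [3, 4], "contactid": [123, 123]})

    athena_query(DATABASE_NAME, TABLE_NAME, "123", f"s3://{TEST_BUCKET_NAME}/queries/", boto3.session.Session())

    # the query should have been issued exactly once against the mocked output location
    mock_wr_call.assert_called_once()
    assert mock_wr_call.call_args.kwargs["s3_output"] == f"s3://{TEST_BUCKET_NAME}/queries/"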
Resources:
https://aws-data-wrangler.readthedocs.io/en/stable/stubs/awswrangler.athena.read_sql_query.html
I am getting this error when trying to send an SNS email via a Lambda function:
"errorMessage": "Connect timeout on endpoint URL: \"https://sns.us-west-1.amazonaws.com/\"",
"errorType": "ConnectTimeoutError"
I have all the policies set up, with SNS full access on the role tied to the function. Here is the full function:
import json
import psycopg2
import boto3
import time
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import sys
import logging
import os
import csv
import smtplib
from base64 import b64decode

#bucket = 's3://data-lake-020192/'
credential = {
    'dbname': 'main',
    'host_url': 'test.us-west-1.redshift.amazonaws.com',
    'port': '5439',
    'user': '####',
    'password': '########'
}
redshift_role = {
    'dev': 'arn:aws:lambda:us-west-1:##########:function:test_function'
}
def lambda_handler(event, context):
    ## S3 CONNECTIVITY ##
    s3 = boto3.resource('s3')
    #client = boto3.client('s3')

    # TODO implement
    conn_string = "dbname='{}' port='{}' user='{}' password='{}' host='{}'"\
        .format(credential['dbname'], credential['port'], credential['user'], credential['password'], credential['host_url'])
sql_query = """with
tbl as (
select
case
when (sa.parentid like '001i0000023STBY%' or sa.ultimate_parent_account__c like '001i0000023STBY%') --Parent OR Ultimate Parent is <Department of Defense>
then sa.id
else
coalesce(sa.ultimate_parent_account__c, sa.parentid, sa.id) end as cust_id,
(select name from salesforce.account where id=cust_id) as cust_name,
sa.name as acct_name,
sa.id as acct_id,
sa.parentid,
(select name from salesforce.account where id=sa.parentid) as par_name,
(select name from salesforce.account where id=sa.ultimate_parent_account__c) as ult_par_name,
so.id as opp_id,
so.name as opp_name,
so.stagename as stg_name,
so.type as opp_type,
so.Manager_Commit__c as mgr_commit,
so.renewal_risk__c as opp_risk,
so.isclosed as cls
from
salesforce.opportunity so
join
salesforce.account sa on
so.accountid = sa.id
join salesforce.user su on
so.ownerid = su.id
join salesforce.opportunitylineitem sol on
so.id = sol.opportunityid
join salesforce.product2 sp on
sol.product2id = sp.id
join salesforce.customasset__c sca on
so.id = sca.opportunity__c
where
so.isdeleted = false
and sa.isdeleted = false
and sol.isdeleted = false
)
select * from
(select
tbl.acct_name as acct,
'[' || 'Link' || '](' || concat('https://vectranetworks.lightning.force.com/', tbl.opp_id) || ')' as opp_link,
tbl.ca_name,
tbl.ca_pr_name,
tbl.ca_mode,
date(tbl.ca_last_seen) as ca_last_seen,
tbl.ca_sw_version,
tbl.ca_tot_hosts,
tbl.ca_active_hosts,
tbl.ca_x95_hosts_tot,
tbl.ca_traffic,
tbl.ca_uiconfig
from
tbl
where
tbl.stg_name like 'Closed Won%'
and tbl.arr is not null
group by
tbl.acct_name,
tbl.opp_id,
tbl.ca_name,
tbl.ca_pr_name,
tbl.ca_mode,
tbl.ca_last_seen,
tbl.ca_sw_version,
tbl.ca_tot_hosts,
tbl.ca_active_hosts,
tbl.ca_x95_hosts_tot,
tbl.ca_traffic,
tbl.ca_uiconfig) df
WHERE ca_last_seen >= DATEADD(MONTH, -3, GETDATE())
limit 5"""
    con = psycopg2.connect(conn_string)
    client2 = boto3.client('sns')

    with con.cursor() as cur:
        # Enter the query that you want to execute
        cur.execute(sql_query)
        for row in cur:
            df = pd.DataFrame.from_records(cur.fetchall(), columns=[desc[0] for desc in cur.description])
            df['Time_Stamp'] = pd.to_datetime('now', utc=True)
            df['ca_active_hosts'] = df['ca_active_hosts'].astype('Int64', errors='ignore')
            df['ca_active_hosts'].fillna(0, inplace=True)
            #print(df.iloc[0])
            #if (df.iloc[0]['ca_active_hosts'].notna()):
            if (df['ca_active_hosts'] >= 0).all():
                print('the file is present, going to send notification')
                response = client2.publish(
                    TopicArn='arn:aws:sns:us-west-1:##########:email-data-lake',
                    Message='Warning User active_hosts is ' + str(df['Time_Stamp']),
                    Subject='User Warning')
            else:
                print('the file is not present')
    #cur.close()
Is there anything else in the code or connection that I need to change? I feel I have exhausted everything I can find online, being new to SNS.
I imagine that your lambda function does not have any internet connectivity.
Thus, a connection timeout issue indicates that the network interface associated with your lambda function is unable to talk to the service.
To fix this, create a VPC interface endpoint for sns.us-west-1.amazonaws.com in the same subnet as that of the lambda's network interface.
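If you want to create the interface endpoint programmatically rather than through the console, a minimal boto3 sketch could look like the following (the VPC, subnet, and security group IDs are placeholders, not values from your setup):

import boto3

ec2 = boto3.client('ec2', region_name='us-west-1')

# interface endpoint that lets resources in the Lambda's subnet reach SNS privately
response = ec2.create_vpc_endpoint(
    VpcEndpointType='Interface',
    VpcId='vpc-0123456789abcdef0',              # placeholder: the Lambda's VPC
    ServiceName='com.amazonaws.us-west-1.sns',
    SubnetIds=['subnet-0123456789abcdef0'],     # placeholder: the Lambda's subnet(s)
    SecurityGroupIds=['sg-0123456789abcdef0'],  # placeholder: must allow HTTPS (443) from the Lambda
    PrivateDnsEnabled=True                      # keeps https://sns.us-west-1.amazonaws.com resolving to the endpoint
)
print(response['VpcEndpoint']['VpcEndpointId'])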
I'm trying to use AssumeRole in such a way that I'm traversing multiple accounts and retrieving assets for those accounts. I've made it to this point:
import boto3

sts_client = boto3.client('sts')
assumedRoleObject = sts_client.assume_role(
    RoleArn="arn:aws:iam::account-of-role-to-assume:role/name-of-role",
    RoleSessionName="AssumeRoleSession1")
Great, I have the assumedRoleObject. But now I want to use that to list things like ELBs or something that isn't a built-in low-level resource.
How does one go about doing that? If I may ask, please code out a full example, so that everyone can benefit.
Here's a code snippet from the official AWS documentation where an s3 resource is created for listing all s3 buckets. boto3 resources or clients for other services can be built in a similar fashion.
import boto3

# create an STS client object that represents a live connection to the
# STS service
sts_client = boto3.client('sts')

# Call the assume_role method of the STSConnection object and pass the role
# ARN and a role session name.
assumed_role_object = sts_client.assume_role(
    RoleArn="arn:aws:iam::account-of-role-to-assume:role/name-of-role",
    RoleSessionName="AssumeRoleSession1"
)

# From the response that contains the assumed role, get the temporary
# credentials that can be used to make subsequent API calls
credentials = assumed_role_object['Credentials']

# Use the temporary credentials that AssumeRole returns to make a
# connection to Amazon S3
s3_resource = boto3.resource(
    's3',
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken'],
)

# Use the Amazon S3 resource object that is now configured with the
# credentials to access your S3 buckets.
for bucket in s3_resource.buckets.all():
    print(bucket.name)
To get a session with an assumed role:
import boto3
import botocore.credentials
import botocore.session
import datetime
from dateutil.tz import tzlocal

assume_role_cache: dict = {}

def assumed_role_session(role_arn: str, base_session: botocore.session.Session = None):
    base_session = base_session or boto3.session.Session()._session
    fetcher = botocore.credentials.AssumeRoleCredentialFetcher(
        client_creator=base_session.create_client,
        source_credentials=base_session.get_credentials(),
        role_arn=role_arn,
        extra_args={
            # 'RoleSessionName': None # set this if you want something non-default
        }
    )
    creds = botocore.credentials.DeferredRefreshableCredentials(
        method='assume-role',
        refresh_using=fetcher.fetch_credentials,
        time_fetcher=lambda: datetime.datetime.now(tzlocal())
    )
    botocore_session = botocore.session.Session()
    botocore_session._credentials = creds
    return boto3.Session(botocore_session=botocore_session)

# usage:
session = assumed_role_session('arn:aws:iam::ACCOUNTID:role/ROLE_NAME')
ec2 = session.client('ec2')  # ... etc.
The resulting session's credentials will be automatically refreshed when required, which is quite nice.
Note: my previous answer was outright wrong but I can't delete it, so I've replaced it with a better and working answer.
You can assume a role using an STS token, like this:
from boto3.session import Session

# ARN_ACCESS_KEY, ARN_SECRET_KEY and ARN_ROLE_SESSION_NAME are your base credentials and session name
class Boto3STSService(object):
    def __init__(self, arn):
        sess = Session(aws_access_key_id=ARN_ACCESS_KEY,
                       aws_secret_access_key=ARN_SECRET_KEY)
        sts_connection = sess.client('sts')
        assume_role_object = sts_connection.assume_role(
            RoleArn=arn, RoleSessionName=ARN_ROLE_SESSION_NAME,
            DurationSeconds=3600)
        self.credentials = assume_role_object['Credentials']
This will give you a temporary access key and secret key, along with a session token. With these temporary credentials, you can access any service. For example, if you want to access ELB, you can use the code below:
self.tmp_credentials = Boto3STSService(arn).credentials

def get_boto3_session(self):
    tmp_access_key = self.tmp_credentials['AccessKeyId']
    tmp_secret_key = self.tmp_credentials['SecretAccessKey']
    security_token = self.tmp_credentials['SessionToken']

    boto3_session = Session(
        aws_access_key_id=tmp_access_key,
        aws_secret_access_key=tmp_secret_key,
        aws_session_token=security_token
    )
    return boto3_session

def get_elb_boto3_connection(self, region):
    sess = self.get_boto3_session()
    elb_conn = sess.client(service_name='elb', region_name=region)
    return elb_conn
With reference to the solution by @jarrad, which is not working as of Feb 2021, and as a solution that does not use STS explicitly, please see the following.
import boto3
import botocore.session
from botocore.credentials import AssumeRoleCredentialFetcher, DeferredRefreshableCredentials


def get_boto3_session(assume_role_arn=None):
    session = boto3.Session(aws_access_key_id="abc", aws_secret_access_key="def")
    if not assume_role_arn:
        return session

    fetcher = AssumeRoleCredentialFetcher(
        client_creator=_get_client_creator(session),
        source_credentials=session.get_credentials(),
        role_arn=assume_role_arn,
    )
    botocore_session = botocore.session.Session()
    botocore_session._credentials = DeferredRefreshableCredentials(
        method='assume-role',
        refresh_using=fetcher.fetch_credentials
    )
    return boto3.Session(botocore_session=botocore_session)


def _get_client_creator(session):
    def client_creator(service_name, **kwargs):
        return session.client(service_name, **kwargs)
    return client_creator
The function can be called as follows (note that the keyword argument matches the assume_role_arn parameter):
ec2_client = get_boto3_session(assume_role_arn='my_role_arn').client('ec2', region_name='us-east-1')
If you want a functional implementation, this is what I settled on:
from typing import Optional

import boto3


def filter_none_values(kwargs: dict) -> dict:
    """Returns a new dictionary excluding items where value was None"""
    return {k: v for k, v in kwargs.items() if v is not None}


def assume_session(
    role_session_name: str,
    role_arn: str,
    duration_seconds: Optional[int] = None,
    region_name: Optional[str] = None,
) -> boto3.Session:
    """
    Returns a session with the given name and role.
    If not specified, duration will be set by AWS, probably at 1 hour.
    If not specified, region will be left unset.
    Region can be overridden by each client or resource spawned from this session.
    """
    assume_role_kwargs = filter_none_values(
        {
            "RoleSessionName": role_session_name,
            "RoleArn": role_arn,
            "DurationSeconds": duration_seconds,
        }
    )
    credentials = boto3.client("sts").assume_role(**assume_role_kwargs)["Credentials"]
    create_session_kwargs = filter_none_values(
        {
            "aws_access_key_id": credentials["AccessKeyId"],
            "aws_secret_access_key": credentials["SecretAccessKey"],
            "aws_session_token": credentials["SessionToken"],
            "region_name": region_name,
        }
    )
    return boto3.Session(**create_session_kwargs)


def main() -> None:
    session = assume_session(
        "MyCustomSessionName",
        "arn:aws:iam::XXXXXXXXXXXX:role/TheRoleIWantToAssume",
        region_name="us-east-1",
    )
    client = session.client(service_name="ec2")
    print(client.describe_key_pairs())
import json
import boto3

roleARN = 'arn:aws:iam::account-of-role-to-assume:role/name-of-role'

client = boto3.client('sts')
response = client.assume_role(RoleArn=roleARN,
                              RoleSessionName='RoleSessionName',
                              DurationSeconds=900)

dynamodb_client = boto3.client('dynamodb', region_name='us-east-1',
                               aws_access_key_id=response['Credentials']['AccessKeyId'],
                               aws_secret_access_key=response['Credentials']['SecretAccessKey'],
                               aws_session_token=response['Credentials']['SessionToken'])

response = dynamodb_client.get_item(
    Key={
        'key1': {
            'S': '1',
        },
        'key2': {
            'S': '2',
        },
    },
    TableName='TestTable')
print(response)
#!/usr/bin/env python3
import boto3

sts_client = boto3.client('sts')
assumed_role = sts_client.assume_role(RoleArn="arn:aws:iam::123456789012:role/example_role",
                                      RoleSessionName="AssumeRoleSession1",
                                      DurationSeconds=1800)

session = boto3.Session(
    aws_access_key_id=assumed_role['Credentials']['AccessKeyId'],
    aws_secret_access_key=assumed_role['Credentials']['SecretAccessKey'],
    aws_session_token=assumed_role['Credentials']['SessionToken'],
    region_name='us-west-1'
)

# now we make use of the role to retrieve a parameter from SSM
client = session.client('ssm')
response = client.get_parameter(
    Name='/this/is/a/path/parameter',
    WithDecryption=True
)
print(response)
Assuming that 1) the ~/.aws/config or ~/.aws/credentials file is populated with each of the roles that you wish to assume and that 2) the default role has AssumeRole defined in its IAM policy for each of those roles, then you can simply (in pseudo-code) do the following and not have to fuss with STS:
import boto3

# get all of the roles from the AWS config/credentials file using a config file parser
profiles = get_profiles()

for profile in profiles:
    # this is only used to fetch the available regions
    initial_session = boto3.Session(profile_name=profile)

    # get the regions
    regions = initial_session.get_available_regions('ec2')

    # cycle through the regions, setting up session, resource and client objects
    for region in regions:
        boto3_session = boto3.Session(profile_name=profile, region_name=region)
        boto3_resource = boto3_session.resource(service_name='s3', region_name=region)
        boto3_client = boto3_session.client(service_name='s3', region_name=region)
        # [ do something interesting with your session/resource/client here ]
Credential Setup (boto3 - Shared Credentials File)
Assume Role Setup (AWS)
After a few days of searching, this is the simplest solution I have found. It is explained here, but it does not have a usage example.
import boto3

for profile in boto3.Session().available_profiles:
    boto3.DEFAULT_SESSION = boto3.session.Session(profile_name=profile)
    s3 = boto3.resource('s3')
    for bucket in s3.buckets.all():
        print(bucket)
This will switch the default role you will be using. To not make the profile the default, just do not assign it to boto3.DEFAULT_SESSION, but instead do the following:
testing_profile = boto3.session.Session(profile_name='mainTesting')
s3 = testing_profile.resource('s3')
for bucket in s3.buckets.all():
    print(bucket)
It is important to note that the .aws credentials need to be set up in a specific way:
[default]
aws_access_key_id = default_access_id
aws_secret_access_key = default_access_key
[main]
aws_access_key_id = main_profile_access_id
aws_secret_access_key = main_profile_access_key
[mainTesting]
source_profile = main
role_arn = Testing role arn
mfa_serial = mfa_arn_for_main_role
[mainProduction]
source_profile = main
role_arn = Production role arn
mfa_serial = mfa_arn_for_main_role
I don't know why, but the mfa_serial key has to be on the role profiles for this to work, instead of on the source account, which would make more sense.
Here's the code snippet I used
sts_client = boto3.client('sts')

assumed_role_object = sts_client.assume_role(
    RoleArn=<arn of the role to assume>,
    RoleSessionName="<role session name>"
)
print(assumed_role_object)

credentials = assumed_role_object['Credentials']
session = Session(
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken']
)
self.s3 = session.client('s3')
In my Flask app, I'm using Dependency Injection and here's what my app looks like. I have a service which uses S3 as a datastore and I'm trying to instantiate my app with the service injected (which is injected with the S3 client). However, it doesn't look like the S3 client is correctly instantiated or I'm doing something wildly different.
containers.py
class Container(containers.DeclarativeContainer):
    wiring_config = containers.WiringConfiguration(modules=[".routes", ".scheduler"])
    config = providers.Configuration()

    s3_config = dict()
    s3_config["path"], s3_config["filters"] = config.get("s3_bucket"), [("member_status_nm", "=", "ACTIVE")]

    s3_repository = providers.Singleton(S3Repository, s3_config)

    my_service = providers.Factory(
        MyService, config, S3Repository
    )
Here's my S3Repository:
import logging
import sys
import time

import some_library as lib


class S3Repository:
    def __init__(self, s3_config):
        self.path, self.columns, self.filters = \
            s3_config.get("path", ""), s3_config.get("columns", []), s3_config.get("filters", [])

    def fetch(self):
        # execute fetch
        result = lib.some_fetch_method(self.path, self.columns, self.filters)
        return result
and MyService:
# all relevant imports here

class MyService:
    def __init__(self, config: dict, s3_repository: S3Repository) -> None:
        logging.info("HealthSignalService(): initializing")
        self.config = config["app"]["health_signal_service"]

        # prepare s3_repository for the service
        self.s3_repository = s3_repository
        self.s3_repository.columns, self.s3_repository.filters, self.s3_repository.path = \
            ["x", "y"], ["x1", "y1"], "file_path"

    def fetch_data(self) -> None:
        try:
            summary_result = self.s3_repository.fetch()
        except (FileNotFoundError, IOError) as e:
            print("failure")
        return summary_result

    def get_data(self, memberId):
        sth = self.fetch_data()
        return sth.get(memberId)
and finally tying it together in my routes.py:
@inject
@auth.login_required
def get_signals(
    my_service: MyService = Provide[
        Container.my_service
    ],
):
    content = request.json
    member_id = content["memberId"]
    result = my_service.get_signals(member_id)
    return jsonify(result)
When I hit my API endpoint I see this error:
summary_result = self.s3_repository.fetch()
TypeError: fetch() missing 1 required positional argument: 'self'
How do I correctly initialize my S3 client while using dependency injection?
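For what it's worth, this specific TypeError usually means the container handed MyService the S3Repository class itself, so fetch() is called on the class and self is never bound. A minimal sketch of the likely fix (assuming the dependency-injector library used above; the hard-coded config dict is only illustrative) is to wire the s3_repository provider, not the class:

from dependency_injector import containers, providers


class Container(containers.DeclarativeContainer):
    wiring_config = containers.WiringConfiguration(modules=[".routes", ".scheduler"])
    config = providers.Configuration()

    # Singleton provider that builds the repository instance
    s3_repository = providers.Singleton(
        S3Repository,
        {"path": "", "filters": [("member_status_nm", "=", "ACTIVE")]},  # illustrative config
    )

    # pass the provider, not the S3Repository class, so the factory receives an instance
    my_service = providers.Factory(MyService, config, s3_repository)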
I need to limit the number of results returned by the query.
My Lambda function is:
import json
import boto3
from boto3.dynamodb.conditions import Key, Attr

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('users')

def lambda_handler(event, context):
    response = table.scan(
        FilterExpression=Attr('name').eq('test')
    )
    items = response['Items']
    return items
Please help me: how can I add a limit to the number of results it will return?
You can try with pagination.
import boto3

def lambda_handler(event, context):
    client = boto3.client('dynamodb')
    paginator = client.get_paginator('scan')
    operation_parameters = {
        'TableName': 'foo',
        'FilterExpression': 'foo > :x',
        'ExpressionAttributeValues': {
            ':x': {'N': '0'}  # placeholder value for the :x parameter
        }
    }
    page_iterator = paginator.paginate(**operation_parameters)
    for page in page_iterator:
        # do something
        pass
boto3 source source2
If you just want a limiting technique, take a look at this solution, but note that it uses the query method, not scan.
def full_query(table, **kwargs):
    response = table.query(**kwargs)
    items = response['Items']
    while 'LastEvaluatedKey' in response:
        response = table.query(ExclusiveStartKey=response['LastEvaluatedKey'], **kwargs)
        items.extend(response['Items'])
    return items

full_query(table, Limit=37, KeyConditions={...})
Source : https://stackoverflow.com/a/56468281/9592801
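Since the question uses scan rather than query, here is a minimal sketch of the same limiting idea applied to the table resource's scan method (the table name and filter are taken from the question; note that Limit caps the number of items evaluated per page, not the number returned after filtering):

import boto3
from boto3.dynamodb.conditions import Attr

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('users')

def scan_with_limit(max_items):
    """Scan until max_items matching items are collected or the table is exhausted."""
    items = []
    kwargs = {
        'FilterExpression': Attr('name').eq('test'),
        'Limit': max_items,
    }
    while True:
        response = table.scan(**kwargs)
        items.extend(response['Items'])
        if len(items) >= max_items or 'LastEvaluatedKey' not in response:
            return items[:max_items]
        kwargs['ExclusiveStartKey'] = response['LastEvaluatedKey']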
Is it possible to import data already in Cloud Storage into a temporary table in BigQuery using Python? Can I create a BigQuery temporary table in Python and insert data into it?
You can only create true temporary tables as part of a BigQuery script or stored procedure.
What you can do instead is create tables with a random suffix in their name and a short expiry, one hour in my example. The example function creates the temp table and only needs a dataset as a parameter.
from google.cloud import bigquery
import datetime, pytz, random

PROJECT = "myproject"

def get_temp_table(dataset: str, table_name: str = None, project=None) -> bigquery.Table:
    prefix = "temp"
    suffix = random.randint(10000, 99999)
    if not table_name:
        table_name = "noname"
    temp_table_name = f"{dataset}.{prefix}_{table_name}_{suffix}"
    if project:
        temp_table_name = f"{project}.{temp_table_name}"
    tmp_table_def = bigquery.Table(temp_table_name)
    tmp_table_def.expires = datetime.datetime.now(pytz.utc) + datetime.timedelta(
        hours=1
    )
    return tmp_table_def

client = bigquery.Client(project=PROJECT)

tmp_table_def = get_temp_table("mydataset", "new_users", project=PROJECT)
tmp_table_def.schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]

tmp_table = client.create_table(tmp_table_def)  # type: bigquery.Table

data = [
    {"id": "c-1234", "full_name": "John Smith", "age": 39},
    {"id": "c-1234", "full_name": "Patricia Smith", "age": 41},
]

errors = client.insert_rows(tmp_table, data)
print(f"Loaded {len(data)} rows into {tmp_table.dataset_id}:{tmp_table.table_id} with {len(errors)} errors")
(This draft doesn't cover a temporary table, but I think it can help.)
I used this with Google Cloud Functions and Python 3.7, and it works fine.
from google.cloud import storage, bigquery
import json
import os
import csv
import io
import pandas as pd

def upload_dataframe_gbq(df, table_name):
    bq_client = bigquery.Client()
    dataset_id = 'your_dataset_id'
    dataset_ref = bq_client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_name)
    job = bq_client.load_table_from_dataframe(df, table_ref)
    job.result()  # Waits for table load to complete.
    assert job.state == "DONE"
    table = bq_client.get_table(table_ref)
    print(table.num_rows)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "your_credentials.json"

client = storage.Client()
bucket = client.get_bucket('your_bucket_name')
blob = bucket.blob('sample.csv')
content = blob.download_as_string()
csv_content = io.BytesIO(content)
df = pd.read_csv(csv_content, sep=",", header=0)

table_name = "your_big_query_table_name"
upload_dataframe_gbq(df, table_name)