Insert Google Analytics API data to PostgreSQL [python]

I want to store bulk data into PostgreSQL.
The data comes from the Google Analytics API; it is about pageviews, and here is my code:
data = '[["20151201","path","title",345], ["20151202","path","title",321], ["20151203","path","title",214]]'
def storeJson(jsonFile, tableName):
    conn = psycopg2.connect(host=hostname, user=username, password=password, dbname=database)
    try:
        cur = conn.cursor()
        # Here is the problem:
        cur.executemany("INSERT INTO " + tableName + " VALUES(%s)", [jsonFile])
        conn.commit()
    except psycopg2.DatabaseError as e:
        if conn:
            conn.rollback()
        print("Error %s" % e)
        exit()
    finally:
        if conn:
            cur.close()
            conn.close()

def main():
    storeJson(data, "daily_pageviews")

if __name__ == '__main__':
    main()
With the code above, I got an error message like this:
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 1 column 12 (char 11)
Can someone enlighten me? Thanks guys!

Finally, here is the case: my data isn't in JSON format; it is a list of lists. Here is the solution I got from my friend using SQLAlchemy:
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import MetaData, Table

engine = create_engine('postgresql://db_username:db_password@ip/dbname')
metadata = MetaData()
metadata.bind = engine

def storeJson(jsonFile, tableName):
    table = Table(tableName, metadata, autoload=True)
    # import ipdb; ipdb.set_trace()

    def to_dicts(rows):
        for row in rows:
            data = {}
            for i, column in enumerate(table.columns):
                data[column.name] = row[i]
            yield data

    params = list(to_dicts(jsonFile))
    engine.execute(table.insert(), params)
    return
This assumes the values in the jsonFile list are ordered exactly like the columns of the table in the database.
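For instance, with the sample data from the question, a call might look like this (a sketch; it assumes daily_pageviews has four columns in the same date/path/title/pageviews order):
rows = [["20151201", "path", "title", 345],
        ["20151202", "path", "title", 321],
        ["20151203", "path", "title", 214]]
storeJson(rows, "daily_pageviews")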
Note: You could install sqlalchemy using pip
python -m pip install sqlalchemy --user
As for how to get the data from Google Analytics, you could visit its site: https://developers.google.com/analytics/devguides/reporting/core/v3/quickstart/service-py

jsonFile is a string in your case. You need to load it with json.loads():
import json
data = json.loads(jsonFile)
cur.executemany("INSERT INTO " + tableName + " VALUES(%s, %s, %s, %s)", data)
Note that there are 4 placeholders in the query, one for each item in every sublist.
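Putting the two points together, a minimal sketch of the corrected function might look like this (it assumes the four-column daily_pageviews table and the hostname/username/password/database settings from the question):
import json
import psycopg2

def store_pageviews(json_string, table_name):
    # Parse the JSON string into a list of lists first.
    rows = json.loads(json_string)
    conn = psycopg2.connect(host=hostname, user=username,
                            password=password, dbname=database)
    try:
        with conn.cursor() as cur:
            # One placeholder per column, one sublist per row.
            cur.executemany(
                "INSERT INTO " + table_name + " VALUES (%s, %s, %s, %s)", rows)
        conn.commit()
    finally:
        conn.close()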

Related

Getting row counts from Redshift during unload process and counting rows loaded in S3

My Python code is below, where I am unloading data from Redshift to an Amazon S3 bucket. I am trying to get the row count from both Redshift and the S3 bucket to ensure that all the data is loaded. Additionally, I would also like to get the last uploaded date from the S3 bucket so that I know when the last unload was performed. Kindly suggest the code with an explanation.
Thanks in advance for your time and efforts!
import csv
import redshift_connector
import sys

CSV_FILE = "Tables.csv"
CSV_DELIMITER = ';'
S3_DEST_PATH = "s3://..../"
DB_HOST = "MY HOST"
DB_PORT = 1234
DB_DB = "MYDB"
DB_USER = "MY_READ"
DB_PASSWORD = "MY_PSWD"
IAM_ROLE = "arn:aws:iam::/redshift-role/unload data", "arn:aws::iam::/write in bucket"

def get_tables(path):
    tables = []
    with open(path, 'r') as file:
        csv_reader = csv.reader(file, delimiter=CSV_DELIMITER)
        header = next(csv_reader)
        if header is not None:
            for row in csv_reader:
                tables.append(row)
    return tables

def unload(conn, tables, s3_path):
    cur = conn.cursor()
    for table in tables:
        print(f">{table[0]}.{table[1]}")
        try:
            query = f'''unload('select * from {table[0]}.{table[1]}') to '{s3_path}/{table[1]}/'
            iam_role '{IAM_ROLE}'
            CSV
            PARALLEL FALSE
            CLEANPATH;'''
            print("loading in progress")
            cur.execute(query)
            print("Done.")
        except Exception as e:
            print("Failed to load")
            print(str(e))
            sys.exit(1)
    cur.close()

def main():
    try:
        conn = redshift_connector.connect(
            host=DB_HOST,
            port=DB_PORT,
            database=DB_DB,
            user=DB_USER,
            password=DB_PASSWORD
        )
        tables = get_tables(CSV_FILE)
        unload(conn, tables, S3_DEST_PATH)
        conn.close()
    except Exception as e:
        print(e)
        sys.exit(1)
Update: code based on a comment from an SO user
tables = ['schema1.tablename', 'schema2.table2']

conn = redshift_connector.connect(
    host='my_host',
    port="my_port",
    database='my_db',
    user="user",
    password='password')

cur = conn.cursor()
table_list = ','.join("'" + y + "'" for y in tables)
cur.execute(f'select count(*) from {table_list}')
results = cur.fetchall()
print("The table {} contained".format(tables[0]), *results[0], "rows" + "\n")  # Printing row counts along with table names
cur.close()
conn.close()
2nd Update:
tables = ['schema1.tablename', 'schema2.table2']

conn = redshift_connector.connect(
    host='my_host',
    port="my_port",
    database='my_db',
    user="user",
    password='password')

cur = conn.cursor()
for table in tables:
    cur.execute(f'select count(*) from {table};')
    result = cur.fetchone()
    for row in result:
        print("The table {} contained".format(tables[0]), result[0], "rows" + "\n")  # Printing row counts along with table names
The simple query to get the number of rows is
query = f"select count(*) from {table_name}"
For Redshift, all you need to do is
cur.execute(query)
row_count = cur.fetchall()
Using boto3, you can use a similar SQL query to fetch S3 row count as well, as elucidated in this answer.
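For the S3 side, a hedged sketch with boto3 might look like the following; the bucket and prefix names are placeholders, and it assumes the UNLOAD wrote uncompressed CSV files with no header row, so each line is one row:
import boto3

s3 = boto3.client("s3")
bucket, prefix = "my-bucket", "unload/tablename/"   # placeholders, adjust to your unload path

objects = s3.list_objects_v2(Bucket=bucket, Prefix=prefix).get("Contents", [])

# Last upload time: the most recent LastModified among the unloaded files.
last_upload = max(obj["LastModified"] for obj in objects) if objects else None

# Row count: stream each file and count its lines.
total_rows = 0
for obj in objects:
    body = s3.get_object(Bucket=bucket, Key=obj["Key"])["Body"]
    total_rows += sum(1 for _ in body.iter_lines())

print(f"S3 rows: {total_rows}, last upload: {last_upload}")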
Edit:
Corrected your updated approach a little:
cur = conn.cursor()
for table in tables:
    cur.execute(f'select count(*) from {table};')
    result = cur.fetchone()
    count = result[0] if result else 0
    print(f"The table {table} contained {count} rows.\n")

Add column to postgres table using psycopg2

I'm trying to add a column to an existing postgres table using psycopg2.
add_column takes two arguments: column_name and data_type.
I currently get the following error on line 6: syntax error at or near "VALUES".
Does anyone know where I'm going wrong? Cheers!
import psycopg2
from config import config

def add_column(column_name, data_type):
    # """Add a new column to the vendors table"""
    sql = """ALTER TABLE vendors ADD COLUMN column_name data_type VALUES (%s, %s);"""
    conn = None
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute(sql, (column_name, data_type,))
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

if __name__ == '__main__':
    add_column("test_name", "VARCHAR")
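For reference, a minimal sketch of one way the statement could be written: ALTER TABLE takes the column definition inline rather than a VALUES clause, and identifiers cannot be passed as bound parameters, so psycopg2's sql module can compose them (the data_type string should still come from a trusted list, not from user input):
import psycopg2
from psycopg2 import sql
from config import config

def add_column(column_name, data_type):
    """Add a new column to the vendors table (sketch)."""
    # Column names are identifiers, not values, so compose them with sql.Identifier;
    # the data type is spliced in as raw SQL and must come from a trusted source.
    query = sql.SQL("ALTER TABLE vendors ADD COLUMN {} {}").format(
        sql.Identifier(column_name), sql.SQL(data_type))
    conn = psycopg2.connect(**config())
    try:
        with conn.cursor() as cur:
            cur.execute(query)
        conn.commit()
    finally:
        conn.close()

add_column("test_name", "VARCHAR")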

Open database files (.db) using python

I have a database file (.db) in SQLite3 format and I was attempting to open it to look at the data inside. Below is my attempt at the code using Python.
import sqlite3

# Create a SQL connection to our SQLite database
con = sqlite3.connect(dbfile)
cur = con.cursor()

# The result of a "cursor.execute" can be iterated over by row
for row in cur.execute("SELECT * FROM "):
    print(row)

# Be sure to close the connection
con.close()
For the line ("SELECT * FROM "), I understand that you have to put the name of the table after the word "FROM"; however, since I can't even open up the file in the first place, I have no idea what name to put. How can I write the code so that I can open the database file and read its contents?
So, your analysis is right: after the FROM you have to put in the table name. But you can find the table names out like this:
SELECT name FROM sqlite_master WHERE type = 'table'
In code this looks like this:
# loading in modules
import sqlite3
# creating file path
dbfile = '/home/niklas/Desktop/Stuff/StockData-IBM.db'
# Create a SQL connection to our SQLite database
con = sqlite3.connect(dbfile)
# creating cursor
cur = con.cursor()
# reading all table names
table_list = [a for a in cur.execute("SELECT name FROM sqlite_master WHERE type = 'table'")]
# here is you table list
print(table_list)
# Be sure to close the connection
con.close()
That worked very well for me. You already have the reading of the data right; just paste in the table names.
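For example, once the table names are known, the rows can be read like this (a sketch that simply picks the first table found):
import sqlite3

dbfile = '/home/niklas/Desktop/Stuff/StockData-IBM.db'

con = sqlite3.connect(dbfile)
cur = con.cursor()

# Discover the table names first...
tables = [name for (name,) in
          cur.execute("SELECT name FROM sqlite_master WHERE type = 'table'")]

# ...then iterate over the rows of one of them (here simply the first one).
for row in cur.execute(f"SELECT * FROM {tables[0]}"):
    print(row)

con.close()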
If you want to see the data for visual analysis as a pandas DataFrame, the approach below could also be used.
import pandas as pd
import sqlite3

try:
    conn = sqlite3.connect("file.db")
except Exception as e:
    print(e)

# Now, in order to read it into a pandas DataFrame, we need to know the table name
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(f"Table Name : {cursor.fetchall()}")

df = pd.read_sql_query('SELECT * FROM Table_Name', conn)
conn.close()
from flask import Flask
app = Flask(__name__)

from sqlalchemy import create_engine, select, MetaData, Table
from sqlalchemy.sql import and_, or_

engine = create_engine('sqlite://username:password@host/databasename')

class UserModel():

    def __init__(self):
        try:
            self.meta = MetaData()
            self.users = Table("users", self.meta, autoload=True, autoload_with=engine)
        except Exception as e:
            print(e)

    def get(self):
        stmt = select([self.users.c.name, self.users.c.email, self.users.c.password])
        print(stmt)
        result = engine.execute(stmt)
        temp = [dict(r) for r in result] if result else None
        print(temp)
        return temp

Python cx_Oracle SQL with bind string variable

I have a problem with creating a SQL query for an Oracle database using Python.
I want to bind a string variable and it does not work; could you tell me what I am doing wrong?
This is my code:
import cx_Oracle

dokList = []

def LoadDatabase():
    conn = None
    cursor = None
    try:
        conn = cx_Oracle.connect("login", "password", "localhost")
        cursor = conn.cursor()
        query = "SELECT * FROM DOCUMENT WHERE DOC = :param"
        for doknumber in dokList:
            cursor.execute(query, {'doknr': doknumber})
            print(cursor.rowcount)
    except cx_Oracle.DatabaseError as err:
        print(err)
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

def CheckData():
    with open('changedNamed.txt') as f:
        lines = f.readlines()
        for line in lines:
            dokList.append(line)

CheckData()
LoadDatabase()
The output of cursor.rowcount is 0, but it should be a number greater than 0.
You're using a dictionary ({'doknr' : doknumber}) for your parameter, so it's a named parameter - the :param needs to match the key name. Try this:
query = "SELECT * FROM DOCUMENT WHERE DOC = :doknr"
for doknumber in dokList:
cursor.execute(query, {'doknr':doknumber})
print(cursor.rowcount)
For future troubleshooting, to check whether your parameter is getting passed properly, you can also try changing your query to "select :param from dual".
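For example, a quick sanity check along those lines might look like this (a sketch reusing the names from the question):
# Echo the bound value back from DUAL to confirm the named parameter is passed.
cursor.execute("select :doknr from dual", {'doknr': doknumber})
print(cursor.fetchone())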

psycopg2 postgres database syntax error near value

I am trying to insert info from a pandas DataFrame into a database table by using a function that I wrote:
def insert(table_name="", name="", genere="", year=1, impd_rating=float(1)):
    conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433 ")
    cur = conn.cursor()
    cur.execute("INSERT INTO %s VALUES %s,%s,%s,%s" % (table_name, name, genere, year, impd_rating))
    conn.commit()
    conn.close()
When I try to use this function like this:
b = 0
for row in DF['id']:
    insert(impd_rating=float(DF['idbm_rating'][b]),
           year=int(DF['year'][b]),
           name=str(DF['name'][b]),
           genere=str(DF['genere'][b]),
           table_name='test_movies')
    b = b + 1
I get the following syntax error:
SyntaxError: invalid syntax
PS D:\tito\scripts\database training> python .\postgres_script.py
Traceback (most recent call last):
  File ".\postgres_script.py", line 56, in <module>
    insert(impd_rating=float(DF['idbm_rating'][b]), year=int(DF['year'][b]), name=str(DF['name'][b]), genere=str(DF['genere'][b]), table_name='test_movies')
  File ".\postgres_script.py", line 15, in insert
    cur.execute("INSERT INTO %s VALUES %s,%s,%s,%s" % (table_name, name, genere, year, impd_rating))
psycopg2.ProgrammingError: syntax error at or near "Avatar"
LINE 1: INSERT INTO test_movies VALUES Avatar,action,2009,7.9
I also tried to change the str replacement method from %s to .format()
but I had the same error.
The error message is explicit: this SQL command is wrong at Avatar: INSERT INTO test_movies VALUES Avatar,action,2009,7.9. Values must be enclosed in parentheses, and character strings must be quoted, so the correct SQL is:
INSERT INTO test_movies VALUES ('Avatar','action',2009,7.9)
But building a full SQL command by concatenating parameters is bad practice (*); only the table name should be directly inserted into the command, because it is not a SQL parameter. The correct way is to use a parameterized query:
query = "INSERT INTO " + table_name + " VALUES (%s, %s, %s, %s)"
cur.execute(query, (name, genere, year, impd_rating))
(*) It has been the cause of numerous SQL injection flaws, because if one of the parameters contains a semicolon (;), what comes after it could be interpreted as a new command.
Pandas has a DataFrame method for this, to_sql:
from sqlalchemy import create_engine

# to_sql expects a SQLAlchemy engine (or a sqlite3 connection), not a raw
# psycopg2 connection. Only needs to be created once.
engine = create_engine('postgresql://postgres:postgres333@localhost:5433/database1')

df.to_sql('test_movies', con=engine, if_exists='append', index=False)
This should hopefully get you going in the right direction.
In your original query
INSERT INTO %s VALUES %s,%s,%s,%s
there is an SQL problem: you need parentheses around the values, i.e. it should be VALUES (%s, %s, %s, %s). On top of that, the table name cannot be merged in as a parameter, or it would be escaped as a string, which is not what you want.
You can use the sql module, available since psycopg2 2.7, to merge the table name into the query, with placeholders for the values:
from psycopg2 import sql

query = sql.SQL("INSERT INTO {} VALUES (%s, %s, %s, %s)").format(
    sql.Identifier('test_movies'))
cur.execute(query, ('Avatar', 'action', 2009, 7.9))
This makes both merging the table name and passing the arguments to the query secure.
Hello mohamed mahrous,
First install the psycopg2 package for access to the PostgreSQL database.
Try the code below:
import psycopg2

conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433 ")
cur = conn.cursor()

def insert(table_name, name, genere, year, impd_rating):
    query = "INSERT INTO " + table_name + "(name,genere,year,impd_rating) VALUES(%s,%s,%s,%s)"
    try:
        print(query)
        cur.execute(query, (name, genere, year, impd_rating))
    except Exception as e:
        print("Not execute...")
    conn.commit()

b = 0
for row in DF['id']:
    insert(impd_rating=float(DF['idbm_rating'][b]), year=int(DF['year'][b]), name=str(DF['name'][b]), genere=str(DF['genere'][b]), table_name='test_movies')
    b = b + 1

conn.close()
Example,
import psycopg2

conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433 ")
cur = conn.cursor()

def insert(table_name, name, genere, year, impd_rating):
    query = "INSERT INTO " + table_name + "(name,genere,year,impd_rating) VALUES(%s,%s,%s,%s)"
    try:
        print(query)
        cur.execute(query, (name, genere, year, impd_rating))
    except Exception as e:
        print("Not execute")
    conn.commit()

b = 0
for row in DF['id']:
    insert(impd_rating="7.0", year="2017", name="Er Ceo Vora Mayur", genere="etc", table_name="test_movies")
    b = b + 1

conn.close()
I hope my answer is helpful.
If you have any questions, please comment.
I found a solution for my issue by using SQLAlchemy and the pandas to_sql method.
Thanks for the help, everyone!
import sqlalchemy
from sqlalchemy import *
import pandas as pd

def connect(user, password, db, host='localhost', port=5433):
    '''Returns a connection and a metadata object'''
    # We connect with the help of the PostgreSQL URL
    # postgresql://federer:grandestslam@localhost:5432/tennis
    url = 'postgresql://{}:{}@{}:{}/{}'
    url = url.format(user, password, host, port, db)

    # The return value of create_engine() is our connection object
    con = sqlalchemy.create_engine(url, client_encoding='utf8')

    # We then bind the connection to MetaData()
    meta = sqlalchemy.MetaData(bind=con, reflect=True)

    return con, meta

con, meta = connect('postgres', 'postgres333', 'database1')

movies = Table('test', meta,
               Column('id', Integer, primary_key=True),
               Column('name', String),
               Column('genere', String),
               Column('year', Integer),
               Column('idbm_rating', REAL))

meta.create_all(con)

DF = pd.read_csv('new_movies.txt', sep=' ', engine='python')
DF.columns = ('id', 'name', 'genere', 'year', 'idbm_rating')
DF.to_sql('movies', con=con, if_exists='append', index=False)
