Write Python dataframe to Oracle

I have a dataframe testdata. Here are its columns' types in Python:
detectorid:int64
starttime:str
volume:float64
speed:float64
occupancy:float64
Now I want to create a table in Oracle and insert this dataframe into it. Here is what I tried:
import pandas as pd
import cx_Oracle

host = "192.168.1.100"
port = "1521"
sid = "orcl"
dsn = cx_Oracle.makedsn(host, port, sid)
conn = cx_Oracle.connect("scott", "tiger", dsn)
cursor = conn.cursor()

# create the table:
sql_creat = "create table portland(detectorid number(32), starttime varchar(32), volume number(32), speed number(32), occupancy number(32))"
cursor.execute(sql_creat)

query = "insert into portland (detectorid,starttime,volume,speed,occupancy) VALUES (%d,'%s',%f,%f,%f)"
# insert row by row:
for i in range(len(testdata)):
    detectorid = testdata.ix[i, 0]
    starttime = testdata.ix[i, 1]
    volume = testdata.ix[i, 2]
    speed = testdata.ix[i, 3]
    occupancy = testdata.ix[i, 4]
    cursor.execute(query % (detectorid, starttime, volume, speed, occupancy))

conn.commit()
cursor.close()
conn.close()
However it gives me DatabaseError: ORA-00984: column not allowed here. I think there is something wrong with the columns' types in my SQL statement, but I don't know how to solve it. Could somebody give me some instructions? Thank you for your attention!

#!/usr/local/bin/python3
import cx_Oracle
import os
conn = cx_Oracle.connect("user", "xxx", "localhost:1512/ORCLPDB1", encoding="UTF-8")
cursor = conn.cursor()
# create table:
sql_creat = "create table portland(detectorid number(32), starttime varchar(32), volume number(32), speed number(32), occupancy number(32))"
#cursor.execute(sql_creat)
query = "insert into portland (detectorid,starttime,volume,speed,occupancy) VALUES (%d,'%s',%f,%f,%f)"
detectorid = 1345
starttime = '2011-09-15 00:00:00'
volume = 0
speed = 0
occupancy= 0
cursor.execute(query % (detectorid,starttime,volume,speed,occupancy))
conn.commit()
cursor.close()
conn.close()
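For reference, here is a sketch of the same insert using bind variables and cursor.executemany(), so that cx_Oracle handles quoting and type conversion instead of %-string formatting; the connection details and the testdata column order are assumed from the question:

import cx_Oracle

# connection details copied from the question
dsn = cx_Oracle.makedsn("192.168.1.100", "1521", "orcl")
conn = cx_Oracle.connect("scott", "tiger", dsn)
cursor = conn.cursor()

# positional bind variables (:1 ... :5) instead of %-string formatting,
# so the driver quotes strings and converts numbers itself
insert_sql = ("insert into portland "
              "(detectorid, starttime, volume, speed, occupancy) "
              "values (:1, :2, :3, :4, :5)")

# build plain-Python tuples; int()/str()/float() avoid handing numpy
# scalar types straight to the driver (testdata is the question's dataframe)
rows = [(int(r.detectorid), str(r.starttime), float(r.volume),
         float(r.speed), float(r.occupancy))
        for r in testdata.itertuples(index=False)]

cursor.executemany(insert_sql, rows)
conn.commit()
cursor.close()
conn.close()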

Related

Insert variable data into SQL Server using Python

def LiraRateApiCall():
    R = requests.get(url)
    timestamp = R.json()['buy'][-1][0] / 1000
    format_date = '%d/%m/%y'
    date = datetime.fromtimestamp(timestamp)
    buyRate = R.json()['buy'][-1][1]
    print(date.strftime(format_date))
    print(buyRate)

    # ADDING TO SQL SERVER
    conn = odbc.connect("Driver={ODBC Driver 17 for SQL Server};"
                        'Server=LAPTOP-36NUUO53\SQLEXPRESS;'
                        'Database=test;'
                        'Trusted_connection=yes;')
    cursor = conn.cursor()
    cursor.execute('''
        INSERT INTO Data_table (Time1,Price)
        VALUES
        ('date',140),
        ('Date2' , 142)
        ''')
    conn.commit()

    cursor.execute('SELECT * FROM Data_table')
    for i in cursor:
        print(i)
How do I pass the variables date and buyRate to the table instead of putting in values like I did (I put in 'date', 140 for example, but I want to pass variables, not specific values)?
You'll need to check the driver version that you're using, but what you're looking for is the concept of bind variables. I'd suggest you look into the concept of fast_executemany as well - that should help speed things up. I've edited your code to show how bind variables typically work (using the (?, ?) SQL syntax), but there are other formats out there.
def LiraRateApiCall():
    R = requests.get(url)
    timestamp = R.json()['buy'][-1][0] / 1000
    format_date = '%d/%m/%y'
    date = datetime.fromtimestamp(timestamp)
    buyRate = R.json()['buy'][-1][1]
    print(date.strftime(format_date))
    print(buyRate)

    # ADDING TO SQL SERVER
    conn = odbc.connect("Driver={ODBC Driver 17 for SQL Server};"
                        'Server=LAPTOP-36NUUO53\SQLEXPRESS;'
                        'Database=test;'
                        'Trusted_connection=yes;')
    cursor = conn.cursor()

    # Set up the data as a list of parameter tuples
    data = [('date', 140), ('Date2', 142)]
    # Use executemany since we have a list
    cursor.executemany('''
        INSERT INTO Data_table (Time1,Price)
        VALUES (?, ?)
        ''', data)
    conn.commit()

    cursor.execute('SELECT * FROM Data_table')
    for i in cursor:
        print(i)
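The fast_executemany option mentioned above is, assuming odbc here is pyodbc, a cursor attribute that sends the whole parameter batch in one round trip; a minimal sketch:

# assumes 'odbc' is pyodbc; enable batch execution before executemany
cursor = conn.cursor()
cursor.fast_executemany = True

data = [(date.strftime(format_date), buyRate)]  # the variables from the question
cursor.executemany(
    "INSERT INTO Data_table (Time1, Price) VALUES (?, ?)",
    data,
)
conn.commit()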
I don't entirely understand your question.
If you want to pass the variables:
insert_sql = "INSERT INTO Data_table (Time1,Price) VALUES ('" + date.strftime(format_date) + "'," + str(buyRate) + ")"
cursor.execute(insert_sql)
If you want to do dynamic Insert:
You can only insert by knowing the values, or by inserting with a select:
INSERT INTO table
SELECT * FROM tableAux
WHERE condition;
Alternatively, you could iterate through the fields the table has, extract them, and match them against your variables to do a dynamic insert; a sketch follows after the query below.
With this select you can extract the columns:
SELECT *
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = N'table1'
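As a rough sketch of that idea (assuming the pyodbc-style ? placeholders from the first answer, and that Data_table has only the Time1 and Price columns), the column list from INFORMATION_SCHEMA can drive the INSERT statement:

# read the target table's column names (SQL Server, table name from the question)
cursor.execute(
    "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS "
    "WHERE TABLE_NAME = N'Data_table' ORDER BY ORDINAL_POSITION"
)
columns = [row[0] for row in cursor.fetchall()]

# build the INSERT dynamically from that column list
placeholders = ", ".join("?" for _ in columns)
insert_sql = (f"INSERT INTO Data_table ({', '.join(columns)}) "
              f"VALUES ({placeholders})")

# the value tuple must line up with the columns, here assumed to be
# exactly (Time1, Price)
cursor.execute(insert_sql, (date.strftime(format_date), buyRate))
conn.commit()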

Integer out of range when inserting a large number of rows to Postgres

I have tried multiple solutions and workarounds to solve this issue, but I am probably still missing something.
I want to insert a list of values into my database. Here is what I am doing:
import psycopg2
import pandas as pd

Region = [
    "Region1",
    "Region2",
]
qa = "endpoint1"

def insert_many(data_list):
    """Add data to the table."""
    insert_query = """INSERT INTO pbi_forecast(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u)
                      VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    conn = None
    try:
        conn = psycopg2.connect(
            database='db',
            user='user',
            host='host1',
            port=5432,
            password=pw
        )
        cur = conn.cursor()
        cur.executemany(insert_query, data_list)
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

# connect prod2
conn_prod2 = psycopg2.connect(
    database='db',
    user='user',
    host='host2',
    port=5432,
    password=pw
)
cur_prod2 = conn_prod2.cursor()

for re in Region:
    sql_prod2_select = f"select * from pbi_forecast where \
        run_date >= '2022-04-20 00:00:00' \
        and run_date <= '2022-04-22 00:00:00' \
        and region = '{re}' ;"
    cur_prod2.execute(sql_prod2_select)
    forecast = pd.DataFrame(cur_prod2.fetchall())
    data_list = [list(row) for row in forecast.itertuples(index=False)]
    insert_many(data_list)
I am getting an integer out of range error when running it. If I restrict the insert to around 50 records it works, but when running it without any limit it throws this error.
Thanks in advance.
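PostgreSQL's integer type only accepts values between -2147483648 and 2147483647, so a single oversized value anywhere in the batch makes the whole executemany fail. A small diagnostic sketch (the column types of pbi_forecast are not shown, so this just scans every integer value) to locate such rows before inserting:

import numbers

INT4_MIN, INT4_MAX = -2_147_483_648, 2_147_483_647

def find_out_of_range(data_list):
    """Return (row_index, column_index, value) for every integer value
    that will not fit into a PostgreSQL integer column."""
    bad = []
    for r, row in enumerate(data_list):
        for c, value in enumerate(row):
            if isinstance(value, numbers.Integral) and not (INT4_MIN <= value <= INT4_MAX):
                bad.append((r, c, value))
    return bad

# e.g. print(find_out_of_range(data_list)) just before insert_many(data_list)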

How to insert records into PostgreSQL with Python?

I am trying to insert a pandas dataframe into a PostgreSQL table. What I am doing is inserting the dataframe records row by row in a loop. I keep getting errors; the code is shown below:
Code:
import psycopg2
import pandas as pd

df = pd.read_csv('dataframe.csv')

conn = psycopg2.connect(database = "postgres",
                        user = "postgres",
                        password = "12345",
                        host = "127.0.0.1",
                        port = "5432")
cur = conn.cursor()

for i in range(0, len(df)):
    cur.execute("INSERT INTO stock_market_forecasting_new (date, open, high, low, close) \
                 VALUES (df['date'][i], df['open'][i], df['high'][i], df['low'][i], df['close'][i])")
conn.commit()

print("Records created successfully")
conn.close()
Error:
UndefinedColumn: column "df" does not exist
LINE 1: ..._new (date, open, high, low, close) VALUES (df['date']...
Edit 1:
I am doing it like this:
cur.execute("SELECT * from STOCK_MARKET_FORECASTING")
rows = cur.fetchall()
for row in rows:
print(row)
print("Operation done successfully")
conn.close()
Output giving:
('2021-12-07 00:00:00', 1.12837, 1.12846, 1.1279, 1.128)
('2021-12-07 01:00:00', 1.12799, 1.12827, 1.1276, 1.1282)
The output I want should include the column names:
**Date open high low close**
('2021-12-07 00:00:00', 1.12837, 1.12846, 1.1279, 1.128)
('2021-12-07 01:00:00', 1.12799, 1.12827, 1.1276, 1.1282)
You didn't use a formatted string. It should be f"VALUES ({df['date'][i]}," etc. depending on your data type, but that always leads to issues with quotation marks. I recommend inserting with tuples instead, as seen in the documentation:
import psycopg2
import pandas as pd

df = pd.read_csv('dataframe.csv')

conn = psycopg2.connect(database = "postgres",
                        user = "postgres",
                        password = "12345",
                        host = "127.0.0.1",
                        port = "5432")
cur = conn.cursor()

for i in range(0, len(df)):
    values = (df['date'][i], df['open'][i], df['high'][i], df['low'][i], df['close'][i])
    cur.execute("INSERT INTO stock_market_forecasting_new (date, open, high, low, close) VALUES (%s, %s, %s, %s, %s)",
                values)
conn.commit()

print("Records created successfully")
conn.close()
Alternatively, you could see if df.to_sql() (documentation) supports psycopg2 connections:
df.to_sql('stock_market_forecasting_new', con=conn, if_exists='append', index=False)
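Note that df.to_sql() expects a SQLAlchemy connectable (or a sqlite3 connection) rather than a raw psycopg2 connection, so in practice that alternative looks more like this sketch, reusing the credentials from the question:

from sqlalchemy import create_engine

# postgresql+psycopg2://user:password@host:port/database
engine = create_engine("postgresql+psycopg2://postgres:12345@127.0.0.1:5432/postgres")
df.to_sql('stock_market_forecasting_new', con=engine, if_exists='append', index=False)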

Updating results from a mysql-connector fetchall

I'm trying to select certain records from the civicrm_address table and update the geocode columns. I use fetchall to retrieve the rows and then, within the same loop, I try to update with the results of the geocoder API, passing the civicrm_address.id value in the update_sql statement.
The rowcount after the attempted update and commit is always -1, so I am assuming it failed for some reason, but I have yet to figure out why.
import geocoder
import mysql.connector

mydb = mysql.connector.connect(
    [redacted]
)

mycursor = mydb.cursor(dictionary=True)
update_cursor = mydb.cursor()

sql = """
    select
        a.id
        , street_address
        , city
        , abbreviation
    from
        civicrm_address a
        , civicrm_state_province b
    where
        location_type_id = 6
    and
        a.state_province_id = b.id
    and
        street_address is not null
    and
        city is not null
    limit 5
    """

mycursor.execute(sql)
rows = mycursor.fetchall()
print(mycursor.rowcount, "records selected")

for row in rows:
    address_id = int(row["id"])
    street_address = str(row["street_address"])
    city = str(row["city"])
    state = str(row["abbreviation"])
    myaddress = street_address + " " + city + ", " + state
    g = geocoder.arcgis(myaddress)
    d = g.json
    latitude = d["lat"]
    longitude = d["lng"]

    update_sql = """
        begin work;
        update
            civicrm_address
        set
            geo_code_1 = %s
            , geo_code_2 = %s
        where
            id = %s
        """
    var = (latitude, longitude, address_id)
    print(var)

    update_cursor.execute(update_sql, var, multi=True)
    mydb.commit()
    print(update_cursor.rowcount)

mycursor.close()
update_cursor.close()
mydb.close()
Here is a simpler script. I have executed the update_sql statement directly in MySQL Workbench and it succeeds; it is not working from Python.
import geocoder
import mysql.connector

try:
    mydb = mysql.connector.connect(
        [redacted]
    )
    mycursor = mydb.cursor(dictionary=True)
    update_cursor = mydb.cursor()

    update_sql = """
        begin work;
        update
            civicrm_address
        set
            geo_code_1 = 37.3445
            , geo_code_2 = -118.5366074
        where
            id = 65450;
        """
    update_cursor.execute(update_sql, multi=True)
    mydb.commit()
    print(update_cursor.rowcount, "row(s) were updated")
except mysql.connector.Error as error:
    print("Failed to update record to database: {}".format(error))
    mydb.rollback()
finally:
    # closing database connection.
    if mydb.is_connected():
        mydb.close()
I have it working now. At first I removed the "begin work" statement but kept multi=True, and it still wouldn't work. Once I also removed multi=True, it works.
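Based on that, the working update presumably looks like this sketch; the SQL and bind values are the ones from the loop in the question, with begin work; and multi=True dropped:

update_sql = """
    update civicrm_address
    set geo_code_1 = %s
      , geo_code_2 = %s
    where id = %s
    """
var = (latitude, longitude, address_id)

# a single statement, so no multi=True; the transaction is handled by commit()
update_cursor.execute(update_sql, var)
mydb.commit()
print(update_cursor.rowcount, "row(s) updated")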

How to store a MySQL query result into a pandas DataFrame with pymysql?

I'm trying to store a MySQL query result in a pandas DataFrame using pymysql and am running into errors building the dataframe. I found a similar question here and here, but it looks like there are pymysql-specific errors being thrown:
import pandas as pd
import datetime
import pymysql

# dummy values
connection = pymysql.connect(user='username', password='password', database='database_name', host='host')

start_date = datetime.datetime(2017, 11, 15)
end_date = datetime.datetime(2017, 11, 16)

try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(data=cursor.fetchall(), index=None, columns=cursor.keys())
finally:
    connection.close()
returns: AttributeError: 'Cursor' object has no attribute 'keys'
If I drop the index and columns arguments:
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(cursor.fetchall())
finally:
    connection.close()
returns ValueError: DataFrame constructor not properly called!
Thanks in advance!
Use pandas.read_sql() for this (with pymysql the placeholders are %s, not ?):
query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
df = pd.read_sql(query, connection, params=(start_date, end_date))
Thank you for your suggestion to use pandas.read_sql(). It works for executing a stored procedure as well! I tested it in an MSSQL 2017 environment.
Below is an example (I hope it helps others):
def database_query_to_df(connection, stored_proc, start_date, end_date):
    # Define a query
    query = "SET NOCOUNT ON; EXEC " + stored_proc + " ?, ? " + "; SET NOCOUNT OFF"
    # Pass the parameters to the query, execute it, and store the results in a data frame
    df = pd.read_sql(query, connection, params=(start_date, end_date))
    return df
Try this:
import pandas as pd
import pymysql
mysql_connection = pymysql.connect(host='localhost', user='root', password='', db='test', charset='utf8')
sql = "SELECT * FROM `brands`"
df = pd.read_sql(sql, mysql_connection, index_col='brand_id')
print(df)
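Newer pandas versions warn when read_sql is handed a raw DBAPI connection; a hedged alternative, assuming SQLAlchemy is installed and reusing the credentials above, is to wrap the connection in an engine:

import pandas as pd
from sqlalchemy import create_engine

# mysql+pymysql://user:password@host/database (empty password, as above)
engine = create_engine("mysql+pymysql://root:@localhost/test?charset=utf8")
df = pd.read_sql("SELECT * FROM `brands`", engine, index_col='brand_id')
print(df)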
