How to store a MySQL query result in a pandas DataFrame with pymysql? - python

I'm trying to store a MySQL query result in a pandas DataFrame using pymysql, and I'm running into errors building the DataFrame. I found a similar question here and here, but it looks like there are pymysql-specific errors being thrown:
import pandas as pd
import datetime
import pymysql
# dummy values
connection = pymysql.connect(user='username', password='password', database='database_name', host='host')
start_date = datetime.datetime(2017,11,15)
end_date = datetime.datetime(2017,11,16)
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(data=cursor.fetchall(), index=None, columns=cursor.keys())
finally:
    connection.close()
returns: AttributeError: 'Cursor' object has no attribute 'keys'
If I drop the index and columns arguments:
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(cursor.fetchall())
finally:
    connection.close()
returns ValueError: DataFrame constructor not properly called!
Thanks in advance!

Use pandas.read_sql() for this. Note that pymysql uses the %s paramstyle, not ?:
query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
df = pd.read_sql(query, connection, params=(start_date, end_date))
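For completeness, a minimal end-to-end sketch, assuming the dummy connection and dates from the question; read_sql fetches the rows and picks up the column names for you:
import datetime
import pandas as pd
import pymysql

# dummy values, as in the question
connection = pymysql.connect(user='username', password='password', database='database_name', host='host')
start_date = datetime.datetime(2017, 11, 15)
end_date = datetime.datetime(2017, 11, 16)
try:
    # pymysql uses the %s paramstyle; the driver escapes the parameters
    query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
    df = pd.read_sql(query, connection, params=(start_date, end_date))
finally:
    connection.close()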

Thank you for the suggestion to use pandas.read_sql(). It works for executing a stored procedure as well; I tested it in an MSSQL 2017 environment.
Below is an example (I hope it helps others):
def database_query_to_df(connection, stored_proc, start_date, end_date):
    # Define the query (pyodbc uses ? placeholders)
    query = "SET NOCOUNT ON; EXEC " + stored_proc + " ?, ?; SET NOCOUNT OFF"
    # Pass the parameters, execute the query, and store the results in a data frame
    df = pd.read_sql(query, connection, params=(start_date, end_date))
    return df
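A hypothetical call, for illustration only; the connection string and the procedure name dbo.usp_GetOrders are made up:
import datetime
import pandas as pd
import pyodbc

# hypothetical DSN-less connection string
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=host;DATABASE=db;UID=user;PWD=pass')
start_date = datetime.datetime(2017, 11, 15)
end_date = datetime.datetime(2017, 11, 16)
# dbo.usp_GetOrders is a made-up procedure name
df = database_query_to_df(connection, 'dbo.usp_GetOrders', start_date, end_date)
connection.close()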

Try this:
import pandas as pd
import pymysql
mysql_connection = pymysql.connect(host='localhost', user='root', password='', db='test', charset='utf8')
sql = "SELECT * FROM `brands`"
df = pd.read_sql(sql, mysql_connection, index_col='brand_id')
print(df)
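If you would rather build the DataFrame from a raw cursor, as in the original question: pymysql cursors have no .keys() method; the column names live in cursor.description, a sequence of 7-item tuples whose first element is the name. A minimal sketch, reusing the connection, query, and dates from the question:
with connection.cursor() as cursor:
    cursor.execute(query, (start_date, end_date))
    rows = cursor.fetchall()
    # item 0 of each description tuple is the column name
    columns = [col[0] for col in cursor.description]
# fetchall() returns a tuple of tuples; wrap it in a list for the constructor
df = pd.DataFrame(list(rows), columns=columns)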

Related

Export dataframe from Python to SQL Sybase database

I want to export a pandas dataframe to a Sybase SQL database. I found code for how to do it here (link).
However, I get the following error:
pyodbc.ProgrammingError: ('42000', '[42000] [Sybase][ODBC Driver]Syntax error or access violation (0) (SQLPrepare)')
Could you help me solve it?
My code:
import pandas as pd
import pyodbc as db
from datetime import datetime

# set constants
DSN = 'DSN'
input_table = 'table'
run_timestamp = str(datetime.now())[:19]
test_date_start = '2020-09-09'
test_date_end = '2025-08-08'

input_data = pd.DataFrame({
    'model':['aaa','aaa'], 'result_type':['a','test_statistic'], 'test_name':['b', 'mwb'], 'input_variable_name':['c','pd'], 'segment':['car','book'],
    'customer_type':['le','le'], 'value':[60, 0.58], 'del_flag':[0,0]
})

query = 'insert into schema.table (data_input_time,test_date_start,test_date_end,model,result_type,test_name,input_variable_name,segment,customer_type,value,del_flag) values (?,?,?,?,?,?,?,?,?,?,?)'
cnxn = db.connect(DSN)
cursor = cnxn.cursor()
cursor.execute('SETUSER MYUSERNAME')
for row_count in range(0, input_data.shape[0]):
    # method 1
    chunk = input_data.iloc[row_count:row_count + 1, :].values.tolist()
    tuple_of_tuples = tuple(tuple(x) for x in chunk)
    cursor.executemany(query, tuple_of_tuples)
    # method 2
    params = [(i,) for i in chunk]  # f'txt{i}'
    cursor.executemany(query, params)
You're using an f-string but forgot the brackets around each interpolated name. It should look like this:
query = f'''insert into schema.table ({data_input_time},{test_date_start},{test_date_end},{model},{result_type},{test_name},{input_variable_name},{segment},{customer_type},{value},{del_flag}) values (?,?,?,?,?,?,?,?,?,?,?)'''
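Separately from the f-string point above, one likely problem, offered as an assumption: the INSERT lists 11 columns, but each DataFrame row supplies only 8 values, so the placeholder and parameter counts do not match. A sketch that prepends the three missing values (run_timestamp, test_date_start, test_date_end from the question) to every row:
# build 11-value rows to match the 11 placeholders
rows = [
    (run_timestamp, test_date_start, test_date_end) + row
    for row in input_data.itertuples(index=False, name=None)
]
cursor.executemany(query, rows)
cnxn.commit()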

Python return error when trying to send a MySQL query that contains MySQL variables

I am trying to retrieve data from a MySQL database by sending a MySQL query using Python.
When I send the MySQL Query in MySQL workbench, it runs perfectly fine.
When I try the same using Python (in a Jupyter Notebook), it returns an error.
Python Code:
import pymysql
import pandas as pd

def run_mysql(SQLQ):
    conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    df = pd.read_sql(SQLQ, conn)
    conn.close()
    return df

mysql_query = '''set @Yesterday = curdate() -1 ;
SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
'''
df = run_mysql(mysql_query)
display(df)
Error:
DatabaseError: Execution failed on sql 'set @Yesterday = curdate() -1 ;
SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
': (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SELECT * FROM mt4_daily\n where date(time) = date(@Yesterday)' at line 2")
If I remove the variable in the MySQL Query it runs fine:
import pymysql
import pandas as pd

def run_mysql(SQLQ):
    conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    df = pd.read_sql(SQLQ, conn)
    conn.close()
    return df

mysqlquery = '''SELECT * FROM mt4_daily
where date(time) = date(curdate() -1)
'''
df = run_mysql(mysqlquery)
display(df)
What am I doing wrong?
Final Solution:
Thank you Prashant Sharma for the solution.
I tweaked it a bit so it returns a pandas dataframe and allows for a list of variables to be passed prior to the Select query.
Here is the code:
import pymysql
import pandas as pd

def run_mysql(SQLQ, MySQL_Variable_List=''):
    try:
        conn = pymysql.connect(host='Server IP', user='UserName', passwd='Password', db='Database name')
        cursor = conn.cursor()
        # run the variable-setting statements on the same session first
        for i in MySQL_Variable_List:
            cursor.execute(i)
        df = pd.read_sql(SQLQ, conn)
    except Exception as e:
        print(str(e))
    finally:
        cursor.close()
        conn.close()
        return df

MySQL_Variable_List = ["set @Yesterday = curdate() -1 ;"]
SQLQ = "SELECT * FROM mt4_daily where date(time) = date(@Yesterday) limit 10"
df1 = run_mysql(SQLQ, MySQL_Variable_List)
display(df1)
The code below does the job; I have tested it. You might have to fix some indentation issues if anything pops up.
import pymysql

def run_mysql(query1, query2):
    try:
        conn = pymysql.connect(host='localhost', user='root', passwd='', db='data_new_es')
        cursor = conn.cursor()
        cursor.execute(query1)
        cursor.execute(query2)
        row = cursor.fetchone()
        print(row)
    except Exception as e:
        print(str(e))
    finally:
        cursor.close()
        conn.close()

mysqlquery1 = "set @Yesterday = curdate() -1 ;"
mysqlquery2 = "select * from abcde where date(accrual_date) = date(@Yesterday)"
run_mysql(mysqlquery1, mysqlquery2)
Try to run them as two separate queries.
mysql_query = '''set @Yesterday = curdate() -1 ;'''
df = run_mysql(mysql_query)
mysql_query = '''SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
'''
df = run_mysql(mysql_query)
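One caveat with this approach: run_mysql opens a new connection on each call, and MySQL user variables are per-session, so @Yesterday set in the first call is gone by the time the second call runs. A sketch (with the same dummy credentials) that keeps both statements on one connection:
import pymysql
import pandas as pd

conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
try:
    with conn.cursor() as cursor:
        # runs in the same session, so the variable is still set for the SELECT
        cursor.execute("set @Yesterday = curdate() - 1;")
    df = pd.read_sql("SELECT * FROM mt4_daily where date(time) = date(@Yesterday)", conn)
finally:
    conn.close()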
I think it's because there are two statements, and this function only allows reading and executing one at a time. According to the pandas read_sql documentation you can use the read_sql "params" keyword parameter to solve this problem and move the @Yesterday calculation to the Python side:
import pymysql
import pandas as pd
from datetime import datetime, timedelta

def run_mysql(SQLQ, params):
    conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    df = pd.read_sql(SQLQ, conn, params=params)
    conn.close()
    return df

mysqlquery = '''SELECT * FROM mt4_daily
where date(time) = date(%(yesterday)s)
'''
yesterday = datetime.date(datetime.now()) - timedelta(days=1)
params = {'yesterday': yesterday}
df = run_mysql(mysqlquery, params)
display(df)
I could not execute the code, but the idea is this.

Cannot drop table in pandas to_sql using SQLAlchemy

I'm trying to drop an existing table, run a query, and then recreate the table using the pandas to_sql function. This query works in pgAdmin, but not here. Any idea whether this is a pandas bug or whether my code is wrong?
Specific error is ValueError: Table 'a' already exists.
import pandas.io.sql as psql
from sqlalchemy import create_engine

engine = create_engine(r'postgresql://user@localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
drop table a;
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print(df.head())
df.to_sql('a', engine)
conn.close()
Why are you doing it like that? There is a shorter way: the if_exists kwarg in to_sql. Try this:
import pandas.io.sql as psql
from sqlalchemy import create_engine

engine = create_engine(r'postgresql://user@localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print(df.head())
# Notice how the line below is different: you forgot the schema argument
df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
conn.close()
According to docs:
replace: If table exists, drop it, recreate it, and insert data.
P.S. An additional tip: this is a better way to handle the connection:
with engine.connect() as conn, conn.begin():
    sql = """select * from some_table limit 1"""
    df = psql.read_sql(sql, con=conn)
    print(df.head())
    df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
Because it ensures that your connection is always closed, even if your program exits with an error. This is important to prevent data corruption. Further, I would just use this:
import pandas as pd
...
pd.read_sql(sql, conn)
instead of the way you are doing it.
So, if I were in your place writing that code, it would look like this:
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine(r'postgresql://user@localhost:port/dbname')
with engine.connect() as conn, conn.begin():
    df = pd.read_sql('select * from some_table limit 1', con=conn)
    print(df.head())
    df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')

insert into a mysql database timestamp

I have a part in my Python script where I need to insert some data into a table on a MySQL database, example below:
insert_data = "INSERT into test (test_date,test1,test2) values (%s,%s,%s)"
cur.execute(insert_data,(test_date,test1,test2))
db.commit()
db.close()
I have a couple of questions: what is incorrect with this syntax, and how is it possible to insert a timestamp value instead of passing a string through %s? Note the column names in the database are the same as the variable names in my script.
Thanks
Try this:
import MySQLdb
import time
import datetime

ts = time.time()
timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
conn = MySQLdb.connect(host="localhost",
                       user="root",
                       passwd="newpassword",
                       db="db1")
x = conn.cursor()
try:
    x.execute("""INSERT into test (test_date,test1,test2) values(%s,%s,%s)""", (timestamp, test1, test2))
    conn.commit()
except:
    conn.rollback()
conn.close()
Creating the timestamp can be done in one line; there is no need for time.time():
from datetime import datetime
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
Simply use the database's NOW() function. Since NOW() is a SQL expression rather than a value, it belongs in the statement itself, not in the parameter tuple (passing it as a parameter would insert the literal string 'NOW()'):
insert_data = "INSERT into test (test_date,test1,test2) values (NOW(),%s,%s)"
cur.execute(insert_data,(test1,test2))
db.commit()
db.close()
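Alternatively, if you control the table definition, the column can default to the insert time so the script never has to supply it; a sketch, assuming test_date is declared with DEFAULT CURRENT_TIMESTAMP:
# assumes the column was created as:
#   test_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
insert_data = "INSERT into test (test1,test2) values (%s,%s)"
cur.execute(insert_data, (test1, test2))
db.commit()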

Getting data from table in database

I want to extract data from a postgresql database and use that data (in a dataframe format) in a script. Here's my initial try:
from pandas import DataFrame
import psycopg2
conn = psycopg2.connect(host=host_address, database=name_of_database, user=user_name, password=user_password)
cur = conn.cursor()
cur.execute("SELECT * FROM %s;" % name_of_table)
the_data = cur.fetchall()
colnames = [desc[0] for desc in cur.description]
the_frame = DataFrame(the_data)
the_frame.columns = colnames
cur.close()
conn.close()
Note: I am aware that I should not use string parameter interpolation (%) to pass variables into a SQL query string, but this works fine for me as it is.
Would there be a more direct approach to this?
Edit: Here's what I used from the selected answer:
import pandas as pd
import sqlalchemy as sq
engine = sq.create_engine("postgresql+psycopg2://username:password@host:port/database")
the_frame = pd.read_sql_table(name_of_table, engine)
Pandas can load data from Postgres directly:
import psycopg2
import pandas.io.sql as pdsql
conn = psycopg2.connect(...)
the_frame = pdsql.read_frame("SELECT * FROM %s;" % name_of_table, conn)
If you have a recent pandas (>=0.14), you should use read_sql_query/table (read_frame is deprecated) with an sqlalchemy engine:
import pandas as pd
import sqlalchemy
import psycopg2
engine = sqlalchemy.create_engine("postgresql+psycopg2://...")
the_frame = pd.read_sql_query("SELECT * FROM %s;" % name_of_table, engine)
the_frame = pd.read_sql_table(name_of_table, engine)
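If the table name ever comes from outside the script, the "%s" % name_of_table interpolation above is unsafe; psycopg2 can quote it as an identifier instead. A minimal sketch on a raw psycopg2 connection (an assumption here, since the answer above uses an engine):
import psycopg2
from psycopg2 import sql

conn = psycopg2.connect(...)  # same connection arguments as in the question
# SQL composition quotes name_of_table as an identifier, not a value
query = sql.SQL("SELECT * FROM {};").format(sql.Identifier(name_of_table))
with conn.cursor() as cur:
    cur.execute(query)
    the_data = cur.fetchall()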
Here is an alternate method:
# run sql code
result = conn.execute(sql)
# Insert to a dataframe
df = DataFrame(data=list(result), columns=result.keys())
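For context, result above is assumed to come from a SQLAlchemy 1.x connection, where execute() still accepts a plain string; a sketch of the surrounding setup with dummy credentials:
from pandas import DataFrame
from sqlalchemy import create_engine

engine = create_engine("postgresql+psycopg2://username:password@host:port/database")
conn = engine.connect()
result = conn.execute("SELECT * FROM some_table LIMIT 1")  # SQLAlchemy 1.x style
# result.keys() returns the column names of the SELECT
df = DataFrame(data=list(result), columns=result.keys())
conn.close()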
