
Why doesn't pandas execute my SQL query?
import sqlite3
import pandas as pd
# load data
df = pd.read_csv('CurriculumAuditReport.csv')
# strip whitespace from headers
df.columns = df.columns.str.strip()
con = sqlite3.connect("sans.db")
# drop data into database
df.to_sql("MyTable", con, if_exists='replace')
df = pd.read_sql_query('SELECT Count (Department) FROM MyTable WHERE `CompletedTraining` LIKE 'Incomplete' GROUP BY Department', con)
print(df)
con.close()
The query produces the result I want just fine in DB Browser for SQLite.
The output:
C:\sans>C:\python34\python test2.py
File "test2.py", line 15
df = pd.read_sql_query('SELECT Count (Department) FROM MyTable WHERE
`CompletedTraining` LIKE 'Incomplete' GROUP BY Department', con)
^
SyntaxError: invalid syntax
My output should have 11 rows.

You have a quoting issue: the outer string is delimited with single quotes, so the quotes around 'Incomplete' terminate it early, which is what raises the SyntaxError. Delimit the Python string with double quotes instead (and use = rather than LIKE, since you're matching an exact value):
df = pd.read_sql_query("SELECT Department, count(*) as cnt FROM MyTable WHERE CompletedTraining = 'Incomplete' GROUP BY Department", con)
You can also put the query in a triple-quoted string, which keeps single quotes usable inside it and lets you format the SQL across multiple lines:
qry = """
SELECT department, count(*) as cnt
FROM MyTable
WHERE CompletedTraining = 'Incomplete'
GROUP BY department
"""
df = pd.read_sql_query(qry, con)
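A parameterized query sidesteps the quoting problem entirely. Here's a minimal sketch assuming the same sans.db / MyTable setup from the question (sqlite3 uses ? placeholders, and pandas forwards params to the driver):
import sqlite3
import pandas as pd

con = sqlite3.connect("sans.db")
qry = """
SELECT Department, COUNT(*) AS cnt
FROM MyTable
WHERE CompletedTraining = ?
GROUP BY Department
"""
# The value is bound by the driver, so no quotes are needed in the SQL itself
df = pd.read_sql_query(qry, con, params=("Incomplete",))
print(df)
con.close()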

Related

Reading SQL table from Python with merge condition

import pandas as pd
import pyodbc

conn = pyodbc.connect("Driver={??};"
                      "Server=??;"
                      "Database=??;"
                      "Trusted_Connection=yes;")
df1 = pd.read_sql_query("SELECT TOP 10000 * FROM table1", conn)
df2 = pd.read_sql_query("SELECT * FROM table2 (((where id_key = id(from table1) ))) ", conn)
Hello,
I have two tables in SQL Server. I want to pull the rows from table2 whose id_key matches an id from table1.
First, get df1's ids as a tuple:
ids = tuple(df1['id'].to_list())
print(ids)
'''
(1, 2)
'''
Then format the tuple into the SQL and read it:
sql = 'SELECT * FROM table2 WHERE id_key IN {}'.format(ids)
print(sql)
'''
SELECT * FROM table2 WHERE id_key IN (1, 2)
'''
df2 = pd.read_sql(sql, conn)
Full code:
import pandas as pd
import pyodbc

conn = pyodbc.connect("Driver={??};"
                      "Server=??;"
                      "Database=??;"
                      "Trusted_Connection=yes;")
df1 = pd.read_sql_query("SELECT TOP 10000 * FROM table1", conn)
ids = tuple(df1['id'].to_list())
df2_sql = 'SELECT * FROM table2 where id_key in {}'.format(ids)
df2 = pd.read_sql_query(df2_sql, conn)
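One caveat with the format() approach: if df1 contains a single id, tuple() renders it as "(1,)", and the trailing comma is invalid SQL. A parameterized sketch (assuming the same table2 / id_key names) avoids that edge case and SQL injection at the same time:
ids = df1['id'].tolist()
# pyodbc uses '?' placeholders; build one per id
placeholders = ','.join('?' * len(ids))
df2_sql = 'SELECT * FROM table2 WHERE id_key IN ({})'.format(placeholders)
df2 = pd.read_sql_query(df2_sql, conn, params=ids)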

Search multiple values from a Python dataframe in an MS-SQL table

I have a string new_string which contains a set of values to search for in my MS-SQL table. I am getting the error "Could not parse rfc1738 URL from string ''2535488','2568394''".
new_string = "'2535488','2568394'"
cnxn = pyodbc.connect("DRIVER={SQL Server};SERVER=ABCDEF;DATABASE=my_db") #connection is successfully established.
data1 = pd.read_sql("""select * from my_table where my_col in (%s)""",new_string,cnxn)
But if I type the following query, I get my results.
data1 = pd.read_sql("""select * from my_table where my_col in ('2535488','2568394')""",cnxn)
How can I search for the values in my table?
That error comes from passing new_string as the second positional argument of read_sql, where pandas expects the connection, so it tries to parse your values as a database URL. Use a prepared statement instead:
In [58]: parms = ['2535488','2568394']
In [59]: q = """select * from my_table where my_col in ({})""".format(','.join(['?'] * len(parms)))
In [60]: q
Out[60]: 'select * from my_table where my_col in (?,?)'
Now you should be able to do:
data1 = pd.read_sql(q, cnxn, params=parms)

How to store a MySQL query result into a pandas DataFrame with pymysql?

I'm trying to store a MySQL query result in a pandas DataFrame using pymysql and am running into errors building the DataFrame. I found similar questions here and here, but it looks like there are pymysql-specific errors being thrown:
import pandas as pd
import datetime
import pymysql
# dummy values
connection = pymysql.connect(user='username', password='password', database='database_name', host='host')
start_date = datetime.datetime(2017,11,15)
end_date = datetime.datetime(2017,11,16)
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(data=cursor.fetchall(), index=None, columns=cursor.keys())
finally:
    connection.close()
returns: AttributeError: 'Cursor' object has no attribute 'keys'
If I drop the index and columns arguments:
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
        cursor.execute(query, (start_date, end_date))
        df = pd.DataFrame(cursor.fetchall())
finally:
    connection.close()
returns ValueError: DataFrame constructor not properly called!
Thanks in advance!
Use pandas.read_sql() for this (note that pymysql expects %s placeholders, not ?):
query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
df = pd.read_sql(query, connection, params=(start_date, end_date))
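Putting that together with the connection handling from the question (a sketch; the credentials and table are the question's dummy values):
import datetime
import pandas as pd
import pymysql

connection = pymysql.connect(user='username', password='password',
                             database='database_name', host='host')
try:
    query = "SELECT * FROM orders WHERE date_time BETWEEN %s AND %s"
    df = pd.read_sql(query, connection,
                     params=(datetime.datetime(2017, 11, 15),
                             datetime.datetime(2017, 11, 16)))
finally:
    connection.close()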
Thank you for your suggestion to use pandas.read_sql(). It works when executing a stored procedure as well! I tested it in an MSSQL 2017 environment.
Below is an example (I hope it helps others):
def database_query_to_df(connection, stored_proc, start_date, end_date):
    # Define the query
    query = "SET NOCOUNT ON; EXEC " + stored_proc + " ?, ?; SET NOCOUNT OFF"
    # Pass the parameters to the query, execute it, and store the results in a data frame
    df = pd.read_sql(query, connection, params=(start_date, end_date))
    return df
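For example, with a hypothetical procedure name:
df = database_query_to_df(connection, 'dbo.GetOrders',
                          datetime.date(2017, 11, 15),
                          datetime.date(2017, 11, 16))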
Try this:
import pandas as pd
import pymysql
mysql_connection = pymysql.connect(host='localhost', user='root', password='', db='test', charset='utf8')
sql = "SELECT * FROM `brands`"
df = pd.read_sql(sql, mysql_connection, index_col='brand_id')
print(df)

How to pass a data frame as parameter to a SQL query in Python?

I have a dataframe that consists of one column of values and I want to pass it as a parameter to execute the following sql query:
query = "SELECT ValueDate, Value"\
"FROM Table "\
"WHERE [ID] in ( ? ) "
So I tried (among so many other things) the following:
df = pd.read_sql_query(query, conn, params=[ df['ID'].values ])
df = pd.read_sql_query(query, conn, params=[ df['ID'].tolist ])
df = pd.read_sql_query(query, conn, params=[ list(df['ID'].values) ])
...
What is the correct way to pass the dataframe values ?
NB: I am using Microsoft SQL Server, so the query needs to be formatted as I did.
Does this get you what you need?
import pandas as pd
your_column = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9])
query = "SELECT ValueDate, Value "\
        "FROM Table "\
        "WHERE [ID] in {}".format(tuple(your_column))
print(query)
# 'SELECT ValueDate, Value FROM Table WHERE [ID] in (1, 2, 3, 4, 5, 6, 7, 8, 9)'
Then you should be able to query without further parameters.
df = pd.read_sql_query(query, conn)
Alternatively, build the placeholders yourself and let the driver bind the values:
params = tuple(df['ID'].values)
sql = "SELECT COUNT(*) FROM foobar WHERE id IN (%s)" % (",".join(["?"] * len(params)),)
cursor.execute(sql, params)
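The same placeholder string also works through pandas if you want the result back as a DataFrame (same hypothetical foobar table, using the cnxn connection from above):
count_df = pd.read_sql_query(sql, cnxn, params=list(params))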

Cannot drop table in pandas to_sql using SQLAlchemy

I'm trying to drop an existing table, run a query, and then recreate the table using the pandas to_sql function. This query works in pgAdmin, but not here. Any ideas whether this is a pandas bug or a problem in my code?
The specific error is ValueError: Table 'a' already exists.
import pandas.io.sql as psql
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user@localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
drop table a;
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print(df.head())
df.to_sql('a', engine)
conn.close()
Why are you doing it like that? There is a shorter way: the if_exists kwarg in to_sql. Try this:
import pandas.io.sql as psql
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user@localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print(df.head())
# Notice how the line below differs: if_exists='replace' drops and recreates the table
# (schema_name is a placeholder for your target schema)
df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
conn.close()
According to docs:
replace: If table exists, drop it, recreate it, and insert data.
P.S. An additional tip:
This is a better way to handle the connection:
with engine.connect() as conn, conn.begin():
    sql = """select * from some_table limit 1"""
    df = psql.read_sql(sql, con=conn)
    print(df.head())
    df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
Because it ensures that your connection is always closed, even if your program exits with an error. This is important to prevent data corruption. Further, I would just use this:
import pandas as pd
...
pd.read_sql(sql, conn)
instead of the way you are doing it.
So, if I was in your place writing that code, it would look like this:
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user@localhost:port/dbname')
with engine.connect() as conn, conn.begin():
    df = pd.read_sql('select * from some_table limit 1', con=conn)
    print(df.head())
    df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
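To sanity-check the result, you can read the replaced table straight back (same engine as above; assumes table a is on the default search_path):
with engine.connect() as conn:
    check = pd.read_sql('select * from a', con=conn)
    print(check.head())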
