I have some simple impyla code, and I would like to create a pandas DataFrame from my cursor. My code runs, but my DataFrame is always empty.
If I run my query directly in Impala, the result is not empty. This is what my code looks like:
from impala.dbapi import connect
from impala.util import as_pandas

# Open an SSL'd, password-authenticated connection to Impala.
conn = connect(host='impala_server', port=21051,
               user='user', password='pass',
               use_ssl=True,
               auth_mechanism='PLAIN')
cursor = conn.cursor()
cursor.execute("SELECT * FROM TABLE")
# BUG FIX: the original called cursor.fetchall() here, which consumes every
# row of the result set, so as_pandas(cursor) then saw an exhausted cursor
# and built an empty DataFrame. as_pandas must receive the cursor while its
# rows are still unread.
df = as_pandas(cursor)
print(df.head())
Help me please, what am I doing wrong?
Just remove:
results = cursor.fetchall()
from your code. It should work.
Calling fetchall() consumes all the rows from the cursor, so as_pandas(cursor) has nothing left to read — delete that line and it will be OK.
from impala.dbapi import connect
from impala.util import as_pandas
conn = connect(host='****.com', port=****, database='****')
cursor = conn.cursor()
cursor.execute('select * from table limit 10')
df = as_pandas(cursor)
df.head()
I ran the code above, and it ran well.
Related
I have a database connection:
import sqlalchemy as sa
# Build the engine from the stored connection URL ('my_info' is a
# placeholder for the real database URL) and open a DBAPI connection.
engine = sa.create_engine('my_info')
connection = engine.connect()
Subsequently I have a function:
import pandas as pd

def load_data(connection):
    """Fetch every row of ``tablename`` over *connection* as a DataFrame."""
    return pd.read_sql('select * from tablename', con=connection)
This is part of an app in Streamlit that I'm working on, I need streamlit to cache the output of my load_data function. I thought it worked like this:
import pandas as pd
import streamlit as st

# FIX(transcription): the paste showed "#st.cache()", but a commented-out
# decorator could not raise the UnhashableTypeError the author reports, so
# the real code must have used the "@" decorator syntax — restored here.
@st.cache()
def load_data(connection):
    """Cached query: fetch all rows of ``tablename`` into a DataFrame.

    NOTE(review): st.cache hashes the arguments, and a live DB connection
    is not hashable — which is exactly the error described below.
    """
    sql = 'select * from tablename'
    df = pd.read_sql(sql, con=connection)
    return df
But this gives me the following error:
UnhashableTypeError: Cannot hash object of type builtins.weakref, found in the arguments of load_data().
The error is much longer, and if it helps I will post it. The error also contains a link to the streamlit documentation. I read it and reformulated my code to look like this:
# FIX(transcription): both "@" decorators were mangled to "#" in the paste;
# they are restored here — the hash_funcs behavior discussed below requires
# the decorators to actually be active.
@st.cache()
def DBConnection():
    """Create (and let Streamlit cache) a single SQLAlchemy connection."""
    engine = sa.create_engine("my_info")
    conn = engine.connect()
    return conn

conn = DBConnection()

# NOTE(review): hash_funcs keys are normally types; DBConnection here is a
# function object — confirm against the Streamlit caching docs.
@st.cache(hash_funcs={DBConnection: id})
def load_data(connection):
    """Fetch all rows of ``tablename``; the result is cached by Streamlit."""
    sql = 'select * from tablename'
    df = pd.read_sql(sql, con=connection)
    return df
But this gives me a NameError:
NameError: name 'DBConnection' is not defined
I've run out of ideas to try; any help would be highly appreciated. It is very possible that I misunderstood the documentation, as it assumes a lot of prior knowledge about the process of hashing and caching.
Combine the two methods and use:
@st.cache(allow_output_mutation=True)
Code:
# BUG FIX: "true" is not a Python name — the keyword argument must be True.
# (The leading "#" of the paste is also restored to the "@" decorator sigil.)
@st.cache(allow_output_mutation=True)
def load_data():
    """Create the engine/connection and fetch ``tablename`` once.

    allow_output_mutation=True tells Streamlit not to hash the returned
    DataFrame, sidestepping the unhashable-connection problem entirely
    because the connection never appears in the cached signature.
    """
    engine = sa.create_engine("my_info")
    conn = engine.connect()
    sql = 'select * from tablename'
    df = pd.read_sql(sql, con=conn)
    return df
For more, you can read the Streamlit documentation on caching.
I have been looking since yesterday about the way I could convert the output of an SQL Query into a Pandas dataframe.
For example a code that does this :
data = select * from table
I've tried so much code that I found on the internet, but nothing seems to work.
Note that my database is stored in Azure Databricks and I can only access the table using its URL.
Thank you so much !
Hope this would help you out. Both insertion & selection are in this code for reference.
def db_insert_user_level_info(table_name):
    """Append the module-level DataFrame ``df_parameter`` to *table_name*,
    skipping the insert when the table already holds the same row count
    (a crude idempotence check).

    FIX: the pasted snippet had lost all indentation (a SyntaxError as
    shown); the structure is restored here.
    """
    # NOTE(review): df_parameter must exist at module level or this raises
    # NameError — consider passing the DataFrame in as an argument instead.
    df = df_parameter
    params = urllib.parse.quote_plus("DRIVER={SQL Server};SERVER=DESKTOP-ITAJUJ2;DATABASE=githubAnalytics")
    engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
    engine.connect()
    table_row_count = select_row_count(table_name)
    df_row_count = df.shape[0]
    if table_row_count == df_row_count:
        print("Data Cannot Be Inserted Because The Row Count is Same")
    else:
        # pandas handles the INSERT batching; append keeps existing rows.
        df.to_sql(name=table_name, con=engine, index=False, if_exists='append')
        print("********************************** DONE EXECTUTED SUCCESSFULLY ***************************************************")
def select_row_count(table_name):
    """Return COUNT(*) for *table_name*, or None when the query fails."""
    cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                          "Server=DESKTOP-ITAJUJ2;"
                          "Database=githubAnalytics;"
                          "Trusted_Connection=yes;")
    cur = cnxn.cursor()
    try:
        # NOTE(review): table_name is interpolated directly into the SQL —
        # only call this with trusted table names (SQL-injection risk).
        db_cmd = "SELECT count(*) FROM " + table_name
        res = cur.execute(db_cmd)
        # The count query yields a single row; return its first column.
        for x in res:
            return x[0]
    except Exception:
        # BUG FIX: was a bare ``except:``, which also swallows SystemExit
        # and KeyboardInterrupt; narrowed to Exception.
        print("Table is not Available , Please Wait...")
    finally:
        # BUG FIX: the original leaked the cursor and connection on every
        # call; release them on all paths (finally runs even after return).
        cur.close()
        cnxn.close()
Using sqlalchemy to connect to the database, and the built-in method read_sql_query from pandas to go straight to a DataFrame:
import pandas as pd
from sqlalchemy import create_engine

# Build the engine from the URL, open a connection, and pull the whole
# table straight into a DataFrame with pandas' read_sql_query.
engine = create_engine(url)
connection = engine.connect()
df = pd.read_sql_query("SELECT * FROM table", connection)
I am trying to retrieve data from a MySQL database by sending a MySQL query using Python.
When I send the MySQL Query in MySQL workbench, it runs perfectly fine.
When I try the same using Python (in a Jupyter Notebook), it returns an error.
Python Code:
import pymysql
import pandas as pd


def run_mysql(SQLQ):
    """Open a fresh connection, run *SQLQ*, and return the result DataFrame."""
    conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    df = pd.read_sql(SQLQ, conn)
    conn.close()
    return df


# NOTE(review): pd.read_sql sends this as ONE statement, but the text
# contains TWO (SET ...; SELECT ...) — that is what triggers the 1064
# syntax error reported below. (The paste's "#" is restored to MySQL's
# "@" user-variable sigil; "#" starts a comment in MySQL.)
mysql_query = '''set @Yesterday = curdate() -1 ;
SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
'''
df = run_mysql(mysql_query)
display(df)
Error:
DatabaseError: Execution failed on sql 'set @Yesterday = curdate() -1 ;
SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
': (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SELECT * FROM mt4_daily\n where date(time) = date(@Yesterday)' at line 2")
If I remove the variable in the MySQL Query it runs fine:
import pymysql
import pandas as pd


def run_mysql(SQLQ):
    """Execute *SQLQ* on a fresh connection and hand back a DataFrame."""
    connection = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    frame = pd.read_sql(SQLQ, connection)
    connection.close()
    return frame


# A single SELECT (yesterday computed inline) — no multi-statement issue.
mysqlquery = '''SELECT * FROM mt4_daily
where date(time) = date(curdate() -1)
'''
df = run_mysql(mysqlquery)
display(df)
What am I doing wrong?
Final Solution:
Thank you Prashant Sharma for the solution.
I tweaked it a bit so it returns a pandas dataframe and allows for a list of variables to be passed prior to the Select query.
Here is the code:
import pymysql
import pandas as pd


def run_mysql(SQLQ, MySQL_Variable_List=''):
    """Execute each setup statement (e.g. ``SET @var = ...``) on one
    connection, then run *SQLQ* on that same connection.

    Returns the result DataFrame, or None when anything fails (the error
    is printed, as in the original).
    """
    # BUG FIX: df/cursor/conn were unbound when connect() or an earlier
    # statement failed, so the finally/return raised NameError on top of
    # the real error. Pre-bind them and guard the cleanup.
    df = None
    conn = None
    cursor = None
    try:
        conn = pymysql.connect(host='Server IP', user='UserName', passwd='Password', db='Database name')
        cursor = conn.cursor()
        for statement in MySQL_Variable_List:
            cursor.execute(statement)
        df = pd.read_sql(SQLQ, conn)
    except Exception as e:
        print(str(e))
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
    return df


MySQL_Variable_List = ["set @Yesterday = curdate() -1 ;"]
SQLQ = "SELECT * FROM mt4_daily where date(time) = date(@Yesterday) limit 10"
# BUG FIX: the original called run_mysql(MySQL_Variable_List, SQLQ) — the
# arguments were swapped relative to the signature, so the SELECT was run
# character-by-character as "variables" and the list was passed as the query.
df1 = run_mysql(SQLQ, MySQL_Variable_List)
display(df1)
The code below does the job — I have tested it. You might have to rectify some indentation in case something pops up.
import pymysql


def run_mysql(query1, query2):
    """Run *query1* (setup, e.g. ``SET @var``) and then *query2* on the SAME
    connection, printing the first row of the second result set."""
    # BUG FIX: cursor/conn were unbound in ``finally`` when connect() itself
    # failed, raising NameError; pre-bind and guard the cleanup.
    conn = None
    cursor = None
    try:
        conn = pymysql.connect(host='localhost', user='root', passwd='', db='data_new_es')
        cursor = conn.cursor()
        cursor.execute(query1)
        cursor.execute(query2)
        row = cursor.fetchone()
        print(row)
    except Exception as e:
        print(str(e))
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


mysqlquery1 = "set @Yesterday = curdate() -1 ;"
# BUG FIX: this literal was split across two lines with no continuation in
# the paste (a SyntaxError); rejoined here, with MySQL's "@" user-variable
# sigil restored in place of the transcribed "#".
mysqlquery2 = "select * from abcde where date(accrual_date) = date(@Yesterday)"
# NOTE(review): run_mysql prints and returns None, so df1 is always None.
df1 = run_mysql(mysqlquery1, mysqlquery2)
Try to run them as two separate queries.
# Run the SET and the SELECT as two separate calls ("@" restored from the
# transcribed "#" — "#" starts a comment in MySQL).
mysql_query = '''set @Yesterday = curdate() -1 ;'''
df = run_mysql(mysql_query)

# NOTE(review): @Yesterday is a per-connection session variable, and
# run_mysql opens a NEW connection on every call, so the second query will
# not see the value set by the first — confirm before relying on this.
mysql_query = '''SELECT * FROM mt4_daily
where date(time) = date(@Yesterday)
'''
df = run_mysql(mysql_query)
I think this fails because there are two statements, and this function only allows reading and executing one at a time. According to the pandas read_sql documentation, you can use the "params" keyword argument to solve this problem and move the @Yesterday calculation to the Python side:
import pymysql
import pandas as pd
from datetime import datetime, timedelta


def run_mysql(SQLQ, params):
    """Run a parameterized query and return the rows as a DataFrame."""
    conn = pymysql.connect(host='IP address', user='username', passwd='password', db='database name')
    frame = pd.read_sql(SQLQ, conn, params=params)
    conn.close()
    return frame


# Single statement with a named placeholder instead of a session variable.
mysqlquery = '''SELECT * FROM mt4_daily
where date(time) = date(%(yesterday)s)
'''
# Compute "yesterday" on the Python side and bind it as a query parameter.
yesterday = datetime.date(datetime.now()) - timedelta(days=1)
params = {'yesterday': yesterday}
df = run_mysql(mysqlquery, params)
display(df)
I could not execute the code, but the idea is this.
I tried to fill a SQL SERVER table using Python by executing the Python script below :
import pyodbc
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

# Load the spreadsheet to be inserted.
df = pd.read_excel('C:/Users/Username/Desktop/file1.xlsx', sheet_name='Sheet1')

cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=MYSERVERNAME;"
                      "Database=DB;"
                      "uid=sa;pwd=MYPWD;"
                      "Trusted_Connection=yes;")

print("Column headings:")
print(df.columns)

cursor = cnxn.cursor()
for i in df.index:
    # BUG FIX: the original put df['ID'][i] etc. INSIDE the SQL string
    # literal, so the server received the literal text "df['ID'][i]".
    # Bind the values as pyodbc parameters instead (also avoids quoting /
    # injection problems). The flattened indentation (the reported
    # IndentationError) is restored as well.
    cursor.execute(
        "insert into pyperson (id,firstname,lastname) values (?,?,?)",
        df['ID'][i], df['First Name'][i], df['Last Name'][i],
    )
cnxn.commit()
PS:
If I try to read only data from excel file and then print it it works fine
if I try to insert directly with an insert into statement using python it works also fine
but when I combine them it shows me the error message below :
IndentationError: expected an indented block
Any ideas? Any help would be appreciated :)
I am using following code to add data from txt file to SQL Server using Python, hope that helps:
import pymssql
import numpy as np

host = 'YourHostName'
username = 'USERNAME'
password = 'PASSWORD'
database = 'TestDB'

conn = pymssql.connect(host, username, password, database)
cursor = conn.cursor()

# Start from an empty table, then reload it row by row from the file.
cursor.execute("Delete from color_type")
with open("Your file path\\filename.csv", "r") as ins:
    raw_lines = []
    for line in ins:
        raw_lines.append(line)
        # Each line is pipe-delimited; the first two fields become one row.
        fields = line.split('|')
        first_field = fields[0]
        second_field = fields[1]
        cursor.execute("insert into color_type values(%s, %s)", (first_field, second_field))

# Read everything back, commit the inserts, and show the result.
cursor.execute("select * from color_type")
rows = cursor.fetchall()
conn.commit()
print(rows)
I am trying to export MySQL data into a .txt file using Python 3.x, but it looks like I'm missing something. The expectation is that the data should be written to the file in tabular/column format. I tried my best to find a solution, but I'm not getting what I need.
Below is my code :
import pymysql.cursors
import pymysql
import sys
import os

# Connect to the database; DictCursor makes every fetched row a
# column-name -> value dict.
connection = pymysql.connect(host='localhost',
                             user='root',
                             password="",
                             db='jmeterdb',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        # Select all records
        sql = "select * from emp"
        cursor.execute(sql)
        result = cursor.fetchall()
        newfile = open("db-data.txt", "a+")
        for row in result:
            # NOTE(review): each row is a dict, and iterating a dict yields
            # its KEYS — so writelines(row) writes only the column names,
            # which matches the symptom described below.
            newfile.writelines(row)
        print(result)
        newfile.close()
finally:
    connection.close()
On terminal python shows me data when print(result) is executed but in the db-data.txt file, it shows column-names only.
Expected result :
Column_Name1 Column_Name2 Column_Name3
data1 data2 data3
data1 data2 data3
The code below produces the expected output for the question above:
import pymysql.cursors
import pymysql
import sys
import os

# Open database connection; DictCursor returns each row as a dict.
connection = pymysql.connect(host='localhost',
                             user='root',
                             password="",
                             db='jmeterdb',
                             cursorclass=pymysql.cursors.DictCursor)

# prepare a cursor object using cursor() method
with connection.cursor() as cursor:
    try:
        sql = "SELECT * FROM EMP order by ename asc"
        cursor.execute(sql)
        # Fetch all the rows (list of dicts under DictCursor).
        results = cursor.fetchall()
        if results:
            # BUG FIX: the file was opened without ``with`` and closed only
            # on the success path, AFTER the connection (and newfile was
            # unbound when results was empty); ``with`` closes it always.
            with open("db-data.txt", "a+") as newfile:
                # Header row, tab-separated.
                newfile.write('ename'+"\t"+'jobs'+"\t"+"salary"+"\t"+'comm'+"\t"+'manager'+"\t"+'hiredate'+"\t"+'deptno'+"\t"+'empno'+"\n")
                for record in results:
                    # Fixed column order for every data row.
                    row_values = [record['ename'], record['job'], record['sal'],
                                  record['comm'], record['mgr'], record['hiredate'],
                                  record['deptno'], record['empno']]
                    for value in row_values:
                        newfile.write('{}'.format(value))
                        newfile.write("\t")
                        print(value)
                    newfile.write("\n")
    except Exception:
        # BUG FIX: was a bare ``except:`` (hides even KeyboardInterrupt);
        # also fixed the "fecth" typo in the message.
        print("Error: unable to fetch data")

# disconnect from server
connection.close()