Among SQL Server connectors, adodbapi is the only one that works in my environment.
import adodbapi
# Build the OLE DB connection string; server, db, user and pwd are assumed
# to be defined earlier (not shown in this snippet).
conn = adodbapi.connect("PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server,db,user,pwd))
cursor = conn.cursor()
# Iterating the cursor yields one SQLrow object per result row
# (presumably after an execute() call omitted from the snippet).
query_list = [row for row in cursor]
Each element of the list is of type `adodbapi.apibase.SQLrow`.
How to convert this list into a pandas df?
Thanks
This is how I did it:
import adodbapi as ado
import numpy as np
import pandas as pd
def get_df(data):
    """Convert an adodbapi SQLrows result set into a pandas DataFrame.

    Parameters
    ----------
    data : adodbapi SQLrows object (as returned by ``cursor.fetchall()``),
        exposing ``ado_results`` (the raw values) and ``columnNames``
        (a mapping of column name -> position).

    Returns
    -------
    pd.DataFrame with the query's column names as columns.
    """
    # ado_results appears to hold the data column-major, hence the transpose
    # to get one row per record.
    ar = np.array(data.ado_results)
    df = pd.DataFrame(ar).transpose()
    df.columns = data.columnNames.keys()  # set column names
    return df
# Example usage: connect, run the query, and convert the fetched result
# with get_df(). Indentation restored — the pasted version was flattened
# to column 0, which is a SyntaxError in Python.
with ado.connect('yourconnectionstring') as con:
    with con.cursor() as cur:
        sql_str = 'yourquery'
        cur.execute(sql_str)
        data = cur.fetchall()
        df = get_df(data)
This may help:
import pandas as pd
.......
ur_statements
.......
query_list = [row for row in cursor]
# Note: this produces a single-column frame where each cell holds one whole
# row object — the individual columns are NOT split out.
df = pd.DataFrame({'col':query_list })
print (df)
Consider pandas' read_sql to directly query the database. Currently, though, you will receive an error:
KeyError: '_typ'
However, there is a working fix thanks to @TomAubrunner in this GitHub issue, which appears to be a bug in adodbapi.
Find location of adodpapi: print(adodbapi.__file__)
Open the script in folder: apibase.py
Locate: return self._getValue(self.rows.columnNames[name.lower()]) and replace it with the try/except block below:
try:
return self._getValue(self.rows.columnNames[name.lower()])
except:
return False
Once done, simply run as you would any DB-API pandas connection even with qmark parameters:
import pandas as pd
import adodbapi
# server, db, user and pwd are assumed to be defined earlier.
conn = adodbapi.connect("PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server,db,user,pwd))
# WITHOUT PARAMS
df = pd.read_sql("SELECT * FROM myTable", conn)
# WITH PARAMS (DB-API qmark placeholders, bound via params=)
df = pd.read_sql("SELECT * FROM myTable WHERE [Col]= ?", conn, params=['myValue'])
conn.close()
Related
import pandas as pd
import sqlite3

df = pd.read_csv('liked_songs.csv')

connection = sqlite3.connect('spotify.db')
# Create the schema in pure Python. The original `!sqlite3 spotify.db < spotify.sql`
# is IPython shell magic and not valid Python outside a notebook.
with open('spotify.sql') as schema:
    connection.executescript(schema.read())

df.columns = df.columns.str.replace(' ','_')
cursor = connection.cursor()
for index in df.index:
    sr = df.iloc[index]
    # Parameterized (?) placeholders instead of f-string interpolation:
    # prevents SQL injection and quoting breakage from CSV values.
    cursor.execute(
        """INSERT INTO spotify (SpotifyID, ArtistID, Track_Name,
           Album_Name, Artist_Name, Release_Date, Duration, Popularity, Genres)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (sr.SpotifyID, sr.Artist_ID, sr.Track_Name, sr.Album_Name,
         sr.Artist_Name, sr.Release_Date, sr.Duration, sr.Popularity,
         sr.Genres),
    )
# The original never committed, so the inserts were lost on exit.
connection.commit()
I'm using pandas to import a .csv file and sqlite3 to enter the data into a database made from a SQL script. (Image of the SQL script attached.)
You can directly use to_sql:
import pandas as pd
import sqlite3

df = pd.read_csv('liked_songs.csv')
connection = sqlite3.connect('spotify.db')
df.columns = df.columns.str.replace(' ','_')
# index=False (the parameter is a boolean): don't write the DataFrame's
# RangeIndex as an extra column.
df.to_sql('spotify', connection, if_exists='replace', index=False)
connection.close()  # release the database handle when done
I spent over two days trying to solve this but still blocked.
I work with pyodbc and pandas in order to write a specific data ( ID and Role ) from a column called ExtraData ( a nested JSON ) into a pandas DataFrame.
The JSON in ExtraData is the following :
{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}
Here is my attempt:
import pyodbc
import json
import pandas.io.json as pd_json

#// Skip setting the connection string
crsr = conn.cursor()
query_json ="SELECT ExtraData FROM data"
test_data= crsr.execute(query_json).fetchall()
for row in test_data:
    test = json.dumps([x for x in row])
    data = pd_json.loads(test)
    print(data) #['{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}']
    # This is the failing call from the question: `data` is a list holding a
    # JSON *string* (not a parsed dict), so json_normalize indexes into the
    # string and raises "TypeError: string indices must be integers".
    df = pd_json.json_normalize(data,
                                record_path='Person',
                                meta=['ID', 'Role'])
    print(df)
I have the following error :
---> df = pd_json.json_normalize(data, record_path='Person', meta=['ID', 'Role'])
TypeError: string indices must be integers
Do you have an explanation pls? And how to avoid having this error ?
I hope you can help!
You should change the code from:
import pyodbc
import json
import pandas.io.json as pd_json

# Original (failing) version, with the indentation lost in the paste restored.
crsr = conn.cursor()
query_json ="SELECT ExtraData FROM data"
test_data= crsr.execute(query_json).fetchall()
for row in test_data:
    test = json.dumps([x for x in row])
    data = pd_json.loads(test)
    print(data)
    df = pd_json.json_normalize(data,
                                record_path='Person',
                                meta=['ID', 'Role'])
    print(df)
to
import pyodbc
import json
import pandas.io.json as pd_json

crsr = conn.cursor()
query_json ="SELECT ExtraData FROM data"
test_data= crsr.execute(query_json).fetchall()
for row in test_data:
    test = json.dumps([x for x in row])
    data = pd_json.loads(test[0])  # <-- this is the changed line
    print(data)
    df = pd_json.json_normalize(data,
                                record_path='Person',
                                meta=['ID', 'Role'])
    print(df)
I have a simple impyla code, and I would like to create a pandas dataFrame from my cursor. My code is running but my dataframe is always an empty dataframe.
If I run my query directly on Impala, the result is not empty. This is what my code looks like:
from impala.dbapi import connect
from impala.util import as_pandas
conn = connect(host='impala_server', port=21051,
user='user' , password='pass',
use_ssl=True,
auth_mechanism='PLAIN')
cursor = conn.cursor()
cursor.execute("SELECT * FROM TABLE")
# NOTE(review): fetchall() consumes all rows from the cursor, so as_pandas()
# on the next line finds nothing left — this is why the DataFrame is empty
# (see the answers below).
results = cursor.fetchall()
df = as_pandas(cursor)
print(df.head())
Help me please, what am I doing wrong?
Just remove:
results = cursor.fetchall()
from your code. It should work.
Delete the line `results = cursor.fetchall()` and it will be OK.
from impala.dbapi import connect
from impala.util import as_pandas
# Placeholders (****) stand in for real host/port/database values.
conn = connect(host='****.com', port=****, database='****')
cursor = conn.cursor()
cursor.execute('select * from table limit 10')
# as_pandas() drains the still-unfetched cursor directly into a DataFrame.
df = as_pandas(cursor)
df.head()
I run the code above, and it run well.
I have tried many different things to pull the data from Access and put it into a neat data frame. Right now my code looks like this:
from pandas import DataFrame
import numpy as np
import pyodbc
from sqlalchemy import create_engine
db_file = r'C:\Users\username\file.accdb'
user = 'user'
password = 'pw'
odbc_conn_str = 'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=%s;UID=%s;PWD=%s' % (db_file, user, password)
conn = pyodbc.connect(odbc_conn_str)
cur = conn.cursor()
qry = cur.execute("SELECT * FROM table WHERE INST = '796116'")
# Passing fetchall() to DataFrame without a columns= argument is what produces
# the single "column of tuples" shown in the output below.
dataf = DataFrame(qry.fetchall())
print(dataf)
this puts the data into a data frame but the second row is a list. I need the snippet below to be in 4 separate columns, not 2 with a list.
0 (u'RM257095', u'c1', u'796116')
1 (u'RM257097', u'c2', u'796116')
2 (u'RM257043', u'c3', u'796116')
3 (u'RM257044', u'c4', u'796116')
I have used modules like kdb_utils which has a read_query function and it pulled the data from kdb and separated it into a neat dataframe. Is there anything like this for access or another way to pull the data and neatly put it into a data frame?
Consider using pandas' direct read_sql method:
import pyodbc
import pandas as pd
...
# Two fixes versus the original:
#  1. .format() is applied to the WHOLE literal (implicit string
#     concatenation), so the doubled {{ }} around the driver name collapse
#     to single braces; previously .format() only ran on the second string.
#  2. The pyodbc.connect(...) call is properly closed — the original had an
#     unbalanced parenthesis.
conn_str = ('DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};'
            'DBQ={};Uid={};Pwd={};'.format(db_file, user, password))
cnxn = pyodbc.connect(conn_str)
query = "SELECT * FROM mytable WHERE INST = '796116'"
dataf = pd.read_sql(query, cnxn)
cnxn.close()
I want to extract data from a postgresql database and use that data (in a dataframe format) in a script. Here's my initial try:
from pandas import DataFrame
import psycopg2
conn = psycopg2.connect(host=host_address, database=name_of_database, user=user_name, password=user_password)
cur = conn.cursor()
# %-interpolation of the table name is injection-prone (acknowledged below).
cur.execute("SELECT * FROM %s;" % name_of_table)
the_data = cur.fetchall()
# cursor.description holds one sequence per result column; item [0] is the name.
colnames = [desc[0] for desc in cur.description]
the_frame = DataFrame(the_data)
the_frame.columns = colnames
cur.close()
conn.close()
Note: I am aware that I should not use "string parameters interpolation (%) to pass variables to a SQL query string", but this works great for me as it is.
Would there be a more direct approach to this?
Edit: Here's what I used from the selected answer:
import pandas as pd
import sqlalchemy as sq

# Standard SQLAlchemy URL: dialect+driver://user:password@host:port/database.
# The original had '#' where '@' belongs (a Stack Overflow paste artifact),
# which is not a valid credentials/host separator.
engine = sq.create_engine("postgresql+psycopg2://username:password@host:port/database")
the_frame = pd.read_sql_table(name_of_table, engine)
Pandas can load data from Postgres directly:
import psycopg2
import pandas.io.sql as pdsql
conn = psycopg2.connect(...)
# NOTE(review): read_frame is the legacy pre-0.14 pandas API (deprecated, as
# the text below this snippet points out); prefer read_sql_query/read_sql_table.
the_frame = pdsql.read_frame("SELECT * FROM %s;" % name_of_table, conn)
If you have a recent pandas (>=0.14), you should use read_sql_query/table (read_frame is deprecated) with an sqlalchemy engine:
import pandas as pd
import sqlalchemy
import psycopg2
engine = sqlalchemy.create_engine("postgresql+psycopg2://...")
# Either run an explicit query...
the_frame = pd.read_sql_query("SELECT * FROM %s;" % name_of_table, engine)
# ...or load the whole table by name.
the_frame = pd.read_sql_table(name_of_table, engine)
Here is an alternate method:
# run sql code (conn here is a SQLAlchemy connection/engine)
result = conn.execute(sql)
# Insert into a dataframe: list(result) materializes the rows and
# result.keys() supplies the column names
df = DataFrame(data=list(result), columns=result.keys())