I have spent over two days trying to solve this but I am still blocked.
I am working with pyodbc and pandas in order to write specific data (ID and Role) from a column called ExtraData (a nested JSON) into a pandas DataFrame.
The JSON in ExtraData looks like this:
{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}
Here is my attempt:
import pyodbc
import json
import pandas.io.json as pd_json

# (setting up the connection string is skipped here)
crsr = conn.cursor()
query_json = "SELECT ExtraData FROM data"
test_data = crsr.execute(query_json).fetchall()

for row in test_data:
    test = json.dumps([x for x in row])
    data = pd_json.loads(test)
    print(data)  # ['{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}']
    df = pd_json.json_normalize(data,
                                record_path='Person',
                                meta=['ID', 'Role'])
    print(df)
I get the following error:
---> df = pd_json.json_normalize(data, record_path='Person', meta=['ID', 'Role'])
TypeError: string indices must be integers
Do you have an explanation, and how can I avoid this error?
I hope you can help!
You should change your code from:
import pyodbc
import json
import pandas.io.json as pd_json

crsr = conn.cursor()
query_json = "SELECT ExtraData FROM data"
test_data = crsr.execute(query_json).fetchall()

for row in test_data:
    test = json.dumps([x for x in row])
    data = pd_json.loads(test)
    print(data)
    df = pd_json.json_normalize(data,
                                record_path='Person',
                                meta=['ID', 'Role'])
    print(df)
to
import pyodbc
import json
import pandas.io.json as pd_json

crsr = conn.cursor()
query_json = "SELECT ExtraData FROM data"
test_data = crsr.execute(query_json).fetchall()

for row in test_data:
    data = json.loads(row[0])  # <-- this line: parse the JSON string stored in the column
    print(data)
    df = pd_json.json_normalize(data['Data'],  # <-- and normalize the nested 'Data' object
                                record_path='Person')
    print(df[['ID', 'Role']])

The reason for the error: json.dumps([x for x in row]) re-serialises the row into a JSON array, so pd_json.loads hands back a list containing the raw JSON string, and json_normalize then indexes into that string, which is exactly what raises TypeError: string indices must be integers. Parsing the column value itself yields the nested dictionary, and since ID and Role are fields of each Person record they come out as ordinary columns, so no meta argument is needed.
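As a side note, on pandas 1.0+ json_normalize lives at the top level of the package (pandas.io.json.json_normalize is deprecated), so a minimal sketch of the same idea in the current API, assuming conn is the open pyodbc connection from the question, would be:
import json
import pandas as pd

crsr = conn.cursor()
rows = crsr.execute("SELECT ExtraData FROM data").fetchall()

# Parse every JSON document first, then normalize them all in one call.
docs = [json.loads(r[0]) for r in rows]
df = pd.json_normalize(docs, record_path=['Data', 'Person'])
print(df[['ID', 'Role']])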
Related
I want to export a pandas DataFrame to a Sybase SQL database. I found the code for how to do it here: link
However, I get the following error:
pyodbc.ProgrammingError: ('42000', '[42000] [Sybase][ODBC Driver]Syntax error or access violation (0) (SQLPrepare)')
Could you help me solve it?
My code:
import pandas as pd
import pyodbc as db
from datetime import datetime

# set constants
DSN = 'DSN'
input_table = 'table'
run_timestamp = str(datetime.now())[:19]
test_date_start = '2020-09-09'
test_date_end = '2025-08-08'

input_data = pd.DataFrame({
    'model': ['aaa', 'aaa'], 'result_type': ['a', 'test_statistic'], 'test_name': ['b', 'mwb'],
    'input_variable_name': ['c', 'pd'], 'segment': ['car', 'book'],
    'customer_type': ['le', 'le'], 'value': [60, 0.58], 'del_flag': [0, 0]
})

query = 'insert into schema.table (data_input_time,test_date_start,test_date_end,model,result_type,test_name,input_variable_name,segment,customer_type,value,del_flag) values (?,?,?,?,?,?,?,?,?,?,?)'

cnxn = db.connect(DSN)
cursor = cnxn.cursor()
cursor.execute('SETUSER MYUSERNAME')

for row_count in range(0, input_data.shape[0]):
    # method 1
    chunk = input_data.iloc[row_count:row_count + 1, :].values.tolist()
    tuple_of_tuples = tuple(tuple(x) for x in chunk)
    cursor.executemany(query, tuple_of_tuples)

    # method 2
    params = [(i,) for i in chunk]  # f'txt{i}'
    cursor.executemany(query, params)
If you build the statement as an f-string, every interpolated name needs its own pair of braces, and the stray quote before the placeholders has to go. It should look like this:
query = f'''insert into schema.table ({data_input_time},{test_date_start},{test_date_end},{model},{result_type},{test_name},{input_variable_name},{segment},{customer_type},{value},{del_flag}) values (?,?,?,?,?,?,?,?,?,?,?)'''
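Independently of the f-string question, a common pattern here is to keep the ? placeholders and pass one tuple of all eleven values per row. A minimal sketch, under the assumption that run_timestamp, test_date_start and test_date_end from the question should accompany every record:
# One parameter tuple per DataFrame row: the three date/time constants followed by
# the eight DataFrame columns, matching the eleven ? placeholders in the statement.
params = [
    (run_timestamp, test_date_start, test_date_end, *row)
    for row in input_data.itertuples(index=False, name=None)
]
cursor.executemany(query, params)
cnxn.commit()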
import pandas as pd
import sqlite3

df = pd.read_csv('liked_songs.csv')

!sqlite3 spotify.db < spotify.sql

connection = sqlite3.connect('spotify.db')
df.columns = df.columns.str.replace(' ', '_')
cursor = connection.cursor()

for index in df.index:
    sr = df.iloc[index]
    cursor = cursor.execute(f"""INSERT INTO spotify (SpotifyID, ArtistID, Track_Name,
        Album_Name, Artist_Name, Release_Date, Duration, Popularity, Genres)
        VALUES ('{sr.SpotifyID}', '{sr.Artist_ID}', '{sr.Track_Name}',
                '{sr.Album_Name}', '{sr.Artist_Name}', '{sr.Release_Date}',
                '{sr.Duration}', '{sr.Popularity}', '{sr.Genres}')""")
I'm using pandas to import a .csv file and sqlite3 to enter the data into a database created from a SQL script (image of the SQL script attached).
You can directly use to_sql:
import pandas as pd
import sqlite3
df = pd.read_csv('liked_songs.csv')
connection = sqlite3.connect('spotify.db')
df.columns = df.columns.str.replace(' ','_')
df.to_sql('spotify', connection, if_exists='replace', index=False)
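If you would rather keep the explicit INSERT from the question, a parameterized executemany avoids the quoting problems of string-formatted SQL; a minimal sketch, assuming the renamed CSV columns used in the question:
insert_sql = """INSERT INTO spotify (SpotifyID, ArtistID, Track_Name, Album_Name,
                    Artist_Name, Release_Date, Duration, Popularity, Genres)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"""
cols = ['SpotifyID', 'Artist_ID', 'Track_Name', 'Album_Name', 'Artist_Name',
        'Release_Date', 'Duration', 'Popularity', 'Genres']

cursor = connection.cursor()
cursor.executemany(insert_sql, df[cols].itertuples(index=False, name=None))
connection.commit()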
I am getting "TypeError: tuple indices must be integers or slices, not list" when trying to insert my data into SQL Server. How can I update my code? I tried changing records = df_data.values.tolist() to records = df_data.values.totuple(), but with no success.
Edit: updated code below.
import yfinance as yf
import glob
import pandas as pd
import pyodbc
import os
import sqlite3

os.chdir(r"C:\Users\Empyz\Desktop")

list1 = ['2022-03-18', '2022-03-25', '2022-04-01', '2022-04-08', '2022-04-14', '2022-04-22',
         '2022-05-20', '2022-06-17', '2022-07-15', '2022-10-21', '2023-01-20', '2024-01-19']

gme = yf.Ticker("gme")

for date in list1:
    df = gme.option_chain(date)
    df_call = df[0]
    df_put = df[1]
    df_call.to_csv(f'C:\Empyz\Deskop\\call_{date}.csv', index=False)
    df_put.to_csv(f'C:\Empyz\Deskop\\put_{date}.csv', index=False)

extension = 'csv'
all_filenames = [i for i in glob.glob(f'*.{extension}')]

# combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

# export to csv
combined_csv.to_csv("Options_Data_Combined.csv", index=False, encoding='utf-8-sig')
print(combined_csv)

conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=localhost;'
                      'Database=Stocks;'
                      'Trusted_Connection=yes;')
c = conn.cursor()

table_name = "Options_Data_GME"

# create table. I'm lazy so everything is just set to text
c.execute(f'CREATE TABLE IF NOT EXISTS [{table_name}] (contractSymbol nvarchar(50), lastTradeDate nvarchar(50), strike decimal(18,4), lastPrice decimal(18,4), bid decimal(18,4), ask decimal(18,4), change decimal(18,4), percentChange decimal(18,4), volume float, openInterest float, impliedVolatility float, inTheMoney nvarchar(50), contractSize nvarchar(50), currency nvarchar(50))')
conn.commit()

combined_csv.to_sql(table_name, conn, if_exists='replace')

c.execute(f'''
SELECT * FROM {table_name}
''')

for row in c.fetchall():
    print(row)
So the easy way is to just use pandas' df.to_sql() method. See the docs here.
See the code below for a simple implementation using a local sqlite3 database. You should be able to easily adapt this to a pyodbc connection.
Note that I've removed your comments and added my own so it's easier to see what I'm doing.
import yfinance as yf
import glob
import pandas as pd
import sqlite3

FILEPATH = "./"  ###### EDITED

list1 = ['2022-03-18', '2022-03-25', '2022-04-01', '2022-04-08', '2022-04-14', '2022-04-22',
         '2022-05-20', '2022-06-17', '2022-07-15', '2022-10-21', '2023-01-20', '2024-01-19']

gme = yf.Ticker("gme")

for date in list1:
    df = gme.option_chain(date)
    df_call = df[0]
    df_put = df[1]
    # outputs options data to csv based on dates and type
    df_call.to_csv(f'{FILEPATH}call_{date}.csv', index=False)  ###### EDITED
    df_put.to_csv(f'{FILEPATH}put_{date}.csv', index=False)  ###### EDITED

extension = 'csv'

# use f-strings instead of .format()
all_filenames = [i for i in glob.glob(f'{FILEPATH}*.{extension}')]  ###### EDITED

# create dataframe with all the CSVs combined.
combined_df = pd.concat([pd.read_csv(f) for f in all_filenames])

# using sqlite3 to create a test db
conn = sqlite3.connect('test_database')
c = conn.cursor()

table_name = "Options_Data_GME"

# create table. I'm lazy so everything is just set to text
c.execute(f'CREATE TABLE IF NOT EXISTS {table_name} (contractSymbol text, lastTradeDate text, strike text, lastPrice text, bid text, ask text, change text, percentChange text, volume text, openInterest text, impliedVolatility text, inTheMoney text, contractSize text, currency text)')
conn.commit()

combined_df.to_sql(table_name, conn, if_exists='replace')

c.execute(f'''
SELECT * FROM {table_name}
''')

for row in c.fetchall():
    print(row)
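One caveat for the actual SQL Server target: to_sql expects a SQLAlchemy connectable (or a sqlite3 connection), not a raw pyodbc connection, so the pyodbc version is probably best wrapped in an engine. A minimal sketch, assuming the localhost/Stocks setup from the question and an installed ODBC Driver 17:
import pandas as pd
import sqlalchemy as sa

# Build a SQLAlchemy engine on top of pyodbc; adjust the driver name to whatever is installed.
engine = sa.create_engine(
    "mssql+pyodbc://localhost/Stocks"
    "?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes"
)

combined_df.to_sql("Options_Data_GME", engine, if_exists="replace", index=False)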
Among the SQL Server connectors, adodbapi is the only one that works in my environment.
import adodbapi

conn = adodbapi.connect("PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server, db, user, pwd))
cursor = conn.cursor()
query_list = [row for row in cursor]
# type(query_list[0]) is adodbapi.apibase.SQLrow
How can I convert this list into a pandas DataFrame?
Thanks
This is how I did it:
import adodbapi as ado
import numpy as np
import pandas as pd

def get_df(data):
    ar = np.array(data.ado_results)       # turn ado results into a numpy array
    df = pd.DataFrame(ar).transpose()     # create a dataframe from the array
    df.columns = data.columnNames.keys()  # set column names
    return df

with ado.connect('yourconnectionstring') as con:
    with con.cursor() as cur:
        sql_str = 'yourquery'
        cur.execute(sql_str)
        data = cur.fetchall()
        df = get_df(data)
This may help:
import pandas as pd

# ... your connection and cursor statements ...

query_list = [row for row in cursor]
df = pd.DataFrame({'col': query_list})
print(df)
Consider pandas' read_sql to query the database directly. Currently, though, you will receive an error:
KeyError: '_typ'
However, there is a working fix, thanks to @TomAubrunner on this GitHub ticket, for what appears to be a bug in adodbapi:
Find the location of adodbapi: print(adodbapi.__file__)
Open the script apibase.py in that folder.
Locate return self._getValue(self.rows.columnNames[name.lower()]) and replace it with the try/except block below:
try:
    return self._getValue(self.rows.columnNames[name.lower()])
except:
    return False
Once done, simply run it as you would with any DB-API pandas connection, even with qmark parameters:
import pandas as pd
import adodbapi

conn = adodbapi.connect("PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server, db, user, pwd))

# WITHOUT PARAMS
df = pd.read_sql("SELECT * FROM myTable", conn)

# WITH PARAMS
df = pd.read_sql("SELECT * FROM myTable WHERE [Col] = ?", conn, params=['myValue'])

conn.close()
I want to extract data from a PostgreSQL database and use that data (as a DataFrame) in a script. Here's my initial try:
from pandas import DataFrame
import psycopg2
conn = psycopg2.connect(host=host_address, database=name_of_database, user=user_name, password=user_password)
cur = conn.cursor()
cur.execute("SELECT * FROM %s;" % name_of_table)
the_data = cur.fetchall()
colnames = [desc[0] for desc in cur.description]
the_frame = DataFrame(the_data)
the_frame.columns = colnames
cur.close()
conn.close()
Note: I am aware that I should not use "string parameter interpolation (%) to pass variables to a SQL query string", but this works great for me as it is.
Would there be a more direct approach to this?
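On that note, psycopg2 does offer a composition API for the one thing regular query parameters cannot handle, identifiers such as table names; a minimal sketch, assuming the same conn and name_of_table as above:
from psycopg2 import sql

# Compose the statement with a safely quoted identifier instead of "%" string formatting.
query = sql.SQL("SELECT * FROM {}").format(sql.Identifier(name_of_table))
cur = conn.cursor()
cur.execute(query)
the_data = cur.fetchall()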
Edit: Here's what I used from the selected answer:
import pandas as pd
import sqlalchemy as sq
engine = sq.create_engine("postgresql+psycopg2://username:password@host:port/database")
the_frame = pd.read_sql_table(name_of_table, engine)
Pandas can load data from Postgres directly:
import psycopg2
import pandas.io.sql as pdsql
conn = psycopg2.connect(...)
the_frame = pdsql.read_frame("SELECT * FROM %s;" % name_of_table, conn)
If you have a recent pandas (>= 0.14), you should use read_sql_query/read_sql_table (read_frame is deprecated) with a SQLAlchemy engine:
import pandas as pd
import sqlalchemy
import psycopg2
engine = sqlalchemy.create_engine("postgresql+psycopg2://...")
the_frame = pd.read_sql_query("SELECT * FROM %s;" % name_of_table, engine)
the_frame = pd.read_sql_table(name_of_table, engine)
Here is an alternate method:
from pandas import DataFrame

# conn is an open SQLAlchemy connection and sql holds your query string
result = conn.execute(sql)

# build a DataFrame from the result rows and column names
df = DataFrame(data=list(result), columns=result.keys())
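For completeness, a self-contained sketch of that alternate method with a SQLAlchemy engine (the connection URL and table name are placeholders):
import pandas as pd
from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg2://username:password@host:port/database")

with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM name_of_table"))
    df = pd.DataFrame(data=result.fetchall(), columns=result.keys())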