import subprocess
import sqlite3

import pandas as pd

# Load the liked-songs export and normalise column names so they can be
# used as attribute accessors below (e.g. sr.Track_Name).
df = pd.read_csv('liked_songs.csv')
df.columns = df.columns.str.replace(' ', '_')

# `!sqlite3 spotify.db < spotify.sql` is IPython shell magic and a syntax
# error in plain Python; run the schema script via subprocess instead.
with open('spotify.sql') as schema:
    subprocess.run(['sqlite3', 'spotify.db'], stdin=schema, check=True)

connection = sqlite3.connect('spotify.db')
cursor = connection.cursor()

# Parameterised insert: qmark placeholders avoid the SQL-injection and
# quoting bugs of the original f-string (e.g. track names containing ').
insert_sql = """INSERT INTO spotify (SpotifyID, ArtistID, Track_Name,
    Album_Name, Artist_Name, Release_Date, Duration, Popularity, Genres)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"""

for sr in df.itertuples(index=False):
    cursor.execute(insert_sql, (sr.SpotifyID, sr.Artist_ID, sr.Track_Name,
                                sr.Album_Name, sr.Artist_Name, sr.Release_Date,
                                sr.Duration, sr.Popularity, sr.Genres))

# The original never committed, so no rows actually persisted.
connection.commit()
connection.close()
I'm using pandas to import a .csv file and sqlite3 to enter data into a database made from a SQL script. (See the attached image of the SQL script.)
You can directly use to_sql:
import pandas as pd
import sqlite3

# Read the CSV and normalise column names (spaces -> underscores) so they
# match the SQL schema's identifiers.
df = pd.read_csv('liked_songs.csv')
df.columns = df.columns.str.replace(' ', '_')

connection = sqlite3.connect('spotify.db')
try:
    # to_sql creates/replaces the table and bulk-inserts every row.
    # index=False (the documented bool spelling of the original
    # index=None) keeps the DataFrame index out of the table.
    df.to_sql('spotify', connection, if_exists='replace', index=False)
finally:
    connection.close()  # the original leaked the open connection
Related
How can I easily write my pandas dataframe to a MySQL database using mysql.connector?
import mysql.connector as sql
import pandas as pd

# Connect straight through the MySQL DB-API driver.
db_connection = sql.connect(host='124685.eu-central-1.rds.amazonaws.com',
database="db_name", user='user', password='pw')

query = 'SELECT * FROM table_name'
# Reading works fine with a raw DB-API connection.
df = pd.read_sql(sql=query, con=db_connection)
df["Person_Name"] = "xx"
# NOTE(review): the write below is what fails — pandas' to_sql only
# supports SQLAlchemy connectables (or a sqlite3 connection), not a raw
# mysql.connector connection; presumably that is the source of the
# DatabaseError reported further down. TODO: confirm against pandas docs.
df.to_sql(con=db_connection, name='table_name', if_exists='replace')
Tried this but it gives me an error that:
pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': Not all parameters were used in the SQL statement
Does mysql.connector not have a df.to_sql function?
These are the col names:
Col names Index(['Person_ID', 'AirTable_ID_Person', 'Person_Name', 'Gender', 'Ethnicity',
'LinkedIn_Link_to_the_Profile_of_Person', 'Jensen_Analyst',
'Data_Source', 'Created_Time', 'Last_Modified_Time', 'Last refresh',
'createdTime', 'Gender_ID', 'Ethnicity_ID', 'Jensen_Analyst_ID',
'Data_Source_ID', 'Position_ID', 'Egnyte_File', 'Comment', 'Move',
'Right_Move', 'Bio-Import-Assistant', 'Diversity'],
dtype='object')
Pandas requires an SQLAlchemy engine to write data to SQL. You can take up the following two approaches: the first is writing with a connector execute, and the second uses the engine with a pandas.to_sql statement.
It works very similar to your pandas read function.
import pandas as pd
import mysql.connector as sql

db_connection = sql.connect(host='124685.eu-central-1.rds.amazonaws.com',
                            database="db_name", user='user', password='pw')

query = 'SELECT * FROM table_name'
df = pd.read_sql(sql=query, con=db_connection)
df["Person_Name"] = "xx"

df_temp = df[['Person_Name', 'Person_ID']]

# An INSERT cannot carry a WHERE clause; to set Person_Name on existing
# rows keyed by Person_ID this must be an UPDATE.
query_update = 'update table_name set Person_Name = %s where Person_ID = %s'

# One (Person_Name, Person_ID) tuple per row for executemany.
pars = list(map(tuple, df_temp.values.tolist()))

cursor = db_connection.cursor()
# Bug fixes vs. the original: executemany() was passed `query` (the
# SELECT) instead of the update statement, and commit() lives on the
# connection, not the cursor.
cursor.executemany(query_update, pars)
db_connection.commit()
cursor.close()
Or you can establish an engine for uploading.
import pandas as pd
from sqlalchemy import create_engine
import mysql.connector as sql

# engine = create_engine('mysql+pymysql://username:password@host/database')
# or, in your case -
# NOTE: the credentials/host separator is '@', not '#'; with '#'
# SQLAlchemy cannot parse the URL.
engine = create_engine('mysql+pymysql://user:pw@124685.eu-central-1.rds.amazonaws.com/db_name')

db_connection = sql.connect(host='124685.eu-central-1.rds.amazonaws.com',
                            database="db_name", user='user', password='pw')

query = 'SELECT * FROM table_name'
df = pd.read_sql(sql=query, con=db_connection)
df["Person_Name"] = "xx"

# Writing needs the SQLAlchemy engine — a raw mysql.connector connection
# is not supported by to_sql.
df.to_sql(con=engine, name='table_name', if_exists='replace')
For this method be sure to install pymysql before running with pip install pymysql and you should be good to go.
I spent over two days trying to solve this but still blocked.
I work with pyodbc and pandas in order to write a specific data ( ID and Role ) from a column called ExtraData ( a nested JSON ) into a pandas DataFrame.
The JSON in ExtraData is the following :
{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}
Here are my attempts:
import pyodbc
import json
import pandas.io.json as pd_json
#// Skip setting the connection string
crsr = conn.cursor()
query_json ="SELECT ExtraData FROM data"
# fetchall() returns a list of 1-tuples; each row[0] is the ExtraData
# JSON *string*.
test_data= crsr.execute(query_json).fetchall()
for row in test_data:
# json.dumps of the row yields '["{...}"]' — the inner JSON is still
# serialised, so loads() below produces a list holding a string, not a
# parsed dict.
test = json.dumps([x for x in row])
data = pd_json.loads(test)
print(data) #['{"Data":{"Person":[{"Source":"C","ID":"45","Role":43}],"NoID":2}}']
# NOTE(review): json_normalize then indexes into that *string* element,
# which is what raises "TypeError: string indices must be integers"
# (the error reported below).
df = pd_json.json_normalize(data,
record_path='Person',
meta=['ID', 'Role'])
print(df)
I have the following error :
---> df = pd_json.json_normalize(data, record_path='Person', meta=['ID', 'Role'])
TypeError: string indices must be integers
Do you have an explanation pls? And how to avoid having this error ?
I hope you can help!
you should change code from:
import pyodbc
import json
import pandas.io.json as pd_json
# -- original (failing) version, reproduced here for comparison with the
#    corrected version that follows in the answer: json.dumps/loads on
#    the row leaves the inner JSON unparsed, so json_normalize receives
#    a list with one string element.
crsr = conn.cursor()
query_json ="SELECT ExtraData FROM data"
test_data= crsr.execute(query_json).fetchall()
for row in test_data:
test = json.dumps([x for x in row])
data = pd_json.loads(test)
print(data)
df = pd_json.json_normalize(data,
record_path='Person',
meta=['ID', 'Role'])
print(df)
to
import pyodbc
import json
import pandas.io.json as pd_json

crsr = conn.cursor()
query_json = "SELECT ExtraData FROM data"
test_data = crsr.execute(query_json).fetchall()

for row in test_data:
    # row[0] is the ExtraData cell itself: a JSON *string*. Parse that
    # directly instead of json.dumps-ing the row (which produced a list
    # whose single element was still an unparsed string — the cause of
    # "string indices must be integers").
    data = json.loads(row[0])
    print(data)

    # The Person records live under data['Data']['Person'], so the
    # record path has two components; ID and Role are fields of each
    # record, so no `meta` columns are needed.
    df = pd_json.json_normalize(data, record_path=['Data', 'Person'])
    print(df)
Among sql-server connectors adodbapi is the only one that's working in my environment.
import adodbapi

# Build an OLE DB connection string from server/db/user/pwd variables
# defined elsewhere in the questioner's environment.
conn = adodbapi.connect("PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server,db,user,pwd))
cursor = conn.cursor()
# NOTE(review): no statement has been executed on this cursor in the
# snippet; iterating it presumably relies on an execute() omitted from
# the post.
query_list = [row for row in cursor]
type(query_list[0]) is adodbapi.apibase.SQLrow
How to convert this list into a pandas df?
Thanks
This is how I did it:
import adodbapi as ado
import numpy as np
import pandas as pd


def get_df(data):
    """Convert an adodbapi fetchall() result into a pandas DataFrame."""
    # Wrap the ADO results in an array and transpose so rows come out
    # row-major, then label columns from the result set's column names.
    frame = pd.DataFrame(np.array(data.ado_results)).transpose()
    frame.columns = data.columnNames.keys()
    return frame


with ado.connect('yourconnectionstring') as con:
    with con.cursor() as cur:
        # Execute the query and hand the fetched rows to the converter.
        cur.execute('yourquery')
        df = get_df(cur.fetchall())
This may help:
import pandas as pd

# ... your connection / cursor setup and query execution go here ...
# (the original post had bare placeholder lines, which are not valid
# Python; they are replaced by this comment)

# Materialise the cursor's rows and wrap them in a single-column frame.
query_list = [row for row in cursor]
df = pd.DataFrame({'col':query_list })
print (df)
Consider pandas' read_sql to directly query the database. Currently, though, you will receive an error:
KeyError: '_typ'
However, there is a working fix thanks to @TomAubrunner on this GitHub ticket; the error appears to be a bug in adodbapi.
Find location of adodpapi: print(adodbapi.__file__)
Open the script in folder: apibase.py
Locate: return self._getValue(self.rows.columnNames[name.lower()]) and replace it with the try/except block below:
# Workaround quoted from the adodbapi GitHub ticket: swallow lookup
# failures in SQLrow attribute resolution so pandas' '_typ' probe gets
# False instead of a raised error.
# NOTE(review): the bare except is deliberate in the quoted patch, but
# it hides every error, not just the missing-name lookup failure.
try:
return self._getValue(self.rows.columnNames[name.lower()])
except:
return False
Once done, simply run as you would any DB-API pandas connection even with qmark parameters:
import pandas as pd
import adodbapi

# Assemble the OLE DB connection string first, then hand the DB-API
# connection straight to pandas.
conn_str = "PROVIDER=SQLOLEDB;Data Source={0};Database={1}; \
UID={2};PWD={3};".format(server,db,user,pwd)
conn = adodbapi.connect(conn_str)

# Plain query, no parameters.
df = pd.read_sql("SELECT * FROM myTable", conn)

# Same call with qmark-style parameters bound via `params`.
df = pd.read_sql("SELECT * FROM myTable WHERE [Col]= ?", conn, params=['myValue'])

conn.close()
I tried to fill a SQL SERVER table using Python by executing the Python script below :
import pyodbc
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

# Read the spreadsheet to be loaded into SQL Server.
df = pd.read_excel('C:/Users/Username/Desktop/file1.xlsx', sheet_name='Sheet1')

cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=MYSERVERNAME;"
                      "Database=DB;"
                      "uid=sa;pwd=MYPWD;"
                      "Trusted_Connection=yes;")

print("Column headings:")
print(df.columns)

cursor = cnxn.cursor()
for i in df.index:
    # The loop body must be indented (the posted code raised
    # IndentationError), and the cell values must be bound as qmark
    # parameters — written inside the SQL string literal, "df['ID'][i]"
    # is just text, not the value.
    cursor.execute("insert into pyperson (id,firstname,lastname) values (?, ?, ?)",
                   df['ID'][i], df['First Name'][i], df['Last Name'][i])
cnxn.commit()
PS:
If I try to read only data from excel file and then print it it works fine
if I try to insert directly with an insert into statement using python it works also fine
but when I combine them it shows me the error message below :
IndentationError: expected an indented block
Any ideas,Any help :)
I am using following code to add data from txt file to SQL Server using Python, hope that helps:
import pymssql

host = 'YourHostName'
username = 'USERNAME'
password = 'PASSWORD'
database = 'TestDB'

conn = pymssql.connect(host, username, password, database)
cursor = conn.cursor()

# Start from an empty table before re-loading the file.
cursor.execute("Delete from color_type")

# The posted snippet had a flat (invalid) indentation under `with` and
# accumulated lines into an unused list; only the first two '|'-separated
# fields of each line are inserted.
with open("Your file path\\filename.csv", "r") as ins:
    for line in ins:
        data = line.split('|')
        fst = data[0]
        lst = data[1]
        cursor.execute("insert into color_type values(%s, %s)", (fst, lst))

# Commit the inserts before reading the table back.
conn.commit()
cursor.execute("select * from color_type")
rows = cursor.fetchall()
print(rows)
I want to extract data from a postgresql database and use that data (in a dataframe format) in a script. Here's my initial try:
from pandas import DataFrame
import psycopg2

# Open a connection/cursor against the target Postgres database.
conn = psycopg2.connect(host=host_address, database=name_of_database, user=user_name, password=user_password)
cur = conn.cursor()
# NOTE(review): %-interpolating the table name is injection-prone if
# name_of_table is untrusted (the author acknowledges this below).
cur.execute("SELECT * FROM %s;" % name_of_table)
the_data = cur.fetchall()
# Column labels come from the cursor's result-set description.
colnames = [desc[0] for desc in cur.description]
the_frame = DataFrame(the_data)
the_frame.columns = colnames
cur.close()
conn.close()
Note: I am aware that I should not use "string parameters interpolation (%) to pass variables to a SQL query string", but this works great for me as it is.
Would there be a more direct approach to this?
Edit: Here's what I used from the selected answer:
import pandas as pd
import sqlalchemy as sq

# The credentials/host separator in an SQLAlchemy URL is '@'
# (user:password@host:port); the posted '#' makes the URL unparseable.
engine = sq.create_engine("postgresql+psycopg2://username:password@host:port/database")
the_frame = pd.read_sql_table(name_of_table, engine)
Pandas can load data from Postgres directly:
import psycopg2
import pandas.io.sql as pdsql

# Connection arguments elided in the original post.
conn = psycopg2.connect(...)
# read_frame runs the query and builds the DataFrame in one call.
# NOTE: deprecated since pandas 0.14 — see the read_sql_query /
# SQLAlchemy-engine variant described just below.
the_frame = pdsql.read_frame("SELECT * FROM %s;" % name_of_table, conn)
If you have a recent pandas (>=0.14), you should use read_sql_query/table (read_frame is deprecated) with an sqlalchemy engine:
import pandas as pd
import sqlalchemy
import psycopg2

# Build the SQLAlchemy engine once; pandas uses it for both read styles.
engine = sqlalchemy.create_engine("postgresql+psycopg2://...")

# Option 1: arbitrary SQL through read_sql_query.
sql_text = "SELECT * FROM %s;" % name_of_table
the_frame = pd.read_sql_query(sql_text, engine)

# Option 2: read the whole table by name.
the_frame = pd.read_sql_table(name_of_table, engine)
Here is an alternate method:
# Run the SQL statement and capture its result proxy.
rows = conn.execute(sql)
# Wrap the row iterator in a DataFrame, taking column labels from the
# result's keys.
df = DataFrame(list(rows), columns=rows.keys())