How to write DataFrame to postgres table - python

There is DataFrame.to_sql method, but it works only for mysql, sqlite and oracle databases. I cant pass to this method postgres connection or sqlalchemy engine.

Starting from pandas 0.14 (released end of May 2014), postgresql is supported. The sql module now uses sqlalchemy to support different database flavors. You can pass a sqlalchemy engine for a postgresql database (see docs). E.g.:
from sqlalchemy import create_engine
engine = create_engine('postgresql://username:password#localhost:5432/mydatabase')
df.to_sql('table_name', engine)
You are correct that in pandas up to version 0.13.1 postgresql was not supported. If you need to use an older version of pandas, here is a patched version of pandas.io.sql: https://gist.github.com/jorisvandenbossche/10841234.
I wrote this a time ago, so cannot fully guarantee that it always works, buth the basis should be there). If you put that file in your working directory and import it, then you should be able to do (where con is a postgresql connection):
import sql # the patched version (file is named sql.py)
sql.write_frame(df, 'table_name', con, flavor='postgresql')

Faster option:
The following code will copy your Pandas DF to postgres DB much faster than df.to_sql method and you won't need any intermediate csv file to store the df.
Create an engine based on your DB specifications.
Create a table in your postgres DB that has equal number of columns as the Dataframe (df).
Data in DF will get inserted in your postgres table.
from sqlalchemy import create_engine
import psycopg2
import io
If you want to replace the table, we can replace it with normal to_sql method using headers from our df and then load the entire big time consuming df into DB.
engine = create_engine(
'postgresql+psycopg2://username:password#host:port/database')
# Drop old table and create new empty table
df.head(0).to_sql('table_name', engine, if_exists='replace',index=False)
conn = engine.raw_connection()
cur = conn.cursor()
output = io.StringIO()
df.to_csv(output, sep='\t', header=False, index=False)
output.seek(0)
contents = output.getvalue()
cur.copy_from(output, 'table_name', null="") # null values become ''
conn.commit()
cur.close()
conn.close()

Pandas 0.24.0+ solution
In Pandas 0.24.0 a new feature was introduced specifically designed for fast writes to Postgres. You can learn more about it here: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-sql-method
import csv
from io import StringIO
from sqlalchemy import create_engine
def psql_insert_copy(table, conn, keys, data_iter):
# gets a DBAPI connection that can provide a cursor
dbapi_conn = conn.connection
with dbapi_conn.cursor() as cur:
s_buf = StringIO()
writer = csv.writer(s_buf)
writer.writerows(data_iter)
s_buf.seek(0)
columns = ', '.join('"{}"'.format(k) for k in keys)
if table.schema:
table_name = '{}.{}'.format(table.schema, table.name)
else:
table_name = table.name
sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
table_name, columns)
cur.copy_expert(sql=sql, file=s_buf)
engine = create_engine('postgresql://myusername:mypassword#myhost:5432/mydatabase')
df.to_sql('table_name', engine, method=psql_insert_copy)

This is how I did it.
It may be faster because it is using execute_batch:
# df is the dataframe
if len(df) > 0:
df_columns = list(df)
# create (col1,col2,...)
columns = ",".join(df_columns)
# create VALUES('%s', '%s",...) one '%s' per column
values = "VALUES({})".format(",".join(["%s" for _ in df_columns]))
#create INSERT INTO table (columns) VALUES('%s',...)
insert_stmt = "INSERT INTO {} ({}) {}".format(table,columns,values)
cur = conn.cursor()
psycopg2.extras.execute_batch(cur, insert_stmt, df.values)
conn.commit()
cur.close()

Faster way to write a df to a table in a custom schema with/without index:
"""
Faster way to write df to table.
Slower way is to use df.to_sql()
"""
from io import StringIO
from pandas import DataFrame
from sqlalchemy.engine.base import Engine
class WriteDfToTableWithIndexMixin:
#classmethod
def write_df_to_table_with_index(
cls,
df: DataFrame,
table_name: str,
schema_name: str,
engine: Engine
):
"""
Truncate existing table and load df into table.
Keep each column as string to avoid datatype conflicts.
"""
df.head(0).to_sql(table_name, engine, if_exists='replace',
schema=schema_name, index=True, index_label='id')
conn = engine.raw_connection()
cur = conn.cursor()
output = StringIO()
df.to_csv(output, sep='\t', header=False,
index=True, index_label='id')
output.seek(0)
contents = output.getvalue()
cur.copy_expert(f"COPY {schema_name}.{table_name} FROM STDIN", output)
conn.commit()
class WriteDfToTableWithoutIndexMixin:
#classmethod
def write_df_to_table_without_index(
cls,
df: DataFrame,
table_name: str,
schema_name: str,
engine: Engine
):
"""
Truncate existing table and load df into table.
Keep each column as string to avoid datatype conflicts.
"""
df.head(0).to_sql(table_name, engine, if_exists='replace',
schema=schema_name, index=False)
conn = engine.raw_connection()
cur = conn.cursor()
output = StringIO()
df.to_csv(output, sep='\t', header=False, index=False)
output.seek(0)
contents = output.getvalue()
cur.copy_expert(f"COPY {schema_name}.{table_name} FROM STDIN", output)
conn.commit()
If you have JSON values in a column in your df then above method will still load all data correctly but the json column will have some weird format. So converting that json column to ::json may generate error. You have to use to_sql() . Add method=multi to speed things up and add chunksize to prevent your machine from freezing:
df.to_sql(table_name, engine, if_exists='replace', schema=schema_name, index=False, method='multi', chunksize=1000)

using psycopg2 you can use native sql commands to write data into a postgres table.
import psycopg2
import pandas as pd
conn = psycopg2.connect("dbname='{db}' user='{user}' host='{host}' port='{port}' password='{passwd}'".format(
user=pg_user,
passwd=pg_pass,
host=pg_host,
port=pg_port,
db=pg_db))
cur = conn.cursor()
def insertIntoTable(df, table):
"""
Using cursor.executemany() to insert the dataframe
"""
# Create a list of tupples from the dataframe values
tuples = list(set([tuple(x) for x in df.to_numpy()]))
# Comma-separated dataframe columns
cols = ','.join(list(df.columns))
# SQL query to execute
query = "INSERT INTO %s(%s) VALUES(%%s,%%s,%%s,%%s)" % (
table, cols)
try:
cur.executemany(query, tuples)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print("Error: %s" % error)
conn.rollback()
return 1

For Python 2.7 and Pandas 0.24.2 and using Psycopg2
Psycopg2 Connection Module
def dbConnect (db_parm, username_parm, host_parm, pw_parm):
# Parse in connection information
credentials = {'host': host_parm, 'database': db_parm, 'user': username_parm, 'password': pw_parm}
conn = psycopg2.connect(**credentials)
conn.autocommit = True # auto-commit each entry to the database
conn.cursor_factory = RealDictCursor
cur = conn.cursor()
print ("Connected Successfully to DB: " + str(db_parm) + "#" + str(host_parm))
return conn, cur
Connect to the database
conn, cur = dbConnect(databaseName, dbUser, dbHost, dbPwd)
Assuming dataframe to be present already as df
output = io.BytesIO() # For Python3 use StringIO
df.to_csv(output, sep='\t', header=True, index=False)
output.seek(0) # Required for rewinding the String object
copy_query = "COPY mem_info FROM STDOUT csv DELIMITER '\t' NULL '' ESCAPE '\\' HEADER " # Replace your table name in place of mem_info
cur.copy_expert(copy_query, output)
conn.commit()

Create engine (where dialect='postgres' or 'mysql', etc..):
from sqlalchemy import create_engine
engine = create_engine(f'{dialect}://{user_name}#{host}:{port}/{db_name}')
Session = sessionmaker(bind=engine)
with Session() as session:
df = pd.read_csv(path + f'/{file}')
df.to_sql('table_name', con=engine, if_exists='append',index=False)

Related

Using fast_executemany = True with sqlalchemy and Pandas [duplicate]

There is DataFrame.to_sql method, but it works only for mysql, sqlite and oracle databases. I cant pass to this method postgres connection or sqlalchemy engine.
Starting from pandas 0.14 (released end of May 2014), postgresql is supported. The sql module now uses sqlalchemy to support different database flavors. You can pass a sqlalchemy engine for a postgresql database (see docs). E.g.:
from sqlalchemy import create_engine
engine = create_engine('postgresql://username:password#localhost:5432/mydatabase')
df.to_sql('table_name', engine)
You are correct that in pandas up to version 0.13.1 postgresql was not supported. If you need to use an older version of pandas, here is a patched version of pandas.io.sql: https://gist.github.com/jorisvandenbossche/10841234.
I wrote this a time ago, so cannot fully guarantee that it always works, buth the basis should be there). If you put that file in your working directory and import it, then you should be able to do (where con is a postgresql connection):
import sql # the patched version (file is named sql.py)
sql.write_frame(df, 'table_name', con, flavor='postgresql')
Faster option:
The following code will copy your Pandas DF to postgres DB much faster than df.to_sql method and you won't need any intermediate csv file to store the df.
Create an engine based on your DB specifications.
Create a table in your postgres DB that has equal number of columns as the Dataframe (df).
Data in DF will get inserted in your postgres table.
from sqlalchemy import create_engine
import psycopg2
import io
If you want to replace the table, we can replace it with normal to_sql method using headers from our df and then load the entire big time consuming df into DB.
engine = create_engine(
'postgresql+psycopg2://username:password#host:port/database')
# Drop old table and create new empty table
df.head(0).to_sql('table_name', engine, if_exists='replace',index=False)
conn = engine.raw_connection()
cur = conn.cursor()
output = io.StringIO()
df.to_csv(output, sep='\t', header=False, index=False)
output.seek(0)
contents = output.getvalue()
cur.copy_from(output, 'table_name', null="") # null values become ''
conn.commit()
cur.close()
conn.close()
Pandas 0.24.0+ solution
In Pandas 0.24.0 a new feature was introduced specifically designed for fast writes to Postgres. You can learn more about it here: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-sql-method
import csv
from io import StringIO
from sqlalchemy import create_engine
def psql_insert_copy(table, conn, keys, data_iter):
# gets a DBAPI connection that can provide a cursor
dbapi_conn = conn.connection
with dbapi_conn.cursor() as cur:
s_buf = StringIO()
writer = csv.writer(s_buf)
writer.writerows(data_iter)
s_buf.seek(0)
columns = ', '.join('"{}"'.format(k) for k in keys)
if table.schema:
table_name = '{}.{}'.format(table.schema, table.name)
else:
table_name = table.name
sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
table_name, columns)
cur.copy_expert(sql=sql, file=s_buf)
engine = create_engine('postgresql://myusername:mypassword#myhost:5432/mydatabase')
df.to_sql('table_name', engine, method=psql_insert_copy)
This is how I did it.
It may be faster because it is using execute_batch:
# df is the dataframe
if len(df) > 0:
df_columns = list(df)
# create (col1,col2,...)
columns = ",".join(df_columns)
# create VALUES('%s', '%s",...) one '%s' per column
values = "VALUES({})".format(",".join(["%s" for _ in df_columns]))
#create INSERT INTO table (columns) VALUES('%s',...)
insert_stmt = "INSERT INTO {} ({}) {}".format(table,columns,values)
cur = conn.cursor()
psycopg2.extras.execute_batch(cur, insert_stmt, df.values)
conn.commit()
cur.close()
Faster way to write a df to a table in a custom schema with/without index:
"""
Faster way to write df to table.
Slower way is to use df.to_sql()
"""
from io import StringIO
from pandas import DataFrame
from sqlalchemy.engine.base import Engine
class WriteDfToTableWithIndexMixin:
#classmethod
def write_df_to_table_with_index(
cls,
df: DataFrame,
table_name: str,
schema_name: str,
engine: Engine
):
"""
Truncate existing table and load df into table.
Keep each column as string to avoid datatype conflicts.
"""
df.head(0).to_sql(table_name, engine, if_exists='replace',
schema=schema_name, index=True, index_label='id')
conn = engine.raw_connection()
cur = conn.cursor()
output = StringIO()
df.to_csv(output, sep='\t', header=False,
index=True, index_label='id')
output.seek(0)
contents = output.getvalue()
cur.copy_expert(f"COPY {schema_name}.{table_name} FROM STDIN", output)
conn.commit()
class WriteDfToTableWithoutIndexMixin:
#classmethod
def write_df_to_table_without_index(
cls,
df: DataFrame,
table_name: str,
schema_name: str,
engine: Engine
):
"""
Truncate existing table and load df into table.
Keep each column as string to avoid datatype conflicts.
"""
df.head(0).to_sql(table_name, engine, if_exists='replace',
schema=schema_name, index=False)
conn = engine.raw_connection()
cur = conn.cursor()
output = StringIO()
df.to_csv(output, sep='\t', header=False, index=False)
output.seek(0)
contents = output.getvalue()
cur.copy_expert(f"COPY {schema_name}.{table_name} FROM STDIN", output)
conn.commit()
If you have JSON values in a column in your df then above method will still load all data correctly but the json column will have some weird format. So converting that json column to ::json may generate error. You have to use to_sql() . Add method=multi to speed things up and add chunksize to prevent your machine from freezing:
df.to_sql(table_name, engine, if_exists='replace', schema=schema_name, index=False, method='multi', chunksize=1000)
using psycopg2 you can use native sql commands to write data into a postgres table.
import psycopg2
import pandas as pd
conn = psycopg2.connect("dbname='{db}' user='{user}' host='{host}' port='{port}' password='{passwd}'".format(
user=pg_user,
passwd=pg_pass,
host=pg_host,
port=pg_port,
db=pg_db))
cur = conn.cursor()
def insertIntoTable(df, table):
"""
Using cursor.executemany() to insert the dataframe
"""
# Create a list of tupples from the dataframe values
tuples = list(set([tuple(x) for x in df.to_numpy()]))
# Comma-separated dataframe columns
cols = ','.join(list(df.columns))
# SQL query to execute
query = "INSERT INTO %s(%s) VALUES(%%s,%%s,%%s,%%s)" % (
table, cols)
try:
cur.executemany(query, tuples)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print("Error: %s" % error)
conn.rollback()
return 1
For Python 2.7 and Pandas 0.24.2 and using Psycopg2
Psycopg2 Connection Module
def dbConnect (db_parm, username_parm, host_parm, pw_parm):
# Parse in connection information
credentials = {'host': host_parm, 'database': db_parm, 'user': username_parm, 'password': pw_parm}
conn = psycopg2.connect(**credentials)
conn.autocommit = True # auto-commit each entry to the database
conn.cursor_factory = RealDictCursor
cur = conn.cursor()
print ("Connected Successfully to DB: " + str(db_parm) + "#" + str(host_parm))
return conn, cur
Connect to the database
conn, cur = dbConnect(databaseName, dbUser, dbHost, dbPwd)
Assuming dataframe to be present already as df
output = io.BytesIO() # For Python3 use StringIO
df.to_csv(output, sep='\t', header=True, index=False)
output.seek(0) # Required for rewinding the String object
copy_query = "COPY mem_info FROM STDOUT csv DELIMITER '\t' NULL '' ESCAPE '\\' HEADER " # Replace your table name in place of mem_info
cur.copy_expert(copy_query, output)
conn.commit()
Create engine (where dialect='postgres' or 'mysql', etc..):
from sqlalchemy import create_engine
engine = create_engine(f'{dialect}://{user_name}#{host}:{port}/{db_name}')
Session = sessionmaker(bind=engine)
with Session() as session:
df = pd.read_csv(path + f'/{file}')
df.to_sql('table_name', con=engine, if_exists='append',index=False)

Open database files (.db) using python

I have a data base file .db in SQLite3 format and I was attempting to open it to look at the data inside it. Below is my attempt to code using python.
import sqlite3
# Create a SQL connection to our SQLite database
con = sqlite3.connect(dbfile)
cur = con.cursor()
# The result of a "cursor.execute" can be iterated over by row
for row in cur.execute("SELECT * FROM "):
print(row)
# Be sure to close the connection
con.close()
For the line ("SELECT * FROM ") , I understand that you have to put in the header of the table after the word "FROM", however, since I can't even open up the file in the first place, I have no idea what header to put. Hence how can I code such that I can open up the data base file to read its contents?
So, you analyzed it all right. After the FROM you have to put in the tablenames. But you can find them out like this:
SELECT name FROM sqlite_master WHERE type = 'table'
In code this looks like this:
# loading in modules
import sqlite3
# creating file path
dbfile = '/home/niklas/Desktop/Stuff/StockData-IBM.db'
# Create a SQL connection to our SQLite database
con = sqlite3.connect(dbfile)
# creating cursor
cur = con.cursor()
# reading all table names
table_list = [a for a in cur.execute("SELECT name FROM sqlite_master WHERE type = 'table'")]
# here is you table list
print(table_list)
# Be sure to close the connection
con.close()
That worked for me very good. The reading of the data you have done already right just paste in the tablenames.
If you want to see data for visual analysis as pandas dataframe, the below approach could also be used.
import pandas as pd
import sqlite3
import sqlalchemy
try:
conn = sqlite3.connect("file.db")
except Exception as e:
print(e)
#Now in order to read in pandas dataframe we need to know table name
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(f"Table Name : {cursor.fetchall()}")
df = pd.read_sql_query('SELECT * FROM Table_Name', conn)
conn.close()
from flask import Flask
app = Flask(__name__)
from sqlalchemy import create_engine, select, MetaData, Table
from sqlalchemy.sql import and_, or_
engine = create_engine('sqlite://username:password#host/databasename')
class UserModel():
def __init__(self):
try:
self.meta = MetaData()
self.users = Table("users", self.meta, autoload=True, autoload_with=engine)
except Exception as e:
print(e)
def get(self):
stmt = select([self.users.c.name, self.users.c.email, self.users.c.password])
print(stmt)
result = engine.execute(stmt)
temp = [dict(r) for r in result] if result else None
print(temp)
return temp

How to fix "invalid input syntax for type json" when inserting json str into postgresql with copy method, using python

I am trying to insert data of json type with the copy method into a postgresql db. However I am getting the error mentioned below
invalid input syntax for type json
DETAIL: Expected end of input, but found ""aco"".
CONTEXT: JSON data, line 1: "{""aco"...
COPY flights2016jsn04, line 1, column flight: ""{""aco"": [""AXE"", ""AXE"", ""AXE"", ""AXE""], ""dist2Org"": 984753, ""flight_att"": {""ypos"": [8..."
I am not sure what the issue and a bit comfused on what I am reading on the internet. Below is my code with some sample data.
import json
import io
import pandas as pd
import psycopg2
dict_ = {"dist2Org": 984753, "aco": ["AXE", "AXE", "AXE", "AXE"],
"flight_att": {"xpos": [823.08988, 6540.32231, 999, 33321],
"ypos": [823.08988, 6540.32231, 999, 33321], "zpos": [823.08988, 6540.32231, 999.33321]}}
json_ = json.dumps(dict_)
col_json = ["id", "flight"]
df = pd.DataFrame([65654, json_]).T
df.to_csv('test_df')
output = io.StringIO()
# ignore the index
df.to_csv(output, sep='\t', header=False, index=False)
output.getvalue()
# jump to start of stream
output.seek(0)
conn = None
try:
# connection string
conn_string = '{0}{1} {2}'.format("host='localhost' dbname=", 'postgres', "user='postgres' password='xxxx'")
# conn_string = "host='localhost' dbname='postgres' user='postgres' password='xxxx'"
# connect to the PostgreSQL database
conn = psycopg2.connect(conn_string)
# create a new cursor
cur = conn.cursor()
# load data
cur.copy_from(output, 'flights2016jsn04', null="", columns=(col_json))
# # commit the changes to the database
conn.commit()
# close communication with the database
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
Postgres seems to only support quoting for csv input. The \t-separated text format expects no quotes.
Pandas on the other hand uses " as the default quote char, which occurs a lot in JSON and needs to be escaped.
There are two options:
If you know that there will be no \ts in your data you could try df.to_csv(sep='\t', quoting=csv.QUOTE_NONE) instead.
Use psycopg2's copy_expert to copy csv instead.

How to create a new table in a MySQL DB from a pandas dataframe

I recently transitioned from using SQLite for most of my data storage and management needs to MySQL. I think I've finally gotten the correct libraries installed to work with Python 3.6, but now I am having trouble creating a new table from a dataframe in the MySQL database.
Here are the libraries I import:
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine
In my code, I first create a dataframe from a CSV file (no issues here).
def csv_to_df(infile):
return pd.read_csv(infile)
Then I establish a connection to the MySQL database using this def function:
def mysql_connection():
user = 'root'
password = 'abc'
host = '127.0.0.1'
port = '3306'
database = 'a001_db'
engine = create_engine("mysql://{0}:{1}#{2}:{3}/{4}?charset=utf8".format(user, password, host, port, database))
return engine
Lastly, I use the pandas function "to_sql" to create the database table in the MySQL database:
def df_to_mysql(df, db_tbl_name, conn=mysql_connection(), index=False):
df.to_sql(con = conn, name = db_tbl_name, if_exists='replace', index = False)
I run the code using this line:
df_to_mysql(csv_to_df(r'path/to/file.csv'), 'new_database_table')
The yields the following error:
InvalidRequestError: Could not reflect: requested table(s) not available in Engine(mysql://root:***#127.0.0.1:3306/a001_db?charset=utf8): (new_database_table)
I think this is telling me that I must first create a table in the database before passing the data in the dataframe to this table, but I'm not 100% positive about that. Regardless, I'm looking for a way to create a table in a MySQL database without manually creating the table first (I have many CSVs, each with 50+ fields, that have to be uploaded as new tables in a MySQL database).
Any suggestions?
I took an approach suggested by aws_apprentice above which was to create the table first, then write data to the table.
The code below first auto-generates a mysql table from a df (auto defining table names and datatypes) then writes the df data to that table.
There were a couple of hiccups I had to overcome, such as: unnamed csv columns, determining the correct data type for each field in the mysql table.
I'm sure there are multiple other (better?) ways to do this, but this seems to work.
import pandas as pd
from sqlalchemy import create_engine
infile = r'path/to/file.csv'
db = 'a001_db'
db_tbl_name = 'a001_rd004_db004'
'''
Load a csv file into a dataframe; if csv does not have headers, use the headers arg to create a list of headers; rename unnamed columns to conform to mysql column requirements
'''
def csv_to_df(infile, headers = []):
if len(headers) == 0:
df = pd.read_csv(infile)
else:
df = pd.read_csv(infile, header = None)
df.columns = headers
for r in range(10):
try:
df.rename( columns={'Unnamed: {0}'.format(r):'Unnamed{0}'.format(r)}, inplace=True )
except:
pass
return df
'''
Create a mapping of df dtypes to mysql data types (not perfect, but close enough)
'''
def dtype_mapping():
return {'object' : 'TEXT',
'int64' : 'INT',
'float64' : 'FLOAT',
'datetime64' : 'DATETIME',
'bool' : 'TINYINT',
'category' : 'TEXT',
'timedelta[ns]' : 'TEXT'}
'''
Create a sqlalchemy engine
'''
def mysql_engine(user = 'root', password = 'abc', host = '127.0.0.1', port = '3306', database = 'a001_db'):
engine = create_engine("mysql://{0}:{1}#{2}:{3}/{4}?charset=utf8".format(user, password, host, port, database))
return engine
'''
Create a mysql connection from sqlalchemy engine
'''
def mysql_conn(engine):
conn = engine.raw_connection()
return conn
'''
Create sql input for table names and types
'''
def gen_tbl_cols_sql(df):
dmap = dtype_mapping()
sql = "pi_db_uid INT AUTO_INCREMENT PRIMARY KEY"
df1 = df.rename(columns = {"" : "nocolname"})
hdrs = df1.dtypes.index
hdrs_list = [(hdr, str(df1[hdr].dtype)) for hdr in hdrs]
for hl in hdrs_list:
sql += " ,{0} {1}".format(hl[0], dmap[hl[1]])
return sql
'''
Create a mysql table from a df
'''
def create_mysql_tbl_schema(df, conn, db, tbl_name):
tbl_cols_sql = gen_tbl_cols_sql(df)
sql = "USE {0}; CREATE TABLE {1} ({2})".format(db, tbl_name, tbl_cols_sql)
cur = conn.cursor()
cur.execute(sql)
cur.close()
conn.commit()
'''
Write df data to newly create mysql table
'''
def df_to_mysql(df, engine, tbl_name):
df.to_sql(tbl_name, engine, if_exists='replace')
df = csv_to_df(infile)
create_mysql_tbl_schema(df, mysql_conn(mysql_engine()), db, db_tbl_name)
df_to_mysql(df, mysql_engine(), db_tbl_name)
This
connection = engine.connect()
df.to_sql(con=connection, name='TBL_NAME', schema='SCHEMA', index=False, if_exists='replace')
works with oracle DB in specific schema wothout errors, but will not work if you have limited permissions. And note that table names is case sensative.

Cannot drop table in pandas to_sql using SQLAlchemy

I'm trying to drop an existing table, do a query and then recreate the table using the pandas to_sql function. This query works in pgadmin, but not here. Any ideas of if this is a pandas bug or if my code is wrong?
Specific error is ValueError: Table 'a' already exists.
import pandas.io.sql as psql
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user#localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
drop table a;
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print df.head()
df.to_sql('a', engine)
conn.close()
Why are you doing this like that? There is a shorter way: the if_exists kwag in to_sql. Try this:
import pandas.io.sql as psql
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user#localhost:port/dbname')
c = engine.connect()
conn = c.connection
sql = """
select * from some_table limit 1;
"""
df = psql.read_sql(sql, con=conn)
print df.head()
# Notice how below line is different. You forgot the schema argument
df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
conn.close()
According to docs:
replace: If table exists, drop it, recreate it, and insert data.
Ps. Additional tip:
This is better way to handle the connection:
with engine.connect() as conn, conn.begin():
sql = """select * from some_table limit 1"""
df = psql.read_sql(sql, con=conn)
print df.head()
df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')
Because it ensures that your connection is always closed, even if your program exits with an error. This is important to prevent data corruption. Further, I would just use this:
import pandas as pd
...
pd.read_sql(sql, conn)
instead of the way you are doing it.
So, if I was in your place writing that code, it would look like this:
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(r'postgresql://user#localhost:port/dbname')
with engine.connect() as conn, conn.begin():
df = pd.read_sql('select * from some_table limit 1', con=conn)
print df.head()
df.to_sql('a', con=conn, schema=schema_name, if_exists='replace')

Categories