AttributeError: 'SnowflakeCursor' object has no attribute 'cursor' - python

I am trying to write my DataFrame to Snowflake using the to_sql method.
import pandas as pd
import snowflake.connector

sf_conn = snowflake.connector.connect(
    account=*****,
    user=*****,
    password=*****,
    role=*****,
    warehouse=*****,
    database=*****
)
sf_cur = sf_conn.cursor()
df = pd.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance'])
df.to_sql('TEST3', con=sf_cur, schema='public', index=False)
But no luck yet.
File "/home/karma/.local/lib/python3.6/site-packages/pandas/io/sql.py", line 1584, in execute
cur = self.con.cursor()
AttributeError: 'SnowflakeCursor' object has no attribute 'cursor'
I even tried giving con=sf_conn, but I get the following error:
pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
I am able to do the same job using the SQLAlchemy create_engine library, but I wanted to use the Snowflake connection specifically.

You need to use an SQLAlchemy engine as the connection when using pandas.DataFrame.to_sql with Snowflake.
When you use df.to_sql, you need to pass in a SQLAlchemy engine, not a standard Snowflake connection object (and not a cursor either, as you've tried to do). You'll need to install snowflake-sqlalchemy using pip, but you don't need to install snowflake-connector-python separately since snowflake-sqlalchemy installs it for you.
Here is an example:
from sqlalchemy import create_engine
import os
import pandas as pd
snowflake_username = os.environ['SNOWFLAKE_USERNAME']
snowflake_password = os.environ['SNOWFLAKE_PASSWORD']
snowflake_account = os.environ['SNOWFLAKE_ACCOUNT']
snowflake_warehouse = os.environ['SNOWFLAKE_WAREHOUSE']
snowflake_database = 'test_db'
snowflake_schema = 'public'
if __name__ == '__main__':
    engine = create_engine(
        'snowflake://{user}:{password}@{account}/{db}/{schema}?warehouse={warehouse}'.format(
            user=snowflake_username,
            password=snowflake_password,
            account=snowflake_account,
            db=snowflake_database,
            schema=snowflake_schema,
            warehouse=snowflake_warehouse,
        )
    )
    df = pd.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance'])
    df.to_sql('TEST_TABLE', con=engine, schema='public', index=False, if_exists='append')
Every time I run the above script the Mark and Luke records get appended to my test_db.public.test_table table.


"The name of the object to be created is identical to the existing name" in IBM DB2 with pandas.to_sql if_exists='append'

Please help with this problem.
sqlalchemy-1.4.31
pandas-1.4.0
Python 3.6.8
engine = create_engine(ConfigDbDb2.sqlalchemy_engine_c_string)
chunks = pd.read_sql_query(get_goods, engine, chunksize=chunksize)
for i, chunk in enumerate(chunks):
    print(i)
    chunk['predicted'] = chunk['name'].progress_apply(my_function)
    chunk.to_sql(table_name, engine, if_exists='append', index=False, dtype=dtype_good_test)
After the first write I get:
(ibm_db_dbi.ProgrammingError) ibm_db_dbi::ProgrammingError: Statement Execute Failed: [IBM][CLI Driver][DB2/LINUXX8664] SQL0601N The name of the object to be created is identical to the existing name
Also, for some reason, sqlalchemy + pandas ignore the schema parameter in the connection string when appending, so you should define it directly in the DataFrame's to_sql call, like this:
chunk.to_sql(table_name, engine, schema = 'MY_SCHEMA', if_exists='append', index=False, dtype=dtype_good_test)

AttributeError: 'pyodbc.Cursor' object has no attribute 'dialect'

I am trying to create a table in the database. This is my connection code:
# pyodbc connection connect to server
conn = pyodbc.connect(
    "driver={SQL Server};server=xxxxxxxxxxx; database=master; trusted_connection=true",
    autocommit=True, Trusted_Connection='Yes')
crsr = conn.cursor()
# connect db (connect to database name) using SQL-Alchemy
engine = create_engine(
    'mssql+pyodbc://xxxxxxxxxxx/master?driver=SQL+Server+Native+Client+11.0')
connection = engine.connect()
It's just a pyodbc connection, and this is the error I found:
Traceback (most recent call last):
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/app.py", line 22, in <module>
dfeed_gsm_relation_m.push_dfeed_gsm_relation_sql()
File "C:\Users\haroo501\PycharmProjects\ToolUpdated\meta_data\dfeed_gsm_relation_m.py", line 31, in push_dfeed_gsm_relation_sql
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
AttributeError: 'pyodbc.Cursor' object has no attribute 'dialect'
and this is the code that creates the table in the database using MetaData():
from sqlalchemy import MetaData, Table, Column, Integer, String, Date, Float
from database import connec
import sqlalchemy as db
import pandas as pd
import numpy as np
from txt_to_csv import convert_to_csv
import os
def push_dfeed_gsm_relation_sql():
    # Create a dictionary for all gsm_relations_mnm relation excel file
    dataf_gsm_relation_col_dict = {
        'cell_name': 'Cellname',
        'n_cell_name': 'Ncellname',
        'technology': 'Technology',
    }
    # table name in database 'df_gsm_relation'
    DATAF_GSM_RELATION = 'df_gsm_relation'
    # Create a list for dataf_gsm_relation_cols and put the dictionary keys in the list
    dataf_gsm_relation_cols = list(dataf_gsm_relation_col_dict.keys())
    dataf_gsm_relation_cols_meta = MetaData()
    dataf_gsm_relation_relations = Table(
        DATAF_GSM_RELATION, dataf_gsm_relation_cols_meta,
        Column('id', Integer, primary_key=True),
        Column(dataf_gsm_relation_cols[0], Integer),
        Column(dataf_gsm_relation_cols[1], Integer),
        Column(dataf_gsm_relation_cols[2], String),
    )
    if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
        dataf_gsm_relation_relations.drop(connec.crsr)
    dataf_gsm_relation_cols_meta.create_all(connec.crsr)
    dataf_gsm_rel_txt = 'gsmrelation_mnm.txt'
    dataf_gsm_txt_df = pd.read_csv(dataf_gsm_rel_txt, sep=';')
    dataf_gsm_rel_df_column_index = list(dataf_gsm_txt_df.columns)
    dataf_gsm_txt_df.reset_index(inplace=True)
    dataf_gsm_txt_df.drop(columns=dataf_gsm_txt_df.columns[-1], inplace=True)
    dataf_gsm_rel_df_column_index = dict(zip(list(dataf_gsm_txt_df.columns), dataf_gsm_rel_df_column_index))
    dataf_gsm_txt_df.rename(columns=dataf_gsm_rel_df_column_index, inplace=True)
    dataf_gsm_txt_df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1', index=False)
    dataf_gsm_rel_excel = 'gsmrelation_mnm.csv'
    dataf_gsm_rel_df = pd.read_csv(os.path.join(os.path.dirname(__file__), dataf_gsm_rel_excel), dtype={
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[0]]: int,
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[1]]: int,
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[2]]: str,
    })
    dataf_gsm_relations_table_query = db.insert(dataf_gsm_relation_relations)
    dataf_gsm_relations_values_list = []
    dataf_gsm_relations_row_count = 1
    for i in dataf_gsm_rel_df.index:
        dataf_gsm_relations_row = dataf_gsm_rel_df.loc[i]
        dataf_gsm_rel_df_record = {'id': dataf_gsm_relations_row_count}
        for col in dataf_gsm_relation_col_dict.keys():
            if col == dataf_gsm_relation_cols[0] or col == dataf_gsm_relation_cols[1]:
                dataf_gsm_rel_df_record[col] = int(dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]])
            else:
                dataf_gsm_rel_df_record[col] = dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]]
        dataf_gsm_relations_values_list.append(dataf_gsm_rel_df_record)
        dataf_gsm_relations_row_count += 1
    ResultProxy_dataf_gsm_relations = connec.crsr.execute(dataf_gsm_relations_table_query,
                                                          dataf_gsm_relations_values_list)
The problem is in this part:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
    dataf_gsm_relation_relations.drop(connec.crsr)
dataf_gsm_relation_cols_meta.create_all(connec.crsr)
I know the dialect function is related to from sqlalchemy import create_engine; that was my old connection, and I changed to a new connection using import pyodbc.
So how can I solve this case using pyodbc module?
Edited
The other way to solve this would be to CREATE and DROP the table in the existing database using SQLAlchemy,
and this is the related code example:
from database import connec
def create_db():
    create_bd_query = "CREATE DATABASE MyNewDatabase"
    connec.crsr.execute(create_bd_query)

def delete_database():
    delete_bd_query = "DROP DATABASE MyNewDatabase"
    connec.crsr.execute(delete_bd_query)
You cannot just import a completely different module and expect it to be the same :)
Dialects are what SQLAlchemy uses to communicate with different drivers.
In this instance pyodbc IS the driver, so it has no need for a dialect.
From SQLAlchemy:
Dialects
The dialect is the system SQLAlchemy uses to communicate with various types of DBAPI implementations and databases. The sections that follow contain reference documentation and notes specific to the usage of each backend, as well as notes for the various DBAPIs.
All dialects require that an appropriate DBAPI driver is installed.
Included Dialects
PostgreSQL
MySQL
SQLite
Oracle
Microsoft SQL Server
Microsoft SQL Server
Support for the Microsoft SQL Server database.
DBAPI Support
The following dialect/DBAPI options are available. Please refer to individual
DBAPI sections for connect information.
PyODBC
mxODBC
pymssql
zxJDBC for Jython
adodbapi
Judging from the error and by looking at the PyODBC Wiki Documentation
I think this line:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
should read:
# Does table 'DATAF_GSM_RELATION' exist?
if connec.crsr.tables(table=DATAF_GSM_RELATION).fetchone():
...
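For completeness, here is a minimal sketch of the whole check/drop/create step done with plain pyodbc (no SQLAlchemy at all). It assumes connec.crsr is a pyodbc cursor on a connection opened with autocommit=True; the table and column names come from the question, and the VARCHAR length is an arbitrary choice:
# Check for the table via the ODBC catalog, then drop and recreate it with raw DDL
DATAF_GSM_RELATION = 'df_gsm_relation'
if connec.crsr.tables(table=DATAF_GSM_RELATION).fetchone():
    connec.crsr.execute('DROP TABLE {}'.format(DATAF_GSM_RELATION))
connec.crsr.execute('''
    CREATE TABLE {} (
        id INT PRIMARY KEY,
        cell_name INT,
        n_cell_name INT,
        technology VARCHAR(50)
    )
'''.format(DATAF_GSM_RELATION))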

How to execute sql stored procedure with multiple date parameters using sqlalchemy in pandas

I am able to execute stored procedure without parameters:
import pandas as pd
import sqlalchemy
import pyodbc
import datetime as dt
engine = sqlalchemy.create_engine('mssql+pymssql://MyServer/MyDB')
df = pd.read_sql_query('EXEC dbo.TestProcedure' , engine) # stored procedure without parameters
print(df)
But unable to execute stored procedure with parameters:
import pandas as pd
import sqlalchemy
import pyodbc
import datetime as dt
myparams = ['2017-02-01','2017-02-28', None] # None substitutes NULL in sql
engine = sqlalchemy.create_engine('mssql+pymssql://MyServer/MyDB')
df = pd.read_sql_query('EXEC PythonTest_Align_RSrptAccountCurrentMunich @EffectiveDateFrom=?,@EffectiveDateTo=?,@ProducerLocationID=?', engine, params=myparams)
print(df)
Error message:
File "src\pymssql.pyx", line 465, in pymssql.Cursor.execute
sqlalchemy.exc.ProgrammingError: (pymssql.ProgrammingError) (102, b"Incorrect syntax near '?'.DB-Lib error message 20018, severity 15:\nGeneral SQL Server error: Check messages from the SQL Server\n")
[SQL: EXEC PythonTest_Align_RSrptAccountCurrentMunich @EffectiveDateFrom=?,@EffectiveDateTo=?,@ProducerLocationID=?]
[parameters: ('2017-02-01', '2017-02-28', None)]
(Background on this error at: http://sqlalche.me/e/f405)
How can I pass parameters using sqlalchemy?
If you are executing a raw SQL query with parameter placeholders then you must use the paramstyle supported by the DBAPI layer. pymssql uses the "format" paramstyle %s, not the "qmark" paramstyle ? (which pyodbc uses).
However, you can avoid the ambiguity by wrapping the query in a SQLAlchemy text object and consistently use the "named" paramstyle. SQLAlchemy will automatically translate the parameter placeholders to the appropriate style for the DBAPI you are using. For example, to call a stored procedure named echo_datetimes:
import datetime
import sqlalchemy as sa
# ...
query = sa.text("EXEC echo_datetimes #p1 = :param1, #p2 = :param2")
values = {'param1': datetime.datetime(2020, 1, 1, 1, 1, 1),
'param2': datetime.datetime(2020, 2, 2, 2, 2, 2)}
df = pd.read_sql_query(query, engine, params=values)
print(df)
# dt_start dt_end
# 0 2020-01-01 01:01:01 2020-02-02 02:02:02
read_sql is just for reading tables. To execute something with parameters, try using engine.execute(your sql query), passing the parameters the same way you would in your SQL, together with engine.execution_options(autocommit=True).
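For example, a rough sketch of that approach (assuming SQLAlchemy 1.x, where Engine.execute() still exists, and reusing the procedure and parameter names from the question):
import sqlalchemy as sa

engine = sa.create_engine('mssql+pymssql://MyServer/MyDB')
# execution_options(autocommit=True) returns an engine copy that commits each
# statement, which matters if the procedure writes data
autocommit_engine = engine.execution_options(autocommit=True)
query = sa.text("EXEC PythonTest_Align_RSrptAccountCurrentMunich "
                "@EffectiveDateFrom=:date_from, @EffectiveDateTo=:date_to, "
                "@ProducerLocationID=:producer_id")
result = autocommit_engine.execute(query,
                                   date_from='2017-02-01',
                                   date_to='2017-02-28',
                                   producer_id=None)
rows = result.fetchall()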

python pandas with to_sql() , SQLAlchemy and schema in exasol

I'm trying to upload a pandas data frame to an SQL table. It seemed to me that the pandas to_sql function is the best solution for larger data frames, but I can't get it to work. I can easily extract data, but I get an error message when trying to write it to a new table:
# connect to Exasol DB
exaString='DSN=exa'
conDB = pyodbc.connect(exaString)
# get some data from somewhere, works without error
sqlString = "SELECT * FROM SOMETABLE"
data = pd.read_sql(sqlString, conDB)
# now upload this data to a new table
data.to_sql('MYTABLENAME', conDB, flavor='mysql')
conDB.close()
The error message I get is
pyodbc.ProgrammingError: ('42000', "[42000] [EXASOL][EXASolution driver]syntax error, unexpected identifier_chain2, expecting
assignment_operator or ':' [line 1, column 6] (-1)
(SQLExecDirectW)")
Unfortunately I have no idea what the query that caused this syntax error looks like, or what else is wrong. Can someone please point me in the right direction?
(Second) EDIT:
Following Humayun's and Joris's suggestions, I now use pandas version 0.14 and SQLAlchemy in combination with the Exasol dialect (?). Since I am connecting to a defined schema, I am using the metadata option, but the program crashes with "Bus error (core dumped)".
engine = create_engine('exa+pyodbc://uid:passwd@exa/mySchemaName', echo=True)
# get some data
sqlString = "SELECT * FROM SOMETABLE" # SOMETABLE is a view in mySchemaName
df = pd.read_sql(sqlString, con=engine) # works
print engine.has_table('MYTABLENAME') # MYTABLENAME is a view in mySchemaName
# prints "True"
# upload it to a new table
meta = sqlalchemy.MetaData(engine, schema='mySchemaName')
meta.reflect(engine, schema='mySchemaName')
pdsql = sql.PandasSQLAlchemy(engine, meta=meta)
pdsql.to_sql(df, 'MYTABLENAME')
I am not sure about setting "mySchemaName" in create_engine(..), but the outcome is the same.
Pandas does not support the EXASOL syntax out of the box, so it needs to be changed a bit. Here is a working example of your code without SQLAlchemy:
import pyodbc
import pandas as pd
con = pyodbc.connect('DSN=EXA')
con.execute('OPEN SCHEMA TEST2')
# configure pandas to understand EXASOL as mysql flavor
pd.io.sql._SQL_TYPES['int']['mysql'] = 'INT'
pd.io.sql._SQL_SYMB['mysql']['br_l'] = ''
pd.io.sql._SQL_SYMB['mysql']['br_r'] = ''
pd.io.sql._SQL_SYMB['mysql']['wld'] = '?'
pd.io.sql.PandasSQLLegacy.has_table = \
    lambda self, name: name.upper() in [t[0].upper() for t in con.execute('SELECT table_name FROM cat').fetchall()]
data = pd.read_sql('SELECT * FROM services', con)
data.to_sql('SERVICES2', con, flavor = 'mysql', index = False)
If you use the EXASolution Python package, then the code would look like follows:
import exasol
con = exasol.connect(dsn='EXA') # normal pyodbc connection with additional functions
con.execute('OPEN SCHEMA TEST2')
data = con.readData('SELECT * FROM services') # pandas data frame per default
con.writeData(data, table = 'services2')
The problem is that, also in pandas 0.14, the read_sql and to_sql functions cannot deal with schemas, but using Exasol without schemas makes no sense. This will be fixed in 0.15. If you want to use it now, look at this pull request: https://github.com/pydata/pandas/pull/7952
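For reference, once that schema support is available (pandas 0.15 and later), a minimal sketch of the above could look like this (using the exa+pyodbc engine from the question; the schema keyword is the 0.15 API):
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('exa+pyodbc://uid:passwd@exa/mySchemaName')
df = pd.read_sql('SELECT * FROM SOMETABLE', con=engine)
# schema is an explicit keyword argument of to_sql from 0.15 onwards
df.to_sql('MYTABLENAME', con=engine, schema='mySchemaName',
          index=False, if_exists='replace')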

How to insert pandas dataframe via mysqldb into database?

I can connect to my local mysql database from python, and I can create, select from, and insert individual rows.
My question is: can I directly instruct mysqldb to take an entire dataframe and insert it into an existing table, or do I need to iterate over the rows?
In either case, what would the python script look like for a very simple table with ID and two data columns, and a matching dataframe?
Update:
There is now a to_sql method, which is the preferred way to do this, rather than write_frame:
df.to_sql(con=con, name='table_name_for_df', if_exists='replace', flavor='mysql')
Also note: the syntax may change in pandas 0.14...
You can set up the connection with MySQLdb:
from pandas.io import sql
import MySQLdb
con = MySQLdb.connect() # may need to add some other options to connect
Setting the flavor of write_frame to 'mysql' means you can write to mysql:
sql.write_frame(df, con=con, name='table_name_for_df',
                if_exists='replace', flavor='mysql')
The argument if_exists tells pandas how to deal if the table already exists:
if_exists: {'fail', 'replace', 'append'}, default 'fail'
fail: If table exists, do nothing.
replace: If table exists, drop it, recreate it, and insert data.
append: If table exists, insert data. Create if does not exist.
Although the write_frame docs currently suggest it only works on sqlite, mysql appears to be supported and in fact there is quite a bit of mysql testing in the codebase.
Andy Hayden mentioned the correct function (to_sql). In this answer, I'll give a complete example, which I tested with Python 3.5 but which should also work for Python 2.7 (and other Python 3.x versions):
First, let's create the dataframe:
# Create dataframe
import pandas as pd
import numpy as np
np.random.seed(0)
number_of_samples = 10
frame = pd.DataFrame({
    'feature1': np.random.random(number_of_samples),
    'feature2': np.random.random(number_of_samples),
    'class': np.random.binomial(2, 0.1, size=number_of_samples),
}, columns=['feature1', 'feature2', 'class'])
print(frame)
Which gives:
feature1 feature2 class
0 0.548814 0.791725 1
1 0.715189 0.528895 0
2 0.602763 0.568045 0
3 0.544883 0.925597 0
4 0.423655 0.071036 0
5 0.645894 0.087129 0
6 0.437587 0.020218 0
7 0.891773 0.832620 1
8 0.963663 0.778157 0
9 0.383442 0.870012 0
To import this dataframe into a MySQL table:
# Import dataframe into MySQL
import sqlalchemy
database_username = 'ENTER USERNAME'
database_password = 'ENTER USERNAME PASSWORD'
database_ip = 'ENTER DATABASE IP'
database_name = 'ENTER DATABASE NAME'
database_connection = sqlalchemy.create_engine('mysql+mysqlconnector://{0}:{1}@{2}/{3}'.
                                               format(database_username, database_password,
                                                      database_ip, database_name))
frame.to_sql(con=database_connection, name='table_name_for_df', if_exists='replace')
One trick is that MySQLdb doesn't work with Python 3.x. So instead we use mysqlconnector, which may be installed as follows:
pip install mysql-connector==2.1.4 # version avoids Protobuf error
Note that to_sql creates the table as well as the columns if they do not already exist in the database.
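If you want control over the column types that get created, rather than relying on pandas' defaults, the dtype argument of to_sql accepts a mapping of column names to SQLAlchemy types. A small sketch for the example frame above (the type choices here are just an illustration):
import sqlalchemy

frame.to_sql(con=database_connection, name='table_name_for_df',
             if_exists='replace', index=False,
             dtype={'feature1': sqlalchemy.Float(),
                    'feature2': sqlalchemy.Float(),
                    'class': sqlalchemy.Integer()})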
You can do it by using pymysql:
For example, let's suppose you have a MySQL database with the following user, password, host and port, and you want to write to the database 'data_2', whether it is already there or not.
import pymysql
user = 'root'
passw = 'my-secret-pw-for-mysql-12ud'
host = '172.17.0.2'
port = 3306
database = 'data_2'
If you already have the database created:
conn = pymysql.connect(host=host,
                       port=port,
                       user=user,
                       passwd=passw,
                       db=database,
                       charset='utf8')
data.to_sql(name=database, con=conn, if_exists='replace', index=False, flavor='mysql')
If you do NOT have the database created, also valid when the database is already there:
conn = pymysql.connect(host=host, port=port, user=user, passwd=passw)
conn.cursor().execute("CREATE DATABASE IF NOT EXISTS {0} ".format(database))
conn = pymysql.connect(host=host,
                       port=port,
                       user=user,
                       passwd=passw,
                       db=database,
                       charset='utf8')
data.to_sql(name=database, con=conn, if_exists='replace', index=False, flavor='mysql')
Similar threads:
Writing to MySQL database with pandas using SQLAlchemy, to_sql
Writing a Pandas Dataframe to MySQL
The to_sql method works for me.
However, keep in mind that it looks like it's going to be deprecated in favor of SQLAlchemy:
FutureWarning: The 'mysql' flavor with DBAPI connection is deprecated and will be removed in future versions. MySQL will be further supported with SQLAlchemy connectables. chunksize=chunksize, dtype=dtype)
Python 2 + 3
Prerequisites
Pandas
MySQL server
sqlalchemy
pymysql: pure python mysql client
Code
from pandas.io import sql
from sqlalchemy import create_engine
engine = create_engine("mysql+pymysql://{user}:{pw}#localhost/{db}"
.format(user="root",
pw="your_password",
db="pandas"))
df.to_sql(con=engine, name='table_name', if_exists='replace')
This should do the trick:
import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy import create_engine
# Create engine
engine = create_engine('mysql://USER_NAME_HERE:PASS_HERE@HOST_ADRESS_HERE/DB_NAME_HERE')
# Create the connection and close it (whether it succeeded or failed)
with engine.begin() as connection:
    df.to_sql(name='INSERT_TABLE_NAME_HERE/INSERT_NEW_TABLE_NAME', con=connection, if_exists='append', index=False)
You might output your DataFrame as a csv file and then use mysqlimport to import your csv into your mysql.
EDIT
It seems pandas's built-in sql util provides a write_frame function, but it only works with sqlite.
I found something useful, you might try this
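A rough sketch of the CSV + mysqlimport route (the file, table and database names and the credentials are placeholders, and df is the DataFrame to load; mysqlimport derives the target table name from the CSV file name):
import subprocess

# Dump the frame to a CSV whose base name matches the target table
df.to_csv('table_name_for_df.csv', index=False, header=False)
# Shell out to the mysqlimport command-line tool
subprocess.check_call([
    'mysqlimport',
    '--user=USER', '--password=PASSWORD',
    '--local',
    '--fields-terminated-by=,',
    'DATABASE_NAME',
    'table_name_for_df.csv',
])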
This has worked for me. At first I created only the database; I did not create any predefined table.
from platform import python_version
print(python_version())
# 3.7.3
path = 'glass.data'
df = pd.read_csv(path)
df.head()
!conda install sqlalchemy
!conda install pymysql
pd.__version__
# '0.24.2'
sqlalchemy.__version__
# '1.3.20'
I restarted the kernel after the installation.
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://USER:PASSWORD@HOST:PORT/DATABASE_NAME', echo=False)
try:
    df.to_sql(name='glasstable', con=engine, index=False, if_exists='replace')
    print('Successfully written to Database!!!')
except Exception as e:
    print(e)
df.to_sql(name = "owner", con= db_connection, schema = 'aws', if_exists='replace', index = >True, index_label='id')
