I am trying to create a table in a database. This is my connection, as shown in the code below:
# pyodbc connection (connect to the server)
conn = pyodbc.connect(
    "driver={SQL Server};server=xxxxxxxxxxx; database=master; trusted_connection=true",
    autocommit=True, Trusted_Connection='Yes')
crsr = conn.cursor()
# connect to the database (by database name) using SQLAlchemy
engine = create_engine(
    'mssql+pyodbc://xxxxxxxxxxx/master?driver=SQL+Server+Native+Client+11.0')
connection = engine.connect()
It's just a pyodbc connection,
and this is the error I get:
Traceback (most recent call last):
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/app.py", line 22, in <module>
dfeed_gsm_relation_m.push_dfeed_gsm_relation_sql()
File "C:\Users\haroo501\PycharmProjects\ToolUpdated\meta_data\dfeed_gsm_relation_m.py", line 31, in push_dfeed_gsm_relation_sql
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
AttributeError: 'pyodbc.Cursor' object has no attribute 'dialect'
and this is the code that creates the table in the database using MetaData():
from sqlalchemy import MetaData, Table, Column, Integer, String, Date, Float
from database import connec
import sqlalchemy as db
import pandas as pd
import numpy as np
from txt_to_csv import convert_to_csv
import os
def push_dfeed_gsm_relation_sql():
    # Create a dictionary for the gsm_relations_mnm relation excel file columns
    dataf_gsm_relation_col_dict = {
        'cell_name': 'Cellname',
        'n_cell_name': 'Ncellname',
        'technology': 'Technology',
    }
    # table name in database 'df_gsm_relation'
    DATAF_GSM_RELATION = 'df_gsm_relation'
    # Create a list of the dataf_gsm_relation column names from the dictionary keys
    dataf_gsm_relation_cols = list(dataf_gsm_relation_col_dict.keys())
    dataf_gsm_relation_cols_meta = MetaData()
    dataf_gsm_relation_relations = Table(
        DATAF_GSM_RELATION, dataf_gsm_relation_cols_meta,
        Column('id', Integer, primary_key=True),
        Column(dataf_gsm_relation_cols[0], Integer),
        Column(dataf_gsm_relation_cols[1], Integer),
        Column(dataf_gsm_relation_cols[2], String),
    )
    if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
        dataf_gsm_relation_relations.drop(connec.crsr)
    dataf_gsm_relation_cols_meta.create_all(connec.crsr)
    dataf_gsm_rel_txt = 'gsmrelation_mnm.txt'
    dataf_gsm_txt_df = pd.read_csv(dataf_gsm_rel_txt, sep=';')
    dataf_gsm_rel_df_column_index = list(dataf_gsm_txt_df.columns)
    dataf_gsm_txt_df.reset_index(inplace=True)
    dataf_gsm_txt_df.drop(columns=dataf_gsm_txt_df.columns[-1], inplace=True)
    dataf_gsm_rel_df_column_index = dict(zip(list(dataf_gsm_txt_df.columns), dataf_gsm_rel_df_column_index))
    dataf_gsm_txt_df.rename(columns=dataf_gsm_rel_df_column_index, inplace=True)
    dataf_gsm_txt_df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1', index=False)
    dataf_gsm_rel_excel = 'gsmrelation_mnm.csv'
    dataf_gsm_rel_df = pd.read_csv(os.path.join(os.path.dirname(__file__), dataf_gsm_rel_excel), dtype={
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[0]]: int,
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[1]]: int,
        dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[2]]: str,
    })
    dataf_gsm_relations_table_query = db.insert(dataf_gsm_relation_relations)
    dataf_gsm_relations_values_list = []
    dataf_gsm_relations_row_count = 1
    for i in dataf_gsm_rel_df.index:
        dataf_gsm_relations_row = dataf_gsm_rel_df.loc[i]
        dataf_gsm_rel_df_record = {'id': dataf_gsm_relations_row_count}
        for col in dataf_gsm_relation_col_dict.keys():
            if col == dataf_gsm_relation_cols[0] or col == dataf_gsm_relation_cols[1]:
                dataf_gsm_rel_df_record[col] = int(dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]])
            else:
                dataf_gsm_rel_df_record[col] = dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]]
        dataf_gsm_relations_values_list.append(dataf_gsm_rel_df_record)
        dataf_gsm_relations_row_count += 1
    ResultProxy_dataf_gsm_relations = connec.crsr.execute(dataf_gsm_relations_table_query,
                                                          dataf_gsm_relations_values_list)
The problem is in this part:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
    dataf_gsm_relation_relations.drop(connec.crsr)
dataf_gsm_relation_cols_meta.create_all(connec.crsr)
I know the dialect attribute is related to from sqlalchemy import create_engine; that was my old connection, and I changed to a new connection using import pyodbc .....
So how can I solve this using the pyodbc module?
Edit:
Another way to solve this would be to CREATE and DROP the table in the existing database using SQLAlchemy,
and this is the related code example:
from database import connec

def create_db():
    create_db_query = "CREATE DATABASE MyNewDatabase"
    connec.crsr.execute(create_db_query)

def delete_database():
    delete_db_query = "DROP DATABASE MyNewDatabase"
    connec.crsr.execute(delete_db_query)
You cannot just import a completely different module and expect it to behave the same :)
Dialects are what SQLAlchemy uses to communicate with different drivers.
In this instance pyodbc IS the driver, so it has no need for a dialect.
From SQLAlchemy:
Dialects
The dialect is the system SQLAlchemy uses to communicate with various types of DBAPI implementations and databases. The sections that follow contain reference documentation and notes specific to the usage of each backend, as well as notes for the various DBAPIs.
All dialects require that an appropriate DBAPI driver is installed.
Included Dialects
PostgreSQL
MySQL
SQLite
Oracle
Microsoft SQL Server
Microsoft SQL Server
Support for the Microsoft SQL Server database.
DBAPI Support
The following dialect/DBAPI options are available. Please refer to individual
DBAPI sections for connect information.
PyODBC
mxODBC
pymssql
zxJDBC for Jython
adodbapi
Judging from the error and by looking at the pyODBC wiki documentation,
I think this line:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
should read:
# Does table 'DATAF_GSM_RELATION' exist?
if connec.crsr.tables(table=DATAF_GSM_RELATION).fetchone():
...
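Putting that together, a minimal sketch of the existence check plus DROP/CREATE done entirely through pyodbc could look like the following (the table and column types are taken from your snippet; the raw T-SQL and the VARCHAR length are my own assumptions, since pyodbc simply passes SQL text straight through to the server):

import pyodbc

conn = pyodbc.connect(
    "driver={SQL Server};server=xxxxxxxxxxx;database=master;trusted_connection=true",
    autocommit=True)
crsr = conn.cursor()

DATAF_GSM_RELATION = 'df_gsm_relation'

# crsr.tables() queries the ODBC catalog; fetchone() returns None if the table is absent
if crsr.tables(table=DATAF_GSM_RELATION).fetchone():
    crsr.execute(f"DROP TABLE {DATAF_GSM_RELATION}")

# plain T-SQL instead of SQLAlchemy metadata
crsr.execute(f"""
    CREATE TABLE {DATAF_GSM_RELATION} (
        id INT PRIMARY KEY,
        cell_name INT,
        n_cell_name INT,
        technology VARCHAR(50)
    )
""")

Alternatively, assuming your database.connec module also exposes the SQLAlchemy engine created in your connection code, has_table works when it is given the engine (or a connection from it) rather than a pyodbc cursor:

if connec.engine.dialect.has_table(connec.engine.connect(), DATAF_GSM_RELATION):
    dataf_gsm_relation_relations.drop(connec.engine)
dataf_gsm_relation_cols_meta.create_all(connec.engine)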
Related
I am trying to write my DataFrame to Snowflake using the to_sql method.
sf_conn = snowflake.connector.connect(
    account=*****,
    user=*****,
    password=*****,
    role=*****,
    warehouse=*****,
    database=*****
)
sf_cur = sf_conn.cursor()
df = pd.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance'])
df.to_sql('TEST3', con=sf_cur, schema='public', index=False)
But no luck yet.
File "/home/karma/.local/lib/python3.6/site-packages/pandas/io/sql.py", line 1584, in execute
cur = self.con.cursor()
AttributeError: 'SnowflakeCursor' object has no attribute 'cursor'
I even tried giving con=sf_conn, but I get the following error:
pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
I am able to do the same job using sqlAlchemy create_engine lib, but wanted to use specifically snowflake connection.
You need to use an SQLAlchemy engine as the connection when using pandas.DataFrame.to_sql with Snowflake.
When you use df.to_sql, you need to pass in a SQLAlchemy engine and not a standard Snowflake connection object (and not a cursor either, as you've tried to do). You'll need to install snowflake-sqlalchemy using pip, but you don't need to install snowflake-connector-python separately since snowflake-sqlalchemy pulls it in for you.
Here is an example:
from sqlalchemy import create_engine
import os
import pandas as pd

snowflake_username = os.environ['SNOWFLAKE_USERNAME']
snowflake_password = os.environ['SNOWFLAKE_PASSWORD']
snowflake_account = os.environ['SNOWFLAKE_ACCOUNT']
snowflake_warehouse = os.environ['SNOWFLAKE_WAREHOUSE']
snowflake_database = 'test_db'
snowflake_schema = 'public'

if __name__ == '__main__':
    engine = create_engine(
        'snowflake://{user}:{password}@{account}/{db}/{schema}?warehouse={warehouse}'.format(
            user=snowflake_username,
            password=snowflake_password,
            account=snowflake_account,
            db=snowflake_database,
            schema=snowflake_schema,
            warehouse=snowflake_warehouse,
        )
    )

    df = pd.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance'])
    df.to_sql('TEST_TABLE', con=engine, schema='public', index=False, if_exists='append')
Every time I run the above script the Mark and Luke records get appended to my test_db.public.test_table table.
Assume I have 30 databases in MySQL, from db1 to db30. I have a Python script that creates an engine and connects to one db:
import pandas as pd
import MySQLdb
from sqlalchemy import create_engine

df = pd.read_csv('pricelist.csv')
new_df = df[['date','time','new_price']]

engine = create_engine('mysql+mysqldb://root:python@localhost:3306/db1', echo=False)
new_df.to_sql(name='temporary_table', con=engine, if_exists='append', index=False)

with engine.begin() as cnx:
    sql_insert_query_new = 'REPLACE INTO newlist (SELECT * FROM temporary_table)'
    cnx.execute(sql_insert_query_new)
    cnx.execute("DROP TABLE temporary_table")
Now, with the above script, I would need 30 Python scripts to create the engines and connect to each db to run the query, and to call these 30 scripts I would need a batch file on a task scheduler.
Is there an optimized way of connecting to multiple databases with a single script? I read up on sessions and don't think they can take in multiple databases. And if I have 30 Python scripts doing this engine creation and connection, will there be any issue in terms of processing performance? Eventually, I will have hundreds of databases in MySQL.
Thanks!
Note: Each database has its own unique table names.
Using Python 3.7
I think maybe you can do something like this:
import pandas as pd
import MySQLdb
from sqlalchemy import create_engine

df = pd.read_csv('pricelist.csv')
new_df = df[['date','time','new_price']]

db_names = [f'db{i}' for i in range(1, 31)]
table_names = ['temporary_table', 'table_name_2', 'table_name_3', ...]

for db, tb in zip(db_names, table_names):
    engine = create_engine(f'mysql+mysqldb://root:python@localhost:3306/{db}', echo=False)
    new_df.to_sql(name=tb, con=engine, if_exists='append', index=False)
    with engine.begin() as cnx:
        sql_insert_query_new = f'REPLACE INTO newlist (SELECT * FROM {tb})'
        cnx.execute(sql_insert_query_new)
        cnx.execute(f"DROP TABLE {tb}")
I would like to switch on the fast_executemany option for the pyODBC driver while using SQLAlchemy to insert rows into a table. By default it is off and the code runs really slowly... Could anyone suggest how to do this?
Edit:
I am using pyODBC 4.0.21 and SQLAlchemy 1.1.13, and a simplified sample of the code I am using is presented below.
import sqlalchemy as sa

def InsertIntoDB(self, tablename, colnames, data, create=False):
    """
    Inserts data into a given db table

    Args:
        tablename - name of db table with dbname
        colnames - column names to insert to
        data - a list of tuples, a tuple per row
    """
    # reflect table into a sqlalchemy object
    meta = sa.MetaData(bind=self.engine)
    reflected_table = sa.Table(tablename, meta, autoload=True)

    # prepare an input object for sa.connection.execute
    execute_inp = []
    for i in data:
        execute_inp.append(dict(zip(colnames, i)))

    # Insert values
    self.connection.execute(reflected_table.insert(), execute_inp)
Try this for pyodbc
crsr = cnxn.cursor()
crsr.fast_executemany = True
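For context, a minimal sketch of how that flag is used with plain pyodbc's executemany (the connection string and table are placeholders of my own, not from the question):

import pyodbc

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=servername;DATABASE=mydb;UID=username;PWD=password')
crsr = cnxn.cursor()
crsr.fast_executemany = True  # send the parameter sets in bulk instead of row by row

rows = [(1, 'alpha'), (2, 'beta'), (3, 'gamma')]
crsr.executemany("INSERT INTO my_table (id, name) VALUES (?, ?)", rows)
cnxn.commit()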
Starting with version 1.3, SQLAlchemy has directly supported fast_executemany, e.g.,
engine = create_engine(connection_uri, fast_executemany=True)
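For older SQLAlchemy versions (such as the 1.1.13 mentioned in the question), a commonly used workaround is to flip the flag on the underlying pyodbc cursor from a before_cursor_execute event; a sketch, assuming engine is the SQLAlchemy engine you already created:

from sqlalchemy import event

@event.listens_for(engine, "before_cursor_execute")
def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
    # only enable fast_executemany for executemany() calls (bulk inserts)
    if executemany:
        cursor.fast_executemany = True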
Background:
I am very new to SQLAlchemy and it seems to be fairly confusing as to how I should be selecting things.
I have a table in my MySQL database called genes, with the columns gene_id, gene_description, and gene_symbol.
What I want to do:
All I want to do is a simple select query:
Select * from Genes
But I am confused about how to achieve this.
Here is what I have done:
import sqlalchemy
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.automap import automap_base
import csv
import pandas as pd
engine = sqlalchemy.create_engine('mysql://root:toor@localhost') # connect to server
metadata = sqlalchemy.MetaData(bind=engine)
engine.execute("USE TestDB")
genes = sqlalchemy.table('Genes')
s = sqlalchemy.select([genes])
engine.execute(s)
The problem:
ProgrammingError: (_mysql_exceptions.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'FROM `Genes`' at line 2") [SQL: u'SELECT \nFROM `Genes`']
Also, is there some type of "intellisense" where I can just do something like gene_table = engine.Gene? If I am not mistaken there is a way to do this with mapping, but it didn't work for me.
EDIT:
This may help:
How to automatically reflect database to sqlalchemy declarative?
So we can use reflection and do not have to create classes explicitly, but if we want speed we can create them using something like sqlautocode as stated here:
Reverse engineer SQLAlchemy declarative class definition from existing MySQL database?
Also, there is an issue with MySQL databases where reflection gives an error that looks like the following (taken from Bitbucket: https://bitbucket.org/zzzeek/sqlalchemy/issues/1909/reflection-issue-with-mysql-url-with-no):
SNIP...
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/schema.py", line 1927, in __init__
self.reflect()
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/schema.py", line 2037, in reflect
connection=conn))
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/engine/base.py", line 1852, in table_names
return self.dialect.get_table_names(conn, schema)
File "<string>", line 1, in <lambda>
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/engine/reflection.py", line 32, in cache
return fn(self, con, *args, **kw)
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/dialects/mysql/base.py", line 1791, in get_table_names
self.identifier_preparer.quote_identifier(current_schema))
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/sql/compiler.py", line 1517, in quote_identifier
return self.initial_quote + self._escape_identifier(value) + self.final_quote
File "/opt/buildout-eggs/SQLAlchemy-0.6.4-py2.5.egg/sqlalchemy/dialects/mysql/mysqldb.py", line 77, in _escape_identifier
value = value.replace(self.escape_quote, self.escape_to_quote)
AttributeError: 'NoneType' object has no attribute 'replace'
This is resolved by adding a database name (the one you are using) as follows:
engine = create_engine('mysql+mysqldb://USER_NAME:PASSWORD@127.0.0.1/DATABASE_NAME', pool_recycle=3600) # connect to server
I used this to connect correctly:
http://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html
and this:
http://docs.sqlalchemy.org/en/latest/core/engines.html
This also may help:
How to automatically reflect database to sqlalchemy declarative?
My code finally looks like this:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

Base = automap_base()

# engine, suppose it has two tables 'user' and 'address' set up
engine = create_engine('mysql+mysqldb://root:toor@127.0.0.1/TestDB', pool_recycle=3600) # connect to server

# reflect the tables
Base.prepare(engine, reflect=True)

# mapped classes are now created with names by default
# matching that of the table name.
Genes = Base.classes.Genes
Address = Base.classes.address

# Start a session
session = Session(engine)

# add a row:
session.add(Genes(Gene_Id=1, Gene_Symbol="GENE_SYMBOL", Gene_Description="GENE_DESCRIPTION"))
session.commit()

q = session.query(Genes).all()
for gene in q:
    print("This is the Gene ID {},\n This is the Gene Desc {},\n this is the Gene symbol {}.".format(
        gene.Gene_Id, gene.Gene_Description, gene.Gene_Symbol))
The documentation for Pandas has numerous examples of best practices for working with data stored in various formats.
However, I am unable to find any good examples for working with databases like MySQL for example.
Can anyone point me to links or give some code snippets of how to convert query results using mysql-python to data frames in pandas efficiently?
As Wes says, io/sql's read_sql will do it, once you've gotten a database connection using a DBI compatible library. We can look at two short examples using the MySQLdb and cx_Oracle libraries to connect to Oracle and MySQL and query their data dictionaries. Here is the example for cx_Oracle:
import pandas as pd
import cx_Oracle
ora_conn = cx_Oracle.connect('your_connection_string')
df_ora = pd.read_sql('select * from user_objects', con=ora_conn)
print('loaded dataframe from Oracle. # Records: ', len(df_ora))
ora_conn.close()
And here is the equivalent example for MySQLdb:
import MySQLdb
mysql_cn = MySQLdb.connect(host='myhost',
                           port=3306, user='myusername', passwd='mypassword',
                           db='information_schema')
df_mysql = pd.read_sql('select * from VIEWS;', con=mysql_cn)
print('loaded dataframe from MySQL. records:', len(df_mysql))
mysql_cn.close()
For recent readers of this question: pandas has the following warning in its docs for version 0.14.0:
Warning: Some of the existing functions or function aliases have been
deprecated and will be removed in future versions. This includes:
tquery, uquery, read_frame, frame_query, write_frame.
And:
Warning: The support for the ‘mysql’ flavor when using DBAPI connection objects has
been deprecated. MySQL will be further supported with SQLAlchemy
engines (GH6900).
This makes many of the answers here outdated. You should use SQLAlchemy:
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('dialect://user:pass@host:port/schema', echo=False)
f = pd.read_sql_query('SELECT * FROM mytable', engine, index_col = 'ID')
For the record, here is an example using a sqlite database:
import pandas as pd
import sqlite3
with sqlite3.connect("whatever.sqlite") as con:
    sql = "SELECT * FROM table_name"
    df = pd.read_sql_query(sql, con)
    print(df.shape)
I prefer to create queries with SQLAlchemy, and then make a DataFrame from it. SQLAlchemy makes it easier to combine SQL conditions Pythonically if you intend to mix and match things over and over.
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pandas import DataFrame
import datetime
# We are connecting to an existing service
engine = create_engine('dialect://user:pwd@host:port/db', echo=False)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()
# And we want to query an existing table
tablename = Table('tablename',
                  Base.metadata,
                  autoload=True,
                  autoload_with=engine,
                  schema='ownername')

# These are the "Where" parameters, but I could as easily
# create joins and limit results
us = tablename.c.country_code.in_(['US','MX'])
dc = tablename.c.locn_name.like('%DC%')
dt = tablename.c.arr_date >= datetime.date.today() # Give me convenience or...

q = session.query(tablename).\
    filter(us & dc & dt) # That's where the magic happens!!!

def querydb(query):
    """
    Function to execute a query and return a DataFrame.
    """
    df = DataFrame(query.all())
    df.columns = [x['name'] for x in query.column_descriptions]
    return df

querydb(q)
MySQL example:
import MySQLdb as db
from pandas import DataFrame
from pandas.io.sql import frame_query
database = db.connect('localhost','username','password','database')
data = frame_query("SELECT * FROM data", database)
The same syntax also works for MS SQL Server using pyodbc.
import pyodbc
import pandas.io.sql as psql
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=servername;DATABASE=mydb;UID=username;PWD=password')
cursor = cnxn.cursor()
sql = ("""select * from mytable""")
df = psql.frame_query(sql, cnxn)
cnxn.close()
And this is how you connect to PostgreSQL using the psycopg2 driver (install it with "apt-get install python-psycopg2" if you're on a Debian-derived Linux OS).
import pandas.io.sql as psql
import psycopg2
conn = psycopg2.connect("dbname='datawarehouse' user='user1' host='localhost' password='uberdba'")
q = """select month_idx, sum(payment) from bi_some_table"""
df3 = psql.frame_query(q, conn)
For Sybase the following works (with http://python-sybase.sourceforge.net)
import pandas.io.sql as psql
import Sybase
df = psql.frame_query("<Query>", con=Sybase.connect("<dsn>", "<user>", "<pwd>"))
pandas.io.sql.frame_query is deprecated. Use pandas.read_sql instead.
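For example, the earlier MS SQL Server snippet would look roughly like this with the newer API (same placeholder server, database, and table names as above):

import pyodbc
import pandas as pd

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=servername;DATABASE=mydb;UID=username;PWD=password')
df = pd.read_sql("select * from mytable", cnxn)  # read_sql replaces the deprecated frame_query
cnxn.close()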
Import the modules:
import pandas as pd
import oursql
Connect and query:
conn = oursql.connect(host="localhost", user="me", passwd="mypassword", db="classicmodels")
sql = "Select customerName, city, country from customers order by customerName, country, city"
df_mysql = pd.read_sql(sql, conn)
print(df_mysql)
That works just fine, and so does using pandas.io.sql's frame_query (with the deprecation warning). The database used is the sample database from the MySQL tutorial.
This should work just fine.
import MySQLdb as mdb
import pandas as pd
con = mdb.connect('127.0.0.1', 'root', 'password', 'database_name')

with con:
    cur = con.cursor()
    cur.execute("select random_number_one, random_number_two, random_number_three from randomness.a_random_table")
    rows = cur.fetchall()
    df = pd.DataFrame([[ij for ij in i] for i in rows])
    df.rename(columns={0: 'Random Number One', 1: 'Random Number Two', 2: 'Random Number Three'}, inplace=True)
    print(df.head(20))
This helped me connect to AWS MySQL (RDS) from a Python 3.x based Lambda function and load the result into a pandas DataFrame:
import json
import boto3
import pymysql
import pandas as pd
user = 'username'
password = 'XXXXXXX'
client = boto3.client('rds')
def lambda_handler(event, context):
    conn = pymysql.connect(host='xxx.xxxxus-west-2.rds.amazonaws.com', port=3306, user=user,
                           passwd=password, db='database name', connect_timeout=5)
    df = pd.read_sql('select * from TableName limit 10', con=conn)
    print(df)
    # TODO implement
    # return {
    #     'statusCode': 200,
    #     'df': df
    # }
For Postgres users
import psycopg2
import pandas as pd
conn = psycopg2.connect("database='datawarehouse' user='user1' host='localhost' password='uberdba'")
customers = 'select * from customers'
customers_df = pd.read_sql(customers,conn)
customers_df