Pulling MS Access tables and putting them in data frames in Python

I have tried many different things to pull the data from Access and put it into a neat data frame. Right now my code looks like this:
from pandas import DataFrame
import numpy as np
import pyodbc
from sqlalchemy import create_engine
db_file = r'C:\Users\username\file.accdb'
user = 'user'
password = 'pw'
odbc_conn_str = 'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=%s;UID=%s;PWD=%s' % (db_file, user, password)
conn = pyodbc.connect(odbc_conn_str)
cur = conn.cursor()
qry = cur.execute("SELECT * FROM table WHERE INST = '796116'")
dataf = DataFrame(qry.fetchall())
print(dataf)
This puts the data into a data frame, but each record ends up as a single tuple in the second column. I need the snippet below to be split into separate columns, not 2 columns with a tuple in one of them.
0 (u'RM257095', u'c1', u'796116')
1 (u'RM257097', u'c2', u'796116')
2 (u'RM257043', u'c3', u'796116')
3 (u'RM257044', u'c4', u'796116')
I have used modules like kdb_utils, which has a read_query function that pulled the data from kdb and separated it into a neat dataframe. Is there anything like this for Access, or another way to pull the data and neatly put it into a data frame?

Consider using pandas' direct read_sql method:
import pyodbc
import pandas as pd
...
cnxn = pyodbc.connect('DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};'
                      'DBQ={};Uid={};Pwd={};'.format(db_file, user, password))
query = "SELECT * FROM mytable WHERE INST = '796116'"
dataf = pd.read_sql(query, cnxn)
cnxn.close()
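If you would rather keep the raw pyodbc cursor from the question, a minimal sketch (assuming the same conn as above) is to take the column names from cursor.description and convert each pyodbc Row to a plain tuple, so every field lands in its own column:
import pyodbc
from pandas import DataFrame
# conn is the pyodbc connection built from odbc_conn_str in the question
cur = conn.cursor()
cur.execute("SELECT * FROM table WHERE INST = '796116'")
columns = [col[0] for col in cur.description]   # column names from the cursor
rows = [tuple(row) for row in cur.fetchall()]   # pyodbc Row -> plain tuple
dataf = DataFrame(rows, columns=columns)
print(dataf)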

Related

Printing data results from PostgreSQL to a pandas dataframe

I am trying to print the results of a joined table from PostgreSQL in Python. However, when I try to print the results, the table shows up but I receive NaN data. Can someone help?
conn = psy.connect( dbname = "funda_project", host = "localhost", user =
"postgres", password = "ledidhima2021.")
cursor = conn.cursor()
conn.commit()
createjointable2 = '''SELECT(
distance_data."Municipality",
distance_data."Childcare/Nursery",
distance_data."Leisure/Culture/Library",
sales_details."Purchase_price",
sales_details."Publication_date",
sales_details."Date_of_signature",
house_details."Type_of_house",
house_details."Object_categorie",
house_details."Construction_year",
house_details."Energy_label_class",
demo_data."Age_Group_Relation_(15-20)",
demo_data."Age_Group_Relation_(20-25)",
demo_data."Age_Group_Relation_(25-45)")
FROM "distance_data"
INNER JOIN "zip_data"
ON "distance_data"."Municipality" = "zip_data"."Municipality"
INNER JOIN "demo_data"
ON "zip_data"."Municipality" = "demo_data"."Municipality"
INNER JOIN "sales_details"
ON "zip_data"."globalId" = "sales_details"."GlobalID"
INNER JOIN "house_details"
ON "zip_data"."globalId" = "house_details"."GlobalID"
;'''
cursor.execute(createjointable2);
from pandas import DataFrame
eri= pd.DataFrame(cursor.fetchall())
datalist = list(eri)
results = pd.DataFrame (eri, columns = ["Municipality", "Childcare/Nursery",
"Leisure/Culture/Library", "Purchase_price", "Publication_date", "Date_of_signature",
"Type_of_house", "Object_categorie", "Construction_year", "Energy_label_class",
"Age_Group_Relation_(15-20)", "Age_Group_Relation_(20-25)", "Age_Group_Relation_(25-45)"])
results
Pandas has a built-in SQL query reading function, pd.read_sql_query(query, connection), which assigns the returned table to a dataframe:
dataframe = pd.read_sql_query("SELECT * FROM table;", conn)
conn being the connection object you created, which is also in your code.
Another way is almost what you tried as well:
from pandas import DataFrame
df = DataFrame(cursor.fetchall())
df.columns = [col[0] for col in cursor.description]  # psycopg2 cursors expose column names via description
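Applied to the join in the question, a minimal sketch (assuming conn is the psycopg2 connection and createjointable2 is the query string you already built) would be:
import pandas as pd
# conn and createjointable2 as defined in the question
results = pd.read_sql_query(createjointable2, conn)
print(results.head())   # column names come straight from the SELECT list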

How to store all tables in a SQL database in dataframes? [duplicate]

I have a database that contains multiple tables, and I am trying to import each table as a pandas dataframe. I can do this for a single table as follows:
import pandas as pd
import pandas.io.sql as psql
import pypyodbc
conn = pypyodbc.connect("DRIVER={SQL Server};\
SERVER=serveraddress;\
UID=uid;\
PWD=pwd;\
DATABASE=db")
df1 = psql.read_frame('SELECT * FROM dbo.table1', conn)
The number of tables in the database will change, and at any time I would like to be able to import each table into its own dataframe. How can I get all of these tables into pandas?
Depending on your SQL server, you can inspect the tables in a database. On SQL Server, for example, INFORMATION_SCHEMA.TABLES lists them:
tables_df = pd.read_sql("SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'", conn)
Now that your table names are accessible in a pandas data frame, you just need to loop over them:
table_name_list = tables_df.table_name
select_template = 'SELECT * FROM {table_name}'
frames_dict = {}
for tname in table_name_list:
    query = select_template.format(table_name=tname)
    frames_dict[tname] = pd.read_sql(query, conn)
Your dictionary frames_dict now contains all the dataframes, with the table name as the key.
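Once the loop has run, a quick check of what was pulled in might look like this (the table name shown is just a placeholder):
# list which tables were imported
print(list(frames_dict.keys()))
# inspect one of them (hypothetical table name)
print(frames_dict['table1'].head())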

Convert SQL query output into pandas dataframe

I have been looking since yesterday for a way to convert the output of an SQL query into a pandas dataframe.
For example, code that does this:
data = select * from table
I've tried so many code samples I found on the internet, but nothing seems to work.
Note that my database is stored in Azure DataBricks and I can only access the table using its URL.
Thank you so much !
Hope this helps you out; both insertion and selection are in this code for reference.
import urllib.parse
import pandas as pd
import pyodbc
from sqlalchemy import create_engine

def db_insert_user_level_info(table_name):
    # Call your DF here, as an argument in the function or pass directly
    df = df_parameter
    params = urllib.parse.quote_plus("DRIVER={SQL Server};SERVER=DESKTOP-ITAJUJ2;DATABASE=githubAnalytics")
    engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
    engine.connect()
    table_row_count = select_row_count(table_name)
    df_row_count = df.shape[0]
    if table_row_count == df_row_count:
        print("Data Cannot Be Inserted Because The Row Count is the Same")
    else:
        df.to_sql(name=table_name, con=engine, index=False, if_exists='append')
        print("********************************** DONE, EXECUTED SUCCESSFULLY ***************************************************")

def select_row_count(table_name):
    cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                          "Server=DESKTOP-ITAJUJ2;"
                          "Database=githubAnalytics;"
                          "Trusted_Connection=yes;")
    cur = cnxn.cursor()
    try:
        db_cmd = "SELECT count(*) FROM " + table_name
        res = cur.execute(db_cmd)
        # Do something with your result set, for example print out all the results:
        for x in res:
            return x[0]
    except:
        print("Table is not available, please wait...")
Using sqlalchemy to connect to the database, and the built-in method read_sql_query from pandas to go straight to a DataFrame:
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(url)
connection = engine.connect()
query = "SELECT * FROM table"
df = pd.read_sql_query(query,connection)
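As a small follow-up, a sketch that makes sure the connection is closed even if the query fails (assuming the same url as above):
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine(url)   # url as in the snippet above
# the context manager closes the connection when the block ends
with engine.connect() as connection:
    df = pd.read_sql_query("SELECT * FROM table", connection)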

Writing data to a SQL table from a text file using the pandas module

I was trying to read some data from a text file and write it to a SQL Server table using the pandas module and a for loop. Below is my code:
import pandas as pd
import pyodbc

driver = '{SQL Server Native Client 11.0}'
conn = pyodbc.connect(
    Trusted_Connection='Yes',
    Driver=driver,
    Server='***********',
    Database='Sullins_Data'
)

def createdata():
    cursor = conn.cursor()
    cursor.execute(
        'insert into Sullins_Datasheet(Part_Number,Web_Link) values(?,?);',
        (a, j))
    conn.commit()

a = pd.read_csv('check9.txt', header=None, names=['Part_Number', 'Web_Links'])  # 2 columns, 8 rows
b = pd.DataFrame(a)
p_no = (b['Part_Number'])
w_link = (b['Web_Links'])
# print(p_no)
for i in p_no:
    a = i
    for l in w_link:
        j = l
createdata()
As you can see from the code, I created the two variables a and j to hold the values of the two columns of the text file one by one and write them to the SQL table.
But after running the code, I got only the last row's values in the table out of 8 rows.
When I called the createdata function inside the w_link for loop, it wrote duplicate values to the table.
Please suggest where I am going wrong.
Here is a sample of how your code is working:
a = 0
b = 0
ptr = ['s', 'd', 'f', 'e']
pt = ['a', 'b', 'c', 'd']
for i in ptr:
    a = i
    print(a, end='')
    for j in pt:
        b = j
        print(b, end='')
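A minimal sketch of one way to fix it (assuming the same conn and table from the question) is to walk the two columns in step instead of nesting the loops, so each part number is inserted together with its own link:
import pandas as pd
# conn is the pyodbc connection from the question
a = pd.read_csv('check9.txt', header=None, names=['Part_Number', 'Web_Links'])
cursor = conn.cursor()
for p_no, w_link in zip(a['Part_Number'], a['Web_Links']):
    cursor.execute(
        'insert into Sullins_Datasheet(Part_Number,Web_Link) values(?,?);',
        (p_no, w_link))
conn.commit()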

How to create a new table in a MySQL DB from a pandas dataframe

I recently transitioned from using SQLite for most of my data storage and management needs to MySQL. I think I've finally gotten the correct libraries installed to work with Python 3.6, but now I am having trouble creating a new table from a dataframe in the MySQL database.
Here are the libraries I import:
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine
In my code, I first create a dataframe from a CSV file (no issues here).
def csv_to_df(infile):
return pd.read_csv(infile)
Then I establish a connection to the MySQL database using this def function:
def mysql_connection():
user = 'root'
password = 'abc'
host = '127.0.0.1'
port = '3306'
database = 'a001_db'
engine = create_engine("mysql://{0}:{1}@{2}:{3}/{4}?charset=utf8".format(user, password, host, port, database))
return engine
Lastly, I use the pandas function "to_sql" to create the database table in the MySQL database:
def df_to_mysql(df, db_tbl_name, conn=mysql_connection(), index=False):
    df.to_sql(con=conn, name=db_tbl_name, if_exists='replace', index=False)
I run the code using this line:
df_to_mysql(csv_to_df(r'path/to/file.csv'), 'new_database_table')
This yields the following error:
InvalidRequestError: Could not reflect: requested table(s) not available in Engine(mysql://root:***@127.0.0.1:3306/a001_db?charset=utf8): (new_database_table)
I think this is telling me that I must first create a table in the database before passing the data in the dataframe to this table, but I'm not 100% positive about that. Regardless, I'm looking for a way to create a table in a MySQL database without manually creating the table first (I have many CSVs, each with 50+ fields, that have to be uploaded as new tables in a MySQL database).
Any suggestions?
I took an approach suggested by aws_apprentice above, which was to create the table first and then write data to it.
The code below first auto-generates a MySQL table from a df (automatically defining column names and data types), then writes the df data to that table.
There were a couple of hiccups I had to overcome, such as unnamed csv columns and determining the correct data type for each field in the MySQL table.
I'm sure there are multiple other (better?) ways to do this, but this seems to work.
import pandas as pd
from sqlalchemy import create_engine
infile = r'path/to/file.csv'
db = 'a001_db'
db_tbl_name = 'a001_rd004_db004'
'''
Load a csv file into a dataframe; if csv does not have headers, use the headers arg to create a list of headers; rename unnamed columns to conform to mysql column requirements
'''
def csv_to_df(infile, headers = []):
    if len(headers) == 0:
        df = pd.read_csv(infile)
    else:
        df = pd.read_csv(infile, header = None)
        df.columns = headers
    for r in range(10):
        try:
            df.rename(columns={'Unnamed: {0}'.format(r): 'Unnamed{0}'.format(r)}, inplace=True)
        except:
            pass
    return df
'''
Create a mapping of df dtypes to mysql data types (not perfect, but close enough)
'''
def dtype_mapping():
    return {'object' : 'TEXT',
            'int64' : 'INT',
            'float64' : 'FLOAT',
            'datetime64' : 'DATETIME',
            'bool' : 'TINYINT',
            'category' : 'TEXT',
            'timedelta[ns]' : 'TEXT'}
'''
Create a sqlalchemy engine
'''
def mysql_engine(user = 'root', password = 'abc', host = '127.0.0.1', port = '3306', database = 'a001_db'):
    engine = create_engine("mysql://{0}:{1}@{2}:{3}/{4}?charset=utf8".format(user, password, host, port, database))
    return engine
'''
Create a mysql connection from sqlalchemy engine
'''
def mysql_conn(engine):
    conn = engine.raw_connection()
    return conn
'''
Create sql input for table names and types
'''
def gen_tbl_cols_sql(df):
    dmap = dtype_mapping()
    sql = "pi_db_uid INT AUTO_INCREMENT PRIMARY KEY"
    df1 = df.rename(columns = {"" : "nocolname"})
    hdrs = df1.dtypes.index
    hdrs_list = [(hdr, str(df1[hdr].dtype)) for hdr in hdrs]
    for hl in hdrs_list:
        sql += " ,{0} {1}".format(hl[0], dmap[hl[1]])
    return sql
'''
Create a mysql table from a df
'''
def create_mysql_tbl_schema(df, conn, db, tbl_name):
    tbl_cols_sql = gen_tbl_cols_sql(df)
    sql = "USE {0}; CREATE TABLE {1} ({2})".format(db, tbl_name, tbl_cols_sql)
    cur = conn.cursor()
    cur.execute(sql)
    cur.close()
    conn.commit()
'''
Write df data to newly create mysql table
'''
def df_to_mysql(df, engine, tbl_name):
    df.to_sql(tbl_name, engine, if_exists='replace')
df = csv_to_df(infile)
create_mysql_tbl_schema(df, mysql_conn(mysql_engine()), db, db_tbl_name)
df_to_mysql(df, mysql_engine(), db_tbl_name)
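For comparison, a shorter path that skips the manual schema step is to let to_sql create the table itself through a SQLAlchemy engine. A minimal sketch, assuming the mysqlconnector driver is installed and the same credentials as above:
import pandas as pd
from sqlalchemy import create_engine
# to_sql creates the table when it does not exist yet
engine = create_engine(
    "mysql+mysqlconnector://root:abc@127.0.0.1:3306/a001_db?charset=utf8")
df = pd.read_csv(r'path/to/file.csv')
df.to_sql('new_database_table', con=engine, index=False, if_exists='replace')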
This:
connection = engine.connect()
df.to_sql(con=connection, name='TBL_NAME', schema='SCHEMA', index=False, if_exists='replace')
works with an Oracle DB in a specific schema without errors, but will not work if you have limited permissions. Also note that table names are case sensitive.
