Writing data to a SQL table from a text file using the pandas module - Python

I was trying to read some data from a text file and write it to a SQL Server table using the pandas module and a for loop. Below is my code:
import pandas as pd
import pyodbc

driver = '{SQL Server Native Client 11.0}'
conn = pyodbc.connect(
    Trusted_Connection='Yes',
    Driver=driver,
    Server='***********',
    Database='Sullins_Data'
)

def createdata():
    cursor = conn.cursor()
    cursor.execute(
        'insert into Sullins_Datasheet(Part_Number,Web_Link) values(?,?);',
        (a, j))
    conn.commit()

a = pd.read_csv('check9.txt', header=None, names=['Part_Number', 'Web_Links'])  # 2 columns, 8 rows
b = pd.DataFrame(a)
p_no = b['Part_Number']
w_link = b['Web_Links']
# print(p_no)

for i in p_no:
    a = i
for l in w_link:
    j = l

createdata()
As you can see from the code, I have created two variables, a and j, to hold the values of the two columns of the text file one by one and write them to the SQL table.
But after running the code, I got only the last row's values in the table out of the 8 rows.
When I call the createdata function inside the w_link for loop, it writes duplicate values to the table.
Please suggest where I am going wrong.

Here is a sample of how your code is working:
a = 0
b = 0
ptr = ['s', 'd', 'f', 'e']
pt = ['a', 'b', 'c', 'd']
for i in ptr:
    a = i
    print(a, end='')   # prints every value of ptr, but a only keeps the last one
for j in pt:
    b = j
    print(b, end='')   # prints every value of pt, but b only keeps the last one
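To get all 8 rows into the table, the two columns need to be paired row by row and the insert executed once per pair. A minimal sketch of one way to do that, reusing the table, file, and connection from the question (the single loop replaces the two separate loops and the lone createdata() call):
import pandas as pd
import pyodbc

conn = pyodbc.connect(
    Trusted_Connection='Yes',
    Driver='{SQL Server Native Client 11.0}',
    Server='***********',
    Database='Sullins_Data'
)

df = pd.read_csv('check9.txt', header=None, names=['Part_Number', 'Web_Links'])

cursor = conn.cursor()
# zip pairs each part number with the link from the same row
for part_number, web_link in zip(df['Part_Number'], df['Web_Links']):
    cursor.execute(
        'insert into Sullins_Datasheet(Part_Number,Web_Link) values(?,?);',
        (part_number, web_link))
conn.commit()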

Related

Printing data results from PostgreSQL to a pandas DataFrame

I am trying to print the results of a joined table from PostgreSQL in Python. However, when I try to print the results, the table shows up but the data is all NaN. Can someone help?
conn = psy.connect(dbname="funda_project", host="localhost", user="postgres",
                   password="ledidhima2021.")
cursor = conn.cursor()
conn.commit()
createjointable2 = '''SELECT(
distance_data."Municipality",
distance_data."Childcare/Nursery",
distance_data."Leisure/Culture/Library",
sales_details."Purchase_price",
sales_details."Publication_date",
sales_details."Date_of_signature",
house_details."Type_of_house",
house_details."Object_categorie",
house_details."Construction_year",
house_details."Energy_label_class",
demo_data."Age_Group_Relation_(15-20)",
demo_data."Age_Group_Relation_(20-25)",
demo_data."Age_Group_Relation_(25-45)")
FROM "distance_data"
INNER JOIN "zip_data"
ON "distance_data"."Municipality" = "zip_data"."Municipality"
INNER JOIN "demo_data"
ON "zip_data"."Municipality" = "demo_data"."Municipality"
INNER JOIN "sales_details"
ON "zip_data"."globalId" = "sales_details"."GlobalID"
INNER JOIN "house_details"
ON "zip_data"."globalId" = "house_details"."GlobalID"
;'''
cursor.execute(createjointable2)
from pandas import DataFrame
eri = pd.DataFrame(cursor.fetchall())
datalist = list(eri)
results = pd.DataFrame(eri, columns=["Municipality", "Childcare/Nursery",
    "Leisure/Culture/Library", "Purchase_price", "Publication_date", "Date_of_signature",
    "Type_of_house", "Object_categorie", "Construction_year", "Energy_label_class",
    "Age_Group_Relation_(15-20)", "Age_Group_Relation_(20-25)", "Age_Group_Relation_(25-45)"])
results
Pandas has a built-in SQL query reading function, pd.read_sql_query(query, connection), which assigns the returned table to a DataFrame:
dataframe = pd.read_sql_query("SELECT * FROM table;", conn)
Here conn is the connection object you created, which is already in your code.
Another way is almost what you tried as well:
from pandas import DataFrame
df = DataFrame(cursor.fetchall())
df.columns = [col[0] for col in cursor.description]
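For example, a minimal sketch applying the first approach to the join from the question (reusing the conn and createjointable2 already defined above):
import pandas as pd

# pandas runs the query and builds the DataFrame, including the column names
results = pd.read_sql_query(createjointable2, conn)
print(results.head())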

Python read MySQL to CSV

I would like to read a MySQL database in chunks and write its contents to a bunch of CSV files.
This can be done easily with pandas using the code below:
df_chunks = pd.read_sql_table(table_name, con, chunksize=CHUNK_SIZE)
for i, df in enumerate(df_chunks):
    df.to_csv("file_{}.csv".format(i))
Assuming I cannot use pandas, what other alternatives can I use? I tried the following:
import sqlalchemy as sqldb
import csv

CHUNK_SIZE = 100000
table_name = "XXXXX"
host = "XXXXXX"
user = "XXXX"
password = "XXXXX"
database = "XXXXX"
port = "XXXX"

engine = sqldb.create_engine('mysql+pymysql://{}:{}@{}:{}/{}'.format(user, password, host, port, database))
con = engine.connect()
metadata = sqldb.MetaData()
table = sqldb.Table(table_name, metadata, autoload=True, autoload_with=engine)
query = table.select()
proxy = con.execution_options(stream_results=True).execute(query)

cols = [""] + [column.name for column in table.c]
file_num = 0
while True:
    batch = proxy.fetchmany(CHUNK_SIZE)
    if not batch:
        break
    csv_writer = csv.writer(open("file_{}.csv".format(file_num), "w"), delimiter=',')
    csv_writer.writerow(cols)
    # csv_writer.writerows(batch)  # this works, but it has no index column like df.to_csv()
    for i, row in enumerate(batch):
        csv_writer.writerow(i + row)  # will error here
    file_num += 1
proxy.close()
While using .writerows(batch) works fine, it does not include an index column like the result you get from df.to_csv(). I would like to add the equivalent row number as well, but I can't seem to prepend it to the row, which is a sqlalchemy.engine.result.RowProxy. How can I do it? Or what other, faster alternative can I use?
Look up SELECT ... INTO OUTFILE ...
It will do the task in 1 SQL statement; 0 lines of Python (other than invoking that SQL).
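If you would rather stay in Python, the error in the question can be avoided by converting the RowProxy to a list before prepending the row number; a sketch, reusing the loop variables from the question:
# inside the fetchmany loop: prepend a running row number to each row
row_offset = file_num * CHUNK_SIZE            # keeps numbering continuous across files
for i, row in enumerate(batch):
    csv_writer.writerow([row_offset + i] + list(row))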

Unable to insert all rows into SQL table using python script

I have two data sets in my JSON API. I am unable to insert both into SQL Server. The iteration using a for loop doesn't seem to pick up the second data set. Can someone please help me understand how to fix this? This is new for me, so I am not able to find out what's wrong, since the coding is a bit different from SQL.
import urllib, json
import pyodbc

# read data from API
url = "http://nagiosdatagateway.vestas.net/esq/ITE1452552/logstash-2018.12.16/2/desc"
response = urllib.urlopen(url)
data = json.loads(response.read())

# define db connection
cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=DKCDCVDCP42\DPA;"
                      "Database=VPDC;"
                      "Trusted_Connection=yes;")
cursor = cnxn.cursor()

i = 0
j = len(data)
print j
for i in range(i, j - 1):
    # print data[1]["_source"]["utc_timestamp"]
    print i
    print data[i]["_source"]["nagios_comment"]
    print data[i]["_source"]["nagios_author"]
cursor.execute("insert into vpdc.pa.Pythontable(nagios_comment,nagios_author) values (?,?)",
               (data[i]["_source"]["nagios_comment"], data[i]["_source"]["nagios_author"]))
i += 1
print i
cnxn.commit()
Both of these sets of values should be in the SQL table under the columns nagios_comment and nagios_author:
307262828  Alex Christopher Ramos
307160348  Alex Christopher Ramos
The issue was resolved by correctly indenting the cursor.execute statement in the script, as shown below. In my original script there was no indentation for this line, so it was called outside the loop.
import urllib, json
import pyodbc

# read data from API
url = "http://nagiosdatagateway.vestas.net/esq/ITE1452552/logstash-2018.12.16/2/desc"
response = urllib.urlopen(url)
data = json.loads(response.read())

# define db connection
cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=DKCDCVDCP42\DPA;"
                      "Database=VPDC;"
                      "Trusted_Connection=yes;")
cursor = cnxn.cursor()

i = 0
j = len(data)
print j
for i in range(0, 2):
    # print data[1]["_source"]["utc_timestamp"]
    print data[i]["_source"]["nagios_comment"]
    print data[i]["_source"]["nagios_author"]
    cursor.execute("insert into vpdc.pa.Pythontable(nagios_comment,nagios_author) values (?,?)",
                   (data[i]["_source"]["nagios_comment"], data[i]["_source"]["nagios_author"]))
cnxn.commit()
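A slightly more general variant, sketched here on the assumption that data keeps the same structure, iterates over all records instead of a hard-coded range(0, 2) and lets pyodbc do the batch insert:
# build the parameter list once and insert every record in a single call
rows = [(rec["_source"]["nagios_comment"], rec["_source"]["nagios_author"]) for rec in data]
cursor.executemany(
    "insert into vpdc.pa.Pythontable(nagios_comment,nagios_author) values (?,?)",
    rows)
cnxn.commit()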

How to return a list from SQL query using pyodbc?

I am trying to run a select query to retrieve data from SQL Server using pyodbc in Python 2.7. I want the data to be returned in a list. The code I have written is below.
It works, kind of, but not in the way I expected. My returned list looks something like this:
Index Type Size Value
0 Row 1 Row object of pyodbc module
1 Row 1 Row object of pyodbc module
...
105 Row 1 Row object of pyodbc module
I was hoping to see something like below (i.e. my table in SQL)
ActionId AnnDate Name SaleValue
128929 2018-01-01 Bob 105.3
193329 2018-04-05 Bob 1006.98
...
23654 2018-11-21 Bob 103.32
Is a list not the best way to return data from a SQL query using pyodbc?
Code
import pyodbc

def GetSQLData(dbName, query):
    sPass = 'MyPassword'
    sServer = 'MyServer\\SQL1'
    uname = 'MyUser'
    cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                          "Server=" + sServer + ";"
                          "Database=" + dbName + ";"
                          "uid=" + uname + ";pwd=" + sPass)
    cursor = cnxn.cursor()
    cursor.execute(query)
    return list(cursor.fetchall())
If you want to return your query results as a list of lists with your column names as the first sublist (similar to the example output in your question), then you can do something like the following:
import pyodbc

cnxn = pyodbc.connect("YOUR_CONNECTION_STRING")
cursor = cnxn.cursor()
cursor.execute("YOUR_QUERY")

columns = [column[0] for column in cursor.description]
results = [columns] + [list(row) for row in cursor.fetchall()]

for result in results:
    print result

# EXAMPLE OUTPUT
# ['col1', 'col2']
# ['r1c1', 'r1c2']
# ['r2c1', 'r2c2']
Depending on how you are using the results, I often find it more useful to a have a list of dicts. For example:
results = [dict(zip(columns, row)) for row in cursor.fetchall()]

for result in results:
    print result

# EXAMPLE OUTPUT
# {'col1': 'r1c1', 'col2': 'r1c2'}
# {'col1': 'r2c1', 'col2': 'r2c2'}
There is an even better option than a list: try a pandas DataFrame!
It helps you deal with column names and apply column-wise operations.
import pandas as pd
import pyodbc

def GetSQLData(dbName, query):
    sPass = 'MyPassword'
    sServer = 'MyServer\\SQL1'
    uname = 'MyUser'
    cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                          "Server=" + sServer + ";"
                          "Database=" + dbName + ";"
                          "uid=" + uname + ";pwd=" + sPass)
    df = pd.read_sql(query, cnxn)
    return df  # pandas DataFrame
EDIT:
If you prefer a list of lists (that is, one list per row), you can obtain it with:
df.values.tolist()  # list of lists
But I highly recommend starting to work with pandas.
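For example, usage might look like this (a sketch; the database name, table, and query are placeholders, but the column names follow the example output in the question):
df = GetSQLData('MyDatabase', "SELECT ActionId, AnnDate, Name, SaleValue FROM SomeTable WHERE Name = 'Bob'")
print df.head()                # rows come back with proper column names
print df['SaleValue'].sum()    # a column-wise operation, as mentioned above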

Pulling MS access tables and putting them in data frames in python

I have tried many different things to pull the data from Access and put it into a neat data frame. Right now my code looks like this:
from pandas import DataFrame
import numpy as np
import pyodbc
from sqlalchemy import create_engine
db_file = r'C:\Users\username\file.accdb'
user = 'user'
password = 'pw'
odbc_conn_str = 'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=%s;UID=%s;PWD=%s' % (db_file, user, password)
conn = pyodbc.connect(odbc_conn_str)
cur = conn.cursor()
qry = cur.execute("SELECT * FROM table WHERE INST = '796116'")
dataf = DataFrame(qry.fetchall())
print(dataf)
This puts the data into a data frame, but the second column holds a list. I need the snippet below to be in 4 separate columns, not 2 with a list.
0 (u'RM257095', u'c1', u'796116')
1 (u'RM257097', u'c2', u'796116')
2 (u'RM257043', u'c3', u'796116')
3 (u'RM257044', u'c4', u'796116')
I have used modules like kdb_utils, which has a read_query function that pulled the data from kdb and separated it into a neat dataframe. Is there anything like this for Access, or another way to pull the data and neatly put it into a data frame?
Consider using pandas' direct read_sql method:
import pyodbc
import pandas as pd
...
cnxn = pyodbc.connect('DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};'
                      'DBQ={};Uid={};Pwd={};'.format(db_file, user, password))
query = "SELECT * FROM mytable WHERE INST = '796116'"
dataf = pd.read_sql(query, cnxn)
cnxn.close()
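A short usage check (just a sketch, assuming the query above returns the columns shown in the question):
print(dataf.head())     # each field now lands in its own named column
print(dataf.dtypes)     # column names and types come straight from the Access table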
