I have a python script (written in Jupyter notebook) and I would like to run this script in Azure. The python script basically gets data from API source (which updated every 24 hours) and updates the SQL database which is Azure. So this automated python script will update the database table whenever it runs
Can someone please me with this?
Below is the python code i have written,
import pyodbc
import requests
import json
import pandas as pd
responses = requests.get("https://data.buffalony.gov/resource/d6g9-xbgu.json")
crime_data = json.loads(responses.text)
dic = {}
dic = crime_data
df = pd.DataFrame.from_dict(dic)
dff = df[['case_number','day_of_week','incident_datetime','incident_description','incident_id','incident_type_primary']].copy()
connection = pyodbc.connect ('Driver={ODBC Driver 17 for SQL Server};Server=servername;Database=Databasename;UID=admin;PWD=admin')
cur = connection.cursor()
row = []
for i in range(dff.shape[0]):
row.append(dff.iloc[i].tolist())
sql = '''\
INSERT INTO [dbo].[FF] ([case_number],[day_of_week],[incident_datetime],[incident_description],[incident_id],[incident_type_primary]) values (?,?,?,?,?,?)
'''
for i in range(dff.shape[0]):
cur.execute(sql,row[i])
connection.commit()
I don't use azure and jupyter notebook but I think I have a solution
If you leave your computer run all night change your code into this :
import time
import pyodbc
import requests
import json
import pandas as pd
while 1:
responses = requests.get("https://data.buffalony.gov/resource/d6g9-xbgu.json")
crime_data = json.loads(responses.text)
dic = {}
dic = crime_data
df = pd.DataFrame.from_dict(dic)
dff = df [['case_number','day_of_week','incident_datetime','incident_description','incident_i d','incident_type_primary']].copy()
connection = pyodbc.connect ('Driver={ODBC Driver 17 for SQL Server};Server=servername;Database=Databasename;UID=admin;PWD=admin')
cur = connection.cursor()
row = []
for i in range(dff.shape[0]):
row.append(dff.iloc[i].tolist())
sql = '''\
INSERT INTO [dbo].[FF] ([case_number],[day_of_week],[incident_datetime], [incident_description],[incident_id],[incident_type_primary]) values (?,?,?,?,?,?)
'''
for i in range(dff.shape[0]):
cur.execute(sql,row[i])
connection.commit()
time.sleep(86400)
if not create a new python program in the startup file like this:
import time, os
while 1:
if time.ctime()[11:13] >= "update hour" and time.ctime()[0:4] != open("path/to/any_file.txt").read():
file = open("path/to/any_file.txt", "w")
file.write(time.ctime()[0:4])
file.close()
os.system("python /path/to/file.py")
A task scheduler like Azure WebJobs will do this for you.
From this source I get to know to read lat long coordinate from a csv file and get the reverse geocoding done, but when I try to do the same but using source as sql table instead of csv file, it gives some syntactical error.
reading data from sql table
import pyodbc
import urllib.request
import urllib.parse
import json
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=localhost;DATABASE=mydb;UID=test;PWD=abc#123;autocommit=True')
cursor = cnxn.cursor()
cursor.execute("select GEOCODE_ID, lat, longt from GEOCODE_TBL where JSON_str is NULL")
ID=[]
px_val=[]
py_val=[]
for row in cursor.fetchall():
ID.append(row[0])
px_val.append(row[1])
py_val.append(row[2])
#code to update the O/P from google reverse geocoding(JSON String) into table
for i in range(1,len(px_val)):
wp = urllib.request.urlopen("http://maps.googleapis.com/maps/api/geocode/json?latlng={0},{1}&sensor=false".format(px_val[i],py_val[i]))
pw=wp.read().decode('utf-8')
pw = json.loads(wp)
cursor.execute("UPDATE GEOCODE_TBL SET JSON_str = ? WHERE GEOCODE_ID = ?", str(pw[i]),str(ID(i)))
print('Done')
cnxn.commit()
Any help on this will be grateful.
Thanks.
I am accessing oracle database and trying to update it using python. Below is my code :
import cx_Oracle
import pandas as pd
import datetime
import numpy
import math
conn = cx_Oracle.connect(conn_str)
c = conn.cursor()
def update_output_table(customer_id_list,column_name,column_vlaue_list) :
num_rows_to_add = len(customer_id_list)
conn = cx_Oracle.connect(conn_str)
c = conn.cursor()
for i in range(0,num_rows_to_add,1) :
c.execute("""UPDATE output SET """+column_name+""" = %s WHERE customer_id = %s""" %(column_vlaue_list[i],customer_id_list[i]))
total_transaction_df = pd.read_sql("""select distinct b.customer_id,count(a.transaction_id) as total_transaction from transaction_fact a,customer_dim b where a.customer_id = b.CUSTOMER_ID group by b.CUSTOMER_ID""",conn)
# Update this details to the output table
update_output_table(list(total_transaction_df['CUSTOMER_ID']),'TOTAL_TRANSACTION',list(total_transaction_df['TOTAL_TRANSACTION']))
conn.close()
My program is getting executed completely but I don't see my database table getting updated. Can someone suggest where I am going wrong?
Note : I am a newbie.Sorry for asking silly doubts. Thanks in advance.
You're missing conn.commit() before conn.close():
Here you will find some info why you need it explicitely. Without commit your code is doing update then when closing connection all non-commited changes are rolled back so you see no changes in DB.
You can also set cx_Oracle.Connection.autocommit = 1 but this is not recommended way as you're loosing control over transactions.
Has anyone found a way to read a Teradata query into a Pandas dataframe? It looks like SQLAlchemy does not have a Teradata dialect.
http://docs.sqlalchemy.org/en/latest/dialects/
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql.html
You can use slqalchemy but you will need to install sqlalchemy-teradata too. You can do that via PIP
pip install sqlachemy-teradata
The rest of the code remains the same :)
from sqlalchemy import create_engine
import pandas as pd
user, pasw, host = 'username','userpass', 'hostname'
# connect
td_engine = create_engine('teradata://{}:{}#{}:22/'.format(user,pasw,hostname))
# execute sql
query = 'select * from dbc.usersV'
result = td_engine.execute(query)
#To read your query to Pandas
df = pd.read_sql(query,td_engine)
I did it using read_sql . Below id the code snip :
def dqm() :
conn_rw = create_connection()
dataframes = []
srcfile = open('srcqueries.sql', 'rU').read()
querylist = srcfile.split(';')
querylist.pop()
for query in querylist :
dataframes.append(pd.read_sql(query, conn_rw))
close_connection(conn_rw)
return dataframes,querylist
You can create connection as below :
def create_connection():
conn = pyodbc.connect("DRIVER=Teradata;DBCNAME=tddb;UID=uid;PWD=pwd;QUIETMODE=YES", autocommit=True,unicode_results=True)
return conn
You can check complete code here : GitHub Link
Let me know if this answers your query .
The documentation for Pandas has numerous examples of best practices for working with data stored in various formats.
However, I am unable to find any good examples for working with databases like MySQL for example.
Can anyone point me to links or give some code snippets of how to convert query results using mysql-python to data frames in Pandas efficiently ?
As Wes says, io/sql's read_sql will do it, once you've gotten a database connection using a DBI compatible library. We can look at two short examples using the MySQLdb and cx_Oracle libraries to connect to Oracle and MySQL and query their data dictionaries. Here is the example for cx_Oracle:
import pandas as pd
import cx_Oracle
ora_conn = cx_Oracle.connect('your_connection_string')
df_ora = pd.read_sql('select * from user_objects', con=ora_conn)
print 'loaded dataframe from Oracle. # Records: ', len(df_ora)
ora_conn.close()
And here is the equivalent example for MySQLdb:
import MySQLdb
mysql_cn= MySQLdb.connect(host='myhost',
port=3306,user='myusername', passwd='mypassword',
db='information_schema')
df_mysql = pd.read_sql('select * from VIEWS;', con=mysql_cn)
print 'loaded dataframe from MySQL. records:', len(df_mysql)
mysql_cn.close()
For recent readers of this question: pandas have the following warning in their docs for version 14.0:
Warning: Some of the existing functions or function aliases have been
deprecated and will be removed in future versions. This includes:
tquery, uquery, read_frame, frame_query, write_frame.
And:
Warning: The support for the ‘mysql’ flavor when using DBAPI connection objects has
been deprecated. MySQL will be further supported with SQLAlchemy
engines (GH6900).
This makes many of the answers here outdated. You should use sqlalchemy:
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('dialect://user:pass#host:port/schema', echo=False)
f = pd.read_sql_query('SELECT * FROM mytable', engine, index_col = 'ID')
For the record, here is an example using a sqlite database:
import pandas as pd
import sqlite3
with sqlite3.connect("whatever.sqlite") as con:
sql = "SELECT * FROM table_name"
df = pd.read_sql_query(sql, con)
print df.shape
I prefer to create queries with SQLAlchemy, and then make a DataFrame from it. SQLAlchemy makes it easier to combine SQL conditions Pythonically if you intend to mix and match things over and over.
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pandas import DataFrame
import datetime
# We are connecting to an existing service
engine = create_engine('dialect://user:pwd#host:port/db', echo=False)
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()
# And we want to query an existing table
tablename = Table('tablename',
Base.metadata,
autoload=True,
autoload_with=engine,
schema='ownername')
# These are the "Where" parameters, but I could as easily
# create joins and limit results
us = tablename.c.country_code.in_(['US','MX'])
dc = tablename.c.locn_name.like('%DC%')
dt = tablename.c.arr_date >= datetime.date.today() # Give me convenience or...
q = session.query(tablename).\
filter(us & dc & dt) # That's where the magic happens!!!
def querydb(query):
"""
Function to execute query and return DataFrame.
"""
df = DataFrame(query.all());
df.columns = [x['name'] for x in query.column_descriptions]
return df
querydb(q)
MySQL example:
import MySQLdb as db
from pandas import DataFrame
from pandas.io.sql import frame_query
database = db.connect('localhost','username','password','database')
data = frame_query("SELECT * FROM data", database)
The same syntax works for Ms SQL server using podbc also.
import pyodbc
import pandas.io.sql as psql
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=servername;DATABASE=mydb;UID=username;PWD=password')
cursor = cnxn.cursor()
sql = ("""select * from mytable""")
df = psql.frame_query(sql, cnxn)
cnxn.close()
And this is how you connect to PostgreSQL using psycopg2 driver (install with "apt-get install python-psycopg2" if you're on Debian Linux derivative OS).
import pandas.io.sql as psql
import psycopg2
conn = psycopg2.connect("dbname='datawarehouse' user='user1' host='localhost' password='uberdba'")
q = """select month_idx, sum(payment) from bi_some_table"""
df3 = psql.frame_query(q, conn)
For Sybase the following works (with http://python-sybase.sourceforge.net)
import pandas.io.sql as psql
import Sybase
df = psql.frame_query("<Query>", con=Sybase.connect("<dsn>", "<user>", "<pwd>"))
pandas.io.sql.frame_query is deprecated. Use pandas.read_sql instead.
import the module
import pandas as pd
import oursql
connect
conn=oursql.connect(host="localhost",user="me",passwd="mypassword",db="classicmodels")
sql="Select customerName, city,country from customers order by customerName,country,city"
df_mysql = pd.read_sql(sql,conn)
print df_mysql
That works just fine and using pandas.io.sql frame_works (with the deprecation warning). Database used is the sample database from mysql tutorial.
This should work just fine.
import MySQLdb as mdb
import pandas as pd
con = mdb.connect(‘127.0.0.1’, ‘root’, ‘password’, ‘database_name’);
with con:
cur = con.cursor()
cur.execute(“select random_number_one, random_number_two, random_number_three from randomness.a_random_table”)
rows = cur.fetchall()
df = pd.DataFrame( [[ij for ij in i] for i in rows] )
df.rename(columns={0: ‘Random Number One’, 1: ‘Random Number Two’, 2: ‘Random Number Three’}, inplace=True);
print(df.head(20))
This helped for me for connecting to AWS MYSQL(RDS) from python 3.x based lambda function and loading into a pandas DataFrame
import json
import boto3
import pymysql
import pandas as pd
user = 'username'
password = 'XXXXXXX'
client = boto3.client('rds')
def lambda_handler(event, context):
conn = pymysql.connect(host='xxx.xxxxus-west-2.rds.amazonaws.com', port=3306, user=user, passwd=password, db='database name', connect_timeout=5)
df= pd.read_sql('select * from TableName limit 10',con=conn)
print(df)
# TODO implement
#return {
# 'statusCode': 200,
# 'df': df
#}
For Postgres users
import psycopg2
import pandas as pd
conn = psycopg2.connect("database='datawarehouse' user='user1' host='localhost' password='uberdba'")
customers = 'select * from customers'
customers_df = pd.read_sql(customers,conn)
customers_df