Sending data from a variable to a SQL query in pandas read_sql() - python

I want to pass a start date and an end date into my SQL query from two separate variables. Suppose
I have start_date = '2020-05-14' and end_date = '2020-07-08' stored in variables. My code is:
# SQLAlchemy URLs have the form dialect+driver://user:password@host/database:
# the credentials are separated from the host by "@", not "#".
# NOTE: the last path segment names the database (schema), not a table.
db_connection_str = 'mysql+pymysql://username:password@host/table_name'
db_connection = create_engine(db_connection_str)
def myfunc(start_date, end_date):
    """Read en_stat rows for a date range, 100 rows per chunk.

    Args:
        start_date: Intended lower bound for the ``date`` column.
        end_date: Intended upper bound for the ``date`` column.
    """
    # No comma after the last selected column: "date, FROM" is a SQL syntax error.
    sql_syn = "SELECT col_id, col_a, col_b, date FROM en_stat where date between :start_date and :end_date"
    # NOTE(review): the :start_date / :end_date placeholders are not bound to
    # the function arguments yet -- nothing is passed through read_sql's
    # `params` argument, so the query cannot run as written.
    sql_df = pd.read_sql(sql_syn, con=db_connection, chunksize=100)
How can I pass the start_date and end_date values dynamically in this code?

Use Python 3 f-strings (or, more safely, bound query parameters):
def myfunc(start_date, end_date):
    """Return en_stat rows dated between start_date and end_date.

    The dates are sent as bound parameters rather than being formatted into
    the SQL text with an f-string: interpolated values would reach the server
    unquoted (2020-05-14 is evaluated as the arithmetic 2020 - 5 - 14) and
    would expose the query to SQL injection.

    Args:
        start_date: Lower bound of the date range, e.g. '2020-05-14'.
        end_date: Upper bound of the date range, e.g. '2020-07-08'.

    Returns:
        Whatever ``pd.read_sql`` yields for ``chunksize=100``: an iterator of
        DataFrames holding at most 100 rows each.
    """
    # Local import: sqlalchemy is already required for create_engine().
    from sqlalchemy import text

    # No comma after the last selected column ("date, FROM" is a syntax error).
    sql_syn = text(
        "SELECT col_id, col_a, col_b, date FROM en_stat "
        "where date between :start_date and :end_date"
    )
    sql_df = pd.read_sql(
        sql_syn,
        con=db_connection,
        params={"start_date": start_date, "end_date": end_date},
        chunksize=100,
    )
    return sql_df

Related

Extract dataframe value based on SQL query

I'm looking to store data into a dataframe field based on an sql query. Here's my attempt:
import pyodbc
import pandas as pd
import pandasql as ps
from datetime import datetime
from pandasql import sqldf
conx_string = "driver={SQL SERVER}; server=mssql_db; database=db; UID=usr; PWD=my_pwd;"
conn = pyodbc.connect(conx_string)
crsr = conn.cursor()
# No comma after the last selected column ("OrigOrderID, from" is a SQL syntax error).
query = "select OrderID, OrderDate, OrigOrderID from tbl_Orders"
'''
Example of return for my sql statement:
OrderID | OrderDate | OrigOrderID
-----------------------------------
23 | 15-02-2023 | NULL
24 | 16-02-2023 | 23
'''
data = crsr.execute(query)
rows = [list(x) for x in data]
columns = [column[0] for column in crsr.description]
df = pd.DataFrame(rows, columns=columns)
# pandasql only queries DataFrames that exist as Python variables (there is no
# `tbl_Orders` frame here) and returns a DataFrame rather than a scalar, so the
# per-row sqldf call could not fill a single cell. Instead, look every
# OrigOrderID up in an OrderID -> OrderDate series; rows whose OrigOrderID is
# NULL (or unknown) get a missing value.
# Assumes OrderID is unique in tbl_Orders -- TODO confirm.
order_dates = df.set_index('OrderID')['OrderDate']
df['Date of receipt of OrigOrder'] = df['OrigOrderID'].map(order_dates)
for i in df.index:
    print(df.at[i, 'Date of receipt of OrigOrder'])
'''
Example of 'Date of receipt of OrigOrder' for 'OrderID = 24'
--> print(df.at[i, 'Date of receipt of OrigOrder']) should return '15-02-2023' as it is the date of its 'OrigOrderID = 23'
I'm expecting to have a new column df['Date of receipt of OrigOrder'] (object) having df['OrderDate'] of df['OrigOrderID'] per row.
Any help please?

How can we join data of two database and that data into third table by Django query

I have two database tables, StudentDetailModel and BookDetailsModel. In my schedular.py file I want to fetch the rows from these tables that match today's date and then save them in a third database table, BilledBook. First I query the data from these tables like this:
def get_book_date():
    """Print today's scheduled rows and fetch their student and book details.

    Selects the BookByDayDetailModel rows whose ``get_book_date`` field equals
    today's day of the month, then looks up the matching student and book
    records for each row.
    """
    today = date.today()
    # NOTE(review): this queryset used to be stored in `book_details` while the
    # loop below read an unassigned `student_details` (UnboundLocalError). It is
    # assumed the loop is meant to walk these rows -- confirm.
    todays_rows = BookByDayDetailModel.objects.filter(get_book_date=int(today.day)).values()
    print("Student_details : ",todays_rows)
    for student in todays_rows:
        print("meter ca : ",student['ca_number_id'])
        #Get all data from student_number which gets from todays date
        student_details = get_student_details(student['student_number'])
        #Get all data from book_details which gets from todays date
        print("student book_id : ",student['book_id_id'])
        book_details = get_book_details(student['book_id_id'])
def get_student_details(ca_number):
    """Return the StudentDetailModel rows whose student_number equals ca_number.

    Args:
        ca_number: Value matched against the ``student_number`` field.

    Returns:
        The ``.values()`` queryset of the matching rows.
    """
    # The filter previously referenced `student_number`, a name that is not
    # defined inside this function (NameError); the lookup value is the argument.
    student_details = StudentDetailModel.objects.filter(student_number=ca_number).values()
    return student_details
def get_book_details(book_id):
    """Return the ``.values()`` queryset of BookDetailsModel rows matching book_id."""
    matching_books = BookDetailsModel.objects.filter(book_id=book_id)
    return matching_books.values()
This gives me querysets of the data that was added today. Now I want to join these querysets and add the result to a third table, BilledBookModel. I tried this:
# NOTE(review): Manager.create() takes the new row's field values as keyword
# arguments; passing two querysets positionally will not work. The field names
# of BilledBookModel are not shown here -- confirm them before fixing this call.
bill_book_details = BilledBookModel.objects.create(student_details,book_details)
But it's not working. Can you give me some idea of how I can do this?

Create Dataframe with Cx_Oracle based on different query date

Below is a sample of DB table
date id name
01.02.11 4 aaaa
21.05.19 5 aaaa
31.12.12 5 aaaa
01.05.15 6 aaaa
In order to query data in the right way (avoiding duplicates), while querying I have to set a 'reporting date' which is the first month day.
The below code gives me the requested results but only for one month.
# Double quotes outside so the single-quoted SQL date literal does not end the
# Python string early.
sql = "select * from db where date = '01.03.20'"
def oracle(user, pwd, dsn, sql, columns):
    """Run ``sql`` against an Oracle database and return the rows as a DataFrame.

    Args:
        user: Database user name.
        pwd: Password for ``user``.
        dsn: Data source name passed to ``cx_Oracle.connect``.
        sql: Query text to execute.
        columns: Column labels for the resulting DataFrame.

    Returns:
        A DataFrame (dtype ``object``) holding every row returned by ``sql``.
    """
    # Connection to the database
    con = cx_Oracle.connect(user=user, password=pwd, dsn=dsn, encoding="UTF-8")
    try:
        con.outputtypehandler = OutputHandler
        # Cursor allows Python code to execute SQL statements in this Oracle session
        cur = con.cursor()
        # Check Connection
        print('Connected')
        # Create DF; fetchall() materialises every row before the connection closes
        df = pd.DataFrame(cur.execute(sql).fetchall(), columns=columns, dtype='object')
    finally:
        # Release the connection on every path; it was previously left open
        con.close()
    print('Shape:', df.shape)
    return df
Question: How can I query Data using CX_Oracle with different reporting date without doing it manually?
There are multiple way to solve this issue directly using SQL.
However, the expected solution should use 'a for loop'.
I was thinking about changing the reporting date with
# Build one query per reporting date: the first day of each month (01-12)
# for every two-digit year from 00 to 20.
for i in (f"{month:02d}" for month in range(1, 13)):
    for j in (f"{year:02d}" for year in range(0, 21)):
        # Double quotes outside so the SQL date literal can keep its single quotes.
        sql = f"select * from db where date = '01.{i}.{j}'"
For eg: date = 01.01.19
The idea is to query data for this date --> store it within DF
Go to Next month 01.02.19 --> Store it in DF
And so on until reached range 21 or reached last current month (latest date)
If someone has any idea to query data using a loop with cx_Oracle and Pandas for different date thanks for helping!
How about something like this
from datetime import date, datetime, timedelta
import calendar
# Choose Start Month
start_month = date(2019, 1, 1)
# Get Current Month (a single today() call, so year and month cannot disagree)
current_month = date.today().replace(day=1)
# Create list to collect all successfully run queries
executed_sql_queries = []
# Create list for failed queries
failed_queries = []
# Create list to collect dfs
dfs = []
while start_month <= current_month:
    query_date = start_month.strftime('%d.%m.%y')
    sql = f"""select * from db where date = '{query_date}' """
    try:
        # A positional argument may not follow a keyword argument, so
        # `columns` is passed by keyword as well.
        df = oracle(user, pwd, dsn, sql=sql, columns=columns)
    except cx_Oracle.Error as e:
        # Record the failure and move on to the next month; the query can be
        # re-run later from failed_queries.
        print(e)
        failed_queries.append(sql)
    else:
        executed_sql_queries.append(sql)
        dfs.append(df)
    finally:
        # Add one Month to the date for each run. start_month is always the
        # first of a month, so adding that month's length lands on the next first.
        days_in_month = calendar.monthrange(start_month.year, start_month.month)[1]
        start_month = start_month + timedelta(days=days_in_month)
# pd.concat raises ValueError on an empty list, e.g. when every query failed.
all_dfs = pd.concat(dfs) if dfs else pd.DataFrame(columns=columns)
executed_sql_queries:
["select * from db where date = '01.01.19' ",
"select * from db where date = '01.02.19' ",
"select * from db where date = '01.03.19' ",
"select * from db where date = '01.04.19' ",
"select * from db where date = '01.05.19' ",
"select * from db where date = '01.06.19' ",
"select * from db where date = '01.07.19' ",
"select * from db where date = '01.08.19' ",
"select * from db where date = '01.09.19' ",
"select * from db where date = '01.10.19' ",
"select * from db where date = '01.11.19' ",
"select * from db where date = '01.12.19' ",
"select * from db where date = '01.01.20' ",
"select * from db where date = '01.02.20' ",
"select * from db where date = '01.03.20' ",
"select * from db where date = '01.04.20' "]

How to insert today's date in SQL select statement using python?

I'm trying to send today variable into SQL but it is not working.
# `import datetime from date` is not valid Python; the class is imported like this.
from datetime import date
today = date.today()
# NOTE(review): "+ today +" sits inside the string literal, so the text
# "+ today +" is sent to the database verbatim instead of the value of `today`.
stmt = "select agent_email from customer_interaction_fact where to_date(DT) >= + today + ORDER BY CONVERSATION_CREATED_TIME DESC"
You don't have to compute today's date in Python. Just use the PostgreSQL function CURRENT_DATE:
stmt = "SELECT ... WHERE TO_DATE(DT) >= CURRENT_DATE ..."
Which database engine are you using? You need to convert the Python date object into a string in a format accepted by the database.
# In case YYYY-MM-DD: str() of a datetime.date gives the ISO form YYYY-MM-DD.
today_str = str(today)
# Interpolate the formatted string (not the raw object) and quote it so the
# database receives a string literal rather than the arithmetic 2020-05-14.
# NOTE(review): to_date(text, format) matches the to_date() call already used
# on DT; adjust the conversion function to your database engine if it differs.
stmt = f"""select agent_email
from customer_interaction_fact
where to_date(DT) >= to_date('{today_str}', 'YYYY-MM-DD')
order by CONVERSATION_CREATED_TIME DESC"""
Another solution, assuming the client (your program) is in the same timezone as the database engine, you could use your database engine datetime.now function. In SQLite for instance datetime('now')
try like below
from datetime import date
today = date.today()
# A date cannot be concatenated to a str with "+" (TypeError), and the value
# needs surrounding quotes and spaces to form valid SQL. `today` is produced
# locally, not taken from user input, so formatting it into the text is safe.
stmt = (
    "select agent_email,aht_in_secs,queueid,EFFORTSCORE from facts.public.customer_interaction_fact "
    "where agent_email <> 'Bot' and aht_in_secs is not NULL "
    f"and to_date(DT) >= '{today.isoformat()}' ORDER BY CONVERSATION_CREATED_TIME DESC"
)

python: query a DATETIME field in a sqlite db for a date range

Have a sqlite db that I've created in python that has a DATETIME field:
import sqlite3
con = sqlite3.connect('some.db',detect_types=sqlite3.PARSE_DECLTYPES)
with con:
cur = con.cursor()
cur.execute("CREATE TABLE Table(...Date DATETIME...)")
...
Date = datetime.datetime(<a format that resolves to the correct datetime object>)
...
altogether = (..., Date, ...)
cur.execute("INSERT INTO Table VALUES(...?...)", altogether)
con.commit()
This populates correctly. I later want to be able to query this DB by datetime, and have a function to manage my queries generally:
def query_db(path, query, args=(), one=False):
    """Run ``query`` against the SQLite database at ``path``.

    Args:
        path: Filesystem path of the SQLite database file.
        query: SQL text, optionally containing ``?`` placeholders.
        args: Values bound to the placeholders in ``query``.
        one: If true, return only the first row (or ``None`` if there is none).

    Returns:
        A list of dicts mapping column name to value, one per row; or, when
        ``one`` is true, the first such dict or ``None``.
    """
    connection = sqlite3.connect(path)
    try:
        cur = connection.execute(query, args)
        # description is None for statements that return no result set.
        names = [col[0] for col in cur.description or ()]
        rv = [dict(zip(names, row)) for row in cur.fetchall()]
    finally:
        # The connection was previously left open; close it on every path.
        connection.close()
    return (rv[0] if rv else None) if one else rv
# Locate some.db in the current working directory, then fetch every row
# through the generic query helper.
LOCAL_FOLDER = os.getcwd()
dbFile = os.path.join(LOCAL_FOLDER, "some.db")
samplequery = "SELECT * FROM Table"
result = query_db(dbFile, samplequery)
The above successfully produces a result that gives me everything in the Table.
However, how do I structure a query that would, for instance, give me all entries in the Table table of some.db that have a Date within the past 60 days?
You can do a query like this:
-- Rows whose date is on or after the day 60 days before today;
-- date('now', '-60 day') uses SQLite's date-modifier syntax.
SELECT *
FROM Table
where date >= date('now', '-60 day');
EDIT:
Based on your actual query:
-- Row count per (<field1>, <field2>) pair over the last 60 days.
-- The statement terminator belongs after GROUP BY: a ";" after the WHERE
-- clause would end the statement before the grouping is applied.
select <field1>, <field2>, count(1) as num
FROM Table
where date >= date('now', '-60 day')
group by <field1>, <field2>;
SELECT DISTINCT is unnecessary when you are using GROUP BY.

Categories