Create Dataframe with Cx_Oracle based on different query date - python

Below is a sample of DB table
date id name
01.02.11 4 aaaa
21.05.19 5 aaaa
31.12.12 5 aaaa
01.05.15 6 aaaa
In order to query data in the right way (avoiding duplicates), while querying I have to set a 'reporting date' which is the first month day.
The below code gives me the requested results but only for one month.
sql = 'select * from db where date = '01.03.20''
def oracle(user, pwd, dsn, sql, columns):
# Connection to databases
con = cx_Oracle.connect(user=user, password=pwd, dsn=dsn, encoding="UTF-8")
con.outputtypehandler = OutputHandler
# Cursor allows Python code to execute PostgreSQL command in a database session
cur = con.cursor()
# Check Connection
print('Connected')
# Create DF
df = pd.DataFrame(cur.execute(sql).fetchall(), columns= columns, dtype='object')[:]
print('Shape:', df.shape)
return df
Question: How can I query Data using CX_Oracle with different reporting date without doing it manually?
There are multiple way to solve this issue directly using SQL.
However, the expected solution should use 'a for loop'.
I was thinking about changing the reporting date with
for i in [str(i).zfill(2) for i in range(1,13)]:
for j in [str(j).zfill(2) for j in range(0,21)]
sql = f'select * from db where date = '01.{i}.{j}''
For eg: date = 01.01.19
The idea is to query data for this date --> store it within DF
Go to Next month 01.02.19 --> Store it in DF
And so on until reached range 21 or reached last current month (latest date)
If someone has any idea to query data using a loop with cx_Oracle and Pandas for different date thanks for helping!

How about something like this
from datetime import date, datetime, timedelta
import calendar
# Choose Start Month
start_month = date(2019, 1, 1)
# Get Current Month
current_month = date(datetime.today().year, datetime.today().month, 1)
# Create list to collect all successfully run queries
executed_sql_queries = []
# Create list for failed queries
failed_queries = []
# Create list to collect dfs
dfs = []
while start_month <= current_month:
query_date = start_month.strftime('%d.%m.%y')
sql = f"""select * from db where date = '{query_date}' """
try:
df = oracle(user, pwd, dsn, sql=sql, columns)
except sql_error as e:
print(e)
failed_queries.append(sql)
pass # move onto the next query or you can try re-running the query
else:
executed_sql_queries.append(sql)
dfs.append(df)
finally:
# Add one Month to the date for each run
days_in_month = calendar.monthrange(start_month.year, start_month.month)[1]
start_month = start_month + timedelta(days=days_in_month)
all_dfs = pd.concat(dfs)
executed_sql_queries:
["select * from db where date = '01.01.19' ",
"select * from db where date = '01.02.19' ",
"select * from db where date = '01.03.19' ",
"select * from db where date = '01.04.19' ",
"select * from db where date = '01.05.19' ",
"select * from db where date = '01.06.19' ",
"select * from db where date = '01.07.19' ",
"select * from db where date = '01.08.19' ",
"select * from db where date = '01.09.19' ",
"select * from db where date = '01.10.19' ",
"select * from db where date = '01.11.19' ",
"select * from db where date = '01.12.19' ",
"select * from db where date = '01.01.20' ",
"select * from db where date = '01.02.20' ",
"select * from db where date = '01.03.20' ",
"select * from db where date = '01.04.20' "]

Related

Writing to db from python durables engine

I have been trying to write business rules using durables engine in python. The source code is as below :
import pymysql
import pandas as pd
import datetime
from durable.lang import *
from tickets import *
con = pymysql.connect(host='localhost', user='XXXX', password='XXXX', database='nms')
#df_30min = pd.read_sql('select * from recentalarms where dbinserttime > now() - interval 30 minute', con)
df = pd.read_sql('select * from recentalarms', con)
df_tt = pd.read_sql('select tt_id from ticket', con)
format = "%Y%m%d%H%M%S"
time_before_5min = datetime.datetime.strftime(datetime.datetime.now() - datetime.timedelta(minutes=5), format)
time_before_15min = datetime.datetime.strftime(datetime.datetime.now() - datetime.timedelta(minutes=15), format)
with ruleset('nms'):
#when_all((m.status == 'open') & ( m.instance >= 3) & (m.tt_id == None) & (m.readflag == None ))
def rule_action1(c):
tt_id = Ticket.create(domain_prefix,entity=entity,start_time=down_since,severity=severity)
cursor = con.cursor()
sql_query = f"UPDATE recentalarm SET read_flag = 'true' , tt_id = '{tt_id}' WHERE id = '{alarm_id}';"
cursor.execute(sql_query)
con.commit()
print(f"1. Create ticket Run Completed")
#when_all((m.status == 'close') & ( m.uptime <= time_before_15min) & (m.uptime != None))
def rule_action2(c):
Ticket.resolve(tt_id,time_stmp)
cursor = con.cursor()
sql_query = f"UPDATE recentalarm SET read_flag = 'true' , tt_id = '{tt_id}' WHERE id = '{alarm_id}';"
cursor.execute(sql_query)
con.commit()
print(f"2. Resolve ticket Run Completed")
############some more rules
for i, row in df.iterrows():
stat = row.status.lower()
olt = row.oltname
result = df.iloc[0:i+1]['oltname'] == f'{row.oltname}'
instance_count = int(result.value_counts()[True])
ONTPort = row.ONTParentPort
alarm_id = row.id
entity = row.entity
severity = row.severity
up_since = str(row.eventclosetime)
down_since = str(row.eventime)
try:
post('gponnms', { 'status' : stat , 'instance' : instance_count , 'readflag' : row.read_flag , 'uptime' : up_since , 'downtime' : down_since})
except:
pass
con.close()
I have 2 queries.
In the rule_action I require to update the field back in the sql table for read_flag. This is being done through sql query. Is there another way to update the database other than writing sql query in the action function
My second question is in the last there is post message. I need to understand if this is really required.But if I delete this I get error message. The documentation on durables does not give much insight on it.

Incorrect date value when loading xlsx file to table using pymysql and xlrd

(Very) beginner python user here. I'm trying to load an xlsx file into a MySQL table using xlrd and pymysql python libraries and I'm getting an error:
pymysql.err.InternalError: (1292, "Incorrect date value: '43500' for column 'invoice_date' at row 1")
The datatype for invoice_date for my table is DATE. The format for this field on my xlsx file is also Date. Things work fine if I change the table datatype to varchar, but I'd prefer to have the data load into my table as a date instead of converting after the fact. Any ideas as to why I'm getting this error? It appears that xlrd or pymysql is reading '2/4/2019' in my xlxs file as '43500' and mysql is rejecting it due to a datatype mismatch.
import xlrd
import pymysql as MySQLdb
# Open workbook and define first sheet
book = xlrd.open_workbook("2019_Complete.xlsx")
sheet = book.sheet_by_index(0)
# MySQL connection
database = MySQLdb.connect (host="localhost", user="root",passwd="password", db="vendor")
# Get cursor, which is used to traverse the databse, line by line
cursor = database.cursor()
# INSERT INTO SQL query
query = """insert into table values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r in range(1, sheet.nrows):
lp = sheet.cell(r,0).value
pallet_lp = sheet.cell(r,1).value
bol = sheet.cell(r,2).value
invoice_date = sheet.cell(r,3).value
date_received = sheet.cell(r,4).value
date_repaired = sheet.cell(r,5).value
time_in_repair = sheet.cell(r,6).value
date_shipped = sheet.cell(r,7).value
serial_number = sheet.cell(r,8).value
upc = sheet.cell(r,9).value
product_type = sheet.cell(r,10).value
product_description = sheet.cell(r,11).value
repair_code = sheet.cell(r,12).value
condition = sheet.cell(r,13).value
repair_cost = sheet.cell(r,14).value
parts_cost = sheet.cell(r,15).value
total_cost = sheet.cell(r,16).value
repair_notes = sheet.cell(r,17).value
repair_cap = sheet.cell(r,18).value
complaint = sheet.cell(r,19).value
delta = sheet.cell(r,20).value
# Assign values from each row
values = (lp, pallet_lp, bol, invoice_date, date_received, date_repaired, time_in_repair, date_shipped, serial_number, upc, product_type, product_description, repair_code, condition, repair_cost, parts_cost, total_cost, repair_notes, repair_cap, complaint, delta)
# Execute sql Query
cursor.execute(query, values)
# Close the cursor
cursor.close()
# Commit the transaction
database.commit()
# Close the database connection
database.close()
# Print results
print ("")
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print ("I just imported " + columns + " columns and " + rows + " rows to MySQL!")
You can see this answer for a more detailed explanation, but basically Excel treats dates as a number relative to 1899-12-31, and so to convert your date value to an actual date you need to convert that number into an ISO format date which MySQL will accept. You can do that using date.fromordinal and date.isoformat. For example:
dval = 43500
d = date.fromordinal(dval + 693594)
print(d.isoformat())
Output:
2019-02-04

Add 30 days to the existing datetime column MYSQL Python

Hi guys I'm trying to update the date on the database in python with mysql.connector but the UPDATE STATEMENT has no effect on column. This is my try code:
conn = mysql.connector.connect(**self.db)
cur = conn.cursor()
query = "UPDATE LICENSE_STATUS SET expired_date = (select date_add(expired_date, INTERVAL 30 DAY)) WHERE user_id = %s"
cur.execute(query, (user_id,))
conn.commit()
I tried the same query with mysql-workbench and it works
I tried also this query:
query = "UPDATE LICENSE_STATUS SET expired_date = date_add(expired_date, INTERVAL 30 DAY) WHERE user_id = %s"

How to change the cursor to the next row using pyodbc in Python

I am trying to fetch records after a regular interval from a database table which growing with records. I am using Python and its pyodbc package to carry out the fetching of records. While fetching, how can I point the cursor to the next row of the row which was read/fetched last so that with every fetch I can only get the new set of records inserted.
To explain more,
my table has 100 records and they are fetched.
after an interval the table has 200 records and I want to fetch rows from 101 to 200. And so on.
Is there a way with pyodbc cursor?
Or any other suggestion would be very helpful.
Below is the code I am trying:
#!/usr/bin/python
import pyodbc
import csv
import time
conn_str = (
"DRIVER={PostgreSQL Unicode};"
"DATABASE=postgres;"
"UID=userid;"
"PWD=database;"
"SERVER=localhost;"
"PORT=5432;"
)
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()
def fetch_table(**kwargs):
qry = kwargs['qrystr']
try:
#cursor = conn.cursor()
cursor.execute(qry)
all_rows = cursor.fetchall()
rowcnt = cursor.rowcount
rownum = cursor.description
#return (rowcnt, rownum)
return all_rows
except pyodbc.ProgrammingError as e:
print ("Exception occured as :", type(e) , e)
def poll_db():
for i in [1, 2]:
stmt = "select * from my_database_table"
rows = fetch_table(qrystr = stmt)
print("***** For i = " , i , "******")
for r in rows:
print("ROW-> ", r)
time.sleep(10)
poll_db()
conn.close()
I don't think you can use pyodbc, or any other odbc package, to find "new" rows. But if there is a 'timestamp' column in your database, or if you can add such a column (some databases allow for it to be automatically populated as the time of insertion so you don't have to change the insert queries) then you can change your query to select only the rows whose timestamp is greater than the previous timestamp. And you can keep changing the prev_timestamp variable on each iteration.
def poll_db():
prev_timestamp = ""
for i in [1, 2]:
if prev_timestamp == "":
stmt = "select * from my_database_table"
else:
# convert your timestamp str to match the database's format
stmt = "select * from my_database_table where timestamp > " + str(prev_timestamp)
rows = fetch_table(qrystr = stmt)
prev_timestamp = datetime.datetime.now()
print("***** For i = " , i , "******")
for r in rows:
print("ROW-> ", r)
time.sleep(10)

python: query a DATETIME field in a sqlite db for a date range

Have a sqlite db that I've created in python that has a DATETIME field:
import sqlite3
con = sqlite3.connect('some.db',detect_types=sqlite3.PARSE_DECLTYPES)
with con:
cur = con.cursor()
cur.execute("CREATE TABLE Table(...Date DATETIME...)")
...
Date = datetime.datetime(<a format that resolves to the correct datetime object>)
...
altogether = (..., Date, ...)
cur.execute("INSERT INTO Table VALUES(...?...)", altogether)
con.commit()
This populates correctly. I later want to be able to query this DB by datetime, and have a function to manage my queries generally:
def query_db(path, query, args=(), one=False):
connection = sqlite3.connect(path)
cur = connection.execute(query, args)
rv = [dict((cur.description[idx][0], value)
for idx, value in enumerate(row)) for row in cur.fetchall()]
return (rv[0] if rv else None) if one else rv
LOCAL_FOLDER = os.getcwd()
samplequery = "SELECT * FROM Table"
dbFile = os.path.join(LOCAL_FOLDER, "some.db")
result = query_db(dbFile, samplequery)
The above would successfully produce a resultthat gave me everything in the Table.
However, how do I structure a query that would, for instance, give me all entries in the Table table of some.db that have a Date within the past 60 days?
You can do a query like this:
SELECT *
FROM Table
where date >= date('now', '-60 day');
EDIT:
Based on your actual query:
select <field1>, <field2>, count(1) as num
FROM Table
where date >= date('now', '-60 day');
group by <field1>, <field2>;
SELECT DISTINCT is unnecessary when you are using GROUP BY.

Categories