reading different Excel sheets in Python with if-elif-else - python

I am trying to read different sheets from an Excel file with an if-elif-else statement, depending on the input, and have written the following code:
import numpy as np
import pandas as pd
def ABSMATDATA(a,b,c,d,Material,Tmpref):
    """Load the ABSMAT worksheet that belongs to ``Material``.

    ``Material`` is matched exactly against the numeric keys below (the values
    look like molar masses -- TODO confirm), and the matching sheet of
    ABSMAT.xlsx is returned as a DataFrame. For an unknown key a message is
    printed and a placeholder list of five lists is returned instead.

    ``a``, ``b``, ``c``, ``d`` and ``Tmpref`` are accepted but not used here.
    """
    # Raw string: the original non-raw literal relied on '\M', '\B' and '\A'
    # being left alone, which newer Python versions warn about.
    workbook = r'F:\MAschinenbau\Bachelorarbeit\ABSMAT.xlsx'
    sheet_by_material = {
        2.016: 'H2',
        28.016: 'N2',
        32.000: 'O2',
        32.065: 'S',
        18.016: 'H2O',
        64.065: 'SO2',
        12.001: 'C Graphite',
        28.011: 'CO',
        44.011: 'CO2',
        16.043: 'CH4',
        30.070: 'C2H6',
        44.097: 'C3H8',
        58.124: 'C4H10',
    }
    sheet = sheet_by_material.get(Material)
    if sheet is None:
        print('No data for this material available')
        # Placeholder table: one 0..1000 column in steps of 100, then four 0..10 columns.
        return [list(np.arange(0,1100,100))] + [list(np.arange(0,11,1)) for _ in range(4)]
    # The result is returned on every path, so the caller always gets a value back.
    return pd.read_excel(workbook, sheet_name=sheet)
I am trying to run the code by calling ABSMATDATA(1,2,3,4,28.011,100) in the IPython console, but it is not giving any output. I was expecting to see df in my Variable Explorer as a 2-dimensional array.

Your function is not returning anything, and you can cut your code a bit:
def ABSMATDATA(a,b,c,d,Material,Tmpref):
    """Return the ABSMAT sheet for ``Material`` or a placeholder table.

    ``Material`` must equal one of the keys of ``material_map`` exactly; the
    mapped sheet of ABSMAT.xlsx is then read with pandas. Otherwise a message
    is printed and a list of five placeholder lists is returned.
    ``a``, ``b``, ``c``, ``d`` and ``Tmpref`` are not used by this function.
    """
    material_map = {2.016: 'H2',
                    28.016: 'N2',
                    32.000: 'O2',
                    32.065: 'S',
                    18.016: 'H2O',  # letter O, matching the sheet name in the workbook
                    64.065: 'SO2',
                    12.001: 'C Graphite',
                    28.011: 'CO',
                    44.011: 'CO2',
                    16.043: 'CH4',
                    30.070: 'C2H6',
                    44.097: 'C3H8',
                    58.124: 'C4H10'}
    if Material in material_map:
        # Raw string keeps the backslashes of the Windows path literal.
        df = pd.read_excel(r'F:\MAschinenbau\Bachelorarbeit\ABSMAT.xlsx',sheet_name=material_map[Material])
    else:
        df = [list(np.arange(0,1100,100)),list(np.arange(0,11,1)),list(np.arange(0,11,1)),list(np.arange(0,11,1)),list(np.arange(0,11,1))]
        print('No data for this material available')
    return df

Related

Optimize dataframe filtering on large datasets, pandas

I have a little challenge here and to be honest, I have absolutely no idea how to handle it.
I have this dataframe composed of 660,000 rows and about 50 columns. I need to filter this dataframe very frequently and retrieve the filtered dataframe as fast as possible (goal is to have a processing time <1second). I'd like to be able to run that locally on a laptop, therefore my "processing power" is limited.
I have multiple inputs to filter the dataframe, some are set manually (see input 1) some are retrieved from another script (see input 2, the other script is not included in the code here for simplicity).
I was hoping to simply filter the dataset using df[(df.column == filtervalue)]. However, the processing time seems to be far too long.
Therefore, I am wondering whether there are techniques to reduce this processing time, or whether the only way to speed it up is to use a server with more CPU and memory.
Thanks for the help
import pandas as pd
# Column dtypes for the CSV, generated per stage so every column name is
# spelled consistently. The hand-written dict had a key " action2_preflop"
# with a leading space (which cannot match the column) and listed
# "myposition" twice.
_STAGES = ("preflop", "flop", "turn", "river")
column_dtypes = {"id": str, "dataset1": str, "dataset2": str, "myposition": str,
                 "myhand": str, "tiers": 'Int64', "assorties": str}
for _stage in _STAGES:
    for _n in (1, 2, 3):
        column_dtypes[f"bet_{_n}_{_stage}"] = float
    for _n in (1, 2, 3, 4):
        column_dtypes[f"action{_n}_{_stage}"] = str
        column_dtypes[f"action{_n}_{_stage}_binary"] = 'Int64'
# These columns exist only for the stages after "preflop".
for _stage in _STAGES[1:]:
    column_dtypes[f"cards_{_stage}"] = str
    column_dtypes[f"besthand_{_stage}"] = str
    column_dtypes[f"checker_{_stage}"] = float
    column_dtypes[f"handtype_{_stage}"] = str
    column_dtypes[f"topsuite_{_stage}"] = 'Int64'
    column_dtypes[f"topcolor_{_stage}"] = 'Int64'
df = pd.read_csv('xxxxxxxx', sep=";", dtype=column_dtypes)
# Moves the existing index into a regular "index" column and renumbers the rows.
df = df.reset_index()
# Inputs for filters 1: values set by hand.
# An empty string is used where no value is given.
myposition ="sb"
myhand = "ackc"
flop = "ad9d4h"
turn = "8d"
river = "th"
a1_preflop = "r"
a2_preflop = "r"
a3_preflop = "c"
a4_preflop = ""
a1_flop = "r"
a2_flop = "f"
a3_flop = ""
a4_flop = ""
a1_turn = ""
a2_turn = ""
a3_turn = ""
a4_turn = ""
a1_river = ""
a2_river = ""
a3_river = ""
a4_river = ""
# Inputs for filters 2 (from a different script).
# NOTE: the bare names below are placeholders for values produced by that
# other script; nothing in this snippet defines them, so running these lines
# as-is would raise NameError.
tiers
assorties_status
best_allhands_flop[0]
best_allhands_flop[1]
best_allhands_flop[2]
highest_suite_flop
highest_color_flop
best_allhands_turn[0]
best_allhands_turn[1]
best_allhands_turn[2]
highest_suite_turn
highest_color_turn
best_allhands_river[0]
best_allhands_river[1]
best_allhands_river[2]
highest_suite_river
highest_color_river
# Example filters (left commented out): each one repeats the previous boolean
# mask and adds one more preflop action condition.
#filtre_preflop_a1 = df[(df.myposition == myposition) & (df.tiers == tiers) & (df.assorties == assorties_status) & (df.action1_preflop == a1_preflop)]
#filtre_preflop_a2 = df[(df.myposition == myposition) & (df.tiers == tiers) & (df.assorties == assorties_status) & (df.action1_preflop == a1_preflop) & (df.action2_preflop == a2_preflop)]
#filtre_preflop_a3 = df[(df.myposition == myposition) & (df.tiers == tiers) & (df.assorties == assorties_status) & (df.action1_preflop == a1_preflop) & (df.action2_preflop == a2_preflop) & (df.action3_preflop == a3_preflop)]
#filtre_preflop_a4 = df[(df.myposition == myposition) & (df.tiers == tiers) & (df.assorties == assorties_status) & (df.action1_preflop == a1_preflop) & (df.action2_preflop == a2_preflop) & (df.action3_preflop == a3_preflop) & (df.action4_preflop == a4_preflop)]

Dataframe not appending values to column

I have an empty dataframe that I've loaded into my code. If the user says a keyword, it'll append their value to a specific column in the dataframe. However, whenever the user says the keyword, it's not appending it. This is my code:
# Read one line of the form "<class> <subject> <work ...>" and, for class
# 'c3' / subject 'math', add the work text as a new row in the 'math' column.
c3 = pd.read_csv('c3_homework.csv')
homework = input("").lower().split()
if len(homework) < 2:
    # Fewer than two words would otherwise fail with a bare IndexError below.
    raise SystemExit("Expected input of the form: <class> <subject> <work ...>")
grade_class = homework[0]
subject = homework[1]
work = ' '.join(homework[2:])
print(grade_class)
if grade_class == 'c3' and subject == 'math':
    print(work)
    # DataFrame.append was removed in pandas 2.0; concat with a one-row frame
    # gives the same result.
    c3 = pd.concat([c3, pd.DataFrame([{'math': work}])], ignore_index=True)
    # Write the updated table back, otherwise the new row exists only in memory.
    # index=False keeps the row index out of the file so it reads back cleanly.
    c3.to_csv('c3_homework.csv', index=False)
Do you have to make an empty csv?
You can create an empty data frame and save it as a csv file.
Below is my suggested code:
# Start from an empty frame, add the entered homework (class 'c3', subject
# 'math' only) and save the result as c3_homework.csv.
df = pd.DataFrame()
homework = input("").lower().split()
if len(homework) < 2:
    # Fewer than two words would otherwise fail with a bare IndexError below.
    raise SystemExit("Expected input of the form: <class> <subject> <work ...>")
grade_class = homework[0]
subject = homework[1]
work = ' '.join(homework[2:])
print(grade_class)
if grade_class == 'c3' and subject == 'math':
    print(work)
    # DataFrame.append was removed in pandas 2.0; concat with a one-row frame
    # is the replacement.
    df = pd.concat([df, pd.DataFrame([{'math': work}])], ignore_index=True)
# Written unconditionally, so the file is created even when nothing matched.
df.to_csv('c3_homework.csv')

How to create library of a self-written static function in python

I have a python script which has multiple static functions. I want to convert that complete python script into a python library
import pandas as pd
import numpy as np
import EA_Upload_config as cfg
import datetime
#%%
def clockPrint(sentence):
    """Print ``sentence`` prefixed with the current local time as ``HH:MM:SS : ``.

    ``sentence`` may be any object; it is formatted with ``str()`` semantics,
    so non-string values no longer raise ``TypeError``.
    """
    date_time = datetime.datetime.now().strftime("%H:%M:%S")
    print(f"{date_time} : {sentence}")
def uploadToEA(df_,ds_api_name,operation_,instance,xmd_=None): #Upsert #Overwrite
    """Upload ``df_`` to the dataset ``ds_api_name`` on the chosen Salesforce instance.

    ``instance`` is matched case-insensitively against 'commercial' and
    'analytical' to pick the environment URL. ``operation_`` and ``xmd_`` are
    passed through to ``load_df_to_EA`` unchanged.

    Raises:
        ValueError: if ``instance`` is neither 'commercial' nor 'analytical'.
            (Previously this case failed later with an UnboundLocalError.)
    """
    env_urls = {
        'commercial': 'https://spglobalratings.my.salesforce.com',
        'analytical': 'https://spglobalratingsae.my.salesforce.com',
    }
    # Validate first so a bad instance name fails before a browser session is started.
    env_url = env_urls.get(instance.lower())
    if env_url is None:
        raise ValueError("Unknown instance " + repr(instance) + "; expected 'Commercial' or 'Analytical'.")
    # Imported lazily, as in the original, so the module loads without this package.
    import SalesforceEinsteinAnalytics as EA
    clockPrint("Upload Process Initiated for "+instance+" instance...")
    EAS = EA.salesforceEinsteinAnalytics(env_url=env_url, browser='chrome')
    EAS.load_df_to_EA(df_,dataset_api_name=ds_api_name, operation=operation_,xmd=xmd_,fillna=False) #Error because of fillna=False
    clockPrint("Upload Process Completed successfully for "+instance+" instance. Navigate to (Einstein Analytics --> Data Manager --> Monitor) to check progress.")
def processDate(date):
    """Format ``date`` as ``MM/DD/YYYY``; null-like values come back as ``np.nan``."""
    if not pd.isnull(date):
        parsed = pd.to_datetime(date)
        return datetime.datetime.strftime(parsed,"%m/%d/%Y")
    return np.nan
if __name__ == '__main__':
    # Script entry point: load the CSV named in the config, normalise dates and
    # missing values, then upload the frame once per configured instance.
    df = pd.read_csv(cfg.FILE_PATH)
    # An empty DATE_COLUMNS simply makes this loop a no-op.
    for c in cfg.DATE_COLUMNS:
        df[c] = df[c].apply(processDate)
    for c in df.columns:
        # Assign the filled column back instead of fillna(inplace=True) on
        # df[c]: the in-place form acts on a temporary selection and does not
        # reliably update df under pandas Copy-on-Write.
        if df[c].dtype == "O":
            df[c] = df[c].fillna('')
        elif np.issubdtype(df[c].dtype, np.number):
            df[c] = df[c].fillna(0)
        elif df[c].dtype == "datetime64[ns]":
            df[c] = df[c].apply(processDate).fillna("")
    # Catch-all for any column type not handled above.
    df = df.fillna("")
    for instance in cfg.INSTANCES:
        if instance.lower() == 'commercial':
            uploadToEA(df, cfg.COM_DATASET_API_NAME, cfg.COM_OPERATION, instance, cfg.COM_XMD)
        elif instance.lower() == 'analytical':
            uploadToEA(df, cfg.ANA_DATASET_API_NAME, cfg.ANA_OPERATION, instance, cfg.ANA_XMD)
        else:
            clockPrint("Update INSTANCES variable as ['Commercial'] or ['Analytical'] or ['Commercial','Analytical'].")
This is my complete python script which I want to convert it into a library. How should I do it?

Converting python script(which has __name__ == '__main__') to a Package/Library

I have a python script
import pandas as pd
import numpy as np
import EA_Upload_config as cfg
import datetime
#%%
def clockPrint(sentence):
    """Print ``sentence`` prefixed with the current local time as ``HH:MM:SS : ``.

    ``sentence`` may be any object; it is formatted with ``str()`` semantics,
    so non-string values no longer raise ``TypeError``.
    """
    date_time = datetime.datetime.now().strftime("%H:%M:%S")
    print(f"{date_time} : {sentence}")
def uploadToEA(df_,ds_api_name,operation_,instance,xmd_=None): #Upsert #Overwrite
    """Upload ``df_`` to the dataset ``ds_api_name`` on the chosen Salesforce instance.

    ``instance`` is matched case-insensitively against 'commercial' and
    'analytical' to pick the environment URL. ``operation_`` and ``xmd_`` are
    passed through to ``load_df_to_EA`` unchanged.

    Raises:
        ValueError: if ``instance`` is neither 'commercial' nor 'analytical'.
            (Previously this case failed later with an UnboundLocalError.)
    """
    env_urls = {
        'commercial': 'https://spglobalratings.my.salesforce.com',
        'analytical': 'https://spglobalratingsae.my.salesforce.com',
    }
    # Validate first so a bad instance name fails before a browser session is started.
    env_url = env_urls.get(instance.lower())
    if env_url is None:
        raise ValueError("Unknown instance " + repr(instance) + "; expected 'Commercial' or 'Analytical'.")
    # Imported lazily, as in the original, so the module loads without this package.
    import SalesforceEinsteinAnalytics as EA
    clockPrint("Upload Process Initiated for "+instance+" instance...")
    EAS = EA.salesforceEinsteinAnalytics(env_url=env_url, browser='chrome')
    EAS.load_df_to_EA(df_,dataset_api_name=ds_api_name, operation=operation_,xmd=xmd_,fillna=False) #Error because of fillna=False
    clockPrint("Upload Process Completed successfully for "+instance+" instance. Navigate to (Einstein Analytics --> Data Manager --> Monitor) to check progress.")
def processDate(date):
    """Return ``date`` rendered as ``MM/DD/YYYY``, or ``np.nan`` when it is null-like."""
    if not pd.isnull(date):
        as_timestamp = pd.to_datetime(date)
        return datetime.datetime.strftime(as_timestamp,"%m/%d/%Y")
    return np.nan
if __name__ == '__main__':
    # Script entry point: load the CSV named in the config, normalise dates and
    # missing values, then upload the frame once per configured instance.
    # "As of Date" is read as text so pandas does not reinterpret it on load.
    df = pd.read_csv(cfg.FILE_PATH, dtype={"As of Date": str})
    # An empty DATE_COLUMNS simply makes this loop a no-op.
    for c in cfg.DATE_COLUMNS:
        df[c] = df[c].apply(processDate)
    for c in df.columns:
        # Assign the filled column back instead of fillna(inplace=True) on
        # df[c]: the in-place form acts on a temporary selection and does not
        # reliably update df under pandas Copy-on-Write.
        if df[c].dtype == "O":
            df[c] = df[c].fillna('')
        elif np.issubdtype(df[c].dtype, np.number):
            df[c] = df[c].fillna(0)
        elif df[c].dtype == "datetime64[ns]":
            df[c] = df[c].apply(processDate).fillna("")
    # Catch-all for any column type not handled above.
    df = df.fillna("")
    for instance in cfg.INSTANCES:
        if instance.lower() == 'commercial':
            uploadToEA(df, cfg.COM_DATASET_API_NAME, cfg.COM_OPERATION, instance, cfg.COM_XMD)
        elif instance.lower() == 'analytical':
            uploadToEA(df, cfg.ANA_DATASET_API_NAME, cfg.ANA_OPERATION, instance, cfg.ANA_XMD)
        else:
            clockPrint("Update INSTANCES variable as ['Commercial'] or ['Analytical'] or ['Commercial','Analytical'].")
It has three functions within it -
clockPrint
ProcessDate
uploadtoEA
** There is a link between this python script with another python script named EA_Upload_config which is being imported in this python script as cfg.
Now I want to convert this entire python script into a package. I am confused about how I should handle the `if __name__ == '__main__':` block.
Aim is to convert this entire python script into a package

How to update PySimpleGUI Listbox that reads an excel file

I am using python3.7 and this is the current code base(apologies for putting so much code but thought it would help overall)
# TRADE_ENTRY builds a PySimpleGUI window titled 'Trade Entry System' with one
# labelled row per entry in `columns`, then loops on window.read(timeout=500)
# and handles the SEND / NEW_NAME / NEW_STRAT / NEW_UND buttons and timeouts.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# some statements (for example which branch each `break` belongs to) cannot be
# confirmed from the text; the code lines below are left exactly as found.
def TRADE_ENTRY(df_names, df_underlyings,df_strategies, columns, param, out_path,recovery_path):
# Flags set to 1 after the matching NEW_* button was handled; they are checked
# in the timeout branch to decide which listbox to reload.
nameUpdate =0
strategyUpdate=0
underlyingUpdate=0
sg.theme('Dark Brown 1')
# One text label per column name.
listing = [sg.Text(u, size = param) for u in columns]
header = [[x] for x in listing]
now = datetime.datetime.now()
# One input widget per label; `core` is zipped with `listing` further down, so
# its order has to follow the order of `columns`.
core = [
sg.Input(f"{now.month}/{now.day}/{now.year}",size = param),
sg.Input(f"{now.hour}:{now.minute}:{now.second}",size = param),
sg.Listbox(list(df_strategies.STRATEGIES), size=(20,2), enable_events=False, key='_PLAYERS0_'),
sg.Listbox(['ETF', 'EQT', 'FUT', 'OPT', 'BOND'],enable_events=False,key='_PLAYERS20_',size = (20,2)),
sg.Listbox(list(df_names.NAMES), size=(20,4), enable_events=False,key='_PLAYERS6_'),
sg.Listbox( ['B', 'S'],size = (20,1),enable_events=False,key='_PLAYERS12_'),
sg.Input(size = param),
sg.Input(size = param),
sg.CalendarButton('Calendar', pad=None, font=('MS Sans Serif', 10, 'bold'),
button_color=('yellow', 'brown'), format=('%d/%m/%Y'), key='_CALENDAR_', target='_INP_'),
sg.Input(size = param),
sg.Listbox(list(df_underlyings.UNDERLYINGS), size=(20,4), enable_events=False,key='_PLAYERS2_'),
sg.Listbox(['C', 'P', 'N/A'],size = param),
]
# Pair every label with its widget to form the window rows.
mesh = [[x,y] for (x,y) in list(zip(listing, core))]
# Row 8 holds the CalendarButton; it gets the extra input that the button
# targets through the key '_INP_'.
mesh[8].append(sg.Input(size = (10,2),key = '_INP_'))
layout =[[sg.Button("SEND"),sg.Button("NEW_NAME"), sg.Button("NEW_STRAT"), sg.Button("NEW_UND")] ]+ mesh
window = sg.Window('Trade Entry System', layout, font='Courier 12').Finalize()
# Event loop: read() returns after at most 500 ms with a timeout event.
while True:
event, values = window.read(timeout=500)
#print('EVENT, VALUES', event, values)# all the inputs with extra information for compiler
if event == "SEND":
data = values
a = list(data.values())
# List values (listbox selections) go through empty_handler, which is defined
# elsewhere; empty strings are replaced by the marker "EMPTY".
a = [x if isinstance(x, list) == False else empty_handler(x) for x in a]
a = [x if x !="" else "EMPTY" for x in a ]
#print('A', a)#all the inputs now in a list
df = pd.DataFrame(a, index = columns)
print('DF1', df)#columns dataframe with column names and then the values
# Transposed so the column names become the frame's columns (single row).
df = df.transpose()
#print('DF2', df)#rows dataframe with column names and then the values
# error_handling is defined elsewhere; the code below expects it to return
# "ERROR" or "CORRECT".
status = error_handling(df)
#print('STATUS', status)
if status == "ERROR":
print("YOU MUST RECTIFY INPUT")
elif status == "CORRECT":
#if a future then will overwrite its name
if df['TYPE'][0] == "FUT":
df['NAME'][0] = "F-"+ df['UNDERLYING'][0] + "-" +df['EXPIRATION'][0]
#if an option then will overwrite its name
elif df['TYPE'][0] =="OPT":
df['NAME'][0] = 'O-' + df['UNDERLYING'][0] + "--" + df['OPTION_TYPE'][0] +df['STRIKE'][0] +"--" +df['EXPIRATION'][0]
else:
pass
processing(df, recovery_path, out_path)
else:
print("ERROR WITH USER INPUT FATAL")
break
# The three NEW_* buttons call security_creation (defined elsewhere) for the
# matching workbook and raise the corresponding update flag.
elif event == "NEW_NAME":
security_creation(r'Y:\NAMES.xlsx', "Sheet1", "NAME", param)
nameUpdate=1
continue
elif event == "NEW_STRAT":
security_creation(r'Y:\STRATEGIES.xlsx', "Sheet1", "STRATEGY", param)
strategyUpdate=1
continue
elif event == "NEW_UND":
security_creation(r'Y:\UNDERLYINGS.xlsx', "Sheet1", "UNDERLYINGS", param)
underlyingUpdate=1
continue
# Timeout: reload whichever workbooks were flagged and refresh the listboxes.
# NOTE(review): the *Update flags are never reset in the visible code, so once
# one is set its workbook appears to be re-read on every timeout -- confirm.
# NOTE(review): DataFrame.values.tolist() yields one list per row, whereas the
# listboxes were first filled from a single column (e.g. list(df_names.NAMES));
# verify the refreshed entries have the expected shape.
elif event == sg.TIMEOUT_KEY:
if(nameUpdate==1):
df_names = pd.read_excel(r'Y:\NAMES.xlsx', "Sheet1")
df =df_names.values.tolist()
window['_PLAYERS6_'].update(values=df, set_to_index=0)
if(underlyingUpdate==1):
df_underlyings = pd.read_excel(r'Y:\UNDERLYINGS.xlsx', "Sheet1")
df =df_underlyings.values.tolist()
window['_PLAYERS2_'].update(values=df, set_to_index=0)
if(strategyUpdate==1):
df_strategies = pd.read_excel(r'Y:\STRATEGIES.xlsx', "Sheet1")
df =df_strategies.values.tolist()
window['_PLAYERS0_'].update(values=df, set_to_index=0)
print("Listboxes updated !")
# Any other event ends the loop.
else:
print("OVER")
break
window.close()
TRADE_ENTRY(df_names, df_underlyings,df_strategies, columns, param,out_path, recovery_path)
Towards the end of the function there are three elif branches: NEW_NAME, NEW_STRAT and NEW_UND each handle the user submitting information to the corresponding Excel file. The function security_creation actually updates those Excel files. Below that I am trying to update the Listboxes, but with no luck.
Any help would be greatly appreciated since i am so confused

Categories