I have created multiple functions, one for each client. They are all basically the same; only the worker names change inside each one.
The first function runs as it should, but the second one does not work. It's as if the DataFrame isn't being carried through the rest of the code.
import pandas as pd
import sys
#file loc
R1 = input('Data do Relatório desejado (dd.mm) ---> ')
loc = r'C:\Users\lucas.mascia\Downloads\relatorio-{0}.xlsx'.format(R1)
#opening file with exact needed columns
df = pd.read_excel(loc)
df = df[[2,15,16,17]]
[...]
def func1():
    global df, R1, bcsulp1, bcsulp2
    # List of solicitantes in Postal Saude
    list_sol = [lista_solic["worker1"]]
    # Filter Postal Saude solicitantes
    df = df[(df['Client']==lista_clientes[2])
            & (df['worker'].isin(list_sol))]
    # Alphabetical order
    df = df.sort_index(by=['worker', 'place'])
    # Grouping data of column
    gp = df.groupby('worker')
    # Loop?
    for i in range(1,2):
        df = gp.get_group(list_sol[(i-1)])
        # Name_i #############################################################
        # Internal and external protocol -----------------------------------------
        p_interno = df[(df['place'].str.contains("C. Martins"))
                       & (df['task']==lista_tarefas[1])]
        globals()['fi'+str(i)] = len(p_interno)
        f_i = globals()['fi'+str(i)]
        p_externo = df[(~df['place'].str.contains("C. Martins"))
                       & (df['task']==lista_tarefas[1])]
        globals()['fe'+str(i)] = len(p_externo)
        f_e = globals()['fe'+str(i)]
        # Internal and external virtual protocol ---------------------------------
        pv_interno = df[(df['place'].str.contains("C. Martins"))
                        & (df['task']==lista_tarefas[3])]
        globals()['vi'+str(i)] = len(pv_interno)
        v_i = globals()['vi'+str(i)]
        pv_externo = df[(~df['place'].str.contains("C. Martins"))
                        & (df['task']==lista_tarefas[3])]
        globals()['ve'+str(i)] = len(pv_externo)
        v_e = globals()['ve'+str(i)]
        # Normal and special postal protocol
        pp_normal = df[(df['task']==lista_tarefas[51])]
        pp_especial = df[(df['task']==lista_tarefas[52])]
        globals()['postal'+str(i)] = len(pp_especial) + len(pp_normal)
        post = globals()['postal'+str(i)]
        # Full and partial copy 6.1 - 6.2
        copia_i = df[(df['task']==lista_tarefas[61])]
        copia_p = df[(df['task']==lista_tarefas[62])]
        globals()['copia'+str(i)] = len(copia_p) + len(copia_i)
        cop = globals()['copia'+str(i)]
        # Electronic copy
        copia_elet = df[(df['task']==lista_tarefas[7])]
        globals()['copia_elet'+str(i)] = len(copia_elet)
        cop_e = globals()['copia_elet'+str(i)]
        # AIJ / hearing / conciliation
        aij = df[(df['task']==lista_tarefas[81])]
        aud = df[(df['task']==lista_tarefas[82])]
        conc = df[(df['task']==lista_tarefas[83])]
        globals()['audiencia'+str(i)] = len(aij) + len(aud) + len(conc)
        audi = globals()['audiencia'+str(i)]
        globals()['bcsulp'+str(i)] = [f_i, f_e, v_i, v_e, post, cop, cop_e, audi]
def func2(): [...]
def func3(): [...]
def func4(): [...]
func1()
func2()
The following error comes up:
Traceback (most recent call last):
File "Relatorio_Filtro.py", line 736
func2()
File "Relatorio_Filtro.py", line 682
df = gp.get_group(list_sol[(i-1)])
File "C:..."
inds = self._get_index(name)
File "C:..."
return self.indices[name]
KeyError: 'WORKER1'
Question:
Am I missing something so that the Excel DataFrame imported at the beginning is carried through the whole program?
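A minimal sketch of an alternative pattern, assuming the column names ('Client', 'worker', 'place', 'task') and the lista_* lookups from the snippet above: each report function works on its own filtered copy instead of reassigning the global df, so later functions still see the full data. The helper name and its parameters are hypothetical.
import pandas as pd

def build_report(df, client, workers, tarefas):
    # Hypothetical helper: filter one client's rows on a copy,
    # leaving the original DataFrame untouched for the next client.
    sub = df[(df['Client'] == client) & (df['worker'].isin(workers))].copy()
    sub = sub.sort_values(by=['worker', 'place'])
    results = {}
    for worker, grp in sub.groupby('worker'):
        interno = grp[grp['place'].str.contains("C. Martins") & (grp['task'] == tarefas[1])]
        externo = grp[~grp['place'].str.contains("C. Martins") & (grp['task'] == tarefas[1])]
        results[worker] = [len(interno), len(externo)]
    return results

# Usage: the DataFrame read from Excel is never overwritten, so every call
# starts from the complete data.
# bcsulp1 = build_report(df, lista_clientes[2], [lista_solic["worker1"]], lista_tarefas)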
Related
I wrote code to convert a PDF to CSV, read the CSV file, and export only the relevant information from it. The function is supposed to return filtered information such as english_name: 'someone's name', original_language_name: 'someone's name', etc., but instead the command returned english_name: '', original_language_name: '', etc. Below is the code that I wrote:
import pandas as pd
import tabula
from pandas import DataFrame
from backend.classes import Shareholder, Officer
from typing import List

def strip_string(string):
    return str(string).strip()

def get_float_without_thousands_separator(string, thousands_separator):
    return float(string.replace(thousands_separator, ''))

def extract_officers_and_shareholders_lists_from_df(df, total_number_of_shares, no_data_placeholder, number_of_shares, thousands_separator):
    officers = []
    shareholders = []
    NAME = 'Nama'
    POSITION = 'Jabatan'
    for row in range((df.shape[0])):
        if str(df[POSITION][row]).strip() != no_data_placeholder:
            original_language_name = strip_string(df[NAME][row])
            english_name = strip_string(df[NAME][row])
            position = strip_string(df[POSITION][row])
            officer = Officer(english_name=english_name, original_language_name=original_language_name, position=position)
            officers.append(officer)
        elif str(df[number_of_shares][row]).strip() != no_data_placeholder:
            original_language_name = strip_string(df[NAME][row])
            english_name = strip_string(df[NAME][row])
            number_of_shares_string = strip_string(df[number_of_shares][row])
            number_of_shares_number = get_float_without_thousands_separator(number_of_shares_string, thousands_separator)
            shareholding_percentage = (number_of_shares_number / total_number_of_shares) * 100
            shareholder = Shareholder(english_name=english_name, original_language_name=original_language_name, shareholding_percentage=shareholding_percentage)
            shareholders.append(shareholder)
    return officers, shareholders

def get_officers_and_shareholders_lists(pdf_input_file):
    NO_DATA_PLACEHOLDER = '-'
    NUMBER_OF_SHARES = 'Jumlah Lembar Saham'
    THOUSANDS_SEPARATOR = '.'
    output_file_path = 'CSV/Officers_and_Shareholders.csv'
    tabula.convert_into(pdf_input_file, output_file_path, output_format='csv', pages='all')
    df = pd.read_csv(output_file_path, header=3, on_bad_lines='skip')
    all_shares = df[NUMBER_OF_SHARES].to_list()
    all_shares_strings = [strip_string(shares) for shares in all_shares if strip_string(shares) != NO_DATA_PLACEHOLDER]
    all_shares_numbers = [get_float_without_thousands_separator(shares, THOUSANDS_SEPARATOR) for shares in all_shares_strings]
    total_number_of_shares = sum(all_shares_numbers)
    return extract_officers_and_shareholders_lists_from_df(
        df=df,
        total_number_of_shares=total_number_of_shares,
        number_of_shares=NUMBER_OF_SHARES,
        no_data_placeholder=NO_DATA_PLACEHOLDER,
        thousands_separator=THOUSANDS_SEPARATOR)
The command I use to run the code above is python3 -m backend.officers_and_shareholders. Is there something I can pass in so that english_name and original_language_name return actual names?
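A small diagnostic sketch that may help narrow this down, assuming the same output path, header row, and column names ('Nama', 'Jabatan') as above: print what tabula actually wrote to the CSV before any filtering is applied.
import pandas as pd

# Check the converted CSV before filtering (path and header=3 taken from the code above).
df = pd.read_csv('CSV/Officers_and_Shareholders.csv', header=3, on_bad_lines='skip')
print(df.columns.tolist())  # are 'Nama' and 'Jabatan' really the column headers?
for col in ('Nama', 'Jabatan'):
    if col in df.columns:
        print(col, 'non-empty rows:', df[col].notna().sum())
        print(df[col].head(10))
    else:
        print(col, 'not found - the header row index may be wrong')
If the name columns come back empty or under a different header, the problem is in the tabula conversion step rather than in the filtering logic.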
I'm seeking your help with an executable file I converted from my .py file using auto-py-to-exe. The code below is for my CSV automation report. It looks fine when running in the IDE and from CMD, but when I convert it to an .exe this is what happens:
Traceback (most recent call last):
File "new.py", line 7, in <module>
input_file = input("Enter the file name of your HC file: ")
RuntimeError: input(): lost sys.stdin
Here is my code for your reference; I hope you can help me with this issue.
import pandas as pd
import numpy as np
print("Fixed Network Health Check Checker")
input_file = input("Enter the file name of your HC file: ")
file = input_file + str('.xlsx')
df = pd.read_excel(file, sheet_name = 'MSAN Cabinets')
print("Done")
#fixed
df['MSAN Interface'] = df['MSAN Interface'].replace(np.nan, 0)
df['ACCESS Interface 1 (IPRAN, ATN, LSA, VLAN)'] = df['ACCESS Interface 1 (IPRAN, ATN, LSA, VLAN)'].replace(np.nan, 0)
df['Homing AG2'] = df['Homing AG1'].replace(np.nan, 0)
df = df.iloc[:11255]
# filter "REGION" and drop unnecessary columns
f_df1 = df[df['REGION'] == 'MIN']
dropcols_df1 = f_df1.drop(df.iloc[:, 1:6], axis = 1)
dropcols_df2 = dropcols_df1.drop(df.iloc[:, 22:27], axis = 1)
dropcols_df3 = dropcols_df2.drop(df.iloc[:, 37:50], axis = 1)
# filter "MSAN Interface" and filter the peak util for >= 50%
f_d2 = dropcols_df3['MSAN Interface'] != 0
msan_int = dropcols_df3[f_d2]
f_msan_int = msan_int['Peak Util'] >= 0.5
new_df = msan_int[f_msan_int]
# filter "ACCESS Interface 1 (IPRAN, ATN, LSA, VLAN)" and filter the peak util for >= 50%
fblank_msan_int = dropcols_df3['MSAN Interface'] == 0
msan_int1 = dropcols_df3[fblank_msan_int]
f_df3 = dropcols_df3['ACCESS Interface 1 (IPRAN, ATN, LSA, VLAN)'] != 0
access_int1 = dropcols_df3[f_df3]
f_access_int1 = access_int1['Peak Util.1'] >= 0.5
new_df1 = access_int1[f_access_int1]
# filter "Homing AG1" and filter the peak util for >= 50%
fblank_msan_int1 = dropcols_df3['MSAN Interface'] == 0
msan_int2 = dropcols_df3[fblank_msan_int1]
f_access_int2 = msan_int2['ACCESS Interface 1 (IPRAN, ATN, LSA, VLAN)'] == 0
new_df2 = msan_int2[f_access_int2]
ag1 = new_df2['Peak Util.3'] >= 0.5
new_df3 = new_df2[ag1]
# Concatenate all DataFrames
pdList = [new_df, new_df1, new_df3]
final_df = pd.concat(pdList)
print(final_df.to_csv('output.csv', index = False))
Thank you. By the way, I'm new to Python :).
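From what I understand, this error usually means the executable was built without a console window, so input() has nothing to read from; rebuilding with the console-based option in auto-py-to-exe is the first thing worth trying. Failing that, here is a small sketch (not a drop-in fix) of a fallback that takes the file name from the command line when no console is attached; the argument handling is hypothetical.
import sys
import pandas as pd

# Ask interactively when a console is attached; otherwise fall back to an argument.
if sys.stdin is not None and sys.stdin.isatty():
    input_file = input("Enter the file name of your HC file: ")
elif len(sys.argv) > 1:
    input_file = sys.argv[1]
else:
    sys.exit("No console available - pass the HC file name as an argument.")

df = pd.read_excel(input_file + '.xlsx', sheet_name='MSAN Cabinets')
print("Done")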
So, for my bot I first extract data via the API and store it in a CSV file. When I run my for loop on the data coming straight from the API, it gives no error and runs smoothly.
But when the CSV file is read back and the same loop is run, it gives an out-of-bounds error.
This is my function to generate the data:
full_list = pd.DataFrame(columns=("date","open","high","low","close","volume","ticker","RSI","ADX","20_sma","max_100"))

def stock_data(ticker):
    create_data = fetchOHLC(ticker,'minute',60)
    create_data["ticker"] = ticker
    create_data["RSI"] = round(rsi(create_data,25),2)
    create_data["ADX"] = round(adx(create_data,14),2)
    create_data["20_sma"] = round(create_data.close.rolling(10).mean().shift(),2)
    create_data["max_100"] = create_data.close.rolling(100).max().shift()
    create_data.dropna(inplace=True,axis=0)
    create_data.reset_index(inplace=True)
    return create_data

stocklist = open("stocklist.txt","r+")
tickers = stocklist.readlines()

for x in tickers:
    try:
        full_list = full_list.append(stock_data(x.strip()))
    except:
        print(f'{x.strip()} did not work')

full_list.to_csv("All_Data")
full_list
When I run the code below on the DataFrame just created, I get no error. But when I run the same code on the CSV file, I get an out-of-bounds error.
list_tickers = full_list["ticker"].unique()

for y in list_tickers[:2]:
    main = full_list[full_list["ticker"]==y]
    pos = 0
    num = 0
    tick = y
    signal_time = 0
    signal_rsi = 0
    signal_adx = 0
    buy_time = 0
    buy_price = 0
    sl = 0
    # to add trailing sl in this.
    for x in main.index:
        maxx = main.iloc[x]["max_100"]
        rsi = main.iloc[x]["RSI"]
        adx = main.iloc[x]["ADX"]
        sma = main.iloc[x]["20_sma"]
        close = main.iloc[x]["close"]
        high = main.iloc[x]["high"]
        if rsi > 80 and adx > 35 and close > maxx:
            if pos == 0:
                buy_price = main.iloc[x+1]["open"]
                buy_time = main.iloc[x+1]["date"]
                pos = 1
                signal_time = main.iloc[x]["date"]
                signal_rsi = main.iloc[x]["RSI"]
                signal_adx = main.iloc[x]["ADX"]
        elif close < sma:
            if pos == 1:
                sell_time = main.iloc[x]["date"]
                sell_price = sma*.998
                pos = 0
                positions.loc[positions.shape[0]] = [y,signal_time,signal_rsi,signal_adx,buy_time,buy_price,sell_time,sell_price]
Any idea why?
Here is the cleanup and file-read code:
full_list = pd.read_csv("All_data")
full_list.dropna(inplace=True,axis=0)
full_list.drop(labels="Unnamed: 0",axis=1)  # index of previous dataframe
full_list.head(5)
Thanks
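One thing that may explain the difference (a sketch, not a confirmed fix): after pd.read_csv, full_list gets a fresh 0..N index spanning all tickers, so main.index for the second ticker no longer starts at 0, while main.iloc[x] is positional. Resetting the index on each ticker slice and stopping one row early (because of iloc[x+1]) keeps positions in range; the column names are taken from the code above.
# Assumes full_list has just been read back with pd.read_csv("All_Data").
for y in full_list["ticker"].unique()[:2]:
    main = full_list[full_list["ticker"] == y].reset_index(drop=True)
    for x in range(len(main) - 1):  # stop one row early so main.iloc[x + 1] exists
        row = main.iloc[x]
        if row["RSI"] > 80 and row["ADX"] > 35 and row["close"] > row["max_100"]:
            buy_price = main.iloc[x + 1]["open"]
            buy_time = main.iloc[x + 1]["date"]
            print(y, "signal at", row["date"], "-> buy at", buy_price, "on", buy_time)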
I'm trying to pull two or more dataframes from a module so that I can use the data in the main script.
I only get four empty dataframes back from the df_make module.
The main and df_make code are below.
Any advice would be great, thanks.
import pandas as pd
import df_make

df_trn = pd.DataFrame()
df_trn_trk = pd.DataFrame()
df_jky = pd.DataFrame()
df_jky_code = pd.DataFrame()

def main():
    df_make.jky_trn(df_trn, df_trn_trk, df_jky, df_jky_code)
    #df_make.jky_trn([df_trn])
    print(df_trn)
    print(df_trn_trk)
    print(df_jky)
    print(df_jky_code)

if __name__ == '__main__':
    main()
import pandas as pd

#def jky_trn(df_trn):
def jky_trn(df_trn, df_trn_trk, df_jky, df_jky_code):
    #global df_trn
    #global df_trn_trk
    #global df_jky
    #global df_jky_code
    path = (r"C:\Users\chris\Documents\UKHR\PythonSand\PY_Scripts\StackOF")
    xls_tbl = "\Racecards.xlsx"
    xls_link = path + xls_tbl
    df1 = pd.read_excel(xls_link, usecols=["Jockey","Course","RaceDesc"])
    df2 = pd.read_excel(xls_link, usecols=["Trainer","Course","RaceDesc"])
    df1 = df1.drop_duplicates(subset=["Jockey","Course","RaceDesc"])
    df1 = df1.dropna()  # Remove rows with NaN
    df1['Course'] = df1['Course'].str.replace(' \(AW\)', '')  # Replace (AW) in Course
    df2['Course'] = df2['Course'].str.replace(' \(AW\)', '')
    df_jky = df1[['Jockey']].copy()
    df_jky_code = df1[['Jockey', 'Course']].copy()
    df_jky = df_jky.drop_duplicates()
    df_jky_code = df_jky_code.drop_duplicates()
    df_trn = df2[['Trainer']].copy()
    df_trn_trk = df2[['Trainer', 'Course']].copy()
    df_trn = df_trn.drop_duplicates()
    df_trn_trk = df_trn_trk.drop_duplicates()
    #print(df_jky_code)
    #print(df_trn_trk)
    return df_jky, df_jky_code, df_trn, df_trn_trk
So, it turns out that I needed to refer to the dataframes as items of the tuple returned to the main script, e.g. df_jt = df_make.jky_trn().
The new main script code is:
import pandas as pd
import df_make

def main():
    df_jt = df_make.jky_trn()
    print(df_jt[0])
    print(df_jt[1])
    print(df_jt[2])
    print(df_jt[3])

if __name__ == '__main__':
    main()
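An equivalent sketch unpacks the returned tuple into named variables, so the main script reads much as before; the order matches the module's return statement (df_jky, df_jky_code, df_trn, df_trn_trk).
import df_make

def main():
    # Unpack in the order the module returns them.
    df_jky, df_jky_code, df_trn, df_trn_trk = df_make.jky_trn()
    print(df_jky)
    print(df_jky_code)
    print(df_trn)
    print(df_trn_trk)

if __name__ == '__main__':
    main()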
I have developed a program in Python that needs to take some arguments from the command line in order to run, but I keep getting the same error:
Traceback (most recent call last):
File "<string>", line 1, in <fragment>
invalid syntax: <string>, line 1, pos 16
I haven't the faintest idea what is wrong with my code, so I present it below in case someone can help me:
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import math
import copy
import QSTK.qstkstudy.EventProfiler as ep
import csv
import sys
import argparse
def readData(li_startDate, li_endDate, ls_symbols):
    # Create datetime objects for Start and End dates (STL)
    dt_start = dt.datetime(li_startDate[0], li_startDate[1], li_startDate[2])
    dt_end = dt.datetime(li_endDate[0], li_endDate[1], li_endDate[2])
    # Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
    dt_timeofday = dt.timedelta(hours=16)
    # Get a list of trading days between the start and end dates (QSTK)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    # Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    # Keys to be read from the data
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    # Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    return [d_data, dt_start, dt_end, dt_timeofday, ldt_timestamps]
def marketsim(cash,orders_file,values_file):
    orders = pd.read_csv(orders_file,index_col='Date',parse_dates=True,header=None)
    ls_symbols = list(set(orders['X.4'].values))
    df_lastrow = len(orders) - 1
    dt_start = dt.datetime(orders.get_value(0, 'X.1'),orders.get_value(0, 'X.2'),orders.get_value(0, 'X.3'))
    dt_end = dt.datetime(orders.get_value(df_lastrow, 'X.1'),orders.get_value(df_lastrow, 'X.2'),orders.get_value(df_lastrow, 'X.3') + 1 )
    #d_data = readData(dt_start,dt_end,ls_symbols)
    # Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
    dt_timeofday = dt.timedelta(hours=16)
    # Get a list of trading days between the start and end dates (QSTK)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    # Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    # Keys to be read from the data
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    # Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    ls_symbols.append("_CASH")
    trades = pd.DataFrame(index=list(ldt_timestamps[0]),columns=list(ls_symbols))
    current_cash = cash
    trades["_CASH"][ldt_timestamps[0]] = current_cash
    current_stocks = dict()
    for symb in ls_symbols:
        current_stocks[symb] = 0
        trades[symb][ldt_timestamps[0]] = 0
    for row in orders.iterrows():
        row_data = row[1]
        current_date = dt.datetime(row_data['X.1'],row_data['X.2'],row_data['X.3'],16)
        symb = row_data['X.4']
        stock_value = d_data['close'][symb][current_date]
        stock_amount = row_data['X.6']
        if row_data['X.5'] == "Buy":
            current_cash = current_cash - (stock_value*stock_amount)
            trades["_CASH"][current_date] = current_cash
            current_stocks[symb] = current_stocks[symb] + stock_amount
            trades[symb][current_date] = current_stocks[symb]
        else:
            current_cash = current_cash + (stock_value*stock_amount)
            trades["_CASH"][current_date] = current_cash
            current_stocks[symb] = current_stocks[symb] - stock_amount
            trades[symb][current_date] = current_stocks[symb]
    #trades.fillna(method='ffill',inplace=True)
    #trades.fillna(method='bfill',inplace=False)
    trades.fillna(0)
    #alt_cash = current_cash
    #alt_cash = trades.cumsum()
    value_data = pd.DataFrame(index=list(ldt_timestamps),columns=list("V"))
    value_data = value_data.fillna(0)
    value_data = value_data.cumsum(axis=0)
    for day in ldt_timestamps:
        value = 0
        for sym in ls_symbols:
            if sym == "_CASH":
                value = value + trades[sym][day]
            else:
                value = value + trades[sym][day]*d_data['close'][sym][day]
        value_data["V"][day] = value
    fileout = open(values_file,"w")
    for row in value_data.iterrows():
        fileout.writelines(str(row[0].strftime('%Y,%m,%d')) + ", " + str(row[1]["V"].round()) + "\n" )
    fileout.close()
def main(argv):
    if len(sys.argv) != 3:
        print "Invalid arguments for marketsim.py. It should be of the following syntax: marketsim.py orders_file.csv values_file.csv"
        sys.exit(0)
    #initial_cash = int (sys.argv[1])
    initial_cash = 1000000
    ordersFile = str(sys.argv[1])
    valuesFile = str(sys.argv[2])
    marketsim(initial_cash,ordersFile,valuesFile)

if __name__ == "__main__":
    main(sys.argv[1:])
The input I gave at the command line was:
python marketsim.py orders.csv values.csv
I guess the problem lies either in the imports or in the main function (including the if __name__ check below def main(argv)).
I should point out that the files orders.csv and values.csv exist and are located in the same folder.
I hope I have made everything clear. I am looking forward to reading your answers, community-mates! :D
Thank you!
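As a side note, a File "<string>", line 1 syntax error is often what appears when a shell command such as python marketsim.py orders.csv values.csv is typed at the Python >>> prompt rather than in the operating-system shell, so that is worth double-checking. Separately, since argparse is already imported but unused, here is a small sketch of parsing the two file names with it instead of indexing sys.argv directly; it assumes the marketsim(cash, orders_file, values_file) function defined above and runs on the same Python 2 environment that QSTK targets.
import argparse
import sys

def parse_args(argv):
    # Sketch only: named positional arguments for the two CSV files.
    parser = argparse.ArgumentParser(description="Run the market simulator.")
    parser.add_argument("orders_file", help="CSV file with the orders, e.g. orders.csv")
    parser.add_argument("values_file", help="CSV file to write portfolio values to, e.g. values.csv")
    return parser.parse_args(argv)

if __name__ == "__main__":
    args = parse_args(sys.argv[1:])
    # marketsim(1000000, args.orders_file, args.values_file)  # same call as in main() above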