I am trying to implement a "user-friendly" portfolio optimization program in Python.
Since I am still a beginner I did not quite manage to realize it.
The only thing the program should use as input are the stock codes.
I tried to create a mwe below:
import numpy as np
import yfinance as yf
import pandas as pd
def daily_returns(price):
price = price.to_numpy()
shift_1 = price[1:]
shift_2 = price[:-1]
return (shift_1 - shift_2)/shift_1
def annual_returns(price):
price = price.to_numpy()
start = price[0]
end = price[len(price)-1]
return (end-start)/start
def adjusting(price):
adj = len(price)
diff = adj - adjvalue
if diff != 0:
price_new = price[:-diff]
else: price_new = price
return price_new
#Minimal Reproducible Example
#getting user input
names = input('Stock codes:')
names = names.split()
a = len(names)
msft = yf.Ticker(names[0])
aapl = yf.Ticker(names[1])
#import data
hist_msft = msft.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_msft = pd.DataFrame(hist_msft,columns=['Close'])
#hist_msft = hist_msft.to_numpy()
hist_aapl = aapl.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_aapl = pd.DataFrame(hist_aapl,columns=['Close'])
#hist_aapl = hist_aapl.to_numpy()
#daily returns
aapl_daily_returns = daily_returns(hist_aapl)
aapl_daily_returns = np.ravel(aapl_daily_returns)
msft_daily_returns = daily_returns(hist_msft)
msft_daily_returns = np.ravel(msft_daily_returns)
#adjusting for different trading periods
adjvalue = min(len(aapl_daily_returns),len(msft_daily_returns))
aapl_adj = adjusting(aapl_daily_returns)
msft_adj = adjusting(msft_daily_returns)
#annual returns
aapl_ann_returns = annual_returns(hist_aapl)
msft_ann_returns = annual_returns(hist_msft)
#inputs for optimization
cov_mat = np.cov([aapl_adj,msft_adj])*252
ann_returns = np.concatenate((aapl_ann_returns,msft_ann_returns))
Now I just want the code to work with a various, unknown number of inputs. I tried reading a lot about global variables or tried to figure it out with dictionaries but couldn't really achieve any progress.
I think using the for loop can solve your problem!
...
names = input('Stock codes:')
names = names.split()
for name in names:
#analyze here
#I don't know anything about stocks so I wont write anything here
...
Related
I have the following development which I'm working with the ElementTree and Pandas module in Python:
import xml.etree.ElementTree as ET
import pandas as pd
file_xml = ET.parse('example1.xml')
rootXML = file_xml.getroot()
def transfor_data_atri(rootXML):
file_xml = ET.parse(rootXML)
data_XML = [
{"Name": signal.attrib["Name"],
# "Value": signal.attrib["Value"]
"Value": int(signal.attrib["Value"].split(' ')[0])
} for signal in file_xml.findall(".//Signal")
]
signals_df = pd.DataFrame(data_XML)
extract_name_value(signals_df)
def extract_name_value(signals_df):
#print(signals_df)
signal_ig_st = signals_df[signals_df.Name.isin(["Status"])]
row_values_ig_st = signal_ig_st.T
vector_ig_st = row_values_ig_st.iloc[[1]]
signal_nav_DSP_rq = signals_df[signals_df.Name.isin(["SetDSP"])]
row_values_nav_DSP_rq = signal_nav_DSP_rq.T
vector_nav_DSP_rq = row_values_nav_DSP_rq.iloc[[1]]
signal_HMI_st = signals_df[signals_df.Name.isin(["HMI"])]
row_values_HMI_st = signal_HMI_st.T
vector_HMI_st = row_values_HMI_st.iloc[[1]]
signal_delay_ac = signals_df[signals_df.Name.isin(["Delay"])]
row_values_delay_ac = signal_delay_ac.T
vector_delay_ac = row_values_delay_ac.iloc[[1]]
signal_AutoConfigO_Rear = signals_df[signals_df.Name.isin(["AutoConfigO_Rear"])]
row_values_AutoConfigO_Rear = signal_AutoConfigO_Rear.T
vector_AutoConfigO_Rear = row_values_AutoConfigO_Rear.iloc[[1]]
signal_ACO_Front = signals_df[signals_df.Name.isin(["AutoConfigO_Front"])]
row_values_ACO_Front = signal_ACO_Front.T
vertor_ACO_Front = row_values_ACO_Front.iloc[[1]]
signal_ACO_Drvr = signals_df[signals_df.Name.isin(["AutoConfigO_Drvr"])]
row_values_ACO_Drvr = signal_ACO_Drvr.T
vector_ACO_Drvr = row_values_ACO_Drvr.iloc[[1]]
signal_ACO_Allst = signals_df[signals_df.Name.isin(["AutoConfigO_Allst"])]
row_values_ACO_Allst = signal_ACO_Allst.T
vector_ACO_Allst = row_values_ACO_Allst.iloc[[1]]
signal_RURRq_st = signals_df[signals_df.Name.isin(["RUResReqstStat"])]
row_values_RURRq_st = signal_RURRq_st.T
vector_RURRq_st = row_values_RURRq_st.iloc[[1]]
signal_RURqSy_st = signals_df[signals_df.Name.isin(["RUReqstrSystem"])]
row_values_RURqSy_st = signal_RURqSy_st.T
vector_RURqSy_st = row_values_RURqSy_st.iloc[[1]]
signal_RUAudS_st = signals_df[signals_df.Name.isin(["RUSource"])]
row_values_RUAudS_st = signal_RUAudS_st.T
vector_RUAudS_st = row_values_RUAudS_st.iloc[[1]]
signal_DSP_st = signals_df[signals_df.Name.isin(["DSP"])]
row_values_DSP = signal_DSP.T
vector_DSP = row_values_DSP.iloc[[1]]
print('1: ', vector_ig_st)
print('2: ', vector_nav_DSP_rq)
print('3: ', vector_HMI_st)
print('4: ', vector_delay_ac)
The output of the above is the following, they are the first 4 prints and it is fine, because it is what they want, but I have to simplify the code, so that any type of xml file of the type example.xml, can be read not only example1.xml:
The simplified code is required to bring the data as it is in the names_list variable, but not to use this variable, which is actually hard-coded:
names_list = [
'Status', 'SetDSP', 'HMI', 'Delay', 'AutoConfigO_Rear',
'AutoConfigO_Front', 'AutoConfigO_Drvr','AutoConfigO_Allst',
'RUResReqstStat', 'RUReqstrSystem', 'RUSource', 'DSP'
]
So when the client wants to put another XML file with the same structure, but with other names that are not in the code, it can read them without problem. Beforehand thank you very much.
I hope I'm understanding the questions correctly. my understanding is that
you want to dynamically produce the extract_name_value() function, and make it not as bulky in your code.
Im sorry, but I failed to comprehend the for i in signal_name: print(i) part of the question. perhaps you can rephrase the question, and help me understand?
my solution to the extract_name_value() part would be using the exec() function.
it is a built-in solution for dynamic execution.
name_list = ['Status', 'SetDSP', 'HMI', 'Delay', 'AutoConfigO_Rear',
'AutoConfigO_Front', 'AutoConfigO_Drvr', 'AutoConfigO_Allst',
'RUResReqstStat', 'RUReqstrSystem', 'RUSource', 'DSP']
def _build_extract_name_value_func(name_list):
extract_name_value_func = ""
for name in name_list:
holder_func = f"""
signal_{name} = signals_df[signals_df.Name.isin([{name}])]
row_values_{name} = signal_{name}.T
vector_{name} = row_values_{name}.iloc[[1]]
vector_list.append(vector_{name})
"""
extract_name_value_func += holder_func
return extract_name_value_func
def extract_name_value(name_list):
extract_name_value_func = build_extract_name_value_func(name_list)
exec(extract_name_value_func)
the code was not tested with actual data, because I am not familiar with handling xml structures. But I hope the python part can be some help to you.
I was able to solve it, I used a for loop and iterated the dataframe itself:
for i in signals_df.Name:
signal = signals_df [signals_df.Name.isin ([i])]
row_values = signal.T
vector = row_values.iloc [[1]]
print (vector)
I am trying to make a form where if I input a medicine's name, it will show the solution of the medicine serially. But it is kind of limited bythe way I'm making it like more lines I'll code more spaces they will get to have the number of feedback. It would be great if you could help me to make something short but have the infinity process like loop.
df = pd.DataFrame({'FEVER':['NAPA_PLUS','JERIN','PARASITAMOL'],
'GASTRIC':['SECLO40','SECLO20','ANTACID'],
'WATERINESS':['ORSALINE','TESTY_SALINE','HOME_MADE_SALINE']})
def word_list(text):
return list(filter(None, re.split('\W+', text)))
session = raw_input("INPUT THE NAME OF THE MEDICINES ONE BY ONE BY KEEPING SPACE:")
feedback = session
print(word_list(feedback))
dff = pd.DataFrame({'itemlist':[feedback]})
dff['1'] = dff['itemlist'].astype(str).str.split().str[0]
dff['2'] = dff['itemlist'].astype(str).str.split().str[1]
dff['3'] = dff['itemlist'].astype(str).str.split().str[2]
dff['4'] = dff['itemlist'].astype(str).str.split().str[3]
dff['5'] = dff['itemlist'].astype(str).str.split().str[4]
for pts1 in dff['1']:
pts1 = df.columns[df.isin([pts1]).any()]
for pts2 in dff['2']:
pts2 = df.columns[df.isin([pts2]).any()]
for pts3 in dff['3']:
pts3 = df.columns[df.isin([pts3]).any()]
for pts4 in dff['4']:
pts4 = df.columns[df.isin([pts4]).any()]
for pts5 in dff['5']:
pts5 = df.columns[df.isin([pts5]).any()]
This wraps your repeated code into two loops:
...
dff = pd.DataFrame({'itemlist':[feedback]})
limit = 5
for i in xrange(limit):
name = str(i+1)
dff[name] = dff['itemlist'].astype(str).str.split().str[i]
for pts in dff[name]:
pts = df.columns[df.isin([pts]).any()]
I'm using pyalgotrade to create a trading strategy. I'm going through a list of tickers(testlist) and adding them to a dictionary(list_large{}) alongside their score which I'm getting using a get_score function. My latest problem is that each ticker in the dictionary(list_large{}) is getting the same score. Any idea why?
Code:
from pyalgotrade import strategy
from pyalgotrade.tools import yahoofinance
import numpy as np
import pandas as pd
from collections import OrderedDict
from pyalgotrade.technical import ma
from talib import MA_Type
import talib
smaPeriod = 10
testlist = ['aapl','ddd','gg','z']
class MyStrategy(strategy.BacktestingStrategy):
def __init__(self, feed, instrument):
super(MyStrategy, self).__init__(feed, 1000)
self.__position = []
self.__instrument = instrument
self.setUseAdjustedValues(True)
self.__prices = feed[instrument].getPriceDataSeries()
self.__sma = ma.SMA(feed[instrument].getPriceDataSeries(), smaPeriod)
def get_score(self,slope):
MA_Score = self.__sma[-1] * slope
return MA_Score
def onBars(self, bars):
global bar
bar = bars[self.__instrument]
slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
organized_list = OrderedDict(sorted(list_large.items(), key=lambda t: -t[1]))#organize the list from highest to lowest score
print list_large
def run_strategy(inst):
# Load the yahoo feed from the CSV file
feed = yahoofinance.build_feed([inst],2015,2016, ".") # feed = yahoofinance.build_feed([inst],2015,2016, ".")
# Evaluate the strategy with the feed.
myStrategy = MyStrategy(feed, inst)
myStrategy.run()
print "Final portfolio value: $%.2f" % myStrategy.getBroker().getEquity()
def main():
instruments = ['ddd','msft']
for inst in instruments:
run_strategy(inst)
if __name__ == '__main__':
main()
Check this code of the onBars() function:
slope = 8 # <---- value of slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
# Updating dict of each ticker based on ^
Each time self.get_score(slope) is called, it returns the same value and hence, all the value of tickers hold the same value in dict
I do not know how you want to deal with slope and how you want to update it's value. But this logic can be simplified without using .update as:
list_large = {}
for tickers in testlist:
list_large[tickers] = self.get_score(slope)
# ^ Update value of `tickers` key
I am trying to make a program that will display total stock of cement, brick, spanner, mirror, and hammer. But the problem Please help me
is thsi program keeps showing me' cementinVar is not defined' but i already defined it above as (cementinVar = Tkinter.IntVar()) .
def textboxvalue2():
Total_StockIn = 0
CementIn = cementinVar.get()
HammerIn = hammerinVar.get()
SpannerIn = spannerinVar.get()
BrickIn = brickinVar.get()
MirrorIn = mirrorinVar.get()
CementOut = cementoutVar.get()
HammerOur = hammeroutVar.get()
SpannerOut = spanneroutVar.get()
BrickOut = brickoutVar.get()
MirrorOut = mirroroutVar.get()
Total_StockIn = (CementIn + HammerIn + SpannerIn + BrickIn + MirrorIn)+Total_StockIn
StockInLabel = Tkinter.Label(sub,text='The total stock in is '+str(Total_StockIn))
StockInLabel.grid(row=7, column =2)
You define cementinVar in another scope. You have two possible solutions:
Use a global variable
Use a class variable
I am trying to write a script to generate data. I am using random package for this. I execute the script and everything works fine. But when I check through the results, I found out that the script fails to generate the last 100+ rows for some reason.
Can someone suggest me why this could be happening?
from __future__ import print_function
from faker import Faker;
import random;
## Vaue declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax= 3.5;
## Define Column Header
Column_Names = "Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
"Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
";","Actual_Sales_Price",tax;
## Function to generate sales related information
def sales_data():
for x in range(0,1):
quantity_sold = random.randint(5,20);
discount = random.choice(range(5,11));
sales_price = random.uniform(20,30);
return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));
## Format the month to quarter and return the value
def quarter(month):
if month >= 1 and month <= 3:
return "Q1";
elif month > 3 and month <= 6:
return "Q2";
elif month > 6 and month <= 9:
return "Q3";
else:
return "Q4";
## Generate product_id
def product_name():
str2 = "PROD";
sample2 = random.sample([1,2,3,4,5,6,7,8,9],5);
string_list = [];
for x in sample2:
string_list.append(str(x));
return (str2+''.join(string_list));
### Main starts here ###
result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')
print (Column_Names, result_log);
### Loop and Generate Data ###
for pop in range(0,population):
pop = random.randint(55000,85000);
for prod_id in range(0,product):
product_name2 = product_name();
for year in range(1,years):
for month in range(1,months):
for day in range(1,31):
a = sales_data();
rows = str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
print(rows,file=result_log);
#print (rows);
tax = tax+1;
You need to close a file to have the buffers flushed:
result_log.close()
Better still, use the file object as a context manager and have the with statement close it for you when the block exits:
filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
with result_log = open(filename, 'w'):
# code writing to result_log
Rather than manually writing strings with delimiters in between, you should really use the csv module:
import csv
# ..
column_names = (
"Population_ID", "Product_Name", "Product_ID", "Year",
"Month", "Day", "Quantity_sold", "Sales_Price", "Discount",
"Actual_Sales_Price", tax)
# ..
with result_log = open(filename, 'wb'):
writer = csv.writer(result_log, delimiter=';')
writer.writerow(column_names)
# looping
row = [pop, product_name2, prod_id, year, month, quarter(month), day,
a[0], a[1], a[2], tax, a[3]]
writer.writerow(row)