I can't seem to get the following `for` loop over `range` to work. This is the entire code; perhaps it is more helpful to post the entire program.
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.foreignexchange import ForeignExchange
from scipy.stats import ttest_ind
from pandas import ExcelWriter
import time
import numpy as np
import os
# Back-test script: for each currency pair listed in a spreadsheet, download
# 15-minute FX bars, derive rolling z-score features for candidate window
# lengths, compare forward returns on a training and a test slice, and dump the
# last working DataFrame to Excel.
# NOTE(review): indentation was lost in this listing, so which statements sit
# inside which loop is not visible here; the comments below describe each
# statement on its own.
api_key="xxx"
cc = ForeignExchange(key=api_key, output_format="pandas", indexing_type= "date")
#ti = TechIndicators(key= api_key, output_format="pandas", indexing_type= "date")
# `ts` is created but never used in the code shown here.
ts = TimeSeries(key= api_key, output_format="pandas", indexing_type= "date")
filePath = r"/Users/LaCasa/PycharmProjects/Forex_Breakout_Backtest_15MIN/forex_pairs.xlsx"
filePath1 = r"/Users/LaCasa/PycharmProjects/Forex_Breakout_Backtest_15MIN/"
stocklist = pd.read_excel(filePath, engine='openpyxl')
# Only the first five rows (pairs) of the spreadsheet are processed.
stocklist = stocklist.head(5)
# `exportList` is created but never used in the code shown here.
exportList = pd.DataFrame(columns=['Base','Quote'])
for i in stocklist.index:
fx_from = str(stocklist["fx_from"][i])
fx_to = str(stocklist["fx_to"][i])
data_fx, meta_data_fx = cc.get_currency_exchange_intraday(from_symbol=fx_from,to_symbol=fx_to,interval='15min',
outputsize='full')
data_fx.sort_index(inplace=True)
# Plain assignment: `total_df` is the same object as `data_fx`, not a copy.
total_df = data_fx
total_df["BASE"] = fx_from
total_df["QUOTE"] = fx_to
total_df.rename(columns={'1. open': 'OPEN','2. high': 'HIGH','3. low': 'LOW','4. close':'CLOSE'},inplace=True)
result = []
train_size = 0.6
n_forward = 5
# Forward return: relative change of CLOSE over the next `n_forward` rows.
total_df['Forward Close'] = total_df['CLOSE'].shift(-n_forward)
total_df['Forward Return'] = (total_df['Forward Close'] - total_df['CLOSE']) / total_df['CLOSE']
# BUG: range(10, 200, 10) yields one int per iteration, so unpacking it into
# three names raises "TypeError: cannot unpack non-iterable int object".
# Three independent window lengths need an iterable of 3-tuples instead.
for sma_length, sma_length2, sma_length3 in range(10, 200, 10):
print(sma_length)
# Feature set 1 (window = sma_length): z-score of CLOSE against its rolling
# mean/std, then the same construction applied to the rolling std itself.
total_df["MA1"] = round(total_df["CLOSE"].rolling(window=sma_length).mean(), 5)
total_df["SD1"] = round(total_df["CLOSE"].rolling(window=sma_length).std(), 5)
total_df["z1"] = round((total_df["CLOSE"].sub(total_df["MA1"])).div(total_df["SD1"]), 3)
total_df["Z1"] = round(total_df["z1"].rolling(window=1).mean(), 3)
total_df["MAZ1"] = round(total_df["Z1"].rolling(window=3).mean(), 5)
total_df["SDMA1"] = round(total_df["SD1"].rolling(window=sma_length).mean(), 5)
total_df["STD1"] = round(total_df["SD1"].rolling(window=sma_length).std(), 5)
total_df["zd1"] = round((total_df["SD1"].sub(total_df["SDMA1"])).div(total_df["STD1"]), 3)
total_df["ZDEV1"] = round(total_df["zd1"].rolling(window=1).mean(), 3)
total_df["MAZDEV1"] = round(total_df["ZDEV1"].rolling(window=3).mean(), 3)
# Feature set 2: identical formulas with window = sma_length2.
total_df["MA2"] = round(total_df["CLOSE"].rolling(window=sma_length2).mean(), 5)
total_df["SD2"] = round(total_df["CLOSE"].rolling(window=sma_length2).std(), 5)
total_df["z2"] = round((total_df["CLOSE"].sub(total_df["MA2"])).div(total_df["SD2"]), 3)
total_df["Z2"] = round(total_df["z2"].rolling(window=1).mean(), 3)
total_df["MAZ2"] = round(total_df["Z2"].rolling(window=3).mean(), 5)
total_df["SDMA2"] = round(total_df["SD2"].rolling(window=sma_length2).mean(), 5)
total_df["STD2"] = round(total_df["SD2"].rolling(window=sma_length2).std(), 5)
total_df["zd2"] = round((total_df["SD2"].sub(total_df["SDMA2"])).div(total_df["STD2"]), 3)
total_df["ZDEV2"] = round(total_df["zd2"].rolling(window=1).mean(), 3)
total_df["MAZDEV2"] = round(total_df["ZDEV2"].rolling(window=3).mean(), 3)
# Feature set 3: identical formulas with window = sma_length3.
total_df["MA3"] = round(total_df["CLOSE"].rolling(window=sma_length3).mean(), 5)
total_df["SD3"] = round(total_df["CLOSE"].rolling(window=sma_length3).std(), 5)
total_df["z3"] = round((total_df["CLOSE"].sub(total_df["MA3"])).div(total_df["SD3"]), 3)
total_df["Z3"] = round(total_df["z3"].rolling(window=1).mean(), 3)
total_df["MAZ3"] = round(total_df["Z3"].rolling(window=3).mean(), 5)
total_df["SDMA3"] = round(total_df["SD3"].rolling(window=sma_length3).mean(), 5)
total_df["STD3"] = round(total_df["SD3"].rolling(window=sma_length3).std(), 5)
total_df["zd3"] = round((total_df["SD3"].sub(total_df["SDMA3"])).div(total_df["STD3"]), 3)
total_df["ZDEV3"] = round(total_df["zd3"].rolling(window=1).mean(), 3)
total_df["MAZDEV3"] = round(total_df["ZDEV3"].rolling(window=3).mean(), 3)
# Each `inputN` column is a 0/1 flag built from a boolean comparison.
# BREAKOUT
total_df['input1'] = [int(x) for x in total_df['Z1'] > 2]
total_df['input2'] = [int(x) for x in total_df['Z1'].shift(1) < 2]
# VOLATILITY
total_df['input3'] = [int(x) for x in (total_df['ZDEV2'] > total_df['MAZDEV2'])]
#total_df = total_df.dropna(subset=["MAZDEV2"], inplace=False)
#VOLATILITY #2
total_df['input4'] = [int(x) for x in (total_df['ZDEV3'] > total_df['MAZDEV3'])]
total_df['input5'] = [int(x) for x in total_df['ZDEV3'] < 1]
#
# #TREND
total_df['input6'] = [int(x) for x in (total_df['Z3'] > total_df['MAZ3'])]
#total_df['input7'] = [int(x) for x in total_df['Z3'] > 1]
print(total_df['input4'])
# Split by position: first 60% of rows for training, last 40% for testing.
training = total_df.head(int(train_size * total_df.shape[0]))
test = total_df.tail(int((1 - train_size) * total_df.shape[0]))
# BUG: `'input1' and ... and 'input6'` is evaluated by Python before indexing
# and yields the last string, 'input6', so only that one column is filtered on.
tr_returns = training[training['input1' and 'input2' and 'input3' and 'input4' and 'input5' and 'input6'] == 1]['Forward Return']
test_returns = test[test['input1' and 'input2' and 'input3' and 'input4' and 'input5' and 'input6'] == 1]['Forward Return']
mean_forward_return_training = tr_returns.mean()
mean_forward_return_test = test_returns.mean()
# Welch's t-test (unequal variances); element [1] of the result is the p-value.
pvalue = ttest_ind(tr_returns, test_returns, equal_var=False)[1]
result.append({
'base': fx_from,
'quote': fx_to,
'sma_length': sma_length,
'sma_length2': sma_length2,
'sma_length3': sma_length3,
'training_forward_return': mean_forward_return_training,
'test_forward_return': mean_forward_return_test,
'p-value': pvalue
})
# Sort descending by training forward return, so result[0] is the best entry.
result.sort(key=lambda x: -x['training_forward_return'])
print(result[0])
# NOTE(review): presumably a pause to stay within the API's rate limit - confirm.
time.sleep(15)
newFile = os.path.dirname(filePath1) + "/period.xlsx"
writer = ExcelWriter(newFile)
total_df.to_excel(writer, "Sheet1", float_format="%.7f")
# NOTE(review): ExcelWriter.save() was removed in pandas 2.0; on current pandas
# use `writer.close()` or a `with ExcelWriter(...)` block instead.
writer.save()
error: TypeError: cannot unpack non-iterable int object
Ideally, I would like to find the best rolling window for each of the z-score formulas you see above, but I don't know how to make the loop work.
As the error says, you are trying to unpack a single integer into 3 values.
An iterable such as range(20, 500) only yields a single integer on each iteration.
for i in range(20, 50):
do_something(i)
do_something_else(i)
do_a_third_thing(i)
# i is the same single integer in each case
Other than that I'm not sure what you are trying to do. If you need 3 different values you could use an iterator with 3 different values. You could do something like:
for i, j, k in [(a1, b1, c1), (a2, b2, c2), ...]:
...
but there have to be three values to 'unpack'.
EDIT:
As far as I can see from your script there might be two things you could try...
Maybe removing sma_length2 and sma_length3 variables entirely, since the nature of a for loop is that it will iterate over the values it is given, so you will get all of the results within the range you define.
Something like this:
for sma_length in range(10, 201):  # remember that with the iterable returned from range, the last value will not be included.
    print(sma_length)

    # z-score of CLOSE against its own rolling mean and standard deviation
    close = total_df["CLOSE"]
    total_df["MA1"] = close.rolling(window=sma_length).mean().round(5)
    total_df["SD1"] = close.rolling(window=sma_length).std().round(5)
    total_df["z1"] = ((close - total_df["MA1"]) / total_df["SD1"]).round(3)
    total_df["Z1"] = total_df["z1"].rolling(window=1).mean().round(3)
    total_df["MAZ1"] = total_df["Z1"].rolling(window=3).mean().round(5)

    # the same construction, applied to the rolling standard deviation itself
    spread = total_df["SD1"]
    total_df["SDMA1"] = spread.rolling(window=sma_length).mean().round(5)
    total_df["STD1"] = spread.rolling(window=sma_length).std().round(5)
    total_df["zd1"] = ((spread - total_df["SDMA1"]) / total_df["STD1"]).round(3)
    total_df["ZDEV1"] = total_df["zd1"].rolling(window=1).mean().round(3)
    total_df["MAZDEV1"] = total_df["ZDEV1"].rolling(window=3).mean().round(3)
Since you are testing all of those values between 10 and 200 anyway with this loop, I'm not sure why you need the other 2 sma_length variables.
One thing to note separately about the code above is that you have some "magic numbers" which will be the same on every iteration.
When you write "window=3" or "window=1" for example, this will never change and you are just wastefully recalculating the same value for every loop.
If, however, you actually want 3 different sma_lengths at the same time you could use the zip() inbuilt function to create an iterable as described above (a sequence of tuples of length 3).
You could do something like this:
iterable = zip(range(100), range(100, 200), range(200, 300))
for a, b, c in iterable:
print((a, b, c))
# (0, 100, 200)
# (1, 101, 201)
# (2, 102, 202)
# ... etc.
But as I said, I think you can do what you require with only one variable as in the previous example. Hope this helps.
Related
# Goal: evaluate 5*exp(1.653*k/i) - 1/j for combinations of temperature (i),
# water partial pressure (j) and current (k), collecting results in H2_pressure.
# NOTE(review): indentation was lost in this listing, so the loop level at
# which `H2_pressure.append(...)` runs cannot be seen here.
# `T_list` is created but never used in the code shown.
T_list = []
Temp_k = np.linspace(298, 398, 10)
#print (Temp_k)
current = np.linspace(0, 1.4, 5)
ppH2O = np.linspace(-2, -1, 5)
H2_pressure = []
# Second name for the same array object (no copy is made).
H2O_pp = ppH2O
for i in (Temp_k):
print(i, 'i')
for j in (H2O_pp):
print(j, 'j')
for k in (current):
print (k, 'k')
partial_H2 = 5*np.exp((1.653 * k)/i) - 1/j
# Must run once per (i, j, k) combination to record every computed value.
H2_pressure.append(partial_H2)
#print (H2_pressure)
I want to build a list (or array) that gives me the values of
5*np.exp((1.653 * k)/i) - 1/j,
At each Temp_k, H2O_pp, current.
For example, the value when Temp_k, H2O_pp and current are at their initial values,
then the value at their second values, and so on until they reach their end values.
Could someone please help me with this?
What about this
import numpy as np

temp_k = np.linspace(298, 398, 10)
pp_h2o = np.linspace(-2, -1, 5)
currents = np.linspace(0, 1.4, 5)

# One entry per (temp, pp, current) combination, current varying fastest.
h2_pressures = []
grid = (
    (temp, pp, current)
    for temp in temp_k
    for pp in pp_h2o
    for current in currents
)
for temp, pp, current in grid:
    exponent = (1.653 * current) / temp
    h2_pressure = 5 * np.exp(exponent) - 1 / pp
    h2_pressures.append(h2_pressure)
    print(f'temp: {temp:0.1f}, pp: {pp:.2f}, current: {current:.3f}, h2_pressure: {h2_pressure:.4f}')
#print(h2_pressures)
Changes:
chose more logical variable names that conform to Python conventions
removed the () around the lists in the for loops
put the h2_pressures.append call in the innermost for loop
formatted the printed output
Your problem is that you put the append call outside of every loop. You should append each value in the innermost loop, right where you calculate it. Otherwise, once the loops have finished, only the last calculated value is still available and all earlier ones are lost.
# Evaluate 5*exp(1.653*k/i) - 1/j for every combination of temperature (i),
# water partial pressure (j) and current (k), keeping every result.
T_list = []
Temp_k = np.linspace(298, 398, 10)
current = np.linspace(0, 1.4, 5)
ppH2O = np.linspace(-2, -1, 5)
H2_pressure = []
H2O_pp = ppH2O
for i in Temp_k:
    print(i, 'i')
    for j in H2O_pp:
        print(j, 'j')
        for k in current:
            print(k, 'k')
            partial_H2 = 5*np.exp((1.653 * k)/i) - 1/j
            # Append inside the innermost loop: this is the only place where
            # each freshly computed value is still available.
            H2_pressure.append(partial_H2)
I am exporting a data file in CSV format using a pandas DataFrame. The data contains real numbers as well as complex numbers. When the data is exported, every number is written in complex format; even a zero value is shown as `0.000000+0.000000j`. How can I export the file so that real numbers are not written in complex format?
FYI: I have created the real and complex numbers separately. Here is the code,
import pandas as pd
import numpy as np
# Builds a wide array `ar` by concatenating Lmax x Lmax blocks (one block per
# (a, b) pair) side by side, then writes it to CSV through a DataFrame.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the concatenate / DataFrame / to_csv statements is not visible here.
# `DYu` is defined elsewhere and is not shown.
L=2;
Lmax = 9
# Real-valued scratch block, reused for every real (a, b) pair.
data = np.zeros((Lmax, Lmax))
# Complex-valued scratch block, reused for every complex (a, b) pair.
com_data = np.zeros((Lmax, Lmax), dtype = np.complex128)
# All-zero block used as the left-most block when a new row of blocks starts.
all_data_row = np.zeros((Lmax, Lmax))
for a in range (1, 4):
for b in range (1, 4):
# Off-diagonal pairs involving index 3 take the complex branch.
if (a == 3 or b ==3) and (a != b):
for L1 in range(1, Lmax+1):
for L2 in range(1, Lmax+1):
com_data[L1-1, L2-1] = DYu(a,b,L1,L,L2)
# Concatenating a complex128 block makes NumPy promote the whole result to
# complex128, so every value in `ar` is then stored (and written) as complex.
ar = np.concatenate((ar, com_data),axis=1)
else:
for L1 in range(1, Lmax+1):
for L2 in range(1, Lmax+1):
data[L1-1, L2-1] = DYu(a,b,L1,L,L2)
# b == 1 starts `ar` afresh from the zero block; other b values extend it.
if b == 1:
ar = np.concatenate((all_data_row, data),axis=1)
else:
ar = np.concatenate((ar, data),axis=1)
data_array = pd.DataFrame(ar)
# NOTE(review): this drops columns 0..9 (ten columns), while the zero padding
# block is only Lmax = 9 columns wide - confirm that 0:10 is intended.
data_array = data_array.drop(data_array.iloc[:,0:10], axis=1)
data_array.to_csv(f'folder_directory\\L = {L}, a = {a}, b = {b}.csv')
Here DYu is a long function that is generating complex values if the condition
if (a == 3 or b ==3) and (a != b):
is met.
It seems like I have got the answer. I am just posting it here in case someone needs it. The key is to separate the real and complex values at the time of forming data_array
# Split the data into real and imaginary parts and write each part to its own
# CSV file, so that no value is written in complex notation.
# NOTE(review): assumes `data_array` exposes `.real` / `.imag` (i.e. it is a
# NumPy array at this point) - confirm against the code that builds it.
real_part = np.round(data_array.real, 10)
imag_part = np.round(data_array.imag, 10)

# NOTE(review): passing a DataFrame slice to drop() uses that slice's column
# labels (0 .. Lmax-1) as the labels to remove, first from the rows (axis=0)
# and then from the columns (axis=1) - confirm that dropping rows is intended.
data_array_real = pd.DataFrame(real_part)
data_array_real = data_array_real.drop(data_array_real.iloc[:, 0:Lmax], axis=0)
data_array_real = data_array_real.drop(data_array_real.iloc[:, 0:Lmax], axis=1)

data_array_imag = pd.DataFrame(imag_part)
data_array_imag = data_array_imag.drop(data_array_imag.iloc[:, 0:Lmax], axis=0)
data_array_imag = data_array_imag.drop(data_array_imag.iloc[:, 0:Lmax], axis=1)

# Plain numeric grids: no index column and no header row.
data_array_real.to_csv(dirName_M + "/" + fileName_M_real , index=False, header=False)
data_array_imag.to_csv(dirName_M + "/" + fileName_M_imag , index=False, header=False)
Suppose I've a range (section) and an additional list of ranges to exclude, represented by tuples (start, end):
section=(1, 100) #Range from 1 to 100
toexclude=[(10,15),(40,50),(80,83)] #3 sub-ranges
I'm looking for an efficient algorithm which returns, from these two inputs, a new list of ranges like:
[(1,9),(16,39),(51,79),(84,100)]
That is, the main range excluding the ranges in the second list.
Thanks!
EDIT:
Actually the advice from deceze to use intervaltree seems to be interesting. With a few lines:
from intervaltree import Interval, IntervalTree
t=IntervalTree()
t[1:100]="main"
t.chop(10,15)
t.chop(40,50)
t.chop(80,83)
t
IntervalTree([Interval(1, 10, 'main'), Interval(15, 40, 'main'), Interval(50, 80, 'main'), Interval(83, 100, 'main')])
Note that intervaltree treats intervals as half-open (the end point is excluded), so the bounds differ slightly from the inclusive ranges above, but this is a minor issue.
section = (1, 100)  # Range from 1 to 100 (both ends inclusive)
toexclude = [(10, 15), (40, 50), (80, 83)]  # 3 sub-ranges (both ends inclusive)

# Every excluded integer, held in a set so each membership test below is O(1).
rangelists = {x for exc_start, exc_end in toexclude
              for x in range(exc_start, exc_end + 1)}

out_ranges = []
# Start of the kept run currently being built; None while inside an excluded
# stretch. Starting at None also covers a section that begins in an exclusion.
first = None
for x in range(section[0], section[1] + 1):
    if x in rangelists:
        if first is not None:
            # The kept run ended on the previous integer.
            out_ranges.append((first, x - 1))
            first = None
    elif first is None:
        first = x
# Close a run that extends to the end of the section.
if first is not None:
    out_ranges.append((first, section[1]))
print(out_ranges)
Something like this?
# Walk the exclusions in the order given: each one closes the kept range that
# precedes it and opens the next kept range right after it.
start, end = section
result = []
this_start = start
for excluded in toexclude:
    exc_start, exc_end = excluded
    this_end = exc_start - 1
    result += [(this_start, this_end)]
    this_start = exc_end + 1
# Whatever remains after the last exclusion runs to the end of the section.
result += [(this_start, end)]
EDIT: added if clause to correct according to Paco H. comment
# Compute the parts of `section` (inclusive bounds) that remain after removing
# every inclusive range in `toexclude`.
start, end = section
this_start = start
result = []
# Sorting lets a single left-to-right pass handle exclusions in any order.
for exc_start, exc_end in sorted(toexclude):
    # The kept range before this exclusion ends just before it starts, and
    # never extends past the end of the section.
    this_end = min(exc_start - 1, end)
    # Emit only non-empty ranges: an exclusion that starts at (or before) the
    # current position, or overlaps the previous one, leaves no gap.
    if this_start <= this_end:
        result.append((this_start, this_end))
    # max() keeps the position from moving backwards on overlapping exclusions.
    this_start = max(this_start, exc_end + 1)
# Tail of the section after the last exclusion, if any is left.
if this_start <= end:
    result.append((this_start, end))
section = (1, 100)  # Range from 1 to 100
toexclude = [(10, 15), (40, 50), (80, 83)]  # 3 sub-ranges

# End points of the kept ranges: one before each exclusion starts, plus the
# end of the whole section.
list1 = [exc_start - 1 for exc_start, _ in toexclude]
list1.append(section[1])

# Start points of the kept ranges: the start of the section, plus one past the
# end of each exclusion.
list2 = [section[0]]
list2.extend(exc_end + 1 for _, exc_end in toexclude)

# Pair each start with its matching end.
print(list(zip(list2, list1)))
# [(1, 9), (16, 39), (51, 79), (84, 100)]
I have a data frame which can be constructed as follows:
df = pd.DataFrame({'value':scipy.stats.norm.rvs(0, 1, size=1000),
'start':np.abs(scipy.stats.norm.rvs(0, 20, size=1000))})
df['end'] = df['start'] + np.abs(scipy.stats.norm.rvs(5, 5, size=1000))
df[:10]
start value end
0 9.521781 -0.570097 17.708335
1 3.929711 -0.927318 15.065047
2 3.990466 0.756413 4.841934
3 20.676291 -1.418172 28.284301
4 13.084246 1.280723 14.121626
5 29.784740 0.236915 32.791751
6 21.626625 1.144663 28.739413
7 18.524309 0.101871 27.271344
8 21.288152 -0.727120 27.049582
9 13.556664 0.713141 22.136275
Each row represents a value assigned to an interval (start, end)
Now, I would like to get a list of the best values occurring at times 10, 12, 14, ..., 70. (It is similar to a geometric index in SQL, if you are familiar with that.)
Below is my 1st attempt in Python with pandas; it takes 18.5 ms. Can anyone help to improve it? (This procedure will be called 1M or more times with different data frames in my program.)
def get_values(data):
    """Return the best value among the intervals covering each sample time.

    The sample times are 10, 12, ..., 70. For each time ``t`` the result holds
    the largest ``value`` of the rows whose interval satisfies
    ``start <= t <= end``, or ``None`` when no row covers ``t``. Every sample
    time is evaluated independently, so a gap in coverage does not hide values
    at later times.

    Args:
        data: DataFrame with ``value``, ``start`` and ``end`` columns. It is
            sorted in place by ``value`` (descending) as a side effect.

    Returns:
        A list with one entry per sample time (31 entries).
    """
    # sort_values replaces sort_index(by=...), which current pandas no longer
    # accepts. After sorting, the first covering row is the best one.
    data.sort_values(by='value', ascending=False, inplace=True)
    ret = []
    for t in range(10, 71, 2):
        interval = data[(data['end'] >= t) & (data['start'] <= t)]
        ret.append(interval['value'].values[0] if not interval.empty else None)
    return ret
#%prun -l 10 print get_values(df)
%timeit get_values(df)
The 2nd attempt decomposes the pandas objects into NumPy arrays as much as possible, and it takes around 0.7 ms.
def get_values(data):
    """Return the best value among the intervals covering each sample time.

    Same contract as the pandas-only version, but the row filtering runs on
    plain NumPy arrays. The sample times are 10, 12, ..., 70; each entry is the
    largest ``value`` of the rows with ``start <= t <= end``, or ``None`` when
    no row covers ``t``. Every sample time is evaluated independently.

    Args:
        data: DataFrame with ``value``, ``start`` and ``end`` columns. It is
            sorted in place by ``value`` (descending) as a side effect.

    Returns:
        A list with one entry per sample time (31 entries).
    """
    # sort_values replaces sort_index(by=...), which current pandas no longer
    # accepts. After sorting, the first covering row is the best one.
    data.sort_values(by='value', ascending=False, inplace=True)
    # Pull the columns out once; boolean masking on raw arrays avoids the
    # per-iteration DataFrame overhead.
    df_end = data['end'].values
    df_start = data['start'].values
    df_value = data['value'].values
    ret = []
    for t in range(10, 71, 2):
        values = df_value[(df_end >= t) & (df_start <= t)]
        ret.append(values[0] if len(values) else None)
    return ret
#%prun -l 10 print get_values(df)
%timeit get_values(df)
Can we improve further? I guess the next step is algorithm level, both of the above are just naive logic implementations.
I don't understand the handling of the empty case in your code; here is a faster version if we ignore that empty-case handling:
import scipy.stats as stats
import pandas as pd
import numpy as np
df = pd.DataFrame({'value':stats.norm.rvs(0, 1, size=1000),
'start':np.abs(stats.norm.rvs(0, 20, size=1000))})
df['end'] = df['start'] + np.abs(stats.norm.rvs(5, 5, size=1000))
def get_value(df, target):
    """Return, for each time in ``target``, the best value covering it.

    Args:
        df: DataFrame with ``value``, ``start`` and ``end`` columns; each row
            assigns ``value`` to the inclusive interval ``[start, end]``.
        target: 1-D sequence or array of sample times.

    Returns:
        A float array with one entry per target time: the largest ``value``
        among the rows whose interval contains that time, or NaN when no row
        contains it.
    """
    target = np.asarray(target)
    # Work in float so NaN can mark "no covering interval" even when the value
    # column holds integers.
    value = df["value"].values.astype(float)
    if value.size == 0:
        # argmax over an empty axis would raise; nothing can cover any target.
        return np.full(target.shape, np.nan)
    # Order rows by descending value so that the first covering row per target
    # is also the best one.
    idx = np.argsort(value)[::-1]
    start = df["start"].values[idx]
    end = df["end"].values[idx]
    value = value[idx]
    # mask[i, j] is True when row j covers target[i].
    mask = (target[:, None] >= start[None, :]) & (target[:, None] <= end[None, :])
    # argmax gives the first True per row, and 0 when the row is all False,
    # so `flags` records whether the chosen position is a real hit.
    index = np.argmax(mask, axis=1)
    flags = mask[np.arange(len(target)), index]
    result = value[index]
    result[~flags] = np.nan
    return result
get_value(df, np.arange(10, 71, 2))
I have recently been trying to convert a piece of Matlab code into Python code.
I have made most of the changes that I need to; however, the issue I am having is with the line that says:
y(index(m)) = 1-x(index(m));
I get the error:
"Can't assign to function call"
However I am not sure how to restructure it in order to remove this error.
I have had a look around and people mention "get item" and "set item" however I have tried to use them, but I can't get them to work (probably because I can't figure out the structure)
Here is the full code:
import numpy
# Partially converted MATLAB script. It still mixes MATLAB constructs
# (`end`, 1-based `name(index)` indexing, `^` for powers, bare ones/zeros/
# rand/ceil/double) with Python syntax, so it does not run as Python.
# NOTE(review): indentation was lost in this listing; the `end` lines mark
# where the MATLAB blocks closed.
N = 100;
B = N+1;
# 5e4 is a float literal, so M and burnin are floats here.
M = 5e4;
burnin = M;
Niter = 20;
# ones / zeros are not defined: only `import numpy` is in scope.
p = ones(B,Niter+1)/B;
hit = zeros(B,1);
for j in range(1,Niter):
x = double(rand(1,N)>0.5);
bin_x = 1+sum(x);
index = ceil(N*rand(1,M+burnin));
acceptval = rand(1,M+burnin);
for m in range(1,M+burnin):
# In Python this binds a second name to the same object; it does not copy.
y = x;
# Source of "can't assign to function call": in Python, name(...) is a call,
# while indexing uses square brackets.
y(index(m)) = 1-x(index(m));
bin_y = 1+sum(y);
alpha = min(1, p(bin_x,j)/p(bin_y,j) );
if acceptval(m)<alpha:
x = y; bin_x = bin_y;
end
if m > burnin: hit(bin_x) = hit(bin_x)+1; end
end
pnew = p[:,j];
for b in range(1,B-1):
if (hit(b+1)*hit(b) == 0):
pnew(b+1) = pnew(b)*(p(b+1,j)/p(b,j));
else:
# g and g_hat are assigned here but never created beforehand.
g(b,j) = hit(b+1)*hit(b) / (hit(b+1)+hit(b));
g_hat(b) = g(b,j)/sum(g(b,arange(1,j)));
# `^` is bitwise XOR in Python; exponentiation is `**`.
pnew(b+1) = pnew(b)*(p(b+1,j)/p(b,j))+((hit(b+1)/hit(b))^g_hat(b));
end
end
p[:,j+1] = pnew/sum(pnew);
hit[:] = 0;
end
Thanks in advance
The round brackets () indicate a function. For indexing you need [] square brackets - but that is only the first of many, many errors... I am currently going through line by line, but it's taking a while.
This code at least runs... you need to figure out whether the indexing is doing what you are expecting since Python arrays are indexed from zero, and Matlab arrays start at 1. I tried to fix that in a couple of places but didn't go through line by line - that's debugging.
Some key learnings:
There is no end statement... just stop indenting
When you import a library, you need to reference it (numpy.zeros, not zeros)
Lists are indexed from zero, not one
Indexing is done with [], not ()
Creating an array of random numbers is done with [random.random() for r in xrange(N)], not random(N).
... and many other things you will find as you look through the code below.
Good luck!
import numpy
import random

N = 100
B = N + 1
M = 50000
burnin = M
Niter = 20


def estimate_density(N, M, burnin, Niter):
    """Iteratively re-estimate a distribution over the number of ones in an N-bit vector.

    Each outer iteration draws a random 0/1 vector, runs ``M + burnin``
    single-bit-flip proposals accepted with probability
    ``min(1, p[bin_x, j] / p[bin_y, j])``, counts the visited bins after the
    burn-in, and derives the next column of ``p`` from the current column and
    those counts. Bins are 0-based: bin ``k`` holds vectors with ``k`` ones.

    Args:
        N: number of bits in the state vector.
        M: number of counted proposals per outer iteration.
        burnin: number of initial proposals per iteration that are not counted.
        Niter: number of outer iterations.

    Returns:
        Array of shape ``(N + 1, Niter + 1)``; column 0 is the uniform starting
        estimate and column ``j + 1`` is the estimate after iteration ``j``.
        Every column sums to 1.
    """
    B = N + 1
    burnin = int(burnin)
    total = int(M) + burnin
    p = numpy.ones([B, Niter + 1]) / B
    hit = numpy.zeros(B)
    g = numpy.zeros([B, Niter])
    g_hat = numpy.zeros(B)
    for j in range(Niter):
        x = [random.randint(0, 1) for _ in range(N)]
        # Integer bin index in 0..N, usable directly as an array index.
        bin_x = sum(x)
        index = [random.randint(0, N - 1) for _ in range(total)]
        acceptval = [random.random() for _ in range(total)]
        for m in range(total):
            # Copy, so that a rejected proposal leaves x untouched.
            y = list(x)
            y[index[m]] = 1 - x[index[m]]
            bin_y = sum(y)
            alpha = min(1, p[bin_x, j] / p[bin_y, j])
            if acceptval[m] < alpha:
                x = y
                bin_x = bin_y
            if m >= burnin:
                hit[bin_x] += 1
        # Copy, so that building the new estimate does not overwrite column j.
        pnew = p[:, j].copy()
        for b in range(B - 1):
            if hit[b + 1] * hit[b] == 0:
                pnew[b + 1] = pnew[b] * (p[b + 1, j] / p[b, j])
            else:
                g[b, j] = hit[b + 1] * hit[b] / (hit[b + 1] + hit[b])
                # Normalise by the weights up to and including this iteration;
                # g[b, j] > 0 here, so the sum is never zero.
                g_hat[b] = g[b, j] / g[b, :j + 1].sum()
                pnew[b + 1] = (pnew[b] * (p[b + 1, j] / p[b, j])
                               + (hit[b + 1] / hit[b]) ** g_hat[b])
        p[:, j + 1] = pnew / pnew.sum()
        hit[:] = 0
    return p


if __name__ == "__main__":
    p = estimate_density(N, M, burnin, Niter)