I'm trying to follow the youtube tutorial from the link below, but I'm getting the error: "Tuple index out of range" I've compared my code to the video and it seems to match. It's the very last line of code that returns the error. I did create and compile the model, had to remove that code in order to post. Any ideas on how to correct this?
#https://www.youtube.com/watch?v=PuZY9q-aKLw
#Load data
company = 'FB'
start = dt.datetime(2012,1,1)
end = dt.datetime(2020,1,1)
data = web.DataReader(company, 'yahoo', start, end)
#Prepare Data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1,1))
prediction_days=60
x_train = []
y_train = []
for x in range(prediction_days, len(scaled_data)):
x_train.append(scaled_data[x-prediction_days:x, 0])
y_train.append(scaled_data[x, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
#Test the model accuracy
test_start = dt.date(2020,1,1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values
total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)
model_inputs = total_dataset[len(total_dataset)- len(test_data) - prediction_days:].values
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)
#Make predictions on test data
x_test = []
for x in range(prediction_days, len(model.inputs)):
x_test.append(model_inputs[x-prediction_days:x, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
Related
Iv'e made a basic prediction to learn this complicated area, the prediction works but only as long as i already have data.
The result i want is to predict data further than my existing data to predict what hasn't been yet.
So like i said i'm new and if someone can show me how or tell me what i dont understand good enough in the code to do this i would be grateful.
heres the code:
from cProfile import label
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from sklearn.preprocessing import MinMaxScaler
df = pd.read_csv('./Data/market-price-3y.csv')
# Use only one column
df = df[['Date', 'Close']]
# Change type of date from object to datetime
df['Date'] = pd.to_datetime(df.Date, format='%Y-%m-%d %H:%M:%S')
# Set Date as index
df.index = df['Date']
print(df.head())
# Show the data as a graph
# plt.plot(df['Close'], label='Close Price History', color='red')
# plt.show()
df = df.sort_index(ascending=True, axis=0)
data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Close'])
for i in range(0, len(data)):
data['Date'][i] = df['Date'][i]
data['Close'][i] = df['Close'][i]
scaler = MinMaxScaler(feature_range=(0,1))
data.index = data.Date
data.drop('Date', axis=1, inplace=True)
# Split data into training and testing datasets
final_data = data.values
train_data = final_data[0:900,:]
valid_data = final_data[900:,:]
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(final_data)
x_train_data, y_train_data = [], []
for i in range(60, len(train_data)):
x_train_data.append(scaled_data[i-60:i,0])
y_train_data.append(scaled_data[i,0])
x_train_data = np.asarray(x_train_data)
y_train_data = np.asarray(y_train_data)
x_train_data = np.reshape(x_train_data, (x_train_data.shape[0], x_train_data.shape[1],1))
# Create the LSTM Model
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(np.shape(x_train_data)[1], 1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))
model_data = data[len(data) - len(valid_data)-60:].values
model_data = model_data.reshape(-1,1)
model_data = scaler.transform(model_data)
# Train and test the data
lstm_model.compile(loss='mean_squared_error', optimizer='adam')
lstm_model.fit(x_train_data, y_train_data, epochs=1, batch_size=1, verbose=2)
# Test Data
X_test = []
for i in range(60, model_data.shape[0]):
X_test.append(model_data[i-60:i,0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Result
predicted_price = lstm_model.predict(X_test)
predicted_price = scaler.inverse_transform(predicted_price)
train_data = data[:900]
valid_data = data[900:]
valid_data['Predictions'] = predicted_price
plt.plot(train_data['Close'])
plt.plot(valid_data[['Close', 'Predictions']])
plt.show()
[![Matplotlib output][1]][1] [1]: https://i.stack.imgur.com/7j0Yf.png
So i want the prediction to keep going for lets say 15more points instead of ending when the test data ends.
here you can increase the data length at this code phase:
# Test Data
X_test = []
for i in range(60, model_data.shape[0]):
X_test.append(model_data[i-60:i,0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
here instead of 60, you can give 60+15.
or you can create a new test dataset like similar to the above coding it depends on your data set. :)
I am trying to take the feature but not getting the results.
df_close = df['Close']
df_train = df_close[:'2019-12-31']
df_train.shape
training_set = df_close
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
training_set_scaled[1]
import numpy as np
X_train = []
y_train = []
for i in range(100, training_set.shape[1]):
X_train.append(training_set_scaled[i-100:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train
and the result is:
array([], dtype=float64)
If the value of training_set.shape[1] is smaller then 100 then the inside of the for loop is skipped, leaving X_train empty.
You could test this case by adding a print statement inside the for loop. Let me know if it worked, good luck!
I have a problem I don't know how to fix transform to add new features in order to make more proper forecast. The code below predicts stock prices by Close value. Data:
Open High Low Close Adj Close Volume
Datetime
2020-03-10 09:30:00+03:00 5033.0 5033.0 4690.0 4840.0 4840.0 702508
2020-03-10 10:30:00+03:00 4840.0 4870.0 4700.0 4746.5 4746.5 1300648
2020-03-10 11:30:00+03:00 4746.5 4783.0 4706.0 4745.5 4745.5 1156482
2020-03-10 12:30:00+03:00 4745.5 4884.0 4730.0 4870.0 4870.0 1213268
2020-03-10 13:30:00+03:00 4874.0 4990.5 4867.5 4886.5 4886.5 1958028
... ... ... ... ... ... ...
2020-04-03 14:30:00+03:00 5177.0 5217.0 5164.0 5211.5 5211.5 385696
2020-04-03 15:30:00+03:00 5212.0 5364.0 5191.0 5269.5 5269.5 1091066
2020-04-03 16:30:00+03:00 5270.0 5297.0 5209.0 5220.5 5220.5 518686
2020-04-03 17:30:00+03:00 5222.0 5271.0 5184.0 5220.5 5220.5 665096
2020-04-03 18:30:00+03:00 5217.5 5223.5 5197.0 5204.5 5204.5 261400
I want to add Volume and Open features, but getting error:
predictions = scaler.inverse_transform(predictions)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/preprocessing/_data.py", line 436, in inverse_transform
X -= self.min_
ValueError: non-broadcastable output operand with shape (40,1) doesn't match the broadcast shape (40,3)
Q1: How to change inverse_transform and what else do I need to change (input_shape argument maybe) to get correct results?
Q2: The result will be prediction of Close value. But how do I predict Volume value also? I guess I need to set model.add(Dense(2)), but can I do 2 predictions correctly in one code, or I need to execute script separately? How to do that? How do I get Volume than Open when model.add(Dense(2))?
Full code:
from math import sqrt
from numpy import concatenate
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import LSTM
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf
start = (datetime.now() - timedelta(days=30))
end = (datetime.now() - timedelta(days=0))
df = yf.download(tickers="LKOH.ME", start=start.strftime("%Y-%m-%d"), end=end.strftime("%Y-%m-%d"), interval="60m")
df = df.loc[start.strftime("%Y-%m-%d"):end.strftime("%Y-%m-%d")]
# I need here add another features
# df.filter(['Close', 'Open', 'Volume']) <-- this will make further an error with shapes
data = df.filter(['Close'])
dataset = data.values
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
model = Sequential()
# should i change to input_shape=(x_train.shape[1], 3) ?
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=1, epochs=1)
test_data = scaled_data[training_data_len - 60: , :]
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
x_test.append(test_data[i-60:i, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions) # error here
The problem is that you are fitting MinMaxScaler on dataset, then splitting dataset into x_train and y_train and then later on trying to use the inverse_transform method on the predictions, which have the same shape as y_train. I suggest you create x_train and y_train and fit MinMaxScaler only to x_train. y_train doesn't need to be scaled for the model and that will save you needing to inverse_transform the predictions completely.
So instead of
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
Use
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
scaler = MinMaxScaler(feature_range=(0,1))
x_train = scaler.fit_transform(x_train) # Only scaling x_train
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
and just delete the line predictions = scaler.inverse_transform(predictions).
Updates relating to additional questions in the comments
The definition of y_test is inconsistent with y_train. Specifically, y_test is defined as y_test = dataset[training_data_len:, :] which is using all of the columns of dataset. Instead, to be consistent with y_train, it should be dataset[training_data_len:, 0].
Handling splitting the data is often clearer and less error-prone if done in pandas:
# Starting with the dataframe 'data'
data = df.filter(['Close', 'Open', 'Volume'])
# Create x/y test/train directly from 'data'
training_data_len = len(data) - 40
x_train = data[['Open', 'Volume']][:training_data_len]
y_train = data.Close[:training_data_len]
x_test = data[['Open', 'Volume']][training_data_len:]
y_test = data.Close[training_data_len:]
# Then confirm you have the expected subsets by checking things like
# shape (and info(), describe(), etc.)
x_train.shape, x_test.shape
> ((160, 2), (40, 2))
y_train.shape, y_test.shape
> ((160,), (40,))
What changes should I make in this code to predict the output based on all the parameters listed on the dataset and predict the next day opening stock price?
When I tried to run it showed an out of shape error. This code works fine with only 1 parameter.
My code is as follows:
dataset_train = pd.read_csv('ongc_train.csv')
dataset_train = dataset_train.dropna()
training_set = dataset_train.iloc[:, 1:2].value
# Creating a dataset with 60 timesteps and 1 output
X_train = []
Y_train = []
for i in range(60,2493):
X_train.append(training_set_scaled[i-60:i, 0])
Y_train.append(training_set_scaled[i, 0])
X_train, Y_train = np.array(X_train), np.array(Y_train)
# Reshaping
X_train np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
# Fitting the RNN to the training set
regressor.fit(X_train, Y_train, epochs=100, batch_size=32)
# Getting the predicted stock price of 2017
# Concatenating the original training and test set
# Vertical concatenating of open stock prices
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 61):
X_test.append(inputs[i-60:i, 0])
X_test=np.array(X_test)
X_test=np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
actual = dataset_test.iloc[:, 1:2].values
print("Predicted Stock Price:",predicted_stock_price)
Thank you.
I found this function definition on Stack Overflow:
def fold_i_of_k(dataset, i, k):
n = len(dataset)
return len(dataset[n*(i-1)//k:n*i//k])
# this is my code below
#Constants
FOLD_I = 1
FOLD_K =10
#Creating 10 folds
counter = 1
s=0
total_ac = 0
while counter!=FOLD_K+1:
print("Fold ",counter)
fold = fold_i_of_k(dataset,counter,10)
d_fold = dataset[s:s + fold]
#print(d_fold.index.values)
#print(d_fold.iloc[1:3,0:2])
d_test = d_fold
X_test = d_test.iloc[:,0:11]
y_test = d_test.iloc[:,11:12]
d_train = dataset.drop(dataset.index[s:s+fold])
X_train = d_train.iloc[:,0:11]
y_train = d_train.iloc[:,11:12]
##print(dataset)
##print(d_fold)
##print(d_train)
##print(d_test)
##print(len(X_train))
##print(len(y_train))
##print(X_test)
##print(y_test)
#print(fold)
X_train = X_train.as_matrix()
X_train = preprocessing.scale(X_train)
y_train = y_train.as_matrix()
X_test = X_test.as_matrix()
X_test = preprocessing.scale(X_test)
y_test = y_test.as_matrix()
#l1 = len(y_train)
#np.reshape(y_train, l1)
#print(y_train)
from numpy import array
#l = len(y_test)
#np.reshape(y_test, l)
#print(y_test)
data.reshape((data.shape[0], 1))
y_train = array(y_train)
print(y_train.shape)
lr = LogisticRegression()
lr.fit(X_train,y_train)
#lr_pred = lr.predict(X_test)
#ac = accuracy_score(y_test,lr_pred)
#print(ac)
##print(classification_report(y_test,lr_pred))
total_ac = total_ac + ac
s = s + fold
counter= counter+1
total_ac = total_ac / FOLD_K
print("Cross validation accuracy is: ",total_ac)`
I am getting following error:
/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:578:
DataConversionWarning: A column-vector y was passed when a 1d array
was expected. Please change the shape of y to (n_samples, ), for
example using ravel().
y = column_or_1d(y, warn=True)
How can I fix it?
y_train.ravel() solved the problem.