Tuple index out of range, trying to reshape

Tuple index out of range, trying to reshape - python

I'm trying to follow the youtube tutorial from the link below, but I'm getting the error: "Tuple index out of range" I've compared my code to the video and it seems to match. It's the very last line of code that returns the error. I did create and compile the model, had to remove that code in order to post. Any ideas on how to correct this?
#https://www.youtube.com/watch?v=PuZY9q-aKLw
#Load data
company = 'FB'
start = dt.datetime(2012,1,1)
end = dt.datetime(2020,1,1)
data = web.DataReader(company, 'yahoo', start, end)
#Prepare Data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1,1))
prediction_days=60
x_train = []
y_train = []
for x in range(prediction_days, len(scaled_data)):
x_train.append(scaled_data[x-prediction_days:x, 0])
y_train.append(scaled_data[x, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
#Test the model accuracy
test_start = dt.date(2020,1,1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values
total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)
model_inputs = total_dataset[len(total_dataset)- len(test_data) - prediction_days:].values
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)
#Make predictions on test data
x_test = []
for x in range(prediction_days, len(model.inputs)):
x_test.append(model_inputs[x-prediction_days:x, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

Related

Predict data outside existing data

Iv'e made a basic prediction to learn this complicated area, the prediction works but only as long as i already have data.
The result i want is to predict data further than my existing data to predict what hasn't been yet.
So like i said i'm new and if someone can show me how or tell me what i dont understand good enough in the code to do this i would be grateful.
heres the code:
from cProfile import label
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from sklearn.preprocessing import MinMaxScaler
df = pd.read_csv('./Data/market-price-3y.csv')
# Use only one column
df = df[['Date', 'Close']]
# Change type of date from object to datetime
df['Date'] = pd.to_datetime(df.Date, format='%Y-%m-%d %H:%M:%S')
# Set Date as index
df.index = df['Date']
print(df.head())
# Show the data as a graph
# plt.plot(df['Close'], label='Close Price History', color='red')
# plt.show()
df = df.sort_index(ascending=True, axis=0)
data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Close'])
for i in range(0, len(data)):
data['Date'][i] = df['Date'][i]
data['Close'][i] = df['Close'][i]
scaler = MinMaxScaler(feature_range=(0,1))
data.index = data.Date
data.drop('Date', axis=1, inplace=True)
# Split data into training and testing datasets
final_data = data.values
train_data = final_data[0:900,:]
valid_data = final_data[900:,:]
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(final_data)
x_train_data, y_train_data = [], []
for i in range(60, len(train_data)):
x_train_data.append(scaled_data[i-60:i,0])
y_train_data.append(scaled_data[i,0])
x_train_data = np.asarray(x_train_data)
y_train_data = np.asarray(y_train_data)
x_train_data = np.reshape(x_train_data, (x_train_data.shape[0], x_train_data.shape[1],1))
# Create the LSTM Model
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(np.shape(x_train_data)[1], 1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))
model_data = data[len(data) - len(valid_data)-60:].values
model_data = model_data.reshape(-1,1)
model_data = scaler.transform(model_data)
# Train and test the data
lstm_model.compile(loss='mean_squared_error', optimizer='adam')
lstm_model.fit(x_train_data, y_train_data, epochs=1, batch_size=1, verbose=2)
# Test Data
X_test = []
for i in range(60, model_data.shape[0]):
X_test.append(model_data[i-60:i,0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Result
predicted_price = lstm_model.predict(X_test)
predicted_price = scaler.inverse_transform(predicted_price)
train_data = data[:900]
valid_data = data[900:]
valid_data['Predictions'] = predicted_price
plt.plot(train_data['Close'])
plt.plot(valid_data[['Close', 'Predictions']])
plt.show()
[![Matplotlib output][1]][1] [1]: https://i.stack.imgur.com/7j0Yf.png
So i want the prediction to keep going for lets say 15more points instead of ending when the test data ends.

here you can increase the data length at this code phase:
# Test Data
X_test = []
for i in range(60, model_data.shape[0]):
X_test.append(model_data[i-60:i,0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
here instead of 60, you can give 60+15.
or you can create a new test dataset like similar to the above coding it depends on your data set. :)

how to extract data from dataframe in python (Index)

I am trying to take the feature but not getting the results.
df_close = df['Close']
df_train = df_close[:'2019-12-31']
df_train.shape
training_set = df_close
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
training_set_scaled[1]
import numpy as np
X_train = []
y_train = []
for i in range(100, training_set.shape[1]):
X_train.append(training_set_scaled[i-100:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train
and the result is:
array([], dtype=float64)

If the value of training_set.shape[1] is smaller then 100 then the inside of the for loop is skipped, leaving X_train empty.
You could test this case by adding a print statement inside the for loop. Let me know if it worked, good luck!

How to predict multiple features using keras with time series?

I have a problem I don't know how to fix transform to add new features in order to make more proper forecast. The code below predicts stock prices by Close value. Data:
Open High Low Close Adj Close Volume
Datetime
2020-03-10 09:30:00+03:00 5033.0 5033.0 4690.0 4840.0 4840.0 702508
2020-03-10 10:30:00+03:00 4840.0 4870.0 4700.0 4746.5 4746.5 1300648
2020-03-10 11:30:00+03:00 4746.5 4783.0 4706.0 4745.5 4745.5 1156482
2020-03-10 12:30:00+03:00 4745.5 4884.0 4730.0 4870.0 4870.0 1213268
2020-03-10 13:30:00+03:00 4874.0 4990.5 4867.5 4886.5 4886.5 1958028
... ... ... ... ... ... ...
2020-04-03 14:30:00+03:00 5177.0 5217.0 5164.0 5211.5 5211.5 385696
2020-04-03 15:30:00+03:00 5212.0 5364.0 5191.0 5269.5 5269.5 1091066
2020-04-03 16:30:00+03:00 5270.0 5297.0 5209.0 5220.5 5220.5 518686
2020-04-03 17:30:00+03:00 5222.0 5271.0 5184.0 5220.5 5220.5 665096
2020-04-03 18:30:00+03:00 5217.5 5223.5 5197.0 5204.5 5204.5 261400
I want to add Volume and Open features, but getting error:
predictions = scaler.inverse_transform(predictions)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/preprocessing/_data.py", line 436, in inverse_transform
X -= self.min_
ValueError: non-broadcastable output operand with shape (40,1) doesn't match the broadcast shape (40,3)
Q1: How to change inverse_transform and what else do I need to change (input_shape argument maybe) to get correct results?
Q2: The result will be prediction of Close value. But how do I predict Volume value also? I guess I need to set model.add(Dense(2)), but can I do 2 predictions correctly in one code, or I need to execute script separately? How to do that? How do I get Volume than Open when model.add(Dense(2))?
Full code:
from math import sqrt
from numpy import concatenate
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import LSTM
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf
start = (datetime.now() - timedelta(days=30))
end = (datetime.now() - timedelta(days=0))
df = yf.download(tickers="LKOH.ME", start=start.strftime("%Y-%m-%d"), end=end.strftime("%Y-%m-%d"), interval="60m")
df = df.loc[start.strftime("%Y-%m-%d"):end.strftime("%Y-%m-%d")]
# I need here add another features
# df.filter(['Close', 'Open', 'Volume']) <-- this will make further an error with shapes
data = df.filter(['Close'])
dataset = data.values
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
model = Sequential()
# should i change to input_shape=(x_train.shape[1], 3) ?
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, batch_size=1, epochs=1)
test_data = scaled_data[training_data_len - 60: , :]
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
x_test.append(test_data[i-60:i, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions) # error here

The problem is that you are fitting MinMaxScaler on dataset, then splitting dataset into x_train and y_train and then later on trying to use the inverse_transform method on the predictions, which have the same shape as y_train. I suggest you create x_train and y_train and fit MinMaxScaler only to x_train. y_train doesn't need to be scaled for the model and that will save you needing to inverse_transform the predictions completely.
So instead of
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
Use
#Get the number of rows to train the model on, 40 rows for test
training_data_len = len(dataset) - 40
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
scaler = MinMaxScaler(feature_range=(0,1))
x_train = scaler.fit_transform(x_train) # Only scaling x_train
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
and just delete the line predictions = scaler.inverse_transform(predictions).
Updates relating to additional questions in the comments
The definition of y_test is inconsistent with y_train. Specifically, y_test is defined as y_test = dataset[training_data_len:, :] which is using all of the columns of dataset. Instead, to be consistent with y_train, it should be dataset[training_data_len:, 0].
Handling splitting the data is often clearer and less error-prone if done in pandas:
# Starting with the dataframe 'data'
data = df.filter(['Close', 'Open', 'Volume'])
# Create x/y test/train directly from 'data'
training_data_len = len(data) - 40
x_train = data[['Open', 'Volume']][:training_data_len]
y_train = data.Close[:training_data_len]
x_test = data[['Open', 'Volume']][training_data_len:]
y_test = data.Close[training_data_len:]
# Then confirm you have the expected subsets by checking things like
# shape (and info(), describe(), etc.)
x_train.shape, x_test.shape
> ((160, 2), (40, 2))
y_train.shape, y_test.shape
> ((160,), (40,))

How to add more parameters while predicting using LSTM

What changes should I make in this code to predict the output based on all the parameters listed on the dataset and predict the next day opening stock price?
When I tried to run it showed an out of shape error. This code works fine with only 1 parameter.
My code is as follows:
dataset_train = pd.read_csv('ongc_train.csv')
dataset_train = dataset_train.dropna()
training_set = dataset_train.iloc[:, 1:2].value
# Creating a dataset with 60 timesteps and 1 output
X_train = []
Y_train = []
for i in range(60,2493):
X_train.append(training_set_scaled[i-60:i, 0])
Y_train.append(training_set_scaled[i, 0])
X_train, Y_train = np.array(X_train), np.array(Y_train)
# Reshaping
X_train np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
# Fitting the RNN to the training set
regressor.fit(X_train, Y_train, epochs=100, batch_size=32)
# Getting the predicted stock price of 2017
# Concatenating the original training and test set
# Vertical concatenating of open stock prices
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 61):
X_test.append(inputs[i-60:i, 0])
X_test=np.array(X_test)
X_test=np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
actual = dataset_test.iloc[:, 1:2].values
print("Predicted Stock Price:",predicted_stock_price)
Thank you.

Doing cross validation from scratch

I found this function definition on Stack Overflow:
def fold_i_of_k(dataset, i, k):
n = len(dataset)
return len(dataset[n*(i-1)//k:n*i//k])
# this is my code below
#Constants
FOLD_I = 1
FOLD_K =10
#Creating 10 folds
counter = 1
s=0
total_ac = 0
while counter!=FOLD_K+1:
print("Fold ",counter)
fold = fold_i_of_k(dataset,counter,10)
d_fold = dataset[s:s + fold]
#print(d_fold.index.values)
#print(d_fold.iloc[1:3,0:2])
d_test = d_fold
X_test = d_test.iloc[:,0:11]
y_test = d_test.iloc[:,11:12]
d_train = dataset.drop(dataset.index[s:s+fold])
X_train = d_train.iloc[:,0:11]
y_train = d_train.iloc[:,11:12]
##print(dataset)
##print(d_fold)
##print(d_train)
##print(d_test)
##print(len(X_train))
##print(len(y_train))
##print(X_test)
##print(y_test)
#print(fold)
X_train = X_train.as_matrix()
X_train = preprocessing.scale(X_train)
y_train = y_train.as_matrix()
X_test = X_test.as_matrix()
X_test = preprocessing.scale(X_test)
y_test = y_test.as_matrix()
#l1 = len(y_train)
#np.reshape(y_train, l1)
#print(y_train)
from numpy import array
#l = len(y_test)
#np.reshape(y_test, l)
#print(y_test)
data.reshape((data.shape[0], 1))
y_train = array(y_train)
print(y_train.shape)
lr = LogisticRegression()
lr.fit(X_train,y_train)
#lr_pred = lr.predict(X_test)
#ac = accuracy_score(y_test,lr_pred)
#print(ac)
##print(classification_report(y_test,lr_pred))
total_ac = total_ac + ac
s = s + fold
counter= counter+1
total_ac = total_ac / FOLD_K
print("Cross validation accuracy is: ",total_ac)`
I am getting following error:
/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:578:
DataConversionWarning: A column-vector y was passed when a 1d array
was expected. Please change the shape of y to (n_samples, ), for
example using ravel().
y = column_or_1d(y, warn=True)
How can I fix it?

y_train.ravel() solved the problem.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tuple index out of range, trying to reshape - python

Related

Predict data outside existing data

how to extract data from dataframe in python (Index)

How to predict multiple features using keras with time series?

How to add more parameters while predicting using LSTM

Doing cross validation from scratch

Categories

Resources