LSTM Keras prediction Price use more attribute - python

I'm a beginner to this AI things, I am working on a stock prediction project, I've made a multilayer LSTM model that uses Close Price to prediction the next day Close Price, my question is how to use more attribute to prediction Close Price, like: use High, Low, Open, Close, Volume to prediction at the same time
MY CODE:
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import datetime
plt.style.use('fivethirtyeight')
apple = web.DataReader('AAPL', 'yahoo', start='2012-01-01', end='2021-04-26')
apple
plt.figure(figsize=(16, 8))
plt.title('Close Price History')
plt.plot(apple.Close)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.show()
# Create a new dataframe with only Close
data = apple.filter(['Close'])
dataset = data.values
training_data_len = math.ceil(len(dataset) * 0.8)
training_data_len
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_data = scaler.fit_transform(dataset)
train_data = scaler_data[0:training_data_len, :]
train_data
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
if i <= 60:
print(x_train)
print(y_train)
print()
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
from tensorflow import keras
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(10))
model.add(Dense(1))
keras.utils.plot_model(model, show_shapes=True)
model.compile(optimizer='adam',
loss='mean_squared_error')
model.fit(x_train, y_train, epochs=15, batch_size=1)
due to my limited ability, Any help or explanation is welcome! Thank you.

Related

Using Quandl with Keras and Pandas

I am finished following a tutorial on how to make an RNN LSTM algorithm for stock prediction. I want to repurpose it to be able to use Quandl. I am really unfamiliar with python and this is my first project with it. I went straight into the deep end. I tried a few methods to get it to work but my inexperience with Pandas is the main issue. I feel like there could be some better way to do this. My main proficiency is with Java. Most of this is just filler so I can post this question.
#Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv'
dataset_train = pd.read_csv(url)
training_set = dataset_train.iloc[:, 1:2].values
dataset_train.head()
#Data Normalization
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)
#Incorporating Timesteps Into Data
X_train = []
y_train = []
for i in range(60, 2035):
X_train.append(training_set_scaled[i-60:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
#Creating the LSTM Model
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(X_train,y_train,epochs=100,batch_size=32)
#Making Predictions on the Test Set
url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/tatatest.csv'
dataset_test = pd.read_csv(url)
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 76):
X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
#Plotting the Results
plt.plot(real_stock_price, color = 'black', label = 'TATA Stock Price')
plt.plot(predicted_stock_price, color = 'green', label = 'Predicted TATA Stock Price')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('TATA Stock Price')
plt.legend()
plt.show()

How to predict next X values in my model?

I have this program that is used to predict outflow of a reservoir and I cannot seem to get it to predict the next day worth of data. The data is in 30 minute increments and would expect 48 points of data to represent the next 24 hours. Is there something I am doing incorrect or have missed?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
# Prepare data
data = pd.read_csv('data.csv')
data["Date"] = pd.to_datetime(data["Date"])
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data["Outlet"].values.reshape(-1, 1))
prediction_days = 30
x_train, y_train = [], []
for x in range(prediction_days, len(scaled_data)):
x_train.append(scaled_data[x-prediction_days:x, 0])
y_train.append(scaled_data[x, 0])
x_train, y_train = np.array(x_train), np.array(x_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# Create neural network
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=25)
# Test the model
test_start = dt.datetime(2022, 1, 1)
test_end = dt.datetime.now()
actual_outlet = data[(data['Date'] >= test_start) & (data['Date'] <= test_end)]['Outlet']
total_dataset = pd.concat((data['Outlet'], actual_outlet), axis=0)
model_inputs = total_dataset[len(total_dataset) - len(actual_outlet) - prediction_days:].values
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)
x_test = []
for x in range(prediction_days, len(model_inputs)):
x_test.append(model_inputs[x-prediction_days:x, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
prediction_outlet = model.predict(x_test)
prediction_outlet = scaler.inverse_transform(prediction_outlet)
print(len(prediction_outlet))
The length of my prediction_outlet comes to 1056. Have I missed some steps?
Your test data has 22 days of data since you are taking the slice from test_start to today. since you have 48 data points for each day it adds up to 1056 data points to predict. So you are making the right length of predictions. On the other hand, since you don't have it ordered some of the dates are mixed up. In one instance you have half of the data points from Feb-02 and the rest from Jan-13 which will be very confusing for the network. You might want to order them and make sure you don't take missing dates in your slices.

I am trying to use CNN for stock price prediction but my code does not seem to work, what do I need to change or add?

import math
import numpy as np
import pandas as pd
import pandas_datareader as pdd
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Dense, Dropout, Activation, LSTM, Convolution1D, MaxPooling1D, Flatten
from keras.models import Sequential
import matplotlib.pyplot as plt
df = pdd.DataReader('AAPL', data_source='yahoo', start='2012-01-01', end='2020-12-31')
data = df.filter(['Close'])
dataset = data.values
len(dataset)
# 2265
training_data_size = math.ceil(len(dataset)*0.7)
training_data_size
# 1586
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data
# array([[0.04288701],
# [0.03870297],
# [0.03786614],
# ...,
# [0.96610873],
# [0.98608785],
# [1. ]])
train_data = scaled_data[0:training_data_size,:]
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i,0])
if i<=60:
print(x_train)
print(y_train)
'''
[array([0.04288701, 0.03870297, 0.03786614, 0.0319038 , 0.0329498 ,
0.03577404, 0.03504182, 0.03608791, 0.03640171, 0.03493728,
0.03661088, 0.03566949, 0.03650625, 0.03368202, 0.03368202,
0.03598329, 0.04100416, 0.03953973, 0.04110879, 0.04320089,
0.04089962, 0.03985353, 0.04037657, 0.03566949, 0.03640171,
0.03619246, 0.03253139, 0.0294979 , 0.03033474, 0.02960253,
0.03002095, 0.03284518, 0.03357739, 0.03410044, 0.03368202,
0.03472803, 0.02803347, 0.02792885, 0.03556487, 0.03451886,
0.0319038 , 0.03127613, 0.03274063, 0.02688284, 0.02635988,
0.03211297, 0.03096233, 0.03472803, 0.03713392, 0.03451886,
0.03441423, 0.03493728, 0.03587866, 0.0332636 , 0.03117158,
0.02803347, 0.02897494, 0.03546024, 0.03786614, 0.0401674 ])]
[0.03933056376752886]
'''
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape
# (1526, 60, 1)
model = Sequential()
model.add(Convolution1D(64, 3, input_shape= (100,4), padding='same'))
model.add(MaxPooling1D(pool_size=2))
model.add(Convolution1D(32, 3, padding='same'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('linear'))
model.summary()
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=50, epochs=50, validation_data = (X_test, y_test), verbose=2)
test_data = scaled_data[training_data_size-60: , :]
x_test = []
y_test = dataset[training_data_size: , :]
for i in range(60, len(test_data)):
x_test.append(test_data[i-60:i, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
rsme = np.sqrt(np.mean((predictions - y_test)**2))
rsme
train = data[:training_data_size]
valid = data[training_data_size:]
valid['predictions'] = predictions
plt.figure(figsize=(16,8))
plt.title('PFE')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price in $', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'predictions']])
plt.legend(['Train', 'Val', 'predictions'], loc='lower right')
plt.show
import numpy as np
y_test, predictions = np.array(y_test), np.array(predictions)
mape = (np.mean(np.abs((predictions - y_test) / y_test))) * 100
accuracy = 100 - mape
print(accuracy)
This above is my code. I tried to edit it but does not seem to be working. I am suspecting that I did not format my dataset well but I am new to this field so I do not know what should I do to my codes such that it will fit in. I hope you guys can enlighten me on this, Thank you!
I encountered errors like : ''IndexError: index 2264 is out of bounds for axis 0 with size 2264'' and
'' ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 800 but received input with shape [None, 480]''
Your model doesn't tie to your data.
Change this line:
model.add(Convolution1D(64, 3, input_shape= (60,1), padding='same'))

Using a trained Keras model to make predictions on new csv data

so I'm making a project where basically i have to predict whether or not a house price is above or below its median price and to do that, I'm using this dataset from Kaggle(https://drive.google.com/file/d/1GfvKA0qznNVknghV4botnNxyH-KvODOC/view). 1 means "Above Median" and 0 means "Below Median". I wrote this code to train a neural network and save it as a .h5 file:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import h5py
df = pd.read_csv('housepricedata.csv')
dataset = df.values
X = dataset[:,0:10]
Y = dataset[:,10]
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.3)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)
model = Sequential([
Dense(32, activation='relu', input_shape=(10,)),
Dense(32, activation='relu'),
Dense(1, activation='sigmoid'),
])
model.compile(optimizer='sgd',
loss='binary_crossentropy',
metrics=['accuracy'])
hist = model.fit(X_train, Y_train,
batch_size=32, epochs=100,
validation_data=(X_val, Y_val))
model.save("house_price.h5")
After running it, it successfully saves the .h5 file to my directory. What I want to do now is use my trained model to make predictions on a new .csv file and determine whether or not each of those are above or below median price. This is an image of the csv file in VSCode that i want it to make predictions on:
csv file image As you can see, this file doesn't contain a 1(above median) or 0(below median) because that's what I want it to predict. This is the code I wrote to do that:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.models import load_model
import h5py
df = pd.read_csv('data.csv')
dataset = df.values
X = dataset[:,0:10]
Y = dataset[:,10]
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.3)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)
model = load_model("house_price.h5")
y_pred = model.predict(X_test)
print(y_pred)
It's output is [[0.00101464]] I have no clue what that is and why it's only returning one value even though the csv file has 4 rows. Does anyone know how I can fix that and be able to predict either a 1 or a 0 for each row in the csv file?
Thank You!
As much I understand what you want!
Let's Try ! This code work for me
import tensorflow
model = tensorflow.keras.models.load_model("house_price.h5")
y_pred=model.predict(X_test)
still you are not able to get visit following site
1:answer1
2:answer2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('C:\\Users\\acer\\Downloads\\housepricedata.csv')
dataset.head()
X=dataset.iloc[:,0:10]
y=dataset.iloc[:,10]
X.head()
from sklearn.preprocessing import StandardScaler
obj=StandardScaler()
X=obj.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split
(X,y,random_state=2020,test_size=0.25)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation =
'relu', input_dim = 10))
# classifier.add(Dropout(p = 0.1))
# Adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation
= 'relu'))
# classifier.add(Dropout(p = 0.1))
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation
= 'sigmoid'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics
= ['accuracy'])
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
print(y_pred)
classifier.save("house_price.h5")
import tensorflow
model = tensorflow.keras.models.load_model("house_price.h5")
y_pred=model.predict(X_test)
y_pred = (y_pred > 0.5)
print(y_pred)
Both y_pred produce same output for me
Here one thing you not y_pred not contain 0 and 1 because you use sigmoid function which determine predication in probability
so if(y_pred>0.5) it mean value is one
#True rep one
#false rep zero
#you can use replace function or map function of pandas to get convert true
into 1

How to create a neural network for regression?

I am trying to use Keras to make a neural network. The data I am using is https://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics. My code is as follows:
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
from sklearn.model_selection import train_test_split
data = np.genfromtxt(r"""file location""", delimiter=',')
model = Sequential()
model.add(Dense(32, activation = 'relu', input_dim = 6))
model.add(Dense(1,))
model.compile(optimizer='adam', loss='mean_squared_error', metrics = ['accuracy'])
Y = data[:,-1]
X = data[:, :-1]
From here I have tried using model.fit(X, Y), but the accuracy of the model appears to remain at 0. I am new to Keras so this is probably an easy solution, apologies in advance.
My question is what is the best way to add regression to the model so that the accuracy increases? Thanks in advance.
First of all, you have to split your dataset into training set and test set using train_test_split class from sklearn.model_selection library.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.08, random_state = 0)
Also, you have to scale your values using StandardScaler class.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
Then, you should add more layers in order to get better results.
Note
Usually it's a good practice to apply following formula in order to find out the total number of hidden layers needed.
Nh = Ns/(α∗ (Ni + No))
where
Ni = number of input neurons.
No = number of output neurons.
Ns = number of samples in training data set.
α = an arbitrary scaling factor usually 2-10.
So our classifier becomes:
# Initialising the ANN
model = Sequential()
# Adding the input layer and the first hidden layer
model.add(Dense(32, activation = 'relu', input_dim = 6))
# Adding the second hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the third hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the output layer
model.add(Dense(units = 1))
The metric that you use- metrics=['accuracy'] corresponds to a classification problem. If you want to do regression, remove metrics=['accuracy']. That is, just use
model.compile(optimizer = 'adam',loss = 'mean_squared_error')
Here is a list of keras metrics for regression and classification
Also, you have to define the batch_size and epochs values for fit method.
model.fit(X_train, y_train, batch_size = 10, epochs = 100)
After you trained your network you can predict the results for X_test using model.predict method.
y_pred = model.predict(X_test)
Now, you can compare the y_pred that we obtained from neural network prediction and y_test which is real data. For this, you can create a plot using matplotlib library.
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()
It seems that our neural network learns very good
Here is how the plot looks.
Here is the full code
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Importing the dataset
dataset = np.genfromtxt("data.txt", delimiter='')
X = dataset[:, :-1]
y = dataset[:, -1]
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.08, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Initialising the ANN
model = Sequential()
# Adding the input layer and the first hidden layer
model.add(Dense(32, activation = 'relu', input_dim = 6))
# Adding the second hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the third hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the output layer
model.add(Dense(units = 1))
#model.add(Dense(1))
# Compiling the ANN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the ANN to the Training set
model.fit(X_train, y_train, batch_size = 10, epochs = 100)
y_pred = model.predict(X_test)
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()

Categories