Using Quandl with Keras and Pandas - python

I have finished following a tutorial on how to build an LSTM-based RNN for stock prediction, and I want to repurpose it so that it can use data from Quandl. I am really unfamiliar with Python and this is my first project with it; I went straight into the deep end. I tried a few methods to get it to work, but my inexperience with pandas is the main issue. I feel like there could be a better way to do this. My main proficiency is with Java. Most of this is just filler so I can post this question.
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense

# Number of previous values fed to the network for each prediction.
LOOKBACK = 60

url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv'
dataset_train = pd.read_csv(url)
# The 1:2 slice keeps the selected column two-dimensional, shape (n_rows, 1).
# NOTE(review): column 1 is presumably 'Open', the column the prediction
# section below selects by name -- confirm against the CSV header.
training_set = dataset_train.iloc[:, 1:2].values
dataset_train.head()

# Data Normalization
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)

# Incorporating Timesteps Into Data
# Each sample is the LOOKBACK values preceding row i; the target is row i.
# The upper bound comes from the data rather than a hard-coded row count, so
# the same loop works for a dataset of any length.
X_train = []
y_train = []
for i in range(LOOKBACK, len(training_set_scaled)):
    X_train.append(training_set_scaled[i - LOOKBACK:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
# Add a trailing feature axis: (samples, timesteps, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Creating the LSTM Model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=100, batch_size=32)

# Making Predictions on the Test Set
url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/tatatest.csv'
dataset_test = pd.read_csv(url)
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
# Last LOOKBACK training values followed by every test value, so the first
# test row already has a full window of history.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - LOOKBACK:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
# len(inputs) == len(dataset_test) + LOOKBACK, so this yields exactly one
# window per test row regardless of how many test rows there are.
X_test = []
for i in range(LOOKBACK, len(inputs)):
    X_test.append(inputs[i - LOOKBACK:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
# Map the scaled predictions back to price units.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plotting the Results
plt.plot(real_stock_price, color='black', label='TATA Stock Price')
plt.plot(predicted_stock_price, color='green', label='Predicted TATA Stock Price')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('TATA Stock Price')
plt.legend()
plt.show()

Related

Is there a machine learning model in Python for prediction of the next set of "random" numbers in a sequence?

I have a dataset with three columns, each has a number 0-9. There are ~6700 rows in the set. I am wondering if it is possible to create a machine learning model to predict the next set of three numbers (one in each column)? If this is possible, how difficult is it and what type of a model would you recommend?
I am new to machine learning, so any help would be great!
Thank you!
I have tried using different kinds of LSTMs using tensorflow, but the results appeared to be averages rather than predictions.
Here is one of the attempts I made:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

df = pd.read_excel('Numbers1.xlsx', sheet_name='midday training')
dataset_train = df.iloc[:, 1:]
dataset_train = dataset_train.iloc[::-1]
# NOTE(review): the training array is taken from `df`, not from the reversed
# `dataset_train`, so the training rows run in the opposite order to the rows
# concatenated into `dataset_total` further down -- confirm which is intended.
time_series_data = np.array(df)
training_set = time_series_data[:, 1:4]

sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)

# Each sample is a window of exactly one previous row (i-1:i); the target is
# row i. The network therefore sees a single timestep of history per sample.
x_train = []
y_train = []
for i in range(1, 6003):
    x_train.append(training_set_scaled[i-1:i, :3])
    y_train.append(training_set_scaled[i, :3])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (6002, 1, 3))

regressor = Sequential()
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 3)))
regressor.add(Dropout(.2))
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(.2))
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(.2))
regressor.add(LSTM(units=50))
regressor.add(Dropout(.2))
regressor.add(Dense(units=3))
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(x_train, y_train, epochs=20, batch_size=32)

dataset_test = pd.read_excel('Numbers1.xlsx', sheet_name='midday')
dataset_test = dataset_test.iloc[:, 1:]
dataset_test = dataset_test.iloc[::-1]
real = dataset_test
dataset_total = pd.concat((dataset_train, dataset_test), axis=0)
# The last training row followed by every test row.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 1:].values
inputs = sc.transform(inputs)
x_test = []
for i in range(1, 6734):
    x_test.append(inputs[i-1:i, :3])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (6733, 1, 3))
predicted = regressor.predict(x_test)
# Map the scaled predictions back to the original 0-9 value range.
predicted = sc.inverse_transform(predicted)

ValueError: Data cardinality is ambiguous: x sizes: 1975 y sizes: 1 Make sure all arrays contain the same number of samples

I am running this on Colab and trying to get it to predict stock movements. I am following a tutorial and I am not very familiar with Python.
#Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv'
dataset_train = pd.read_csv(url)
training_set = dataset_train.iloc[:, 1:2].values
dataset_train.head()
#Data Normalization
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)
#Incorporating Timesteps Into Data
X_train = []
y_train = []
for i in range(60, 2035):
    X_train.append(training_set_scaled[i-60:i, 0])
# NOTE(review): this line is restored at top level on the evidence of the
# error reported below (x sizes: 1975, y sizes: 1). Placed here it runs once,
# after the loop, so y_train ends up with a single element. It belongs inside
# the loop body, next to the X_train.append call.
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
#Creating the LSTM Model
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
# This is the call that raises the "Data cardinality is ambiguous" ValueError.
model.fit(X_train,y_train,epochs=100,batch_size=32)
#Making Predictions on the Test Set
url = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/tatatest.csv'
dataset_test = pd.read_csv(url)
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
# Last 60 training values followed by every test value.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 76):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
#Plotting the Results
plt.plot(real_stock_price, color = 'black', label = 'TATA Stock Price')
plt.plot(predicted_stock_price, color = 'green', label = 'Predicted TATA Stock Price')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('TATA Stock Price')
plt.legend()
plt.show()
Error:
ValueError: Data cardinality is ambiguous:
x sizes: 1975
y sizes: 1
Make sure all arrays contain the same number of samples.
There is an indentation error in your code. Please correct it as below and try running your code again:
# Build one 60-value window and its matching target per iteration, so that
# X_train and y_train end up with the same number of samples.
for i in range(60, 2035):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])  # corrected: now inside the loop body
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print(X_train.shape)
print(y_train.shape)
Output:
(1975, 60, 1)
(1975,)
Output of the entire code:

LSTM Keras prediction Price use more attribute

I'm a beginner with AI and I am working on a stock prediction project. I've made a multilayer LSTM model that uses the Close price to predict the next day's Close price. My question is how to use more attributes to predict the Close price, for example using High, Low, Open, Close and Volume together.
MY CODE:
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import datetime
from tensorflow import keras

plt.style.use('fivethirtyeight')
apple = web.DataReader('AAPL', 'yahoo', start='2012-01-01', end='2021-04-26')
apple

plt.figure(figsize=(16, 8))
plt.title('Close Price History')
plt.plot(apple.Close)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.show()

# Create a new dataframe with only Close
data = apple.filter(['Close'])
dataset = data.values
# The first 80% of the rows (rounded up) are used for training.
training_data_len = math.ceil(len(dataset) * 0.8)
training_data_len

# NOTE(review): the scaler is fitted on the full series, including the rows
# after training_data_len, before the training slice is taken.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_data = scaler.fit_transform(dataset)
train_data = scaler_data[0:training_data_len, :]
train_data

# Each sample is the 60 values preceding row i; the target is row i.
x_train = []
y_train = []
for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
    # Show the first window/target pair only (i == 60 on the first pass).
    if i <= 60:
        print(x_train)
        print(y_train)
        print()
x_train, y_train = np.array(x_train), np.array(y_train)
# Add a trailing feature axis: (samples, timesteps, 1).
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(10))
model.add(Dense(1))
keras.utils.plot_model(model, show_shapes=True)
model.compile(optimizer='adam',
              loss='mean_squared_error')
model.fit(x_train, y_train, epochs=15, batch_size=1)
Given my limited experience, any help or explanation is welcome! Thank you.

I am trying to use CNN for stock price prediction but my code does not seem to work, what do I need to change or add?

import math
import numpy as np
import pandas as pd
import pandas_datareader as pdd
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Dense, Dropout, Activation, LSTM, Convolution1D, MaxPooling1D, Flatten
from keras.models import Sequential
import matplotlib.pyplot as plt
# Download the price history and keep only the 'Close' column.
df = pdd.DataReader('AAPL', data_source='yahoo', start='2012-01-01', end='2020-12-31')
data = df.filter(['Close'])
dataset = data.values
len(dataset)
# 2265
# The first 70% of the rows (rounded up) form the training split.
training_data_size = math.ceil(len(dataset)*0.7)
training_data_size
# 1586
# Scale into [0, 1]. The scaler is fitted on the whole series, before the
# train/test split below.
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data
# array([[0.04288701],
# [0.03870297],
# [0.03786614],
# ...,
# [0.96610873],
# [0.98608785],
# [1. ]])
# Training split: the first training_data_size scaled rows.
train_data = scaled_data[0:training_data_size, :]
# Each sample is the 60 values preceding row i; the target is row i.
x_train = []
y_train = []
for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
    # Show the first window/target pair only (i == 60 on the first pass).
    if i <= 60:
        print(x_train)
        print(y_train)
'''
[array([0.04288701, 0.03870297, 0.03786614, 0.0319038 , 0.0329498 ,
0.03577404, 0.03504182, 0.03608791, 0.03640171, 0.03493728,
0.03661088, 0.03566949, 0.03650625, 0.03368202, 0.03368202,
0.03598329, 0.04100416, 0.03953973, 0.04110879, 0.04320089,
0.04089962, 0.03985353, 0.04037657, 0.03566949, 0.03640171,
0.03619246, 0.03253139, 0.0294979 , 0.03033474, 0.02960253,
0.03002095, 0.03284518, 0.03357739, 0.03410044, 0.03368202,
0.03472803, 0.02803347, 0.02792885, 0.03556487, 0.03451886,
0.0319038 , 0.03127613, 0.03274063, 0.02688284, 0.02635988,
0.03211297, 0.03096233, 0.03472803, 0.03713392, 0.03451886,
0.03441423, 0.03493728, 0.03587866, 0.0332636 , 0.03117158,
0.02803347, 0.02897494, 0.03546024, 0.03786614, 0.0401674 ])]
[0.03933056376752886]
'''
# Convert the window lists to arrays and add a trailing feature axis so each
# sample has shape (timesteps, 1).
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape
# (1526, 60, 1)
model = Sequential()
# NOTE(review): input_shape=(100,4) does not match the (60, 1) samples built
# above (see the shape printed just before this model).
model.add(Convolution1D(64, 3, input_shape= (100,4), padding='same'))
model.add(MaxPooling1D(pool_size=2))
model.add(Convolution1D(32, 3, padding='same'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('linear'))
model.summary()
# NOTE(review): 'accuracy' is requested alongside a mean-squared-error loss on
# a continuous target; it is probably not a meaningful metric here -- confirm.
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
# NOTE(review): this call uses X_train and X_test (capitalised), but the script
# only assigns x_train / x_test, and x_test and y_test are not built until
# after this line.
model.fit(X_train, y_train, batch_size=50, epochs=50, validation_data = (X_test, y_test), verbose=2)
# Test split, prefixed with the last 60 training rows so the first test row
# already has a full window of history.
test_data = scaled_data[training_data_size-60:, :]
x_test = []
# Targets are taken from the unscaled data so errors are in price units.
y_test = dataset[training_data_size:, :]
for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
predictions = model.predict(x_test)
# Map the scaled predictions back to price units.
predictions = scaler.inverse_transform(predictions)
# Root-mean-square error between predictions and actual closes.
rsme = np.sqrt(np.mean((predictions - y_test)**2))
rsme
train = data[:training_data_size]
# Take an explicit copy: a new column is added below, and assigning into a
# slice of `data` triggers pandas' SettingWithCopyWarning.
valid = data[training_data_size:].copy()
valid['predictions'] = predictions
plt.figure(figsize=(16,8))
# NOTE(review): the title says 'PFE' but the data loaded above is 'AAPL' --
# confirm which is intended.
plt.title('PFE')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price in $', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'predictions']])
plt.legend(['Train', 'Val', 'predictions'], loc='lower right')
# The original `plt.show` lacked the call parentheses, so nothing was shown.
plt.show()
import numpy as np
y_test, predictions = np.array(y_test), np.array(predictions)
# Mean absolute percentage error, reported as "100 - MAPE".
mape = (np.mean(np.abs((predictions - y_test) / y_test))) * 100
accuracy = 100 - mape
print(accuracy)
The above is my code. I tried to edit it, but it does not seem to be working. I suspect that I did not format my dataset correctly, but I am new to this field, so I do not know what I should change in my code to make the data fit the model. I hope you can enlighten me on this. Thank you!
I encountered errors like : ''IndexError: index 2264 is out of bounds for axis 0 with size 2264'' and
'' ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 800 but received input with shape [None, 480]''
Your model doesn't tie to your data.
Change this line:
# The input shape must match the samples the question builds: x_train is
# reshaped to (samples, 60, 1), i.e. 60 timesteps with 1 feature each.
model.add(Convolution1D(64, 3, input_shape= (60,1), padding='same'))

Is there any method to generate stock price predictions outside the bounds of testing data set range?

I am trying to generate the prediction values of stock prices where the testing data set contains around 15 values but say I don't want to depend upon the values that are on the testing data set. I want to just make a prediction outside the bounds of testing data set.
Data Set used from Quandl.
I tried changing the range of the loop but that just caused the reshaping of tuple to go out of range.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# NOTE(review): the test-file listing further down this post is ordered
# newest-first. If both CSVs are ordered that way, every window built here
# runs backwards in time -- consider sorting by date ascending after loading.
# Confirm against the actual files.
dataset_train = pd.read_csv('EOD-AAPL.csv')
# The 1:2 slice keeps the selected column two-dimensional, shape (n_rows, 1).
training_set = dataset_train.iloc[:, 1:2].values
print(dataset_train.head())

sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)

# Each sample is the 60 values preceding row i; the target is row i.
X_train = []
y_train = []
for i in range(60, 2047):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
# Add a trailing feature axis: (samples, timesteps, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

regressor = Sequential()
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=100, batch_size=32)

dataset_test = pd.read_csv('EOD-AAPLtest.csv')
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
# Last 60 training values followed by every test value.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
X_test = []
Here's the part which I'm trying to enhance
# Build one 60-value window per test row. `inputs` holds the last 60 training
# values followed by every test value, so len(inputs) == len(dataset_test) + 60.
# Iterating up to len(inputs) replaces the hard-coded 75 and keeps every window
# full. Pushing the bound past len(inputs) produces short or empty slices, so
# X_test becomes ragged and the reshape below fails -- which is why simply
# widening the range does not give predictions beyond the test data.
for i in range(60, len(inputs)):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
# Add a trailing feature axis: (samples, timesteps, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
# Map the scaled predictions back to price units.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
'''
# Overlay the actual and predicted series on one chart.
plt.plot(real_stock_price, label='APPLE Stock Price', color='black')
plt.plot(predicted_stock_price, label='Predicted APPLE Stock Price', color='green')
# Axis labels are set before the title; the rendered figure is the same.
plt.xlabel('Time')
plt.ylabel('APPLE Stock Price')
plt.title('APPLE Stock Price Prediction')
plt.legend()
plt.show()
I am trying to generate, say, output for the next 30 days without depending on the data values present in the testing data set.
Testing Data Set contains:
Date,Open,High,Low,Close,Volume,Dividend,Split,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Volume
2019-04-05,196.45,197.1,195.93,197.0,18526644.0,0.0,1.0,196.45,197.1,195.93,197.0,18526644.0
2019-04-04,194.79,196.37,193.14,195.69,19114275.0,0.0,1.0,194.79,196.37,193.14,195.69,19114275.0
2019-04-03,193.25,196.5,193.15,195.35,23271830.0,0.0,1.0,193.25,196.5,193.15,195.35,23271830.0
2019-04-02,191.09,194.46,191.05,194.02,22765732.0,0.0,1.0,191.09,194.46,191.05,194.02,22765732.0
2019-04-01,191.64,191.68,188.38,191.24,27861964.0,0.0,1.0,191.64,191.68,188.38,191.24,27861964.0
2019-03-29,189.83,190.08,188.54,189.95,23563961.0,0.0,1.0,189.83,190.08,188.54,189.95,23563961.0
2019-03-28,188.95,189.559,187.53,188.72,20780363.0,0.0,1.0,188.95,189.559,187.53,188.72,20780363.0
2019-03-27,188.75,189.76,186.55,188.47,29848427.0,0.0,1.0,188.75,189.76,186.55,188.47,29848427.0
2019-03-26,191.664,192.88,184.58,186.79,49800538.0,0.0,1.0,191.664,192.88,184.58,186.79,49800538.0
2019-03-25,191.51,191.98,186.6,188.74,43845293.0,0.0,1.0,191.51,191.98,186.6,188.74,43845293.0
2019-03-22,195.34,197.69,190.78,191.05,42407666.0,0.0,1.0,195.34,197.69,190.78,191.05,42407666.0
2019-03-21,190.02,196.33,189.81,195.09,51034237.0,0.0,1.0,190.02,196.33,189.81,195.09,51034237.0
2019-03-20,186.23,189.49,184.73,188.16,31035231.0,0.0,1.0,186.23,189.49,184.73,188.16,31035231.0
2019-03-19,188.35,188.99,185.92,186.53,31646369.0,0.0,1.0,188.35,188.99,185.92,186.53,31646369.0
2019-03-18,185.8,188.39,185.79,188.02,26219832.0,0.0,1.0,185.8,188.39,185.79,188.02,26219832.0
So I want to predict data up to 30-04-2019, but X_test is currently built from 'EOD-AAPLtest.csv' as its input.

Categories