Python AttributeError: 'DataFrame' object has no attribute 'predict' - python

I am new to Python. Currently, I am working on a machine learning project. I wanted to re-use the model in another piece of code so I have successfully generated the pickle file and the code is as below.
import pickle
from sklearn.externals import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
model = joblib.load('toyota_corolla_model.pkl')
x = pd.read_csv('Dataset.csv')
train_dates = pd.to_datetime(x['Date'])
cols = list(x)[1:2]
df_for_training = x[cols].astype(float)
x_test = sc.fit_transform(df_for_training)
y_pred = model.predict(x_test)
When I run the code, it shows the below error.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-16-0b1337d9f786> in <module>()
----> 1 y_pred = model.predict[x_test]
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in __getattr__(self, name)
5139 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5140 return self[name]
-> 5141 return object.__getattribute__(self, name)
5142
5143 def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'predict'
The model training is as follows.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
df = pd.read_csv('Dataset.csv')
# Separate dates for future plotting
train_dates = pd.to_datetime(df['Date'])
# Variables for taining
# Select the vehicle model which need to predict price
cols = list(df)[1:2]
df_for_training = df[cols].astype(float)
df_for_plot = df_for_training.tail(500)
df_for_plot.plot.line()
# Normalize the dataset
scaler = StandardScaler()
scaler = scaler.fit(df_for_training)
df_for_training_scaled = scaler.transform(df_for_training)
# Training series
trainX = []
# Prediction series
trainY = []
# Number of months we want to predict for future
n_future = 1
# Number of past months we want to use for predict the future
n_past = 14
for i in range(n_past, len(df_for_training_scaled) - n_future +1):
trainX.append(df_for_training_scaled[i - n_past:i,
0:df_for_training.shape[1]])
trainY.append(df_for_training_scaled[i + n_future - 1:i + n_future, 0])
# Convert trainX and trainY into arrays
trainX, trainY = np.array(trainX), np.array(trainY)
print('trainX shape == {}.'.format(trainX.shape))
print('trainY shape == {}.'.format(trainY.shape))
model = Sequential()
model.add(LSTM(64, activation='relu',
input_shape=(trainX.shape[1], trainX.shape[2]),
return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(trainY.shape[1]))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
model.summary()
import tensorflow as tf
model.compile(optimizer='sgd',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
history = model.fit(trainX, trainY,
epochs=10,
batch_size=10,
validation_split=0.1,
verbose=1)
plt.plot(history.history['accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()
# Forecast
# Start with the last day of training date and predict the future
# Redefining n_future to extend prediction months
# beyond the original n_future months
n_future = 50
forecast_period_dates = pd.date_range(list(train_dates)[-1],
periods = n_future,
freq = '1m').tolist()
# Forecast
forecast = model.predict(trainX[-n_future:])
# Inverse transforamtion
forecast_copies = np.repeat(forecast, df_for_training.shape[1], axis=-1)
y_pred_future = scaler.inverse_transform(forecast_copies)[:,0]
# Convert timestamp to date
forecast_dates = []
for time_i in forecast_period_dates:
forecast_dates.append(time_i.date())
df_forecast = pd.DataFrame({'Date':np.array(forecast_dates),
'Toyota_corolla':y_pred_future})
df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])
original = df[['Date', 'Toyota_corolla']]
original['Date'] = pd.to_datetime(original['Date'])
original = original.loc[original['Date'] >= '2015-1-1']
sns.lineplot(original['Date'],
original['Toyota_corolla'],
label = 'Original data')
sns.lineplot(df_forecast['Date'],
df_forecast['Toyota_corolla'],
label = 'Predicted data')
And this is how I generated the pickle file.
import pickle
from sklearn.externals import joblib
filename = 'toyota_corolla_model.pkl'
joblib.dump(df_forecast, filename)
Please help.

You are saving the dataframe:
joblib.dump(df_forecast, filename)
Instead you need to save your model:
joblib.dump(<your model name here>, filename)
like this:
joblib.dump(model, filename)

Related

bad prediction when having noise on the data: LSTM time-series regression

I want to predict the force plate using a smart insole using the LSTM model for time series prediction. the data on the force plate has positive and negative values (I think the resulting positive value is a noise). if I ignore the positive value, then the predicted results of the data testing will be bad. but if I change the positive value to 0 then the prediction results will be good. what should I do if I want to keep positive value without changing it but have good prediction result.
Force Plate Shape
2050,1
Smart Insole Shape
2050,89
below are my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from tensorflow.keras.layers import Dense,RepeatVector, LSTM, Dropout
from tensorflow.keras.layers import Flatten, Conv1D, MaxPooling1D
from tensorflow.keras.layers import Bidirectional, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline
## Load Data
Insole = pd.read_csv('1113_Rwalk40s1_list.txt', header=None, low_memory=False)
SIData = np.asarray(Insole)
df = pd.read_csv('1113_Rwalk40s1.csv', low_memory=False)
columns = ['Fx']
selected_df = df[columns]
FCDatas = selected_df[:2050]
## End Load Data
## Concatenate Data
SmartInsole = np.array(SIData[:2050])
FCData = np.array(FCDatas)
# FCData = np.where(FCData>0, 0, FCData) #making positive value to 0
Dataset = np.concatenate((SmartInsole, FCData), axis=1)
## End Concatenate Data
## Normalization Data
scaler_in = MinMaxScaler(feature_range=(0, 1))
scaler_out = MinMaxScaler(feature_range=(0, 1))
data_scaled_in = scaler_in.fit_transform(Dataset[:,0:89])
data_scaled_out = scaler_out.fit_transform(Dataset[:,89:90])
## End Normalization Data
steps= 50
inp = []
out = []
for i in range(len(data_scaled_out) - (steps)):
inp.append(data_scaled_in[i:i+steps])
out.append(data_scaled_out[i+steps])
inp= np.asanyarray(inp)
out= np.asanyarray(out)
x_train, x_test, y_train, y_test = train_test_split(inp, out, test_size=0.25,random_state=2)
## Model Building
model = Sequential()
model.add(LSTM(64, activation='relu', return_sequences= False, input_shape= (50,89)))
model.add(Dense(32,activation='relu'))
model.add(Dense(16,activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss = 'mse', optimizer=Adam(learning_rate=0.002), metrics=['mse'])
model.summary()
## End Model Building
## Model fit
history = model.fit(x_train,y_train, epochs=50, verbose=2, batch_size=64, validation_data=(x_test, y_test))
## End Model fit
## Model Loss Plot
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(loc='upper right')
plt.show()
## End Model Loss Plot
## Prediction and Model Evaluation
model.evaluate(inp, out)
predictions=model.predict(inp)
print('MSE: ',mean_squared_error(out, predictions))
print('RMSE: ',math.sqrt(mean_squared_error(out, predictions)))
print('Coefficient of determination (r2 Score): ', r2_score(out, predictions))
#invert normalize
predictions = scaler_out.inverse_transform(predictions)
out = scaler_out.inverse_transform(out)
x=[]
colors=['red','green','brown','teal','gray','black','maroon','orange','purple']
colors2=['green','red','orange','black','maroon','teal','blue','gray','brown']
x = np.arange(0,2000)*40/2000
for i in range(0,1):
plt.figure(figsize=(15,6))
plt.plot(x,out[0:2000,i],color=colors[i])
plt.plot(x,predictions[0:2000,i],markerfacecolor='none',color=colors2[i])
plt.title('LSTM Regression (Training Data)')
plt.ylabel('Force/Fx (N)')
plt.xlabel('Time(s)')
plt.legend(['Real value', 'Predicted Value'], loc='lower left')
plt.savefig('Regression Result.png'[i])
plt.show()
## End Prediction and Model Evaluation
## Model Validation
Test_Insole = pd.read_csv('1113_Rwalk40s2_list.txt', header=None, low_memory=False)
TestSIData = np.asarray(Test_Insole)
Test_df = pd.read_csv('1113_Rwalk40s2.csv', low_memory=False)
Test_columns = ['Fx']
Test_selected_df = Test_df[Test_columns]
Test_FCDatas = Test_selected_df[:2050]
test_SmartInsole = np.array(TestSIData[:2050])
test_FCData = np.array(Test_FCDatas)
# test_FCData = np.where(test_FCData>0, 0, test_FCData) #making positive value to 0
test_Dataset = np.concatenate((test_SmartInsole, test_FCData), axis=1)
test_scaler_in = MinMaxScaler(feature_range=(0, 1))
test_scaler_out = MinMaxScaler(feature_range=(0, 1))
test_data_scaled_in = test_scaler_in.fit_transform(test_Dataset[:,0:89])
test_data_scaled_out = test_scaler_out.fit_transform(test_Dataset[:,89:90])
test_steps= 50
test_inp = []
test_out = []
for i in range(len(test_data_scaled_out) - (test_steps)):
test_inp.append(test_data_scaled_in[i:i+test_steps])
test_out.append(test_data_scaled_out[i+test_steps])
test_inp= np.asanyarray(test_inp)
test_out= np.asanyarray(test_out)
model.evaluate(test_inp, test_out)
test_predictions=model.predict(test_inp)
test_predictions = test_scaler_out.inverse_transform(test_predictions)
test_out = test_scaler_out.inverse_transform(test_out)
x=[]
colors=['red','green','brown','teal','gray','black','maroon','orange','purple']
colors2=['green','red','orange','black','maroon','teal','blue','gray','brown']
x = np.arange(0,2000)*40/2000
for i in range(0,1):
plt.figure(figsize=(15,6))
plt.plot(x,test_out[0:2000,i],color=colors[i])
plt.plot(x,test_predictions[0:2000,i],markerfacecolor='none',color=colors2[i])
plt.title('LSTM Regression (Testing Data)')
plt.ylabel('Force/Fx (N)')
plt.xlabel('Time(s)')
plt.legend(['Real value', 'Predicted Value'], loc='lower left')
plt.savefig('Regression Result.png'[i])
plt.show()
## End Model validation
the Result without changing the positive value
the Result if I changing the positive value to 0

ValueError Failed to find data adapter that can handle input: <class 'keras_preprocessing.sequence.TimeseriesGenerator'>, <class 'NoneType'>

I am trying to build my Model but I keep getting this error:
ValueError Failed to find data adapter that can handle input: <class 'keras_preprocessing.sequence.TimeseriesGenerator'>, <class 'NoneType'>
My Arrays are numpy Arrays
This is my first time making a model in general
Down below is my code I took most of it from youtube so i am a little lost, would appeciate the help so i can advance my knowlege.
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib as mpl
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler , StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras_preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
df = pd.read_csv('UnfilteredCMS3.csv')
df['Timestamps '] = pd.to_datetime(df['Timestamps '])
df = df.set_index('Timestamps ')
df_input = df[['Hour', 'Minute','POA', 'AT', 'WS', 'BOM','MW']]
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df_input)
features = data_scaled
target = data_scaled[:,6] # We wish to predict 'MW': 5th column
TimeseriesGenerator(features, target, length = 288,
sampling_rate=1,
batch_size=1)[0]
x_train, x_test, y_train, y_test = train_test_split(features,
target,
test_size = 0.10,
random_state = 123,
shuffle = False)
win_length = 1440
batch_size = 64
num_features = 7
train_generator = TimeseriesGenerator(x_train,
y_train,
length = win_length,
sampling_rate = 1,
batch_size = batch_size)
test_generator = TimeseriesGenerator(x_test,
y_test,
length = win_length,
sampling_rate = 1,
batch_size = batch_size)
model = tf.keras.Sequential()
model.add(LSTM(30,
input_shape = (win_length,num_features),
return_sequences = True))
model.add(tf.keras.layers.LeakyReLU(alpha = 0.5))
model.add(tf.keras.layers.LSTM(30,
return_sequences = True))
model.add(tf.keras.layers.LeakyReLU(alpha = 0.5))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.LSTM(15,
return_sequences = False))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1))
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss',
patience = 10,
mode = 'min')
model.compile(loss = tf.losses.MeanSquaredError(),
optimizer = tf.optimizers.Adam(),
metrics = [tf.metrics.MeanAbsoluteError()])
history = model.fit_generator(train_generator,
epochs = 100,
shuffle = False,
callbacks = [early_stopping])

How do I use custom CSV in my code instead of Yahoo Finance data?

I'm building a stock prediction neural network. The tutorial i was watching was importing the stock data from yahoo finance. I want to improve the code by making it fetch the data from a CSV file so the code can be used even if you are not connected to the internet.
What do I need to change In my code to have it use custom data from a CSV file?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
company = '^GDAXI'
start = dt.datetime(2012,1,1)
end = dt.datetime(2021,1,1)
data = web.DataReader(company, 'yahoo', start, end)
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))
prediction_days = 60
x_train = []
y_train = []
for x in range(prediction_days, len(scaled_data)):
x_train.append(scaled_data[x-prediction_days:x, 0])
y_train.append(scaled_data[x, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
#BUILD MODEL
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1)) #next day prediction
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=25, batch_size=32)
#TEST ON EXISTING DATA
test_start = dt.datetime(2020,1,1)
test_end = dt.datetime.now()
test_dataset = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_dataset['Close'].values
total_dataset = pd.concat((data['Close'], test_dataset['Close']), axis=0)
model_inputs = total_dataset[len(total_dataset)-len(test_dataset)-prediction_days:].values
model_inputs = model_inputs.reshape(-1,1)
model_inputs = scaler.transform(model_inputs)
#PREDICTIONS ON TEST DATA
x_test = []
for x in range(prediction_days, len(model_inputs)):
x_test.append(model_inputs[x-prediction_days:x, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test,(x_test.shape[0], x_test.shape[1],1))
predicted_prices = model.predict(x_test)
predicted_prices = scaler.inverse_transform(predicted_prices)
#PLOT
plt.plot(actual_prices, color="green", label="Actual Price")
plt.plot(predicted_prices, color="blue", label="Predicted Price")
plt.title("GER40 Share Price")
plt.xlabel('Time')
plt.ylabel('GER40 Price')
plt.legend()
plt.show()
#Predict Next Day
real_dataset = [model_inputs[len(model_inputs)+1-prediction_days:len(model_inputs+1), 0]]
real_dataset = np.array(real_dataset)
real_dataset = np.reshape(real_dataset, (real_dataset.shape[0], real_dataset.shape[1], 1))
prediction = model.predict(real_dataset)
prediction = scaler.inverse_transform(prediction)
print(f"Close: {prediction}")
The CSV file i'm using doesn't have headings, but i think i can add those using excel
I think you should consider doing it this way.
from pandas_datareader import data as wb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
from sklearn.preprocessing import MinMaxScaler
start = '2019-06-30'
end = '2020-06-30'
tickers = ['GOOG']
thelen = len(tickers)
price_data = []
for ticker in tickers:
prices = wb.DataReader(ticker, start = start, end = end, data_source='yahoo')[['Open','Adj Close']]
price_data.append(prices.assign(ticker=ticker)[['ticker', 'Open', 'Adj Close']])
#names = np.reshape(price_data, (len(price_data), 1))
df = pd.concat(price_data)
df.reset_index(inplace=True)
for col in df.columns:
print(col)
#used for setting the output figure size
rcParams['figure.figsize'] = 20,10
#to normalize the given input data
scaler = MinMaxScaler(feature_range=(0, 1))
#to read input data set (place the file name inside ' ') as shown below
df['Adj Close'].plot()
plt.legend(loc=2)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
ntrain = 80
df_train = df.head(int(len(df)*(ntrain/100)))
ntest = -80
df_test = df.tail(int(len(df)*(ntest/100)))
#importing the packages
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
#dataframe creation
seriesdata = df.sort_index(ascending=True, axis=0)
new_seriesdata = pd.DataFrame(index=range(0,len(df)),columns=['Date','Adj Close'])
length_of_data=len(seriesdata)
for i in range(0,length_of_data):
new_seriesdata['Date'][i] = seriesdata['Date'][i]
new_seriesdata['Adj Close'][i] = seriesdata['Adj Close'][i]
#setting the index again
new_seriesdata.index = new_seriesdata.Date
new_seriesdata.drop('Date', axis=1, inplace=True)
#creating train and test sets this comprises the entire data’s present in the dataset
myseriesdataset = new_seriesdata.values
totrain = myseriesdataset[0:255,:]
tovalid = myseriesdataset[255:,:]
#converting dataset into x_train and y_train
scalerdata = MinMaxScaler(feature_range=(0, 1))
scale_data = scalerdata.fit_transform(myseriesdataset)
x_totrain, y_totrain = [], []
length_of_totrain=len(totrain)
for i in range(60,length_of_totrain):
x_totrain.append(scale_data[i-60:i,0])
y_totrain.append(scale_data[i,0])
x_totrain, y_totrain = np.array(x_totrain), np.array(y_totrain)
x_totrain = np.reshape(x_totrain, (x_totrain.shape[0],x_totrain.shape[1],1))
#LSTM neural network
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(x_totrain.shape[1],1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))
lstm_model.compile(loss='mean_squared_error', optimizer='adadelta')
lstm_model.fit(x_totrain, y_totrain, epochs=10, batch_size=1, verbose=2)
#predicting next data stock price
myinputs = new_seriesdata[len(new_seriesdata) - (len(tovalid)+1) - 60:].values
myinputs = myinputs.reshape(-1,1)
myinputs = scalerdata.transform(myinputs)
tostore_test_result = []
for i in range(60,myinputs.shape[0]):
tostore_test_result.append(myinputs[i-60:i,0])
tostore_test_result = np.array(tostore_test_result)
tostore_test_result = np.reshape(tostore_test_result,(tostore_test_result.shape[0],tostore_test_result.shape[1],1))
myclosing_priceresult = lstm_model.predict(tostore_test_result)
myclosing_priceresult = scalerdata.inverse_transform(myclosing_priceresult)
totrain = df_train
tovalid = df_test
#predicting next data stock price
myinputs = new_seriesdata[len(new_seriesdata) - (len(tovalid)+1) - 60:].values
# Printing the next day’s predicted stock price.
print(len(tostore_test_result));
print(myclosing_priceresult);
Final Result:
[[1396.532]]

How to find out misclassified images filenames

I am building a model to detect solar panel defects. I am using 255*255 images to train the model and I want to use the confusion matrix to improve my model.
The matrix gives me incorrectly classified images, but I need to find out exact filenames for the false positive and false negative images
How can I achieve this goal?
I have provided my code below:
import numpy as np
import os
import time
import keras
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
#from keras.layers import GlobalAveragePooling2D, Dense, Dropout,Activation,Flatten
#from keras.layers import Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from keras.layers import Input
from keras import models
from keras import layers
from keras import optimizers
from keras_applications.resnet import ResNet101
from keras.optimizers import SGD, Adagrad, Adadelta, RMSprop, Adam
from keras.callbacks import LearningRateScheduler
from keras.models import load_model
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
###########################################################################################################################
## Model Initials
IMAGE_SIZE = (255, 255)
BATCH_SIZE = 24
NUM_EPOCHS = 1
WEIGHTS_FINAL = 'defectdetection.hdf5'
MODEL_FINAL = 'defectdetection.h5'
BEST_WEIGHT ='1defectdetection.hdf5'
##############################################################################################
## Loading dataset for the training process
## Define data path
# Loading the training data
img_path = 'C:/Users/TeamSoloMid/SolarCellsImages/dataset/Sample0001.jpg'
img = image.load_img(img_path, target_size=(255, 255))
x = image.img_to_array(img)
print (x.shape)
x = np.expand_dims(x, axis=0)
print (x.shape)
x = preprocess_input(x)
print('Input image shape:', x.shape)
PATH = os.getcwd()
data_path = 'C:/Users/TeamSoloMid/SolarCellsImages'
data_dir_list = os.listdir(data_path)
img_data_list=[]
for dataset in data_dir_list:
img_list=os.listdir(data_path+'/'+ dataset)
print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
for img in img_list:
img_path = data_path + '/'+ dataset + '/'+ img
img = image.load_img(img_path, target_size=(255,255))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
#print('Input image shape:', x.shape)
img_data_list.append(x)
img_data = np.array(img_data_list)
print (img_data.shape)
img_data=np.rollaxis(img_data,1,0)
print (img_data.shape)
img_data=img_data[0]
print (img_data.shape)
#t=time.time()
# Define the number of classes
num_classes = 2
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:1603]=0
labels[1604:3225]=1
names = ['Defect', 'Almost']
Y = np_utils.to_categorical(labels, num_classes)
#Shuffle the dataset
x,y = shuffle(img_data,Y, random_state=2)
# Split the dataset
TestPcnt = 0.2
X_train, X_test, y_train, y_test = train_test_split(x, y,
test_size=TestPcnt,
random_state=2)
epoch=NUM_EPOCHS
###############################################################################################
# Fine tune the resnet 101
image_input = Input(shape=(255, 255, 3))
model = ResNet101(include_top=False,
input_tensor=image_input,
weights='imagenet',
backend=keras.backend,
layers=keras.layers,
models=keras.models,
utils=keras.utils)
# Freeze all the layers
for layer in model.layers[:-3]:
layer.trainable = False
#model.summary()
last_layer = model.output
# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(last_layer)
x = Dense(256, activation='relu',name='fc-1')(x)
x = Dropout(0.5)(x)
out = Dense(num_classes, activation='softmax',name='output_layer')(x)
# this is the model we will train
net_model = Model(inputs=model.input, outputs=out)
net_model.summary()
for layer in net_model.layers[:-5]:
layer.trainable = False
net_model.summary()
for layer in net_model.layers:
print(layer, layer.trainable)
#my_opti= optimizers.Adam(lr=0.00002)
#my_opti= optimizers.Adam(lr=0.00001)
################################################################################################
#Define learning Rate
learning_rate = 0.00002
decay_rate = learning_rate / epoch
momentum = 0.9
sgd = SGD(lr=learning_rate, momentum=momentum,
decay=decay_rate,
nesterov=False)
##############################################################################
## we will keep the weights of the epoch that scores highest in terms of accuracy on the test set.
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath=BEST_WEIGHT,
monitor = 'val_acc',
verbose=1,
save_best_only=True,
mode = 'max')
###################################################################
callback_list = [checkpointer]
net_model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
t=time.time()
hist = net_model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1,
callbacks = [checkpointer],
validation_data=(X_test, y_test))
print('Training time: %s' % (time.time()-1))
(loss, accuracy) = net_model.evaluate(X_test, y_test,
batch_size=BATCH_SIZE,
verbose=1)
print("[INFO] loss={:.4f}, accuracy: {:.4f}%".format(loss,accuracy * 100))
############################################################################################
## Saving The weights of the model after training
net_model.save_weights(WEIGHTS_FINAL)
print('1. Weights Saved')
net_model.save_weights(BEST_WEIGHT)
print('2. Best Weights Saved')
##############################################################################
## Saving The Complete model after training
net_model.save(MODEL_FINAL)
print('3. Model Saved')
############################################################################################
import matplotlib.pyplot as plt
# visualizing losses and accuracy
train_loss=hist.history['loss']
val_loss=hist.history['val_loss']
train_acc=hist.history['acc']
val_acc=hist.history['val_acc']
xc=range(NUM_EPOCHS)
plt.figure(1,figsize=(7,5))
plt.plot(xc,train_loss)
plt.plot(xc,val_loss)
plt.xlabel('num of Epochs')
plt.ylabel('loss')
plt.title('train_loss vs val_loss')
plt.grid(True)
plt.legend(['train','val'])
#print plt.style.available # use bmh, classic,ggplot for big pictures
plt.style.use(['classic'])
plt.figure(2,figsize=(7,5))
plt.plot(xc,train_acc)
plt.plot(xc,val_acc)
plt.xlabel('num of Epochs')
plt.ylabel('accuracy')
plt.title('train_acc vs val_acc')
plt.grid(True)
plt.legend(['train','val'],loc=4)
#print plt.style.available # use bmh, classic,ggplot for big pictures
plt.style.use(['classic'])
############################################################################
from sklearn.metrics import confusion_matrix, classification_report
import itertools
from sklearn.utils.multiclass import unique_labels
from sklearn import metrics
import seaborn as sns
import pandas as pd
from sklearn.datasets import load_files
from sklearn.svm import LinearSVC
from sklearn import svm
LABELS= ['Defect', 'Almost']
# Print confusion matrix for training data
y_pred_train = net_model.predict(X_train)
def show_confusion_matrix(validations, predictions):
matrix = metrics.confusion_matrix(validations, predictions)
plt.figure(figsize=(10, 10))
sns.heatmap(matrix,
cmap='coolwarm',
linecolor='white',
linewidths=1,
xticklabels=LABELS,
yticklabels=LABELS,
annot=True,
fmt='d')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
y_pred_test = net_model.predict(X_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)
max_y_test = np.argmax(y_test, axis=1)
show_confusion_matrix(max_y_test, max_y_pred_test)
print(classification_report(max_y_test, max_y_pred_test))
I would calculate a checksum (i.e. md5) and use that as a dictionary key that would keep the image path as value, i.e.
import hashlib
...
image_paths = {}
...
for dataset in data_dir_list:
img_list=os.listdir(data_path+'/'+ dataset)
print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
for img in img_list:
img_path = data_path + '/'+ dataset + '/'+ img
img = image.load_img(img_path, target_size=(255,255))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
#print('Input image shape:', x.shape)
img_data_list.append(x)
# in this example i am only hashing the first 5 pixels of the image. You would probably want to use all of the pixels of the image.
image_hash = hashlib.md5(str(x[0]) + str(x[1]) + str(x[2]) + str(x[3]) + str(x[4])).hexdigest()
image_paths[image_hash] = img_path
...
when you want to decode the image path you simply calculate the hash again and look the path up in the dictionary
image_hash = hashlib.md5(str(x[0]) + str(x[1]) + str(x[2]) + str(x[3]) + str(x[4])).hexdigest()
image_path = image_paths[image_hash]
While that is not the most flexible approach, I believe that it will still help you achieve your goal.
One note, hashing might be pretty expensive if you have a lot of images, but if your images do not change, you only need to hash them once and save somewhere. In the consequent attempts, you would only need to load the data instead of hashing everything again
Thanks for your answering, I have tried your solution, and there is an IndexError like:
File "C:/Users/TeamSoloMid/Solar cells Defect Detection.py", line 102, in <module>
image_hash = hashlib.md5(str(x[0])+str(x[1])+str(x[2])+str(x[3])+str(x[4])).hexdigest()
IndexError: index 1 is out of bounds for axis 0 with size 1
Below is the code I added your solution in
import numpy as np
import os
import time
import keras
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
#from keras.layers import GlobalAveragePooling2D, Dense, Dropout,Activation,Flatten
#from keras.layers import Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from keras.layers import Input
from keras import models
from keras import layers
from keras import optimizers
from keras_applications.resnet import ResNet101
from keras.optimizers import SGD, Adagrad, Adadelta, RMSprop, Adam
from keras.callbacks import LearningRateScheduler
from keras.models import load_model
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import hashlib
########################################################################################
## Model Initials
IMAGE_SIZE = (255, 255)
BATCH_SIZE = 24
NUM_EPOCHS = 1
WEIGHTS_FINAL = 'defectdetection.hdf5'
MODEL_FINAL = 'defectdetection.h5'
BEST_WEIGHT ='defectdetection.hdf5'
#########################################################################################
## Loading dataset for the training process
## Define data path
# Loading the training data
img_path = 'C:/Users/TeamSoloMid/SolarCellsImages/dataset/Sample0001.jpg'
img = image.load_img(img_path, target_size=(255, 255))
x = image.img_to_array(img)
print (x.shape)
x = np.expand_dims(x, axis=0)
print (x.shape)
x = preprocess_input(x)
print('Input image shape:', x.shape)
PATH = os.getcwd()
data_path = 'C:/Users/TeamSoloMid/SolarCellsImages'
data_dir_list = os.listdir(data_path)
image_paths = {}
img_data_list=[]
for dataset in data_dir_list:
img_list=os.listdir(data_path+'/'+ dataset)
print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
for img in img_list:
img_path = data_path + '/'+ dataset + '/'+ img
img = image.load_img(img_path, target_size=(255,255))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
#print('Input image shape:', x.shape)
img_data_list.append(x)
image_hash = hashlib.md5(str(x[0])+str(x[1])+str(x[2])+str(x[3])+str(x[4])).hexdigest()
image_paths[image_hash] = img_path
img_data = np.array(img_data_list)
print (img_data.shape)
img_data=np.rollaxis(img_data,1,0)
print (img_data.shape)
img_data=img_data[0]
print (img_data.shape)
#t=time.time()
# Define the number of classes
num_classes = 2
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:1603]=0
labels[1604:3225]=1
#labels[3226:4847]=2
names = ['Defect', 'Almost']
# convert class labels to on-hot encoding
Y = np_utils.to_categorical(labels, num_classes)
#Shuffle the dataset
x,y = shuffle(img_data,Y, random_state=2)
# Split the dataset
TestPcnt = 0.2
X_train, X_test, y_train, y_test = train_test_split(x, y,
test_size=TestPcnt,
random_state=2)
epoch=NUM_EPOCHS
########################################################################################
# Fine tune the resnet 101
image_input = Input(shape=(255, 255, 3))
model = ResNet101(include_top=False,
input_tensor=image_input,
weights='imagenet',
backend=keras.backend,
layers=keras.layers,
models=keras.models,
utils=keras.utils)
# Freeze all the layers
for layer in model.layers[:-3]:
layer.trainable = False
#model.summary()
last_layer = model.output
# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(last_layer)
x = Dense(256, activation='relu',name='fc-1')(x)
x = Dropout(0.5)(x)
out = Dense(num_classes, activation='softmax',name='output_layer')(x)
# this is the model we will train
net_model = Model(inputs=model.input, outputs=out)
net_model.summary()
for layer in net_model.layers[:-5]:
layer.trainable = False
net_model.summary()
for layer in net_model.layers:
print(layer, layer.trainable)
#my_opti= optimizers.Adam(lr=0.00002)
#my_opti= optimizers.Adam(lr=0.00001)
#########################################################################################
Define learning Rate
learning_rate = 0.00002
decay_rate = learning_rate / epoch
momentum = 0.9
sgd = SGD(lr=learning_rate, momentum=momentum,
decay=decay_rate,
nesterov=False)
##############################################################################
## we will keep the weights of the epoch that scores highest in terms of accuracy on the test set.
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath=BEST_WEIGHT,
monitor = 'val_acc',
verbose=1,
save_best_only=True,
mode = 'max')
###################################################################
callback_list = [checkpointer]
net_model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
t=time.time()
hist = net_model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1,
callbacks = [checkpointer],
validation_data=(X_test, y_test))
print('Training time: %s' % (time.time()-1))
(loss, accuracy) = net_model.evaluate(X_test, y_test,
batch_size=BATCH_SIZE,
verbose=1)
print("[INFO] loss={:.4f}, accuracy: {:.4f}%".format(loss,accuracy * 100))
############################################################################################
## Saving The weights of the model after training
net_model.save_weights(WEIGHTS_FINAL)
print('1. Weights Saved')
net_model.save_weights(BEST_WEIGHT)
print('2. Best Weights Saved')
##############################################################################
## Saving The Complete model after training
net_model.save(MODEL_FINAL)
print('3. Model Saved')
############################################################################################
import matplotlib.pyplot as plt
# visualizing losses and accuracy
train_loss=hist.history['loss']
val_loss=hist.history['val_loss']
train_acc=hist.history['acc']
val_acc=hist.history['val_acc']
xc=range(NUM_EPOCHS)
plt.figure(1,figsize=(7,5))
plt.plot(xc,train_loss)
plt.plot(xc,val_loss)
plt.xlabel('num of Epochs')
plt.ylabel('loss')
plt.title('train_loss vs val_loss')
plt.grid(True)
plt.legend(['train','val'])
#print plt.style.available # use bmh, classic,ggplot for big pictures
plt.style.use(['classic'])
plt.figure(2,figsize=(7,5))
plt.plot(xc,train_acc)
plt.plot(xc,val_acc)
plt.xlabel('num of Epochs')
plt.ylabel('accuracy')
plt.title('train_acc vs val_acc')
plt.grid(True)
plt.legend(['train','val'],loc=4)
#print plt.style.available # use bmh, classic,ggplot for big pictures
plt.style.use(['classic'])
############################################################################
from sklearn.metrics import confusion_matrix, classification_report
import itertools
from sklearn.utils.multiclass import unique_labels
from sklearn import metrics
import seaborn as sns
import pandas as pd
from sklearn.datasets import load_files
from sklearn.svm import LinearSVC
from sklearn import svm
LABELS= ['Defect', 'Almost']
# Print confusion matrix for training data
y_pred_train = net_model.predict(X_train)
def show_confusion_matrix(validations, predictions):
matrix = metrics.confusion_matrix(validations, predictions)
plt.figure(figsize=(10, 10))
sns.heatmap(matrix,
cmap='coolwarm',
linecolor='white',
linewidths=1,
xticklabels=LABELS,
yticklabels=LABELS,
annot=True,
fmt='d')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
y_pred_test = net_model.predict(X_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)
max_y_test = np.argmax(y_test, axis=1)
show_confusion_matrix(max_y_test, max_y_pred_test)
print(classification_report(max_y_test, max_y_pred_test))
image_hash = hashlib.md5(str(x[0]) + str(x[1]) + str(x[2]) + str(x[3]) + str(x[4])).hexdigest()
image_path = image_paths[image_hash]

Change prediction range in Python from tutorial

I'm probably jumped into Python at the deep end here but I have followed a tutorial from start to finish which works fine, and I generally (I think) understand.
I'm trying to recreate what I learnt using my own data.
I've got this working ok as well however the tutorial showed the prediction line on the plot graph generated over the actual data.
What do I need to change for it to predict say 28 days ahead rather than on top of the data I already have?
here is my code (i say my...mostly from tutorial!)
from connectionstring import conn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pyoff
import plotly.graph_objs as go
#import Keras
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers import LSTM
from sklearn.model_selection import KFold, cross_val_score, train_test_split
params = ("20180801","20191002")
sqlString = "SELECT ODCDAT AS DATE, SUM(ODORDQ) AS DROPIN FROM mytable.mydb WHERE ODCDAT BETWEEN %s AND %s GROUP BY ODCDAT ORDER BY ODCDAT"
command = (sqlString % params)
SQL_Query = pd.read_sql_query(command, conn)
df = pd.DataFrame(SQL_Query, columns=['DATE','DROPIN'])
df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d')
print(df.head(10))
#new dataframe
df_diff = df.copy()
df_diff['prev_day'] = df_diff['DROPIN'].shift(1)
df_diff = df_diff.dropna()
df_diff['diff'] = (df_diff['DROPIN'] - df_diff['prev_day'])
df_diff.head(10)
print(df_diff)
#plot monthly sales diff
plot_data = [
go.Scatter(
x=df_diff['DATE'],
y=df_diff['diff'],
)
]
plot_layout = go.Layout(
title='Daily Drop In Diff'
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)
#create dataframe for transformation from time series to supervised
df_supervised = df_diff.drop(['prev_day'],axis=1)
#adding lags
for inc in range(1,31):
field_name = 'lag_' + str(inc)
df_supervised[field_name] = df_supervised['diff'].shift(inc)
#drop null values
df_supervised = df_supervised.dropna().reset_index(drop=True)
print(df_supervised)
# Import statsmodels.formula.api
import statsmodels.formula.api as smf
# Define the regression formula
model = smf.ols(formula='diff ~ lag_1 + lag_2 + lag_3 + lag_4 + lag_5 + lag_6 + lag_7 + lag_8 + lag_9 + lag_10 + lag_11 + lag_12 + lag_13 + lag_14 + lag_15 + lag_16 + lag_17 + lag_18 + lag_19 + lag_20 + lag_21 + lag_22 + lag_23 + lag_24 + lag_24 + lag_25 + lag_26 + lag_27 + lag_28 + lag_29 + lag_30', data=df_supervised)
# Fit the regression
model_fit = model.fit()
# Extract the adjusted r-squared
regression_adj_rsq = model_fit.rsquared_adj
print(regression_adj_rsq)
#import MinMaxScaler and create a new dataframe for LSTM model
from sklearn.preprocessing import MinMaxScaler
df_model = df_supervised.drop(['DROPIN','DATE'],axis=1)
#split train and test set
train_set, test_set = df_model[0:-28].values, df_model[-28:].values
#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=100, batch_size=1, verbose=1, shuffle=False)
y_pred = model.predict(X_test,batch_size=1)
#for multistep prediction, you need to replace X_test values with the predictions coming from t-1
#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
#print np.concatenate([y_pred[index],X_test[index]],axis=1)
pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)
#create dataframe that shows the predicted sales
result_list = []
sales_dates = list(df[-29:].DATE)
act_sales = list(df[-29:].DROPIN)
for index in range(0,len(pred_test_set_inverted)):
result_dict = {}
result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index])
result_dict['DATE'] = sales_dates[index+1]
result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
#for multistep prediction, replace act_sales with the predicted sales
print(df_result)
#merge with actual sales dataframe
df_sales_pred = pd.merge(df,df_result,on='DATE',how='left')
#plot actual and predicted
plot_data = [
go.Scatter(
x=df_sales_pred['DATE'],
y=df_sales_pred['DROPIN'],
name='actual'
),
go.Scatter(
x=df_sales_pred['DATE'],
y=df_sales_pred['pred_value'],
name='predicted'
)
]
plot_layout = go.Layout(
title='Sales Prediction'
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)
and here is the second graph i plot which I want to predict ahead...

Categories