I trained model for predict ranking.
features has 4 columns. now rank, bidding amount, keyword, and time.
I guess, I did some mistakes when I build model.
When I check for nan value in my dataset, it doesn't have nan values.
please check my code, and If you want to find a problem while executing the code, I will attach a data set.
ps. when i get prediction value for one time stamp, I use determinerank function.
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from pandas import DataFrame
import numpy as np
import re
from sklearn.model_selection import train_test_split, StratifiedKFold
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Activation
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import load_model
from keras.optimizers import Adam
import keras.backend as K
import sklearn.preprocessing
label_binarizer = sklearn.preprocessing.LabelBinarizer()
model = load_model("C:/Users/gyuri/Downloads/01-0.582880.hdf5")
def categorize(time):
new_series = []
for i in time:
if re.match('[0-9]*:[0-9]*:[0-9]', i): # hh: mm: ss형식과 일치하는 경우에만 카테고리화 하기 위함.
if i == '24:00:00':
num = 0
else:
hr, mi, se = map(int, i.split(':')) # 리스트 내 모든 원소들에 int형 적용. : 기준으로 split함.
num = hr * 6 + mi // 10 # 10분 단위로 넘버링이 1씩 증가 하니까 분 나누기 10의 몫을 더해줌.
else:
num = -1
new_series += [num]
df['categorizedTime'] = new_series
label_binarizer.fit(range(max(df['categorizedTime']) + 1))
one_hot_time = label_binarizer.transform(df['categorizedTime'])
del df['SubmittedTime']
def categorize_time(time):
if re.match('[0-9]*:[0-9]*:[0-9]', time): # hh: mm: ss형식과 일치하는 경우에만 카테고리화 하기 위함.
if time == '24:00:00':
num = 0
else:
hr, mi, se = map(int, time.split(':')) # 리스트 내 모든 원소들에 int형 적용. : 기준으로 split함.
num = hr * 6 + mi // 10 # 10분 단위로 넘버링이 1씩 증가 하니까 분 나누기 10의 몫을 더해줌.
else:
num = -1
return num
def determineRank(t, n, bid_t, w, h, k):
# t = str(input()) # 시간
# n = int(input()) # 현재 랭킹
# bid_t = int(input()) # 비딩 가격
# w = int(input()) # 요일
# h = int(input()) # 주말 여부
# k = str(input()) # 키워드
encode = LabelEncoder()
k = encode.fit_transform([k])
bid_t = normalization(bid_t)
t = categorize_time(t)
new_list = []
new_list = [t, n, bid_t, w, h, k]
new_list = np.array(new_list)
new_list = new_list.reshape(1, 6, 1)
rank = load_model.predict(new_list)
#rank = round(rank.item(0))
return rank.item(0)
def determinehamsu(t, n, bid_t, w, h, k):
encode = LabelEncoder()
rank = determineRank(t, n, bid_t, w, h, k)
df = pd.DataFrame(
data={'Time': [t], 'Now_Rank': [n], 'Amount': [bid_t], 'Weekday': [w], 'Holiday': [h], 'Keyword': [k]},
columns=['Time', 'Now_Rank', 'Amount', 'Weekday', 'Holiday', 'Keyword'])
if bid_t <= 6010:
for bid_t in range(bid_t, 6010, 10):
rank = determineRank(t, n, bid_t, w, h, k)
df = df.append({'Time': t,
'Now_Rank': n,
'Amount': bid_t,
'Weekday': w,
'Holiday': h,
'Keyword': k,
'Target': rank}, ignore_index=True)
df.to_csv("new_sample5.csv")
print(rank)
print(bid_t)
return df
elif bid_t > 6010:
for bid_t in range(bid_t, 0, -10):
rank = determineRank(t, n, bid_t, w, h, k)
df = df.append({'Time': t,
'Now_Rank': n,
'Amount': bid_t,
'Weekday': w,
'Holiday': h,
'Keyword': k,
'Target': rank}, ignore_index=True)
df.to_csv("sample6.csv")
print(rank)
print(bid_t)
return df.item(0)
def normalization(x):
x_np = np.asarray(x)
return (x_np - x_np.min()) / (x_np.max() - x_np.min())
filename = 'C:/Users/gyuri/Desktop/진짜수정완료.csv'
df = pd.read_csv('C:/Users/gyuri/Desktop/진짜수정완료.csv', encoding='euc-kr', delimiter=',')
df = pd.DataFrame(df)
df.info()
del df['ExecutedDate']
del df['queueName']
del df['cycleMinutes']
del df['shoudBid']
del df['fetchMethod']
del df['siteUrl']
del df['retryQueueName']
del df['retryCount']
del df['Submitted']
del df['submittedAt']
del df['SubmittedDate']
del df['nccKeywordId']
del df['statDt']
del df['statDtTimestamp']
del df['delaySeconds']
del df['customerId']
del df['bidStatusCode']
df.groupby('currentRank').count()
df.groupby('targetRank').count()
df.groupby('bidAmt').count()
scaler = MinMaxScaler()
encoder = LabelEncoder()
normalization(df['bidAmt'])
categorize(df['SubmittedTime'])
label_binarizer = sklearn.preprocessing.LabelBinarizer()
label_binarizer.fit(range(max(df['categorizedTime'])+1))
one_hot_time = label_binarizer.transform(df['categorizedTime'])
print('{0}'.format(one_hot_time))
one_hot_time.shape
print(df['categorizedTime'].shape)
#del df['SubmittedTime']
df['keyword'] = encoder.fit_transform(df['keyword'])
df.info()
#df['Holiday'].loc[df.Holiday=='Y']=1
#df['Holiday'].loc[df.Holiday == 'N']=0
df.info()
seed = tf.set_random_seed(42)
first_file_info = df.values[0:].astype(np.float)
print("file info.shape : ", first_file_info.shape)
print("file info[0] : ", first_file_info[0])
time = one_hot_time
print("date.shape : ", time.shape)
print("date[0] : ", time[0])
print("=" * 120)
now = first_file_info[:,:1]
print("now.shape : ", now.shape)
print("now[0] : ", now[0])
print("="*120)
target = first_file_info[:,1:2]
print("target.shape : ", target.shape)
print("target[0] : ", target[0])
print("="*120)
amount = first_file_info[:, 2:3]
#norm_amount = scaler.fit_transform(amount)
print("amount.shape : ", amount.shape)
print("amount[0] : ", amount[0])
print("norm_amount[0]: ", norm_amount[0])
print("="*120)
keyword = first_file_info[:, 3:4]
print("keyword.shape : ", keyword.shape)
print("keyword[0]: ", keyword[0])
print("="*120)
x = np.concatenate((time, now, amount, keyword), axis = 1)
print("x.shape : ", x.shape)
print("x[0] : ", x[0])
print("x[-1] : ", x[-1])
print("="*100)
y = target
print("y[0] : ", y[0])
print("y[-1] : ", y[-1])
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = seed, stratify = y)
X_train = X_train.reshape(89334, 1, 4)
X_test = X_test.reshape (38287, 1, 4)
modelpath = "C:/Users/gyuri/ML_PATH/논문 파일/model/랭킹모델4/{epoch:02d}-{val_loss:4f}.hdf5"
early_stopping_callback = EarlyStopping(monitor = 'acc', patience = 10)
checkpointer = ModelCheckpoint(filepath = modelpath, monitor = 'acc', verbose = 1, save_best_only = True)
model = Sequential()
model.add(GRU(32, batch_input_shape=(1, 1, 4), return_sequences=True, stateful=True))
model.add(GRU(16, return_sequences=True, stateful=True))
model.add(GRU(8, activation='relu', return_sequences=True, stateful=True))
model.add(GRU(4, activation='relu', stateful=True))
model.add(Dense(1))
model.compile(optimizer=Adam(lr=0.001), loss='mae', metrics=['acc'])
model.fit(X_train, Y_train, epochs=30, batch_size=1, verbose=0,
callbacks=[early_stopping_callback, checkpointer],
validation_data=(X_test, Y_test))
Related
I'm trying to create a program that show top 3 of similar image to query image, using python.
I thought Siamese Network by Triplet Loss can be good option for what I want to do.
I wrote some codes and created model with small dataset in my pc. And I inputted one of the dataset into the program to evaluate my program. I expected that the program would show same image as what I input, but the program doesn't always do so.
For example, there are five images, A, B, C, D and E. I created a model which learned the five images with Siamese Network by Triplet Loss and saved the model. And I loaded the model and input the image B for prediction, expecting that the program return B as a result, but it returns D.
When comparison of distance between dataset and query follows model training, results are as I expected. (Input Image A and return Image A)
However, after model training completed, when I load trained model and try to predict, it doesn't return correctly.
Did I do something wrong in model structure?
Or Siamese Network is not appropriate for my problem?
If structure of my code is not so bad, I guess it should be an issue of quality of the dataset.
My program is as below.
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.applications import resnet
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, History
import numpy as np
import random
import os
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import datetime
from sklearn.metrics import euclidean_distances, roc_auc_score
import load_dataset2_1 as ld
now = datetime.datetime.now()
def create_resnet(size, channel, num_classes):
input_tensor = layers.Input((size, size, channel))
ResNet50 = resnet.ResNet50(weights="imagenet", input_tensor=input_tensor, include_top=False, pooling="avg")
embedding_model = Sequential()
embedding_model.add(layers.Flatten(input_shape=ResNet50.output_shape[1:]))
embedding_model.add(layers.Dense(256, activation="relu"))
embedding_model.add(layers.BatchNormalization())
embedding_model.add(layers.Dropout(0.5))
embedding_model.add(layers.Dense(num_classes))
embedding_model = Model(inputs=ResNet50.input, outputs=embedding_model(ResNet50.output))
embedding_model.summary()
trainable = False
for layer in ResNet50.layers:
if layer.name == "conv5_block1_out":
trainable = True
layer.trainable = trainable
return embedding_model
def create_concatenate_layer(embedding_model, size, channel):
input_anchor = layers.Input(shape=(size, size, channel))
input_positive = layers.Input(shape=(size, size, channel))
input_negative = layers.Input(shape=(size, size, channel))
embedding_anchor = embedding_model(input_anchor)
embedding_positive = embedding_model(input_positive)
embedding_negative = embedding_model(input_negative)
output = layers.concatenate([embedding_anchor, embedding_positive,embedding_negative])
net = Model([input_anchor, input_positive, input_negative], output)
return net
# Online Triplet
def triplet_loss(label, embeddings):
x1 = tf.expand_dims(embeddings, axis=0)
x2 = tf.expand_dims(embeddings, axis=1)
euclidean = tf.reduce_sum((x1-x2)**2, axis=-1)
lb1 = tf.expand_dims(label[:, 0], axis=0)
lb2 = tf.expand_dims(label[:, 0], axis=1)
equal_mat = tf.equal(lb1, lb2)
# positives
positive_ind = tf.where(equal_mat)
positive_dists = tf.gather_nd(euclidean, positive_ind)
print("positive_ind : ", positive_ind)
print("positive_dists : ", positive_dists)
# negatives
negative_ind = tf.where(tf.logical_not(equal_mat))
negative_dists = tf.gather_nd(euclidean, negative_ind)
print("negative_ind : ", positive_ind)
print("negative_dists : ", positive_dists)
# [P, N]
margin = 0.25
positives = tf.expand_dims(positive_dists, axis=1)
negatives = tf.expand_dims(negative_dists, axis=0)
triplets = tf.maximum(positives - negatives + margin, 0.0)
return tf.reduce_mean(triplets)
def create_batch(x_train, y_train, size, channel, batch_size):
x_anchors = np.zeros((batch_size, size, size, channel))
x_positives = np.zeros((batch_size, size, size, channel))
x_negatives = np.zeros((batch_size, size, size, channel))
for i in range(0, batch_size):
random_index = random.randint(0, x_train.shape[0]-1)
x_anchor = x_train[random_index]
y = y_train[random_index]
dogs_for_pos = np.squeeze(np.where(y_train==y))
dogs_for_neg = np.squeeze(np.where(y_train!=y))
# print("len(dogs_for_pos) : ", len(dogs_for_pos))
# print("len(dogs_for_neg) : ", len(dogs_for_neg))
x_positive = x_train[dogs_for_pos[random.randint(0, len(dogs_for_pos)-1)]]
x_negative = x_train[dogs_for_neg[random.randint(0, len(dogs_for_neg)-1)]]
x_anchors[i] = x_anchor
x_positives[i] = x_positive
x_negatives[i] = x_negative
print("x_anchors.shape___", x_anchors.shape)
print("x_positives.shape___", x_positives.shape)
print("x_negatives.shape___", x_negatives.shape)
return [x_anchors, x_positives, x_negatives]
def data_generator(x_train, y_train, size, channel, batch_size, emb_size):
while True:
x = create_batch(x_train, y_train, size, channel, batch_size)
y = np.zeros((batch_size, 3*emb_size))
yield x, y
def train_generator(X, y_label, batch_size):
while True:
indices = np.random.permutation(X.shape[0])
for i in range(len(indices)//batch_size):
current_indices = indices[i*batch_size:(i+1)*batch_size]
X_batch = X[current_indices] / 255.0
y_batch = np.zeros((batch_size, 128), np.float32)
y_batch[:, 0] = y_label[current_indices]
yield X_batch, y_batch
def step_decay(epoch):
x = 1e-3
if epoch >= 25: x /= 10.0
if epoch >= 45: x /= 10.0
return x
size = 128
channel = 3
batch_size = 64
epochs = 1000
emb = 64
def train(folder, size=size, batch_size=batch_size, channel=channel, epochs=epochs):
print("TensorFlow version: ", tf.__version__)
print("folder : ", folder)
print("size : {0}, batch_size : {1}, channel : {2}, epochs : {3}".format(size, batch_size, channel, epochs))
switch = input("Load data : On or Off: ")
if switch == "On" or switch == "ON" or switch == "on":
switch = "On"
size = ''
(x_train, y_train), (x_test, y_test), size = ld.main(switch, folder)
x_train = np.reshape(x_train, (x_train.shape[0], size, size, channel))/255.0
x_test = np.reshape(x_test, (x_test.shape[0], size, size, channel))/255.0
# print('num_files: ', num_files)
num_classes=len(os.listdir(folder))
steps_per_epoch = x_train.shape[0]//batch_size
# opt = tf.keras.optimizers.SGD(1e-3, 0.9)
opt = tf.keras.optimizers.Adam(lr=0.0001)
scheduler = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("./triplet_model/model_dbt_2.hdf5", monitor="loss", save_best_only=True, save_weight_only=True)
# es_cb = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0001, patience=3, mode='auto')
net = create_resnet(size, channel, 256)
net.summary()
net.compile(loss=triplet_loss, metrics=['accuracy'], optimizer=opt)
# hist = net.fit(data_generator(x_train, y_train, size, channel, batch_size, emb_size=emb),
# steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1,
# # validation_data = (x_test, y_test),
# callbacks=[checkpoint, scheduler]
# )
hist = net.fit_generator(train_generator(x_train, y_train, batch_size),
steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1,
# validation_data = train_generator(x_test, y_test, batch_size),
callbacks=[checkpoint, scheduler],
max_queue_size = 1
)
net.save("./triplet_model/new_model.h5")
net.save("./triplet_model/new_model")
net.save_weights("./triplet_model/new_weights.hdf5", save_format="h5")
x = range(epochs)
plt.title("Model accuracy")
plt.plot(x, hist.history["accuracy"], label="accuracy")
plt.plot(x, hist.history["loss"], label="loss")
plt.xlabel("Epoch")
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), borderaxespad=0, ncol=2)
name="resnet_dogs_trian_result {0} {1}h-{2}m-{3}s.jpg".format(now.strftime("%Y-%m-%d"), now.hour, now.minute, now.second)
plt.savefig(name, bbox_inches="tight")
plt.close()
def main(test_img, folder="Base_dogs_2", base_folder="Base_dogs"):
num_files = len(os.listdir(base_folder))
# model = create_resnet(size, channel, 256)
if not os.path.isfile("./triplet_model/model_dbt_2.hdf5") and not os.path.isfile("./triplet_model/new_weights.hdf5"):
train(folder=folder)
# train(folder="Base_dogs_2")
if os.path.isfile("./triplet_model/model_dbt_2.hdf5"):
print("Loading weights: 'model_dbt_2.hdf5'")
model = tf.keras.models.load_model("./triplet_model/new_model.h5", custom_objects={'triplet_loss' : triplet_loss})
model.load_weights("./triplet_model/model_dbt_2.hdf5")
# model.summary()
elif os.path.isfile("./triplet_model/new_weights.hdf5"):
print("Loading weights: 'new_weights.hdf5'")
model = tf.keras.models.load_model("./triplet_model/new_model.h5", custom_objects={'triplet_loss' : triplet_loss})
model.load_weights("./triplet_model/new_weights.hdf5")
else:
print("Cannot load weights")
X_base=[]
Y=[]
bbb=0
#db = Dogs.objects.all()
db = os.listdir(base_folder)
print("db : ", db)
for b_img in db:
bbb += 1
file_name = b_img
b_img = base_folder + "/" + b_img
print("img_path : ", b_img)
bmg = Image.open(b_img)
bmg.show()
bmg.save("{0}{1}.jpg".format(bbb, file_name))
# bmg = bmg.convert("L")
bmg = bmg.resize((size, size))
b_data = np.asarray(bmg)
X_base.append(b_data)
Y.append(b_img)
X_base = np.array(X_base)
Y = np.array(Y)
print("X_base.shape : ", X_base.shape)
print("Y.shape : ", Y.shape)
label_ec = LabelEncoder()
label = label_ec.fit_transform(Y)
X_base = X_base.astype(np.float32)
# X_base = tf.expand_dims(X_base, axis=-1)
print("X_base.shape after expand_dims : ", X_base.shape)
(x_base, y_base) = (X_base, label)
file = test_img
print("test_img : ", file)
X=[]
img = Image.open(file)
img.show()
# img = img.convert("L")
img = img.resize((size, size))
data = np.asarray(img)
X.append(data)
X = np.array(X)
X = X.astype(np.float32)
X = np.reshape(X, (X.shape[0], size, size, channel))/255.0
print("X.shape : ", X.shape)
#X = np.expand_dims(X, axis=0)
anchor_embedding = model.predict(x_base, verbose=1)
test_embedding = model.predict(X, verbose=1)
dist_matrix = np.zeros((test_embedding.shape[0], anchor_embedding.shape[0]), np.float32)
print("dist_matrix.shape : ", dist_matrix.shape)
for i in range(dist_matrix.shape[0]):
dist_matrix[i, :] = euclidean_distances(test_embedding[i, :].reshape(1, -1), anchor_embedding)[0]
print("dist_matrix : ", dist_matrix)
#distance against query image
min_dist = np.min(dist_matrix, axis=-1)
min_idx = np.argmin(dist_matrix)
print("min_dist : ", min_dist)
print("min_idx : ", min_idx)
print("base_file_name : ", db)
time = "{0} {1}h-{2}m-{3}s_{4}".format(now.strftime('%Y-%m-%d'), now.hour, now.minute, now.second, base_folder)
print("file_name : time__", time)
results_path = []
path = 'static/results_'
# print("x_base[min_idx] : ", x_base[min_idx])
pil_img_1 = np.reshape(x_base[min_idx], (size, size, channel))
print("pil_img_1.shape : ", pil_img_1.shape)
pil_img_1 = Image.fromarray(pil_img_1.astype(np.uint8))
results_path_1 = '{0}{1}_1.jpg'.format(path, time)
pil_img_1.save(results_path_1)
results_path.append(str(results_path_1))
min_dist_2 = np.min(dist_matrix[dist_matrix > min_dist], axis=-1)
print("min_dist_2 : ", min_dist_2)
print("np.squeeze(np.where(dist_matrix==min_dist_2)) : ",np.squeeze(np.where(dist_matrix==min_dist_2)))
min_idx_2 = np.squeeze(np.where(dist_matrix==min_dist_2))[1]
print("min_idx_2 : ", min_idx_2)
img_2 = np.reshape(x_base[min_idx_2], (size, size, channel))
pil_img_2 = Image.fromarray(img_2.astype(np.uint8))
results_path_2 = '{0}{1}_2.jpg'.format(path, time)
pil_img_2.save(results_path_2)
results_path.append(str(results_path_2))
min_dist_3 = np.min(dist_matrix[dist_matrix > min_dist_2], axis=-1)
print("min_dist_3___", min_dist_3)
min_idx_3 = np.squeeze(np.where(dist_matrix==min_dist_3))[1]
print("np.squeeze(np.where(dist_matrix==min_dist_3)) : ",np.squeeze(np.where(dist_matrix==min_dist_3)))
print("min_idx_3___", min_idx_3)
img_3 = np.reshape(x_base[min_idx_3], (size, size, channel))
pil_img_3 = Image.fromarray(img_3.astype(np.uint8))
results_path_3 = '{0}{1}_3.jpg'.format(path, time)
pil_img_3.save(results_path_3)
results_path.append(str(results_path_3))
print(results_path)
dist_matrix = np.empty(0)
print("dist_matrix after clear : ", dist_matrix)
return results_path # return top 3 similar images
if __name__ == "__main__":
t = "A.jpg"
b_fn = "test"
result = main(test_img=t)
print(result)
I am working on clinical EHR.
I am currently referring to this blog and github link here.
https://sparklerussell.com/post/using-electronic-health-records-to-predict-future-diagnosis-codes-with-gated-recurrent-units/
https://github.com/sparalic/Electronic-Health-Records-GRUs
I have generated the dataset and processed it as per the instructions in the notebooks present in the repository. I am facing an issue trying to train the model.
using : jupytor notebook (with google colab)
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools
import pickle
import sys, random
Load Data :
def load_data(sequences, labels):
dataSize = len(labels)
idx = np.random.permutation(dataSize)
nTest = int(np.ceil(0.15 * dataSize))
nValid = int(np.ceil(0.10 * dataSize))
test_idx = idx[:nTest]
valid_idx = idx[nTest:nTest+nValid]
train_idx = idx[nTest+nValid:]
train_x = sequences[train_idx]
train_y = labels[train_idx]
test_x = sequences[test_idx]
test_y = labels[test_idx]
valid_x = sequences[valid_idx]
valid_y = labels[valid_idx]
train_x = [sorted(seq) for seq in train_x]
train_y = [sorted(seq) for seq in train_y]
valid_x = [sorted(seq) for seq in valid_x]
valid_y = [sorted(seq) for seq in valid_y]
test_x = [sorted(seq) for seq in test_x]
test_y = [sorted(seq) for seq in test_y]
train = (train_x, train_y)
test = (test_x, test_y)
valid = (valid_x, valid_y)
return (train, test, valid)
Padding the input :
def padding(seqs, labels, vocab, n_classes):
lengths = np.array([len(seq) for seq in seqs]) - 1 # remove the last list in each patient's sequences for labels
n_samples = len(lengths)
maxlen = np.max(lengths)
x = torch.zeros(maxlen, n_samples, vocab) # maxlen = number of visits, n_samples = samples
y = torch.zeros(maxlen, n_samples, n_classes)
mask = torch.zeros(maxlen, n_samples)
for idx, (seq,label) in enumerate(zip(seqs,labels)):
for xvec, subseq in zip(x[:,idx,:], seq[:-1]):
xvec[subseq] = 1.
for yvec, subseq in zip(y[:,idx,:], label[1:]):
yvec[subseq] = 1.
mask[:lengths[idx], idx] = 1.
return x, y, lengths, mask
GRU Class :
torch.manual_seed(1)
class EHRNN(nn.Module):
def __init__(self, inputDimSize, hiddenDimSize,embSize, batchSize, numClass):
super(EHRNN, self).__init__()
self.hiddenDimSize = hiddenDimSize
self.inputDimSize = inputDimSize
self.embSize = embSize
self.numClass = numClass
self.batchSize = batchSize
#Initialize random weights
self.W_z = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.W_r = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.W_h = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
self.U_z = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.U_r = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.U_h = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
self.b_z = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.b_r = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.b_h = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
self.params = [self.W_z, self.W_r, self.W_h,
self.U_z, self.U_r, self.U_h,
self.b_z, self.b_r, self.b_h]
def forward(self,emb,h):
z = torch.sigmoid(torch.matmul(emb, self.W_z) + torch.matmul(h, self.U_z) + self.b_z)
r = torch.sigmoid(torch.matmul(emb, self.W_r) + torch.matmul(h, self.U_r) + self.b_r)
h_tilde = torch.tanh(torch.matmul(emb, self.W_h) + torch.matmul(r * h, self.U_h) + self.b_h)
h = z * h + ((1. - z) * h_tilde)
return h
def init_hidden(self):
return Variable(torch.zeros(self.batchSize,self.hiddenDimSize))
Custom Layer for handling two layer GRU :
torch.manual_seed(1)
class build_EHRNN(nn.Module):
def __init__(self, inputDimSize=4894, hiddenDimSize=[200,200], batchSize=100, embSize=200,numClass=4894, dropout=0.5,logEps=1e-8):
super(build_EHRNN, self).__init__()
self.inputDimSize = inputDimSize
self.hiddenDimSize = hiddenDimSize
self.numClass = numClass
self.embSize = embSize
self.batchSize = batchSize
self.dropout = nn.Dropout(p=0.5)
self.logEps = logEps
# Embedding inputs
self.W_emb = nn.Parameter(torch.randn(self.inputDimSize, self.embSize).cuda())
self.b_emb = nn.Parameter(torch.zeros(self.embSize).cuda())
self.W_out = nn.Parameter(torch.randn(self.hiddenDimSize, self.numClass).cuda())
self.b_out = nn.Parameter(torch.zeros(self.numClass).cuda())
self.params = [self.W_emb, self.W_out,
self.b_emb, self.b_out]
def forward(self,x, y, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
# Loss calculation
cross_entropy = -(y * torch.log(yhat + self.logEps) + (1. - y) * torch.log(1. - yhat + self.logEps))
last_step = -torch.mean(y[-1] * torch.log(yhat[-1] + self.logEps) + (1. - y[-1]) * torch.log(1. - yhat[-1] + self.logEps))
prediction_loss = torch.sum(torch.sum(cross_entropy, dim=0),dim=1)/ torch.cuda.FloatTensor(lengths)
cost = torch.mean(prediction_loss) + 0.000001 * (self.W_out ** 2).sum() # regularize
return (yhat, hidden_state, cost)
def init_hidden(self):
return torch.zeros(self.batchSize, self.hiddenDimSize) # initial state
Load data :
train, test, valid = load_data(sequences, labels)
Batch Size:
batchSize = 100
n_batches = int(np.ceil(float(len(train[0])) / float(batchSize)))-1
n_batches_valid = int(np.ceil(float(len(valid[0])) / float(batchSize)))-1
Model:
model = build_EHRNN(inputDimSize=4894, hiddenDimSize=200, batchSize=20, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8)
model = model.to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr = 0.01, rho=0.90)
max_epochs = 5
loss_all = []
iteration = 0
for e in range(max_epochs):
for index in random.sample(range(n_batches), n_batches):
batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
optimizer.zero_grad()
x, y, lengths, mask = padding(batchX, batchY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
outputs, hidden, cost = model(x,y, h, lengths, mask)
if torch.cuda.is_available():
cost.cuda()
cost.backward()
nn.utils.clip_grad_norm_(model.parameters(), 5)
optimizer.step()
loss_all.append(cost.item())
iteration +=1
if iteration % 10 == 0:
# Calculate Accuracy
losses = []
model.eval()
val_loss = []
for index in random.sample(range(n_batches_valid), n_batches_valid):
validX = valid[0][:n_batches_valid*batchSize][index*batchSize:(index+1)*batchSize]
validY = valid[1][:n_batches_valid*batchSize][index*batchSize:(index+1)*batchSize]
x, y, lengths, mask = padding(validX, validY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lenghts, mask.cuda()
outputs, hidden_val, cost_val = model(x,y, h, lengths, mask)
losses.append(cost_val)
model.train()
print("Epoch: {}/{}...".format(e+1, max_epochs),
"Step: {}...".format(iteration),
"Training Loss: {:.4f}...".format(np.mean(loss_all)),
"Val Loss: {:.4f}".format(torch.mean(torch.tensor(losses))))
ERROR :
ValueError Traceback (most recent call last)
<ipython-input-76-9ca4916456a9> in <module>()
8
9 for e in range(max_epochs):
---> 10 for index in random.sample(range(n_batches), n_batches):
11 batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
12 batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
/usr/lib/python3.7/random.py in sample(self, population, k)
319 n = len(population)
320 if not 0 <= k <= n:
--> 321 raise ValueError("Sample larger than population or is negative")
322 result = [None] * k
323 setsize = 21 # size of a small set minus size of an empty list
ValueError: Sample larger than population or is negative
I tried many things but I couldn't solve the problem.
I have the following dataframe:
#create some data with Names column
data = pd.DataFrame({'Names': ['Joe', 'John', 'Jasper', 'Jez'] *4, 'Add': ['Lo', 'Po', 'Fa',
'It']*4,'Ob1' : np.random.rand(16), 'Ob2' : np.random.rand(16)})
I want to run hyperopt based on only a few combination of 'Names' & 'Add'. Having said that I have split this dataframe into a subset using the folloiwng code:
# create a dataframe of all combinations
combinations=data[['Names','Add']].drop_duplicates()
comboList=list(zip(combinations['Names'],combinations['Add']))
comboList
[('Joe', 'Lo'), ('John', 'Po'), ('Jasper', 'Fa'), ('Jez', 'It')]
In [14]:
for i,combo in enumerate(comboList):
print(combo)
subset=data[(data['Names']==combo[0]) & (data['Add']==combo[1])]
# run multiple functions on subset here
print(subset.head())
Using the above logic I have built the functions:
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Activation, Flatten
from keras import optimizers
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys
def trim_dataset(mat, batch_size):
"""
trims dataset to a size that's divisible by BATCH_SIZE
"""
no_of_rows_drop = mat.shape[0] % batch_size
if no_of_rows_drop > 0:
return mat[:-no_of_rows_drop]
else:
return mat
def build_timeseries(mat, y_col_index, time_steps):
# total number of time-series samples would be len(mat) - TIME_STEPS
dim_0 = mat.shape[0] - time_steps
dim_1 = mat.shape[1]
x = np.zeros((dim_0, time_steps, dim_1))
y = np.zeros((x.shape[0],))
for i in tqdm(range(dim_0)):
x[i] = mat[i:time_steps + i]
y[i] = mat[time_steps + i, y_col_index]
# print("length of time-series i/o {} {}".format(x.shape, y.shape))
return x, y
def data(df,batch_size, time_steps):
global mat
BATCH_SIZE = 10
TIME_STEPS = 1
x_train, x_test = train_test_split(df, train_size=883, shuffle=False)
# scale the train and test dataset
min_max_scaler = MinMaxScaler()
x_train = min_max_scaler.fit_transform(x_train)
x_test = min_max_scaler.transform(x_test)
x_train_ts, y_train_ts = build_timeseries(x_train, 0, TIME_STEPS)
x_test_ts, y_test_ts = build_timeseries(x_test, 0, TIME_STEPS)
x_train_ts = trim_dataset(x_train_ts, BATCH_SIZE)
y_train_ts = trim_dataset(y_train_ts, BATCH_SIZE)
x_test_ts = trim_dataset(x_test_ts, BATCH_SIZE)
y_test_ts = trim_dataset(y_test_ts, BATCH_SIZE)
str(np.isnan(x_test).any())))
return x_train_ts, y_train_ts, x_test_ts, y_test_ts
stime = time.time()
def print_time(text, stime):
seconds = (time.time() - stime)
print(text + " " + str(seconds / 60) + " minutes : " + str(np.round(seconds % 60)) + " seconds")
def get_readable_ctime():
return time.strftime("%d-%m-%Y %H_%M_%S")
from keras.callbacks import Callback
class LogMetrics(Callback):
def __init__(self, search_params, param, comb_no):
self.param = param
self.self_params = search_params
self.comb_no = comb_no
def on_epoch_end(self, epoch, logs):
for i, key in enumerate(self.self_params.keys()):
logs[key] = self.param[key]
logs["combination_number"] = self.comb_no
search_space = {
'batch_size': hp.choice('bs', [10]),
'time_steps': hp.choice('ts', [1]),
'lstm1_nodes':hp.choice('units_lsmt1', [100,150,200,250,300]),
'lstm1_dropouts':hp.uniform('dos_lstm1',0,1),
'lstm_layers': hp.choice('num_layers_lstm',[
{
'layers':'one',
},
{
'layers':'two',
'lstm2_nodes':hp.choice('units_lstm2', [50,100,150]),
'lstm2_dropouts':hp.uniform('dos_lstm2',0,1)
}
]),
'dense_layers': hp.choice('num_layers_dense',[
{
'layers':'one'
},
{
'layers':'two',
'dense2_nodes':hp.choice('units_dense', [10,20,30,40])
}
]),
"lr": hp.uniform('lr',0,1),
"epochs": hp.choice('epochs', [50,100,150,200,300,500,1000,2000]),
"optimizer": hp.choice('optmz',["rms"])
}
def create_model_hypopt(params):
print("Trying params:",params)
batch_size = params["batch_size"]
time_steps = params["time_steps"]
start_time = time.clock()
comboList=list(zip(Roaming_Map['roaming_partner_country'],Roaming_Map['opco']))
for i,combo in enumerate(comboList):
print("As above for the country & opco - ",i,combo)
subset=data[(data['Names']==combo[0]) & (data['add']==combo[1])]
subset=subset[["Ob1","Ob2"]]
x_train_ts, y_train_ts, x_test_ts, y_test_ts = data(subset,10, 1)
lstm_model = Sequential()
lstm_model.add(LSTM(params["lstm1_nodes"], batch_input_shape=(batch_size, time_steps, x_train_ts.shape[2]), dropout=params["lstm1_dropouts"],
recurrent_dropout=params["lstm1_dropouts"], stateful=True, return_sequences=True,
kernel_initializer='random_uniform'))
# ,return_sequences=True #LSTM params => dropout=0.2, recurrent_dropout=0.2
if params["lstm_layers"]["layers"] == "two":
lstm_model.add(LSTM(params["lstm_layers"]["lstm2_nodes"], dropout=params["lstm_layers"]["lstm2_dropouts"]))
else:
lstm_model.add(Flatten())
if params["dense_layers"]["layers"] == 'two':
lstm_model.add(Dense(params["dense_layers"]["dense2_nodes"], activation='relu'))
lstm_model.add(Dense(1, activation='sigmoid'))
lr = params["lr"]
epochs = params["epochs"]
if params["optimizer"] == 'rms':
optimizer = optimizers.RMSprop(lr=lr)
else:
optimizer = optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
lstm_model.compile(loss='mean_squared_error', optimizer=optimizer) # binary_crossentropy
history = lstm_model.fit(x_train_ts, y_train_ts, epochs=epochs, verbose=2, batch_size=batch_size,
validation_data=[x_test_ts, y_test_ts],
callbacks=[LogMetrics(search_space, params, -1)])
# for key in history.history.keys():
# print(key, "--",history.history[key])
# get the highest validation accuracy of the training epochs
val_error = np.amin(history.history['val_loss'])
print('Best validation error of epoch number',epochs,'for the combination:',combo, val_error)
return {'loss': val_error, 'status': STATUS_OK, 'model': lstm_model} # if accuracy use '-' sign
trials = Trials()
best = fmin(create_model_hypopt,
space=search_space,
algo=tpe.suggest,
max_evals=10,
trials=trials)
print_time("program completed in", stime)
best_trials = sorted(trials.results, key=lambda x: x['loss'], reverse=False)
best_trials.append(best_trials)
But this runs for all the combination of Name & Add at once with the same hyperparameters & then goes to the next set of hyperparameters. Instead I want it to run all possible hyperparameters for one combination of name & add & then go for the next combination.
I solved this problem by running the loop outside the create_model function:
for i,combo in enumerate(comboList): print(combo)
subset=data[(data['country']==combo[0]) & (data['type']==combo[1])]
subset=subset[["Data"]]
x_train_ts, y_train_ts, x_test_ts, y_test_ts = data(subset,10, 1)
trials = Trials()
best = fmin(create_model_hypopt,
space=search_space,
algo=tpe.suggest,
max_evals=1,
trials=trials)
loss=trials.losses()
loss.append(loss)
I am trying to run some code on Deep embedding clustering on mnist with the help of Keras , however, I get the following error
from keras.datasets import mnist
import numpy as np
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
n_stacks = len(dims) - 1
# input
input_img = Input(shape=(dims[0],), name='input')
x = input_img
# internal layers in encoder
for i in range(n_stacks-1):
x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
# hidden layer
encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
x = encoded
# internal layers in decoder
for i in range(n_stacks-1, 0, -1):
x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
# output
x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
decoded = x
return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',
distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
pretrain_epochs = 300
batch_size = 256
save_dir = './results'
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb)
autoencoder.save_weights(save_dir + '/ae_weights.h5')
class ClusteringLayer(Layer):
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
y_pred_last = np.copy(y_pred_kmeans)
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# computing an auxiliary target distribution
def target_distribution(q):
weight = q ** 2 / q.sum(0)
return (weight.T / weight.sum(1)).T
loss = 0
index = 0
maxiter = 8000
update_interval = 140
index_array = np.arange(x.shape[0])
tol = 0.001 # tolerance threshold to stop training
for ite in range(int(maxiter)):
if ite % update_interval == 0:
q = model.predict(x, verbose=2 )
p = target_distribution(q) # update the auxiliary target distribution p
# evaluate the clustering performance
y_pred = q.argmax(1)
if y is not None:
acc = np.round(metrics.acc(y, y_pred), 5)
nmi = np.round(metrics.nmi(y, y_pred), 5)
ari = np.round(metrics.ari(y, y_pred), 5)
loss = np.round(loss, 5)
print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)
# check stop criterion - model convergence
delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
y_pred_last = np.copy(y_pred)
if ite > 0 and delta_label < tol:
print('delta_label ', delta_label, '< tol ', tol)
print('Reached tolerance threshold. Stopping training.')
break
idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
loss = model.train_on_batch(x=x[idx], y=p[idx])
index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0
model.save_weights(save_dir + '/DEC_model_final.h5')
model.load_weights(save_dir + '/DEC_model_final.h5')
the error:
with session.graph.as_default():
AttributeError: 'NoneType' object has no attribute 'graph'
(the problem might be in saving the model but I can't figure out why I am wrong.) my code runs perfectly in jupyter notebook but I can't run it in an editor like pycharm .please help.
I just got a new GTX 1070 Founders Addition for my desktop, and I am trying to run tensorflow on this new GPU. I am using tensorflow.device() to run tensorflow on my GPU, but it seems like this is not happening. Instead it is using cpu, and almost all of my systems 8GB of ram. Here is my code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.image as mpimg
import math
print("\n\n")
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#
with tf.device("/gpu:0"):
# Helper Function To Print Percentage
def showPercent(num, den, roundAmount):
print( str( round((num / den) * roundAmount )/roundAmount ) + " % ", end="\r")
# Defince The Number Of Images To Get
def getFile(dir, getEveryNthLine):
allFiles = list(os.listdir(dir))
fileNameList = []
numOfFiles = len(allFiles)
i = 0
for fichier in allFiles:
if(i % 100 == 0):
showPercent(i, numOfFiles, 100)
if(i % getEveryNthLine == 0):
if(fichier.endswith(".png")):
fileNameList.append(dir + "/" + fichier[0:-4])
i += 1
return fileNameList
# Other Helper Functions
def init_weights(shape):
init_random_dist = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float16)
return tf.Variable(init_random_dist)
def init_bias(shape):
init_bias_vals = tf.constant(0.1, shape=shape, dtype=tf.float16)
return tf.Variable(init_bias_vals)
def conv2d(x, W):
# x --> [batch, H, W, Channels]
# W --> [filter H, filter W, Channels IN, Channels Out]
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2by2(x):
# x --> [batch, H, W, Channels]
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def convolutional_layer(input_x, shape):
W = init_weights(shape)
b = init_bias([ shape[3] ])
return tf.nn.relu(conv2d(input_x, W) + b)
def normal_full_layer(input_layer, size):
input_size = int(input_layer.get_shape()[1])
W = init_weights([input_size, size])
b = init_bias([size])
return tf.matmul(input_layer, W) + b
print("Getting Images")
fileNameList = getFile("F:\cartoonset10k-small", 1000)
print("\nloaded " + str(len(fileNameList)) + " files")
print("Defining Placeholders")
x_ph = tf.placeholder(tf.float16, shape=[None, 400, 400, 4])
y_ph = tf.placeholder(tf.float16, shape=[None])
print("Defining Conv and Pool layer 1")
convo_1 = convolutional_layer(x_ph, shape=[5, 5, 4, 32])
convo_1_pooling = max_pool_2by2(convo_1)
print("Defining Conv and Pool layer 2")
convo_2 = convolutional_layer(convo_1_pooling, shape=[5, 5, 32, 64])
convo_2_pooling = max_pool_2by2(convo_2)
print("Define Flat later and a Full layer")
convo_2_flat = tf.reshape(convo_2_pooling, [-1, 400 * 400 * 64])
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 1024))
y_pred = full_layer_one # Add Dropout Later
def getLabels(filePath):
df = []
with open(filePath, "r") as file:
for line in list(file):
tempList = line.replace("\n", "").replace('"', "").replace(" ", "").split(",")
df.append({
"attr": tempList[0],
"value":int(tempList[1]),
"maxValue":int(tempList[2])
})
return df
print("\nSplitting And Formating X, and Y Data")
x_data = []
y_data = []
numOfFiles = len(fileNameList)
i = 0
for file in fileNameList:
if i % 10 == 0:
showPercent(i, numOfFiles, 100)
x_data.append(mpimg.imread(file + ".png"))
y_data.append(pd.DataFrame(getLabels(file + ".csv"))["value"][0])
i += 1
print("\nConveting x_data to list")
i = 0
for indx in range(len(x_data)):
if i % 10 == 0:
showPercent(i, numOfFiles, 100)
x_data[indx] = x_data[indx].tolist()
i += 1
print("\n\nPerforming Train Test Split")
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.2)
print("Defining Loss And Optimizer")
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits_v2(
labels=y_ph,
logits=y_pred
)
)
optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
print("Define Var Init")
init = tf.global_variables_initializer()
with tf.Session() as sess:
print("Checkpoint Before Initializer")
sess.run(init)
print("Checkpoint After Initializer")
batch_size = 8
steps = 1
i = 0
for i in range(steps):
if i % 10:
print(i / 100, end="\r")
batch_x = []
i = 0
for i in np.random.randint(len(train_x), size=batch_size):
showPercent(i, len(train_x), 100)
train_x[i]
batch_x = [train_x[i] for i in np.random.randint(len(train_x), size=batch_size) ]
batch_y = [train_y[i] for i in np.random.randint(len(train_y), size=batch_size) ]
print(sess.run(train, {
x_ph:train_x,
y_ph:train_y,
}))
If you run this, this program seems to quit when I run global_variable_initializer(). It also prints in the terminal:
Allocation of 20971520000 exceeds 10% of system memory. When looking at my task manager, I see this:
The program is using a lot of my CPU.
The program is using a lot of my Memory.
The program is using none of my GPU.
I am not shore why this is happening. I am using an anaconda environment, and have installed tensorflow-gpu. I would really appreciate anyones suggestions and help.
In addition, when I run this, the program stops after global_variable_initializer(). I am not sure if this is related to the problem above.
Tensorflow is version 1.12. CUDA is version 10.0.130.
Help would be greatly appreciated.
Try compare time (GPU vs CPU) with this simple example:
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
def create_model():
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
epoch = 3
print('GPU:')
with tf.device('/gpu:0'):
model = create_model()
model.fit(x_train, y_train, epochs=epoch)
print('\nCPU:')
with tf.device('/cpu:0'):
model = create_model()
model.fit(x_train, y_train, epochs=epoch)