How can I train a Keras model on volumes with different shapes? - Python

**I am working on a project to segment the liver from CT image volumes. The CT volumes have different numbers of slices, so each volume has a different shape, e.g. (512,512,183), (512,512,64), (512,512,335), and so on.**
I tried to use None as the input dimensions together with GlobalMaxPooling3D(), as I had seen in another post,
but I still get the same error, which is:
Traceback (most recent call last):
File "E:\Liver_Seg_Project\", line 123, in train
H = model.fit_generator(aug.flow(data, label, batch_size=100),
File "C:\python3.6.1\Python\lib\site-packages\keras_preprocessing\image\", line 430, in flow subset=subset
File "C:\python3.6.1\Python\lib\site-packages\keras_preprocessing\image\", line 72, in init
(len(x), len(xx)))
ValueError: All of the arrays in x should have the same length. Found a pair with: len(x[0]) = 183, len(x[?]) = 64
Here is my model:
class ModelNw2:
    """3-D convolutional network definition (only its leading layers are shown)."""

    def build(depth, height, width):
        """Wire up the first four convolution stages on a variable-size volume.

        ``depth``, ``height`` and ``width`` are accepted but not read by the
        code shown here: the input is declared with every spatial dimension
        left as ``None`` and a single channel.
        """
        volume_shape = (None, None, None, 1)
        x = Input(volume_shape)
        # Both pooling stages halve every spatial dimension.
        halve = dict(pool_size=(2, 2, 2), strides=(2, 2, 2))

        # layer 1
        x1 = Conv3D(32, 7, padding="same", data_format="channels_last")(x)
        x1 = MaxPooling3D(**halve)(x1)

        # layer 2
        x2 = Conv3D(64, 5, padding="same")(x1)
        x2 = MaxPooling3D(**halve)(x2)

        # layer 3
        x3 = Conv3D(128, 5, padding="same")(x2)

        # layer 4
        x4 = Conv3D(128, 3, padding="same")(x3)

        # concat layer 3 and 4 along the channel axis
        concat34 = concatenate([x3, x4], axis=-1)
Here is part of the training code:
# initialize the data and labels
print("[INFO] loading images...")
for i in range(10):
if i>5:
filename ='TrainingF/image/liver-orig0' + str(j+1) + '.mhd'
filename ='TrainingF/image/liver-orig00' + str(j+1) + '.mhd'
image = sitk.ReadImage(filename)
image = sitk.GetArrayFromImage(image)
image = img_to_array(image)
print("inssss=" + str(len(data)))
print("[INFO] loading maskss...")
for i in range(10):
if i>5:
# print("label "+ str(j+1))
filename ='TrainingF/label/liver-seg0' + str(j+1) + '.mhd'
filename ='TrainingF/label/liver-seg00' + str(j+1) + '.mhd'
image = sitk.ReadImage(filename)
mask = sitk.GetArrayFromImage(image)
mask = img_to_array(mask)
Finally, I fit the data and mask NumPy arrays to the model:
aug = ImageDataGenerator(rotation_range=45, width_shift_range=0.1,
height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode="nearest")
# initialize the model
print("[INFO] compiling model...")
model =,width=512, height=512)
# train the network
print("[INFO] training network...")
weight_saver = ModelCheckpoint('weights1.h1', monitor='val_dice_coef', save_best_only=True, save_weights_only=True)
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.8 ** x)
stop_here = EarlyStopping(patience=5)
start = timeit.default_timer()
H = model.fit_generator(aug.flow(data, label, batch_size=100),
validation_data=(testX, testY), steps_per_epoch=50,
epochs=EPOCHS, verbose=2, callbacks = [weight_saver, annealer])
end = timeit.default_timer()


ModuleNotFoundError: No module named 'torch.hub'

I am using the following packages in my project.
When I try to run the following imports and code:
import seaborn as sns
import copy
import boto3
from scipy.stats import spearmanr
import random
import csv
from sklearn.model_selection import train_test_split
import copy
import time
import numpy as np
import os
import pickle
import torch
from torchvision import transforms
import torchvision.models as models
from torch.utils import data
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.metrics import f1_score
from scipy.stats import pearsonr
from flashtorch.saliency import Backprop
from scipy.ndimage.filters import gaussian_filter
def split_train_test(test_size=0.1):
# Make partition directory.
print("make partition directory if not exists")
partition_p = os.path.join("..", "data", "partition")
if not os.path.exists(partition_p):
# List all input filenames and shuffle.
base_p = os.path.join("..", "data", "imgs")
img_ps = [] # e.g. ../data/imgs/MINT/foo.jpg.
for dir_name in os.listdir(base_p):
dir_p = os.path.join(base_p, dir_name)
for img_fname in os.listdir(dir_p):
img_p = os.path.join(dir_name, img_fname)
# Keep only baseball cards in MINT or POOR condition, for now.
print("Keep only baseball cards, for now")
baseball_sport = "185223"
img_ps = [p for p in img_ps if baseball_sport in p]
# Keep only desired conditions.
print("Exclude good and fair cards to create a 9, 7, 5, 3, 1 scale")
img_ps = [p for p in img_ps if "GOOD" not in p and "FAIR" not in p]
# Remove "bad" images.
# Try to load each image, and only retain it if we can load it
# successfully as a (255, 255, 3) image.
print("remove 'bad' images")
old_img_ps = copy.copy(img_ps)
img_ps = []
counter = 0
for p in old_img_ps:
# counter.
counter += 1
if counter % 100 == 0:
# validate image.
full_p = os.path.join("..", "data", "imgs", p)
img =
img = np.array(img)
assert img.shape[2] == 3
# add image to paths.
except Exception as e:
print("Skip bad image {}".format(p))
# Train/test split.
print("split train, test")
train_ps, test_ps = train_test_split(img_ps, test_size=test_size)
# Create partition dictionary.
print("create partition object")
partition = {
"train": train_ps,
"test": test_ps
# Pickle partition object.
of_p = os.path.join(partition_p, "partition.p")
with open(of_p, "wb") as of:
pickle.dump(partition, of)
print("wrote {}".format(of_p))
# Map all unique labels to integer IDs.
# lbl_to_idx is a dictinary mapping e.g. "EX" -> 0, etc.
# str_labels = ["MINT", "NM", "EX", "VG", "GOOD", "FAIR", "POOR"]
str_labels = ["MINT", "NM", "EX", "VG", "POOR"]
lbl_to_idx = {lbl: i for i, lbl in enumerate(str_labels)}
# Create labels dictionary.
# fname_to_lbl is a dictionary mapping e.g. "some_fname.npy" -> 2
# Example filename.100097_sport185226_conditionVG_preprocessed.npy
fname_to_lbl = {}
for p in img_ps:
# Extract label.
str_lbl = p.split("_")[-1].lstrip("condition").rstrip(".jpg")
int_lbl = lbl_to_idx[str_lbl]
# Add to dictionary.
fname_to_lbl[p] = int_lbl
# Pickle fname_to_lbl mapping.
of_p = os.path.join(partition_p, "labels.p")
with open(of_p, "wb") as of:
pickle.dump(fname_to_lbl, of)
print("wrote {}".format(of_p))
class Dataset(data.Dataset):
def __init__(self, list_IDs, labels, apply_rotations=False):
list_IDs (str []) file paths.
labels (str []) target labels.
apply_rotations (bool) : if True then randomly rotate images.
NOTE! This should be FALSE for testing data.
self.labels = labels # dictionary. key=fname, value=integer lbl.
self.list_IDs = list_IDs # list of filenames.
self.apply_rotations = apply_rotations
self.rotation_fn = transforms.Compose([
transforms.RandomAffine(degrees=90, shear=90)
# self.rotation_fn = transforms.RandomRotation(90) # torch rotation function.
# Initialize AWS storage (s3) resource.
self.s3_resource = boto3.resource("s3")
# Make ML_images directory and subdirectories.
conditions = ["MINT", "NM", "EX", "VG", "FAIR", "GOOD", "POOR"]
base_p = os.path.join("..", "data", "ml_images")
if not os.path.exists(base_p):
for condition in conditions:
p = os.path.join(base_p, condition)
def __len__(self):
return len(self.list_IDs)
def download_from_s3(self, remote_fname):
# download fname from s3 as ../data/ml_images/{REMOTE FNAME}
local_fname = os.path.join("..", "data", "ml_images", remote_fname)
print("download {}".format(remote_fname))
# download image from S3 as "img.jpg"
resp = self.s3_resource.Object(
return None
def load_and_preprocess_img(self, img_p, apply_rotations=False):
""" load and preprocess img, optionally applying rotations
img_p (str) path to image.
apply_rotations (bool) if True apply rotations to the image.
should be false to all testing.
X (torch tensor) Image as torch tensor of shape (3, 255, 255)
# Load image and reshape to (3, 255, 255)
img =
img = img.resize((255, 255), Image.ANTIALIAS)
# optionally rotate.
if apply_rotations:
img = self.rotation_fn(img)
# Cast to torch tensor.
X = np.array(img) # (255, 255, 3) numpy
assert X.shape == (255, 255, 3)
X = X / 255 # "normalize"
X = X.swapaxes(1, 2) # (255, 3, 255)
X = X.swapaxes(0, 1) # (3, 255, 255) numpy
X = torch.from_numpy(X).float() # (3, 255, 255) torch
return X
def __getitem__(self, index):
# Select sample.
remote_p = self.list_IDs[index]
# Get label.
y = self.labels[remote_p]
# If the image does not exist locally, get it from S3.
local_p = os.path.join("..", "data", "ml_images", remote_p)
if not os.path.exists(local_p):
# Load image and reshape to torch tensor of shape (3, 255, 255)
X = self.load_and_preprocess_img(local_p, apply_rotations=self.apply_rotations) # torch tensor.
except Exception as e:
print("exception loading data..using random image, label instead")
X = np.random.random((3, 255, 255))
X = torch.from_numpy(X).float()
y = 0
return X, y
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when doing feature extraction.

    Args:
        model: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        feature_extracting: when truthy, every parameter gets
            ``requires_grad = False``; when falsy, nothing is changed.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False
def reenable_gradients(model):
    """Turn gradient tracking back on for every parameter of ``model``.

    Args:
        model: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
    """
    for weight in model.parameters():
        weight.requires_grad = True
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision classifier with its output layer resized.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: number of outputs of the replaced classification layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when
            truthy the pretrained weights are frozen before the new head is
            attached, so only the new head stays trainable.
        use_pretrained: forwarded as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        ``(model, input_size)`` where ``input_size`` is the image side length
        set for that architecture (224, or 299 for inception). For an
        unrecognized ``model_name`` a message is printed and ``(None, 0)`` is
        returned.
    """
    model_ft = None
    input_size = 0

    # ``torch.nn`` is referenced through ``torch`` in every branch because the
    # file imports ``torch`` but never binds the short alias ``nn``.
    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = torch.nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = torch.nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "vgg":
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = torch.nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "squeezenet":
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Squeezenet classifies with a 1x1 convolution rather than a Linear layer.
        model_ft.classifier[1] = torch.nn.Conv2d(
            512, num_classes, kernel_size=(1, 1), stride=(1, 1)
        )
        model_ft.num_classes = num_classes
        input_size = 224
    elif model_name == "densenet":
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = torch.nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = torch.nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = torch.nn.Linear(num_ftrs, num_classes)
        input_size = 299
    else:
        # Only report an invalid name when no branch matched.
        print("Invalid model name, exiting...")

    return model_ft, input_size
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, device="cpu"):
since = time.time()
val_acc_history = []
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
# Initiailize a file to track accuracy over epochs.
acc_of_p = os.path.join("..", "data", "model_accuracy.csv")
acc_of = open(acc_of_p, "w", newline="")
header = ["epoch", "phase", "accuracy", "F", "r"]
w = csv.writer(acc_of)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'test']:
if phase == 'train':
model.train() # Set model to training mode
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0.0
ypreds = []
ytrues = []
# Iterate over data.
batch_num = 0
for inputs, labels in dataloaders[phase]:
inputs =
labels =
# zero the parameter gradients
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# Get model outputs and calculate loss
# Special case for inception because in training it has an auxiliary output. In train
# mode we calculate the loss by summing the final output and the auxiliary output
# but in testing we only consider the final output.
if is_inception and phase == 'train':
# From
outputs, aux_outputs = model(inputs)
loss1 = criterion(outputs, labels)
loss2 = criterion(aux_outputs, labels)
loss = loss1 + 0.4 * loss2
outputs = model(inputs)
loss = criterion(outputs, labels)
_, preds = torch.max(outputs, 1)
# backward + optimize only if in training phase
if phase == 'train':
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds ==
F = f1_score(preds.numpy(),, average="micro")
# counter.
batch_num += 1
if batch_num % 1 == 0:
correct = float(torch.sum(preds ==
incorrect = float(torch.sum(preds !=
perc_correct = 100 * correct / (correct + incorrect)
msg = """
epoch {} batch {} : percent correct={:.4f} F={:.4f}
""".format(epoch, batch_num, perc_correct, F)
# rank correlation of predicted, actual.
rho, p = spearmanr(preds.numpy(),
print("correlation of pred, actual: rho = {:.4f}".format(rho))
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
epoch_F = f1_score(ypreds, ytrues, average="micro")
epoch_R = pearsonr(ypreds, ytrues)[0]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
if phase == 'val':
# Write latest train and test accuracies to output file.
out = [epoch, phase, epoch_acc.numpy(), epoch_F, epoch_R]
# Pickle the model after end of epoch.
of_p = os.path.join("..", "models", "latest_model.p")
with open(of_p, "wb") as of:
pickle.dump(model, of)
print("wrote {}".format(of_p))
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# close output file.
print("wrote {}".format(acc_of_p))
# load best model weights
return model, val_acc_history
def train_CNN_model(num_classes=5, load_latest_model=False):
# Use CUDA if available.
use_cuda = torch.cuda.is_available()
print("check if CUDA can be used -> {}".format(use_cuda))
device = torch.device("cuda:0" if use_cuda else "cpu")
# Define transformations of the images.
# Note that the normalization transform with these specific
# Parameters is necessary for working with pretrained AlexNet.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
img_transforms = transforms.Compose([
# Parameters
params = {
"batch_size": 64,
"shuffle": True,
"num_workers": 4
max_epochs = 10
# Datasets.
print("load train/test partition and labels")
partition_p = os.path.join("..", "data", "partition", "partition.p")
labels_p = os.path.join("..", "data", "partition", "labels.p")
partition = pickle.load(open(partition_p, "rb"))
labels = pickle.load(open(labels_p, "rb"))
print("create generators over train, test data")
train_set = Dataset(partition["train"], labels, apply_rotations=True) # do rotate train data.
train_g = data.DataLoader(train_set, **params)
test_set = Dataset(partition["test"], labels, apply_rotations=False) # don't rotate test data.
test_g = data.DataLoader(test_set, **params)
if load_latest_model:
print("load pickled latest_model.p")
model_p = os.path.join("..", "models", "latest_model.p")
model = pickle.load(open(model_p, "rb"))
model, _ = initialize_model(
model_name="resnet", # resnet 18 with skip connections.
feature_extract=True, # if True only finetune top layer.
# Initialize optimizer, loss criterion.
optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.CrossEntropyLoss()
# Train model.
model, history = train_model(
dataloaders={"train": train_g, "test": test_g},
# Pickle best performing model.
of_p = os.path.join("..", "models", "best_model.p")
with open(of_p, "wb") as of:
pickle.dump(model, of)
print("wrote {}".format(of_p))
# Pickle history of best performing model.
of_p = os.path.join("..", "models", "history.p")
with open(of_p, "wb") as of:
pickle.dump(history, of)
print("wrote {}".format(of_p))
def load_and_preprocess_img(img_p):
    """Load one image file as a single-item batch tensor.

    Args:
        img_p (str): path to the image file.

    Returns:
        torch.FloatTensor of shape (1, 3, 255, 255) with values scaled to
        the range [0, 1].
    """
    # Load image, force three channels so the reshape below cannot fail on
    # grayscale/RGBA files, and resize to 255x255.
    img = Image.open(img_p).convert("RGB")
    # LANCZOS is the filter formerly exposed as Image.ANTIALIAS, an alias
    # that recent Pillow releases no longer provide.
    img = img.resize((255, 255), Image.LANCZOS)
    # Cast to torch tensor.
    X = np.array(img)  # (255, 255, 3) numpy
    X = X.reshape((1, 255, 255, 3))  # add the batch axis
    X = X / 255  # "normalize"
    X = X.transpose(0, 3, 1, 2)  # (1, 3, 255, 255) numpy, channels first
    X = torch.from_numpy(X).float()  # (1, 3, 255, 255) torch
    return X
def saliency_map(model, img_p):
    """Compute, smooth and plot a gradient saliency map for one image.

    Args:
        model: network handed to ``flashtorch.saliency.Backprop``.
        img_p (str): path of the image to explain.

    Returns:
        The Gaussian-smoothed saliency map as a numpy array.
    """
    # Load the image as a batch tensor and make it a gradient target.
    X = load_and_preprocess_img(img_p)
    X.requires_grad_()  # without this no gradients reach the input

    with torch.set_grad_enabled(True):
        explainer = Backprop(model)  # flashtorch.saliency Backprop object
        gradients = explainer.calculate_gradients(X, take_max=True, guided=False)

    # Drop the autograd link, take the first batch item and move the
    # channel axis last for plotting.
    X = X.detach()
    img_np = X.numpy()[0].transpose(1, 2, 0)

    saliency_map_np = gradients.numpy()[0]
    print(max(np.max(saliency_map_np, axis=0)))

    # Smooth heatmap.
    saliency_map_np = gaussian_filter(saliency_map_np, sigma=10)

    # Plot the saliency map and draw the image onto the same axes.
    heatmap = sns.heatmap(saliency_map_np, alpha=0.5)
    heatmap.imshow(img_np, cmap="YlGnBu")
    return saliency_map_np
if __name__ == "__main__":
# split_train_test()
# train_CNN_model(load_latest_model=False, num_classes=5)
model_p = os.path.join("..", "models", "cloud_model.p")
model = pickle.load(open(model_p, "rb"))
reenable_gradients(model) # e.g., disable fine-tuning mode
# img_p = os.path.join("..", "demos", "demo_cards",
# "very_good.jpg")
img_p = os.path.join("..", "data", "imgs", "POOR",
sm = saliency_map(model, img_p)
I get the following error:
Traceback (most recent call last):
File "/home/admin/Desktop/Code/project/venv/lib/python3.6/site-packages/torchvision/", line 14, in <module>
from torch.hub import load_state_dict_from_url
ModuleNotFoundError: No module named 'torch.hub'
Any suggestions why? What is wrong with my installed packages?
I appreciate your replies!

Similar Image Searching by Metric Learning

I'm trying to create a program that shows the top 3 images most similar to a query image, using Python.
I thought a Siamese network trained with triplet loss would be a good option for what I want to do.
I wrote some code and trained a model on a small dataset on my PC. Then I fed one of the dataset images into the program to evaluate it. I expected the program to return the same image that I input, but it doesn't always do so.
For example, there are five images: A, B, C, D and E. I trained a model on these five images with a Siamese network and triplet loss, and saved the model. Then I loaded the model and input image B for prediction, expecting the program to return B as the result, but it returned D.
When the distances between the dataset and the query are computed right after model training (in the same run), the results are as I expected (input image A, returned image A).
However, after training has completed, when I load the trained model and try to predict, it doesn't return the correct image.
Did I do something wrong in the model structure?
Or is a Siamese network not appropriate for my problem?
If the structure of my code is not so bad, I guess it could be an issue with the quality of the dataset.
My program is as below.
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.applications import resnet
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, History
import numpy as np
import random
import os
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import datetime
from sklearn.metrics import euclidean_distances, roc_auc_score
import load_dataset2_1 as ld
now =
def create_resnet(size, channel, num_classes):
    """Build the embedding network: ResNet50 backbone plus a Dense(256) head.

    Args:
        size: height and width of the square input images.
        channel: number of input channels.
        num_classes: accepted for interface compatibility; not read here
            (the embedding width is fixed at 256).

    Returns:
        A Keras ``Model`` mapping an image batch to 256-dimensional
        ReLU-activated embeddings.
    """
    input_tensor = layers.Input((size, size, channel))
    backbone = resnet.ResNet50(
        weights="imagenet",
        input_tensor=input_tensor,
        include_top=False,
        pooling="avg",
    )

    head = Sequential()
    head.add(layers.Dense(256, activation="relu"))
    embedding_model = Model(inputs=backbone.input, outputs=head(backbone.output))

    # Freeze every backbone layer before "conv5_block1_out"; that layer and
    # all layers after it stay trainable.
    trainable = False
    for layer in backbone.layers:
        if layer.name == "conv5_block1_out":
            trainable = True
        layer.trainable = trainable

    return embedding_model
def create_concatenate_layer(embedding_model, size, channel):
    """Wrap ``embedding_model`` into a three-input (anchor/positive/negative) net.

    The same ``embedding_model`` is applied to each of the three inputs and
    the three embeddings are concatenated into one output tensor.

    Args:
        embedding_model: callable Keras model producing an embedding.
        size: height and width of the square input images.
        channel: number of input channels.

    Returns:
        A Keras ``Model`` taking ``[anchor, positive, negative]`` inputs.
    """
    image_shape = (size, size, channel)
    # Order is anchor, positive, negative.
    triplet_inputs = [layers.Input(shape=image_shape) for _ in range(3)]
    triplet_embeddings = [embedding_model(branch) for branch in triplet_inputs]
    merged = layers.concatenate(triplet_embeddings)
    return Model(triplet_inputs, merged)
# Online Triplet
def triplet_loss(label, embeddings):
    """Mean hinge loss over all (same-label pair, different-label pair) combinations.

    Args:
        label: tensor whose column 0 holds the class id of each sample; the
            remaining columns are ignored.
        embeddings: batch of embedding vectors (assumed 2-D, one row per
            sample -- TODO confirm against the model output).

    Returns:
        Scalar tensor: mean of ``max(d_pos - d_neg + margin, 0)`` with
        ``margin = 0.25``, where the distances are squared Euclidean.

    NOTE(review): the same-label mask includes the diagonal (each sample
    paired with itself, distance 0), and every positive pair is compared
    with every negative pair whether or not they share an anchor.
    """
    # Pairwise squared Euclidean distances via broadcasting.
    x1 = tf.expand_dims(embeddings, axis=0)
    x2 = tf.expand_dims(embeddings, axis=1)
    euclidean = tf.reduce_sum((x1 - x2) ** 2, axis=-1)

    # Boolean matrix: True where two samples carry the same label.
    lb1 = tf.expand_dims(label[:, 0], axis=0)
    lb2 = tf.expand_dims(label[:, 0], axis=1)
    equal_mat = tf.equal(lb1, lb2)

    # positives
    positive_ind = tf.where(equal_mat)
    positive_dists = tf.gather_nd(euclidean, positive_ind)
    print("positive_ind : ", positive_ind)
    print("positive_dists : ", positive_dists)

    # negatives (the debug output now shows the negative tensors, not the
    # positive ones again)
    negative_ind = tf.where(tf.logical_not(equal_mat))
    negative_dists = tf.gather_nd(euclidean, negative_ind)
    print("negative_ind : ", negative_ind)
    print("negative_dists : ", negative_dists)

    # [P, N]
    margin = 0.25
    positives = tf.expand_dims(positive_dists, axis=1)
    negatives = tf.expand_dims(negative_dists, axis=0)
    triplets = tf.maximum(positives - negatives + margin, 0.0)
    return tf.reduce_mean(triplets)
def create_batch(x_train, y_train, size, channel, batch_size):
    """Sample a batch of (anchor, positive, negative) image triplets.

    For each slot a random anchor is drawn; the positive is a random sample
    with the same label (possibly the anchor itself) and the negative is a
    random sample with a different label.

    Args:
        x_train: array of images, indexed along axis 0.
        y_train: array of labels aligned with ``x_train``.
        size: height and width of the square images.
        channel: number of image channels.
        batch_size: number of triplets to produce.

    Returns:
        ``[x_anchors, x_positives, x_negatives]``, each an array of shape
        ``(batch_size, size, size, channel)``.

    Raises:
        ValueError: if an anchor's label is the only label present, so no
            negative sample exists.
    """
    batch_shape = (batch_size, size, size, channel)
    x_anchors = np.zeros(batch_shape)
    x_positives = np.zeros(batch_shape)
    x_negatives = np.zeros(batch_shape)

    for i in range(batch_size):
        random_index = random.randint(0, x_train.shape[0] - 1)
        y = y_train[random_index]

        # flatnonzero always yields a 1-D index array, so a class with a
        # single member still has a length (np.squeeze(np.where(...))
        # collapses that case to a 0-d array and len() fails).
        same_label = np.flatnonzero(y_train == y)
        other_label = np.flatnonzero(y_train != y)
        if len(other_label) == 0:
            raise ValueError(
                "create_batch needs at least two distinct labels in y_train"
            )

        x_anchors[i] = x_train[random_index]
        x_positives[i] = x_train[same_label[random.randint(0, len(same_label) - 1)]]
        x_negatives[i] = x_train[other_label[random.randint(0, len(other_label) - 1)]]

    print("x_anchors.shape___", x_anchors.shape)
    print("x_positives.shape___", x_positives.shape)
    print("x_negatives.shape___", x_negatives.shape)
    return [x_anchors, x_positives, x_negatives]
def data_generator(x_train, y_train, size, channel, batch_size, emb_size):
    """Endlessly yield ``(triplet_batch, dummy_targets)`` pairs.

    ``triplet_batch`` comes from ``create_batch``; ``dummy_targets`` is an
    all-zero array of shape ``(batch_size, 3 * emb_size)``, created anew for
    every batch.
    """
    target_shape = (batch_size, 3 * emb_size)
    while True:
        triplet_batch = create_batch(x_train, y_train, size, channel, batch_size)
        dummy_targets = np.zeros(target_shape)
        yield triplet_batch, dummy_targets
def train_generator(X, y_label, batch_size):
    """Endlessly yield shuffled ``(images / 255, targets)`` batches.

    Each pass reshuffles the sample order and emits only full batches (a
    trailing remainder is dropped). ``targets`` is a float32 array of shape
    ``(batch_size, 128)`` whose first column carries the labels; all other
    columns are zero.
    """
    while True:
        order = np.random.permutation(X.shape[0])
        full_batches = len(order) // batch_size
        for start in range(0, full_batches * batch_size, batch_size):
            picked = order[start:start + batch_size]
            images = X[picked] / 255.0
            targets = np.zeros((batch_size, 128), np.float32)
            targets[:, 0] = y_label[picked]
            yield images, targets
def step_decay(epoch):
    """Return the learning rate for ``epoch``.

    Starts at 1e-3 and is divided by 10 once the epoch reaches 25 and again
    once it reaches 45.
    """
    rate = 1e-3
    for boundary in (25, 45):
        if epoch >= boundary:
            rate /= 10.0
    return rate
size = 128
channel = 3
batch_size = 64
epochs = 1000
emb = 64
def train(folder, size=size, batch_size=batch_size, channel=channel, epochs=epochs):
print("TensorFlow version: ", tf.__version__)
print("folder : ", folder)
print("size : {0}, batch_size : {1}, channel : {2}, epochs : {3}".format(size, batch_size, channel, epochs))
switch = input("Load data : On or Off: ")
if switch == "On" or switch == "ON" or switch == "on":
switch = "On"
size = ''
(x_train, y_train), (x_test, y_test), size = ld.main(switch, folder)
x_train = np.reshape(x_train, (x_train.shape[0], size, size, channel))/255.0
x_test = np.reshape(x_test, (x_test.shape[0], size, size, channel))/255.0
# print('num_files: ', num_files)
steps_per_epoch = x_train.shape[0]//batch_size
# opt = tf.keras.optimizers.SGD(1e-3, 0.9)
opt = tf.keras.optimizers.Adam(lr=0.0001)
scheduler = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint("./triplet_model/model_dbt_2.hdf5", monitor="loss", save_best_only=True, save_weight_only=True)
# es_cb = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0001, patience=3, mode='auto')
net = create_resnet(size, channel, 256)
net.compile(loss=triplet_loss, metrics=['accuracy'], optimizer=opt)
# hist =, y_train, size, channel, batch_size, emb_size=emb),
# steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1,
# # validation_data = (x_test, y_test),
# callbacks=[checkpoint, scheduler]
# )
hist = net.fit_generator(train_generator(x_train, y_train, batch_size),
steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1,
# validation_data = train_generator(x_test, y_test, batch_size),
callbacks=[checkpoint, scheduler],
max_queue_size = 1
net.save_weights("./triplet_model/new_weights.hdf5", save_format="h5")
x = range(epochs)
plt.title("Model accuracy")
plt.plot(x, hist.history["accuracy"], label="accuracy")
plt.plot(x, hist.history["loss"], label="loss")
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), borderaxespad=0, ncol=2)
name="resnet_dogs_trian_result {0} {1}h-{2}m-{3}s.jpg".format(now.strftime("%Y-%m-%d"), now.hour, now.minute, now.second)
plt.savefig(name, bbox_inches="tight")
def main(test_img, folder="Base_dogs_2", base_folder="Base_dogs"):
num_files = len(os.listdir(base_folder))
# model = create_resnet(size, channel, 256)
if not os.path.isfile("./triplet_model/model_dbt_2.hdf5") and not os.path.isfile("./triplet_model/new_weights.hdf5"):
# train(folder="Base_dogs_2")
if os.path.isfile("./triplet_model/model_dbt_2.hdf5"):
print("Loading weights: 'model_dbt_2.hdf5'")
model = tf.keras.models.load_model("./triplet_model/new_model.h5", custom_objects={'triplet_loss' : triplet_loss})
# model.summary()
elif os.path.isfile("./triplet_model/new_weights.hdf5"):
print("Loading weights: 'new_weights.hdf5'")
model = tf.keras.models.load_model("./triplet_model/new_model.h5", custom_objects={'triplet_loss' : triplet_loss})
print("Cannot load weights")
#db = Dogs.objects.all()
db = os.listdir(base_folder)
print("db : ", db)
for b_img in db:
bbb += 1
file_name = b_img
b_img = base_folder + "/" + b_img
print("img_path : ", b_img)
bmg ="{0}{1}.jpg".format(bbb, file_name))
# bmg = bmg.convert("L")
bmg = bmg.resize((size, size))
b_data = np.asarray(bmg)
X_base = np.array(X_base)
Y = np.array(Y)
print("X_base.shape : ", X_base.shape)
print("Y.shape : ", Y.shape)
label_ec = LabelEncoder()
label = label_ec.fit_transform(Y)
X_base = X_base.astype(np.float32)
# X_base = tf.expand_dims(X_base, axis=-1)
print("X_base.shape after expand_dims : ", X_base.shape)
(x_base, y_base) = (X_base, label)
file = test_img
print("test_img : ", file)
img =
# img = img.convert("L")
img = img.resize((size, size))
data = np.asarray(img)
X = np.array(X)
X = X.astype(np.float32)
X = np.reshape(X, (X.shape[0], size, size, channel))/255.0
print("X.shape : ", X.shape)
#X = np.expand_dims(X, axis=0)
anchor_embedding = model.predict(x_base, verbose=1)
test_embedding = model.predict(X, verbose=1)
dist_matrix = np.zeros((test_embedding.shape[0], anchor_embedding.shape[0]), np.float32)
print("dist_matrix.shape : ", dist_matrix.shape)
for i in range(dist_matrix.shape[0]):
dist_matrix[i, :] = euclidean_distances(test_embedding[i, :].reshape(1, -1), anchor_embedding)[0]
print("dist_matrix : ", dist_matrix)
#distance against query image
min_dist = np.min(dist_matrix, axis=-1)
min_idx = np.argmin(dist_matrix)
print("min_dist : ", min_dist)
print("min_idx : ", min_idx)
print("base_file_name : ", db)
time = "{0} {1}h-{2}m-{3}s_{4}".format(now.strftime('%Y-%m-%d'), now.hour, now.minute, now.second, base_folder)
print("file_name : time__", time)
results_path = []
path = 'static/results_'
# print("x_base[min_idx] : ", x_base[min_idx])
pil_img_1 = np.reshape(x_base[min_idx], (size, size, channel))
print("pil_img_1.shape : ", pil_img_1.shape)
pil_img_1 = Image.fromarray(pil_img_1.astype(np.uint8))
results_path_1 = '{0}{1}_1.jpg'.format(path, time)
min_dist_2 = np.min(dist_matrix[dist_matrix > min_dist], axis=-1)
print("min_dist_2 : ", min_dist_2)
print("np.squeeze(np.where(dist_matrix==min_dist_2)) : ",np.squeeze(np.where(dist_matrix==min_dist_2)))
min_idx_2 = np.squeeze(np.where(dist_matrix==min_dist_2))[1]
print("min_idx_2 : ", min_idx_2)
img_2 = np.reshape(x_base[min_idx_2], (size, size, channel))
pil_img_2 = Image.fromarray(img_2.astype(np.uint8))
results_path_2 = '{0}{1}_2.jpg'.format(path, time)
min_dist_3 = np.min(dist_matrix[dist_matrix > min_dist_2], axis=-1)
print("min_dist_3___", min_dist_3)
min_idx_3 = np.squeeze(np.where(dist_matrix==min_dist_3))[1]
print("np.squeeze(np.where(dist_matrix==min_dist_3)) : ",np.squeeze(np.where(dist_matrix==min_dist_3)))
print("min_idx_3___", min_idx_3)
img_3 = np.reshape(x_base[min_idx_3], (size, size, channel))
pil_img_3 = Image.fromarray(img_3.astype(np.uint8))
results_path_3 = '{0}{1}_3.jpg'.format(path, time)
dist_matrix = np.empty(0)
print("dist_matrix after clear : ", dist_matrix)
return results_path # return top 3 similar images
if __name__ == "__main__":
t = "A.jpg"
b_fn = "test"
result = main(test_img=t)

Keras - how to pass an array of images to ImageDataGenerator.flow

I'm learning about image classification in keras. I've downloaded sample dataset of donuts and waffles, but they differ in size. To standardise their size I'm loading images from their directories, resize them and store them in numpy arrays:
test_data_dir = 'v_data/train/donuts_and_waffles/'
validation_data_dir = 'v_data/test/donuts_and_waffles/'
loaded_test_donuts = list()
for filename in listdir(test_data_dir + 'donuts/'):
image1 = + 'donuts/' + filename)
img_resized = image1.resize((224,224))
img_data = asarray(img_resized)
loaded_test_waffles = list()
for filename in listdir(test_data_dir + 'waffles/'):
image1 = + 'waffles/' + filename)
img_resized = image1.resize((224,224))
img_data = asarray(img_resized)
loaded_validation_donuts = list()
for filename in listdir(validation_data_dir + 'donuts/'):
image1 = + 'donuts/' + filename)
img_resized = image1.resize((224,224))
img_data = asarray(img_resized)
loaded_validation_waffles = list()
for filename in listdir(validation_data_dir + 'waffles/'):
image1 = + 'waffles/' + filename)
img_resized = image1.resize((224,224))
img_data = asarray(img_resized)
test_data = list()
validation_data = list()
test_data = np.array(test_data)
validation_data = np.array(validation_data)
Then I want to create an ImageDataGenerator for my data:
train_datagen = ImageDataGenerator(
rescale=1. / 255,
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow(
#how can I pass here test_data to make it work (along with which parameters)
validation_generator = test_datagen.flow(
#how can I pass here validation_data to make it work (along with which parameters)
How to achieve that?
I have tried like this:
train_generator = train_datagen.flow(
test_data, #does not work
validation_generator = test_datagen.flow(
validation_data, #does not work
but then I get this error:
Traceback (most recent call last):
ValueError: ('Input data in `NumpyArrayIterator` should have rank 4. You passed an array with shape', (2, 770, 224, 224, 3))
It's hard to say what does not work without error message, but I assume the problem is that you pass lists to your ImageDataGenerators. You can fix this easily by converting your lists to numpy-arrays:
test_data = list()
validation_data = list()
test_data = np.array(test_data)
validation_data = np.array(validation_data)
Edit: A better way, stacking instead of appending to lists and converting
test_data = np.vstack((np.array(loaded_test_donuts),np.array(loaded_test_waffles)))
validation_data = np.vstack((np.array(loaded_validation_donuts),np.array(loaded_validation_waffles)))
What I would recommend is that you create a folder where you have n folders representing your classes such as "dog", "cat" and do the preprocessing step first and then save the produced images like this:
from PIL import Image
import glob
from keras.preprocessing import image
for folder in glob.glob("*"): #goes through every folder
ims = glob.glob(folder+ "\\*.png") #reads image names from folder assuming images are png
for im in ims:
img =
if (img.size != (W, H)):
imgr = process(img, W, H) # where "process" is reszing in your case
Then split your data into train and validation folders and do:
traingen = image.ImageDataGenerator(rescale=1./255)
validationgen = image.ImageDataGenerator(rescale=1./255)
train = traingen.flow_from_directory("train",target_size=(H,W), batch_size=s,shuffle=True)
val = validationgen.flow_from_directory("validation",target_size=(500, 825), batch_size=32, shuffle=False)
Your test_data does not have the correct shape: you have to convert it into a rank-4 array, for example (770, 224, 224, 3), where 770 is the number of images, 224x224 is the size of each image in pixels, and 3 is the number of color channels.

PyTorch why does the forward function run multiple times and can I change the input shape?

import torch
import torch.nn as nn
import torchvision.datasets as dsets
from skimage import transform
import torchvision.transforms as transforms
from torch.autograd import Variable
import pandas as pd;
import numpy as np;
from torch.utils.data import Dataset, DataLoader
import statistics
import random
import math
class FashionMNISTDataset(Dataset):
    """Fashion MNIST dataset backed by a CSV file.

    The first CSV column is used as the label; all remaining columns are
    reshaped into one ``(1, 28, 28)`` array per row.
    """

    def __init__(self, csv_file, transform=None):
        """Load the whole CSV file into memory.

        Args:
            csv_file (string): Path to the csv file
            transform (callable): Optional transform to apply to sample
        """
        data = pd.read_csv(csv_file)
        # Column 0 holds the label; the rest of each row becomes a 1x28x28 array.
        self.X = np.array(data.iloc[:, 1:]).reshape(-1, 1, 28, 28)
        self.Y = np.array(data.iloc[:, 0])
        del data  # the DataFrame is no longer needed once the arrays exist
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(item, label)`` for ``idx``, applying ``transform`` to the item if set."""
        item = self.X[idx]
        label = self.Y[idx]
        if self.transform:
            item = self.transform(item)
        return (item, label)
# Network from the question: two nn.Sequential stages followed by a final
# linear layer with 10 outputs.
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
# NOTE(review): the arguments of both nn.Sequential(...) calls are missing from
# this excerpt. The traceback quoted further down shows F.linear running inside
# layer1 against a [616 x 300] transposed weight, so layer1 presumably starts
# with nn.Linear(616, 300) -- confirm against the original post.
self.layer1 = nn.Sequential(
self.layer2 = nn.Sequential(
# Final layer: 100 input features -> 10 outputs.
self.fc = nn.Linear(100, 10)
def forward(self, x):
# Debug print; it produces the "x shape ..." lines in the quoted output.
print("x shape",x.shape)
out = self.layer1(x)
out = self.layer2(out)
out = self.fc(out)
return out
# Training / validation / test script from the question.
# NOTE(review): indentation and several right-hand sides were lost in this
# excerpt; the comments below mark the gaps and the points relevant to the
# reported size-mismatch error.
def run():
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 15
batch_size = 100
learning_rate = 0.0001
train_dataset = FashionMNISTDataset(csv_file='fashion-mnist_train.csv')
test_dataset = FashionMNISTDataset(csv_file='fashion-mnist_test.csv')
# NOTE(review): the start of both loader expressions is missing; presumably
# DataLoader(dataset=..., batch_size=batch_size, shuffle=True) -- confirm.
train_loader =,batch_size=batch_size,shuffle=True)
test_loader =,batch_size=batch_size,shuffle=True)
#instance of the Conv Net
cnn = CNN()
#loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
losses = []
for epoch in range(num_epochs):
l = 0
for i, (images, labels) in enumerate(train_loader):
images = Variable(images.float())
labels = Variable(labels)
# NOTE(review): right-hand sides missing; presumably a move to `device` -- confirm.
images =
labels =
print("img shape=",images.shape, "label shape=",labels.shape)
# resize_ is an in-place resize, not a reshape: the printed output shows the
# batch going from [100, 1, 28, 28] (78,400 values) to [100, 616] (61,600
# values), so the rows no longer line up with individual images. The leading
# 100 is also hard-coded instead of using batch_size.
images = images.resize_((100,616))
print("img shape=",images.shape, "label shape=",labels.shape)
# Forward + Backward + Optimize
# NOTE(review): only the forward pass and the loss are visible here; no
# zero_grad / backward / step call appears in this excerpt -- confirm.
outputs = cnn(images)
loss = criterion(outputs, labels)
l = loss.item()
with torch.no_grad():
val_loss = []
for images, labels in test_loader:
images = Variable(images.float()).to(device)
labels =
# This second forward call receives the un-resized [100, 1, 28, 28] batch;
# it is the call shown in the quoted traceback that raises the size mismatch.
outputs = cnn.forward(images)
batch_loss = criterion(outputs, labels)
# NOTE(review): nothing visible appends to val_loss or losses, so mean() and
# min() would be called on empty lists as written -- the appends may have
# been lost; confirm.
avgloss = statistics.mean(val_loss)
# NOTE(review): the body of this `if` is garbled; presumably it saves the
# model under the name 'model' -- confirm.
if avgloss < min(losses):, 'model')
if (i+1) % 100 == 0:
print ('Epoch : %d/%d, Iter : %d/%d, Loss: %.4f'
%(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.item()))
# NOTE(review): a freshly constructed CNN; no load of the saved weights is
# visible in this excerpt, so as written it is evaluated untrained -- confirm.
final_model = CNN()
correct = 0
total = 0
for images, labels in test_loader:
images = Variable(images.float()).to(device)
outputs = final_model(images).to(device)
# NOTE(review): first argument of torch.max is missing; presumably the
# model outputs -- confirm.
_, predicted = torch.max(, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
print('Test Accuracy of the model on the 10000 test images: %.4f %%' % (100 * correct / total))
if __name__ == '__main__':
I have enclosed all the code for testing purposes. But Here is the error I get
img shape= torch.Size([100, 1, 28, 28]) label shape= torch.Size([100])
img shape= torch.Size([100, 616]) label shape= torch.Size([100]) x
shape torch.Size([100, 616]) x shape torch.Size([100, 1, 28, 28])
Traceback (most recent call last): File "", line 145, in
run() File "", line 115, in run
outputs = cnn.forward(images) File "", line 56, in forward
out = self.layer1(x) File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/",
line 489, in call
result = self.forward(*input, **kwargs) File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/",
line 92, in forward
input = module(input) File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/",
line 489, in call
result = self.forward(*input, **kwargs) File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/",
line 67, in forward
return F.linear(input, self.weight, self.bias) File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/",
line 1354, in linear
output = input.matmul(weight.t()) RuntimeError: size mismatch, m1: [2800 x 28], m2: [616 x 300] at
The problem here is that I want all 616 pixels to be fed as input into the neural network, but I don't know how to do so. I tried to reshape the input to solve the problem, but model.forward ran twice: once with the correct shape and then once with the wrong shape.
You are calling forward twice in run:
Once for the training data
Once for the validation data
However, you do not appear to have applied the following transformation to your validation data:
images = images.resize_((100,616))
Maybe consider doing the resize in the forward function.

Save Predicted images in CNN network

I wrote a CNN with TensorFlow which works properly, and I want to check the classified images during the testing stage.
In my dataset I have 5 different categories. During the test stage I am looking for a way to save the classified images in a new folder for each category, so that I can check whether the results from my net make sense.
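This is the whole of my code in the test stage:
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate for adam optimizer')
tf.app.flags.DEFINE_integer('num_classes', 3, 'Number of classes')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Batch size')
tf.app.flags.DEFINE_float('keep_prob', 0.8, 'Dropout keep probability')
tf.app.flags.DEFINE_integer('num_channel',3 , 'Image channel, RGB=3, Grayscale=1')
tf.app.flags.DEFINE_integer('img_size', 80, 'Size of images')
tf.app.flags.DEFINE_string('test_file', 'data/test.txt', 'Test dataset file')
FLAGS = tf.app.flags.FLAGS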
checkpoint_dir = '/home/xyrio/Desktop/classier/training/checkpoints/model_epoch.ckpt89'
def main(_):
x = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, FLAGS.img_size, FLAGS.img_size, FLAGS.num_channel], name='x')
y_true = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 64
filter_size_conv4 = 3
num_filters_conv4 = 128
filter_size_conv5 = 3
num_filters_conv5 = 256
fc_layer_size = 512
fc_layer_size2 = 128
def create_weights(shape):
    """Return a variable of the given shape initialised from a truncated normal (mean 0, stddev 0.01)."""
    initial_values = tf.truncated_normal(shape, mean=0, stddev=0.01)
    return tf.Variable(initial_values)
def create_biases(size):
    """Return a 1-D variable of length ``size`` with every entry initialised to 0.01."""
    initial_values = tf.constant(0.01, shape=[size])
    return tf.Variable(initial_values)
# Build one convolution stage: stride-1 SAME conv2d + bias + ReLU, optionally
# followed by batch normalisation and 2x2 stride-2 max pooling.
# NOTE(review): the end of the signature is missing from this excerpt. The body
# reads `usePooling` and every caller passes usePooling=True, so a `usePooling`
# keyword parameter presumably follows useBatchNorm -- its default is not
# visible; confirm.
def create_convolutional_layer(input, num_input_channels, conv_filter_size, num_filters, useBatchNorm=False,
# Square kernel of shape [size, size, in_channels, out_channels].
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
biases = create_biases(num_filters)
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
layer += biases
layer = tf.nn.relu(layer)
if useBatchNorm == True:
layer = tf.layers.batch_normalization(layer)
if usePooling:
layer = tf.nn.max_pool(value=layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def create_flatten_layer(layer):
    """Reshape ``layer`` to 2-D, merging dimensions 1-3 into a single feature axis."""
    # Product of static dimensions 1..3 gives the flattened feature count.
    feature_count = layer.get_shape()[1:4].num_elements()
    return tf.reshape(layer, [-1, feature_count])
def create_fc_layer(input, num_inputs, num_outputs, useRelu=True, useDropout=False):
    """Build a fully connected layer: ``input @ weights + biases``.

    Args:
        input: 2-D tensor fed into the layer.
        num_inputs: Number of input features (rows of the weight matrix).
        num_outputs: Number of output features (columns of the weight matrix).
        useRelu: Apply ReLU to the result when truthy.
        useDropout: Apply dropout with ``FLAGS.keep_prob`` when equal to True.

    Returns:
        The resulting tensor.
    """
    fc_weights = create_weights(shape=[num_inputs, num_outputs])
    fc_biases = create_biases(num_outputs)
    activations = tf.matmul(input, fc_weights) + fc_biases
    if useRelu:
        activations = tf.nn.relu(activations)
    if useDropout == True:
        activations = tf.nn.dropout(activations, keep_prob=FLAGS.keep_prob)
    return activations
layer_conv1 = create_convolutional_layer(x, FLAGS.num_channel, filter_size_conv1, num_filters_conv1,
useBatchNorm=True, usePooling=True)
layer_conv2 = create_convolutional_layer(layer_conv1, num_filters_conv1, filter_size_conv2, num_filters_conv2,
useBatchNorm=True, usePooling=True)
layer_conv3 = create_convolutional_layer(layer_conv2, num_filters_conv2, filter_size_conv3, num_filters_conv3,
useBatchNorm=True, usePooling=True)
layer_conv4 = create_convolutional_layer(layer_conv3, num_filters_conv3, filter_size_conv4, num_filters_conv4,
useBatchNorm=True, usePooling=True)
layer_conv5 = create_convolutional_layer(layer_conv4, num_filters_conv4, filter_size_conv5, num_filters_conv5,
useBatchNorm=True, usePooling=True)
layer_flat = create_flatten_layer(layer_conv5)
layer_fc1 = create_fc_layer(layer_flat, layer_flat.get_shape()[1:4].num_elements(), fc_layer_size, useRelu=True,
layer_fc2 = create_fc_layer(layer_fc1, fc_layer_size, fc_layer_size2, useRelu=True, useDropout=True)
layer_fc3 = create_fc_layer(layer_fc2, fc_layer_size2, FLAGS.num_classes, useRelu=False)
y_pred = tf.nn.softmax(layer_fc3, name='y_pred', axis=1)
y_pred_cls = tf.argmax(y_pred, axis=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
test_preprocessor = BatchPreprocessor(dataset_file_path=FLAGS.test_file, num_classes=FLAGS.num_classes,
output_size=[FLAGS.img_size, FLAGS.img_size])
test_batches_per_epoch = np.floor(len(test_preprocessor.labels) / FLAGS.batch_size).astype(np.int16)
conf_mat = tf.confusion_matrix(y_true_cls,y_pred_cls,FLAGS.num_classes)
with tf.Session() as sess:
saver.restore(sess, checkpoint_dir)
# Start Testing
test_acc = 0.
test_count = 0
cm_total = None
for _ in range(test_batches_per_epoch):
batch_tx, batch_ty = test_preprocessor.next_batch(FLAGS.batch_size)
acc, conf_m =[accuracy, conf_mat],
feed_dict={x: batch_tx, y_true: batch_ty})
if cm_total is None:
cm_total = conf_m
cm_total += conf_m
test_acc += acc
test_count += 1
test_acc /= test_count
print("{} Testing Accuracy = {:.2%}".format(, test_acc))
This code is for the test data. As you can see, I restore the checkpoint that I saved during training and validation, and then use the best checkpoint to predict on my test data.
batch_tx is my test data and batch_ty is my test label.
Does anyone have an idea how I can do this?
Thanks in advance.
Ok, after the discussion above, you added the line, {x:batch_tx})
after your confusion matrix summation and now you have your predicted labels. Print them out in a format that you can turn into a np array for the code below. Provided your test code runs in a single thread, and it does not shuffle the test batches, you now have your predicted labels in the same order as the images appear in the input file. Assuming your input file is a .bin file, you should be able to extract images (using PIL) from it like this:
from PIL import Image
import numpy as np

# your image dimensions here
width = 80
height = 80
channels = 3
# most labels are 1 byte
labelSize = 1
pixelSize = width * height * channels
recordSize = labelSize + pixelSize

# Placeholders -- fill these in before running:
label_names = ['cat', 'horse', 'dog']  # one name per class index, in label order
predictions = [...]  # put your predictions here (one class index per record, in file order)
# inputFilename must be set to the path of your binary input file.

with open(inputFilename, "rb") as f:
    allTheData = np.fromfile(f, 'u1')

# Integer division: reshape() rejects a float row count on Python 3.
numRecords = allTheData.shape[0] // recordSize
allTheData = allTheData.reshape(numRecords, recordSize)

for idx, d in enumerate(allTheData):
    label = label_names[d[0]]
    rgbData = d[1:]  # records are label first, then all pixel data, one channel after another
    predlabel = label_names[predictions[idx]]
    filename = "{}_pred{}_actual{}.png".format(idx, predlabel, label)
    # Channel-major (C, H, W) -> (H, W, C), the byte order PIL expects for 'RGB'.
    # Assumes each channel is stored row by row (CIFAR-style) -- confirm for
    # non-square images; for square images this matches the original swapaxes.
    pictureA = rgbData.reshape(channels, height, width)
    pictureA = np.transpose(pictureA, (1, 2, 0))
    # PIL sizes are (width, height).
    imageA = Image.frombytes('RGB', (width, height), pictureA.tobytes())
    #display(imageA)
    imageA.save(filename, "PNG")
Please note that the code above won't run until you add in proper label names and your predictions. Also, if the input file is a .csv, you will have to change the reading of it slightly.
