Tensorflow dataset generator inverted colors - python

I have a problem with the TF dataset generator. I do not know why, but when I get a picture from the dataset by running it through a session, it returns Tensors where the colors are inverted. I tried changing BGR to RGB, but this is not the problem.
It is partially solved by inverting the image array (img = 1 - img), but I would prefer that this problem not occur in the first place. Does somebody know what the cause could be?
import os
import glob
import random

import tensorflow as tf
from tensorflow import Tensor


class PairGenerator(object):
    person1 = 'img'
    person2 = 'person2'
    label = 'same_person'

    #def __init__(self, lfw_path='./tf_dataset/resources' + os.path.sep + 'lfw'):
    def __init__(self, lfw_path='/home/tom/Devel/ai-dev/tensorflow-triplet-loss/data/augmentor'):
        self.all_people = self.generate_all_people_dict(lfw_path)
        print(self.all_people.keys())

    def generate_all_people_dict(self, lfw_path):
        # generates a dictionary between a person and all the photos of that person
        all_people = {}
        for person_folder in os.listdir(lfw_path):
            person_photos = glob.glob(lfw_path + os.path.sep + person_folder + os.path.sep + '*.jpg')
            all_people[person_folder] = person_photos
        return all_people

    def get_next_pair(self):
        all_people_names = list(self.all_people.keys())
        while True:
            # draw a person at random
            person1 = random.choice(all_people_names)
            # flip a coin to decide whether we fetch a photo of the same person vs. a different person
            same_person = random.random() > 0.5
            if same_person:
                person2 = person1
            else:
                # repeatedly pick random names until we find a different name
                person2 = person1
                while person2 == person1:
                    person2 = random.choice(all_people_names)
            person1_photo = random.choice(self.all_people[person1])
            yield ({self.person1: person1_photo,
                    self.label: same_person})


class Inputs(object):
    def __init__(self, img: Tensor, label: Tensor):
        self.img = img
        self.label = label

    def feed_input(self, input_img, input_label=None):
        # feed the input images that are necessary to make a prediction
        feed_dict = {self.img: input_img}
        # optionally also include the label:
        # if we're just making a prediction without calculating loss, that won't be necessary
        if input_label is not None:
            feed_dict[self.label] = input_label
        return feed_dict


class Dataset(object):
    img_resized = 'img_resized'
    label = 'same_person'

    def __init__(self, generator=PairGenerator()):
        self.next_element = self.build_iterator(generator)

    def build_iterator(self, pair_gen: PairGenerator):
        batch_size = 10
        prefetch_batch_buffer = 5

        dataset = tf.data.Dataset.from_generator(pair_gen.get_next_pair,
                                                 output_types={PairGenerator.person1: tf.string,
                                                               PairGenerator.label: tf.bool})
        dataset = dataset.map(self._read_image_and_resize)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(prefetch_batch_buffer)
        iter = dataset.make_one_shot_iterator()
        element = iter.get_next()
        return Inputs(element[self.img_resized],
                      element[PairGenerator.label])

    def _read_image_and_resize(self, pair_element):
        target_size = [224, 224]
        # read images from disk
        img_file = tf.read_file(pair_element[PairGenerator.person1])
        print("////")
        print(PairGenerator.person1)
        img = tf.image.decode_image(img_file, channels=3)
        # let tensorflow know that the loaded images have unknown dimensions, and 3 color channels (rgb)
        img.set_shape([None, None, 3])
        # resize to model input size
        img_resized = tf.image.resize_images(img, target_size)
        #img_resized = tf.image.flip_up_down(img_resized)
        #img_resized = tf.image.rot90(img_resized)
        pair_element[self.img_resized] = img_resized
        pair_element[self.label] = tf.cast(pair_element[PairGenerator.label], tf.float32)
        return pair_element
generator = PairGenerator()
iter = generator.get_next_pair()
for i in range(10):
    print(next(iter))

ds = Dataset(generator)

import matplotlib.pyplot as plt
# `out` below is one image from a batch, fetched by running the iterator
# through a session, e.g.:
#   sess = tf.Session()
#   out = sess.run(ds.next_element.img)[0]
imgplot = plt.imshow(out)
imgplot = plt.imshow(1 - out)

Ok, so the solution was:
imgplot = plt.imshow(out / 255)
tf.image.resize_images returns a float32 tensor but leaves the pixel values in the 0-255 range, while plt.imshow expects float RGB data to lie in [0, 1]. The out-of-range values get mangled during display, which is what looked like inverted colors; the image only needs to be rescaled (or cast back to uint8) before plotting.
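A minimal sketch of the two equivalent display fixes (assuming `out` is a float32 array holding 0-255 RGB values, as produced by tf.image.resize_images):

import numpy as np
import matplotlib.pyplot as plt

# Option 1: rescale the float pixel values into [0, 1], which is what
# imshow expects for float RGB data
plt.imshow(out / 255.0)
plt.show()

# Option 2: cast back to uint8, which imshow accepts in the 0-255 range
plt.imshow(out.astype(np.uint8))
plt.show()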

Related

concatenate two different color spaces as input image for image.summary tensorflow

I'm using TensorFlow v1 and have code which runs perfectly for a single color space (such as RGB or HSV):
class Database_loader:
    # Defines a loading scheme for the database
    def __init__(self, directory, size, proportion = 1.0, seed=42):
        # data init
        self.dir = directory                 # directory with the train / test / validation
        self.size = size                     # size of the sub image that should be cropped
        self.nb_channels = 3                 # number of channels of the images
        self.file_train = []                 # list of the train images : tuple (image name / class)
        self.file_test = []                  # list of the test images : tuple (image name / class)
        self.file_validation = []            # list of the validation images : tuple (image name / class)
        self.image_class = ['Real', 'CGG']   # list of the classes (labels) used in the process
        self.nb_class = 0
        self.train_iterator = 0              # iterator over the train images
        self.test_iterator = 0               # iterator over the test images
        self.validation_iterator = 0         # iterator over the validation images
        self.load_images(seed)               # load the database

    def get_next_train(self):
        # load next image (size should be big enough)
        image = []
        while True:
            # pop file name and class
            data = self.file_train[self.train_iterator]
            self.train_iterator += 1
            if self.train_iterator >= len(self.file_train):
                self.train_iterator = 0
            # load image
            file_name = self.dir + '/train/' + data[1] + '/' + data[0]
            image = Image.open(file_name)
            # convert the PIL image to a numpy array before the dtype conversion
            image = np.asarray(image)
            # convert to float image
            image = image.astype(np.float32) / 255.
            #image = image.reshape(1, self.size, self.size, 3)
            if self.size is None:
                image = image.reshape(image.shape[0], image.shape[1], self.nb_channels)
            else:
                image = image.reshape(self.size, self.size, self.nb_channels)
And the code for the model:
def image_summaries(var, name):
    tf.summary.image(name + '_1', var[:,:,:,0:1], max_outputs = 1)
    tf.summary.image(name + '_2', var[:,:,:,1:2], max_outputs = 1)
    tf.summary.image(name + '_3', var[:,:,:,2:3], max_outputs = 1)

database_path = 'I:/users/Database/test/'
# to change to the format of your image
image_size = 100

# define a single-image classifier
clf = Model(database_path, image_size, config = 'Personal', filters = [32,32,64],
            batch_size = 50, feature_extractor = 'Stats',
            remove_filter_size = 5, using_GPU = True)

# train the classifier and test it on the testing set
clf.train(nb_train_batch = 800,
          nb_test_batch = 80,
          nb_validation_batch = 40,
          validation_frequency = 20,
          show_filters = False)
But I want to concatenate RGB and HSV images along the channel dimension, giving 6 channels instead of 3, so I made these changes:
self.nb_channels = 6
image1 = image.convert("HSV")
t1 = np.asarray(image1)
t2 = np.asarray(image)
# concatenate along the channel axis (axis=2) to get an (H, W, 6) array;
# note that a 6-channel array cannot round-trip through PIL's Image.fromarray
image = np.concatenate((t1, t2), axis=2)
# convert to float image
image = image.astype(np.float32) / 255.

def image_summaries(var, name):
    tf.summary.image(name + '_1', var[:,:,:,0:1], max_outputs = 1)
    tf.summary.image(name + '_2', var[:,:,:,1:2], max_outputs = 1)
    tf.summary.image(name + '_3', var[:,:,:,2:3], max_outputs = 1)
    tf.summary.image(name + '_4', var[:,:,:,3:4], max_outputs = 1)
    tf.summary.image(name + '_5', var[:,:,:,4:5], max_outputs = 1)
    tf.summary.image(name + '_6', var[:,:,:,5:6], max_outputs = 1)
and got this error:
InvalidArgumentError: Tensor must be 4-D with last dim 1, 3, or 4, not [50,100,100,6]
[[Node: Input_Data/Image_Visualization/Input_Data = ImageSummary[T=DT_FLOAT, bad_color=Tensor<type: uint8 shape: [4] values: 255 0 0...>, max_images=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Input_Data/Image_Visualization/Input_Data/tag, Input_Data/Reshape/_91)]]
Any idea what I should do?
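The error itself states the constraint: tf.summary.image only accepts tensors whose last dimension is 1, 3, or 4. A minimal sketch of one way to respect it (the function name is hypothetical, and it assumes the two color spaces occupy channels 0-2 and 3-5 of the 6-channel tensor) is to split the batch into two 3-channel halves and summarize each separately:

import tensorflow as tf

def six_channel_summaries(var, name):
    # var: float tensor of shape [batch, height, width, 6],
    # e.g. RGB in channels 0-2 and HSV in channels 3-5.
    # tf.summary.image rejects a last dimension of 6, so summarize
    # each 3-channel half as its own image.
    tf.summary.image(name + '_rgb', var[:, :, :, 0:3], max_outputs=1)
    tf.summary.image(name + '_hsv', var[:, :, :, 3:6], max_outputs=1)

Note that the node name in the trace (Input_Data/Image_Visualization/Input_Data) suggests a tf.summary.image call elsewhere in the model is still being fed the full 6-channel tensor, so that call needs the same splitting (or removal) as well.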

How to deal with None in a PyTorch custom dataset class?

I have an object detection dataset with RGB images and annotations in JSON. I use a custom Dataset class to read the images and the labels. One issue that I'm facing is that I would like to skip images when training my model if/when the labels don't contain certain objects.
For example, if one image doesn't contain any target labels belonging to the class 'Cars', I would like to skip it. When parsing my JSON annotations, I tried checking for labels that don't contain the class 'Cars' and returned None. Subsequently, I used a collate function to filter out the Nones, but unfortunately it is not working.
import torch
from torch.utils.data.dataset import Dataset
import json
import os
from PIL import Image
from torchvision import transforms as T   # imported as T, which the code below uses
#import cv2
import numpy as np

general_classes = {
    # Cars
    "Toyota Corolla" : 0,
    "VW Golf" : 0,
    "VW Beetle" : 0,
    # Motor-cycles
    "Harley Davidson" : 1,
    "Yamaha YZF-R6" : 1,
}

car_classes = {
    "Toyota Corolla" : 0,
    "VW Golf" : 0,
    "VW Beetle" : 0
}

def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def my_collate(batch):
    # drop the None entries before the default collate runs
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)
class FilteredDataset(Dataset):
    # The dataloader will skip the image and corresponding labels based on the dictionary 'car_classes'
    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        img_folder_list = os.listdir(self.data_dir)
        self.transforms = transforms
        imgs_list = []
        json_list = []
        self.filter_count = 0
        self.filtered_label_list = []
        for img_path in img_folder_list:
            #img_full_path = self.data_dir + img_path
            img_full_path = os.path.join(self.data_dir, img_path)
            json_file = os.path.join(img_full_path, 'annotations-of-my-images.json')
            img_file = os.path.join(img_full_path, 'Image-Name.png')
            json_list.append(json_file)
            imgs_list.append(img_file)
        self.imgs = imgs_list
        self.annotations = json_list
        total_count = 0
        for one_annotation in self.annotations:
            filtered_obj_id = []
            with open(one_annotation) as f:
                img_annotations = json.load(f)
            parts_list = img_annotations['regions']
            for part in parts_list:
                current_obj_id = part['tags'][0]  # bbox label
                check_obj_id = general_classes[current_obj_id]
                if check_obj_id == 0:
                    subclass_id = car_classes[current_obj_id]
                    filtered_obj_id.append(subclass_id)
                    total_count = total_count + 1
            if len(filtered_obj_id) > 0:
                self.filter_count = self.filter_count + 1
                self.filtered_label_list.append(one_annotation)
        print("The total number of the objects in all images: ", total_count)

    # get one image and the bboxes, img_id, labels of parts, etc. in the image as target.
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        image_id = torch.tensor([idx])
        with open(self.annotations[idx]) as f:
            img_annotations = json.load(f)
        parts_list = img_annotations['regions']
        obj_ids = []
        boxes = []
        for part in parts_list:
            obj_id = part['tags'][0]
            check_obj_id = general_classes[obj_id]
            if check_obj_id == 0:
                obj_id = car_classes[obj_id]
                obj_ids.append(obj_id)
        #print("---------------------------------------------------")
        if len(obj_ids) > 0:
            img = Image.open(img_path).convert("RGB")
            labels = torch.as_tensor(obj_ids, dtype = torch.int64)
            target = {}
            target['labels'] = labels
            if self.transforms is not None:
                img, target = self.transforms(img, target)
            return img, target
        else:
            return None

    def __len__(self):
        return len(self.filtered_label_list)
train_data_path = "path-to-my-annotation"

# Generators
train_dataset = FilteredDataset(train_data_path, get_transform(train=True))
print("Total files in the train_dataset: ", len(train_dataset))
#print("The first instance in the train dataset : ", train_dataset[0])
#training_generator = torch.utils.data.DataLoader(train_dataset)
training_generator = torch.utils.data.DataLoader(train_dataset, collate_fn=my_collate)
print("\n\n Iterator in action! ")
print("---------------------------------------------------------")
count = 0
for img, target in training_generator:
    #print("The img name : ", img[0])
    count = count + 1
    print("target name : ", target)
    print("count : ", count)
    print("**************************************************")
However, I get an error when iterating over the DataLoader.
Could anyone please suggest a way to skip the images that do not contain a particular categorical label?
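One pattern that avoids None entirely (a sketch, assuming imgs, annotations, and car_classes are built as in the question) is to pre-compute the list of valid indices in __init__ and have __getitem__ index only through that list. This also fixes a mismatch in the original class, where __len__ counts only the filtered annotations while __getitem__ indexes the unfiltered lists:

class CarOnlyDataset(Dataset):
    """Sketch: pre-filter the valid indices so __getitem__ never returns None."""
    def __init__(self, imgs, annotations, transforms=None):
        self.imgs = imgs
        self.annotations = annotations
        self.transforms = transforms
        # keep only the indices whose annotation contains at least one car label
        self.valid_indices = []
        for i, annotation in enumerate(self.annotations):
            with open(annotation) as f:
                regions = json.load(f)['regions']
            if any(part['tags'][0] in car_classes for part in regions):
                self.valid_indices.append(i)

    def __getitem__(self, idx):
        # map the dataset index onto the filtered index space
        real_idx = self.valid_indices[idx]
        img = Image.open(self.imgs[real_idx]).convert("RGB")
        with open(self.annotations[real_idx]) as f:
            regions = json.load(f)['regions']
        obj_ids = [car_classes[p['tags'][0]] for p in regions
                   if p['tags'][0] in car_classes]
        target = {'labels': torch.as_tensor(obj_ids, dtype=torch.int64)}
        if self.transforms is not None:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        return len(self.valid_indices)

With this, every index yields a real sample, so the default collate function works and no custom collate_fn is needed.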

How to load and map list of dictionaries/jsons with tf.data.Dataset

I have a dataset of records stored in a list of dictionaries. The dictionaries can be fairly complex. I would like to load this list through the TensorFlow dataset API. How can I do this? I tried something like this; however, it is not working:
import tensorflow as tf
import json

LABELS_IDS = ["cat", "dog", "animal"]

def parse_record(record):
    image = tf.io.read_file(record["_file"])
    image = tf.image.decode_jpeg(image)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [224, 224])
    image = tf.image.random_flip_left_right(image, seed=None)
    labels = []
    for element in record["_categories"]:
        if element in LABELS_IDS:
            labels.append(LABELS_IDS.index(element))
    one_hot_labels = tf.reduce_sum(tf.one_hot(labels, len(LABELS_IDS)), axis=0)
    return image, one_hot_labels

records = [{"_file": "images/test.jpg", "_categories": ["cat", "animal"]}]
train_x = tf.data.Dataset.from_tensor_slices(records).map(parse_record)
Edit:
I found the answer to this: you can simply pre-process the records with plain Python helpers and build the dataset from the results:
LABELS_IDS = ["cat", "dog", "animal"]
records = [{"_file": "images/test.jpg", "_categories": ["cat", "animal"]}]

def _load_files(records):
    return [record["_file"] for record in records]

def _load_labels(records):
    vectors = []
    for record in records:
        labels = []
        for element in record["_categories"]:
            if element in LABELS_IDS:
                labels.append(LABELS_IDS.index(element))
        # note: this must reference `labels` (the original snippet
        # referenced an undefined name `present`)
        one_hot = tf.reduce_sum(tf.one_hot(labels, len(LABELS_IDS)), axis=0)
        vectors.append(one_hot.numpy())
    return vectors

def _load_data(file_path, label):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    return image, label

data = (
    _load_files(records),
    _load_labels(records)
)
train_x = tf.data.Dataset.from_tensor_slices(data).map(_load_data)
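A quick way to sanity-check the resulting pipeline (a sketch; it assumes eager execution and that images/test.jpg actually exists):

for image, label in train_x.batch(1).take(1):
    print(image.shape)    # e.g. (1, height, width, 3)
    print(label.numpy())  # e.g. [[1. 0. 1.]] for ["cat", "animal"]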

Why am I getting this error in facenet?

I am trying to run facematch (facenet) on my virtual machine (Google Cloud Platform). At first, things were running smoothly and it was embedding the points of the faces, but then, out of the blue, my code stopped working.
In the first code listing, you can see the imports are there.
For the second code listing, you can see the imports are there.
These are the ls commands, so you can see that all the directories/modules are there, along with the errors I'm getting.
Can anyone share some insight into what I'm doing wrong?
Face_match_demo code:
import tensorflow as tf
import numpy as np
import facenet
from align import detect_face
import cv2
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img1", type = str, required=True)
parser.add_argument("--img2", type = str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory; files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

def compare2face(img1, img2):
    face1 = getFace(img1)
    face2 = getFace(img2)
    if face1 and face2:
        # calculate Euclidean distance between the two embeddings
        dist = np.sqrt(np.sum(np.square(np.subtract(face1[0]['embedding'], face2[0]['embedding']))))
        return dist
    return -1

img1 = cv2.imread(args.img1)
img2 = cv2.imread(args.img2)
distance = compare2face(img1, img2)
threshold = 1.10  # set yourself to meet your requirement
print("distance = " + str(distance))
face_embeddings_demo code:
import tensorflow as tf
from align import detect_face
import facenet
import cv2
import imutils
import numpy as np
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img", type = str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory; files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    # print(feed_dict)
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

img = cv2.imread(args.img)
img = imutils.resize(img, width=1000)
faces = getFace(img)
for face in faces:
    print("Embeddings = " + str(face['embedding']))

cv2.waitKey(0)
cv2.destroyAllWindows()
You have to have an __init__.py file in a directory for Python to recognize it as a package. It can be an empty file, but it has to be present. You don't have one in the align directory.
From the documentation:
The __init__.py files are required to make Python treat the directories as containing packages
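For example, a minimal way to create the missing marker file (the relative path assumes you run this from the project root, next to the align directory):

import os

# create an empty __init__.py so Python treats `align` as a package
open(os.path.join('align', '__init__.py'), 'a').close()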
From your comment, the error
usage: face_match_demo.py [-h] --img1 IMG1 --img2 IMG2 face_match_demo.py: error: ambiguous option: --img=images/faces.jpg could match --img2, --img1
means that face_match_demo.py is actually a utility for matching two images, i.e. telling whether they contain the same face. So you have to provide two images to it, using the --img1 and --img2 options, like this:
python face_match_demo.py --img1 images/faces.jpg --img2 [[another face image]]

How to display tensorflow class names after prediction

How can I obtain the names of the classes associated with the predictions when I predict using TensorFlow? Right now it returns an array of probabilities only. This is the code I use to predict an image:
class Prediction:
    def __init__(self, filename, filepath, image_size = 128, number_channels = 3):
        self.x_batch = []
        self.images = []
        self.image_size = image_size
        self.number_channels = number_channels
        self.image = cv2.imread(filename)
        self.modelpath = filepath
        self.modelfilepath = filepath + '/train-model.meta'
        self.sess = tf.Session()
        self.graph = None
        self.y_pred = None

    def resize_image(self):
        self.image = cv2.resize(self.image, (self.image_size, self.image_size), cv2.INTER_LINEAR)
        self.images.append(self.image)
        self.images = np.array(self.images, dtype=np.uint8)
        self.images = self.images.astype('float32')
        self.images = np.multiply(self.images, 1.0 / 255.0)
        self.x_batch = self.images.reshape(1, self.image_size, self.image_size, self.number_channels)

    def restore_model(self):
        saver = tf.train.import_meta_graph(self.modelfilepath)
        saver.restore(self.sess, tf.train.latest_checkpoint(self.modelpath))
        self.graph = tf.get_default_graph()
        self.y_pred = self.graph.get_tensor_by_name("y_pred:0")

    def predict_image(self):
        x = self.graph.get_tensor_by_name("x:0")
        y_true = self.graph.get_tensor_by_name("y_true:0")
        y_test_images = np.zeros((1, 2))
        feed_dict_testing = {x: self.x_batch, y_true: y_test_images}
        result = self.sess.run(self.y_pred, feed_dict=feed_dict_testing)
        return result
Thanks for the help.
It would help a little to see your training code, to see how you are measuring accuracy against the ground-truth values. That said, you need a label file that you can use like this:
predictions = self.sess.run(self.y_pred, feed_dict=feed_dict_testing)

# Format predicted classes for display
# use np.squeeze to convert the tensor to a 1-d vector of probability values
predictions = np.squeeze(predictions)
top_k = predictions.argsort()[-5:][::-1]  # get the indices of the top 5 predictions

# read the class labels in from the label file (text mode, one label per line)
f = open(labelPath, 'r')
lines = f.readlines()
labels = [str(w).replace("\n", "") for w in lines]

print("")
print("Image Classification Probabilities")
# Output the class probabilities in descending order
for node_id in top_k:
    # filter_delimiters is a small helper from the retraining example
    # that strips quotes and delimiters from a label line
    human_string = filter_delimiters(labels[node_id])
    score = predictions[node_id]
    print('{0:s} (score = {1:.5f})'.format(human_string, score))
This comes straight from the TensorFlow examples on retraining Inception. Hope this helps.
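For concreteness, a minimal sketch of the label-file idea (labels.txt and its contents are hypothetical; the file lists one class name per line, in the same order as the model's output vector):

import numpy as np

# labels.txt (hypothetical) contains one class name per line, e.g.:
#   cat
#   dog
with open('labels.txt') as f:
    labels = [line.strip() for line in f]

predictions = np.squeeze(np.array([[0.15, 0.85]]))  # example output of predict_image()
print(labels[int(np.argmax(predictions))])          # -> dog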
