I want to extract face descriptors from photos of people. This is what I've done so far:
First, I detected faces in the photos using the OpenCV library in Python.
Then I saved each detected face as a separate image.
Next, I have to extract a descriptor from each face image.
For this, I downloaded the VGG Face Caffe model (CNN) from here: http://www.robots.ox.ac.uk/~vgg/software/vgg_face/
To extract the descriptor, I first did this:
import caffe
import numpy as np

net = caffe.Net('CAFFE_FACE_deploy.prototxt', 'CAFFE_FACE.caffemodel', caffe.TEST)
img = caffe.io.load_image( "detectedface.jpg" )
img = img[:,:,::-1]*255.0
avg = np.array([129.1863,104.7624,93.5940])
img = img - avg
img = img.transpose((2,0,1))
img = img[None,:]
out = net.forward_all( data = img )
But it gives a dimension mismatch error saying that data should have shape (50,3,224,224) instead of (50,3,490,490).
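(As an editorial note, the mismatch is only about the input size: the net expects 224x224 crops, while the saved face crop is 490x490. Below is a minimal sketch of the same preprocessing with a resize added; the mean is given in BGR order here, since the channels were already swapped:)

img = caffe.io.load_image("detectedface.jpg")    # RGB float image in [0, 1]
img = caffe.io.resize_image(img, (224, 224))     # resize to the net's input size
img = img[:, :, ::-1] * 255.0                    # RGB -> BGR, scale to [0, 255]
avg = np.array([93.5940, 104.7624, 129.1863])    # channel means in BGR order
img = (img - avg).transpose((2, 0, 1))[None, :]  # to 1 x 3 x 224 x 224
out = net.forward_all(data=img)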
Then I tried this:
# input preprocessing: 'data' is the name of the input blob == net.inputs[0]
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel
transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB
net.blobs['data'].reshape(50,3,224,224)
net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image('detectedface.jpg'))
out = net.forward()
feats = net.blobs['fc7'].data[0]
Now, when I print feats, it shows all zeros. Why is that?
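(One way to narrow this down, offered as a debugging sketch rather than a fix: print the activation range of every blob after the forward pass to see where the signal dies out.)

# Inspect where activations become all zero along the network
for name, blob in net.blobs.items():
    print(name, blob.data.min(), blob.data.max())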
I am trying to find the closest match of an image against a large list of other images (10,000+). The background color is all white, the camera angle is the same, and the image content shapes are close to each other (see image below). I tried using OpenCV with ORB and BFMatcher's knnMatch to find the closest match, but I am not even close to finding the match I want.
To my understanding, the images need to be grayscale, but in my case I think color would be a very important descriptor?
I am new to both OpenCV and image matching, so can you help me figure out whether I need to use another approach?
import cv2
import os
orb = cv2.ORB_create(nfeatures=1000) # Find 1000 features to match from
bf = cv2.BFMatcher()
# Image to match
findImg = 'captainA.png'
imgQuery = cv2.imread(f'Images/{findImg}', 0)
kp1, des1 = orb.detectAndCompute(imgQuery, None)
# Loop through all superhero images and find the closest match
images = ["img1.png","img2.png","img3.png","img4.png","img5.png","img6.png","img7.png","img8.png","img9.png","img10.png","img11.png","img12.png"]
matchList = []
names = []
for img in images:
    imgCur = cv2.imread(f'Superheroes/{img}', 0)
    kp2, des2 = orb.detectAndCompute(imgCur, None)
    matches = bf.knnMatch(des1, des2, k=2)
    goodMatches = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:  # Lowe's ratio test: 0.75 defines a good match
            goodMatches.append([m])
    matchList.append(len(goodMatches))
    names.append(img)
matchIdx = matchList.index(max(matchList))
# Name of matched image
print(names[matchIdx])
What I want to find: [example image omitted]
Here is some small code that should do the job.
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import numpy as np
from PIL import Image
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
def extract(img):
    img = img.resize((224, 224))   # Resize the image
    img = img.convert('RGB')       # Convert the image color space
    x = image.img_to_array(img)    # Reformat the image
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    feature = model.predict(x)[0]  # Extract features
    return feature / np.linalg.norm(feature)  # Normalize
# Iterate through images and extract Features
images = ["img1.png","img2.png","img3.png","img4.png","img5.png"...+2000 more]
all_features = np.zeros(shape=(len(images),4096))
for i in range(len(images)):
feature = extract(img=Image.open(images[i]))
all_features[i] = np.array(feature)
# Match image
query = extract(img=Image.open("image_to_match.png")) # Extract its features
dists = np.linalg.norm(all_features - query, axis=1) # Calculate the similarity (distance) between images
ids = np.argsort(dists)[:5] # Indices of the 5 images with the lowest distance
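To map the result back to filenames, a small usage sketch using the images list above:

# Print the five closest matches together with their distances
for i in ids:
    print(images[i], dists[i])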
I am trying to run a CNN where the input images have three channels (RGB) and the label (target) images are grayscale (1 channel). The input and label images are float32, in tif format.
I got the list of image and label tile pairs as below:
def get_train_test_lists(imdir, lbldir):
    imgs = glob.glob(imdir + "/*.tif")
    dset_list = []
    for img in imgs:
        filename_split = os.path.splitext(img)
        filename_zero, fileext = filename_split
        basename = os.path.basename(filename_zero)
        dset_list.append(basename)

    x_filenames = []
    y_filenames = []
    for img_id in dset_list:
        x_filenames.append(os.path.join(imdir, "{}.tif".format(img_id)))
        y_filenames.append(os.path.join(lbldir, "{}.tif".format(img_id)))

    print("number of images: ", len(dset_list))
    return dset_list, x_filenames, y_filenames
train_list, x_train_filenames, y_train_filenames = get_train_test_lists(img_dir, label_dir)
test_list, x_test_filenames, y_test_filenames = get_train_test_lists(test_img_dir, test_label_dir)
from sklearn.model_selection import train_test_split
x_train_filenames, x_val_filenames, y_train_filenames, y_val_filenames = \
    train_test_split(x_train_filenames, y_train_filenames, test_size=0.1, random_state=42)
num_train_examples = len(x_train_filenames)
num_val_examples = len(x_val_filenames)
num_test_examples = len(x_test_filenames)
In order to read the tiles into tensors, I first defined the image dimensions and batch size:
img_shape = (128, 128, 3)
batch_size = 2
I noticed that there is no decoder in TensorFlow for tif images, based on this link. tfio.experimental.image.decode_tiff can be used, but it decodes to a uint8 tensor.
Here is sample code for png images:
def _process_pathnames(fname, label_path):
    # We map this function onto each pathname pair
    img_str = tf.io.read_file(fname)
    img = tf.image.decode_png(img_str, channels=3)

    label_img_str = tf.io.read_file(label_path)
    label_img = tf.image.decode_png(label_img_str, channels=1)
    # The label image holds values between 0 and 9, indicating the pixel-wise
    # crop type class or background (0). We take the first channel only.
    label_img = label_img[:, :, 0]
    label_img = tf.expand_dims(label_img, axis=-1)
    return img, label_img
Is it possible to modify this code with tf.convert_to_tensor or any other option to get a float32 tensor from tif images? (I asked this question before, but I don't know how to integrate tf.convert_to_tensor with the code above.)
You can read almost any image format and convert it to a numpy array with the Pillow image package:
from PIL import Image
import numpy as np
img = Image.open("image.tiff")
img = np.array(img)
print(img.shape, img.dtype)
# (986, 1853, 4) uint8
You can integrate this function into your code and then convert the numpy array to a TensorFlow tensor, as well as performing the appropriate image conversions.
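For example, here is a minimal sketch of wiring the Pillow loader into a tf.data pipeline via tf.py_function; the 128x128 shapes and the single-channel float32 labels are assumptions taken from your img_shape and batch_size above:

import numpy as np
import tensorflow as tf
from PIL import Image

def _load_tif_pair(img_path, lbl_path):
    # tf.py_function runs this eagerly, so plain Python and Pillow work here
    img = np.asarray(Image.open(img_path.numpy().decode()), dtype=np.float32)
    lbl = np.asarray(Image.open(lbl_path.numpy().decode()), dtype=np.float32)
    if lbl.ndim == 2:
        lbl = lbl[..., None]  # add a channel axis to single-channel labels
    return img, lbl

def process_pathnames(img_path, lbl_path):
    img, lbl = tf.py_function(_load_tif_pair, [img_path, lbl_path],
                              [tf.float32, tf.float32])
    img.set_shape([128, 128, 3])  # restore static shapes lost by py_function
    lbl.set_shape([128, 128, 1])
    return img, lbl

train_ds = (tf.data.Dataset.from_tensor_slices(
                ([str(f) for f in x_train_filenames],
                 [str(f) for f in y_train_filenames]))
            .map(process_pathnames)
            .batch(batch_size))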
Side note: you can greatly simplify your get_train_test_lists function using the pathlib package (which is part of the Python 3 standard library, like os, but much simpler to use).
from pathlib import Path

def get_train_test_lists(imdir, lbldir):
    x_filenames = list(Path(imdir).glob("*.tif"))
    y_filenames = [Path(lbldir) / f.name for f in x_filenames]
    dset_list = [f.stem for f in x_filenames]
    return dset_list, x_filenames, y_filenames
Note that x_filenames and y_filenames are now Path objects rather than strings, but this shouldn't be an issue in your code.
I get an error on the line data = np.array(data, dtype="float32"). It works fine if I do not use PIL.Image.open and instead use load_img() from tensorflow.keras. Can someone please help with what change I need to make so that I can use Image.open() and get it to work?
The code is based on https://www.pyimagesearch.com/2020/05/04/covid-19-face-mask-detector-with-opencv-keras-tensorflow-and-deep-learning/
EPOCHS = 20
BS = 32
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class labels
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []
size = 224,224
# loop over the image paths
for imagePath in imagePaths:
    # extract the class label from the filename
    label = imagePath.split(os.path.sep)[-2]

    # load the input image and preprocess it
    image = Image.open(imagePath)
    image = image.convert('RGB')
    image.thumbnail(size, Image.ANTIALIAS)
    # image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image)
    image = preprocess_input(image)

    # update the data and labels lists, respectively
    data.append(image)
    labels.append(label)

# convert the data and labels to NumPy arrays
data = np.array(data, dtype="float32")
labels = np.array(labels)
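A likely cause, offered as a guess rather than a confirmed diagnosis: Image.thumbnail preserves the aspect ratio, so the processed images can end up with different shapes, and np.array cannot stack a ragged list into a single float32 array. Forcing every image to exactly 224x224, which is what load_img(imagePath, target_size=(224, 224)) does, avoids that:

image = Image.open(imagePath)
image = image.convert('RGB')
image = image.resize((224, 224), Image.ANTIALIAS)  # fixed size, unlike thumbnail()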
So I am trying to detect an ArUco marker in an image, and the function requires the image to be of uint8 type, so I converted the image using this formula:
IMG = ((img.astype(np.uint8)*255)) #img is the original float32 type image
Here is the image as float32 type: [image omitted]
I converted it into uint8, then passed it to a function that requires a uint8 image, and got this: [uint8 image omitted]
The function to which I passed the image is:
def detect_Aruco(img):  # returns the detected arucos as a dictionary mapping id -> corners
    aruco_list = {}
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    aruco_dict = aruco.Dictionary_get(aruco.DICT_5X5_50)
    parameters = aruco.DetectorParameters_create()

    # lists of ids and the corners belonging to each id
    corners, ids, _ = aruco.detectMarkers(gray, aruco_dict, parameters=parameters)
    gray = aruco.drawDetectedMarkers(gray, corners, ids)

    if len(corners):  # len(corners) is the number of detected markers
        for k in range(len(corners)):
            temp_1 = corners[k]
            temp_1 = temp_1[0]
            temp_2 = ids[k]
            temp_2 = temp_2[0]
            aruco_list[temp_2] = temp_1
        return aruco_list
TL;DR: I want to get the converted image but with the original colors it had. Any help?
EDIT: This is my original image array (in float32): [array printout omitted]
EDIT 2: I think I got it.
To increase the contrast, I had multiplied the image by 3, which pushed values above 1, so the normalization was wrong.
Also, as the comment suggested, the correct line was: ENHAN_IMG = ((enhan_img*255).astype(np.uint8))
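For completeness, a hedged sketch of the conversion with out-of-range values clipped first, assuming enhan_img is a float image nominally in [0, 1]:

import numpy as np

# Clip to [0, 1] before scaling, since the contrast boost pushed values above 1
ENHAN_IMG = (np.clip(enhan_img, 0.0, 1.0) * 255).astype(np.uint8)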
I want to implement the VGG Face Descriptor in Python. But I keep getting an error:
TypeError: can only concatenate list (not "numpy.ndarray") to list
My code:
import numpy as np
import cv2
import caffe
img = cv2.imread("ak.png")
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
net = caffe.Net("VGG_FACE_deploy.prototxt","VGG_FACE.caffemodel", caffe.TEST)
print net.forward(img)
Can you help me?
UPDATE 1
This working code is the example in MATLAB:
% Copyright (c) 2015, Omkar M. Parkhi
% All rights reserved.
img = imread('ak.png');
img = single(img);
averageImage = [129.1863,104.7624,93.5940] ; % RGB channel means
img = cat(3,img(:,:,1)-averageImage(1),...
img(:,:,2)-averageImage(2),...
img(:,:,3)-averageImage(3));
img = img(:, :, [3, 2, 1]); % convert from RGB to BGR
img = permute(img, [2, 1, 3]); % permute width and height
model = 'VGG_FACE_16_deploy.prototxt';
weights = 'VGG_FACE.caffemodel';
caffe.set_mode_cpu();
net = caffe.Net(model, weights, 'test'); % create net and load weights
res = net.forward({img});
prob = res{1};
caffe_ft = net.blobs('fc7').get_data();
To use the Python interface, you need to transform the input image before feeding it to the net:
img = caffe.io.load_image( "ak.png" )
img = img[:,:,::-1]*255.0 # convert RGB->BGR
avg = np.array([93.5940, 104.7624, 129.1863]) # BGR mean values
img = img - avg # subtract mean (numpy takes care of dimensions :)
Now img is an H-by-W-by-3 numpy array.
Caffe expects its inputs as 4D: batch_index x channel x height x width.
Therefore you need to transpose the input and add a singleton dimension to represent the leading "batch_index" dimension:
img = img.transpose((2,0,1))
img = img[None,:] # add singleton dimension
Now you can run the forward pass
out = net.forward_all( data = img )
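The descriptor can then be read from the fc7 blob, assuming the deploy prototxt names that layer 'fc7' as in the snippets above:

feats = net.blobs['fc7'].data[0]  # the 4096-D face descriptor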
OpenCV reads images in BGR order and in the [0, 255] range by default, so:
img = cv2.imread('ak.png')
img = cv2.resize(img, (224, 224))  # match the net's 224x224 input
img = img.astype(np.float32)       # cast to float before subtracting the float mean
avg = np.array([93.5940, 104.7624, 129.1863])  # BGR mean from VGG
img -= avg                         # subtract mean
img = img.transpose((2, 0, 1))     # HWC -> CHW: 3x224x224
img = img[None, :]                 # add singleton batch dimension
out = net.forward_all(data=img)
Try passing a single-element list to the method:
net.forward([img])