I have a dataset of rgb and grayscale images. While iterating over the dataset, I want to detect if the image is a grayscale image such that I can convert it to rgb. I wanted to use tf.shape(image) to detect the dimensions of the image. For a rgb image I get something like [1, 100, 100, 3]. For grayscale images the function returns for example [1, 100, 100]. I wanted to use len(tf.shape(image)) to detect if it is of length 4 (=rgb) or length 3 (=grayscale). That did not work.
This is my code so far which did not work:
def process_image(image):
# Convert numpy array to tensor
image = tf.convert_to_tensor(image, dtype=tf.uint8)
# Take care of grayscale images
dims = len(tf.shape(image))
if dims == 3:
image = np.expand_dims(image, axis=3)
image = tf.image.grayscale_to_rgb(image)
return image
Is there an alternative way to convert grayscale images to rgb?
You can use a function like this for that:
import tensorflow as tf
def process_image(image):
image = tf.convert_to_tensor(image, dtype=tf.uint8)
image_rgb = tf.cond(tf.rank(image) < 4,
lambda: tf.image.grayscale_to_rgb(tf.expand_dims(image, -1)),
lambda: tf.identity(image))
# Add shape information
s = image.shape
image_rgb.set_shape(s)
if s.ndims is not None and s.ndims < 4:
image_rgb.set_shape(s.concatenate(3))
return image_rgb
I had a very similar problem, I wanted to load rgb and greyscale images in one go. Tensorflow supports setting the channel number when reading in the images. So if the images have different numbers of channels, this might be what you are looking for:
# to get greyscale:
tf.io.decode_image(raw_img, expand_animations = False, dtype=tf.float32, channels=1)
# to get rgb:
tf.io.decode_image(raw_img, expand_animations = False, dtype=tf.float32, channels=3)
-> You can even do both on the same image and inside tf.data.Dataset mappings!
You now have to set the channels variable to match the shape you need, so all the loaded images will be of that shape. Than you could reshape without a condition.
This also allows you to directly load a grayscale image to RGB in Tensorflow. Here an example:
>> a = Image.open(r"Path/to/rgb_img.JPG")
>> np.array(a).shape
(666, 1050, 3)
>> a = a.convert('L')
>> np.array(a).shape
(666, 1050)
>> b = np.array(a)
>> im = Image.fromarray(b)
>> im.save(r"Path/to/now_it_is_greyscale.jpg")
>> raw_img = tf.io.read_file(r"Path/to/now_it_is_greyscale.jpg")
>> img = tf.io.decode_image(raw_img, dtype=tf.float32, channels=3)
>> img.shape
TensorShape([666, 1050, 3])
>> img = tf.io.decode_image(raw_img, dtype=tf.float32, channels=1)
>> img.shape
TensorShape([666, 1050, 1])
Use expand_animations = False if you get ValueError: 'images' contains no shape.! See: https://stackoverflow.com/a/59944421/9621080
Related
I am creating a PyTorch dataset and dataloader from CUB_200. When reading the images as pill, I need to change the BGR channels to RGB and I use the following code:
def _read_images_from_list(imagefile_list):
imgs = []
mean=[0.485, 0.456, 0.406]
std= [0.229, 0.224, 0.225]
Transformations = transforms.Compose([transforms.Resize([224, 224]), transforms.ToTensor(), transforms.Normalize(mean, std)])
for imagefile in imagefile_list:
# read images as PIL instead of NUMPY
img = Image.open(imagefile)
b, g, r = img.split()
img = Image.merge("RGB", (r, g, b))
img = Transformations(img) # ToTensor and between [0,1], then normalized using image net mean and std, then transposed into shape (C,H,W)
imgs += [img]
return imgs
After going through a number of classes, I get the following error.
ValueError: not enough values to unpack (expected 3, got 1)
I wonder what should I do now? it means that one of the images has only one channel instead of one. Can this be the case or there is a problem with my code? I had a different implementation before but it worked. The reason I changed this implementation was that I could not normalize my images.
This is the old implementation:
def _read_images_from_list(imagefile_list):
imgs = []
for imagefile in imagefile_list:
img = cv2.imread(imagefile).astype(np.float32)
img = cv2.resize(img, (224, 224))
# Convert RGB to BGR
img_r, img_g, img_b = np.split(img, 3, axis=2)
img = np.concatenate((img_b, img_g, img_r), axis=2)
# Extract mean
img -= np.array((103.94,116.78,123.68), dtype=np.float32) # BGR mean
# HWC -> CHW, compatible with pytorch
img = np.transpose(img, [2, 0, 1])
imgs += [img]
return imgs
I would strongly recommend you use skimage.io to load your images, not opencv. It opens the images in RGB format by default, removing your shuffling overhead, but if you want to convert BGR to RGB you can use this:
import numpy as np
img = np.arange(27).reshape(3,3,3)
b = img[:,:,0]
g = img[:,:,1]
r = img[:,:,2]
rgb = np.dstack([r,g,b])
print(img)
print("#"*20)
print(rgb)
I am trying to run a CNN where the input images have three channels (rgb) and the label (target) images are grayscale images (1 channel). The input and label images are in float32 and tif format.
I got the list of image and label tile pairs as below:
def get_train_test_lists(imdir, lbldir):
imgs = glob.glob(imdir+"/*.tif")
dset_list = []
for img in imgs:
filename_split = os.path.splitext(img)
filename_zero, fileext = filename_split
basename = os.path.basename(filename_zero)
dset_list.append(basename)
x_filenames = []
y_filenames = []
for img_id in dset_list:
x_filenames.append(os.path.join(imdir, "{}.tif".format(img_id)))
y_filenames.append(os.path.join(lbldir, "{}.tif".format(img_id)))
print("number of images: ", len(dset_list))
return dset_list, x_filenames, y_filenames
train_list, x_train_filenames, y_train_filenames = get_train_test_lists(img_dir, label_dir)
test_list, x_test_filenames, y_test_filenames = get_train_test_lists(test_img_dir, test_label_dir)
from sklearn.model_selection import train_test_split
x_train_filenames, x_val_filenames, y_train_filenames, y_val_filenames =
train_test_split(x_train_filenames, y_train_filenames, test_size=0.1, random_state=42)
num_train_examples = len(x_train_filenames)
num_val_examples = len(x_val_filenames)
num_test_examples = len(x_test_filenames)
In order to read the tiles into tensor, firstly I defined the image dimensions and batch size:
img_shape = (128, 128, 3)
batch_size = 2
I noticed that there is no decoder in tensorflow for tif image based on this link. tfio.experimental.image.decode_tiff can be used but it decodes to unit8 tensor.
here is a sample code for png images:
def _process_pathnames(fname, label_path):
# We map this function onto each pathname pair
img_str = tf.io.read_file(fname)
img = tf.image.decode_png(img_str, channels=3)
label_img_str = tf.io.read_file(label_path)
# These are png images so they return as (num_frames, h, w, c)
label_img = tf.image.decode_png(label_img_str, channels=1)
# The label image should have any values between 0 and 9, indicating pixel wise
# cropt type class or background (0). We take the first channel only.
label_img = label_img[:, :, 0]
label_img = tf.expand_dims(label_img, axis=-1)
return img, label_img
Is it possible to modify this code by tf.convert_to_tensor or any other option to get float32 tensor from tif images? (I asked this question before, but I don't know how to integrate tf.convert_to_tensor with the mentioned codes)
You can read almost any image format and convert it to a numpy array with the Pillow image package:
from PIL import Image
import numpy as np
img = Image.open("image.tiff")
img = np.array(img)
print(img.shape, img.dtype)
# (986, 1853, 4) uint8
You can integrate this function into your code and then convert the numpy array to a tensorflow tensor as well as doing the appropriated image conversions.
Side note: you can simplify a lot your get_train_test_lists function using the pathlib package (which is integrated to Python3 like os but much simpler to use).
def get_train_test_lists(imdir, lbldir):
x_filenames = list(Path(imdir).glob("*.tif"))
y_filenames = [Path(lbldir) / f.name for f in x_filenames]
dset_list = [f.stem for f in x_filenames]
return dset_list, x_filenames, y_filenames
Note that x_filenames and y_filenames are now absolute paths but this shouldn't be an issue in your code.
I've taken the input from the folder and then reshaped it accordingly as per the model VGG16-places365. It is still showing the same error and looked into the Keras documentation of the problem (https://keras.io/applications/#vgg16) yet the error still prevails.
if __name__ == '__main__':
#from urllib.request import urlopen
import numpy as np
from PIL import Image
from cv2 import resize
pred_array = np.empty((0,6),dtype=float)
TEST_PATH = '/home/guest/Downloads/content/image/thumb'
for img in os.listdir(TEST_PATH):
image = Image.open(os.path.join(TEST_PATH, img))
image = np.array(image, dtype=np.uint8)
image = resize(image, (224, 224))
image = np.expand_dims(image, 0)
model = VGG16_Places365(weights='places')
predictions_to_return = 5
preds = model.predict(image)[0]
top_preds = np.argsort(preds)[::-1][0:predictions_to_return]
# load the class label
file_name = 'categories_places365.txt'
if not os.access(file_name, os.W_OK):
synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
os.system('wget ' + synset_url)
classes = list()
with open(file_name) as class_file:
for line in class_file:
classes.append(line.strip().split(' ')[0][3:])
classes = tuple(classes)
temprow = np.hstack((np.array([img]),top_preds))
np.append(pred_array,temprow.reshape(-1,pred_array.shape[1]),axis=0)
df = pd.DataFrame(data=pred_array,columns=['File_name','Tag_1','Tag_2','Tag_3','Tag_4','Tag_5'])
print(df)
You are probably loading an image with an alpha channel (RGBA) but the VGG16 neural network expects an image without an alpha channel (RGB).
To convert the image from RGBA to RGB, you can either use
image = image.convert("RGB")
on the PIL Image object, i.e. directly after Image.open, or use numpy array slicing on the numpy array object to cut off the first three color channels after np.array has been called:
image = image[:, :, :3]
I have an image that I have encoded and sent out using protobuf like so:
message.image = numpy.ndarray.tobytes(image)
when I receive and parse that message I use this:
image_array = numpy.frombuffer(request.image, numpy.uint8)
This gives me a one-dimensional array. I cannot get this back into an image format. I have tried using numpy's reshape command like so but with no luck:
image = image_array.reshape( 400, 600, 3 )
The image being sent is 400x600 pixels and it is a 3 channel color image. Any suggestions on what I am missing?
You would also need to store the img.shape data of the original image you wanted to encode and whole decoding you need that img.shape value to reshape the matrix to it's original form as:
import numpy as np
# Create a dummy matrix
img = np.ones((50, 50, 3), dtype=np.uint8) * 255
# Save the shape of original matrix.
img_shape = img.shape
message_image = np.ndarray.tobytes(img)
re_img = np.frombuffer(message_image, dtype=np.uint8)
# Convert back the data to original image shape.
re_img = np.reshape(re_img, img_shape)
I want implement VGG Face Descriptor in python. But I keep getting an error:
TypeError: can only concatenate list (not "numpy.ndarray") to list
My code:
import numpy as np
import cv2
import caffe
img = cv2.imread("ak.png")
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
net = caffe.Net("VGG_FACE_deploy.prototxt","VGG_FACE.caffemodel", caffe.TEST)
print net.forward(img)
Can you help me ?
UPDATE 1
This working code is example in matlab
% Copyright (c) 2015, Omkar M. Parkhi
% All rights reserved.
img = imread('ak.png');
img = single(img);
Img = [129.1863,104.7624,93.5940] ;
img = cat(3,img(:,:,1)-averageImage(1),...
img(:,:,2)-averageImage(2),...
img(:,:,3)-averageImage(3));
img = img(:, :, [3, 2, 1]); % convert from RGB to BGR
img = permute(img, [2, 1, 3]); % permute width and height
model = 'VGG_FACE_16_deploy.prototxt';
weights = 'VGG_FACE.caffemodel';
caffe.set_mode_cpu();
net = caffe.Net(model, weights, 'test'); % create net and load weights
res = net.forward({img});
prob = res{1};
caffe_ft = net.blobs('fc7').get_data();
To use python interface you need to transform the input image before feeding it to the net
img = caffe.io.load_image( "ak.png" )
img = img[:,:,::-1]*255.0 # convert RGB->BGR
avg = np.array([93.5940, 104.7624, 129.1863]) # BGR mean values
img = img - avg # subtract mean (numpy takes care of dimensions :)
Now img is H-by-W-by-3 numpy array.
Caffe expects its inputs as 4D: batch_index x channel x width x height.
Therefore you need to transpose the input and add a singleton dimension to represent the "batch_index" leading dimension
img = img.transpose((2,0,1))
img = img[None,:] # add singleton dimension
Now you can run the forward pass
out = net.forward_all( data = img )
OpenCV reads in BGR and scaled to 255 format by default, so:
img = cv2.imread('ak.png')
avg = np.array([93.5940,104.7624,129.1863]) # BGR mean from VGG
img -= avg # subtract mean
img = img.transpose((2,0,1)) # to match image input dimension: 3x224x224
img = img[None,:] # add singleton dimension to match batch dimension
out = net.forward_all(data = img)
Try passing a single element list to the method.
net.forward ([img])