I am creating a PyTorch dataset and dataloader from CUB_200. When reading the images with PIL, I need to change the BGR channels to RGB, and I use the following code:
def _read_images_from_list(imagefile_list):
    """Load each image file with PIL and return a list of normalized tensors.

    Every image has its first and third bands swapped, is resized to
    224x224, converted with ``ToTensor`` and normalized with the mean/std
    defined below.

    NOTE(review): the result of ``split()`` is unpacked into exactly three
    bands, so an image that does not have three bands raises ``ValueError``
    at that line.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    pipeline = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    def _load(path):
        # Read as a PIL image (not a NumPy array) and reverse the band order.
        first, second, third = Image.open(path).split()
        swapped = Image.merge("RGB", (third, second, first))
        return pipeline(swapped)

    return [_load(path) for path in imagefile_list]
After going through a number of classes, I get the following error.
ValueError: not enough values to unpack (expected 3, got 1)
I wonder what I should do now. It means that one of the images has only one channel instead of three. Can this be the case, or is there a problem with my code? I had a different implementation before, and it worked. The reason I changed that implementation was that I could not normalize my images.
This is the old implementation:
def _read_images_from_list(imagefile_list):
    """Read images with OpenCV and return them as a list of CHW arrays.

    Each image is cast to float32, resized to 224x224, has the order of its
    three channels reversed, and has a fixed per-channel offset subtracted
    before being transposed from (H, W, C) to (C, H, W).
    """
    # Per-channel offsets; the original code labels these as the BGR mean.
    channel_offset = np.array((103.94, 116.78, 123.68), dtype=np.float32)

    def _load(path):
        pixels = cv2.resize(cv2.imread(path).astype(np.float32), (224, 224))
        # Reverse the order of the three channels along the last axis.
        first, second, third = np.split(pixels, 3, axis=2)
        pixels = np.concatenate((third, second, first), axis=2)
        pixels -= channel_offset
        # HWC -> CHW, compatible with pytorch
        return np.transpose(pixels, [2, 0, 1])

    return [_load(path) for path in imagefile_list]
I would strongly recommend you use skimage.io to load your images, not opencv. It opens the images in RGB format by default, removing your shuffling overhead, but if you want to convert BGR to RGB you can use this:
import numpy as np
# Small 3x3x3 demo array; its last axis stands in for the B, G, R channels.
img = np.arange(27).reshape(3, 3, 3)
# Take the three channel planes, then stack them back depth-wise in
# reversed order.
blue, green, red = (img[:, :, idx] for idx in range(3))
rgb = np.dstack((red, green, blue))
print(img)
print("#" * 20)
print(rgb)
Related
I'm creating an array of images using OpenCV for analysis in TensorFlow.
I've created the following function:
def files_to_img_array(path, files_list):
    """Read a list of image files and stack them, flattened, into one array.

    Each name in ``files_list`` is appended to ``path``, read with
    ``cv2.imread(..., 1)``, flattened and added as a new row, so the
    returned array has one row per file.

    NOTE(review): the placeholder row is hard-coded to 72000000 elements,
    so ``np.vstack`` raises ``ValueError`` for any image whose flattened
    size differs. ``image_names`` is collected but not returned.
    """
    files = [path + file for file in files_list]
    # Placeholder first row sized for flattened 4000x6000 images.
    stacked = np.zeros(72000000)
    image_names = []
    for file in tqdm.tqdm(files_list):
        image_names.append(file.split('.')[0])
        pixels = cv2.imread(path + file, 1)
        print(pixels.shape)
        stacked = np.vstack([stacked, pixels.flatten()])
    # Strip the placeholder row before returning.
    return stacked[1:]
The problem is that the images are not uniformly sized:
0%| | 0/10 [00:00<?, ?it/s](4000, 6000, 3)
10%|███████▊ | 1/10 [00:00<00:03, 2.64it/s](4000, 6000, 3)
20%|███████████████▌ | 2/10 [00:00<00:03, 2.51it/s](2848, 4288, 3)
20%|███████████████▌ | 2/10 [00:00<00:03, 2.18it/s]
Traceback (most recent call last):
...
ValueError: all the input array dimensions for the concatenation axis
must match exactly, but along dimension 1, the array at index 0 has
size 72000000 and the array at index 1 has size 36636672
I'm genuinely uncertain about how to approach this, both from a programming and image processing perspective. Does anyone have advice about how to either pad these differently-sized images, or if there is something in OpenCV that can handle this? (I'm happy to use PIL as well, I'm not married to OpenCV.)
Here is how to stack arbitrary sized images vertically in Python/OpenCV with transparent padding.
Input Images:
import cv2
import numpy as np
# load images
filenames = ["lena.jpg", "barn.jpg", "monet2.jpg"]
images = [cv2.imread(name) for name in filenames]

# get maximum width
ww = max(image.shape[1] for image in images)

# pad images with transparency in width: add an alpha channel, then extend
# each image on its right edge with fully transparent pixels up to ww
padded = []
for image in images:
    width = image.shape[1]
    image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
    image = cv2.copyMakeBorder(image, 0, 0, 0, ww - width, borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0, 0))
    padded.append(image)

# stack images vertically
result = cv2.vconcat(padded)

# write result to disk
cv2.imwrite("image_stack.png", result)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Thanks to @hpaulj for the comment on the question that led to my investigation and this answer.
The following code relies upon Keras and underlying PIL:
import PIL
import tensorflow
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import concurrent.futures
def keras_pipeline(file, target_size=(100, 150)):
    """Load one image file and return it as an array.

    Args:
        file: Path of the image to load; passed straight to ``load_img``.
        target_size: Value forwarded to ``load_img`` as ``target_size`` so
            that every image is loaded at the same size. Defaults to the
            previously hard-coded ``(100, 150)``.

    Returns:
        The result of ``img_to_array`` applied to the loaded image.
    """
    img = load_img(file, target_size=target_size)
    return img_to_array(img)
def files_to_array(path, files_list):
    """Run ``keras_pipeline`` over every file using a process pool.

    ``path`` is prepended to each entry of ``files_list`` by plain string
    concatenation. Returns the iterator produced by ``executor.map``; the
    ``with`` block has already exited by the time the caller receives it.
    """
    full_paths = [path + name for name in files_list]
    with concurrent.futures.ProcessPoolExecutor() as pool:
        results = pool.map(keras_pipeline, full_paths)
    return results
keras_pipeline() creates a transformation pipeline for each image. files_to_array() maps that transformation pipeline over each image and returns a generator. That generator can then be passed as a Numpy array using np.hstack():
# Fold every processed image into the running array, one hstack per image.
for img in img_map:
    existing_array = np.hstack((existing_array, img))
I am trying to run a CNN where the input images have three channels (rgb) and the label (target) images are grayscale images (1 channel). The input and label images are in float32 and tif format.
I got the list of image and label tile pairs as below:
def get_train_test_lists(imdir, lbldir):
    """Pair every ``.tif`` image in ``imdir`` with its label path in ``lbldir``.

    Args:
        imdir: Directory searched (non-recursively) for ``*.tif`` images.
        lbldir: Directory expected to hold a label file with the same name
            as each image. The label files are not checked for existence.

    Returns:
        A tuple ``(dset_list, x_filenames, y_filenames)`` of equal-length
        lists: the image base names without extension, the image paths and
        the matching label paths, all ordered by file name.
    """
    # glob returns matches in arbitrary order; sort so the result (and any
    # seeded split made from it) is reproducible between runs and machines.
    image_paths = sorted(glob.glob(os.path.join(imdir, "*.tif")))
    dset_list = [os.path.splitext(os.path.basename(p))[0] for p in image_paths]
    x_filenames = [os.path.join(imdir, "{}.tif".format(img_id)) for img_id in dset_list]
    y_filenames = [os.path.join(lbldir, "{}.tif".format(img_id)) for img_id in dset_list]
    print("number of images: ", len(dset_list))
    return dset_list, x_filenames, y_filenames
# Build the (id, image path, label path) lists for the training and test sets.
train_list, x_train_filenames, y_train_filenames = get_train_test_lists(img_dir, label_dir)
test_list, x_test_filenames, y_test_filenames = get_train_test_lists(test_img_dir, test_label_dir)
from sklearn.model_selection import train_test_split
# Hold out 10% of the training pairs for validation. The right-hand side is
# wrapped in parentheses so the statement can span two lines; a line that
# ends in a bare "=" is a syntax error.
x_train_filenames, x_val_filenames, y_train_filenames, y_val_filenames = (
    train_test_split(x_train_filenames, y_train_filenames, test_size=0.1, random_state=42)
)
# Number of examples in each of the three sets.
num_train_examples = len(x_train_filenames)
num_val_examples = len(x_val_filenames)
num_test_examples = len(x_test_filenames)
In order to read the tiles into tensor, firstly I defined the image dimensions and batch size:
# Image dimensions; the trailing 3 is presumably the RGB channel count — confirm.
img_shape = (128, 128, 3)
# Batch size used when reading the tiles.
batch_size = 2
I noticed that there is no decoder in TensorFlow for TIFF images, based on this link. tfio.experimental.image.decode_tiff can be used, but it decodes to a uint8 tensor.
here is a sample code for png images:
def _process_pathnames(fname, label_path):
    """Read and decode one image / label pair from their PNG files.

    Meant to be mapped over each (image path, label path) pair. Returns the
    image decoded with 3 channels and the label decoded with 1 channel.
    """
    image = tf.image.decode_png(tf.io.read_file(fname), channels=3)
    label = tf.image.decode_png(tf.io.read_file(label_path), channels=1)
    # The label image should have any values between 0 and 9, indicating
    # pixel wise crop type class or background (0). Take the first channel
    # only, then restore the trailing channel axis.
    label = tf.expand_dims(label[:, :, 0], axis=-1)
    return image, label
Is it possible to modify this code by tf.convert_to_tensor or any other option to get float32 tensor from tif images? (I asked this question before, but I don't know how to integrate tf.convert_to_tensor with the mentioned codes)
You can read almost any image format and convert it to a numpy array with the Pillow image package:
from PIL import Image
import numpy as np
# Open inside a context manager so the underlying file handle is closed as
# soon as the pixel data has been copied into the NumPy array.
with Image.open("image.tiff") as tiff_image:
    img = np.array(tiff_image)
print(img.shape, img.dtype)
# (986, 1853, 4) uint8
You can integrate this function into your code and then convert the numpy array to a tensorflow tensor as well as doing the appropriated image conversions.
Side note: you can simplify a lot your get_train_test_lists function using the pathlib package (which is integrated to Python3 like os but much simpler to use).
def get_train_test_lists(imdir, lbldir):
    """Pair every ``.tif`` image in ``imdir`` with its label path in ``lbldir``.

    Args:
        imdir: Directory searched (non-recursively) for ``*.tif`` images.
        lbldir: Directory in which a same-named label file is expected; the
            label files are not checked for existence.

    Returns:
        ``(dset_list, x_filenames, y_filenames)``: the image stems, the
        image paths and the same-named label paths. Both path lists hold
        ``pathlib.Path`` objects, ordered by path.
    """
    # Imported locally so the function also works where the surrounding
    # module has not imported Path itself.
    from pathlib import Path

    # Path.glob yields entries in arbitrary order; sort for reproducibility.
    x_filenames = sorted(Path(imdir).glob("*.tif"))
    y_filenames = [Path(lbldir) / f.name for f in x_filenames]
    dset_list = [f.stem for f in x_filenames]
    return dset_list, x_filenames, y_filenames
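Note that x_filenames and y_filenames are now lists of pathlib.Path objects rather than plain strings (they are absolute only if imdir and lbldir are absolute), but this shouldn't be an issue in your code; wrap them in str() wherever a plain string is required.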
This is my program. I'm trying to paste a 1x2 array of two cv2 images into a bigger image. The idea is to do that for a bigger array of images and view them as a mosaic.
Problem is that this seems a mixture of cv2 and PIL and couldn't manage it to work.
Here my little code:
from PIL import Image
from matplotlib import cm
#example of one of the two images added
new_array = cv2.resize(x_test[0], (IMG_SIZE, IMG_SIZE))
# NOTE(review): new_array2 is not defined in this excerpt (the line above assigns new_array)
trp.append(new_array2)
im_pil=[]
#I create the big image
new_im = Image.new('RGB', (IMG_SIZE,IMG_SIZE * 2))
k=0
#here i want to place my image in the mosaic
for i in range(0,IMG_SIZE,IMG_SIZE):
for j in range(0,IMG_SIZE *2,IMG_SIZE):
#paste the image at location i,j:
# NOTE(review): the next line is missing its closing parenthesis
im_pil.append(Image.fromarray(trp[k])
new_im.paste(im_pil[k], (i,j))
k+=1
new_im
that's all the code( a little summarized) and I don't really know where the problem is. I get different kinds of errors in each modification, from invalid syntax to not defined or some crazy ones i dont understand.
although that
# Show the entry at index 1 of trp (plt is presumably matplotlib.pyplot — confirm)
plt.imshow(trp[1])
plt.show()
works and shows one image
I'm sorry if the answer is obvious, but I'm trying my best to figure it out and I can't manage it.
Thanks in advance
I don't know why you use cv2 because you can do all in PIL. You can even display it without matplot
from PIL import Image
IMG_SIZE = 128
filenames = ['image1.png', 'image2.png']

# Load every tile and resize it to IMG_SIZE x IMG_SIZE.
images = [Image.open(name).resize((IMG_SIZE, IMG_SIZE)) for name in filenames]

# Canvas one tile wide and two tiles tall.
new_im = Image.new('RGB', (IMG_SIZE, IMG_SIZE * 2))

# Top-left corner of every cell, in the order the tiles are pasted.
corners = [
    (i, j)
    for i in range(0, IMG_SIZE, IMG_SIZE)
    for j in range(0, IMG_SIZE * 2, IMG_SIZE)
]
for k, corner in enumerate(corners):
    new_im.paste(images[k], corner)

new_im.save('output.png')
new_im.show()
For images from cv2 you have to
convert colors from BGR to RGB
convert array to Image
do what you want
convert back Image to array
if you want to use again with cv2 then convert colors from RGB to BGR
Code:
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
IMG_SIZE = 128
filenames = ['image1.png', 'image2.png']

# Read each tile with OpenCV, convert BGR -> RGB, resize it, and wrap the
# array in a PIL Image so it can be pasted.
images = []
for name in filenames:
    bgr = cv2.imread(name)
    rgb = cv2.resize(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), (IMG_SIZE, IMG_SIZE))
    images.append(Image.fromarray(rgb))

# Canvas one tile wide and two tiles tall.
new_im = Image.new('RGB', (IMG_SIZE, IMG_SIZE * 2))

# Top-left corner of every cell, in the order the tiles are pasted.
corners = [
    (i, j)
    for i in range(0, IMG_SIZE, IMG_SIZE)
    for j in range(0, IMG_SIZE * 2, IMG_SIZE)
]
for k, corner in enumerate(corners):
    new_im.paste(images[k], corner)

# Convert the finished mosaic back to an array for display.
im = np.array(new_im)
plt.imshow(im)
plt.show()
# Alternative display through OpenCV (needs RGB -> BGR first):
#cv2.imshow('window', cv2.cvtColor(np.array(new_im), cv2.COLOR_RGB2BGR))
#cv2.waitKey(0)
I have a dataset of rgb and grayscale images. While iterating over the dataset, I want to detect if the image is a grayscale image such that I can convert it to rgb. I wanted to use tf.shape(image) to detect the dimensions of the image. For a rgb image I get something like [1, 100, 100, 3]. For grayscale images the function returns for example [1, 100, 100]. I wanted to use len(tf.shape(image)) to detect if it is of length 4 (=rgb) or length 3 (=grayscale). That did not work.
This is my code so far which did not work:
def process_image(image):
    """Convert ``image`` to a uint8 tensor and expand grayscale input to RGB.

    Input whose shape has length 3 is treated as grayscale: an axis is added
    at position 3 and ``tf.image.grayscale_to_rgb`` is applied.

    NOTE(review): the author reports that this length-based check did not
    work.
    """
    # Convert numpy array to tensor
    image = tf.convert_to_tensor(image, dtype=tf.uint8)
    # Take care of grayscale images
    if len(tf.shape(image)) == 3:
        image = tf.image.grayscale_to_rgb(np.expand_dims(image, axis=3))
    return image
Is there an alternative way to convert grayscale images to rgb?
You can use a function like this for that:
import tensorflow as tf
def process_image(image):
    """Return ``image`` as a uint8 tensor, converting grayscale input to RGB.

    Input with rank below 4 is treated as grayscale: a trailing axis is
    added and ``tf.image.grayscale_to_rgb`` is applied. Any other input is
    passed through ``tf.identity``.
    """
    image = tf.convert_to_tensor(image, dtype=tf.uint8)

    def _grayscale_branch():
        return tf.image.grayscale_to_rgb(tf.expand_dims(image, -1))

    def _passthrough_branch():
        return tf.identity(image)

    image_rgb = tf.cond(tf.rank(image) < 4, _grayscale_branch, _passthrough_branch)
    # Add shape information
    static_shape = image.shape
    image_rgb.set_shape(static_shape)
    known_rank = static_shape.ndims
    if known_rank is not None and known_rank < 4:
        # Grayscale input gained a trailing axis of size 3.
        image_rgb.set_shape(static_shape.concatenate(3))
    return image_rgb
I had a very similar problem, I wanted to load rgb and greyscale images in one go. Tensorflow supports setting the channel number when reading in the images. So if the images have different numbers of channels, this might be what you are looking for:
# to get greyscale (decode with channels=1):
tf.io.decode_image(raw_img, expand_animations = False, dtype=tf.float32, channels=1)
# to get rgb (decode with channels=3):
tf.io.decode_image(raw_img, expand_animations = False, dtype=tf.float32, channels=3)
-> You can even do both on the same image and inside tf.data.Dataset mappings!
You now have to set the channels argument to match the shape you need, so all the loaded images will be of that shape. Then you could reshape without a condition.
This also allows you to directly load a grayscale image to RGB in Tensorflow. Here an example:
>> a = Image.open(r"Path/to/rgb_img.JPG")
>> np.array(a).shape
(666, 1050, 3)
>> a = a.convert('L')
>> np.array(a).shape
(666, 1050)
>> b = np.array(a)
>> im = Image.fromarray(b)
>> im.save(r"Path/to/now_it_is_greyscale.jpg")
>> raw_img = tf.io.read_file(r"Path/to/now_it_is_greyscale.jpg")
>> img = tf.io.decode_image(raw_img, dtype=tf.float32, channels=3)
>> img.shape
TensorShape([666, 1050, 3])
>> img = tf.io.decode_image(raw_img, dtype=tf.float32, channels=1)
>> img.shape
TensorShape([666, 1050, 1])
Use expand_animations = False if you get ValueError: 'images' contains no shape.! See: https://stackoverflow.com/a/59944421/9621080
I want implement VGG Face Descriptor in python. But I keep getting an error:
TypeError: can only concatenate list (not "numpy.ndarray") to list
My code:
import numpy as np
import cv2
import caffe
# Read the image with OpenCV and swap its first and third channels.
img = cv2.imread("ak.png")
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
# Load the VGG Face network definition and weights in test mode.
net = caffe.Net("VGG_FACE_deploy.prototxt","VGG_FACE.caffemodel", caffe.TEST)
# Python 2 print statement; presumably the call that raises the reported TypeError — confirm.
print net.forward(img)
Can you help me ?
UPDATE 1
This working code is example in matlab
% Copyright (c) 2015, Omkar M. Parkhi
% All rights reserved.

% Read the image and convert it to single precision before the subtraction.
img = imread('ak.png');
img = single(img);

% Per-channel mean. It must be named averageImage because the subtraction
% below indexes averageImage(1..3).
averageImage = [129.1863,104.7624,93.5940] ;

% Subtract the mean from each of the three channels.
img = cat(3,img(:,:,1)-averageImage(1),...
img(:,:,2)-averageImage(2),...
img(:,:,3)-averageImage(3));

img = img(:, :, [3, 2, 1]); % convert from RGB to BGR
img = permute(img, [2, 1, 3]); % permute width and height

model = 'VGG_FACE_16_deploy.prototxt';
weights = 'VGG_FACE.caffemodel';
caffe.set_mode_cpu();
net = caffe.Net(model, weights, 'test'); % create net and load weights

% Run the forward pass, then read the first output and the fc7 blob.
res = net.forward({img});
prob = res{1};
caffe_ft = net.blobs('fc7').get_data();
To use python interface you need to transform the input image before feeding it to the net
# Load the image through Caffe's own loader.
img = caffe.io.load_image( "ak.png" )
# Reverse the channel axis and scale the values by 255.
img = img[:,:,::-1]*255.0 # convert RGB->BGR
avg = np.array([93.5940, 104.7624, 129.1863]) # BGR mean values
img = img - avg # subtract mean (numpy takes care of dimensions :)
Now img is H-by-W-by-3 numpy array.
Caffe expects its inputs as 4D: batch_index x channel x height x width.
Therefore you need to transpose the input and add a singleton dimension to represent the "batch_index" leading dimension
# Move the channel axis to the front: (H, W, 3) -> (3, H, W).
img = img.transpose((2,0,1))
img = img[None,:] # add singleton dimension
Now you can run the forward pass
# Feed the prepared array to the network under the input name "data".
out = net.forward_all( data = img )
OpenCV reads in BGR and scaled to 255 format by default, so:
# cv2.imread returns an integer (uint8) array. An in-place subtraction of the
# float mean cannot be cast back into an integer array, so convert to float
# before subtracting.
img = cv2.imread('ak.png').astype(np.float32)
avg = np.array([93.5940, 104.7624, 129.1863], dtype=np.float32)  # BGR mean from VGG
img -= avg  # subtract mean
img = img.transpose((2, 0, 1))  # to match image input dimension: 3x224x224
img = img[None, :]  # add singleton dimension to match batch dimension
out = net.forward_all(data=img)
Try passing a single element list to the method.
# Pass the image wrapped in a single-element list.
net.forward ([img])