I'm creating an array of images using OpenCV for analysis in TensorFlow.
I've created the following function:
def files_to_img_array(path, files_list):
'''
Reads a list of image files and creates a Numpy array.
'''
# Instantiate arrays
files = [path+file for file in files_list]
img_array = np.zeros(72000000) # for flattened 4000x6000 images
image_names = []
for file in tqdm.tqdm(files_list):
full_file = path+file
image_names.append(file.split('.')[0])
img = cv2.imread(full_file, 1)
print(img.shape)
img = img.flatten()
img_array = np.vstack([img_array, img])
img_array = img_array[1:] # remove instantiating zeroes
return img_array
The problem is that the images are not uniformly sized:
0%| | 0/10 [00:00<?, ?it/s](4000, 6000, 3)
10%|███████▊ | 1/10 [00:00<00:03, 2.64it/s](4000, 6000, 3)
20%|███████████████▌ | 2/10 [00:00<00:03, 2.51it/s](2848, 4288, 3)
20%|███████████████▌ | 2/10 [00:00<00:03, 2.18it/s]
Traceback (most recent call last):
...
ValueError: all the input array dimensions for the concatenation axis
must match exactly, but along dimension 1, the array at index 0 has
size 72000000 and the array at index 1 has size 36636672
I'm genuinely uncertain about how to approach this, both from a programming and image processing perspective. Does anyone have advice about how to either pad these differently-sized images, or if there is something in OpenCV that can handle this? (I'm happy to use PIL as well, I'm not married to OpenCV.)
Here is how to stack arbitrary sized images vertically in Python/OpenCV with transparent padding.
Input Images:
import cv2
import numpy as np
# load images
img1 = cv2.imread("lena.jpg")
w1 = img1.shape[1]
img2 = cv2.imread("barn.jpg")
w2 = img2.shape[1]
img3 = cv2.imread("monet2.jpg")
w3 = img3.shape[1]
# get maximum width
ww = max(w1, w2, w3)
# pad images with transparency in width
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2BGRA)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2BGRA)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2BGRA)
img1 = cv2.copyMakeBorder(img1, 0, 0, 0, ww-w1, borderType=cv2.BORDER_CONSTANT, value=(0,0,0,0))
img2 = cv2.copyMakeBorder(img2, 0, 0, 0, ww-w2, borderType=cv2.BORDER_CONSTANT, value=(0,0,0,0))
img3 = cv2.copyMakeBorder(img3, 0, 0, 0, ww-w3, borderType=cv2.BORDER_CONSTANT, value=(0,0,0,0))
# stack images vertically
result = cv2.vconcat([img1, img2, img3])
# write result to disk
cv2.imwrite("image_stack.png", result)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
Thanks to #hpaulj for the comment in the question that led to my investigation and this answer.
The following code relies upon Keras and underlying PIL:
import PIL
import tensorflow
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import concurrent.futures
def keras_pipeline(file):
TARGET_SIZE = (100,150)
img = load_img(file, target_size=TARGET_SIZE)
img_array = img_to_array(img)
return img_array
def files_to_array(path, files_list):
files = [path+file for file in files_list]
with concurrent.futures.ProcessPoolExecutor() as executor:
img_map = executor.map(keras_pipeline, files)
return img_map
keras_pipeline() creates a transformation pipeline for each image. files_to_array() maps that transformation pipeline over each image and returns a generator. That generator can then be passed as a Numpy array using np.hstack():
for img in img_map:
existing_array = np.hstack([existing_array, img])
Related
I am creating a PyTorch dataset and dataloader from CUB_200. When reading the images as pill, I need to change the BGR channels to RGB and I use the following code:
def _read_images_from_list(imagefile_list):
imgs = []
mean=[0.485, 0.456, 0.406]
std= [0.229, 0.224, 0.225]
Transformations = transforms.Compose([transforms.Resize([224, 224]), transforms.ToTensor(), transforms.Normalize(mean, std)])
for imagefile in imagefile_list:
# read images as PIL instead of NUMPY
img = Image.open(imagefile)
b, g, r = img.split()
img = Image.merge("RGB", (r, g, b))
img = Transformations(img) # ToTensor and between [0,1], then normalized using image net mean and std, then transposed into shape (C,H,W)
imgs += [img]
return imgs
After going through a number of classes, I get the following error.
ValueError: not enough values to unpack (expected 3, got 1)
I wonder what should I do now? it means that one of the images has only one channel instead of one. Can this be the case or there is a problem with my code? I had a different implementation before but it worked. The reason I changed this implementation was that I could not normalize my images.
This is the old implementation:
def _read_images_from_list(imagefile_list):
imgs = []
for imagefile in imagefile_list:
img = cv2.imread(imagefile).astype(np.float32)
img = cv2.resize(img, (224, 224))
# Convert RGB to BGR
img_r, img_g, img_b = np.split(img, 3, axis=2)
img = np.concatenate((img_b, img_g, img_r), axis=2)
# Extract mean
img -= np.array((103.94,116.78,123.68), dtype=np.float32) # BGR mean
# HWC -> CHW, compatible with pytorch
img = np.transpose(img, [2, 0, 1])
imgs += [img]
return imgs
I would strongly recommend you use skimage.io to load your images, not opencv. It opens the images in RGB format by default, removing your shuffling overhead, but if you want to convert BGR to RGB you can use this:
import numpy as np
img = np.arange(27).reshape(3,3,3)
b = img[:,:,0]
g = img[:,:,1]
r = img[:,:,2]
rgb = np.dstack([r,g,b])
print(img)
print("#"*20)
print(rgb)
I am trying to create a video from a sequence of images. Below is my code.
import os
import cv2
mean_width = 6000
mean_height = 4000
def generate_video():
image_folder = 'C:/New folder/Images/q/'
video_name = 'myvideo.avi'
os.chdir("C:/New folder/Images/q/")
images = [img for img in os.listdir(image_folder)
if img.endswith(".jpg") or
img.endswith(".jpeg") or
img.endswith("png")]#I'll use my own function for that, just easier to read
frame = cv2.imread(os.path.join(image_folder, images[0]))
height, width, layers = frame.shape
video = cv2.VideoWriter(video_name, 0, 0.25, (width, height))#0.25 so one image is 4 seconds
for image in images:
video.write(cv2.imread(os.path.join(image_folder, image)))
cv2.destroyAllWindows()
video.release()
generate_video()
The above code however creating the video just with one image. There are 5 other images as well in the folder C:/New folder/Images/q/ but the video is generated only for the first one. Can someone please advise if anything is missing here ? Seems like the for loop is not working
To make a video you need your images to have the same resolution. If some images have different sizes cv2.VideoWriter quietly skips them without any errors.
So you may need to resize your images to fixed size:
for image in images:
image = cv2.imread(os.path.join(image_folder, image))
image = cv2.resize(image, (width, height))
video.write(image)
An example to reproduce this behavior:
import cv2
import numpy as np
fc = cv2.VideoWriter_fourcc(*"mp4v")
video = cv2.VideoWriter("1.mp4", fc, 0.25, (500, 500))
for idx in range(10):
color = np.random.randint(0, 255, size=3)
if idx in [0, 2, 3]: # only 3 frames will be in the final video
image = np.full((500, 500, 3), fill_value=color, dtype=np.uint8)
else:
# slighly different size
image = np.full((400, 500, 3), fill_value=color, dtype=np.uint8)
video.write(image)
I am trying to run a CNN where the input images have three channels (rgb) and the label (target) images are grayscale images (1 channel). The input and label images are in float32 and tif format.
I got the list of image and label tile pairs as below:
def get_train_test_lists(imdir, lbldir):
imgs = glob.glob(imdir+"/*.tif")
dset_list = []
for img in imgs:
filename_split = os.path.splitext(img)
filename_zero, fileext = filename_split
basename = os.path.basename(filename_zero)
dset_list.append(basename)
x_filenames = []
y_filenames = []
for img_id in dset_list:
x_filenames.append(os.path.join(imdir, "{}.tif".format(img_id)))
y_filenames.append(os.path.join(lbldir, "{}.tif".format(img_id)))
print("number of images: ", len(dset_list))
return dset_list, x_filenames, y_filenames
train_list, x_train_filenames, y_train_filenames = get_train_test_lists(img_dir, label_dir)
test_list, x_test_filenames, y_test_filenames = get_train_test_lists(test_img_dir, test_label_dir)
from sklearn.model_selection import train_test_split
x_train_filenames, x_val_filenames, y_train_filenames, y_val_filenames =
train_test_split(x_train_filenames, y_train_filenames, test_size=0.1, random_state=42)
num_train_examples = len(x_train_filenames)
num_val_examples = len(x_val_filenames)
num_test_examples = len(x_test_filenames)
In order to read the tiles into tensor, firstly I defined the image dimensions and batch size:
img_shape = (128, 128, 3)
batch_size = 2
I noticed that there is no decoder in tensorflow for tif image based on this link. tfio.experimental.image.decode_tiff can be used but it decodes to unit8 tensor.
here is a sample code for png images:
def _process_pathnames(fname, label_path):
# We map this function onto each pathname pair
img_str = tf.io.read_file(fname)
img = tf.image.decode_png(img_str, channels=3)
label_img_str = tf.io.read_file(label_path)
# These are png images so they return as (num_frames, h, w, c)
label_img = tf.image.decode_png(label_img_str, channels=1)
# The label image should have any values between 0 and 9, indicating pixel wise
# cropt type class or background (0). We take the first channel only.
label_img = label_img[:, :, 0]
label_img = tf.expand_dims(label_img, axis=-1)
return img, label_img
Is it possible to modify this code by tf.convert_to_tensor or any other option to get float32 tensor from tif images? (I asked this question before, but I don't know how to integrate tf.convert_to_tensor with the mentioned codes)
You can read almost any image format and convert it to a numpy array with the Pillow image package:
from PIL import Image
import numpy as np
img = Image.open("image.tiff")
img = np.array(img)
print(img.shape, img.dtype)
# (986, 1853, 4) uint8
You can integrate this function into your code and then convert the numpy array to a tensorflow tensor as well as doing the appropriated image conversions.
Side note: you can simplify a lot your get_train_test_lists function using the pathlib package (which is integrated to Python3 like os but much simpler to use).
def get_train_test_lists(imdir, lbldir):
x_filenames = list(Path(imdir).glob("*.tif"))
y_filenames = [Path(lbldir) / f.name for f in x_filenames]
dset_list = [f.stem for f in x_filenames]
return dset_list, x_filenames, y_filenames
Note that x_filenames and y_filenames are now absolute paths but this shouldn't be an issue in your code.
I've taken the input from the folder and then reshaped it accordingly as per the model VGG16-places365. It is still showing the same error and looked into the Keras documentation of the problem (https://keras.io/applications/#vgg16) yet the error still prevails.
if __name__ == '__main__':
#from urllib.request import urlopen
import numpy as np
from PIL import Image
from cv2 import resize
pred_array = np.empty((0,6),dtype=float)
TEST_PATH = '/home/guest/Downloads/content/image/thumb'
for img in os.listdir(TEST_PATH):
image = Image.open(os.path.join(TEST_PATH, img))
image = np.array(image, dtype=np.uint8)
image = resize(image, (224, 224))
image = np.expand_dims(image, 0)
model = VGG16_Places365(weights='places')
predictions_to_return = 5
preds = model.predict(image)[0]
top_preds = np.argsort(preds)[::-1][0:predictions_to_return]
# load the class label
file_name = 'categories_places365.txt'
if not os.access(file_name, os.W_OK):
synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
os.system('wget ' + synset_url)
classes = list()
with open(file_name) as class_file:
for line in class_file:
classes.append(line.strip().split(' ')[0][3:])
classes = tuple(classes)
temprow = np.hstack((np.array([img]),top_preds))
np.append(pred_array,temprow.reshape(-1,pred_array.shape[1]),axis=0)
df = pd.DataFrame(data=pred_array,columns=['File_name','Tag_1','Tag_2','Tag_3','Tag_4','Tag_5'])
print(df)
You are probably loading an image with an alpha channel (RGBA) but the VGG16 neural network expects an image without an alpha channel (RGB).
To convert the image from RGBA to RGB, you can either use
image = image.convert("RGB")
on the PIL Image object, i.e. directly after Image.open, or use numpy array slicing on the numpy array object to cut off the first three color channels after np.array has been called:
image = image[:, :, :3]
I want implement VGG Face Descriptor in python. But I keep getting an error:
TypeError: can only concatenate list (not "numpy.ndarray") to list
My code:
import numpy as np
import cv2
import caffe
img = cv2.imread("ak.png")
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
net = caffe.Net("VGG_FACE_deploy.prototxt","VGG_FACE.caffemodel", caffe.TEST)
print net.forward(img)
Can you help me ?
UPDATE 1
This working code is example in matlab
% Copyright (c) 2015, Omkar M. Parkhi
% All rights reserved.
img = imread('ak.png');
img = single(img);
Img = [129.1863,104.7624,93.5940] ;
img = cat(3,img(:,:,1)-averageImage(1),...
img(:,:,2)-averageImage(2),...
img(:,:,3)-averageImage(3));
img = img(:, :, [3, 2, 1]); % convert from RGB to BGR
img = permute(img, [2, 1, 3]); % permute width and height
model = 'VGG_FACE_16_deploy.prototxt';
weights = 'VGG_FACE.caffemodel';
caffe.set_mode_cpu();
net = caffe.Net(model, weights, 'test'); % create net and load weights
res = net.forward({img});
prob = res{1};
caffe_ft = net.blobs('fc7').get_data();
To use python interface you need to transform the input image before feeding it to the net
img = caffe.io.load_image( "ak.png" )
img = img[:,:,::-1]*255.0 # convert RGB->BGR
avg = np.array([93.5940, 104.7624, 129.1863]) # BGR mean values
img = img - avg # subtract mean (numpy takes care of dimensions :)
Now img is H-by-W-by-3 numpy array.
Caffe expects its inputs as 4D: batch_index x channel x width x height.
Therefore you need to transpose the input and add a singleton dimension to represent the "batch_index" leading dimension
img = img.transpose((2,0,1))
img = img[None,:] # add singleton dimension
Now you can run the forward pass
out = net.forward_all( data = img )
OpenCV reads in BGR and scaled to 255 format by default, so:
img = cv2.imread('ak.png')
avg = np.array([93.5940,104.7624,129.1863]) # BGR mean from VGG
img -= avg # subtract mean
img = img.transpose((2,0,1)) # to match image input dimension: 3x224x224
img = img[None,:] # add singleton dimension to match batch dimension
out = net.forward_all(data = img)
Try passing a single element list to the method.
net.forward ([img])