Why must I reshape a single image to [n, height, width, channel] in a CNN? - python

I am trying to apply a convolutional layer to a picture of shape [256,256,3].
I get an error when I use the tensor of the image directly:
conv1 = conv2d(input,W_conv1) +b_conv1 #<=== error
error message:
ValueError: Shape must be rank 4 but is rank 3 for 'Conv2D' (op: 'Conv2D')
with input shapes: [256,256,3], [3,3,3,1].
But when I reshape the input, the conv2d function works normally:
x_image = tf.reshape(input,[-1,256,256,3])
conv1 = conv2d(x_image,W_conv1) +b_conv1
If I must reshape the tensor, what is the best shape to use in my case, and why?
import tensorflow as tf
import numpy as np
from PIL import Image
def img_to_tensor(img):
    """Convert ``img`` into a float32 TensorFlow tensor.

    Args:
        img: any value accepted by ``tf.convert_to_tensor``.

    Returns:
        The converted tensor, with dtype float32.
    """
    tensor = tf.convert_to_tensor(img, dtype=np.float32)
    return tensor
def weight_generater(shape):
    """Create a weight variable of the given shape.

    Args:
        shape: shape of the variable to create.

    Returns:
        A ``tf.Variable`` initialised from a truncated normal distribution
        with standard deviation 0.1.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_generater(shape):
    """Create a bias variable of the given shape, filled with 0.1.

    Args:
        shape: shape of the variable to create.

    Returns:
        A ``tf.Variable`` whose every entry starts at 0.1.
    """
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    """Convolve ``x`` with the filter bank ``W``.

    Uses a stride of 1 along every dimension and 'SAME' padding.

    Args:
        x: input tensor passed straight to ``tf.nn.conv2d``.
        W: filter tensor passed straight to ``tf.nn.conv2d``.

    Returns:
        The result of ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def pool_max_2x2(x):
    """Max-pool ``x`` over 2x2 windows with 'SAME' padding.

    NOTE(review): the strides are all 1, so the windows overlap and the
    output is not downsampled - confirm that this is intended.

    Args:
        x: input tensor passed straight to ``tf.nn.max_pool``.

    Returns:
        The result of ``tf.nn.max_pool``.
    """
    window = [1, 2, 2, 1]
    step = [1, 1, 1, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Read the image from disk.
img = Image.open("img.tif")
# Interactive session so that .eval() can be called without passing a session.
sess = tf.InteractiveSession()
# Convert the image to a tensor and evaluate it immediately.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = img_to_tensor(img).eval()
#print(input)
# Get the image dimensions (unpacked below as height, width, channel).
img_dimension = tf.shape(input).eval()
print(img_dimension)
height,width,channel=img_dimension
filter_size = 3
feature_map = 32
# NOTE(review): x and y are not used anywhere in the visible snippet.
x = tf.placeholder(tf.float32,shape=[height*width*channel])
y = tf.placeholder(tf.float32,shape=21)
# Generate the weights: [kernel size, kernel size, channels, number of filters].
# NOTE(review): the last dimension is 1 while the bias below has `feature_map`
# entries - presumably it should be `feature_map`; confirm.
W_conv1 = weight_generater([filter_size,filter_size,channel,1])
# Each filter has its own bias.
b_conv1 = bias_generater([feature_map])
""" I must reshape the picture
x_image = tf.reshape(input,[-1,256,256,3])
"""
# `input` is passed to conv2d without a batch dimension; this is the line
# that raises "Shape must be rank 4 but is rank 3".
conv1 = conv2d(input,W_conv1) +b_conv1 #<=== error
h_conv1 = tf.nn.relu(conv1)
h_pool1 = pool_max_2x2(h_conv1)
# Print the shape of the first layer's output.
layer1_dimension = tf.shape(h_pool1).eval()
print(layer1_dimension)

The first dimension is the batch size. If you are feeding 1 image at a time you can simply make the first dimension 1 and it doesn't change your data any, just changes the indexing to 4D:
x_image = tf.reshape(input, [1, 256, 256, 3])
If you reshape it with a -1 in the first dimension, you are saying that you will feed in a 4D batch of images (shaped [batch_size, height, width, color_channels]), and you are allowing the batch size to be dynamic (which is common to do).

You could also use
im = tf.expand_dims(input, axis=0)
to insert a dimension of 1 into the tensor's shape. im will be a rank 4 tensor. This way you do not have to specify the dimensions of the image.

Related

Tensorflow, can not figure out what shape my inputs and labels have

I am trying to load weights and for that to work i need to perform the following:
dummy_input = tf.random.uniform(input_shape) # create a tensor of input shape
dummy_label = tf.random.uniform(label_shape) # create a tensor of label shape
hist = model.fit(dummy_input, dummy_label)
I am new to this and can't figure out what these shapes should be.
Some information about my model:
Im feeding the model images with shape (224,224,3).
In batches of 16.
I have 423 different classes and use sparse_categorical_crossentropy.
I tried this
dummy_input = tf.random.uniform([16, 224, 224, 3]) # create a tensor of input shape
dummy_label = tf.random.uniform([16, 1, 423]) # create a tensor of label shape
hist = model.fit(dummy_input, dummy_label, epochs=epochs,
steps_per_epoch=len_train // batch_size,
validation_steps=len_test // batch_size)
There may be many errors here but the one i am getting right now is
ValueError: Shape mismatch: The shape of labels (received (423,))
should equal the shape of logits except for the last dimension (received (1, 423)).
This was the solution
dummy_input = tf.random.uniform([32, 224, 224, 3]) # create a tensor of input shape
dummy_label = tf.random.uniform([32,]) # create a tensor of label shape
hist = model.fit(dummy_input, dummy_label)

Initializing Keras Convolution Kernel as a numpy array

I would like to initialize the weights for a (5,5) convolutional layer with four channels to be a numpy array. The input to this layer is of shape (128,128,1). In particular, I would like the following:
def custom_weights(shape, dtype=None):
    """Kernel initializer meant to return a hand-written filter bank.

    The ``shape`` and ``dtype`` arguments are ignored.

    NOTE(review): ``matrix`` is created with 4 dimensions, yet every
    assignment below uses 5 indices, which NumPy rejects with an IndexError.
    The array shape (1, 5, 5, 4) also does not match the layer this
    initializer is used for - see the discussion that follows this snippet.
    """
    matrix = np.zeros((1,5,5,4))
    matrix[0,2,2,0,0] = 1
    matrix[0,2,1,0,0] = -1
    matrix[0,2,2,0,1] = 1
    matrix[0,3,2,0,1] = -1
    matrix[0,2,2,0,2] = 2
    matrix[0,2,1,0,2] = -1
    matrix[0,2,3,0,2] = -1
    matrix[0,2,2,0,3] = 2
    matrix[0,1,2,0,3] = -1
    matrix[0,3,2,0,3] = -1
    # Wrap the NumPy array in a backend variable so Keras can use it as weights.
    weights = K.variable(matrix)
    return weights
# Single-channel 128x128 input.
input_shape = (128, 128, 1)
images = Input(input_shape, name='phi_input')
# 5x5 convolution with 4 filters, no bias, 'valid' padding and stride 1,
# whose kernel is produced by custom_weights.
conv1 = Conv2D(4,[5, 5], use_bias = False, kernel_initializer=custom_weights, padding='valid', name='Conv2D_1', strides=1)(images)
However, when I try to do this, I get an error of
Depth of input (1) is not a multiple of input depth of filter (5) for 'Conv2D_1_19/convolution' (op: 'Conv2D') with input shapes: [?,128,128,1], [1,5,5,4].
Is my error in the shape of the weight matrix?
There are several inconsistencies in your code that lead to errors. The error you're getting cannot come from the code shown, because that code doesn't even index the matrix properly.
matrix = np.zeros((1,5,5,4))
matrix[0,2,2,0,0] = 1
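You are initializing a numpy array with 4 dimensions but using 5 indices to change its values.
The dimensions of your kernel weights are also wrong: a Conv2D kernel has shape (kernel_height, kernel_width, input_channels, filters), which is (5, 5, 1, 4) here. Here's the fixed code.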
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K
import numpy as np
def custom_weights(shape, dtype=None):
    """Kernel initializer returning a fixed (5, 5, 1, 4) kernel.

    The ``shape`` and ``dtype`` arguments are ignored; the kernel is always
    an all-zero array of shape (5, 5, 1, 4) wrapped in a backend variable.
    """
    zeros = np.zeros((5, 5, 1, 4))
    # Set the desired kernel entries here before the array is wrapped.
    return K.variable(zeros)
# Single-channel 128x128 input.
input_shape = (128, 128, 1)
images = Input(input_shape, name='phi_input')
# 5x5 convolution with 4 filters, no bias, 'valid' padding and stride 1,
# whose kernel is produced by custom_weights.
conv1 = Conv2D(4,[5, 5], use_bias = False, kernel_initializer=custom_weights, padding='valid', name='Conv2D_1', strides=1)(images)

How to convolve signal with 1D kernel in TensorFlow?

I am trying to filter a TensorFlow tensor of shape (N_batch, N_data), where N_batch is the batch size (e.g. 32), and N_data is the size of the (noisy) timeseries array. I have a Gaussian kernel (taken from here), which is one-dimensional. I then want to use tensorflow.nn.conv1d to convolve this kernel with my signal.
I have been trying for most of the morning to get the dimensions of the input signal and the kernel right, but obviously with no success. From what I gathered from the interwebs, the dimensions of both the input signal and the kernel need to be aligned in some finicky way, and I just can't figure out which way that is. The TensorFlow error messages aren't particularly meaningful either (Shape must be rank 4 but is rank 3 for 'conv1d/Conv2D' (op: 'Conv2D') with input shapes: [?,1,1000], [1,81]). Below I've included a little piece of code to reproduce the situation:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Based on: https://stackoverflow.com/a/52012658/1510542
# Credits to #zephyrus
def gaussian_kernel(size, mean, std):
    """Return a normalised Gaussian kernel with ``2 * size + 1`` taps.

    Args:
        size: half-width of the kernel; the density is sampled at the
            integers from ``-size`` to ``size`` inclusive.
        mean: mean of the normal distribution.
        std: standard deviation of the normal distribution.

    Returns:
        The sampled densities divided by their sum.
    """
    normal = tf.distributions.Normal(tf.cast(mean, tf.float32), tf.cast(std, tf.float32))
    support = tf.range(start=-size, limit=size + 1, dtype=tf.float32)
    kernel = normal.prob(support)  # Some reshaping is required here
    total = tf.reduce_sum(kernel)
    return kernel / total
def gaussian_filter(input, sigma):
    """Convolve ``input`` with a Gaussian kernel of standard deviation ``sigma``.

    The kernel half-width is ``int(4 * sigma + 0.5)``. The convolution uses
    stride 1 and "SAME" padding.

    Args:
        input: tensor handed to ``tf.nn.conv1d`` unchanged.
        sigma: standard deviation of the Gaussian kernel.

    Returns:
        The result of ``tf.nn.conv1d``.
    """
    half_width = int(4 * sigma + 0.5)
    signal = input  # Some reshaping is required here
    weights = gaussian_kernel(size=half_width, mean=0.0, std=sigma)
    return tf.nn.conv1d(signal, weights, stride=1, padding="SAME")
def run_filter():
    """Filter a batch of noisy sine waves and plot the first one.

    Resets the default graph, builds 32 noisy sine curves of 1000 samples
    each, passes them through ``gaussian_filter`` with ``sigma=10``, and
    plots the first noisy curve together with its filtered counterpart.
    """
    tf.reset_default_graph()

    # Define size of data, batch sizes
    n_batch, n_data = 32, 1000

    # One sine period repeated for every batch row, plus uniform noise
    # drawn from [-0.1, 0.1).
    jitter = 0.2 * (np.random.rand(n_batch, n_data) - 0.5)
    grid = np.linspace(0, 2*np.pi, n_data)
    clean = np.tile(np.sin(grid), n_batch).reshape(n_batch, n_data)
    y_noisy = clean + jitter

    signal_ph = tf.placeholder(tf.float32, shape=[None, n_data])
    smooth_input = gaussian_filter(signal_ph, sigma=10)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        y_smooth = sess.run(smooth_input, feed_dict={signal_ph: y_noisy})

    # Overlay the first noisy curve and its smoothed version.
    for curves in (y_noisy, y_smooth):
        plt.plot(curves[0])
    plt.show()


if __name__ == "__main__":
    run_filter()
Any ideas?
You need to add channel dimensions to your input/kernel, since TF convolutions are generally used for multi-channel inputs/outputs. As you are working with simple 1-channel input/output this amounts to just adding some size-1 "dummy" axes.
Since by default convolution expects channels to come last, your placeholder should have shape [None, N_data, 1] and your input be modified like
y_noisy = y + noise
y_noisy = y_noisy[:, :, np.newaxis]
Similarly, you need to add input and output channel dimensions to your filter:
kernel = gaussian_kernel(size=size, mean=0.0, std=sigma)
kernel = kernel[:, tf.newaxis, tf.newaxis]
That is, the filter is expected to have shape [width, in_channels, out_channels].

How do I display the feature maps (filtered layers) in a tensorflow CNN?

I need some help viewing the feature maps in a plant leaf classification program using TensorFlow.
I have a function that takes in any number of images (size 128x128x3) and convolves the images using some filter (size 3x3x32).
layer_conv1 = create_convolutional_layer(input=x,
num_input_channels=num_channels,
conv_filter_size=filter_size_conv1,
num_filters=num_filters_conv1)
print(layer_conv1)
The code outputs a tensor as printed: Tensor("Relu_182:0", shape=(?, 64, 64, 32), dtype=float32)
I am trying to display an image on the console from the tensor, and I've tried the following code (using matplotlib.pyplot):
session.run(tf.global_variables_initializer())
img = session.run(layer_conv1)
plt.imshow(img)
plt.show()
and
""
img = layer_conv1[0,:,:,:].eval(session=session)
""
""
which both don't work.
You must feed a value for placeholder tensor 'x_54' with dtype float and shape [?,128,128,3] is one of the errors that occurs.
You define your layer with
layer_conv1 = create_convolutional_layer(input=x,...)
Here, x is a placeholder defined with something like
# The function is lowercase: tf.placeholder (there is no tf.Placeholder).
x = tf.placeholder(tf.float32, [None, 128, 128, 3])
When you call img = session.run(layer_conv1) you need to feed a value for x like with
img = session.run(layer_conv1, feed_dict={x: myImage})
where myImage is a numpy array of shape [1, 128, 128, 3] representing your image.

Tensorflow Depthwise Convolution Understanding

I'm currently trying to understand how Tensorflow's Depthwise Convolution works. As far as I've understood, each channel in the input image is convolved with its own set of filters, and then the results are concatenated. I'm going to stick with the parameter depth_multiplier=1 for the sake of simplicity in the remainder, so n_inputchannels == n_outputchannels.
So in theory, I could split up the depthwise convolution into N individual, regular Conv2Ds, correct? I am wondering why the following code then produces different results - is this a precision issue? I'm following the documentation for the ordering [filter_height, filter_width, in_channels, 1] for the depthwise convolution filters, and [filter_height, filter_width, in_channels, out_channels] for the regular convolutions, and NHWC data format.
import tensorflow as tf
import numpy as np
import random
# Problem size: one 128x128 input with 32 channels and 3x3 kernels.
width = 128
height = 128
channels = 32
kernel_width = 3
kernel_height = 3
with tf.Session() as sess:
    # Random float32 input of shape (1, height, width, channels) and random
    # filters of shape (kernel_height, kernel_width, channels, 1).
    _input = np.float32(np.random.rand(1, height, width, channels))
    _weights = np.float32(np.random.rand(kernel_height, kernel_width, channels, 1))
    _input_ph = tf.placeholder(tf.float32, shape=(1, height, width, channels))
    _weights_pc = tf.placeholder(tf.float32, shape=(kernel_height, kernel_width, channels, 1))
    feed = { _input_ph: _input, _weights_pc : _weights }
    # Reference result: a single depthwise convolution over all channels.
    result = tf.nn.depthwise_conv2d(_input_ph, _weights_pc, [1,1,1,1], 'SAME')
    # Same computation expressed as one regular conv2d per channel; the
    # slices drop an axis, so expand_dims restores it.
    individual_results = []
    for i in range(channels):
        individual_results.append(tf.nn.conv2d(tf.expand_dims(_input_ph[:,:,:,i],axis=3), tf.expand_dims(_weights_pc[:,:,i,:],axis=3), [1,1,1,1], 'SAME'))
    depth_result = sess.run(result, feed_dict=feed)
    # Concatenate the per-channel outputs along the last axis.
    concat_result = sess.run(tf.concat(individual_results, axis=3), feed_dict=feed)
# Accumulate the summed difference between the two results, channel by channel.
# NOTE(review): this sums signed differences, so positive and negative errors
# can cancel; np.max(np.abs(depth_result - concat_result)) would bound the
# discrepancy more reliably.
channel_diff = 0.0
for i in range(channels):
    channel_diff += np.sum(depth_result[:,:,:,i]-concat_result[:,:,:,i])
print(channel_diff)
Here I'm computing first the normal tf.nn.depthwise_conv2d and then slice the input and weights accordingly and do tf.nn.conv2ds individually. For these parameters I get about 1e-5 difference, but that tends to get higher when I increase the number of channels.
I would be really glad if someone could explain to me what's going on :)
Thanks!

Categories