How do you create an inception module in tensorflow - python

Looking at the tensorflow page: https://github.com/tensorflow/models/tree/master/inception
They show an image with their architecture, specifically their 'inception' modules which contain in parallel:
conv layer of 1x1
conv layer of 3x3
conv layer of 5x5
ave pooling + 1x1 conv
Followed by a 'concat' layer.
How can I create this in tensorflow?
I figured I could do something along the lines of this to create the parallel operations:
start_layer = input_data

# 1x1 convolution branch.
one_filter = tf.Variable(tf.truncated_normal([1, 1, channels, filter_count], stddev=0.1))
one_by_one = tf.nn.conv2d(start_layer, one_filter, strides=[1, 1, 1, 1], padding='SAME')

# 3x3 convolution branch.
three_filter = tf.Variable(tf.truncated_normal([3, 3, channels, filter_count], stddev=0.1))
three_by_three = tf.nn.conv2d(start_layer, three_filter, strides=[1, 1, 1, 1], padding='SAME')

# 5x5 convolution branch.
five_filter = tf.Variable(tf.truncated_normal([5, 5, channels, filter_count], stddev=0.1))
five_by_five = tf.nn.conv2d(start_layer, five_filter, strides=[1, 1, 1, 1], padding='SAME')

# Average-pooling branch followed by a 1x1 convolution.
# avg_pool takes a window size (ksize), not a filter variable. Stride 1 with
# 'SAME' padding keeps the spatial size equal to the other three branches,
# which is required before the outputs can be joined along the channel axis.
pooling = tf.nn.avg_pool(start_layer, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')
pool_filter = tf.Variable(tf.truncated_normal([1, 1, channels, filter_count], stddev=0.1))
pooling = tf.nn.conv2d(pooling, pool_filter, strides=[1, 1, 1, 1], padding='SAME')
#connect one_by_one, three_by_three, five_by_five, pooling into an concat layer
But how do I combine the 4 operations into a concat layer?

I did something very similar to what you did, and then finished it off with tf.concat(). Note the axis=3, which matches my 4-D tensors and concatenates along the 4th dimension (index 3).
Documentation for it is here.
My final code ended up something like this:
def inception2d(x, in_channels, filter_count):
    """Build a simple inception-style block of parallel branches.

    Four branches are computed from ``x`` (1x1, 3x3 and 5x5 convolutions
    plus a 3x3 average pool), concatenated along axis 3, biased and passed
    through ReLU.

    ``mu`` and ``sigma`` are not parameters: they are read from the
    enclosing scope and passed positionally to ``tf.truncated_normal``.

    Args:
        x: Input tensor fed to every branch (assumed 4-D with
            ``in_channels`` as its last dimension -- confirm at call site).
        in_channels: Input depth used for every convolution filter.
        filter_count: Output depth of each convolution branch.

    Returns:
        The ReLU-activated tensor whose last dimension is
        ``3 * filter_count + in_channels``.
    """
    # bias dimension = 3*filter_count and then the extra in_channels for the avg pooling
    # (no trailing comma here: a comma would make ``bias`` a 1-tuple, which
    # tf.nn.bias_add cannot use as a 1-D bias vector)
    bias = tf.Variable(tf.truncated_normal([3 * filter_count + in_channels], mu, sigma))

    # 1x1
    one_filter = tf.Variable(tf.truncated_normal([1, 1, in_channels, filter_count], mu, sigma))
    one_by_one = tf.nn.conv2d(x, one_filter, strides=[1, 1, 1, 1], padding='SAME')

    # 3x3
    three_filter = tf.Variable(tf.truncated_normal([3, 3, in_channels, filter_count], mu, sigma))
    three_by_three = tf.nn.conv2d(x, three_filter, strides=[1, 1, 1, 1], padding='SAME')

    # 5x5
    five_filter = tf.Variable(tf.truncated_normal([5, 5, in_channels, filter_count], mu, sigma))
    five_by_five = tf.nn.conv2d(x, five_filter, strides=[1, 1, 1, 1], padding='SAME')

    # avg pooling (stride 1 + 'SAME' keeps the spatial size of the conv branches)
    pooling = tf.nn.avg_pool(x, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')

    # Concat in the 4th dim to stack
    x = tf.concat([one_by_one, three_by_three, five_by_five, pooling], axis=3)
    x = tf.nn.bias_add(x, bias)
    return tf.nn.relu(x)

Related

How do I explain this TensorFlow tf.nn.conv2d() layer shape?

My Tensorflow convolutional layer has a shape I did not expect it to have and I do not see the mistake.
I am new to TensorFlow and want to use this function to create a convolutional layer:
def new_conv_layer(input,               # The previous layer.
                   num_input_channels,  # Num. channels in prev. layer.
                   filter_size,         # Width and height of each filter.
                   num_filters,         # Number of filters.
                   use_pooling=True):   # Use 2x2 max-pooling.
    """Create a convolutional layer with optional 2x2 max-pooling and ReLU.

    Weights and biases are created through the ``new_weights`` and
    ``new_biases`` helpers defined elsewhere.

    Args:
        input: Output of the previous layer. The name shadows the builtin,
            but it is kept because callers pass it by keyword.
        num_input_channels: Number of channels in ``input``.
        filter_size: Width and height of each square filter.
        num_filters: Number of filters, i.e. output channels.
        use_pooling: When true, apply 2x2 max-pooling with stride 2.

    Returns:
        A ``(layer, weights)`` tuple: the resulting tensor and the filter
        weights used by the convolution.
    """
    # Filter shape passed to tf.nn.conv2d.
    shape = [filter_size, filter_size, num_input_channels, num_filters]
    weights = new_weights(shape=shape)
    biases = new_biases(length=num_filters)

    # Stride 1 in every dimension; 'SAME' padding keeps the spatial size.
    # The parameter is ``input``; the former ``input_`` was an undefined name.
    layer = tf.nn.conv2d(input=input,
                         filters=weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    layer += biases

    if use_pooling:
        layer = tf.nn.max_pool(input=layer,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

    layer = tf.nn.relu(layer)
    return layer, weights
But when I use it with
# Input geometry: img_size x img_size images with num_channels channels.
img_size = 28
num_channels = 1

# Reshape the flat input into a 4-D tensor; -1 leaves the first
# dimension to be inferred.
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])

# Convolutional Layer 1.
filter_size1 = 5    # Convolution filters are 5 x 5 pixels.
num_filters1 = 16   # There are 16 of these filters.
layer_conv1, weights_conv1 = new_conv_layer(
    input=x_image,
    num_input_channels=num_channels,
    filter_size=filter_size1,
    num_filters=num_filters1,
    use_pooling=True,
)

# Bare expression (presumably to display the tensor in an interactive cell).
layer_conv1
I get this output:
<tf.Tensor 'Relu:0' shape=(None, 392, 392, 16) dtype=float32>
Because my images are of a square 28x28 shape and I apply 2x2 pooling, I would have expected this shape to be (None, 14, 14, 16).
Why is that not the case and how do I fix it?
in my case this line x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='x') was incorrect!
In particular img_size_flat was not the length of each "stretched" image, as it should have been.
img_size_flat = df.drop('label', axis=1).shape[1]

tf.layers.conv2d and tf.nn.conv2d Different outputs with same architecture

Note: I read the similar thread here, but it doesn't cover my use case.
I'm building a GAN and am converting my discriminator design from using tf.nn.conv2d (following some example code) to tf.layers.conv2d. Both designs use the same inputs, kernel sizes, strides, yet I'm getting different results between the two.
Both versions should be 28x28x1 input -> conv2d with a 5x5 kernel, 2 stride, 16 layers, leaky relu -> conv2d with a 3x3 kernel, 2 stride, 32 layers, leaky relu -> flatten to 7*7*32 -> 256 neuron dense network with leaky relu -> 1 value output.
I've checked the weight initialization. tf.layers.conv2d defaults to
Xavier init, as shown here.
layers version:
def discriminator(x):
    """Discriminator built from tf.layers (two strided convs + two dense layers).

    Returns a ``(logits, probabilities)`` pair, where the probabilities are
    the sigmoid of the logits.
    """
    # Reshape to a 28x28 image with one layer of depth (greyscale)
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # NOTE(review): the pasted original had no indentation; all layers are
    # assumed to live inside the scope so AUTO_REUSE covers them -- confirm.
    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
        # Settings shared by both convolutions.
        # Defaults to Xavier init for weights and Zeros for bias
        # (per the original author's note).
        shared_conv_args = dict(
            strides=2,
            padding="same",
            activation=tf.nn.leaky_relu,
        )
        disc_conv1 = tf.layers.conv2d(
            inputs=images, filters=16, kernel_size=5, **shared_conv_args)
        disc_conv2 = tf.layers.conv2d(
            inputs=disc_conv1, filters=32, kernel_size=3, **shared_conv_args)

        # Flatten the conv features before the dense layers.
        flattened = tf.reshape(disc_conv2, shape=[-1, 7 * 7 * 32])
        disc_h1 = tf.layers.dense(
            flattened, units=hidden1_dim, activation=tf.nn.leaky_relu)
        disc_logits = tf.layers.dense(disc_h1, units=1)
        disc_out = tf.nn.sigmoid(disc_logits)

    return disc_logits, disc_out
nn version:
# Discriminator parameters for the tf.nn version. Weights use the Xavier
# initializer and biases start at zero.

# Conv 1: 5x5 kernel, 1 -> 16 channels.
DC_D_W1 = tf.get_variable(
    'DC_D_W1',
    shape=[5, 5, 1, 16],
    initializer=tf.contrib.layers.xavier_initializer(),
)
DC_D_b1 = tf.get_variable('2', initializer=tf.zeros(shape=[16]))

# Conv 2: 3x3 kernel, 16 -> 32 channels.
DC_D_W2 = tf.get_variable(
    '3',
    shape=[3, 3, 16, 32],
    initializer=tf.contrib.layers.xavier_initializer(),
)
DC_D_b2 = tf.get_variable('4', initializer=tf.zeros(shape=[32]))

# Dense 1: 7*7*32 -> 256.
DC_D_W3 = tf.get_variable(
    '5',
    shape=[7 * 7 * 32, 256],
    initializer=tf.contrib.layers.xavier_initializer(),
)
DC_D_b3 = tf.get_variable('6', initializer=tf.zeros(shape=[256]))

# Dense 2: 256 -> 1.
DC_D_W4 = tf.get_variable(
    '7',
    shape=[256, 1],
    initializer=tf.contrib.layers.xavier_initializer(),
)
DC_D_b4 = tf.get_variable('8', initializer=tf.zeros(shape=[1]))

# All discriminator parameters, in layer order.
theta_DC_D = [
    DC_D_W1, DC_D_b1,
    DC_D_W2, DC_D_b2,
    DC_D_W3, DC_D_b3,
    DC_D_W4, DC_D_b4,
]
def discriminator(x):
    """Discriminator built from tf.nn ops and the module-level DC_D_* variables.

    Returns a ``(logit, probability)`` pair, where the probability is the
    sigmoid of the logit.
    """
    stride_2x2 = [1, 2, 2, 1]
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two strided convolutions, each followed by leaky ReLU.
    pre_act1 = tf.nn.conv2d(images, DC_D_W1, strides=stride_2x2, padding='SAME') + DC_D_b1
    conv1 = tf.nn.leaky_relu(pre_act1)
    pre_act2 = tf.nn.conv2d(conv1, DC_D_W2, strides=stride_2x2, padding='SAME') + DC_D_b2
    conv2 = tf.nn.leaky_relu(pre_act2)

    # Flatten, then the two fully connected layers.
    flat = tf.reshape(conv2, shape=[-1, 7 * 7 * 32])
    hidden = tf.nn.leaky_relu(tf.matmul(flat, DC_D_W3) + DC_D_b3)
    logit = tf.matmul(hidden, DC_D_W4) + DC_D_b4
    return logit, tf.nn.sigmoid(logit)

Why the image converted by ndarray of the CNN output is not correct

I'm trying to test my CNN by displaying its output as an image.
The source code is from https://xueyangfu.github.io/projects/tip2017.html
The original code runs fine.
I then changed the structure of the CNN: I added some layers and changed some parameters.
Training works, and I use an image to test the model.
When I print the output ndarray, it contains correct values.
However, the image shown by cv2 or plt is the same dark color in every pixel.
The shape and type of the output ndarray and the input ndarray are the same, (height, width, 3), but the values of the output ndarray are not written into the image.
I read back the solid-color image saved from the output ndarray. Its shape becomes (height, width, 4): a value of 1. is appended to each pixel, and the first three values differ from those in my output ndarray.
I have spent about a week on this strange problem. Please give me some advice on how to solve it or what the problem is.
Thank you for helping.
Below is the testing code:
import os
import training_0116 as DerainNet
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
import cv2
##################### Select GPU device ####################################
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# Read the test image and divide the pixel values by 255.
file = "h.jpg"
ori = img.imread(file) / 255.0
detail = ori

# Copy the three channels one by one into a fresh array. The pad width is 0,
# so np.pad adds no border here.
details = np.zeros([ori.shape[0], ori.shape[1], ori.shape[2]])
for channel in range(3):
    details[:, :, channel] = np.pad(detail[:, :, channel], 0, 'symmetric')

# Add a leading axis of size 1 to match the placeholder below.
details = np.expand_dims(details, axis=0)

# Build the inference graph on a placeholder shaped like the single image.
image = tf.placeholder(tf.float32, shape=(1,) + details.shape[1:])
out = DerainNet.inference(image)

saver = tf.train.Saver()

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    # Use the latest checkpoint in ./model/ if one exists; otherwise fall
    # back to the bundled test model.
    checkpoint_state = tf.train.get_checkpoint_state('./model/')
    if checkpoint_state:
        ckpt = tf.train.latest_checkpoint('./model/')
        saver.restore(sess, ckpt)
        print("load new model")
    else:
        saver.restore(sess, "./model/test-model/model")  # this model uses 128 feature maps and for debug only
        print("load pre-trained model")

    detail_out = sess.run(out, feed_dict={image: details})

    # Drop the leading axis and clamp the result into [0, 1] in place.
    derained = detail_out[0, :, :, :]
    derained[derained < 0.] = 0.
    derained[derained > 1.] = 1.

    img.imsave("h_11.jpg", derained)

    # Show input and output side by side.
    for position, (picture, caption) in enumerate(((ori, 'input'), (derained, 'output')), start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture)
        plt.title(caption)
    plt.show()
Below is the part of training code:
def _tanh_conv_pool(inputs, scope_name, kernel_shape, weights_name, biases_name):
    """Create one conv -> bias -> tanh -> 3x3 stride-1 max-pool stage.

    The variables are created inside ``tf.variable_scope(scope_name)`` with
    the given names. Returns ``(activation, pooled)``.
    """
    with tf.variable_scope(scope_name):
        kernel = tf.Variable(
            tf.random_normal(kernel_shape, dtype=tf.float32, stddev=1e-3),
            trainable=True, name=weights_name)
        biases = tf.Variable(
            tf.constant(0.0, shape=[FLAGS.num_feature], dtype=tf.float32),
            trainable=True, name=biases_name)
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
        activation = tf.nn.tanh(tf.nn.bias_add(conv, biases))
        pooled = tf.nn.max_pool(
            activation, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')
    return activation, pooled


def inference(images):
    """Build the network graph: four conv/tanh/pool stages and a transposed conv.

    Variable scopes and variable names are unchanged, including the repeated
    'weights2'/'biases2' names in conv_2..conv_4.
    """
    # conv1
    _, pool1 = _tanh_conv_pool(
        images, 'conv_1',
        [16, 16, FLAGS.num_channels, FLAGS.num_feature],
        'weights1', 'biases1')

    # conv2
    _, pool2 = _tanh_conv_pool(
        pool1, 'conv_2',
        [1, 1, FLAGS.num_feature, FLAGS.num_feature],
        'weights2', 'biases2')

    # conv3
    _, pool3 = _tanh_conv_pool(
        pool2, 'conv_3',
        [4, 4, FLAGS.num_feature, FLAGS.num_feature],
        'weights2', 'biases2')

    # conv4
    # NOTE(review): the final stage below reads conv4, so the pooled output of
    # this stage is computed but never used -- confirm that is intended.
    conv4, _unused_pool4 = _tanh_conv_pool(
        pool3, 'conv_4',
        [6, 6, FLAGS.num_feature, FLAGS.num_feature],
        'weights2', 'biases2')

    # Transposed convolution back to num_channels, sized like the input.
    with tf.variable_scope('conv_6'):
        kernel = tf.Variable(
            tf.random_normal([8, 8, FLAGS.num_channels, FLAGS.num_feature],
                             dtype=tf.float32, stddev=1e-3),
            trainable=True, name='weights3')
        biases = tf.Variable(
            tf.constant(0.0, shape=[FLAGS.num_channels], dtype=tf.float32),
            trainable=True, name='biases3')
        output_shape = [
            tf.shape(images)[0],
            tf.shape(images)[1],
            tf.shape(images)[2],
            FLAGS.num_channels,
        ]
        conv = tf.nn.conv2d_transpose(
            conv4, kernel, output_shape, [1, 1, 1, 1], padding='SAME')
        conv5 = tf.nn.bias_add(conv, biases)
        out = tf.nn.max_pool(
            conv5, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')
    return out
This line:
ori = ori/255.0
Normalises the image as you read it in. Normal pixels will have values 0 to 255. For training, these are typically normalised to lie between 0 and 1. But if you want to view the image, you'll have to scale up the pixel values again. It's black because pixel values close to zero are very dark.
I think you want a corresponding
ori = ori*255
just before your imshow.

Concat two padded sentences and feed them to conv1d in TensorFlow?

What dimensions are required in tf.nn.conv1d ? and how to perform max pooling afterwards?
A simple example snip:
# tf.truncated_normal expects a shape (list of ints), not a tensor of zeros.
# For tf.nn.conv1d the filter is [filter_width, in_channels, out_channels].
filter_shape = [3, 16, 16]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")

# tf.nn.conv1d names this argument ``stride`` (singular).
conv = tf.nn.conv1d(
    input_values,
    W,
    stride=2,
    padding="VALID",
    name="conv")

# nonlinearity operation
h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

# Maxpooling over the outputs
# NOTE(review): tf.nn.conv1d returns a 3-D tensor, while this ksize/strides
# have four entries, and the window length below does not account for
# stride=2 -- TODO confirm the intended pooling (e.g. expand to 4-D first).
pooled = tf.nn.max_pool(
    h,
    ksize=[1, sequence_length - filter_size + 1, 1, 1],
    strides=[1, 1, 1, 1],
    padding='VALID',
    name="pool")
pooled_outputs.append(pooled)
Check this answer as well.

Dimensionality error after applying a dense layer

I am trying to add a dense layer after applying dropout to the max pooled convolutional layer output.
I have the following TensorFlow code written in Python. Number of filters is 128 and len(filter_sizes) is 3
# One convolution + max-pool branch per filter size; the results are
# collected here and concatenated after the loop.
pooled_outputs = []
for i, filter_size in enumerate(filter_sizes):
    with tf.name_scope("conv-maxpool-%s" % filter_size):
        # Convolution Layer
        filter_shape = [filter_size, embedding_size, 1, num_filters]
        W = tf.Variable(
            tf.truncated_normal(filter_shape, stddev=0.1), name="W")
        b = tf.Variable(
            tf.constant(0.1, shape=[num_filters]), name="b")
        conv = tf.nn.conv2d(
            self.embedded_chars_expanded, W,
            strides=[1, 1, 1, 1], padding="VALID", name="conv")

        # Batch normalization was tried here and is currently disabled:
        # h = tf.contrib.layers.batch_norm(conv, center=True, scale=True, is_training=True)

        # Apply nonlinearity
        h1 = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # Maxpooling over the outputs
        pool_window = [1, sequence_length - filter_size + 1, 1, 1]
        pooled = tf.nn.max_pool(
            h1, ksize=pool_window,
            strides=[1, 1, 1, 1], padding='VALID', name="pool")
        pooled_outputs.append(pooled)

# Combine all the pooled features
num_filters_total = num_filters * len(filter_sizes)
self.h_pool = tf.concat(pooled_outputs, 3)
self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

# Add dropout
with tf.name_scope("dropout"):
    self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

# Adding dense layer
dense = tf.layers.dense(
    self.h_drop, units=num_classes, activation=tf.nn.relu)
Facing issues after the application of the dense layer.
Following is the error:
Dimensions must be equal, but are 11 and 384 for 'output/scores/MatMul' (op: 'MatMul') with input shapes: [?,11], [384,11]
Could someone please help me with it?
The error was with the indices of the matrices. I was using the xw_plus_b function provided by tensorflow and using the dimensions of the matrices for multiplication wrong.

Categories