Visualizing weights of trained neural network in keras - python

Hi, I trained an autoencoder network whose first convolution layer has an output of 96*96*32.
Now I get the weights of my model, named autoencoder, with:
layer = autoencoder.layers[1]
w = layer.get_weights()
As w is a list, please help me sort its elements and visualize the trained kernels.
I am guessing it should be 32 kernels of size 96×96.
When I type
len(w)
it gives 2, so I have 2 arrays.
The first array has 9 subarrays, each with 32 numbers.
The last array has 32 elements, so it must be the bias:
[array([[[[-6.56146603e-03, -1.51752336e-02, -3.76937017e-02,
-4.55160812e-03, 1.26366820e-02, -2.97747254e-02,
3.76312323e-02, -1.56892575e-02, 2.03932393e-02,
3.29606095e-03, 3.76580656e-02, 6.99581252e-03,
-4.97130565e-02, 3.63005586e-02, 3.70187908e-02,
2.63699284e-03, 4.42482866e-02, 8.26128479e-03,
3.44854854e-02, 1.94760375e-02, 3.91177870e-02,
-6.67006942e-03, 5.64308763e-02, -1.55166145e-02,
-3.46037326e-03, -3.14556211e-02, -2.31548538e-03,
5.77888393e-04, 2.17472352e-02, -8.16953406e-02,
1.54041937e-02, -3.55066173e-02]],
[[ 7.61649990e-03, -6.52475432e-02, 2.02584285e-02,
-4.36152853e-02, -7.94242844e-02, -6.29556971e-03,
-2.17294712e-02, 3.30206454e-02, 3.47386077e-02,
-2.77627818e-03, 4.49984707e-02, -3.03241126e-02,
-3.36903334e-02, 2.34354921e-02, 3.31020765e-02,
-7.81059638e-03, -9.54489596e-03, -1.07985372e-02,
4.10569459e-02, 5.06392084e-02, -1.64809041e-02,
8.42852518e-03, -6.24148361e-03, 1.38165271e-02,
4.47277874e-02, -1.68551356e-02, 2.87279133e-02,
-4.17906158e-02, -3.29194516e-02, 5.37550561e-02,
-3.10864598e-02, -4.53849025e-02]],
[[ 5.37880100e-02, 2.00091377e-02, -8.04780126e-02,
2.05146279e-02, -6.41385652e-03, 2.94176023e-02,
2.42049675e-02, 2.98423916e-02, 1.30865928e-02,
-9.23016574e-03, -2.63463743e-02, -1.58412699e-02,
-4.76215854e-02, -1.53328422e-02, -2.54222248e-02,
1.03113698e-02, 1.97005924e-02, -1.09527409e-02,
-4.29149866e-02, 1.15255425e-02, 3.65356952e-02,
2.26275604e-02, 8.76231957e-03, -1.82650369e-02,
4.30952013e-02, -1.58966344e-03, 1.01399068e-02,
7.15927547e-03, 2.70794444e-02, -1.93151142e-02,
2.06329934e-02, -3.24055366e-02]]],
[[[ 7.32885906e-04, -5.99233769e-02, 1.01583647e-02,
2.62707975e-02, -1.60765275e-02, 4.54364009e-02,
1.22182900e-02, 1.77695882e-02, 3.40870097e-02,
-3.20678158e-03, 1.94115974e-02, -5.89495376e-02,
5.51430099e-02, 1.08586736e-02, -2.14386974e-02,
-1.10124948e-03, -1.41514605e-02, -8.40184465e-03,
-4.09237854e-02, 2.27938611e-02, 2.82027805e-03,
3.99805643e-02, -5.23957238e-02, -6.65743649e-02,
-1.86213956e-03, 1.84283289e-03, 8.22036352e-04,
-2.04587094e-02, -4.95675243e-02, 5.40869832e-02,
4.00022417e-02, -4.74570543e-02]],
[[-3.73015292e-02, 9.84914601e-03, 9.94246900e-02,
3.19805741e-02, 8.14174674e-03, 2.72354241e-02,
-1.58177980e-03, -5.65455444e-02, -2.13499945e-02,
2.36055311e-02, 4.57456382e-03, 5.87781705e-02,
-4.50953143e-03, -3.05559561e-02, 8.65572542e-02,
-2.87776738e-02, 7.56273838e-03, -2.02421043e-02,
4.32164557e-02, 1.07650533e-02, 1.74834915e-02,
-2.26386450e-02, -4.51299828e-03, -7.19766971e-03,
-5.64673692e-02, -3.46505865e-02, -9.57003422e-03,
-4.17267382e-02, 2.74983943e-02, 7.50013590e-02,
-1.39447292e-02, -2.10063234e-02]],
[[-4.99953330e-03, -1.95915010e-02, 7.38414973e-02,
3.00457701e-02, 4.11909744e-02, -4.93509434e-02,
-3.72827090e-02, -4.84874584e-02, -1.73344277e-02,
2.13540550e-02, 2.63152272e-02, 5.11181913e-02,
5.94335012e-02, -8.46157200e-04, -3.79960015e-02,
-2.01609023e-02, 2.21411046e-02, -1.14003820e-02,
-1.78077854e-02, -6.17240835e-03, -9.96494666e-03,
-2.70768851e-02, 3.32489684e-02, -1.18451891e-02,
7.48611614e-02, 3.68427448e-02, -1.70680200e-04,
2.78645731e-03, 3.37152109e-02, -6.00774325e-02,
3.43431458e-02, 6.80516511e-02]]],
[[[ 4.51148823e-02, 4.12209071e-02, -1.92945134e-02,
-2.68811788e-02, 4.68725041e-02, -2.08357088e-02,
-3.62888947e-02, -1.60191804e-02, 3.19913588e-02,
1.54639455e-02, -7.92380888e-03, -4.85247411e-02,
-3.52074914e-02, -1.04825860e-02, -6.63231388e-02,
4.35819328e-02, 1.74060687e-02, -3.14022303e-02,
-2.88435258e-02, -2.56987382e-03, -4.61222306e-02,
9.01424140e-03, -3.54990773e-02, 3.61517034e-02,
-4.51472104e-02, -1.96188372e-02, 2.76502203e-02,
-3.39846462e-02, -5.75804268e-04, -4.55158725e-02,
2.47761561e-03, 5.08131757e-02]],
[[ 3.74217257e-02, 4.53428067e-02, -4.36269939e-02,
-1.65079869e-02, -2.69084796e-02, -2.38134293e-03,
2.26788968e-02, -3.10470518e-02, -4.33242172e-02,
1.89485904e-02, -5.52747138e-02, 6.01334386e-02,
-1.70235410e-02, -4.17503342e-02, -1.59652822e-03,
-3.10646854e-02, -1.94913559e-02, 5.42740058e-03,
5.47912866e-02, 2.19548331e-03, -2.94116754e-02,
2.24571414e-02, -1.57341175e-02, -5.24678500e-03,
4.41270098e-02, 1.79115515e-02, -3.40841003e-02,
-2.95497216e-02, 4.40835916e-02, 4.28234115e-02,
-4.25039157e-02, 5.90493456e-02]],
[[-2.71476209e-02, 6.84098527e-02, -2.91980486e-02,
-2.52507403e-02, -6.22444265e-02, 3.67519422e-03,
5.06899729e-02, 3.09969904e-03, 4.50362265e-02,
8.56801707e-05, 4.21552844e-02, -3.78406122e-02,
-1.73772611e-02, 4.68185954e-02, -6.93227863e-03,
-4.71074954e-02, 5.72011899e-03, -1.59831103e-02,
-1.66428182e-02, 1.12894354e-02, 5.62585844e-03,
1.36870472e-02, -2.89466791e-02, -2.87153292e-03,
-3.21626514e-02, -3.75866666e-02, -1.62240565e-02,
3.01954672e-02, -2.69964593e-03, -2.27513053e-02,
2.10835561e-02, -4.13369946e-02]]]], dtype=float32),
array([-1.1922461e-03, -2.0752363e-04, 1.1357996e-05, 1.6377015e-05,
-2.5950783e-04, 1.9307183e-05, -1.5572178e-06, -1.3648998e-03,
-8.6763187e-04, 4.4856939e-04, 2.7988455e-03, -7.7398616e-04,
-5.1178242e-04, -6.8265648e-04, 1.8571866e-04, -7.1992702e-04,
-5.5880222e-04, -3.6114815e-04, -9.7678707e-04, 2.6443407e-03,
1.1190268e-03, -1.0251488e-03, -1.1638318e-03, 7.1209669e-04,
4.9417594e-04, 2.3746442e-04, -4.8552561e-04, 1.4480414e-03,
-1.8445569e-05, 4.2989667e-04, 1.0579359e-04, -3.2821635e-04],
dtype=float32)]
Here is the summary of the model's first few layers:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 96, 96, 1) 0
__________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 96, 96, 32) 320 input_1[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 96, 96, 32) 128 conv2d_1[0][0]
Now how do I order and visualize them?
I am using Keras.
Thanks

Usually, if you are using a Dense layer, then a list of length 2 corresponds to the weight matrix and the bias vector.
As I don't know the type of your layer, I'm adding an example explaining the shapes for Dense and Conv2D layers.
The first length always corresponds to weights and biases; the shapes of the individual entries differ. The bias is always a 1-D array. For Dense, the weight has shape (input_dim, output_dim); for Conv2D it has shape (kernel_h, kernel_w, channels, num_filters).
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import numpy as np
i1 = Input(shape=(32,32,3))
c1 = Conv2D(32, 3)(i1)
f1 = Flatten()(c1)
d1 = Dense(5)(f1)
m = Model(i1, d1)
m.summary()
y = m(np.zeros((1, 32, 32, 3)))
print(m.layers)
cw1 = np.array(m.layers[1].get_weights())
print(cw1.shape) # (2,) -> one kernel array, one bias array
print(cw1[0].shape) # (3, 3, 3, 32) -> 3x3 kernels, 3 input channels, 32 filters
print(cw1[1].shape) # (32,) -> 32 biases
cw1 = np.array(m.layers[2].get_weights())
print(cw1.shape) # (0,) -> Flatten is just a reshaping operation, so it has no weights
cw1 = np.array(m.layers[3].get_weights())
print(cw1.shape) # (2,) -> one weight matrix, one bias array
print(cw1[0].shape) # (28800, 5) -> 28800 inputs, 5 outputs: a 28800-by-5 weight matrix
print(cw1[1].shape) # (5,) -> 5 biases
Model: "model_13"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_14 (InputLayer) [(None, 32, 32, 3)] 0
_________________________________________________________________
conv2d_13 (Conv2D) (None, 30, 30, 32) 896
_________________________________________________________________
flatten_13 (Flatten) (None, 28800) 0
_________________________________________________________________
dense_13 (Dense) (None, 5) 144005
=================================================================
Total params: 144,901
Trainable params: 144,901
Non-trainable params: 0
_________________________________________________________________
[<tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7fb8ce3bb828>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7fb8ce5fd6d8>, <tensorflow.python.keras.layers.core.Flatten object at 0x7fb8ce3bb940>, <tensorflow.python.keras.layers.core.Dense object at 0x7fb8ce3bbb70>]
(2,)
(3, 3, 3, 32)
(32,)
(0,)
(2,)
(28800, 5)
(5,)
The visualization completely depends on the dimension.
If it's 1-D,
import matplotlib.pyplot as plt
plt.plot(weight)
plt.show()
If it's 2-D,
import matplotlib.pyplot as plt
plt.imshow(weight)
plt.show()
If it's 3-D,
you can choose a channel and plot that part only.
# plotting the 32 conv filters
import matplotlib.pyplot as plt
cw1 = np.array(m.layers[1].get_weights())
for i in range(32):
    kernel = cw1[0][:, :, :, i]
    # rescale to [0, 1] so imshow can render the 3-channel kernel as an RGB image
    kernel = (kernel - kernel.min()) / (kernel.max() - kernel.min())
    plt.imshow(kernel)
    plt.show()
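For your autoencoder specifically, the first Conv2D has one input channel, so the kernel array should have shape (3, 3, 1, 32): 32 kernels of size 3×3, not 96×96 (96×96×32 is the layer's output shape, and 320 params = 3*3*1*32 + 32 confirms the 3×3 kernel). A minimal sketch that tiles all 32 kernels in one figure, assuming autoencoder is your trained model:
import matplotlib.pyplot as plt
import numpy as np

w, b = autoencoder.layers[1].get_weights()  # w: (3, 3, 1, 32), b: (32,)
fig, axes = plt.subplots(4, 8, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    ax.imshow(w[:, :, 0, i], cmap='gray')  # one input channel -> a 3x3 grayscale image
    ax.set_title('filter %d' % i)
    ax.axis('off')
plt.tight_layout()
plt.show()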

Related

How do I build a TF model from NumPy array files?

I have a dir with NumPy array files: bias1.npy, kernel1.npy, bias2.npy, kernel2.npy. How can I build a TF model that uses those arrays as kernels and biases of layers?
To avoid confusion: for consistency with the numpy files, each bias is stored as a 2-D matrix with a single row. This post shows how I reproduced a TF model from the numpy weights and biases.
import numpy as np
import tensorflow as tf
from pathlib import Path
from tensorflow.keras.layers import Input

class NumpyInitializer(tf.keras.initializers.Initializer):
    # custom class converting numpy arrays to tf initializers,
    # used to initialize both kernels and biases
    def __init__(self, array):
        # convert the numpy array into a tensor
        self.array = tf.convert_to_tensor(array.tolist())

    def __call__(self, shape, dtype=None):
        # return the stored tensor, ignoring the requested shape and dtype
        return self.array
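As a quick standalone check of the initializer (a sketch with made-up values):
init = NumpyInitializer(np.array([[1.0, 2.0], [3.0, 4.0]]))
layer = tf.keras.layers.Dense(2, kernel_initializer=init, bias_initializer='zeros')
layer.build(input_shape=(None, 2))
print(layer.kernel.numpy())  # [[1. 2.], [3. 4.]]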
def restore_model_from_numpy(directory):
    """
    Recreate a model from numpy files.
    Numpy files in the directory are ordered by layer,
    and each numpy bias matrix comes before the corresponding numpy weight matrix.
    For example:
    directory
     - L1B.npy  # numpy bias matrix for layer 1
     - L1W.npy  # numpy weight matrix for layer 1
     - L2B.npy  # numpy bias matrix for layer 2
     - L2W.npy  # numpy weight matrix for layer 2
    Parameters:
        directory - path to the directory with numpy files
    Return:
        tf model recreated from the numpy files
    """
    def file_iterating(directory):
        """
        Iterate over the directory and build a dictionary
        mapping each layer number to its structure:
        layers[layer_number] = [numpy_bias_matrix, numpy_weight_matrix]
        """
        pathlist = Path(directory).rglob("*.npy")  # list of numpy files
        layers = {}  # initialize dictionary
        index = 0
        for file in pathlist:  # iterate over the files in the directory
            if index % 2 == 0:
                layers[int(index / 2)] = []  # next layer - new key in the dictionary
            layers[int(index / 2)].append(np.load(file))  # add the bias or weight matrix to the dictionary
            index += 1
            print(file)  # optional, to show the files we deal with
        return layers  # return dictionary

    layers = file_iterating(directory)  # get a dictionary with the model structure
    inputs = Input(shape = (np.shape(layers[0][1])[0]))  # create the model's input layer
    x = inputs
    for key, value in layers.items():  # iterate over all layers in the dictionary
        bias_initializer = NumpyInitializer(layers[key][0][0])  # bias initializer for this layer
        kernel_initializer = NumpyInitializer(layers[key][1])  # weight initializer for this layer
        layer_size = np.shape(layers[key][0])[-1]  # size of the layer
        new_layer = tf.keras.layers.Dense(  # initialize a new Dense layer
            units=layer_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            activation="tanh")
        x = new_layer(x)  # stack the layer on top of the previous one
    model = tf.keras.Model(inputs, x)  # create a tf model from the stacked layers
    model.compile()  # compile the model
    return model  # return the compiled model
In my directory, I had 4 numpy files (layer 1 - L1 and layer 2 - L2):
100_5_25_1Knapsack_Layer1\100_5_25_1Knapsack\L1B.npy , shape: (1, 80)
100_5_25_1Knapsack_Layer1\100_5_25_1Knapsack\L1W.npy , shape: (100, 80)
100_5_25_1Knapsack_Layer1\100_5_25_1Knapsack\L2B.npy , shape: (1, 100)
100_5_25_1Knapsack_Layer1\100_5_25_1Knapsack\L2W.npy , shape: (80, 100)
Calling the function results in:
m = restore_model_from_numpy(my_numpy_files_directory)
m.summary()
Model: "model_592"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_312 (InputLayer) [(None, 100)] 0
_________________________________________________________________
dense_137 (Dense) (None, 80) 8080
_________________________________________________________________
dense_138 (Dense) (None, 100) 8100
=================================================================
Total params: 16,180
Trainable params: 16,180
Non-trainable params: 0
_________________________________________________________________
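As a quick sanity check (a sketch, assuming the file layout above and relative paths to the two layer-1 files), you can compare the restored weights against the arrays on disk:
kernel, bias = m.layers[1].get_weights()  # Dense layers store [kernel, bias]
assert np.allclose(kernel, np.load('L1W.npy'))
assert np.allclose(bias, np.load('L1B.npy')[0])  # bias files hold a single-row 2-D matrix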
I hope this post will be helpful to someone, as it's my first one.
Happy coding :D

ValueError: Graph disconnected: cannot obtain value for tensor Tensor…The following previous layers were accessed without issue:

I want to obtain the output of intermediate sub-model layers with tf2.keras. Here is a model composed of two sub-modules:
input_shape = (100, 100, 3)

def model1():
    input = tf.keras.layers.Input(input_shape)
    cov = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=1, name='cov1')(input)
    embedding_model = tf.keras.Model(input, cov, name='model1')
    return embedding_model

def model2(embedding_model):
    input_sequence = tf.keras.layers.Input((None,) + input_shape)
    sequence_embedding = tf.keras.layers.TimeDistributed(embedding_model, name='time_dis1')
    emb = sequence_embedding(input_sequence)
    att = tf.keras.layers.Attention()([emb, emb])
    dense1 = tf.keras.layers.Dense(64, name='dense1')(att)
    outputs = tf.keras.layers.Softmax()(dense1)
    final_model = tf.keras.Model(inputs=input_sequence, outputs=outputs, name='model2')
    return final_model

embedding_model = model1()
model2 = model2(embedding_model)
print(model2.summary())
output:
Model: "model2"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_2 (InputLayer) [(None, None, 100, 1 0
__________________________________________________________________________________________________
time_dis1 (TimeDistributed) (None, None, 98, 98, 896 input_2[0][0]
__________________________________________________________________________________________________
attention (Attention) (None, None, 98, 98, 0 time_dis1[0][0]
time_dis1[0][0]
__________________________________________________________________________________________________
dense1 (Dense) (None, None, 98, 98, 2112 attention[0][0]
__________________________________________________________________________________________________
softmax (Softmax) (None, None, 98, 98, 0 dense1[0][0]
==================================================================================================
Total params: 3,008
Trainable params: 3,008
Non-trainable params: 0
and then I want to get the output of an intermediate layer of model1 and of model2:
model1_output_layer = model2.get_layer('time_dis1').layer.get_layer('cov1')
output1 = model1_output_layer.get_output_at(0)
output2 = model2.get_layer('dense1').get_output_at(0)
output_tensors = [output1,output2]
model2_input = model2.input
submodel = tf.keras.Model([model2_input],output_tensors)
input_data2 = np.zeros((1,10,100,100,3))
result = submodel.predict([input_data2])
print(result)
Running on tf2.3, the error I am getting is:
File "/Users/bouluoyu/anaconda/envs/tf2/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 115, in __init__
self._init_graph_network(inputs, outputs)
File "/Users/bouluoyu/anaconda/envs/tf2/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/bouluoyu/anaconda/envs/tf2/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 191, in _init_graph_network
self.inputs, self.outputs)
File "/Users/bouluoyu/anaconda/envs/tf2/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 931, in _map_graph_network
str(layers_with_complete_input))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_1:0", shape=(None, 100, 100, 3), dtype=float32) at layer "cov1". The following previous layers were accessed without issue: ['time_dis1', 'attention', 'dense1']
But the following code works:
model1_input = embedding_model.input
model2_input = model2.input
submodel = tf.keras.Model([model1_input,model2_input],output_tensors)
input_data1 = np.zeros((1,100,100,3))
input_data2 = np.zeros((1,10,100,100,3))
result = submodel.predict([input_data1,input_data2])
print(result)
But that's not what I want. This is strange: model1 is part of model2, so why do we need to input an extra tensor? Sometimes it is hard to get that extra tensor, especially for complex models.
so why do we need to input an extra tensor
The short answer is: TensorFlow doesn't know to make the connection between the inputs you expect it to make. The problem arises because you're passing a Model (instead of a Layer) to your TimeDistributed layer. This leaves the Input layer of your model1 hanging, unless you explicitly pass it an input. The TimeDistributed layer is not smart enough to handle models in this way.
My solution would depend on the answer to the following question,
Why do you need model1? All it has is a Conv2D layer. You can easily do
sequence_embedding = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=1, name='cov1'),
    name='time_dis1'
)
If you do this, now you gotta change the following lines,
model1_output_layer = model2.get_layer('time_dis1').layer.get_layer('cov1')
output1 = model1_output_layer.get_output_at(0)
to something like (the exact output you want will depend on what you're actually after)
model1_output_layer = model2.get_layer('time_dis1')
output1 = model1_output_layer.output
# This output1 may need further processing depending on what you need,
# e.g. if you need mean embeddings over the time axis:
# (Average merges a list of tensors and has no axis argument, hence the Lambda)
output_mean = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(output1)
This is because you can't access the output of a layer nested inside a TimeDistributed layer: the layer passed to the TimeDistributed layer doesn't actually do anything on its own and has no defined output. It just sits there as a template the TimeDistributed layer uses to compute its output. So, to get the output from a TimeDistributed layer, you need to access it via that layer.
If you try to do it the way you have it (instead of my way), you'll get
AttributeError: Layer cov1 has no inbound nodes.
You may ask, "why did it work before?"
It's because before, you had a Model there instead of a Layer. Because the Conv2D layer was wrapped by the model, its output was defined (the model had an Input layer). And this feeds back to the reason why it complained about the missing Input from model1 when you tried to define the submodel.
I know this explanation may make your head spin, as the reasons behind this error are quite convoluted. But going through it a few times will hopefully help.
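Putting it together, here is a minimal sketch of model2 built with the Conv2D wrapped directly by TimeDistributed, after which both intermediate outputs live in one graph and the submodel needs no extra input (layer names follow the code above):
import numpy as np
import tensorflow as tf

input_shape = (100, 100, 3)
input_sequence = tf.keras.layers.Input((None,) + input_shape)
sequence_embedding = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=1, name='cov1'),
    name='time_dis1')
emb = sequence_embedding(input_sequence)
att = tf.keras.layers.Attention()([emb, emb])
dense1 = tf.keras.layers.Dense(64, name='dense1')(att)
outputs = tf.keras.layers.Softmax()(dense1)
model2 = tf.keras.Model(inputs=input_sequence, outputs=outputs, name='model2')

output1 = model2.get_layer('time_dis1').output  # conv features over time
output2 = model2.get_layer('dense1').output
submodel = tf.keras.Model(model2.input, [output1, output2])
result = submodel.predict(np.zeros((1, 10, 100, 100, 3)))  # single input, no extra tensor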

Python (TensorFlow) - Concatenating tensor objects with different dimensions

I have a question regarding concatenating two Tensor objects in TensorFlow. As you can see in the code below, I would like to concatenate a2 and b1. a2 has shape (None, 1, 512) and b1 has shape (None, 34, 512). I would like to concatenate them along the second dimension, thus axis=1.
a_input = Input(shape=(20480,))
b_input = Input(shape=(34,))
a1 = Dense(embedding_dim)(a_input) # N: activation specification needed here? # shape = (None, 512)
a2 = K.expand_dims(a1, axis=1) # shape = (None, 1, 512)
b1 = Embedding(num_words, embedding_dim, mask_zero=True)(b_input) # shape = (None, 34, 512)
c = concatenate((a2, b1), axis=1)
However, if I execute the code above, I obtain the following error
ValueError: Dimension 0 in both shapes must be equal, but are 512 and 1. Shapes are [512] and [1]. for '{{node concatenate_28/concat_1}} = ConcatV2[N=2, T=DT_BOOL, Tidx=DT_INT32](concatenate_28/ones_like, concatenate_28/ExpandDims, concatenate_28/concat_1/axis)' with input shapes: [?,1,512], [?,34,1], [] and with computed input tensors: input[2] = <1>.
What am I doing wrong here? How can this be solved?
Looking forward to some suggestions!
Providing the solution here (in the answer section), even though it is present in the comments section, for the benefit of the community.
ValueError: Dimension 0 in both shapes must be equal, but are 512 and 1. Shapes are [512] and [1]. for '{{node concatenate_28/concat_1}} = ConcatV2[N=2, T=DT_BOOL, Tidx=DT_INT32](concatenate_28/ones_like, concatenate_28/ExpandDims, concatenate_28/concat_1/axis)' with input shapes: [?,1,512], [?,34,1], [] and with computed input tensors: input[2] = <1>.
Note the T=DT_BOOL in the failing ConcatV2 node: with mask_zero=True the Embedding layer attaches a boolean mask to b1 while a2 carries none, so the Concatenate layer also tries to merge the two (incompatibly shaped) masks, and it is that mask concatenation which fails. The error was resolved by modifying the code from
b1 = Embedding(num_words, embedding_dim, mask_zero=True)(b_input)
to
b1 = Embedding(num_words, embedding_dim, mask_zero=False)(b_input)
Complete updated code below:
a_input = Input(shape=(20480,))
b_input = Input(shape=(34,))
a1 = Dense(embedding_dim)(a_input) # N: activation specification needed here? # shape = (None, 512)
a2 = K.expand_dims(a1, axis=1) # shape = (None, 1, 512)
b1 = Embedding(num_words, embedding_dim, mask_zero=False)(b_input) # shape = (None, 34, 512)
c = concatenate((a2, b1), axis=1)
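A quick, self-contained shape check of the result (a sketch; embedding_dim and num_words are assumed values consistent with the shape comments above):
from tensorflow.keras.layers import Input, Dense, Embedding, concatenate
from tensorflow.keras import backend as K

embedding_dim, num_words = 512, 8000  # assumed for illustration
a_input = Input(shape=(20480,))
b_input = Input(shape=(34,))
a2 = K.expand_dims(Dense(embedding_dim)(a_input), axis=1)
b1 = Embedding(num_words, embedding_dim, mask_zero=False)(b_input)
c = concatenate((a2, b1), axis=1)
print(c.shape)  # (None, 35, 512): the image step prepended to the 34 caption steps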

Tensorflow 2.0: Shape inference with Reshape returns None dimension

I'm working with a CNN-LSTM model on TensorFlow 2.0 + Keras to perform sequence classification. My model is defined as follows:
inp = Input(input_shape)
rshp = Reshape((input_shape[0]*input_shape[1], 1), input_shape=input_shape)(inp)
cnn1 = Conv1D(100, 9, activation='relu')(rshp)
cnn2 = Conv1D(100, 9, activation='relu')(cnn1)
mp1 = MaxPooling1D((3,))(cnn2)
cnn3 = Conv1D(50, 3, activation='relu')(mp1)
cnn4 = Conv1D(50, 3, activation='relu')(cnn3)
gap1 = AveragePooling1D((3,))(cnn4)
dropout1 = Dropout(rate=dropout[0])(gap1)
flt1 = Flatten()(dropout1)
rshp2 = Reshape((input_shape[0], -1), input_shape=flt1.shape)(flt1)
bilstm1 = Bidirectional(LSTM(240,
                             return_sequences=True,
                             recurrent_dropout=dropout[1]),
                        merge_mode=merge)(rshp2)
dense1 = TimeDistributed(Dense(30, activation='relu'))(rshp2)
dropout2 = Dropout(rate=dropout[2])(dense1)
prediction = TimeDistributed(Dense(1, activation='sigmoid'))(dropout2)
model = Model(inp, prediction, name="CNN-bLSTM_per_segment")
print(model.summary(line_length=75))
Where input_shape = (60, 60). This definition, however, raises the following error:
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
At first, I thought it was because the rshp2 layer could not reshape the flt1 output to shape (60, X). So I added a printing block before the Bidirectional(LSTM) layer:
print('reshape1: ', rshp.shape)
print('cnn1: ', cnn1.shape)
print('cnn2: ', cnn2.shape)
print('mp1: ', mp1.shape)
print('cnn3: ', cnn3.shape)
print('cnn4: ', cnn4.shape)
print('gap1: ', gap1.shape)
print('flatten 1: ', flt1.shape)
print('reshape 2: ', rshp2.shape)
And the shapes were:
reshape 1: (None, 3600, 1)
cnn1: (None, 3592, 100)
cnn2: (None, 3584, 100)
mp1: (None, 1194, 100)
cnn3: (None, 1192, 50)
cnn4: (None, 1190, 50)
gap1: (None, 396, 50)
flatten 1: (None, 19800)
reshape 2: (None, 60, None)
Looking at the flt1 layer, its output shape is (19800,), which can be reshaped as (60, 330), but for some reason the (60, -1) of the rshp2 layer is not working as intended, as evidenced by the print reshape 2: (None, 60, None). When I try to reshape as (60, 330), it works just fine. Does anyone know why the (-1) is not working?
-1 is working as intended.
From Reshape documentation, https://www.tensorflow.org/api_docs/python/tf/keras/layers/Reshape
the layer returns a tensor with shape (batch_size,) + target_shape
So, the batch size stays the same, the other dimensions are calculated based on your target_shape.
From the doc, look at the last example,
# also supports shape inference using `-1` as dimension
model.add(tf.keras.layers.Reshape((-1, 2, 2)))
model.output_shape
(None, None, 2, 2)
If you pass -1 in your target shape, Keras will store None for that axis. This is useful if you expect variable-length data on that axis, but if your data shape is always the same, just hard-code the dimension; it will then appear when you print the shape later.
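In your case the flattened size is 19800 and the first axis is 60, so the hard-coded dimension is 19800 / 60 = 330 (a sketch, reusing the names from the question):
rshp2 = Reshape((input_shape[0], 330))(flt1)  # (None, 60, 330) instead of (None, 60, None)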
N.B.: Also, there is no need to specify input_shape=input_shape for your intermediate layers in the functional API. The model will infer that for you.

Keras LSTM Model for text-generation purpose

I am a beginner with Keras and with writing neural network models, and I'm actually trying to write an LSTM for text-generation purposes, without success. What am I doing wrong?
I read this question: here
and other articles, but there is something I am missing that I can't get; sorry if I seem dumb.
The goal
My purpose is to generate English articles of a fixed length (1500 for now).
Suppose I have a dataset of 20k records in sequences (articles, basically) of different lengths. I set a fixed length for all articles (MAX_SEQUENCE_LENGTH=1500) and tokenized them, getting a matrix (X, my training data) looking like:
[[ 0 0 0 ... 88 664 206]
[ 0 0 0 ... 1 93 140]
[ 0 0 0 ... 3 173 2283]
...
[ 50 2761 4 ... 167 148 156]
[ 0 0 0 ... 10 77 206]
[ 0 0 0 ... 167 148 156]]
with a shape of 20000x1500
The output of my LSTM should be a 1 x MAX_SEQUENCE_LENGTH array of tokens.
My model looks like that:
def generator_model(sequence_input, embedded_sequences, output_shape):
    layer = LSTM(16, return_sequences=True)(embedded_sequences)
    layer = LSTM(32, return_sequences=True)(layer)
    layer = Flatten()(layer)
    output = Dense(output_shape, activation='softmax')(layer)
    generator = Model(sequence_input, output)
    return generator
with:
sequence_input = Input(batch_shape=(1, 1,1500), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
output_shape = MAX_SEQUENCE_LENGTH
The LSTM is supposed to train, with model.fit(), on a training set X of shape 20k x MAX_SEQUENCE_LENGTH,
and to return an array of tokens of shape 1 x MAX_SEQUENCE_LENGTH when I call model.predict(seed), with seed being a random noise array.
compile, fit and predict
Comments for the following section:
. generator.compile works; the model is given in the edit section of this post.
. generator.fit compiles; the epochs=1 param is for testing purposes and will be BATCH_NUM.
. Now I have some doubts about the y I give to generator.fit. For now I'm giving a matrix of 0s as the target output; if I generate it with a shape different from X.shape[0], it throws an error, which means it needs a label for every record in X. But if I give it a matrix of 0s as the target for model.fit, isn't it going to predict just arrays of 0s?
. The error it gives is always the same, whether I use noise_generator() or noise_integer_generator(); I believe it's because it doesn't like the y_shape param I'm giving.
embedding_layer = load_embeddings(word_index)
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,))
embedded_sequences = embedding_layer(sequence_input)
generator = generator_model(sequence_input, embedded_sequences, X.shape[1])
print(generator.summary())
generator.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
Xnoise = generate_integer_noise(MAX_SEQUENCE_LENGTH)
y_shape = np.zeros((X.shape[0],), dtype=int)
generator.fit(X, y_shape, epochs=1)
acc = generator.predict(Xnoise, verbose=1)
But actually I'm getting the following error
ValueError: Error when checking input: expected input_1 to have shape (1500,) but got array with shape (1,)
when I call:
Xnoise = generate_noise(samples_number=MAX_SEQUENCE_LENGTH)
generator.predict(Xnoise, verbose=1)
The noise I give is a 1 x 1500 array, but it seems to be expecting a (1500,) array, so there must be some kind of error in the shape settings for my output.
Is my model correct for my purpose, or did I write something really stupid that I can't see?
Thanks for the help you can give me, I appreciate that!
edit
Changelog:
v1.
###
- Changed model structure, now return_sequences = True and using shape instead of batch_shape
###
- Changed
sequence_input = Input(batch_shape=(1,1,1500), dtype='int32')
to
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,))
###
- Changed the error the model is giving
v2.
###
- Changed generate_noise() code
###
- Added generate_integer_noise() code
###
- Added full sequence with the model compile, fit and predict
###
- Added model.fit summary under the model summary, in the tail of the post
generate_noise() code:
def generate_noise(samples_number, mean=0.5, stdev=0.1):
    noise = np.random.normal(mean, stdev, (samples_number, MAX_SEQUENCE_LENGTH))
    print(noise.shape)
    return noise
which prints: (1500,)
generate_integer_noise() code:
def generate_integer_noise(samples_number):
    noise = []
    for _ in range(0, samples_number):
        noise.append(np.random.randint(1, MAX_NB_WORDS))
    Xnoise = np.asarray(noise)
    return Xnoise
My function load_embeddings() is as follows:
def load_embeddings(word_index, embeddingsfile='Embeddings/glove.6B.%id.txt' % EMBEDDING_DIM):
    embeddings_index = {}
    f = open(embeddingsfile, 'r', encoding='utf8')
    for line in f:
        values = line.split(' ')  # split the line by spaces
        word = values[0]  # each line starts with the word
        coefs = np.asarray(values[1:], dtype='float32')  # the rest of the line is the vector
        embeddings_index[word] = coefs  # put into the embedding dictionary
    f.close()
    print('Found %s word vectors.' % len(embeddings_index))
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index will be all zeros
            embedding_matrix[i] = embedding_vector
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)
    return embedding_layer
model summary:
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 1500) 0
_________________________________________________________________
embedding_1 (Embedding) (None, 1500, 300) 9751200
_________________________________________________________________
lstm_1 (LSTM) (None, 1500, 16) 20288
_________________________________________________________________
lstm_2 (LSTM) (None, 1500, 32) 6272
_________________________________________________________________
flatten_1 (Flatten) (None, 48000) 0
_________________________________________________________________
dense_1 (Dense) (None, 1500) 72001500
=================================================================
Total params: 81,779,260
Trainable params: 72,028,060
Non-trainable params: 9,751,200
_________________________________________________________________
model.fit() summary (using a 999-sized dataset for testing, instead of the 20k-sized one):
999/999 [==============================] - 62s 62ms/step - loss: 0.5491 - categorical_accuracy: 0.9680
I rewrote the full answer; now it works (at least it compiles and runs; I can't say anything about convergence).
First, I don't know why you use sparse_categorical_crossentropy instead of categorical_crossentropy. It could be important. I changed the model a bit so that it compiles and uses categorical_crossentropy; if you need the sparse one, change the shape of the target.
Also, I changed the batch_shape argument to shape, because that allows batches of different shapes. It's easier to work with.
And the last edit: you should change generate_noise, because an Embedding layer expects integers in [0, max_features), not normally distributed floats (see the comment in the function).
EDIT
Addressing the last comments, I've removed generate_noise and post the modified generate_integer_noise function:
from keras.layers import Input, Embedding, LSTM
from keras.models import Model
import numpy as np

MAX_SEQUENCE_LENGTH = 1500
MAX_NB_WORDS = 10  # must not exceed max_features below, since the Embedding only covers that range

def generate_integer_noise(samples_number):
    """
    samples_number is the number of samples, i.e. the first dimension in (some, 1500)
    """
    return np.random.randint(1, MAX_NB_WORDS, size=(samples_number, MAX_SEQUENCE_LENGTH))

"""
You can use your own definition of the embedding layer;
I post this one to make a reproducible example
"""
max_features, embed_dim = 10, 300
embedding_matrix = np.zeros((max_features, embed_dim))
output_shape = MAX_SEQUENCE_LENGTH

embedded_layer = Embedding(
    max_features,
    embed_dim,
    weights=[embedding_matrix],
    trainable=False
)

def generator_model(embedded_layer, output_shape):
    """
    embedded_layer: Embedding keras layer
    output_shape: shape of the target
    """
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ))
    embedded_sequences = embedded_layer(sequence_input)  # set trainable to True if you wish to train
    layer = LSTM(32, return_sequences=True)(embedded_sequences)
    layer = LSTM(64, return_sequences=True)(layer)
    output = LSTM(output_shape)(layer)
    generator = Model(sequence_input, output)
    return generator

generator = generator_model(embedded_layer, output_shape)
noise = generate_integer_noise(32)

# generator.predict(noise)
generator.compile(loss='categorical_crossentropy', optimizer='adam')
generator.fit(noise, noise)
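After fitting, generation from fresh noise is just a predict call (a sketch, using the names above):
preds = generator.predict(generate_integer_noise(1))
print(preds.shape)  # (1, 1500): one generated sequence of length MAX_SEQUENCE_LENGTH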
