How to assign a value to a 'Tensor' object in Keras? - python

I want to assign a value to a tensor variable in the following manner, but I get an error saying: "'Tensor' object does not support item assignment".
I am trying to convert this Python code to TensorFlow in Keras; the second line raises the error:
s1 = tf.zeros([5, 256, 256, 3], tf.float64)
s1[:,:,:,2] = -1   # this line raises the error
# depth has shape [5, 256, 256, 1]
lamda = -(depth / s1[:,:,:,2])
x_c = np.around(lamda * s1[:,:,:,0] / step, decimals=2)
y_c = np.around(lamda * s1[:,:,:,1] / step, decimals=2)
Please let me know how to fix this issue. Thank you in advance.

A TensorFlow Tensor object is not assignable. You can use a tf.Variable and an assign op instead, as in the following TF 1.x example:
import tensorflow as tf
s1 = tf.Variable(tf.zeros([5,256,256,3], tf.float64))
s2 = tf.Variable(-tf.ones([5,256,256,3], tf.float64))
assign_op = tf.assign(s1[:,:,:,2], s2[:,:,:,2])
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
result = sess.run(assign_op)
print(result)
[[[[ 0. 0. -1.]
[ 0. 0. -1.]
[ 0. 0. -1.]
...
[ 0. 0. -1.]
[ 0. 0. -1.]
[ 0. 0. -1.]]
[[ 0. 0. -1.]
[ 0. 0. -1.]
[ 0. 0. -1.]
...
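In TensorFlow 2.x, where eager execution is the default, the session boilerplate is no longer needed. A minimal sketch under that assumption: either build the constant tensor by concatenation instead of item assignment, or use sliced assignment on a tf.Variable.
import tensorflow as tf

# Option 1: build the tensor directly instead of assigning into a slice.
# Channels 0 and 1 are zeros, channel 2 is -1.
s1 = tf.concat(
    [tf.zeros([5, 256, 256, 2], tf.float64),
     -tf.ones([5, 256, 256, 1], tf.float64)],
    axis=-1)

# Option 2: if a mutable object is needed, tf.Variable supports sliced assignment.
v = tf.Variable(tf.zeros([5, 256, 256, 3], tf.float64))
v[:, :, :, 2].assign(-tf.ones([5, 256, 256], tf.float64))

print(s1.shape)            # (5, 256, 256, 3)
print(v[0, 0, 0].numpy())  # [ 0.  0. -1.]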

Related

How to define a new Tensor with a dynamic shape to support batching in a custom layer

I am trying to implement a custom layer that would preprocess a tokenized sequence of words into a matrix with a predefined number of elements equal to the size of the vocabulary. Essentially, I'm trying to implement a 'bag of words' layer. This is the closest I could come up with:
def get_encoder(vocab_size=args.vocab_size):
    encoder = TextVectorization(max_tokens=vocab_size)
    encoder.adapt(train_dataset.map(lambda text, label: text))
    return encoder

class BagOfWords(tf.keras.layers.Layer):
    def __init__(self, vocab_size=args.small_vocab_size, batch_size=args.batch_size):
        super(BagOfWords, self).__init__()
        self.vocab_size = vocab_size
        self.batch_size = batch_size
    def build(self, input_shape):
        super().build(input_shape)
    def call(self, inputs):
        if inputs.shape[-1] == None:
            return tf.constant(np.zeros([self.batch_size, self.vocab_size]))  # 32 is the batch size
        outputs = tf.zeros([self.batch_size, self.vocab_size])
        if inputs.shape[-1] != None:
            for i in range(inputs.shape[0]):
                for ii in range(inputs.shape[-1]):
                    output_idx = inputs[i][ii]
                    outputs[i][output_idx] = outputs[i][output_idx] + 1
        return outputs
model = keras.models.Sequential()
model.add(encoder)
model.add(bag_of_words)
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
No surprise that I get an error when calling fit() on the model: "Incompatible shapes: [8,1] vs. [32,1]". This happens on the last steps, when the batch size is less than 32.
My question is: Putting aside performance, how do I define the outputs Tensor for my bag of words matrix so that it has a dynamic shape for batching and get my code working?
Edit 1
After the comment, I realised that the code indeed doesn't work, because it never reaches the 'else' branch.
I edited it a bit so that it uses only tf functions:
class BagOfWords(tf.keras.layers.Layer):
    def __init__(self, vocab_size=args.small_vocab_size, batch_size=args.batch_size):
        super(BagOfWords, self).__init__()
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.outputs = tf.Variable(tf.zeros([batch_size, vocab_size]))
    def build(self, input_shape):
        super().build(input_shape)
    def call(self, inputs):
        if tf.shape(inputs)[-1] == None:
            return tf.zeros([self.batch_size, self.vocab_size])
        self.outputs.assign(tf.zeros([self.batch_size, self.vocab_size]))
        for i in range(tf.shape(inputs)[0]):
            for ii in range(tf.shape(inputs)[-1]):
                output_idx = inputs[i][ii]
                if output_idx >= tf.constant(self.vocab_size, dtype=tf.int64):
                    output_idx = tf.constant(1, dtype=tf.int64)
                self.outputs[i][output_idx].assign(self.outputs[i][output_idx] + 1)
        return self.outputs
It didn't help though: AttributeError: 'Tensor' object has no attribute 'assign'.
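For reference, the error appears because chained indexing such as self.outputs[i][output_idx] returns a plain Tensor rather than the Variable itself. A minimal sketch (not taken from the answers below) of expressing such an in-place increment as a scatter update on the variable, assuming a 2-D float variable:
import tensorflow as tf

counts = tf.Variable(tf.zeros([4, 10]))  # e.g. [batch_size, vocab_size]
i, j = 2, 7
# scatter_nd_add updates the variable in place at the given (row, column) index.
counts.scatter_nd_add(indices=[[i, j]], updates=[1.0])
print(counts[i, j].numpy())  # 1.0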
Here is an example of a Bag-of-Words custom keras layer without using any additional preprocessing layers:
import tensorflow as tf

class BagOfWords(tf.keras.layers.Layer):
    def __init__(self, vocabulary_size):
        super(BagOfWords, self).__init__()
        self.vocabulary_size = vocabulary_size
    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        outputs = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        for i in range(batch_size):
            string = inputs[i]
            string_length = tf.shape(tf.where(tf.math.not_equal(string, b'')))[0]
            string = string[:string_length]
            string_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
            for s in string:
                string_array = string_array.write(string_array.size(), tf.where(tf.equal(s, self.vocabulary_size), 1.0, 0.0))
            outputs = outputs.write(i, tf.cast(tf.reduce_any(tf.cast(string_array.stack(), dtype=tf.bool), axis=0), dtype=tf.float32))
        return outputs.stack()
And here are the manual preprocessing steps and the model:
labels = [[1], [0], [1], [0]]
texts = ['All my cats in a row',
'When my cat sits down, she looks like a Furby toy!',
'The cat from the outer space',
'Sunshine loves to sit like this for some reason.']
DEFAULT_STRIP_REGEX = r'[!"#$%&()\*\+,-\./:;<=>?#\[\\\]^_`{|}~\']'
tensor_of_strings = tf.constant(texts)
tensor_of_strings = tf.strings.lower(tensor_of_strings)
tensor_of_strings = tf.strings.regex_replace(tensor_of_strings, DEFAULT_STRIP_REGEX, "")
split_strings = tf.strings.split(tensor_of_strings).to_tensor()
flattened_split_strings = tf.reshape(split_strings, (split_strings.shape[0] * split_strings.shape[1]))
unique_words, _ = tf.unique(flattened_split_strings)
unique_words = tf.random.shuffle(unique_words)
bag_of_words = BagOfWords(vocabulary_size = unique_words)
train_dataset = tf.data.Dataset.from_tensor_slices((split_strings, labels))
model = tf.keras.Sequential()
model.add(bag_of_words)
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss = tf.keras.losses.BinaryCrossentropy())
model.fit(train_dataset.batch(2), epochs=2)
Epoch 1/2
4/4 [==============================] - 2s 7ms/step - loss: 0.7081
Epoch 2/2
4/4 [==============================] - 0s 6ms/step - loss: 0.7008
<keras.callbacks.History at 0x7f5ba844bad0>
And this is what the 4 encoded sentences look like:
print(bag_of_words(split_strings))
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0.
1. 1. 1. 0.]
[1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
0. 1. 1. 0.]
[0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0.
0. 0. 0. 0.]
[0. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
0. 0. 0. 1.]], shape=(4, 28), dtype=float32)
Correct me if I am wrong, but I think that using output_mode="multi_hot" in the TextVectorization layer would be sufficient to do what you want. According to the docs, the multi_hot output mode:
Outputs a single int array per batch, of either vocab_size or max_tokens size, containing 1s in all elements where the token mapped to that index exists at least once in the batch item
So it could be as simple as this:
import tensorflow as tf
def get_encoder():
    encoder = tf.keras.layers.TextVectorization(output_mode="multi_hot")
    encoder.adapt(train_dataset.map(lambda text, label: text))
    return encoder
texts = [
'All my cats in a row',
'When my cat sits down, she looks like a Furby toy!',
'The cat from outer space',
'Sunshine loves to sit like this for some reason.']
labels = [[1], [0], [1], [1]]
train_dataset = tf.data.Dataset.from_tensor_slices((texts, labels))
model = tf.keras.Sequential()
model.add(get_encoder())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss = tf.keras.losses.BinaryCrossentropy())
model.fit(train_dataset.batch(2), epochs=2)
This is how your texts would be encoded:
import tensorflow as tf
texts = ['All my cats in a row',
'When my cat sits down, she looks like a Furby toy!',
'The cat from outer space',
'Sunshine loves to sit like this for some reason.']
encoder = get_encoder()
inputs = encoder(texts)
print(inputs)
tf.Tensor(
[[0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.
0. 0. 1. 1.]
[0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0.
0. 1. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1.
0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0.
1. 0. 0. 0.]], shape=(4, 28), dtype=float32)
So just as you tried in your custom layer, the presence of words in a sequence is marked with 1 and the absence of words is marked with 0.
The answer above by @AloneTogether is perfectly relevant. I just wanted to publish the working code that I came up with in the first place, without manual preprocessing.
import tensorflow_datasets as tfds
ds, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True, data_dir='/tmp/imdb')
train_dataset = ds['train']
def get_encoder(vocab_size=args.vocab_size):
    encoder = TextVectorization(max_tokens=vocab_size)
    encoder.adapt(train_dataset.map(lambda text, label: text))
    return encoder

class BagOfWords(tf.keras.layers.Layer):
    def __init__(self, vocabulary_size):
        super(BagOfWords, self).__init__()
        self.vocabulary_size = vocabulary_size
    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        outputs = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        for i in range(batch_size):
            int_string = inputs[i]
            array_string = tf.TensorArray(dtype=tf.float32, size=self.vocabulary_size)
            array_string.unstack(tf.zeros(self.vocabulary_size))
            for int_word in int_string:
                idx = int_word
                idx = tf.cond(idx >= self.vocabulary_size, lambda: 1, lambda: tf.cast(idx, tf.int32))
                array_string = array_string.write(idx, array_string.read(idx) + 1.0)
            outputs = outputs.write(i, array_string.stack())
        return outputs.stack()
encoder = get_encoder(args.small_vocab_size)
bag_of_words = BagOfWords(args.small_vocab_size)
model = keras.models.Sequential()
model.add(encoder)
model.add(bag_of_words)
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
for d in train_dataset.batch(args.batch_size).take(1):
    model(d[0])
model.compile(optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

How to unbatch a Tensorflow 2.0 Dataset

I have a dataset which I create with the following code, using tf.data.Dataset:
dataset = Dataset.from_tensor_slices(corona_new)
dataset = dataset.window(WINDOW_SIZE, 1, drop_remainder=True)
dataset = dataset.flat_map(lambda x: x.batch(WINDOW_SIZE))
dataset = dataset.map(lambda x: tf.transpose(x))
for i in dataset:
    print(i.numpy())
    break
When I run it, I get the following output (this is an example of one batch):
[[ 0. 125. 111. 232. 164. 134. 235. 190.]
[ 0. 14. 16. 7. 9. 7. 6. 8.]
[ 0. 132. 199. 158. 148. 141. 179. 174.]
[ 0. 0. 0. 2. 0. 2. 1. 2.]
[ 0. 0. 0. 0. 3. 5. 0. 0.]]
How can I unbatch them?
Found my solution.
In TensorFlow 2.0 you can unbatch a tf.data.Dataset by calling the .unbatch() function.
example: dataset.unbatch()
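For reference, a minimal sketch of what unbatch() does on a toy dataset:
import tensorflow as tf

# A dataset of six scalars grouped into batches of three...
batched = tf.data.Dataset.range(6).batch(3)
print([b.numpy().tolist() for b in batched])  # [[0, 1, 2], [3, 4, 5]]

# ...flattened back into individual elements by unbatch().
unbatched = batched.unbatch()
print([e.numpy() for e in unbatched])  # [0, 1, 2, 3, 4, 5]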

Error trying to classify 3D images using Naive Bayes

I've made a convolutional neural networks algorithm to classify images, and now I want to make a Naive Bayes algorithm for comparison. My images are 3D, and I think that's the cause of the error I'm getting.
The error:
raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (1776, 3)
My code:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
import numpy as np
much_data = np.load('muchdata-50-50-30-normalizado.npy', allow_pickle=True)
X = [data[0] for data in much_data]
y = [data[1] for data in much_data]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)
print("Number of mislabeled points out of a total %d points : %d" % (X_test.shape[0], (y_test != y_pred).sum()))
My X[0] is in the following format:
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
...
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
And my y[0]:
[0 1 0]
If someone can help me understand what I'm doing wrong, it will be really helpful!
Thank you so much!
By looking at your y[0], it seems you have 3 classes in one-hot encoded format. sklearn's machine learning algorithms in general do not accept target values in one-hot encoded format. Moreover, the input (X) to the model should have the shape (no_samples, no_features), so you must flatten the 3D images.
Get rid of the one-hot encoding in the target (y) and obtain a 1D array of shape (no_samples,). You may achieve this by defining the 3 classes as 1, 2, 3, or by taking np.argmax over each one-hot vector.
Flatten the images. You may do this with X = [data[0].flatten() for data in much_data]. Both steps are sketched below.
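A minimal sketch of both steps applied to the code from the question (assuming every image in much_data has the same shape):
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

much_data = np.load('muchdata-50-50-30-normalizado.npy', allow_pickle=True)

# Flatten each 3D image into a 1D feature vector -> X has shape (no_samples, no_features).
X = np.array([data[0].flatten() for data in much_data])
# Turn one-hot labels such as [0 1 0] into class indices -> y has shape (no_samples,).
y = np.array([np.argmax(data[1]) for data in much_data])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)
print("Number of mislabeled points out of a total %d points : %d"
      % (X_test.shape[0], (y_test != y_pred).sum()))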

calling Keras predict on CNN simply returns the input

Based on a matrix, I am trying to approximate a value (regression). However, the CNN always predicts a matrix which is identical to the input of predict.
I am not getting any errors.
The data (matrices) used for training are stored in a numpy array but I only have around 9000 samples available. The values for each matrix are stored in a one dimensional array (one value for each matrix).
This is my model:
model = keras.Sequential([
    layers.Conv2D(64, kernel_size=3, activation='selu', input_shape=(8, 8, 1)),
    layers.Conv2D(64, kernel_size=3, activation='selu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=2, activation='selu'),
    layers.Flatten(),
    layers.Dense(1, activation='linear')
])

optimizer = keras.optimizers.RMSprop(0.001)
model.compile(optimizer=optimizer,
              loss='mean_squared_error',
              metrics=['mean_squared_error'])
model.fit(matrices, values, epochs=10)
test_loss = model.evaluate(test_boards, test_values, verbose=2)
Example output when calling prediction = model.predict(some_matrix) can be found below; in this case some_matrix is identical to the output shown.
[[ 51. 0. 33. 0. 100. 33. 0. 51.]
[ 10. 10. 10. 0. 0. 10. 10. 10.]
[ 0. 0. 32. 0. 0. 32. 0. 0.]
[ 0. 0. 0. 88. 10. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. -10. 0. -32. 0. 0.]
[ -10. -10. -10. 0. 0. -10. -10. -10.]
[ -51. -32. -33. -88. -100. -33. 0. -51.]]
What am I missing to get a single value as output? Or at least a modified version of the input?
Edit:
My matrix data (did not fit in a free pastebin account, sorry)
My values
An example google colab file
I did not find a way to upload the data to Google Colab and include them in the link; I'm sorry for the inconvenience.
I did get an error this time which I did not get when running the code in my own environment. This is definitely the issue but I am still unaware of how to fix this.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-595f98617fa0> in <module>()
97 [ -51, -32, -33, -88, -100, -33, 0, -51,]])
98 print(test_boards[0])
---> 99 prediction = model.predict(test_boards[0])
100 print("Prediction:")
101 print(prediction)
3 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
561 ': expected ' + names[i] + ' to have ' +
562 str(len(shape)) + ' dimensions, but got array '
--> 563 'with shape ' + str(data_shape))
564 if not check_batch_axis:
565 data_shape = data_shape[1:]
ValueError: Error when checking input: expected conv2d_12_input to have 4 dimensions, but got array with shape (8, 8, 1)
You need to add the batch size dimension to the test sample.
some_matrix = some_matrix[np.newaxis,:,:,np.newaxis]
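A minimal sketch of the shapes involved, assuming some_matrix is a single 8x8 board stored as a NumPy array:
import numpy as np

some_matrix = np.zeros((8, 8))                           # one board, shape (8, 8)
some_matrix = some_matrix[np.newaxis, :, :, np.newaxis]  # add batch and channel axes
print(some_matrix.shape)                                 # (1, 8, 8, 1): batch, height, width, channels
# model.predict(some_matrix) now returns an array of shape (1, 1):
# one scalar value for the single board in the batch.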

Keras masking zero before softmax

Suppose that I have the following output from an LSTM layer
[0. 0. 0. 0. 0.01843184 0.01929785 0. 0. 0. 0. 0. 0. ]
and I want to apply softmax on this output but I want to mask the 0's first.
When I used
mask = Masking(mask_value=0.0)(lstm_hidden)
combined = Activation('softmax')(mask)
It didn't work. Any ideas?
Update: The output from the LSTM hidden is (batch_size, 50, 4000)
You can define a custom activation to achieve this; it is equivalent to masking the zeros.
from keras.layers import Activation, Input
import keras.backend as K
from keras.utils.generic_utils import get_custom_objects
import numpy as np
import tensorflow as tf

def custom_activation(x):
    x = K.switch(tf.is_nan(x), K.zeros_like(x), x)  # prevent nan values
    x = K.switch(K.equal(K.exp(x), 1), K.zeros_like(x), K.exp(x))
    return x / K.sum(x, axis=-1, keepdims=True)

lstm_hidden = Input(shape=(12,))
get_custom_objects().update({'custom_activation': Activation(custom_activation)})
combined = Activation(custom_activation)(lstm_hidden)

x = np.array([[0., 0., 0., 0., 0.01843184, 0.01929785, 0., 0., 0., 0., 0., 0.]])
with K.get_session() as sess:
    print(combined.eval(feed_dict={lstm_hidden: x}))
[[0. 0. 0. 0. 0.49978352 0.50021654
0. 0. 0. 0. 0. 0. ]]
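The snippet above uses the TF 1.x session API. A minimal sketch of an equivalent masked softmax in TensorFlow 2.x, assuming that exact zeros are the positions to be masked:
import numpy as np
import tensorflow as tf

def masked_softmax(x, mask_value=0.0):
    mask = tf.not_equal(x, mask_value)
    # Push masked positions to a large negative value so softmax gives them ~0 weight.
    x = tf.where(mask, x, tf.fill(tf.shape(x), -1e9))
    weights = tf.nn.softmax(x, axis=-1)
    # Zero the masked positions explicitly.
    return tf.where(mask, weights, tf.zeros_like(weights))

x = tf.constant([[0., 0., 0., 0., 0.01843184, 0.01929785, 0., 0., 0., 0., 0., 0.]])
print(masked_softmax(x).numpy())
# approximately [[0. 0. 0. 0. 0.4997835 0.5002165 0. 0. 0. 0. 0. 0.]]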
