If the question seems too dumb, it is because I am new to TensorFlow.
I was implementing a toy encoder-decoder problem using TensorFlow 2's TFA seq2seq implementation.
The API was clear enough until I wanted to replace my BasicDecoder with a BeamSearchDecoder.
My question is about how to initialize the start_tokens and end_token arguments of BeamSearchDecoder.
Here is a copy of the implementation; any help is appreciated.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
enc_vocab_size = len(train_vocab) + 1
dec_vocab_size = len(target_vocab) + 1
embed_size = 10
import tensorflow_addons as tfa
encoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
decoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
sequence_lengths = keras.layers.Input(shape=[], dtype=np.int32)
encoder_embeddings = keras.layers.Embedding(enc_vocab_size, embed_size)(encoder_inputs)
encoder = keras.layers.LSTM(512, return_state = True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]
sampler = tfa.seq2seq.sampler.TrainingSampler()
decoder_embeddings = keras.layers.Embedding(dec_vocab_size, embed_size)(decoder_inputs)
decoder_cell = keras.layers.LSTMCell(512)
output_layer = keras.layers.Dense(dec_vocab_size)
beam_width = 10
start_tokens = tf.zeros([32], tf.dtypes.int32)
end_token = tf.constant(1, tf.dtypes.int32)
decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(cell = decoder_cell, beam_width = beam_width, output_layer = output_layer)
decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(encoder_state, multiplier = beam_width)
outputs, _, _ = decoder(decoder_embeddings, start_tokens = start_tokens, end_token = end_token, initial_state = decoder_initial_state)
Y_proba = tf.nn.softmax(outputs.rnn_output)
model = keras.models.Model(inputs = [encoder_inputs, decoder_inputs], outputs = [Y_proba])
model.compile(loss="sparse_categorical_crossentropy", optimizer = 'adam', metrics = ['accuracy'])
Error trace:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-101-6cf083735ed0> in <module>()
34 decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(cell = decoder_cell, beam_width = beam_width, output_layer = output_layer)
35 decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(encoder_state, multiplier = beam_width)
---> 36 outputs, _, _ = decoder(decoder_embeddings, start_tokens = start_tokens, end_token = end_token, initial_state = decoder_initial_state)
37 Y_proba = tf.nn.softmax(outputs.rnn_output)
38
1 frames
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
690 except Exception as e: # pylint:disable=broad-except
691 if hasattr(e, 'ag_error_metadata'):
--> 692 raise e.ag_error_metadata.to_exception(e)
693 else:
694 raise
ValueError: Exception encountered when calling layer "beam_search_decoder" (type BeamSearchDecoder).
in user code:
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/beam_search_decoder.py", line 941, in call *
self,
File "/usr/local/lib/python3.7/dist-packages/typeguard/__init__.py", line 262, in wrapper *
retval = func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/decoder.py", line 430, in body *
(next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/beam_search_decoder.py", line 705, in step *
cell_outputs, next_cell_state = self._cell(
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
raise e.with_traceback(filtered_tb) from None
ValueError: Exception encountered when calling layer "lstm_cell_1" (type LSTMCell).
Dimensions must be equal, but are 80 and 320 for '{{node beam_search_decoder/decoder/while/BeamSearchDecoderStep/lstm_cell_1/mul}} = Mul[T=DT_FLOAT](beam_search_decoder/decoder/while/BeamSearchDecoderStep/lstm_cell_1/Sigmoid_1, beam_search_decoder/decoder/while/BeamSearchDecoderStep/Reshape_2)' with input shapes: [320,80,2048], [320,512].
Call arguments received:
• inputs=tf.Tensor(shape=(320, None, 10), dtype=float32)
• states=ListWrapper(['tf.Tensor(shape=(320, 512), dtype=float32)', 'tf.Tensor(shape=(320, 512), dtype=float32)'])
• training=None
Call arguments received:
• embedding=tf.Tensor(shape=(None, None, 10), dtype=float32)
• start_tokens=tf.Tensor(shape=(32,), dtype=int32)
• end_token=tf.Tensor(shape=(), dtype=int32)
• initial_state=['tf.Tensor(shape=(None, 512), dtype=float32)', 'tf.Tensor(shape=(None, 512), dtype=float32)']
• training=None
• kwargs=<class 'inspect._empty'>
I answered this in this GitHub issue: https://github.com/ageron/handson-ml2/issues/541
Here is a minimalistic implementation, without attention, of what you want. Beam search is used during inference, once we are done with training.
For the implementation of the encoder-decoder (training) part, see the GitHub issue above.
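For context, here is a minimal sketch of the training-side objects that the inference model below reuses. The vocabulary sizes, sos_id and Y_train are placeholder assumptions for illustration, not the exact code from that issue:
# Hedged sketch of the training-side pieces reused by the beam-search model.
# Vocabulary sizes, sos_id and Y_train are assumptions, not real data.
import numpy as np
import tensorflow as tf
from tensorflow import keras

input_vocab_size, target_vocab_size = 40, 14   # assumed
embed_size, units = 32, 512
sos_id = 1                                     # assumed start-of-sequence token id
Y_train = np.zeros((100, 10), dtype=np.int32)  # assumed padded target sequences

encoder_input = keras.layers.Input(shape=[None], dtype=tf.int32)
encoder_embd = keras.layers.Embedding(input_vocab_size, embed_size)(encoder_input)
_, state_h, state_c = keras.layers.LSTM(units, return_state=True)(encoder_embd)
encoder_state_HC = [state_h, state_c]

# Layers shared between the training decoder and the beam-search decoder
decoder_embd_layer = keras.layers.Embedding(target_vocab_size, embed_size)
LSTMCell = keras.layers.LSTMCell(units)
output_layer = keras.layers.Dense(target_vocab_size)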
Implementing beam search
def beam_search_inferance_model(beam_width):
    batch_size = tf.shape(encoder_input)[:1]
    max_output_length = Y_train.shape[1]
    start_tokens = tf.fill(dims = batch_size, value = sos_id)
    decoder_initial_state = tfa.seq2seq.tile_batch(encoder_state_HC, multiplier = beam_width)
    beam_search_inference = tfa.seq2seq.BeamSearchDecoder(cell = LSTMCell, beam_width = beam_width, output_layer = output_layer, maximum_iterations = max_output_length)
    outputs, _, _ = beam_search_inference(decoder_embd_layer.variables, start_tokens = start_tokens, end_token = 0, initial_state = decoder_initial_state)
    final_outputs = tf.transpose(outputs.predicted_ids, perm = (0,2,1))
    beam_scores = tf.transpose(outputs.beam_search_decoder_output.scores, perm = (0,2,1))
    return keras.Model(inputs = [encoder_input], outputs = [final_outputs, beam_scores])
beam_search_inferance_model = beam_search_inferance_model(3)
Utility function
I copied this function from the TFA API tutorial and adapted it.
def beam_translate(sentence):
    X = prepare_date_strs_padded(sentence)
    result, beam_scores = beam_search_inferance_model.predict(X)
    for beam, score in zip(result, beam_scores):
        output = ids_to_date_strs(beam)
        beam_score = [a.sum() for a in score]
        print('Input: %s' % sentence)
        print('-----' * 12)
        for i in range(len(output)):
            print('{} Predicted translation: {} {}'.format(i + 1, output[i], beam_score[i]))
        print('\n')
Output
beam_translate(["July 14, 1789", "September 01, 2020"])
Input: ['July 14, 1789', 'September 01, 2020']
------------------------------------------------------------
1 Predicted translation: 2288-01-11 -83.7786865234375
2 Predicted translation: 2288-01-10 -83.90345764160156
3 Predicted translation: 2288-01-21 -84.30797576904297
Input: ['July 14, 1789', 'September 01, 2020']
------------------------------------------------------------
1 Predicted translation: 2221-02-26 -79.02340698242188
2 Predicted translation: 2222-02-26 -79.29275512695312
3 Predicted translation: 2221-02-21 -80.06587982177734
I hope this helps!
Cheers,
Kasra
Related
I have a discrete action space, spaces.Discrete(4), as well as an observation space composed of a 2D array with each item being 0, 1, 2, or 3 (thus spaces.MultiDiscrete([(1, 2*roo+1), (1, 2*roo+1), (1, 5)]), where roo=20). I plan to run a DQN algorithm on them, with the following convolutional network:
class DQN(nn.Module):
    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1)
        self.bn1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=3, stride=1):
            return (size - (kernel_size - 1) - 1) // stride + 1

        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = x.to(device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
However, the following error keeps showing up:
21 21
This is using newly initialized nets
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Improved model.ipynb Cell 7 in <module>
132 state = current_screen - last_screen
133 for t in count():
134 # Select and perform an action
--> 135 action = select_action(state)
136 _, reward, done, _ = env.step(action.item())
137 reward = torch.tensor([reward], device=device)
Improved model.ipynb Cell 7 in select_action(state)
64 if sample > eps_threshold:
65 with torch.no_grad():
---> 66 return policy_net(state).max(1)[1].view(1, 1)
67 else:
68 return torch.tensor([[
69 env.adjust_action(random.randrange(n_actions))
70 ]], device=device, dtype=torch.int)
File ...Python\Python39\lib\site-packages\torch\nn\modules\module.py:1102, in Module._call_impl(self, *input, **kwargs)
1098 # If we don't have any hooks, we want to skip the rest of the logic in
1099 # this function, and just call forward.
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
...
441 _pair(0), self.dilation, self.groups)
--> 442 return F.conv2d(input, weight, bias, self.stride,
443 self.padding, self.dilation, self.groups)
RuntimeError: expected scalar type Int but found Float
I believe the following code causes the problem:
env = BasicEnv2()
plt.ion()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

# input extraction
def observe():
    tensorify = T.Compose([T.ToTensor()])
    return tensorify(np.array(env.render(mode = "robot"))).unsqueeze(0)

env.reset()

BATCH_SIZE = 16
GAMMA = 0.99  # 0.95
EPS_START = 0.99  # 0.95
EPS_END = 0.5
EPS_DECAY = 200
TARGET_UPDATE = 3

init_obs = observe()
_, _, height, width = init_obs.shape
print(height, width)
logging.info("height: '{0}'".format(height))
logging.info("width: '{0}'".format(width))

n_actions = env.action_space.n

if os.path.isfile('model_save.pt'):
    checkpoint = torch.load("model_save.pt")
    policy_net = DQN(height, width, n_actions).to(device)
    policy_net.load_state_dict(checkpoint['policy_state_dict'])
    target_net = DQN(height, width, n_actions).to(device)
    target_net.load_state_dict(checkpoint['target_state_dict'])
    target_net.eval()
    optimizer = optim.RMSprop(policy_net.parameters())
    print("This is using previously saved and uploaded nets")
else:
    print("This is using newly initialized nets")
    policy_net = DQN(height, width, n_actions).to(device)
    target_net = DQN(height, width, n_actions).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()
    optimizer = optim.RMSprop(policy_net.parameters())

# for param in policy_net.parameters():
#     print(param.data)

memory = ReplayMemory(10000)
steps_done = 0

def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            return policy_net(state).max(1)[1].view(1, 1)
    else:
        return torch.tensor([[
            env.adjust_action(random.randrange(n_actions))
        ]], device=device, dtype=torch.int)

episode_durations = []

# PLOT DURATIONS ENTER HERE

def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                       if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    #1
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # dtype = torch.int
    next_state_values = torch.zeros(BATCH_SIZE, dtype=torch.int, device=device)
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

if not os.path.isdir("pics/"):
    try:
        os.mkdir("pics/")
    except OSError as error:
        print(error)

# Error handler (keyboard interrupt)
def sigint_handler(signal, frame):
    # place something to do here when keyboardinterrupt is caught
    print('KeyboardInterrupt is caught')
    env.render()
    env.close()
    # torch.save(DQN, "/content/model_save.pt")
    torch.save({
        'policy_state_dict': policy_net.state_dict(),
        'target_state_dict': target_net.state_dict()
    }, "model_save.pt")
    sys.exit(0)

signal.signal(signal.SIGINT, sigint_handler)

num_episodes = 150
Hi, I have the following error:
ValueError                                Traceback (most recent call last)
C:\Users\COOKET~1\AppData\Local\Temp/ipykernel_10332/793675004.py in <module>
      2 real_data = np.array(real_data)
      3 real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))
----> 4 prediction = model.predict(real_data)
      5 prediction = scaler.inverse_transform(prediction)
      6 print(f"Tomorrow's {company} share price: {prediction}")
~\anaconda3\envs\PYTHON\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb
~\anaconda3\envs\PYTHON\lib\site-packages\tensorflow\python\framework\func_graph.py in autograph_handler(*args, **kwargs)
   1127     except Exception as e:  # pylint:disable=broad-except
   1128       if hasattr(e, "ag_error_metadata"):
-> 1129         raise e.ag_error_metadata.to_exception(e)
   1130       else:
   1131         raise
ValueError: in user code:
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\engine\training.py", line 1621, in predict_function *
        return step_function(self, iterator)
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\engine\training.py", line 1611, in step_function **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\engine\training.py", line 1604, in run_step **
        outputs = model.predict_step(data)
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\engine\training.py", line 1572, in predict_step
        return self(x, training=False)
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Cooketaker\anaconda3\envs\PYTHON\lib\site-packages\keras\engine\input_spec.py", line 263, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '
    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 60, 1), found shape=(None, 59, 1)
I don't know how to fix this error; it only happens in the last part, when I want to predict the next day:
prediction = model.predict(real_data)
prediction = scaler.inverse_transform(prediction)
My full code is this:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM
#Ticker symbol of the company
company = 'FB'
#Date from which we are collecting the data (year, month, date)
start = dt.datetime(2012,1,1)
end = dt.datetime(2021,1,1)
data = web.DataReader(company, 'yahoo', start, end)
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1,1))
#How many past days of data we want to use to predict the next day price
prediction_days = 60
#Preparing the Training data
X_train = []
y_train = []
for x in range(prediction_days, len(scaled_data)):
    X_train.append(scaled_data[x-prediction_days:x, 0])
    y_train.append(scaled_data[x,0])
X_train, y_train = np.array(X_train), np.array(y_train)
#Reshaping so that it will work in Neural net
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=100, batch_size=32)
test_start = dt.datetime(2021,1,1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values
total_dataset = pd.concat((data['Close'],test_data['Close']), axis=0)
model_inputs = total_dataset[len(total_dataset) - len(test_data) - prediction_days: ].values
model_inputs = model_inputs.reshape(-1,1)
model_inputs = scaler.transform(model_inputs)
X_test = []
for x in range(prediction_days, len(model_inputs)):
    X_test.append(model_inputs[x-prediction_days:x, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
predicted_price = model.predict(X_test)
predicted_price = scaler.inverse_transform(predicted_price)
plt.plot(actual_prices, color='black',label='Actual Share price')
plt.plot(predicted_price, color='green',label='Predicted Share price')
plt.title(f"{company} Share Price prediction")
plt.xlabel('Time')
plt.ylabel(f'{company} Share Price')
plt.legend()
plt.show()
real_data = [model_inputs[len(model_inputs) + 1 - prediction_days : len(model_inputs)+1, 0]]
real_data = np.array(real_data)
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))
prediction = model.predict(real_data)
prediction = scaler.inverse_transform(prediction)
print(f"Tomorrow's {company} share price: {prediction}")
Here is my attempt at a solution. I am not very familiar with the ML side, but I approached it as an issue of making the dimensions consistent.
Thus, I changed the line (approximately line 76) from:
real_data = [model_inputs[len(model_inputs) + 1 - prediction_days : len(model_inputs)+1, 0]]
to:
real_data = [model_inputs[len(model_inputs) - prediction_days : len(model_inputs)+1, 0]]
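As a quick sanity check (a sketch with dummy data, not the asker's real inputs), the corrected slice yields exactly prediction_days timesteps, matching the model's expected input shape (None, 60, 1), whereas the original slice produced only 59:
import numpy as np

prediction_days = 60
model_inputs = np.arange(200, dtype=np.float32).reshape(-1, 1)  # dummy scaled data

# Corrected slice: the last `prediction_days` values
real_data = [model_inputs[len(model_inputs) - prediction_days: len(model_inputs) + 1, 0]]
real_data = np.array(real_data)
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))
print(real_data.shape)  # (1, 60, 1) -> 60 timesteps, as the model expects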
I also think there was an indentation issue somewhere around line 61, which I changed to:
for x in range(prediction_days, len(model_inputs)):
    X_test.append(model_inputs[x-prediction_days:x, 0])
Then, after this adjustment, I received the following graph and the following output at the end:
Tomorrow's FB share price: [[331.90765]]
The graph:
When I am defining the Hierarchical Attention Network, an error pops up which says "AttributeError: can't set attribute". Please help.
This is the Attention.py file:
import keras
import Attention
from keras.engine.topology import Layer, Input
from keras import backend as K
from keras import initializers

# Hierarchical Attention Layer Implementation
'''
Implemented by Arkadipta De (MIT Licensed)
'''

class Hierarchical_Attention(Layer):
    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(Hierarchical_Attention, self).__init__()

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(Hierarchical_Attention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x: [batch_size, sel_len, attention_dim]
        # size of u: [batch_size, attention_dim]
        # uit = tanh(xW + b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
This is the main file where I'm building the model.
import re
import os
import numpy as np
import pandas as pd
import keras
from keras.engine.topology import Layer, Input
import Attention
from sklearn.model_selection import train_test_split
from keras.models import Model, Input
from keras.layers import Dropout, Dense, LSTM, GRU, Bidirectional, concatenate, Multiply, Subtract
from keras.utils import to_categorical
from keras import backend as K
from keras import initializers

Max_Title_Length = 0
Max_Content_Length = 0
for i in range(0, len(X)):
    Max_Title_Length = max(Max_Title_Length, len(X['title'][i]))
    Max_Content_Length = max(Max_Content_Length, len(X['text'][i]))

vector_size = 100
input_title = Input(shape = (Max_Title_Length, vector_size,), name = 'input_title')
input_content = Input(shape = (Max_Content_Length, vector_size,), name = 'input_content')

def Classifier(input_title, input_content):
    #x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
    x = Bidirectional(GRU(100, return_sequences=True))(input_title)
    x_attention = Attention.Hierarchical_Attention(100)(x)
    #y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
    y = Bidirectional(GRU(100, return_sequences=True))(input_content)
    y_attention = Attention.Hierarchical_Attention(100)(y)
    z = concatenate([x_attention, y_attention])
    z = Dense(units = 512, activation = 'relu')(z)
    z = Dropout(0.2)(z)
    z = Dense(units = 256, activation = 'relu')(z)
    z = Dropout(0.2)(z)
    z = Dense(units = 128, activation = 'relu')(z)
    z = Dropout(0.2)(z)
    z = Dense(units = 50, activation = 'relu')(z)
    z = Dropout(0.2)(z)
    z = Dense(units = 10, activation = 'relu')(z)
    z = Dropout(0.2)(z)
    output = Dense(units = 2, activation = 'softmax')(z)
    model = Model(inputs = [input_title, input_content], outputs = output)
    model.summary()
    return model

def compile_and_train(model, num_epochs):
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])
    history = model.fit([train_x_title, train_x_content], train_label, batch_size=32, epochs=num_epochs)
    return history

Classifier_Model = Classifier(input_title, input_content)
This code is giving me an error which says:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2761 try:
-> 2762 super(tracking.AutoTrackable, self).__setattr__(name, value)
2763 except AttributeError:
AttributeError: can't set attribute
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
6 frames
<ipython-input-43-32804502e0b0> in <module>()
32 return history
33
---> 34 Classifier_Model = Classifier(input_title,input_content)
<ipython-input-43-32804502e0b0> in Classifier(input_title, input_content)
7 #x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
8 x = Bidirectional(GRU(200, return_sequences=True))(input_title)
----> 9 x_attention = Attention.Hierarchical_Attention(100)(x)
10 #y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
11 y = Bidirectional(GRU(100, return_sequences=True))(input_content)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1096 # Build layer if applicable (if the `build` method has been
1097 # overridden).
-> 1098 self._maybe_build(inputs)
1099 cast_inputs = self._maybe_cast_inputs(inputs, input_list)
1100
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2641 # operations.
2642 with tf_utils.maybe_init_scope(self):
-> 2643 self.build(input_shapes) # pylint:disable=not-callable
2644 # We must set also ensure that the layer is marked as built, and the build
2645 # shape is stored since user defined build functions may not be calling
/content/Attention.py in build(self, input_shape)
23 self.b = K.variable(self.init((self.attention_dim, )))
24 self.u = K.variable(self.init((self.attention_dim, 1)))
---> 25 self.trainable_weights = [self.W, self.b, self.u]
26 super(Hierarchical_Attention, self).build(input_shape)
27
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2765 ('Can\'t set the attribute "{}", likely because it conflicts with '
2766 'an existing read-only #property of the object. Please choose a '
-> 2767 'different name.').format(name))
2768 return
2769
AttributeError: Can't set the attribute "trainable_weights", likely because it conflicts with an existing read-only #property of the object. Please choose a different name.
I'm a noob in Neural Networks. Please help.
I ran into the same problem when I was trying to execute the code on Google Colab.
I found some answers on Stack Overflow saying it's an ongoing issue with TF on Colab.
link here
It remains unsolved for me, but I believe you can try setting self._trainable_weights instead of self.trainable_weights, as sketched below.
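For what it's worth, here is a minimal sketch of that workaround applied to the build() method of the Hierarchical_Attention class above (assuming your Keras version still exposes the private _trainable_weights list; registering the variables via add_weight() would be the more idiomatic fix):
def build(self, input_shape):
    assert len(input_shape) == 3
    self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
    self.b = K.variable(self.init((self.attention_dim, )))
    self.u = K.variable(self.init((self.attention_dim, 1)))
    # Assign to the private list instead of the read-only trainable_weights property
    self._trainable_weights = [self.W, self.b, self.u]
    super(Hierarchical_Attention, self).build(input_shape)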
I want to classify text to 2 classes by using this embedding: https://tfhub.dev/google/universal-sentence-encoder-multilingual/3
And I also want to add additional features after the embedding. So I have two inputs:
import tensorflow as tf
import tensorflow_hub as tfh
import tensorflow_datasets as tfds
import tensorflow_text as tft
hp = {
    'embedding': 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3',
    'units': 64,
    'learning_rate': 1e-3,
    'dropout': 0.2,
    'layers': 2
}

textInput = tf.keras.Input(shape=(1, ), name = 'text', dtype = tf.string)
featuresInput = tf.keras.Input(shape=(36, ), name = 'features')

x = tfh.KerasLayer(hp.get('embedding'), dtype = tf.string, trainable = False)(textInput)
x = tf.keras.layers.concatenate([x, featuresInput])

for index in range(hp.get('layers')):
    x = tf.keras.layers.Dense(hp.get('units'), activation = 'relu')(x)
    x = tf.keras.layers.Dropout(hp.get('dropout'))(x)

output = tf.keras.layers.Dense(
    1,
    activation = 'sigmoid',
    bias_initializer = tf.keras.initializers.Constant(INITIAL_BIAS) if INITIAL_BIAS else None
)(x)

model = tf.keras.Model(inputs = [textInput, featuresInput], outputs = output)
model.compile(
    optimizer = tf.keras.optimizers.Adam(lr = hp.get('learning_rate')),
    loss = tf.keras.losses.BinaryCrossentropy(),
    metrics = METRICS,
)
And the code fails with error:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-17-61aed6f885c9> in <module>
10 featuresInput = tf.keras.Input(shape=(36, ), name = 'features')
11
---> 12 x = tfh.KerasLayer(hp.get('embedding'), dtype = tf.string, trainable = False)(textInput)
13 x = tf.keras.layers.concatenate([x, featuresInput])
14
~/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
920 not base_layer_utils.is_in_eager_or_tf_function()):
921 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 922 outputs = call_fn(cast_inputs, *args, **kwargs)
923 # Wrap Tensors in `outputs` in `tf.identity` to avoid
924 # circular dependencies.
~/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
263 except Exception as e: # pylint:disable=broad-except
264 if hasattr(e, 'ag_error_metadata'):
--> 265 raise e.ag_error_metadata.to_exception(e)
266 else:
267 raise
AssertionError: in user code:
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow_hub/keras_layer.py:222 call *
result = f()
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/saved_model/load.py:486 _call_attribute **
return instance.__call__(*args, **kwargs)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py:580 __call__
result = self._call(*args, **kwds)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py:650 _call
return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/function.py:1665 _filtered_call
self.captured_inputs)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/function.py:1759 _call_flat
"StatefulPartitionedCall": self._get_gradient_function()}):
/usr/lib/python3.6/contextlib.py:81 __enter__
return next(self.gen)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:4735 _override_gradient_function
assert not self._gradient_function_map
AssertionError:
But it works if I use Sequential:
model = tf.keras.Sequential([
    hub.KerasLayer(embedding, input_shape=[], dtype = tf.string, trainable = True),
    tf.keras.layers.Dense(16, activation = 'relu', input_shape = (train_features.shape[-1],)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation = 'sigmoid', bias_initializer = output_bias),
])
model.compile(optimizer = tf.keras.optimizers.Adam(lr=1e-3), loss = tf.keras.losses.BinaryCrossentropy(), metrics = metrics)
Is there anything I'm doing wrong with the functional API? Can you please help me with this error?
I've faced a similar problem. My solution looks like this:
def build_model():
    premise = keras.Input(shape=(), dtype=tf.string)
    hypothesis = keras.Input(shape=(), dtype=tf.string)

    keras_emb = hub.KerasLayer(embed, input_shape=(), output_shape = (512), dtype=tf.string, trainable=True)
    prem_emb = keras_emb(premise)
    hyp_emb = keras_emb(hypothesis)

    emb = layers.Concatenate()([prem_emb, hyp_emb])
    dense = layers.Dense(32, activation="relu")(emb)
    classifier = layers.Dense(3)(dense)

    model = keras.Model(inputs=[premise, hypothesis], outputs=classifier, name="elementary_model")
    model.compile(loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=['accuracy'])
    return model
Note: the text input shape should be () (empty tuple)
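A minimal usage sketch, assuming build_model() above with embed already pointing at the loaded universal-sentence-encoder module, and with made-up example sentences and labels:
import numpy as np

model = build_model()

# Each input is a plain 1-D array of strings, matching the shape=() Inputs
premises = np.array(["a man is playing a guitar", "two dogs run in a park"])
hypotheses = np.array(["a person plays music", "the animals are asleep"])
labels = np.array([0, 2])  # assumed class ids

model.fit([premises, hypotheses], labels, epochs=1, batch_size=2)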
TensorFlow - InvalidArgumentError (see above for traceback): Incompatible shapes: [90] vs. [8704]
I am getting an incompatible shapes error. I was in the process of creating a simple chatbot with Seq2Seq.
Python version 3.5, TensorFlow version 1.3.0, Windows 10.
Please see the code below.
CODE
.... omission ....
hidden_size = 128
layers_size = 2
num_classes = 0
batch_size = 0
vacab_dict = {}
data = []
vocab_file = "data_temp.txt"
dialogue_file = "data.txt"
dir_path = "\\chatbot\\"
.... omission ...
data = load_file(dialogue_file)

enc_input = tf.placeholder(tf.float32, [None, None, num_classes])
dec_input = tf.placeholder(tf.float32, [None, None, num_classes])
targets = tf.placeholder(tf.int64, [None, None])

max_len = 0
word_to_idxs = []
word_to_idx_encoders = []
word_to_idx_decoders = []
word_to_idx_targets = []
.... omission ...

with tf.variable_scope("encode"):
    enc_cell = rnn.BasicRNNCell(hidden_size)
    enc_cell = rnn.DropoutWrapper(enc_cell, output_keep_prob=0.5)
    stacked_rnn = []
    for iiLyr in range(layers_size):
        stacked_rnn.append(tf.nn.rnn_cell.LSTMCell(num_units=num_classes, state_is_tuple=True))
    enc_cell = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn, state_is_tuple=True)
    outputs, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input, dtype=tf.float32)

with tf.variable_scope("decode"):
    dec_cell = rnn.BasicRNNCell(hidden_size)
    dec_cell = rnn.DropoutWrapper(dec_cell, output_keep_prob=0.5)
    stacked_rnn = []
    for iiLyr in range(layers_size):
        stacked_rnn.append(tf.nn.rnn_cell.LSTMCell(num_units=num_classes, state_is_tuple=True))
    dec_cell = tf.nn.rnn_cell.MultiRNNCell(cells=stacked_rnn, state_is_tuple=True)
    outputs, dec_states = tf.nn.dynamic_rnn(dec_cell, dec_input, initial_state=enc_states, dtype=tf.float32)

weights = tf.Variable(tf.ones([hidden_size, num_classes]), name="weights")
bias = tf.Variable(tf.zeros([num_classes]), name="bias")

x_for_fc = tf.reshape(outputs, [-1, hidden_size])
logit = tf.matmul(x_for_fc, weights) + bias
batch_size = tf.shape(outputs)[1]
logit = tf.reshape(logit, [-1, batch_size, num_classes])

cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit, labels=targets))

sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=targets, weights=weights)
cost = tf.reduce_mean(sequence_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # print(sess.run(enc_onehots))
    for epoch in range(100):
        _, loss = sess.run([optimizer, cost],
                           feed_dict={enc_input: word_to_idx_encoders,
                                      dec_input: word_to_idx_decoders,
                                      targets: targets_batch})
        print("Epcch: ", "%04d" % (epoch + 1),
              "cost =", "{:.6f}".format(loss))
    sess.close()
In the middle, the code is omitted.
ERROR
Traceback (most recent call last): File "C:\Users\james\Anaconda3\lib\site-packages\tensorflow\python\client\session.py",
line 1327, in _do_call return fn(*args) File "C:\Users\james\Anaconda3\lib\site-packages\tensorflow\python\client\session.py",
line 1306, in _run_fn status, run_metadata) File "C:\Users\james\Anaconda3\lib\contextlib.py",
line 66, in exit next(self.gen) File "C:\Users\james\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py",
line 466, in raise_exception_on_not_ok_status pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [90] vs. [8704] [[Node: sequence_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](sequence_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits, sequence_loss/Reshape_2)]]