Dot pipeline data with constant matrix - python

Is it possible to multiply the batch in the middle of the pipeline by a constant (non-trainable) transformation? Something along the lines of:
constant_non_trainable_matrix = numpy.array([...])  # shape (n, n)
inputs = tf.keras.Input(shape=(n,))
dense_1 = tf.keras.layers.Dense(n)(inputs)
transform = MultiplyWithMatrix(constant_non_trainable_matrix)(dense_1)
output = tf.keras.layers.Dense(n)(transform)
model = tf.keras.models.Model(inputs=inputs, outputs=output)

You can use a Lambda layer and backend.dot() to achieve that:
from keras import layers
from keras import backend as K
# ...
transformed = layers.Lambda(lambda x: K.dot(x, mat))(dense_1)
You need to construct the mat tensor using the backend functions as well (e.g. K.constant(), K.variable(), etc.).
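For instance, a minimal end-to-end sketch under the question's setup (n = 4 and the identity matrix are placeholders for the real constant matrix):
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K

n = 4
mat = K.constant(np.eye(n))  # any fixed (n, n) matrix; identity as a placeholder

inputs = tf.keras.Input(shape=(n,))
dense_1 = layers.Dense(n)(inputs)
transformed = layers.Lambda(lambda x: K.dot(x, mat))(dense_1)
outputs = layers.Dense(n)(transformed)
model = tf.keras.Model(inputs, outputs)
model.summary()  # the Lambda layer contributes no trainable weights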

Related

How to use a batch_size of Keras tensor at the model building time?

I want to use an external program as a custom operation.
Because automatic gradients are not available for it, I wrote code that provides the gradients numerically. However, because it has to compute batch_size derivatives,
I wrote it to read batch_size from the shape of x.
The following example uses a NumPy function in place of the external program:
f(x) = np.sum(x**2)
(In fact, for this simple NumPy function no loop over batch_size is necessary, but it is written this way for a general external function.)
import numpy as np
import tensorflow as tf

@tf.custom_gradient
def custom_op(x):
    # in the real case, an external function is used instead of NumPy
    # assume x shape = (batch_size, 3)
    batch_size = x.shape[0]
    input_length = x.shape[1]
    # assert input_length == 3
    yout = []  # final shape should be (batch_size, 1)
    gout = []  # final shape should be (batch_size, 3)
    for i in range(batch_size):
        inputs = x[i, :]       # shape (3,)
        y = np.sum(inputs**2)  # scalar
        yout.append(y)
        # compute finite differences, one input at a time
        dy = []
        for j in range(len(inputs)):
            delta = np.zeros_like(inputs)
            delta[j] = np.abs(inputs[j]) * 0.001
            yplus = np.sum((inputs + delta)**2)  # change only the j-th input
            grad = (yplus - y) / delta[j]        # scalar
            dy.append(grad)
        gout.append(dy)
    yout = tf.convert_to_tensor(yout, dtype='float32')         # (batch_size,)
    yout = tf.reshape(yout, shape=(batch_size, 1))             # (batch_size, 1)
    gout = tf.convert_to_tensor(gout, dtype='float32')         # (batch_size, 3)
    gout = tf.reshape(gout, shape=(batch_size, input_length))  # (batch_size, 3)

    def grad(upstream):
        return upstream * gout

    return yout, grad
x = tf.Variable([[1., 2., 3.], [2., 3., 4.]], dtype='float32')
with tf.GradientTape() as tape:
    y = custom_op(x)
tape.gradient(y, x)
and found that it works.
However, when I tried to use it in a Keras model, for example,
def construct_model():
    inputs = tf.keras.Input(shape=(3,))  # input array
    x = tf.keras.layers.Dense(1)(inputs)
    outputs = custom_op(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = 'adam'
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model
model = construct_model()
it raises errors, because the KerasTensor "inputs" does not have a specified batch_size.
I tried to specify the batch size with tf.keras.Input(shape=(3,), batch_size=2).
However, that also raises errors because of the use of KerasTensors.
How should I change custom_op to be compatible with Keras?
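One possible direction (a sketch only, not an answer from the thread): move the per-example loop inside tf.py_function, where the tensors are eager and the runtime batch size is an ordinary Python integer, so no static batch_size is needed at model-building time:
import numpy as np
import tensorflow as tf

@tf.custom_gradient
def custom_op_dynamic(x):
    def forward_and_grads(x_in):
        x_np = x_in.numpy()  # eager tensor -> plain array, any batch size
        yout = np.sum(x_np**2, axis=1, keepdims=True).astype(np.float32)
        gout = np.zeros_like(x_np, dtype=np.float32)
        for i in range(x_np.shape[0]):      # runtime batch size, a plain int
            for j in range(x_np.shape[1]):
                delta = np.zeros_like(x_np[i])
                step = np.abs(x_np[i, j]) * 0.001 + 1e-12  # avoid a zero step
                delta[j] = step
                yplus = np.sum((x_np[i] + delta)**2)
                gout[i, j] = (yplus - yout[i, 0]) / step
        return yout, gout

    yout, gout = tf.py_function(forward_and_grads, [x], [tf.float32, tf.float32])
    yout.set_shape((None, 1))  # restore static shape information for Keras

    def grad(upstream):
        return upstream * gout

    return yout, grad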

Pass non-symbolic tensor to Keras Lambda layer

I am trying to pass an RNNCell object to a Keras Lambda layer so that I can use the TensorFlow layer within a Keras model, as follows.
conv_cell = ConvGRUCell(shape=[14, 14],
                        filters=32,
                        kernel=[3, 3],
                        padding='SAME')

def convGRU(inputs, cell, length):
    output, final = tf.nn.bidirectional_dynamic_rnn(
        cell, cell, inputs, length, dtype=tf.float32)
    output = tf.concat(output, -1)
    final = tf.concat(final, -1)
    return [output, final]

lm = Lambda(lambda x: convGRU(x[0], x[1], x[2]))([input, conv_cell, length])
However, I get an error that conv_cell is not a symbolic tensor (it is a custom cell based on TensorFlow's GRUCell).
Is there any way to pass the cell to the Lambda layer? I got it to work with functools.partial, but then the model fails to save/load because it cannot access the function inside the model.
def convGRU(cell, length):  # if length is produced by the model, pass it in with the inputs
    def inner_func(inputs):
        ...  # code...
    return inner_func

lm = Lambda(convGRU(cell, length))(input)
For save/load you need custom_objects = {'convGRU': convGRU, 'cell': cell, 'length': length}, etc. Whatever Keras doesn't know automatically needs to be in custom_objects when loading a saved model.
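A minimal sketch of the closure pattern with a stand-in operation (scale_by is hypothetical, just to show that non-tensor arguments live in the closure while only tensors flow through the Lambda):
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model

def scale_by(factor):  # 'factor' is a plain Python object, not a tensor
    def inner_func(inputs):  # only tensors pass through the Lambda layer
        return inputs * factor
    return inner_func

inp = Input(shape=(4,))
out = Lambda(scale_by(2.0))(inp)
model = Model(inp, out)
print(model.predict(tf.ones((1, 4))))  # [[2. 2. 2. 2.]]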

How do I create a layer from a function that does not accept Tensors/NumPy arrays as arguments?

I have two Python functions that take strings as inputs and return NumPy arrays. I am trying to use these functions to create Lambda layers that are then fed into another Keras model.
I can vectorize the functions and then create TensorFlow operations via tf.py_func, like so (full code is further down below):
def indices_tensor(tensor):
    return tf.py_func(np.vectorize(indices), [tensor], tf.float32)

def segments_tensor(tensor):
    return tf.py_func(np.vectorize(segments), [tensor], tf.float32)
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda

pretrained_path = 'uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')

# TF_KERAS must be added to environment variables in order to use TPU
os.environ['TF_KERAS'] = '1'

import codecs
from keras_bert import load_trained_model_from_checkpoint

token_dict = {}
with codecs.open(vocab_path, 'r', 'utf8') as reader:
    for line in reader:
        token = line.strip()
        token_dict[token] = len(token_dict)

model = load_trained_model_from_checkpoint(config_path, checkpoint_path)

import numpy as np
from keras_bert import Tokenizer

tokenizer = Tokenizer(token_dict)

def tokenize(text):
    tokens = tokenizer.tokenize(text)
    indices, segments = tokenizer.encode(first=text, max_len=512)
    return indices, segments

def indices(text):
    return tokenize(text)[0]

def segments(text):
    return tokenize(text)[1]

#@title Get indices and segments of a tensor of strings
def indices_tensor(tensor):
    return tf.py_func(np.vectorize(indices), [tensor], tf.float32)

def segments_tensor(tensor):
    return tf.py_func(np.vectorize(segments), [tensor], tf.float32)

input_layer = Input(shape=(1,), dtype=tf.string)
indices_layer = Lambda(indices_tensor)(input_layer)
segments_layer = Lambda(segments_tensor)(input_layer)
Logging:
print(type(indices_layer))
print(type(segments_layer))
I want my Lambda function calls to produce Lambda layers, and Keras's documentation appears to suggest they should. Instead, as the logging statements show, they produce tensors.
What should I do to create working layers?
There is nothing wrong there; only your variable names suggest that you expect the outputs of the layers to be layers, when in fact they are tensors.
This is an input tensor, not an input layer:
input_tensor = Input(shape=(1,),dtype=tf.string)
This is a Lambda layer:
layer = Lambda(segments_tensor) #not called on the input tensor
This is a tensor that is the result of feeding the input tensor to the lambda layer:
tensor = Lambda(segments_tensor)(input_tensor)
This logic is true for every layer.
output_tensor = Layer(parameters)(input_tensor)
layer_instance = Layer(parameters)
output1 = layer_instance(input_tensor1)
output2 = layer_instance(input_tensor2)
output3 = layer_instance(input_tensor3)
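A quick way to see the distinction (a toy check with a float input instead of the string pipeline above):
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda

input_tensor = Input(shape=(1,))
layer = Lambda(lambda t: t * 2)      # a Lambda *layer* instance
output_tensor = layer(input_tensor)  # calling it on a tensor yields a tensor
print(type(layer))                   # ...Lambda
print(type(output_tensor))           # a (symbolic) tensor, not a layer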

Creating constant value in Keras

I am trying to create a constant inside a Keras model. Until now I have been passing it in as an Input, but it is always the same constant, so I want it as a constant. (The input is [1,2,3...50] for each example, so I use np.tile(np.array(range(50)), (len(X_input))) to reproduce it for each example.)
So for now I had:
constant_input = Input(shape=(50,), dtype='int32', name="constant_input")
which gives a tensor: Tensor("constant_input", shape=(?, 50), dtype=int32)
Now trying to do it as a constant:
np_constant = np.array(list(range(50))).reshape(1, 50)
tf_constant = K.constant(np_constant)
tensor_constant = Input(tensor=tf_constant, shape=(50,), dtype='int32', name="constant_input")
which gives a tensor: Tensor("constant_input", shape=(50, 1), dtype=float32)
But what I want is for the constant to be replicated for each batch, meaning that the shape of the tensor should be (?, 50), the same as with Input.
Is it possible to do that?
You cannot have a constant with a variable size; a constant always has the same value. What you can do is keep the (1, 50) constant and then tile it within TensorFlow with K.tile(). Also, prefer np.arange(50) over np.array(list(range(50))). Something like:
from keras.layers.core import Lambda
import keras.backend as K

def operateWithConstant(input_batch):
    tf_constant = K.constant(np.arange(50).reshape((1, 50)))
    batch_size = K.shape(input_batch)[0]
    tiled_constant = K.tile(tf_constant, (batch_size, 1))
    # Do some operation with tiled_constant and input_batch
    result = ...
    return result

input_batch = Input(...)
input_operated = Lambda(operateWithConstant)(input_batch)
# continue...
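For example, a concrete variant of the sketch above, with addition standing in for the elided operation (the toy operation and names are illustrative only):
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K

def operate_with_constant(input_batch):
    tf_constant = K.constant(np.arange(50).reshape((1, 50)))
    batch_size = K.shape(input_batch)[0]
    tiled_constant = K.tile(tf_constant, (batch_size, 1))
    return input_batch + tiled_constant  # toy operation: add the 0..49 ramp

inp = Input(shape=(50,))
out = Lambda(operate_with_constant)(inp)
model = Model(inp, out)
print(model.predict(np.zeros((2, 50)))[0, :5])  # -> [0. 1. 2. 3. 4.]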

Deep Network Produces Zero Accuracy

I am trying to build a deep network using Theano, but the accuracy is zero and I cannot figure out my mistake. I am trying to create a deep learning network with 3 hidden layers and one output layer for a classification task with 5 classes; therefore, the output layer has 5 nodes.
Any suggestions?
#!/usr/bin/env python
from __future__ import print_function
import theano
import theano.tensor as T
import lasagne
import numpy as np
import sklearn.datasets
import os
import csv
import pandas as pd
# Lasagne is pre-release, so its interface is changing.
# Whenever there's a backwards-incompatible change, a warning is raised.
# Let's ignore these for the course of the tutorial
import warnings
warnings.filterwarnings('ignore', module='lasagne')
from lasagne.objectives import categorical_crossentropy, aggregate
#load the data and prepare it
df = pd.read_excel('risk_sample_data_9.20.16_anon.xls',skiprows=0)
rawdata = df.values
# remove empty rows (odd rows)
mask = np.ones(len(rawdata), dtype=bool)
mask[::2] = False
data = rawdata[mask]
idx = np.array([1,5,6,7])
m = np.zeros_like(data)
m[:,idx] = 1
X = np.ma.masked_array(data,m)
X = np.ma.filled(X, fill_value=0)
X = X.astype(theano.config.floatX)
y = data[:,7] # extract financial rating labels
# convert char labels into int: A=1, B=2, C=3, D=4, F=5
y[y == 'A'] = 1
y[y == 'B'] = 2
y[y == 'C'] = 3
y[y == 'D'] = 4
y[y == 'F'] = 5
y = pd.to_numeric(y)
y = y.astype('int32')
#y = y.astype(theano.config.floatX)
N_CLASSES = 5
# First, construct an input layer.
# The shape parameter defines the expected input shape,
# which is just the shape of our data matrix data.
l_in = lasagne.layers.InputLayer(shape=X.shape)
# We'll create a network with two dense layers:
# A tanh hidden layer and a softmax output layer.
l_hidden1 = lasagne.layers.DenseLayer(
    # The first argument is the input layer
    l_in,
    # This defines the layer's output dimensionality
    num_units=250,
    # Various nonlinearities are available
    nonlinearity=lasagne.nonlinearities.rectify)
l_hidden2 = lasagne.layers.DenseLayer(
    # The first argument is the input layer
    l_hidden1,
    # This defines the layer's output dimensionality
    num_units=100,
    # Various nonlinearities are available
    nonlinearity=lasagne.nonlinearities.rectify)
l_hidden3 = lasagne.layers.DenseLayer(
    # The first argument is the input layer
    l_hidden2,
    # This defines the layer's output dimensionality
    num_units=50,
    # Various nonlinearities are available
    nonlinearity=lasagne.nonlinearities.rectify)
l_hidden4 = lasagne.layers.DenseLayer(
    # The first argument is the input layer
    l_hidden3,
    # This defines the layer's output dimensionality
    num_units=10,
    # Various nonlinearities are available
    nonlinearity=lasagne.nonlinearities.sigmoid)
# For our output layer, we'll use a dense layer with a softmax nonlinearity.
l_output = lasagne.layers.DenseLayer(
    l_hidden4, num_units=N_CLASSES, nonlinearity=lasagne.nonlinearities.softmax)
net_output = lasagne.layers.get_output(l_output)
# As a loss function, we'll use Theano's categorical_crossentropy function.
# This allows for the network output to be class probabilities,
# but the target output to be class labels.
true_output = T.ivector('true_output')
# get_loss computes a Theano expression for the objective,
# given a target variable
# By default, it will use the network's InputLayer input_var,
# which is what we want.
#loss = objective.get_loss(target=true_output)
loss = lasagne.objectives.categorical_crossentropy(net_output, true_output)
loss = aggregate(loss, mode='mean')
# Retrieving all parameters of the network is done using get_all_params,
# which recursively collects the parameters of all layers
# connected to the provided layer.
all_params = lasagne.layers.get_all_params(l_output)
# Now, we'll generate updates using Lasagne's SGD function
updates = lasagne.updates.sgd(loss, all_params, learning_rate=1)
# Finally, we can compile Theano functions for training and
# computing the output.
# Note that because loss depends on the input variable of our input layer,
# we need to retrieve it and tell Theano to use it.
train = theano.function([l_in.input_var, true_output], loss, updates=updates)
get_output = theano.function([l_in.input_var], net_output)
def eq(x, y):
    if x == y:
        return 1
    return 0
print("Training ...")
# Train for 10 epochs
for n in xrange(10):
    train(X, y)
    y_predicted = np.argmax(get_output(X), axis=1)
    correct = reduce(lambda a, b: a + b, map(eq, y_predicted, y))
    print("Iteration {} correct prediction {}".format(n, correct))
# Compute the predicted label of the training data.
# The argmax converts the class probability output to class label
y_predicted = np.argmax(get_output(X), axis=1)
print(y_predicted)
The learning rate seems way too high; try a lower one first. It may be that your model diverges on the task. It is hard to tell without being able to try it on your data.
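For instance, more conservative starting points (the exact values are illustrative, not tuned for this data):
# plain SGD with a much smaller step
updates = lasagne.updates.sgd(loss, all_params, learning_rate=0.01)
# or an adaptive method, which is less sensitive to the initial rate
updates = lasagne.updates.adam(loss, all_params, learning_rate=0.001)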
