I write a model in subclassing way,
'''
class block(tf.keras.Model):
def __init__(self,index,is_train_bn,channel_axis):
super().__init__()
prefix = 'block' + str(index + 5)
self.is_train_bn=is_train_bn
self.sepconv1_act = layers.Activation('relu', name=prefix + '_sepconv1_act')
self.sepconv1 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv1')
self.sepconv1_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv1_bn')
self.sepconv2_act = layers.Activation('relu', name=prefix + '_sepconv2_act')
self.sepconv2 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv2')
self.sepconv2_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv2_bn')
self.sepconv3_act = layers.Activation('relu', name=prefix + '_sepconv3_act')
self.sepconv3 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv3')
self.sepconv3_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv3_bn')
def __call__(self,x,training=False):
residual = x
x=self.sepconv1_act(x)
x=self.sepconv1(x)
x=self.sepconv1_bn(x,self.is_train_bn)
x=self.sepconv2_act(x)
x=self.sepconv2 (x)
x=self.sepconv2_bn(x,self.is_train_bn)
x=self.sepconv3_act (x)
x=self.sepconv3 (x)
x=self.sepconv3_bn (x,self.is_train_bn)
return x+residual
'''
When I want to print x, I get this error:
' Cannot convert a symbolic Tensor (block1_conv1_act_1/Relu:0) to a numpy array'.
To print out "x" from "middle of model" you can apply the approach exemplified below (code modified from your example). When creating that kind of "monitoring model" you simple get the "x_to_probe" out by a procedure like:
...where in this example the input of the model is exemplified by a random tensor.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
channel_axis=1
prefix='hmmm...'
sepconv1_act = layers.Activation('relu', name=prefix + '_sepconv1_act')
sepconv1 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv1')
sepconv1_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv1_bn')
sepconv2_act = layers.Activation('relu', name=prefix + '_sepconv2_act')
sepconv2 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv2')
sepconv2_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv2_bn')
sepconv3_act = layers.Activation('relu', name=prefix + '_sepconv3_act')
sepconv3 = layers.SeparableConv2D(728, (3, 3),padding='same',use_bias=False,name=prefix + '_sepconv3')
sepconv3_bn = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv3_bn')
#This should be "vice-versa" ...the x need to be taken from the function input...
#residual = x
is_train_bn=1
#x=self.sepconv1_act(x)
inputs=keras.Input(shape=(1,16,16))
x=sepconv1_act(inputs)
x=sepconv1(x)
x=sepconv1_bn(x,is_train_bn)
x=sepconv2_act(x)
x=sepconv2 (x)
x=sepconv2_bn(x,is_train_bn)
x=sepconv3_act (x)
x_to_probe=sepconv3 (x)
x=sepconv3_bn (x_to_probe,is_train_bn)
model=keras.Model(inputs=inputs,outputs=x,name="example for Wayne")
model.summary()
#Let's take x out..
model_for_monitoring_selected_x=keras.Model(inputs=inputs,outputs=x_to_probe,name="example for Wayne to print x")
model_for_monitoring_selected_x.summary()
Related
In my simulation of a FIFO algorithm, I am currently trying to create an object for each of the seven simulated tasks, which will later be used to display some time parameters graphically in Excel. So I create all objects in a for loop, and in another one, I execute with each of these objects the corresponding function for transfer to Excel in another class.
But in the second for loop I always get the error message
File "G:\Schedulibg\PyScheduler\pyFIFOAlgorithm.py", line 48, in sched_StartScheduler
self.worksheet1.write('A' + str((self.tasksAdded * 4) + 1), 'Turn Around Time:')
AttributeError: 'pyFIFOAlgorithm' object has no attribute 'worksheet1' "
I don't know why it looks for worksheet1 in pyFIFOAlgorithm, although the object is from pyToExcel and the corresponding method comes from the class. I have already changed the name of taskTest, because I read that this could also be a problem. Before it was just called tsk.
import pyScheduler
from pyToExcel import pyToExcel
def alg_SortArrivedTasks(arrivedTasks):
sortedTasks = []
for taskx in arrivedTasks:
sortedTasks.append(taskx)
sortedTasks.sort(key=lambda x: x.tskArrival)
return sortedTasks
class pyFIFOAlgorithm(pyScheduler.pyScheduler):
def __init__(self, taskSet, sched_Alg, idleTime):
self.alg_Identifier = 2
self.alg_Name = 'FIFO'
self.completionTime = 0
self.turnAroundTime = 0
self.totalWaitingTime = 0
self.totalTurnAroundTime = 0
self.tasksAdded = 0
super(pyFIFOAlgorithm, self).__init__(taskSet, sched_Alg, idleTime)
def sched_StartScheduler(self, taskSet):
self.sched_SchedulingPoints.insert(0, self.sched_Clock)
taskList = []
taskNumber = 1
for task in alg_SortArrivedTasks(taskSet):
print("\nArrival time of ", task.tskName, ": ", task.tskArrival)
self.sched_ExecuteAvailableTasks(task)
self.completionTime = task.completiontime
self.turnAroundTime = ((self.completionTime) - task.tskArrival)
#taskList.append(pyToExcel(taskNumber, self.completionTime, self.turnAroundTime))
self.totalTurnAroundTime += self.turnAroundTime
print("Turn Around Time: ""{:.2f}".format(self.turnAroundTime))
print("Completion Time: ""{:.2f}".format(self.completionTime))
taskList.append(pyToExcel(task.tskName, self.completionTime, self.turnAroundTime))
for taskTest in taskList:
pyToExcel.inputData(pyToExcel, taskNumber, taskTest.turn, taskTest.completion) #Line with Error
taskNumber += 1
print("\nAll tasks executed at: ", "{:.2f}".format(self.sched_Clock))
print("Average Waiting Time: ", "{:.2f}".format((self.totalWaitingTime /len(taskSet))))
print("Average Turn Around Time: ", "{:.2f}".format((self.totalTurnAroundTime / len(taskSet))))
self.worksheet1.write('A' + str((self.tasksAdded * 4) + 1), 'Turn Around Time:')
self.worksheet1.write('B' + str((self.tasksAdded * 4) + 1), self.totalTurnAroundTime / len(taskSet))
self.workbook.close()
import xlsxwriter
class pyToExcel:
def __init__(self, task, completion, turn):
self.task = task
self.completion = completion
self.turn = turn
workbook = xlsxwriter.Workbook('AlgorithmData.xlsx')
worksheet1 = workbook.add_worksheet('FIFO')
worksheet2 = workbook.add_worksheet('Graphics')
cell_format = workbook.add_format(
{
"border": 1,
"border_color": "#000000"
}
)
cell_format.set_font_color('green')
cell_format.set_bold()
cell_format.set_align('center')
cell_format.set_align('vcenter')
worksheet1.set_column(0, 0, 17)
worksheet1.set_column(1, 1, 12)
worksheet2.write('A' + str(1), 'Task', cell_format)
worksheet2.write('B' + str(1), 'Completion Time', cell_format)
worksheet2.write('C' + str(1), 'Turn Around Time', cell_format)
worksheet2.write('D' + str(1), 'Waiting Time', cell_format)
worksheet2.set_column(0, 0, 4)
worksheet2.set_column(1, 1, 15)
worksheet2.set_column(2, 2, 17)
worksheet2.set_column(3, 3, 12)
def inputData(self, task, turnaround, completion):
pyToExcel.worksheet1.write('A' + str((task * 4) + 1), 'Turn Around Time:')
pyToExcel.worksheet1.write('B' + str((task * 4) + 1), turnaround)
pyToExcel.worksheet1.write('A' + str((task * 4) + 2), 'Completion Time:')
pyToExcel.worksheet1.write('B' + str((task * 4) + 2), completion)
pyToExcel.worksheet2.write('A' + str((task + 1)), task)
pyToExcel.worksheet2.write('B' + str((task + 1)), completion)
pyToExcel.worksheet2.write('C' + str((task + 1)), turnaround)
pyToExcel.worksheet2.write('D' + str((task + 1)), 'waiting time')
It's looking up the worksheet1 attribute because you told it to:
self.worksheet1.write('A' + str((self.tasksAdded * 4) + 1), 'Turn Around Time:')
^^^^^^^^^^
That line, which is also quoted in the error message, is line 48 of the program, as per the text you quote. The line you mark as being the location of the error is line 38, but the traceback certainly says line 48.
The method is part of the class definition for pyFIFOAlgorithm, so self is almost certainly a pyFIFOAlgorithm object. Perhaps self was a typo.
I´m trying to automatize the model definition in PuLP.
Right now, I have the following model:
import pulp as pl
" Cost parameters"
p1 = 200 # Cost per unit 1
p2 = 300 # Cost per unit 2
" VARIABLES"
k0101 = pl.LpVariable("k0101", 0, 1, pl.LpInteger)
k0102 = pl.LpVariable("k0102", 0, 1, pl.LpInteger)
k0201 = pl.LpVariable("k0201", 0, 1, pl.LpInteger)
k0202 = pl.LpVariable("k0202", 0, 1, pl.LpInteger)
###### DEMAND
x010101 = pl.LpVariable("x010101", lowBound = 0)
x010102 = pl.LpVariable("x010102", lowBound = 0)
x010103 = pl.LpVariable("x010103", lowBound = 0)
x010104 = pl.LpVariable("x010104", lowBound = 0)
x010201 = pl.LpVariable("x010201", lowBound = 0)
x010202 = pl.LpVariable("x010202", lowBound = 0)
x010203 = pl.LpVariable("x010203", lowBound = 0)
x010204 = pl.LpVariable("x010204", lowBound = 0)
x020101 = pl.LpVariable("x020101", lowBound = 0)
x020102 = pl.LpVariable("x020102", lowBound = 0)
x020103 = pl.LpVariable("x020103", lowBound = 0)
x020104 = pl.LpVariable("x020104", lowBound = 0)
x020201 = pl.LpVariable("x020201", lowBound = 0)
x020202 = pl.LpVariable("x020202", lowBound = 0)
x020203 = pl.LpVariable("x020203", lowBound = 0)
x020204 = pl.LpVariable("x020204", lowBound = 0)
# Problem
z = pl.LpProblem("optimizator", pl.LpMinimize)
"OBJECTIVE FUNCTION"
z += ((p1) * (x010101 + x010102 + x010103 + x010104) + (p1) * (x010201 + x010202 + x010203 + x010204) + (p2) * (x020101 + x020102 + x020103 + x020104) + (p2) * (x020201 + x020202 + x020203 + x020204) + (p1) * (x010101 + x010102 + x010103 + x010104) + (p1) * (x010201 + x010202 + x010203 + x010204) + (p2) * (x020101 + x020102 + x020103 + x020104) + (p2) * (x020201 + x020202 + x020203 + x020204))
" CONSTRAINTS "
z += x010101 + x020101 >= 15 * k0101
" SOLUTION "
print(z)
estado = z.solve()
print(pl.LpStatus[estado])
"TOTAL COST:"
print(pl.value(z.objective))
I would like to simplify this variable definitions, in order to be able to define more variable in an easier description.
Does anyone now how can I define my variables and parameters as a dictionary, and consider that in the objective function and the constraints?
It would help to explain the problem more. The objective function as written right now has duplicate terms and it is hard to understand conceptually what you are trying to minimize.
That being said, you can use lpSum to express the sum of the variable * cost.
# create the variables
k_variable_names = ('k0101', 'k0102', 'k0201', 'k0202')
k_variables = {var: pl.LpVariable(var, cat=pl.LpBinary)
for var in k_variable_names}
x_variables_names = ('x010101' ...)
x_variables = {var: pl.LpVariable(var, lowBound=0)
for var in x_variable_names}
# objective function
z += (
lpSum([var * 2 * p1 for var_name, var in x_variables.items() if 'x010' in var_name]) +
lpSum([var * 2 * p2 for var_name, var in x_variables.items() if 'x020' in var_name])
)
I am trying to build a nest unet with the following structure:
class EFUnet(nn.Module):
backbone = efn.EfficientNetB3(
weights=None,
include_top=False,
input_shape=(256,1600,3)
)
backbone.load_weights(('../input/efficientnet-keras-weights-b0b5/'
'efficientnet-b3_imagenet_1000_notop.h5'))
def __init(self,in_channel = 3, out_channels = 1, features = 32):
super(EFUnet, self).__init__()
self.conv00 = backbone.input
self.conv10 = backbone.get_layer('stem_activation').output
self.conv20 = backbone.get_layer('block2c_add').output
self.conv30 = backbone.get_layer('block3c_add').output
self.conv40 = backbone.get_layer('block5e_add').output
self.conv50 = backbone.get_layer('block7b_add').output
self.conv01 = _H(features*(2^0+2^1), features*2^0)
self.conv11 = _H(feathres*(2^1+2^2),feature*2^1)
self.conv21 = _H(feathres*(2^2+2^3),feature*2^2)
self.conv31 = _H(feathres*(2^3+2^4),feature*2^3)
self.conv41 = _H(feathres*(2^4+2^5),feature*2^4)
self.conv02 = _H(features*(2^0*2+2^1), features*2^0)
self.conv12 = _H(features*(2^1*2+2^2), features*2^1)
self.conv22 = _H(features*(2^2*2+2^3), features*2^2)
self.conv32 = _H(features*(2^3*2+2^4), features*2^3)
self.conv03 = _H(features*(2^0*3+2^1), features*2^0)
self.conv13 = _H(features*(2^1*3+2^2), features*2^1)
self.conv23 = _H(features*(2^2*3+2^3), features*2^2)
self.conv04 = _H(features*(2^0*4+2^1), features*2^0)
self.conv14 = _H(features*(2^1*4+2^2), features*2^1)
self.conv05 = _H(features*(2^0*5+2^1), features*2^0)
self.final1 = nn.Conv2d(features, 1, kernel_size=1)
self.final2 = nn.Conv2d(features, 1, kernel_size=1)
self.final3 = nn.Conv2d(features, 1, kernel_size=1)
self.final4 = nn.Conv2d(features, 1, kernel_size=1)
self.final5 = nn.Conv2d(features, 1, kernel_size=1)
self.final = nn.Conv2d(feature*5, 4, (3,3), padding="same", activation="sigmoid")
def forward(self,input):
x00 = self.conv00(input)
x10 = self.conv10(x00)
x01 = self.conv01(torch.cat([x00,self._U(x10)],1))
x20 = self.conv20(x10)
x11 = self.conv11(torch.cat([x10,self._U(x20)],1))
x02 = self.conv02(torch.cat([x00,x01,self._U(x11)],1))
x30 = self.conv30(x20)
x21 = self.conv21(torch.cat([x20,self._U(x30)],1))
x12 = self.conv12(torch.cat([x10,x11,self._U(x21)],1))
x03 = self.conv03(torch.cat([x00,x01,x02,self._U(x12)],1))
x40 = self.conv40(x30)
x31 = self.conv31(torch.cat([x30,self._U(x40)],1))
x22 = self.conv22(torch.cat([x20,x21,self._U(x31)],1))
x13 = self.conv13(torch.cat([x10,x11,x12,self._U(x22)],1))
x04 = self.conv04(torch.cat([x00,x01,x02,x03,self._U(x13)],1))
x50 = self.conv50(x40)
x41 = self.conv41(torch.cat([x40,self._U(x50)],1))
x32 = self.conv32(torch.cat([x30,x31,self._U(x41)],1))
x23 = self.conv23(torch.cat([x20,x21,x22,self._U(x32)],1))
x14 = self.conv14(torch.cat([x10,x11,x12,x13,self._U(x23)],1))
x05 = self.conv05(torch.cat([x00,x01,x02,x03,x04,self._U(x14)],1))
output1 = self.final1(x01)
output2 = self.final2(x02)
output3 = self.final3(x03)
output4 = self.final4(x04)
output5 = self.final4(x05)
x_out = torch.cat([output1, output2, output3, output4, output5],1)
x_out = self.final(x_out)
return x_out
def _H(in_channels, features, use_gn=True):
if use_gn:
norm = torch.nn.GroupNorm(num_channels = 3, num_groups=1)
else:
norm = BatchNormalization(number_features = features)
return nn.Sequential(
OrderedDict(
[
(name + "conv", nn.Conv2D(in_channels, features, (2, 2), padding='same')),
(name + "norm", norm()),
(name + 'LReLU',LeakyReLU(alpha=0.1))
]
)
)
def _U(in_channels, features, use_gn=True):
if use_gn:
norm = torch.nn.GroupNorm(num_channels = 3, num_groups=1)
else:
norm = BatchNormalization(number_features = features)
return nn.Sequential(
OrderedDict(
[
(name + "upconv", nn.ConvTranspose2d(in_channels, features, (2, 2), padding='same')),
(name + "norm", norm()),
(name + 'LReLU',LeakyReLU(alpha=0.1))
]
)
)
When I put it in the adam optimizer. It complaint that
ValueError: optimizer got an empty parameter list
So i try to do some QC to check the parameters of the Unet. with the following code:
model = EFUnet()
model = model.cuda()
print(list(model.parameters))
However, python complaint that the output is a method which is not iterable.
TypeError: 'method' object is not iterable
Can any one help to see what is causing the issue that python not get the parameters?
Thank you!
In pytorch to get the parameters, one should call the method model.parameters() which will return a generator object on which you can iterate.
or
A better approach will be to use model.named_parameters() which will again return a generator object were parameters are mapped with the corresponding layer name.
Instead:
print(list(model.parameters))
use:
print(list(model.parameters()))
or
print(list(model.named_parameters())
or
for p in model.named_parameters():
print(p[0],":",p[1].size() )
as part of an assignment, we need to implement NN.
I am calculating forward result, then I run back propagation and then I updated the weights (all for the same instance).
When i try to calculate the forward value of the same instance, I am getting an error that the dimensions are wrong.
class MyNN:
def __init__(self, learning_rate, layer_sizes):
self.learning_rate = learning_rate
self.layer_sizes = layer_sizes
self.model_params = {}
self.memory = {}
self.grads = {}
# Initializing weights
for layer_index in range(len(layer_sizes) - 1):
W_input = layer_sizes[layer_index + 1]
W_output = layer_sizes[layer_index]
self.model_params['W_' + str(layer_index + 1)] = np.random.randn(W_input, W_output) * 0.1
self.model_params['b_' + str(layer_index + 1)] = np.random.randn(W_input) * 0.1
def forward_single_instance(self, x):
a_i_1 = x
self.memory['a_0'] = x
for layer_index in range(len(self.layer_sizes) - 1):
W_i = self.model_params['W_' + str(layer_index + 1)]
b_i = self.model_params['b_' + str(layer_index + 1)]
z_i = np.dot(W_i, a_i_1) + b_i
a_i = 1/(1+np.exp(-z_i))
self.memory['a_' + str(layer_index + 1)] = a_i
a_i_1 = a_i
return a_i_1
def log_loss(self, y_hat, y):
'''
Logistic loss, assuming a single value in y_hat and y.
'''
m = y_hat[0]
cost = -y[0]*np.log(y_hat[0]) - (1 - y[0])*np.log(1 - y_hat[0])
return cost
def backward_single_instance(self, y):
a_output = self.memory['a_' + str(len(self.layer_sizes) - 1)]
dz = a_output - y
for layer_index in range(len(self.layer_sizes) - 1, 0, -1):
a_l_1 = self.memory['a_' + str(layer_index - 1)]
dW = np.dot(dz.reshape(-1, 1), a_l_1.reshape(1, -1))
db = dz.transpose()
self.grads['dW_' + str(layer_index)] = dW
self.grads['db_' + str(layer_index)] = db
W_l = self.model_params['W_' + str(layer_index)]
dz = (a_l_1 * (1 - a_l_1)).reshape(-1, 1) * np.dot(W_l.T, dz.reshape(-1, 1))
def update(self):
for layer_index in range(len(self.layer_sizes) - 1):
Wi = 'W_' + str(layer_index + 1)
bi = 'b_' + str(layer_index + 1)
dWi = 'dW_' + str(layer_index + 1)
dbi = 'db_' + str(layer_index + 1)
W_i = self.model_params[Wi]
b_i = self.model_params[bi]
dW_i = self.grads[dWi]
db_i = self.grads[dbi]
self.model_params[Wi] = W_i - self.learning_rate * dW_i
self.model_params[bi] = b_i - self.learning_rate * db_i
then for testing I wrote this code:
nn = MyNN(0.01, [3, 2, 1])
x = np.random.randn(3)
y = np.random.randn(1)
y_hat = nn.forward_single_instance(x)
print(y_hat)
nn.backward_single_instance(y)
nn.update()
y_hat = nn.forward_single_instance(x)
This is the error that is printed:
x
[ 0.57072262 1.8578982 -1.48560691]
x
[[0.53932246 0.57051188]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-99-d8d9152fef18> in <module>()
----> 1 y_hat = nn.forward_single_instance(x)
2 print(y_hat)
3
4
5 l = nn.log_loss(y_hat, y)
<ipython-input-89-f354993c95f9> in forward_single_instance(self, x)
24 W_i = self.model_params['W_' + str(layer_index + 1)]
25 b_i = self.model_params['b_' + str(layer_index + 1)]
---> 26 z_i = np.dot(W_i, a_i_1) + b_i
27 a_i = 1/(1+np.exp(-z_i))
28 self.memory['a_' + str(layer_index + 1)] = a_i
ValueError: shapes (1,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)
the problem is in b_i dimensions, and I cant figure out why.
I tried variations to b_i shape(row vector, column vector), and all of them throws the same exception.
Essentially, I'm running into an issue where, when I try to update some of the variables within a scope (i.e. those in the discriminator scope), the gradient is always zero (I can verify this by printing them out after computing them).
This is confusing because I don't see why the loss isn't being propagated through.
My code stubs are as follows:
def build_model(self):
# Process inputs
self.inputs = tf.placeholder(tf.float32, shape = self.input_shape, name = "input")
self.is_training = tf.placeholder(tf.bool, name = "is_training")
self.targets = tf.placeholder(tf.float32, shape = self.output_shape, name = "targets")
self.target_p = tf.placeholder(tf.float32, shape = self.patient_shape, name = "targets_patients")
self.target_s = tf.placeholder(tf.float32, shape = self.sound_shape, name = "targets_sounds")
# Process outputs
self.encoded_X = self.encoder(self.inputs)
self.posteriors = self.predictor(self.encoded_X)
self.patient_predict, self.sound_predict = self.discriminator(self.encoded_X, tf.expand_dims(self.posteriors, axis = -1))
self.patient_predict_id = tf.argmax(tf.nn.softmax(self.patient_predict, axis = -1))
self.sound_predict_id = tf.argmax(tf.nn.softmax(self.sound_predict, axis = -1))
# Process losses
self.segment_loss = tf.losses.mean_squared_error(self.targets, self.posteriors)
self.patient_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.patient_predict, labels = self.target_p)
self.sound_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.sound_predict, labels = self.target_s)
self.disc_loss = self.patient_loss + self.sound_loss
self.combined_loss = self.segment_loss - self.lambda_param*(self.disc_loss)
self.extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(self.extra_update_ops):
predictor_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="predictor")
encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="encoder")
discrim_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="discriminator")
self.discrim_train = tf.train.AdamOptimizer(0.001).minimize(tf.reduce_mean(-1*self.combined_loss), var_list=discrim_vars)
self.predict_train = tf.train.AdamOptimizer(0.001).minimize(tf.reduce_mean(self.combined_loss), var_list=predictor_vars)
self.encode_train = tf.train.AdamOptimizer(0.001).minimize(tf.reduce_mean(self.combined_loss), var_list=encoder_vars)
As you can see self.combined_loss must depend on self.patient_loss which is found from self.discriminator.
My code for discriminator() is here:
def discriminator(self, encoded_X, posterior, reuse = False):
with tf.variable_scope("discriminator") as scope:
if reuse: scope.reuse_variables()
print('\n############## \nDiscriminator\n')
print('Discriminator encode input-shape: ', self.encode_shape)
print('Discriminator posterior input-shape: ', self.output_shape, ' (Expanded to correct size)')
inputs = tf.concat([encoded_X, posterior], axis = -2)
tf.stop_gradient(posterior)
print('Stacked input shape: ', inputs.get_shape())
h = tf.layers.conv2d(inputs, 10, (5, 2), padding = 'SAME', activation = tf.nn.relu)
h = tf.layers.max_pooling2d(h, (5, 2), (5, 2))
print('Layer 1: ', h.get_shape())
h = tf.layers.conv2d(h, 5, (5, 2), padding = 'SAME', activation = tf.nn.relu)
h = tf.squeeze(tf.layers.max_pooling2d(h, (3, 2), (3, 2)), axis = -2)
h = tf.layers.flatten(h)
print('Layer 2: ', h.get_shape())
h_p = tf.layers.dense(h, self.patient_shape[-1])
h_s = tf.layers.dense(h, self.sound_shape[-1])
print('Discriminator patient o/p shape: ', h_p.get_shape(), ' Expected shape: ', self.patient_shape)
print('Discriminator sound o/p shape: ', h_s.get_shape(), ' Expected shape: ', self.sound_shape)
return h_p, h_s
tf.stop_gradient is called because I do not want the gradient to flow through from the discriminator to the model that produces the posteriors.
Finally, I call my model here:
feed_dict= {
self.inputs: X,
self.targets: y,
self.target_p: y_p_oh,
self.target_s: y_s_oh,
self.is_training: True
}
posteriors, cost_loss, disc_loss, patient_id_pred , sound_id_pred, _ , _ = self.sess.run([
self.posteriors,
self.combined_loss,
self.disc_loss,
self.patient_predict_id,
self.sound_predict_id,
self.predict_train,
self.encode_train,
], feed_dict = feed_dict)
j = 0
print('Combined-loss: ', np.mean(cost_loss), 'Discriminator-loss: ', np.mean(disc_loss))
while np.mean(disc_loss) > entropy_cutoff:
disc_loss, _ = self.sess.run([self.disc_loss, self.discrim_train], feed_dict = feed_dict)
j+=1
print(' Inner loop iteration: ', j, ' Loss: ', np.around(np.mean(disc_loss), 5), ' Cutoff: ', np.around(entropy_cutoff, 5), end = '\r')
print("")
From my investigation, the code gets stuck on the while-loop as the train-step minimisation (to effectively maximise the combined_loss) simply gives zero gradients for all the variables within the discriminator scope. Why would this be occurring?
EDIT: I think I've localised the error to:
self.disc_loss = self.patient_loss + self.sound_loss
self.combined_loss = self.segment_loss - self.lambda_param * self.disc_loss
If I apply the minimisation on self.disc_loss, it works fine. But when I apply the minimisation on self.combined_loss, the operation breaks and the gradients zero. Why would this be the case?
EDIT: Tensorboard Graph