I am facing the following problem. I want a function that, given the number of points in each hidden layer, creates the weights for a simple NN.
def initialize_parameters(hidden):
    parameters = dict({})

    def W_creator(b, a, i):
        return tf.get_variable("W" + str(i + 1), [b, a], initializer=tf.contrib.layers.xavier_initializer(seed=1))

    def b_creator(b, i):
        return tf.get_variable('b' + str(i + 1), [b, 1], initializer=tf.zeros_initializer())

    for l in range(len(hidden)):
        parameters['W' + str(l + 1)] = W_creator(hidden[l + 1], hidden[l], l)
        parameters['b' + str(l + 1)] = b_creator(hidden[l + 1], l)

    return parameters
I then call this function using
tf.reset_default_graph()
with tf.Session() as sess:
    parameters = initialize_parameters()
    print("W1 = " + str(parameters["W1"]))
    print("b1 = " + str(parameters["b1"]))
    print("W2 = " + str(parameters["W2"]))
    print("b2 = " + str(parameters["b2"]))
to check that everything is fine, but an IndexError is raised:
29 for l in range(len(hidden)):
---> 30 W = W_creator(hidden[l+1],hidden[l],l)
31 parameters['W'+str(l+1)] = W
32
IndexError: list index out of range
Can anyone help with this?
Since you use hidden[l+1], your loop has to stop one step earlier to avoid the "out of range" IndexError:
for l in range(len(hidden)-1):
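For reference, a minimal sketch of the corrected function (assuming hidden lists the layer sizes from input to output, so there are len(hidden) - 1 weight matrices):

def initialize_parameters(hidden):
    parameters = {}

    def W_creator(b, a, i):
        return tf.get_variable("W" + str(i + 1), [b, a], initializer=tf.contrib.layers.xavier_initializer(seed=1))

    def b_creator(b, i):
        return tf.get_variable('b' + str(i + 1), [b, 1], initializer=tf.zeros_initializer())

    # hidden[l + 1] is the size of the next layer, so stop at len(hidden) - 1
    for l in range(len(hidden) - 1):
        parameters['W' + str(l + 1)] = W_creator(hidden[l + 1], hidden[l], l)
        parameters['b' + str(l + 1)] = b_creator(hidden[l + 1], l)

    return parameters

parameters = initialize_parameters([5, 4, 3])   # e.g. 5 inputs, a hidden layer of 4, 3 outputs

Note also that the session code in the question calls initialize_parameters() with no argument; that would raise a TypeError rather than an IndexError, so hidden needs to be passed there as well.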
I want to enumerate the binary series generated with the code below (just copy-paste it to see what I'm trying to do). I used a global variable but still cannot find a way to pass the values of the counters (nn, nx, ny). Please don't mind how to generate the same series in a better way; I just want to know how to pass the counter values through these recursions so the output is numbered as in the image at the head of this post. Thanks.
def ConcatenateString(saccum, nn):
    if len(saccum) < 4:
        biset = [1, 0]
        for a in biset:
            if a == 1:
                prevstring = saccum
                newsaccum = saccum + str(a)
                nx = nn + 1
                print(nx, newsaccum)
                ConcatenateString(newsaccum, nx)
            else:
                newsaccum = prevstring + str(a)
                ny = nx + 1
                print(ny, newsaccum)
                ConcatenateString(newsaccum, ny)
        nn = ny
    return (nn)

## MAIN
newstring = str("")
nc = 0
ConcatenateString(newstring, nc)
You should pass nn to the function and get it back to continue counting:
nn = ConcatenateString(newsaccum, nn)
def ConcatenateString(saccum, nn):
    if len(saccum) < 4:
        biset = [1, 0]
        for a in biset:
            if a == 1:
                prevstring = saccum
                newsaccum = saccum + str(a)
                nn += 1
                print(nn, newsaccum)
                nn = ConcatenateString(newsaccum, nn)
            else:
                newsaccum = prevstring + str(a)
                nn += 1
                print(nn, newsaccum)
                nn = ConcatenateString(newsaccum, nn)
    return nn

ConcatenateString("", 0)
EDIT: Reduced version.
def ConcatenateString(saccum, nn):
    if len(saccum) < 4:
        biset = [1, 0]
        for a in biset:
            if a == 1:
                prevstring = saccum
                newsaccum = saccum + str(a)
            else:
                newsaccum = prevstring + str(a)
            nn += 1
            print(nn, newsaccum)
            nn = ConcatenateString(newsaccum, nn)
    return nn

ConcatenateString("", 0)
I have a model that operates on a large dataset - not all that large by Big Data standards but significantly more than my home server can hold in memory. As such, I'm using fit_generator to load it a chunk at a time, so that it never has to hold more than one minibatch in memory at once.
... At least, that's the theory. But when Keras hung at Epoch 1/10 without even starting the "training" animation, and I (eventually) got an out-of-memory exception (the minibatches are large, but I can still hold a couple of them in memory at once without trouble), I got suspicious and threw a bunch of testing print statements into my generator. Lo and behold, Keras was invoking the generator three or four times before even starting (or appearing to start).
So... what's going on here? Is this normal, or did I implement my generator wrong somehow? How can I get it not to try and load more than one batch at once?
Code follows, in case it helps:
def data_gen(directory):
    def epsilon_div(x, y):
        return (x + K.epsilon()) / (y + K.epsilon())

    while True:
        filelist = os.listdir(directory + "/data")
        order = np.random.permutation(len(filelist))
        for i in order:
            dataf = directory + "/data/" + filelist[i]
            labelf = directory + "/labels/" + filelist[i]

            with open(dataf, 'rb') as f:
                databook = sb.Songbook.FromString(f.read())
            with open(labelf, 'rb') as f:
                labelbook = sb.Songbook.FromString(f.read())
            print('Booked')

            l, _, r, _ = sb_np_extract(databook)
            ll, _, lr, _ = sb_np_extract(labelbook)
            databook = None
            labelbook = None
            print('Extracted')

            l = l.transpose([0, 2, 1])
            r = r.transpose([0, 2, 1])
            ll = ll.transpose([0, 2, 1])
            lr = lr.transpose([0, 2, 1])
            print('Chosen')

            mask_l = epsilon_div(ll, l)
            mask_r = epsilon_div(lr, r)
            print('Done')

            yield [[l, r], [mask_l, mask_r]]
I suggest you save the filenames (with their full paths) in arrays and shuffle them. Below, the shuffled data and label file lists are called listname_data and listname_labels and are passed into the generator.
from random import shuffle

steps = len(listname_data)   # steps = number of files

def generator(listname_data, listname_labels, steps):
    # the lists are passed in so they can be rebound when reshuffling below
    i = 0
    while True:
        dataf = listname_data[i]
        labelf = listname_labels[i]
        ...
        if i == steps - 1:
            i = 0
            # reshuffle data and label paths together after each full pass
            c = list(zip(listname_data, listname_labels))
            shuffle(c)
            listname_data, listname_labels = zip(*c)
        else:
            i += 1
        yield [[l, r], [mask_l, mask_r]]
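One more usage note: Keras keeps an internal queue of pre-loaded batches (the max_queue_size argument of fit_generator, which defaults to 10), which is most likely why the generator gets called several times before training visibly starts. If memory is tight, a sketch along these lines limits the prefetching (model, steps, and the shuffled lists are assumed from the context above):

model.fit_generator(generator(listname_data, listname_labels, steps),
                    steps_per_epoch=steps,
                    epochs=10,
                    max_queue_size=1,   # hold at most one pre-loaded batch in the queue
                    workers=1)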
I would like to use a TensorFlow Dataset built with from_generator to access a formatted file. Almost everything works, except that I don't know how to stop the Dataset iterator when the generator runs out of data (the generator just returns empty lists forever once you go out of range).
My actual code is very complex, but I can mock up the situation with this short program:
import tensorflow as tf

def make_batch_generator_fn(batch_size=10, dset_size=100):
    feats, targs = range(dset_size), range(1, dset_size + 1)

    def batch_generator_fn():
        start_idx, stop_idx = 0, batch_size
        while True:
            # if stop_idx > dset_size: --- stop action?
            yield feats[start_idx: stop_idx], targs[start_idx: stop_idx]
            start_idx, stop_idx = start_idx + batch_size, stop_idx + batch_size

    return batch_generator_fn

def test(batch_size=10):
    dgen = make_batch_generator_fn(batch_size)
    features_shape, targets_shape = [None], [None]
    ds = tf.data.Dataset.from_generator(
        dgen, (tf.int32, tf.int32),
        (tf.TensorShape(features_shape), tf.TensorShape(targets_shape))
    )
    feats, targs = ds.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        counter = 0
        try:
            while True:
                f, t = sess.run([feats, targs])
                print(f, t)
                counter += 1
                if counter > 15:
                    break
        except tf.errors.OutOfRangeError:
            print('end of dataset at counter = {}'.format(counter))

if __name__ == '__main__':
    test()
If I know the number of records in advance, I can tune the number of batches, but I don't always know. I've tried putting some code in the snippet above where I have a comment line like stop action?. In particular, I've tried raising an IndexError, but TensorFlow doesn't like this, even if I explicitly catch it in my execution code. I also tried raising a tf.errors.OutOfRangeError, but I'm not sure how to instantiate it: the constructor requires three arguments - 'node_def', 'op', and 'message', and I'm not quite sure what to use for 'node_def' and 'op' in general.
I'd appreciate any thoughts or comments on this issue. Thanks!
Return when you meet your stop criteria:
def make_batch_generator_fn(batch_size=10, dset_size=100):
    feats, targs = range(dset_size), range(1, dset_size + 1)

    def batch_generator_fn():
        start_idx, stop_idx = 0, batch_size
        while True:
            if stop_idx > dset_size:
                return
            else:
                yield feats[start_idx: stop_idx], targs[start_idx: stop_idx]
                start_idx, stop_idx = start_idx + batch_size, stop_idx + batch_size

    return batch_generator_fn
This is in line with the behavior specified in the Python 3 documentation:
In a generator function, the return statement indicates that the generator is done and will cause StopIteration to be raised. The returned value (if any) is used as an argument to construct StopIteration and becomes the StopIteration.value attribute.
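A quick plain-Python check of the fixed generator (using the default batch_size=10 and dset_size=100 from above), before wiring it into from_generator:

gen = make_batch_generator_fn()()   # build and call the inner generator function
batches = list(gen)                 # the generator now terminates on its own
print(len(batches))                 # 10 batches; the one-shot iterator will then
                                    # raise OutOfRangeError in the session loop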
It also works with the following lines:
dataset_size = ...    # your dataset size
batch_size = ...      # your batch size
dataset = ...         # your tf.data.Dataset

steps_per_epoch = dataset_size // batch_size
for data, _ in zip(dataset, range(steps_per_epoch)):
    # your train_step
    ...
The iteration stops once it has gone through steps_per_epoch batches.
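For concreteness, here is a minimal self-contained sketch of the same idea (assuming TF 2.x eager execution; the sizes mirror the question's defaults, and the dataset here is built from tensors rather than the question's generator):

import tensorflow as tf

dset_size, batch_size = 100, 10
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.range(dset_size), tf.range(1, dset_size + 1))
).batch(batch_size)

steps_per_epoch = dset_size // batch_size
for (feats, targs), _ in zip(dataset, range(steps_per_epoch)):
    # one batch per step; the loop ends after steps_per_epoch iterations
    print(feats.numpy(), targs.numpy())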
I wrote this code to keep only the values of a Dataset that are <= 6.
import tensorflow as tf
import tensorflow.contrib.data as ds

def make_graph():
    inits = []
    filter_value = tf.constant([6], dtype=tf.int64)
    source = ds.Dataset.range(10)
    batched = source.batch(3)
    batched_iter = batched.make_initializable_iterator()
    batched_next = batched_iter.get_next()
    inits.append(batched_iter.initializer)
    predicate = tf.less_equal(batched_next, filter_value, name="less_than_filter")
    true_coordinates = tf.where(predicate)
    reshaped = tf.reshape(true_coordinates, [-1])
    # need to turn bools into 1 and 0 elsewhere
    found = tf.gather(params=batched_next, indices=reshaped)
    return found, inits  # prepend final tensor

def run_graph(final_tensor, initializers, rounds):
    with tf.Session() as sess:
        init_ops = (tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init_ops)
        summary_writer = tf.summary.FileWriter(graph=sess.graph, logdir=".")
        while rounds > 0:
            for i in initializers:
                sess.run(i)
            try:
                while True:
                    final_result = sess.run(final_tensor)
                    print("Got result: {r}".format(r=final_result))
            except tf.errors.OutOfRangeError:
                print("Got out of range error")
            rounds -= 1
            summary_writer.flush()

def run():
    final_tensor, initializers = make_graph()
    run_graph(final_tensor=final_tensor,
              initializers=initializers,
              rounds=1)

if __name__ == "__main__":
    run()
However, the results are as follows:
Got result: [0 1 2]
Got result: [3 4 5]
Got result: [6]
Got result: []
Got out of range error
Is there a way to filter out this empty Tensor? I tried to brainstorm ways to do this, maybe with a tf.while loop, but I'm not sure whether I'm missing something or whether such an operation (i.e. an OpKernel "dropping" an input by not producing output based on its value) is simply not possible in TensorFlow.
Keeping only values <= 6 BEFORE batching:
dataset = ds.Dataset.range(10)
dataset = dataset.filter(lambda v: v <= 6)
dataset = dataset.batch(3)
batched_iter = dataset.make_initializable_iterator()
This will generate batches containing only the data you want. Note that it's generally better to filter out the unwanted data before building the batches. This way, empty tensors will not be generated by the iterator.
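If it helps, here is a quick sanity check of the resulting batches (a minimal sketch using tf.data directly, assuming a TF 1.x session as in the question):

import tensorflow as tf

dataset = tf.data.Dataset.range(10)
dataset = dataset.filter(lambda v: v <= 6)
dataset = dataset.batch(3)

batched_iter = dataset.make_initializable_iterator()
batched_next = batched_iter.get_next()

with tf.Session() as sess:
    sess.run(batched_iter.initializer)
    try:
        while True:
            print(sess.run(batched_next))   # [0 1 2], then [3 4 5], then [6]
    except tf.errors.OutOfRangeError:
        pass                                # no empty batch is produced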
So I'm a real amateur trying to implement something you might call a 'simplified' version of the naive Bayes algorithm in Python, and I seem to be having a lot of trouble (perhaps because I'm not too sure I completely understand how the algorithm works). I would appreciate any help or suggestions. This is the code I have:
class GaussianNB(object):
    def __init__(self):
        '''
        Constructor
        '''
        # This variable will hold the gaussian distribution over your data
        # In fact, you need a distribution per class for each feature variable.
        # This can be done as a list of lists.
        self.classmodels_count = {}
        self.classmodels = {}
        self.classmodelsMeanAndVariance = {}
        self.featureTokenCount = 0
        self.featureTypeCount = 0

    def train(self, trainingdata):
        for i in trainingdata:
            current_class = i[0]
            features = i[1]
            if self.classmodels.has_key(current_class):
                current_class_model = self.classmodels[current_class]
                self.classmodels_count[current_class] = self.classmodels_count[current_class] + 1
            else:
                current_class_model = {}
                self.classmodels_count[current_class] = 1
            for f in features:
                feature = f[0]
                value = f[1]
                if current_class_model.has_key(feature):
                    list_of_values = current_class_model[feature]
                    list_of_values.append(value)
                    current_class_model[feature] = list_of_values
                else:
                    list_of_values = []
                    list_of_values.append(value)
                    current_class_model[feature] = list_of_values
            self.classmodels[current_class] = current_class_model

        for a_class in self.classmodels.keys():
            a_class_model = self.classmodels[a_class]
            a_class_model_mean_and_variance = {}
            for feature in a_class_model.keys():
                a_class_model_mean_and_variance[feature] = findMeanSD(np.array(a_class_model[feature]))
            self.classmodelsMeanAndVariance[a_class] = a_class_model_mean_and_variance

    def classify(self, testing_vecs):
        outputs = []
        for vec in testing_vecs:
            features = vec[1]
            class_model_output_prob = {}
            for a_class in self.classmodelsMeanAndVariance.keys():
                a_class_output_prob = 0.0
                a_class_model_mean_and_variance = self.classmodelsMeanAndVariance[a_class]
                for feature_value in features:
                    feature = feature_value[0]
                    value = feature_value[1]
                    # simply ignore a feature if it's not seen in training
                    if a_class_model_mean_and_variance.has_key(feature):
                        feature_mean = a_class_model_mean_and_variance[feature][0]
                        feature_std = a_class_model_mean_and_variance[feature][1]
                        a_class_output_prob = a_class_output_prob + math.log10(norm(value, feature_mean, feature_std))
                # ignoring P(class) prior.. assuming equal priors
                class_model_output_prob[a_class_output_prob] = a_class
            probs = class_model_output_prob.keys()
            print probs
            probs.sort()
            max_prob = probs[len(probs) - 1]
            max_class = class_model_output_prob[max_prob]
            outputs.append(max_class)
        return outputs
When running it on some data, the error I get is:
Traceback (most recent call last):
File "C:\Users\Toshiba\workspace\Assignment6\src\gnb_test.py", line 34, in
gaussian = Model.train(testData)
File "C:\Users\Toshiba\workspace\Assignment6\src\gnb.py", line 91, in train
for f in features:
TypeError: 'numpy.float64' object is not iterable
And I don't really understand what it means at all.
Your traceback suggests that the problem is that you are trying to iterate through features, but features is a float and not a list or tuple; basically, it can't be broken into individual elements. I think it is a float because the lines
for i in trainingdata:
    current_class = i[0]
    features = i[1]
suggest features keeps getting rewritten as a successive series of numbers, when what you seem to want is to save the numbers into an iterable type. Try
features = []
for i in trainingdata:
    current_class = i[0]
    features.append(i[1])
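For what it's worth, the train() method iterates over i[1] expecting (feature, value) pairs, so it assumes each training row looks roughly like this hypothetical example; if i[1] is a single float instead, you get exactly this TypeError:

# hypothetical shape train() expects: (class_label, [(feature, value), ...])
trainingdata = [
    ("classA", [("feature1", 1.2), ("feature2", 0.4)]),
    ("classB", [("feature1", 0.7), ("feature2", 2.9)]),
]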