I'm training a CNN for image classification. Because my data set is small, I'm using transfer learning: I start from the pre-trained network Google provides in its retrain example (https://www.tensorflow.org/tutorials/image_retraining).
The model works great and gives very good accuracy. But my dataset is highly imbalanced, which means accuracy is not the best metric to judge the performance of the model.
Looking into different solutions, some suggest changing the sampling method and others changing the performance metric. I'm choosing to go with the latter.
TensorFlow provides a good variety of metrics, including AUC, precision, recall, etc.
Now, here is the code of the retraining model:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/image_retraining/retrain.py
I'm adding the following to add_evaluation_step(result_tensor, ground_truth_tensor) function:
# Fragment added inside add_evaluation_step(result_tensor, ground_truth_tensor).
# NOTE(review): indentation was lost when this snippet was pasted, so the nesting of the
# name scopes below cannot be read off the text -- confirm against the real file.
with tf.name_scope('AUC'):
with tf.name_scope('prediction'):
# argmax along axis 1 of result_tensor.
prediction = tf.argmax(result_tensor, 1)
with tf.name_scope('AUC'):
# NOTE(review): the traceback that follows shows this call unpacked as
# `auc_value, update_op = tf.metrics.auc(...)`, i.e. it yields a pair; as written here
# auc_value would hold the whole pair -- confirm which version was actually run.
# The traceback also shows the metric creating its counters (e.g. false_positives)
# through _create_local, which is the variable the FailedPreconditionError names.
auc_value = tf.metrics.auc(tf.argmax(ground_truth_tensor, 1), prediction, curve='ROC')
tf.summary.scalar('accuracy', evaluation_step)
tf.summary.scalar('AUC', auc_value)
But I'm getting this error:
Traceback (most recent call last): File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/examples/image_retraining/retrain.py",
line 1135, in
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/platform/app.py",
line 44, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/examples/image_retraining/retrain.py",
line 911, in main
ground_truth_input: train_ground_truth}) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/client/session.py",
line 767, in run
run_metadata_ptr) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/client/session.py",
line 965, in _run
feed_dict_string, options, run_metadata) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/client/session.py",
line 1015, in _do_run
target_list, options, run_metadata) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/client/session.py",
line 1035, in _do_call
raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.FailedPreconditionError:
Attempting to use uninitialized value AUC/AUC/auc/false_positives
[[Node: AUC/AUC/auc/false_positives/read = IdentityT=DT_FLOAT,
_class=["loc:#AUC/AUC/auc/false_positives"], _device="/job:localhost/replica:0/task:0/cpu:0"]]
Caused by op u'AUC/AUC/auc/false_positives/read', defined at: File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/examples/image_retraining/retrain.py",
line 1135, in
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/platform/app.py",
line 44, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/examples/image_retraining/retrain.py",
line 874, in main
final_tensor, ground_truth_input) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/examples/image_retraining/retrain.py",
line 806, in add_evaluation_step
auc_value, update_op = tf.metrics.auc(tf.argmax(ground_truth_tensor, 1), prediction,
curve='ROC') File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/metrics_impl.py",
line 555, in auc
labels, predictions, thresholds, weights) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/metrics_impl.py",
line 473, in _confusion_matrix_at_thresholds
false_p = _create_local('false_positives', shape=[num_thresholds]) File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/metrics_impl.py",
line 177, in _create_local
validate_shape=validate_shape) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/variables.py",
line 226, in init
expected_shape=expected_shape) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/variables.py",
line 344, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read") File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/ops/gen_array_ops.py",
line 1490, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name) File
"/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/framework/op_def_library.py",
line 768, in apply_op
op_def=op_def) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/framework/ops.py",
line 2402, in create_op
original_op=self._default_original_op, op_def=op_def) File "/home/user_2/tensorflow/bazel-bin/tensorflow/examples/image_retraining/retrain.runfiles/org_tensorflow/tensorflow/python/framework/ops.py",
line 1264, in init
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use
uninitialized value AUC/AUC/auc/false_positives [[Node:
AUC/AUC/auc/false_positives/read = IdentityT=DT_FLOAT,
_class=["loc:#AUC/AUC/auc/false_positives"], _device="/job:localhost/replica:0/task:0/cpu:0"]]
But I don't understand why this happens, because in main I have this:
init = tf.global_variables_initializer()
sess.run(init)
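Try this. tf.metrics.auc keeps its counters (such as false_positives) in local variables, which tf.global_variables_initializer() does not initialize, so the local-variable initializer has to be run as well: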
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init)
Related
Very beginner question, I hope that's fine
I'm trying to train this model from GitHub with the MAPS dataset, and I made new .tfrecords with this code for the train set. It is based on the code here, but I altered some things to make way for a different input (another MIDI file I'm just calling "tempo MIDI").
def create_train_set(tempopath, train_list, outdir, min_length, max_length):
    """Write ``train.tfrecord`` in *outdir* from WAV / MIDI / tempo-MIDI triples.

    Args:
        tempopath: Prefix that is concatenated (plain string ``+``) with each
            WAV base name to locate its ``_tempo.mid`` / ``_tempo.midi`` file;
            a directory must therefore already end with a path separator.
        train_list: Iterable of WAV paths given *without* the ``.wav``
            extension; the extension is appended here.
        outdir: Directory that receives ``train.tfrecord``.
        min_length: Forwarded unchanged to ``aldu.process_record``.
        max_length: Forwarded unchanged to ``aldu.process_record``.
    """
    train_file_pairs = []
    # Find the matching MIDI and tempo-MIDI file for every WAV path.  When both
    # a '.mid' and a '.midi' variant exist, the '.midi' one wins; when neither
    # exists the path stays '' and is handed to the MIDI reader as-is.
    for wav_path in train_list:
        midi_file = ''
        for ext in ('.mid', '.midi'):
            if os.path.isfile(wav_path + ext):
                midi_file = wav_path + ext

        tempo_midi_file = ''
        tempo_stem = tempopath + os.path.basename(wav_path) + '_tempo'
        for ext in ('.mid', '.midi'):
            if os.path.isfile(tempo_stem + ext):
                tempo_midi_file = tempo_stem + ext

        wav_file = wav_path + '.wav'
        train_file_pairs.append((wav_file, midi_file, tempo_midi_file))

    train_output_name = os.path.join(outdir, 'train.tfrecord')
    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for idx, pair in enumerate(train_file_pairs):
            print('{} of {}: {}'.format(idx, len(train_file_pairs), pair[0]))
            # Load the wav data; the context manager closes the handle, which
            # the previous bare Open(...).read() never did.
            with tf.gfile.Open(pair[0], 'rb') as wav_handle:
                wav_data = wav_handle.read()
            # Load the MIDI data and convert each file to a NoteSequence.
            ns = midi_io.midi_file_to_note_sequence(pair[1])
            tempo = midi_io.midi_file_to_note_sequence(pair[2])
            # aldu = audio_label_data_utils.py
            # NOTE(review): sample_rate is not defined in this function;
            # presumably a module-level name -- confirm.
            for example in aldu.process_record(
                    wav_data, ns, tempo, pair[0], min_length, max_length,
                    sample_rate):
                writer.write(example.SerializeToString())
with the tf.Example as follows:
# Builds one tf.train.Example whose five features are all single-element bytes lists.
example = tf.train.Example(
features=tf.train.Features(
feature={
'id':
tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[example_id.encode('utf-8')])),
'sequence':
tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[ns.SerializeToString()])),
'audio':
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[wav_data])),
# NOTE(review): the 'tempo' feature serializes velocity_range, exactly like the
# 'velocity_range' feature below -- this looks like a copy-paste slip; presumably
# the tempo sequence was meant to be serialized here. Confirm and fix.
'tempo':
tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[velocity_range.SerializeToString()])),
'velocity_range':
tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[velocity_range.SerializeToString()])),
}))
However, when I try to train the model, I get this error message (I marked the py scripts with a print line so I know where everything's going):
Running wav_to_spec from data.py
Running _wav_to_mel in data.py
Running wav_to_num_frames from data.py
Running wav_to_spec from data.py
Running _wav_to_mel in data.py
Running wav_to_num_frames from data.py
E0611 07:56:55.419340 8436 error_handling.py:70] Error recorded from training_loop: Input to reshape is a tensor with 0 values, but the requested shape has 54912
[[{{node Reshape_8}}]]
[[IteratorGetNext]]
I0611 07:56:55.420338 8436 error_handling.py:96] training_loop marked as finished
W0611 07:56:55.421335 8436 error_handling.py:130] Reraising captured error
Traceback (most recent call last):
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1356, in _do_call
return fn(*args)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1429, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 0 values, but the requested shape has 54912
[[{{node Reshape_8}}]]
[[IteratorGetNext]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "onsets_frames_transcription_train.py", line 128, in <module>
console_entry_point()
File "onsets_frames_transcription_train.py", line 124, in console_entry_point
tf.app.run(main)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\absl\app.py", line 300, in run
_run_main(main, args)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\absl\app.py", line 251, in _run_main
sys.exit(main(argv))
File "onsets_frames_transcription_train.py", line 120, in main
additional_trial_info=additional_trial_info)
File "onsets_frames_transcription_train.py", line 95, in run
num_steps=FLAGS.num_steps)
File "C:\Users\User\magenta\magenta\models\onsets_frames_transcription\train_util.py", line 134, in train
estimator.train(input_fn=transcription_data, max_steps=num_steps)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\tpu\tpu_estimator.py", line 2876, in train
rendezvous.raise_errors()
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\tpu\error_handling.py", line 131, in raise_errors
six.reraise(typ, value, traceback)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\tpu\tpu_estimator.py", line 2871, in train
saving_listeners=saving_listeners)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 367, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1158, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1192, in _train_model_default
saving_listeners)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1484, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 754, in run
run_metadata=run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1252, in run
run_metadata=run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1353, in run
raise six.reraise(*original_exc_info)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1338, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1411, in run
run_metadata=run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1169, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 950, in run
run_metadata_ptr)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1173, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1350, in _do_run
run_metadata)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1370, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 0 values, but the requested shape has 54912
[[{{node Reshape_8}}]]
[[IteratorGetNext]]
From that, I figured the problem lies in wav_to_num_frames but this is the only code for it.
def wav_to_num_frames(wav_audio, frames_per_second):
    """Transforms a wav-encoded audio string into number of frames.

    Args:
        wav_audio: Bytes of a complete WAV file (header included).
        frames_per_second: Number of output frames per second of audio.

    Returns:
        ``np.int32``: the audio duration (sample count divided by sample rate)
        multiplied by ``frames_per_second``, converted to a 32-bit integer.
    """
    print("Running wav_to_num_frames from data")
    w = wave.open(six.BytesIO(wav_audio))
    try:
        # NOTE(review): relies on true division; under Python 2 this needs
        # `from __future__ import division` at file level -- confirm.
        return np.int32(w.getnframes() / w.getframerate() * frames_per_second)
    finally:
        # Release the reader even if one of the header queries raises.
        w.close()
I didn't get this problem back when I tried training the model with tfrecords created with the original code, so I don't know what's wrong.
It turns out that problem wasn't the created .tfrecords itself but rather the size of the tensors I assigned for the newly added data. There isn't a concrete answer for this though since it's very specific to this situation.
I have implemented an autoencoder on my custom image data for sign language recognition. Now I want to save the tensor object of the output layer to a NumPy array. I tried Session.run(tensor) and tensor.eval(). Here is my code.
#define model
# NOTE(review): indentation was lost in this paste, so which statements sit inside the
# `with tf.Session()` block and inside the `for` loop below cannot be read off the text.
# Placeholders: x feeds the network input (784 columns), y_ the one-hot targets (6 columns).
x= tf.placeholder(tf.float32,[None,784])
y_=tf.placeholder(tf.float32,[None,6])
# Widths of the five hidden layers.
k=190
l=180
m=150
n=130
o=100
num_of_epoch=10
# Weights start from a truncated normal (stddev 0.1), biases from zero.
w1=tf.Variable(tf.truncated_normal([784,k],stddev=0.1))
b1=tf.Variable(tf.zeros([k]))
w2=tf.Variable(tf.truncated_normal([k,l],stddev=0.1))
b2=tf.Variable(tf.zeros([l]))
w3=tf.Variable(tf.truncated_normal([l,m],stddev=0.1))
b3=tf.Variable(tf.zeros([m]))
w4=tf.Variable(tf.truncated_normal([m,n],stddev=0.1))
b4=tf.Variable(tf.zeros([n]))
w5=tf.Variable(tf.truncated_normal([n,o],stddev=0.1))
b5=tf.Variable(tf.zeros([o]))
w6=tf.Variable(tf.truncated_normal([o,6],stddev=0.1))
b6=tf.Variable(tf.zeros([6]))
# Five ReLU layers followed by a 6-way softmax; every layer ultimately depends on x.
y1=tf.nn.relu(tf.matmul(x,w1)+b1)
y2=tf.nn.relu(tf.matmul(y1,w2)+b2)
y3=tf.nn.relu(tf.matmul(y2,w3)+b3)
y4=tf.nn.relu(tf.matmul(y3,w4)+b4)
y5=tf.nn.relu(tf.matmul(y4,w5)+b5)
y=tf.nn.softmax(tf.matmul(y5,w6)+b6)
# Cross-entropy between targets y_ and softmax output y, averaged over rows.
cross_entropy=tf.reduce_mean(-tf.reduce_sum(y_*tf.log(y),
reduction_indices=[1]))
train_step=tf.train.GradientDescentOptimizer(0.03).minimize(cross_entropy)
init=tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(num_of_epoch):
# Each step feeds the whole training set at once.
train_data = {x:x_train,y_:y_train}
sess.run(train_step,feed_dict=train_data)
# Accuracy on the training data; the value returned by sess.run is not stored.
currect_prediction=tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
accuracy=tf.reduce_mean(tf.cast(currect_prediction,tf.float32))
sess.run(accuracy,feed_dict={x:x_train,y_:y_train})
# Same accuracy ops built a second time, now evaluated on the test data (result also unused).
currect_prediction=tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
accuracy=tf.reduce_mean(tf.cast(currect_prediction,tf.float32))
sess.run(accuracy,feed_dict= {x:x_test,y_:y_test})
# No feed_dict is passed here although y depends on the placeholder x.
y_p = tf.argmax(y, 1).eval() #this line shows me the error
print(y_p)
I am getting the error below. How can I fix it and save the tensor data to a NumPy array?
Traceback (most recent call last):
File "<ipython-input-45-5e38490a3e8e>", line 1, in <module>
runfile('C:/Users/RIFAT/PycharmProjects/tensorflow_autoencoder
/autoencoderreconstruction.py',
wdir='C:/Users/RIFAT/PycharmProjects/tensorflow_autoencoder')
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils
\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils
\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/RIFAT/PycharmProjects/tensorflow_autoencoder
/autoencoderreconstruction.py", line 112, in <module>
y_p = tf.argmax(y, 1).eval()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\framework\ops.py", line 606, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\framework\ops.py", line 3928, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python\client
\session.py", line 789, in run
run_metadata_ptr)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python\client
\session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python\client
\session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python\client
\session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: Shape [-1,784] has negative dimensions
[[Node: Placeholder_62 = Placeholder[dtype=DT_FLOAT, shape=[?,784],
_device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'Placeholder_62', defined at:
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils\ipython
\start_kernel.py", line 231, in <module>
main()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils\ipython
\start_kernel.py", line 227, in main
kernel.start()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel\kernelapp.py",
line 477, in start
ioloop.IOLoop.instance().start()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\zmq\eventloop
\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tornado\ioloop.py", line
888, in start
handler_func(fd_obj, events)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tornad
\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\zmq\eventloop
\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\zmq\eventloop
\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\zmq\eventloop
\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tornado
\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel
\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel
\kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel
\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel\ipkernel.py",
line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\ipykernel\zmqshell.py",
line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\IPython
\core\interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\IPython
\core\interactiveshell.py", line 2827, in run_ast_nodes
if self.run_code(code, result):
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\IPython
\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-45-5e38490a3e8e>", line 1, in <module>
runfile('C:/Users/RIFAT/PycharmProjects/tensorflow_autoencoder
/autoencoderreconstruction.py', wdir='C:/Users/RIFAT/PycharmProjects
/tensorflow_autoencoder')
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils
\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\spyder\utils
\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/RIFAT/PycharmProjects/tensorflow_autoencoder
/autoencoderreconstruction.py", line 62, in <module>
x= tf.placeholder(tf.float32,[None,784])
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\ops\array_ops.py", line 1530, in placeholder
return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\ops\gen_array_ops.py", line 1954, in _placeholder
name=name)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\RIFAT\Anaconda3\lib\site-packages\tensorflow\python
\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Shape [-1,784] has
negative dimensions
[[Node: Placeholder_62 = Placeholder[dtype=DT_FLOAT, shape=[?,784],
_device="/job:localhost/replica:0/task:0/cpu:0"]()]]
That's because y is a tensor in the graph and not a variable. When you run .eval() on a variable, it gives you the current value held by that variable in that session, but if you run .eval() on a tensor instead, like tf.argmax(y, 1).eval() in your case, then TensorFlow runs the graph up to that node to get its value. Since in your case it is not given values for the placeholders (here x, which y depends on) while running the graph, it raises the error. One way to resolve this error is to pass the values of the placeholders in your eval call, like this:
tf.argmax(y, 1).eval(feed_dict= {x:x_test,y_:y_test})
But, a more preferred way is giving the context of your session to the eval call in which case it will return the value of the tensor. Ex:
tf.argmax(y, 1).eval(session = sess)
Your question is not 100% clear. But the error you are seeing is caused by the fact that you try to run the graph without the feed dict in place. To see the output of the prediction (that is with the argmax(y, 1) present) you'd simply run:
y_p = sess.run(tf.argmax(y, 1), feed_dict=train_data)
print(y_p)
But that will give you the actual predicted class (on the training data, since that is what is fed; to get it on the test data, simply feed in the test data instead). To get the probabilities, you'd pull y without the argmax:
y_p = sess.run(y, feed_dict=train_data)
print(y_p)
I'm trying to retrain inceptionV3 on my RPi3. I'm getting this histogram error message.
python /home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py --bottleneck_dir=/home/pi/Documents/Machine\ Learning/Inception/tf_files/bottlenecks --how_many_training_steps 500 --model_dir=/home/pi/Documents/Machine\ Learning/Inception/tf_files/inception --output_graph=/home/pi/Documents/Machine\ Learning/Inception/tf_files/retrained_graph.pb --output_labels=/home/pi/Documents/Machine\ Learning/Inception/tf_files/retrained_labels.txt --image_dir /home/pi/Documents/Machine\ Learning/Inception/Retraining_Images
Looking for images in 'Granny Smith Apple'
Looking for images in 'Red Delicious'
100 bottleneck files created.
200 bottleneck files created.
2017-01-07 11:30:22.180768: Step 0: Train accuracy = 56.0%
2017-01-07 11:30:22.242166: Step 0: Cross entropy = nan
2017-01-07 11:30:22.850969: Step 0: Validation accuracy = 50.0%
Traceback (most recent call last):
File "/home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py", line 938, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 30, in run
sys.exit(main(sys.argv[:1] + flags_passthrough))
File "/home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py", line 887, in main
ground_truth_input: train_ground_truth})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 717, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 915, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 985, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: Nan in summary histogram for: HistogramSummary
[[Node: HistogramSummary = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary/tag, final_result)]]
Caused by op u'HistogramSummary', defined at:
File "/home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py", line 938, in <module>
tf.app.run()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 30, in run
sys.exit(main(sys.argv[:1] + flags_passthrough))
File "/home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py", line 846, in main
bottleneck_tensor)
File "/home/pi/Tensorflow/tensorflow/tensorflow/examples/image_retraining/retrain.py", line 764, in add_final_training_ops
tf.histogram_summary(final_tensor_name + '/activations', final_tensor)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/logging_ops.py", line 100, in histogram_summary
tag=tag, values=values, name=scope)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_logging_ops.py", line 100, in _histogram_summary
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2380, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1298, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: HistogramSummary
[[Node: HistogramSummary = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](HistogramSummary/tag, final_result)]]
I tried changing merged = tf.merge_all_summaries() in retrain.py after reading this,
but it didn't work.
Also, the first time I tried to retrain, I got different results for step 0 before hitting an error:
2017-01-07 11:13:36.548913: Step 0: Train accuracy = 89.0%
2017-01-07 11:13:36.555770: Step 0: Cross entropy = 0.590778
2017-01-07 11:13:37.052190: Step 0: Validation accuracy = 76.0%
It sounds like it would help to know where the NaN values are coming from. For that, take a look at the TensorFlow debugger (tfdbg):
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/how_tos/debugger/index.md
In your retrain.py, you can make a change like
from tensorflow.python import debug as tf_debug
# ...
# In def main(_)
if debug:
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
# ...
Then when the sess.run() happens for the training and evaluation, you will drop into the command-line interface of the debugger. At the tfdbg> prompt, you can enter command to let the code run until any NaNs or Infinities appear in the TensorFlow graph:
tfdbg> run -f has_inf_or_nan
When the tensor filter has_inf_or_nan is hit, the interface will give you a list of Tensors containing Infs or Nans, sorted in time order. The one on the top should be the "culprit", i.e., the one that first generated the bad numerical values. Say its name is node_1, you can use the following tfdbg commands to look at its inputs and node attributes:
tfdbg> li -r node_1
tfdbg> ni -a node_1
If you're using tf.contrib.learn you'll want to use the following:
debug_hook = tf_debug.LocalCLIDebugHook()
debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
hooks = [debug_hook]
...
classifier.fit(..., monitors=hooks)
My graph looks like this
# Build the graph: an embedding lookup feeding a single LSTM layer run by dynamic_rnn.
with graph.as_default():
    train_inputs = tf.placeholder(tf.int32, shape=[None, None])
    # NOTE(review): the original paste lost its indentation; the extent of this
    # device scope is assumed to cover only the embedding variable -- confirm.
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(tf.zeros([vocab_size, options.embed_size]))
    restorer = tf.train.Saver({'embeddings': embeddings})
    # init covers `embeddings` only; variables created further down (by the
    # LSTM cell) are not part of it.
    init = tf.variables_initializer([embeddings])
    uninit = tf.report_uninitialized_variables()
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)
    # length() returns a [batch_size,] tensor of true lengths of sentences (lengths before zero-padding)
    sequence_length = length(embed)
    lstm = tf.nn.rnn_cell.LSTMCell(options.rnn_size)
    # The keyword is `sequence_length` (it was misspelled `swequence_length`;
    # the traceback for this script shows the correct spelling).
    output, _ = tf.nn.dynamic_rnn(
        lstm,
        embed,
        dtype=tf.float32,
        sequence_length=sequence_length
    )
And my session:
# NOTE(review): indentation was lost in this paste; the nesting of the session block and
# the while loop is not recoverable from the text. Uses Python 2 print statements.
with tf.Session(graph=graph) as session:
# restorer was built as tf.train.Saver({'embeddings': embeddings}), so only that
# variable is restored here.
restorer.restore(session, options.restore_path)
# tf.global_variables_initializer.run()
# NOTE(review): init was built from tf.variables_initializer([embeddings]) (initial value
# tf.zeros) and runs after the restore above -- this looks like it would reset the restored
# embeddings; it also leaves every other variable uninitialized. Confirm the intent.
init.run()
print session.run([uninit])
while len(data.ids):
# data.generate_batch returns a list of size [batch_size, max_length], and zero-padding is used, when the sentences are shorter than max_length. For example, batch_inputs = [[1,2,3,4], [3,2,1,0], [1,2,0,0]]
batch_inputs, _ = data.generate_batch(options.batch_size)
feed_dict = {train_inputs: batch_inputs}
# A new tf.shape op is created on every pass through the loop.
test = session.run([tf.shape(output)], feed_dict=feed_dict)
print test
Function length():
def length(self, sequence):
    """Return the number of non-padding time steps of each sequence.

    Args:
        sequence: Zero-padded batch tensor. Either rank 2
            ([batch, time], e.g. token ids) or rank 3
            ([batch, time, features], e.g. embedded inputs).

    Returns:
        An int32 tensor of shape [batch] holding the per-sequence lengths.
    """
    if sequence.get_shape().ndims == 3:
        # Collapse the feature axis first so every time step contributes a
        # single 0/1 flag; otherwise the sum below would keep the feature
        # axis and yield [batch, features] instead of [batch]. A step counts
        # as padding only when all of its features are zero.
        used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
    else:
        used = tf.sign(sequence)
    # Sum the flags over the time axis (axis 1).
    lengths = tf.reduce_sum(used, 1)
    return tf.cast(lengths, tf.int32)
The error I got:
Traceback (most recent call last):
File "rnn.py", line 103, in <module>
test = session.run([tf.shape(output)], feed_dict=feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value RNN/LSTMCell/W_0
[[Node: RNN/LSTMCell/W_0/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/LSTMCell/W_0)]]
Caused by op u'RNN/LSTMCell/W_0/read', defined at:
File "rnn.py", line 75, in <module>
sequence_length=sequence_length,
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 845, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 1012, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2636, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2469, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2419, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 995, in _time_step
skip_conditionals=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 403, in _rnn_step
new_output, new_state = call_cell()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 983, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 496, in __call__
dtype, self._num_unit_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 329, in _get_concat_variable
sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 359, in _get_sharded_variable
dtype=dtype))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1024, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 850, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 346, in get_variable
validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 331, in _true_getter
caching_device=caching_device, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 677, in _get_single_variable
expected_shape=shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 224, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 367, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1424, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value RNN/LSTMCell/W_0
[[Node: RNN/LSTMCell/W_0/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/LSTMCell/W_0)]]
However, when I printed out the uninitialized variables, I got [array([], dtype=object)]
When I replaced init.run() with tf.global_variables_initializer().run(), it worked.
Any idea why init.run() doesn't work?
You defined init as follows:
init = tf.variables_initializer([embeddings])
This definition means that init initializes only the embeddings variable. Calling the tf.nn.dynamic_rnn() function creates more variables, representing the various internal weights in the LSTM, and these are not initialized by init.
By contrast, tf.global_variables_initializer() returns an operation that, when run, will initialize all of the (global) variables in your model, including those created for the LSTM.
Hi I'm running the Linux CPU version of tensorflow on Ubuntu 14.04 and I'm running out of memory when I try to save my model. I'm using the tutorial for Deep MNIST that builds a convolution network. You can find it here:
https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html#deep-mnist-for-experts
I changed a couple of things and tried to add a Saver to export the model weights. However when I run it I get an error that says I am out of memory. Which doesn't make sense to me because it can train the data forever but saving it somehow uses too much memory?
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.06
W tensorflow/core/framework/op_kernel.cc:909] Resource exhausted: OOM when allocating tensor with shape[10000,28,28,32]
Traceback (most recent call last):
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 66, in <module>
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10000,28,28,32]
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape, Variable/read)]]
Caused by op u'Conv2D', defined at:
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 28, in <module>
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 18, in conv2d
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()`
This is what it outputs when I run it. Thanks so much!