I am trying to combine Keras 2.0 with PyMC3 to build a neural network. It is a modification of the code from Thomas Wiecki's Bayesian Deep Learning II.
This is the code I have:
import numpy as np
import pymc3 as pm
import theano
import theano.tensor as T
from keras.layers import Input, Dense
from keras import backend as K
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons
from scipy.stats import mode
X, Y = make_moons(noise=0.2, random_state=0, n_samples=1000)
X = scale(X)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.6)
ann_input = theano.shared(X_train.astype(np.float32))
ann_output = theano.shared(Y_train.astype(np.float32))
print (X_train.shape)
print (Y_train.shape)
class GaussWeights(object):
    def __init__(self):
        self.count = 0

    def __call__(self, shape, name='w', dtype=None):
        return pm.Normal(
            name, mu=0, sd=.1,
            testval=K.random_normal(shape, dtype=dtype),
            shape=shape)
n_hidden = 16
def build_ann(x, y, init):
    b = T.ones_like(x[:])
    rows = b.shape.eval()[0]
    cols = b.shape.eval()[1]
    with pm.Model() as m:
        i = Input(tensor=x, shape=(rows, cols))
        layer1 = Dense(16, kernel_initializer=init, activation='tanh')(i)
        layer2 = Dense(1, kernel_initializer=init, activation='sigmoid')(layer1)
        layer2 = layer2.reshape((rows,))
        out = pm.Bernoulli('out', layer2, observed=y)
    return m, out
#m,out = build_ann(ann_input, ann_output)
m,out = build_ann(ann_input, ann_output, GaussWeights())
with m:
    # Run ADVI, which returns posterior means, standard deviations, and the evidence lower bound (ELBO)
    ann_input.set_value(X_train.astype(np.float32))
    ann_output.set_value(Y_train.astype(np.float32))
    v_params = pm.variational.advi(n=50000)
    trace = pm.variational.sample_vp(v_params, draws=5000)
# Replace shared variables with testing set
ann_input.set_value(X_test.astype(np.float32))
ann_output.set_value(Y_test.astype(np.float32))
with m:
    ppc = pm.sample_ppc(trace, samples=500)
# Use probability of > 0.5 to assume prediction of class 1
pred = ppc['out'].mean(axis=0) > 0.5
pred_mode = mode(ppc['out'], axis=0).mode[0, :]
print (pred.shape)
print('Accuracy = {}%'.format((Y_test == pred).mean() * 100))
But I get the following error which I don't know how to fix:
Traceback (most recent call last):
File "keras_deep_learning.py", line 50, in <module>
m,out = build_ann(ann_input, ann_output, GaussWeights())
File "keras_deep_learning.py", line 43, in build_ann
layer1 = Dense(16,kernel_initializer=init, activation='tanh')(i)
File "/home/gbenga/.local/lib/python3.5/site-packages/keras/engine/topology.py", line 558, in __call__
self.build(input_shapes[0])
File "/home/gbenga/.local/lib/python3.5/site-packages/keras/layers/core.py", line 827, in build
constraint=self.kernel_constraint)
File "/home/gbenga/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "/home/gbenga/.local/lib/python3.5/site-packages/keras/engine/topology.py", line 391, in add_weight
weight = K.variable(initializer(shape), dtype=dtype, name=name)
File "/home/gbenga/.local/lib/python3.5/site-packages/keras/backend/theano_backend.py", line 143, in variable
value = value.eval()
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/gof/graph.py", line 516, in eval
self._fn_cache[inputs] = theano.function(inputs, self)
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/compile/function.py", line 326, in function
output_keys=output_keys)
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/compile/pfunc.py", line 486, in pfunc
output_keys=output_keys)
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/compile/function_module.py", line 1794, in orig_function
output_keys=output_keys).create(
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/compile/function_module.py", line 1446, in __init__
accept_inplace)
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/compile/function_module.py", line 177, in std_fgraph
update_mapping=update_mapping)
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/gof/fg.py", line 180, in __init__
self.__import_r__(output, reason="init")
File "/home/gbenga/.local/lib/python3.5/site-packages/theano/gof/fg.py", line 361, in __import_r__
raise MissingInputError("Undeclared input", variable=variable)
theano.gof.fg.MissingInputError: Undeclared input
Unfortunately, with Keras 2.0 you can no longer use a symbolic initializer for the weights. Try downgrading to Keras 1.2; it will work then.
See the following issues for reference:
https://github.com/fchollet/keras/issues/6546
https://github.com/fchollet/keras/issues/6551
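If downgrading is not an option, one workaround is to skip the Keras layers and declare the weights directly as PyMC3 variables, which is how Wiecki's original posts build the network. A minimal sketch for the two-feature moons data above (the helper name build_ann_pymc3_only and the layer sizes are my own):

import numpy as np
import pymc3 as pm

def build_ann_pymc3_only(x, y, n_hidden=16):
    # Numeric starting values for the variational approximation
    init_1 = np.random.randn(2, n_hidden).astype(np.float32)
    init_out = np.random.randn(n_hidden).astype(np.float32)
    with pm.Model() as m:
        w1 = pm.Normal('w1', mu=0, sd=.1, shape=(2, n_hidden), testval=init_1)
        w_out = pm.Normal('w_out', mu=0, sd=.1, shape=(n_hidden,), testval=init_out)
        act_1 = pm.math.tanh(pm.math.dot(x, w1))        # hidden layer
        p = pm.math.sigmoid(pm.math.dot(act_1, w_out))  # output probability
        pm.Bernoulli('out', p, observed=y)
    return m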
Related
I am building a simple recurrent neural network, consisting of an LSTM layer followed by a fully connected layer, to classify each row of data. My data 'x_train' is an ndarray of shape (210, 240, 1), and 'y_train' is an ndarray of shape (210,). The model builds normally, but whenever I run model.fit() I get the error: AttributeError: 'NoneType' object has no attribute 'dtype'.
I don't know what's wrong with the following code.
#%%
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import pandas as pd
import numpy as np
#%%
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(units=2)
])
#%%
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss=loss_fn)
#%%
x_train = np.random.randn(210,240,1)
y_train = np.random.binomial(1, 0.5,(210,))
#%%
model.fit(x_train, y_train, epochs=20)
The following are the whole error information:
Traceback (most recent call last):
File "D:\运筹优化\机器学习课程项目\时序数据预测\rnn_exploration.py", line 68, in <module>
model.fit(x_train, y_train, epochs=20)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 235, in fit
use_multiprocessing=use_multiprocessing)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 593, in _process_training_inputs
use_multiprocessing=use_multiprocessing)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 646, in _process_inputs
x, y, sample_weight=sample_weights)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2360, in _standardize_user_data
self._compile_from_inputs(all_inputs, y_input, x, y)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2618, in _compile_from_inputs
experimental_run_tf_function=self._experimental_run_tf_function)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\training\tracking\base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 416, in compile
endpoint.create_training_target(t, run_eagerly=self.run_eagerly)
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 3023, in create_training_target
self.loss_fn, K.dtype(self.output))
File "D:\python\anaconda\anaconda\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\backend.py", line 1237, in dtype
return x.dtype.base_dtype.name
AttributeError: 'NoneType' object has no attribute 'dtype'
Any help would be appreciated!
The problem still exists with numpy 1.21.3; downgrading to 1.19.5 works.
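For reference, pinning the working version (assuming a pip-managed environment):

pip install "numpy==1.19.5"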
I only wrote a simple model in model.py, and when I ran it, it gave the following error.
2021-02-08 22:20:11.872409: E tensorflow/core/common_runtime/executor.cc:641] Executor failed to create kernel. Unimplemented: Cast string to int32 is not supported
[[{{node embedding/Cast}}]]
Traceback (most recent call last):
File "C:\Users\xiaoc\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "C:\Users\xiaoc\Anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\xiaoc\AppData\Local\Google\Cloud SDK\trainer\task.py", line 55, in
train_model(args)
File "C:\Users\xiaoc\AppData\Local\Google\Cloud SDK\trainer\task.py", line 43, in train_model
validation_data=(eval_data, eval_labels))
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 780, in fit
steps_name='steps_per_epoch')
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py", line 363, in model_iteration
batch_outs = f(ins_batch)
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\keras\backend.py", line 3289, in call
self._make_callable(feed_arrays, feed_symbols, symbol_vals, session)
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\keras\backend.py", line 3222, in _make_callable
callable_fn = session._make_callable_from_options(callable_opts)
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1489, in _make_callable_from_options
return BaseSession._Callable(self, callable_options)
File "C:\Users\xiaoc\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1446, in init
session._session, options_ptr)
tensorflow.python.framework.errors_impl.UnimplementedError: Cast string to int32 is not supported
[[{{node embedding/Cast}}]]
What is the problem? The requirement is that I only make changes in model.py, not in the other files. Thanks in advance!
Following are the three python files.
model.py
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTM, Activation, Dropout
from tensorflow.keras import Model

def get_batch_size():  # size of training set is 8056, number of batches = 8056/128
    return 128

def get_epochs():
    return 50

def solution(input_layer):
    max_len = 150
    max_words = 200
    # inputs = Input(name='inputs', shape=[max_len])
    layer = Embedding(max_words, output_dim=64, input_length=max_len)(input_layer)
    # layer = LSTM(64, return_sequences=True)(input_layer)
    layer = tf.expand_dims(layer, axis=-1)
    layer = LSTM(64, return_sequences=True)(layer)
    layer = Dense(256)(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(5)(layer)
    # layer = Activation('softmax')(layer)
    model = Model(inputs=input_layer, outputs=layer)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
    return model
data.py
import csv
import numpy as np
label_map = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D',
    4: 'E',
}
label_map_inv = dict(map(reversed, label_map.items()))

def load_dataset(dataset_file):
    data = []
    labels = []
    with open(dataset_file, "r", encoding="utf-8") as f:
        data_reader = csv.reader(f, delimiter=",", quotechar='"')
        next(data_reader)  # skip the header row
        for lbl, desc in data_reader:
            data.append(desc)
            labels.append(label_map_inv[lbl])
    return np.array(data), np.array(labels)
task.py
import os
import argparse
import logging
import numpy as np
import tensorflow as tf
import tensorflow.keras
import trainer.data as data
import trainer.model as model
def train_model(params):
    (train_data, train_labels) = data.load_dataset("data/train.csv")
    (eval_data, eval_labels) = data.load_dataset("data/eval.csv")
    input_layer = tf.keras.Input(shape=(), name='input_text', dtype=tf.string)
    ml_model = model.solution(input_layer)
    if ml_model is None:
        print("No model found. You need to implement one in model.py")
    else:
        ml_model.fit(train_data, train_labels,
                     batch_size=model.get_batch_size(),
                     epochs=model.get_epochs(),
                     validation_data=(eval_data, eval_labels))
        _ = ml_model.evaluate(eval_data, eval_labels, verbose=1)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    args = parser.parse_args()
    tf_logger = logging.getLogger("tensorflow")
    tf_logger.setLevel(logging.INFO)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(tf_logger.level // 10)
    train_model(args)
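For context, the failing cast comes from the combination visible above: task.py declares the input with dtype=tf.string, while the first layer in solution() is an Embedding, which only accepts integer token ids. A standalone sketch (hypothetical, not one of the assignment files) that reproduces the same error when run:

import numpy as np
import tensorflow as tf

# Embedding expects integer indices; a tf.string input forces an implicit
# Cast(string -> int32), which has no kernel and fails once the graph runs.
inp = tf.keras.Input(shape=(), dtype=tf.string)
out = tf.keras.layers.Embedding(200, 64)(inp)
m = tf.keras.Model(inp, out)
m.predict(np.array(["some text"]))  # UnimplementedError: Cast string to int32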
I am searching for a way to use the Keras Model.predict() function in a sub-process.
I am using Keras 2.3.1 and TensorFlow 2.0.0 (I also tried Keras 2.2.5 and TensorFlow 1.14).
The following code throws the error shown below it.
import itertools
import random
from abc import ABC
from multiprocessing import Pool as Pool
import numpy as np
from keras.engine.saving import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation
class Pre(ABC):
    pass

class Prediction(Pre):
    def __init__(self):
        model = Sequential([
            Dense(32, input_shape=(2,)),
            Activation('relu'),
            Dense(2),
            Activation('softmax'),
        ])
        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        y = np.array([[0, 1], [1, 0], [1, 0], [0, 1]])
        model.fit(x, y, epochs=20)
        model.save("temp")
        self.model = load_model('temp')
        self.modifier = 2

    def predict(self, input_array):
        prediction = self.model.predict(np.array([input_array]))[0]
        prediction += self.modifier
        return prediction[0]

class B:
    def __init__(self):
        self.pred = Prediction()

def calculate_something(pred_inner: B, modifier: int):
    pred_inner.modifier = modifier
    sum_all = sum(pred_inner.pred.predict(np.array([random.choice([0, 1]), random.choice([0, 1])])) for _ in range(100))
    # do some modification
    return (pred_inner,
            sum_all)

if __name__ == '__main__':
    probe_size = 100
    pred = B()
    for i in range(1000):
        with Pool() as pool:
            results = pool.starmap(calculate_something, zip(itertools.repeat(pred),
                                                            [probe_size for _ in range(i)]))
            for r in results:
                print(r[1])
Since I call the predict function in a sub-process, it seems to run into a conflict with its own sub-processes.
My networks are very small, so I think multiprocessing is not strictly necessary. Is there any way to deactivate multiprocessing in Keras and TensorFlow?
Or is there another API I could use instead of Keras/TensorFlow?
Exception in thread Thread-24:
Traceback (most recent call last):
File "C:\Python37\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "C:\Python37\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Python37\lib\multiprocessing\pool.py", line 470, in _handle_results
task = get()
File "C:\Python37\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\network.py", line 1334, in __setstate__
model = saving.unpickle_model(state)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\saving.py", line 604, in unpickle_model
return _deserialize_model(h5dict)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\saving.py", line 274, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\saving.py", line 627, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\layers\__init__.py", line 168, in deserialize
printable_module_name='layer')
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\utils\generic_utils.py", line 147, in deserialize_keras_object
list(custom_objects.items())))
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\sequential.py", line 302, in from_config
model.add(layer)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\sequential.py", line 162, in add
name=layer.name + '_input')
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\input_layer.py", line 178, in Input
input_tensor=tensor)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\engine\input_layer.py", line 87, in __init__
name=self.name)
File "C:\Users\phhor\PycharmProjects\py_doku\venv37\lib\site-packages\keras\backend\tensorflow_backend.py", line 73, in symbolic_fn_wrapper
if _SYMBOLIC_SCOPE.value:
AttributeError: '_thread._local' object has no attribute 'value'
I found the problem!
If you return an object that holds a Keras model out of a sub-process, you get this error.
If you delete the model from the object before returning it, everything works fine:
def calculate_something(pred_inner: B, modifier: int):
    pred_inner.modifier = modifier
    sum_all = sum(pred_inner.pred.predict(np.array([random.choice([0, 1]), random.choice([0, 1])])) for _ in range(100))
    del pred_inner.pred  # drop the model before the object is pickled back
    # do some modification
    return (pred_inner,
            sum_all)
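An alternative sketch (my suggestion, assuming the model can be rebuilt inside each worker): keep the Keras object out of the pickled arguments and results entirely by constructing it once per worker process with a Pool initializer. This reuses the Prediction class from the question.

import random
import numpy as np
from multiprocessing import Pool

_worker_pred = None

def _init_worker():
    # Each worker process builds its own Prediction instance, so no Keras
    # model ever crosses a process boundary via pickling.
    global _worker_pred
    _worker_pred = Prediction()

def calculate_in_worker(modifier):
    _worker_pred.modifier = modifier
    return sum(_worker_pred.predict(np.array([random.choice([0, 1]),
                                              random.choice([0, 1])]))
               for _ in range(100))

if __name__ == '__main__':
    with Pool(initializer=_init_worker) as pool:
        print(pool.map(calculate_in_worker, [2, 3, 4]))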
I am currently training a CNN. One of the metrics I am using is AUC. One issue I have noticed is that sometimes my generator will select examples from only one class (I have 3 classes in this project). So if my batch size is 20, it will sometimes randomly select 20 examples from class one for a single batch. If this happens, I get an error stating that AUC cannot be calculated with only one class, and the training ends.
Is there a way to add a condition in the generator that more or less guarantees at least 2 of the n classes are present, without having to use tf.metrics.auc?
Thank you
# load training data
def load_train_data_batch_generator(batch_size=32, rows_in=48, cols_in=48, zs_in=32,
                                    channels_in=2, num_classes=3,
                                    dir_dict=dir_dict):
    # dir_in_train = main_dir + '/test_CT_PET_combo'
    # required when using hyperopt
    batch_size = int(batch_size)
    # if not: TypeError: 'float' object cannot be interpreted as an integer
    fnames = os.listdir(dir_dict['dir_in_train_combo'])
    y_train = np.zeros((batch_size, num_classes))
    x_train = np.zeros((batch_size, rows_in, cols_in, zs_in, channels_in))
    while True:
        count = 0
        for fname in np.random.choice(fnames, batch_size, replace=False):
            data_label = scipy.io.loadmat(os.path.join(dir_dict['dir_out_train'], fname))['output']
            # changing one hot encoding to integer
            integer_label = np.argmax(data_label[0], axis=0)
            y_train[count, :] = data_label
            # Loading train ct w/ c and pet/ct combo
            train_combo = scipy.io.loadmat(os.path.join(dir_dict['dir_in_train_combo'], fname))[fname]
            x_train[count, :, :, :, :] = train_combo
            count += 1
        yield (x_train, y_train)
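One way to impose the condition asked about above is to redraw the random file sample until the chosen labels cover at least two classes. A hypothetical sketch (the label loading mirrors the loadmat call already used in the generator):

import os
import numpy as np
import scipy.io

def sample_fnames_with_two_classes(fnames, batch_size, dir_out_train):
    # Redraw until the sampled files span at least two classes.
    while True:
        chosen = np.random.choice(fnames, batch_size, replace=False)
        labels = [np.argmax(scipy.io.loadmat(
                      os.path.join(dir_out_train, f))['output'][0], axis=0)
                  for f in chosen]
        if len(set(labels)) >= 2:
            return chosen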
Per request: the code for the metric and the resulting error.
Metric code
def sk_auroc(y_true, y_pred):
    import tensorflow as tf
    from sklearn.metrics import roc_auc_score
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)
Epoch 1/200
57/205 [=======>......................] - ETA: 11s - loss: 1.2858 - acc: 0.3632 - sk_auroc: 0.4581 - auc: 0.5380ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
Traceback (most recent call last):
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/script_ops.py", line 158, in __call__
ret = func(*args)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 277, in roc_auc_score
sample_weight=sample_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/base.py", line 118, in _average_binary_score
sample_weight=score_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 268, in _binary_roc_auc_score
raise ValueError("Only one class present in y_true. ROC AUC score "
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
[[Node: metrics_1/sk_auroc/PyFunc = PyFunc[Tin=[DT_FLOAT, DT_FLOAT], Tout=[DT_DOUBLE], token="pyfunc_24", _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_predictions_target_1_0_1, predictions_1/Softmax/_857)]]
Traceback (most recent call last):
File "<ipython-input-48-34101247f335>", line 8, in optimize_cnn
model, results = train_model(space)
File "<ipython-input-47-254bd056a344>", line 40, in train_model
validation_steps=round(len(os.listdir(dir_out_val))/space['batch_size'])
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1418, in fit_generator
initial_epoch=initial_epoch)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training_generator.py", line 217, in fit_generator
class_weight=class_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1217, in train_on_batch
outputs = self.train_function(ins)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1454, in __call__
self._session._session, self._handle, args, status, None)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 519, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
Traceback (most recent call last):
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/script_ops.py", line 158, in __call__
ret = func(*args)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 277, in roc_auc_score
sample_weight=sample_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/base.py", line 118, in _average_binary_score
sample_weight=score_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 268, in _binary_roc_auc_score
raise ValueError("Only one class present in y_true. ROC AUC score "
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
[[Node: metrics_1/sk_auroc/PyFunc = PyFunc[Tin=[DT_FLOAT, DT_FLOAT], Tout=[DT_DOUBLE], token="pyfunc_24", _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_predictions_target_1_0_1, predictions_1/Softmax/_857)]]
The tf.metrics.auc code, and the plot showing why I don't really like it:
# converting a tf metric into a keras metric
def as_keras_metric(method):
    import functools
    from keras import backend as K
    import tensorflow as tf

    @functools.wraps(method)
    def wrapper(self, args, **kwargs):
        """ Wrapper for turning tensorflow metrics into keras metrics """
        value, update_op = method(self, args, **kwargs)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([update_op]):
            value = tf.identity(value)
        return value
    return wrapper
tf_auc_roc = as_keras_metric(tf.metrics.auc)
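Usage would then look like this (a sketch; the optimizer and loss here are placeholders):

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', tf_auc_roc])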
It seems tf.metrics.auc is too smooth, and something might be off that I will have to look into later.
You can use tf.metrics.auc from TensorFlow instead of sklearn.metrics.roc_auc_score from scikit-learn. For example:
import tensorflow as tf

label = tf.Variable([1, 0, 0, 0, 1])
pred = tf.Variable([0.8, 1, 0.6, 0.23, 0.78])
auc, op = tf.metrics.auc(label, pred)

with tf.Session() as sess:
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init)
    for i in range(3):
        auc_value, op_value = sess.run([auc, op])
        print(auc_value)
0.0
0.6666667
0.66666657
This way you will not run into the problem.
I'm using doc2vec to train a model on tagged sentences that can then be used for multiclass classification of other sentences in the future.
I was able to vectorize the sentences, but am now getting an error when I try to train the classifier.
ValueError: Unknown label type: 'unknown'
I'm very new at this, but after searching other posts, it looks like it has to do with my y value not being an array. I'm not sure how to solve this, though. Can someone please suggest a resolution?
Here are the relevant parts of my code:
import pandas as pd
import numpy as np
np.random.seed(0)
def read_text_file(f):
    df_complete = pd.read_csv(f)
    df = df_complete.loc[:, ["Text", "Score"]]
    df.dropna(how="any", inplace=True)
    return df
df = read_text_file("input/Reviews.csv")
print(df.head())
def sampling_dataset(df):
    count = 5000
    class_df_sampled = pd.DataFrame(columns=["Score", "Text"])
    temp = []
    for c in df.Score.unique():
        class_indexes = df[df.Score == c].index
        random_indexes = np.random.choice(class_indexes, count, replace=False)
        temp.append(df.loc[random_indexes])
    for each_df in temp:
        class_df_sampled = pd.concat([class_df_sampled, each_df], axis=0)
    return class_df_sampled
df = sampling_dataset(df)
df.reset_index(drop = True, inplace = True)
print(df.head())
print(df.shape)
from gensim.models.doc2vec import LabeledSentence
from gensim.models import Doc2Vec
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
import re
lmtzr = WordNetLemmatizer()
w = re.compile(r"\w+", re.I)

def label_sentences(df):
    labeled_sentences = []
    for index, datapoint in df.iterrows():
        tokenized_words = re.findall(w, datapoint["Text"].lower())
        labeled_sentences.append(LabeledSentence(words=tokenized_words, tags=['SENT_%s' % index]))
    return labeled_sentences

def train_doc2vec_model(labeled_sentences):
    model = Doc2Vec(alpha=0.025, min_alpha=0.025)
    model.build_vocab(labeled_sentences)
    for epoch in range(10):
        model.train(labeled_sentences, total_examples=25000, epochs=10)
        model.alpha -= 0.002
        model.min_alpha = model.alpha
    return model
sen = label_sentences(df)
model = train_doc2vec_model(sen)
def vectorize_comments(df, d2v_model):
    y = []
    comments = []
    for i in range(0, df.shape[0]):
        label = 'SENT_%s' % i
        comments.append(d2v_model.docvecs[label])
    df['vectorized_comments'] = comments
    return df
df = vectorize_comments(df, model)
print(df.head(2))
from sklearn import cross_validation
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import RandomForestClassifier as RFC
import warnings
warnings.filterwarnings("ignore", category = DeprecationWarning)
import pickle
def train_classifier(X, y):
    n_estimators = [200, 400]
    min_samples_split = [2]
    min_samples_leaf = [1]
    bootstrap = [True]
    parameters = {
        'n_estimators': n_estimators,
        'min_samples_leaf': min_samples_leaf,
        'min_samples_split': min_samples_split
    }
    clf = GridSearchCV(RFC(verbose=1, n_jobs=4), cv=4, param_grid=parameters)
    clf.fit(X, y)
    return clf
X_train, X_test, y_train, y_test = cross_validation.train_test_split(df['vectorized_comments'].T.tolist(), df['Score'], test_size = 0.02, random_state = 17)
classifier = train_classifier(X_train, y_train)
print(classifier.best_score_, "----------------Best Accuracy score on Cross Validation Sets")
print(classifier.score(X_test, y_test))
f = open("Output.txt", "w")
f.write("Best Accuracy score on Cross Validation Sets %f" % classifier.best_score_, )
f.write("Score on Test Set %f" % classifier.score(X_test, y_test))
f.close()
Here is the full stack trace:
Traceback (most recent call last):
File "<ipython-input-4-a9ad2a977535>", line 1, in <module>
runfile('C:/Users/user/.spyder-py3/multiclass doc2vec.py', wdir='C:/Users/user/.spyder-py3')
File "C:\Users\user\Anaconda31\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\user\Anaconda31\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/user/.spyder-py3/multiclass doc2vec.py", line 105, in <module>
classifier = train_classifier(X_train,y_train)
File "C:/Users/user/.spyder-py3/multiclass doc2vec.py", line 101, in train_classifier
clf.fit(X, y)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\grid_search.py", line 838, in fit
return self._fit(X, y, ParameterGrid(self.param_grid))
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\grid_search.py", line 574, in _fit
for parameters in parameter_iterable
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\parallel.py", line 779, in __call__
while self.dispatch_one_batch(iterator):
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\parallel.py", line 625, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\parallel.py", line 588, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 111, in apply_async
result = ImmediateResult(func)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 332, in __init__
self.results = batch()
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\externals\joblib\parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\cross_validation.py", line 1675, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\ensemble\forest.py", line 273, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\ensemble\forest.py", line 471, in _validate_y_class_weight
check_classification_targets(y)
File "C:\Users\user\Anaconda31\lib\site-packages\sklearn\utils\multiclass.py", line 172, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'unknown'
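A likely culprit (my reading, not stated in the thread): sampling_dataset() builds the resampled frame by concatenating onto an empty DataFrame, which leaves the Score column with object dtype, and sklearn's check_classification_targets then reports the labels as 'unknown'. A minimal hypothetical fix is to cast the labels to a numeric type before fitting:

# Hypothetical fix: force the labels to integer dtype so that sklearn
# recognizes them as classification targets.
y_train = y_train.astype(int)
y_test = y_test.astype(int)
classifier = train_classifier(X_train, y_train)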