TensorFlow: Invalid argument error in DNNRegressor.fit() - python

I am trying to do regression for my own data following the example of Deep Neural Network Regression with Boston Data.
Following is my code.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sklearn import cross_validation
from sklearn import metrics
from sklearn import preprocessing
import tensorflow as tf
from tensorflow.contrib import learn
from numpy import genfromtxt
def main():
x_test = genfromtxt('ARM4mDec2002Jul2015OklahomaV2_mar_apr_may_date_time_normalized_16000_test_data_x.csv', delimiter=',')
y_test = genfromtxt('ARM4mDec2002Jul2015OklahomaV2_mar_apr_may_date_time_normalized_16000_test_data_y.csv', delimiter=',')
x_train = genfromtxt('ARM4mDec2002Jul2015OklahomaV2_mar_apr_may_date_time_normalized_16000_training_data_x.csv', delimiter=',')
y_train = genfromtxt('ARM4mDec2002Jul2015OklahomaV2_mar_apr_may_date_time_normalized_16000_training_data_y.csv', delimiter=',')
# Build 2 layer fully connected DNN with 10, 10 units respectively.
feature_columns = learn.infer_real_valued_columns_from_input(x_train)
regressor = learn.DNNRegressor(
feature_columns=feature_columns, hidden_units=[10, 10])
# Fit
regressor.fit(x_train, y_train, steps=10, batch_size=1)
# Predict and score
y_predicted = list(
regressor.predict(scaler.transform(x_test), as_iterable=True))
score = metrics.mean_squared_error(y_predicted, y_test)
print('MSE: {0:f}'.format(score))
if __name__ == '__main__':
tf.app.run()
I am getting lots of warning and invalid argument error. The complete console output is available here. I would like to put the highlights of the error messages below.
W tensorflow/core/framework/op_kernel.cc:975] Invalid argument: Nan in summary histogram for: dnn/hiddenlayer_0_activation
[[Node: dnn/hiddenlayer_0_activation = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](dnn/hiddenlayer_0_activation/tag, dnn/hiddenlayer_0/hiddenlayer_0/Relu)]]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "arm_data_regression.py", line 39, in main
regressor.fit(x_train, y_train, steps=10, batch_size=1)
File "/home/shehab/.local/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 191, in new_func
return func(*args, **kwargs)
Is this because I am using some deprecated TensorFlow API?

Related

Error in predicting Float values in kNN in python

I am new to this KNN I want to ask a simple question I have written a code in python of KNN. when I used fingerprints.csv that contains decimals number my code gives me an error. I assume that it doesn't predict float values. so I used another CSV that has similar data but no decimal value my code worked well.
what changes I should make so my code will be able to predict floats.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import pickle
import glob
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.multioutput import MultiOutputClassifier
# training/validation set
train_set = pd.read_csv("1.csv")
# test set
test_set = pd.read_csv("testing data.csv")
X = train_set.iloc[:,0:3].values #RSSI
Y = train_set.iloc[:,3:5].values #X,Y (OUTCOME)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
#print(X_train.shape)
#print(X_test)
#print(Y_train.shape)
#print(Y_test)
sc = StandardScaler() #feature scalin
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#print(Y_train)
aa =(X_train[:,0]+X_train[:,1]+X_train[:,2])/3
print(aa)
#import math
#print(math.sqrt(len(Y_test)))
knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
classifier = MultiOutputClassifier(knn, n_jobs=-2)
classifier.fit(X_train, Y_train)
# Save the trained model as a pickle string.
saved_model = pickle.dumps (classifier)
# Load the pickled model
classifier_from_pickle = pickle.loads(saved_model)
# Use the loaded pickled model to make predictions
classifier_from_pickle.predict(X_test)
Y_pred = classifier.predict(X_test)
print(Y_pred)
a = Y_test[:,0] # actual labels
b = Y_pred[:,0] # predicted labels
acc = len([a[i] for i in range(0, len(a)) if a[i] == b[i]]) / len(a)
a = Y_test[:,1] # actual labels
b = Y_pred[:,1] # predicted labels
accu = len([a[i] for i in range(0, len(a)) if a[i] == b[i]]) / len(a)
accuracy=acc+accu
print("Accuracy: ",accuracy)
#Model Validation on Validation.csv
X = test_set.iloc[:,0:3].values #RSSI
#print(X)
X_test = sc.transform(X)
#print(X_test)
aa =(X_test[:,0]+X_test[:,1]+X_test[:,2])/3
print(aa)
# Use the loaded pickled model to make predictions on Validate Dataset
classifier_from_pickle.predict(X_test)
Y_pred = classifier.predict(X_test)
print(Y_pred)
the error
Traceback (most recent call last):
File "d:/knnn code/knn2.py", line 53, in <module>
classifier.fit(X_train, Y_train)
File "C:\Users\92316\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\multioutput.py", line 359, in fit
super().fit(X, Y, sample_weight)
File "C:\Users\92316\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\multioutput.py", line 156, in fit
check_classification_targets(y)
File "C:\Users\92316\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\utils\multiclass.py", line 169, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'continuous-multioutput'
thanks in advance for your time and help.

ValueError: cannot use sparse input in 'SVC' trained on dense data

I'm trying to run my classifier but I get this error
import pandas
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
dataset = pd.read_csv('all_topics_limpo.csv', encoding = 'utf-8')
data = pandas.get_dummies(dataset['verbatim_corrige'])
labels = dataset['label']
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size = 0.2, random_state = 0)
count_vector = CountVectorizer()
tfidf = TfidfTransformer()
classifier = OneVsOneClassifier(SVC(kernel = 'linear', random_state = 100))
#classifier = LogisticRegression()
train_counts = count_vector.fit_transform(X_train)
train_tfidf = tfidf.fit_transform(train_counts)
classifier.fit(X_train, y_train)
test_counts = count_vector.transform(X_test)
test_tfidf = tfidf.transform(test_counts)
predicted = classifier.predict(test_tfidf)
predicted = classifier.predict(X_test)
print("confusion matrix")
print(confusion_matrix(y_test, predicted, labels = labels))
print("F-score")
print(f1_score(y_test, predicted))
print(precision_score(y_test, predicted))
print(recall_score(y_test, predicted))
print("cross validation")
test_counts = count_vector.fit_transform(data)
test_tfidf = tfidf.fit_transform(test_counts)
scores = cross_validation.cross_val_score(classifier, test_tfidf, labels, cv = 10)
print(scores)
print("Accuracy: {} +/- {}".format(scores.mean(), scores.std() * 2))
My output error:
ValueError: cannot use sparse input in 'SVC' trained on dense data
I can not execute my code because of this problem and I am not understanding anything of what is happening.
all output error
Traceback (most recent call last):
File "classification.py", line 42, in
predicted = classifier.predict(test_tfidf)
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 584, in predict
Y = self.decision_function(X)
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 614, in decision_function
for est, Xi in zip(self.estimators_, Xs)]).T
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 614, in
for est, Xi in zip(self.estimators_, Xs)]).T
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 548, in predict
y = super(BaseSVC, self).predict(X)
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 308, in predict
X = self._validate_for_predict(X)
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 448, in _validate_for_predict
% type(self).name)
ValueError: cannot use sparse input in 'SVC' trained on dense data
You get this error because your training & test data are not of the same kind: while you train in your initial X_train set:
classifier.fit(X_train, y_train)
you are trying to get predictions from a dataset which has undergone count vectorization & tf-idf transormations first:
predicted = classifier.predict(test_tfidf)
It is puzzling why you choose to do so, why you nevertheless compute train_counts and train_tfidf (you don't seem to actually use them anywhere), and why you are also trying to redefine predicted as classifier.predict(X_test) immediately afterwards. Normally, changing your training line to
classifier.fit(train_tfidf, y_train)
and getting rid of your second predicted definition should work OK...
you can use this code :
test_tfidf = tfidf.transform(test_counts).toarray()
befor you want to predict your model and after :
predicted = classifier.predict(test_tfidf)
just do this simple code
nice job

Object has no attribute error (Google Machine Learning Tutorial Ep.5)

Hi I'm having some trouble with my code i'm kinda new to python but I do have some understanding of classes and objects.
import random
class ScrappyKNN():
def fit(self,X_train,Y_train):
self.X_train = X_train
self.Y_train = Y_train
def predict(self, X_test,a):
predictions = []
for row in X_test:
label = random.choice(self.Y_train)
predictions.append(label)
return predictions
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
Y = iris.target
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = .5)
#from sklearn.neighbours import KNeighboursClassifier
my_classifier = ScrappyKNN()
my_classifier = my_classifier.fit(X_train,Y_train)
predictions = my_classifier.predict(X_test)
from sklearn.metrics import accuracy_score
print(accuracy_score(Y_test, predictions))
but I'm getting an error when I run it:
Traceback (most recent call last):
File "pipelineKNeighbours.py", line 30, in <module>
predictions = my_classifier.predict(X_test)
AttributeError: 'NoneType' object has no attribute 'predict'
What am I doing wrong?
The line
my_classifier = my_classifier.fit(X_train,Y_train)
redefines my_classifier as the return value of fit(), which is None, so my_classifier is no longer pointing to an instance of your ScrappyKNN class, but to a NoneType object - thus, the error.

Python machine learning predict error

I have a .csv file with 26 features columns and 1 column class . I have used
Python to train and for test I used 26 features to predict the class .The code
shows this error :
ubuntu#ubuntu:~/python$ python3 classifier.py train.csv
/usr/local/lib/python3.4/dist-packages/sklearn/utils/validation.py:395:
DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17
and will
raise ValueError in 0.19. Reshape your data either using
X.reshape(-1, 1) if
your data has a single feature or X.reshape(1, -1) if it contains a
single sample.
DeprecationWarning) Traceback (most recent call last): File
"classifier.py", line 44, in
accuracy_score(train_X, predict) File "/usr/local/lib/python3.4/dist-packages/sklearn/metrics/classification.py",
line 172, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred) File "/usr/local/lib/python3.4/dist-packages/sklearn/metrics/classification.py",
line 72, in _check_targets
check_consistent_length(y_true, y_pred) File "/usr/local/lib/python3.4/dist-packages/sklearn/utils/validation.py",
line 181, in check_consistent_length
" samples: %r" % [int(l) for l in lengths]) ValueError: Found input variables with inconsistent numbers of samples: [778, 1]
import sys
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
def getFeatures(file_name,length=27):
all_features=[]
all_classes=[]
for line in open(file_name):
s=line.strip().split(",")
lst=[]
if len(s)==length:
class_=int(s[length-1])
for i in range(length-1):
lst.append(float(s[i]))
all_features.append(lst)
all_classes.append(class_)
return np.array(all_features),np.array(all_classes)
if len(sys.argv)>1:
input_train_file=sys.argv[1] # e.g. input train file
#input_test_file=sys.argv[2] # e.g. input test file
# make features ready for train set
train_X,train_y=getFeatures(input_train_file)
# make features ready for test set
#test_X,test_y=getFeatures(input_test_file)
clf = SVC(kernel='linear')
# train
clf.fit(train_X, train_y)
#predict
test_X=[0.24 ,0.27031, 0.239808 ,0.271922 ,0.249783 ,0.271922, 0.239808 ,0.271922, 0.261446, 0.27692 ,0.270599 ,0.27031, 0.270599, 0.281724, 0.259792, 0.271922, 0.239808, 0.271922, 0.261212 ,0.271922 ,0.259792 ,0.276243 ,0.261212, 0.27031 ,0.259792 ,0.274605
]
predict=clf.predict(test_X)
accuracy_score(train_X, predict)

RandomForest score method ValueError

I am trying to find the score of a given data set with respect to some training data. I have written the following code:
from sklearn.ensemble import RandomForestClassifier
import numpy as np
randomForest = RandomForestClassifier(n_estimators = 200)
li_train1 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
li_train2 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
li_text1 = [[10,20,30,40,50,60,70,80,90], [10,20,30,40,50,60,70,80,90]]
li_text2 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
randomForest.fit(li_train1, li_train2)
output = randomForest.score(li_train1, li_text1)
On compiling and trying to run the program I am getting the error:
Traceback (most recent call last):
File "trial.py", line 16, in <module>
output = randomForest.score(li_train1, li_text1)
File "/usr/local/lib/python2.7/dist-packages/sklearn/base.py", line 349, in score
return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
File "/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py", line 172, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py", line 89, in _check_targets
raise ValueError("{0} is not supported".format(y_type))
ValueError: multiclass-multioutput is not supported
On checking the documentation related to the score method it says:
score(X, y, sample_weight=None)
X : array-like, shape = (n_samples, n_features)
Test samples.
y : array-like, shape = (n_samples) or (n_samples, n_outputs)
True labels for X.
Both X and y in my case are arrays, 2d arrays.
I also went through this question but I couldn't understand where am I going wrong.
EDIT
So as per the answer and the comments that follow, I have edited the program as follows:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
randomForest = RandomForestClassifier(n_estimators = 200)
mlb = MultiLabelBinarizer()
li_train1 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
li_train2 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
li_text1 = [100,200]
li_text2 = [[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]]
randomForest.fit(li_train1, li_train2)
output = randomForest.score(li_train1, li_text1)
After this edit I am getting the error:
Traceback (most recent call last):
File "trial.py", line 19, in <module>
output = randomForest.score(li_train1, li_text1)
File "/usr/local/lib/python2.7/dist-packages/sklearn/base.py", line 349, in score
return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
File "/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py", line 172, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py", line 82, in _check_targets
"".format(type_true, type_pred))
ValueError: Can't handle mix of binary and multiclass-multioutput
According to the documentation:
Warning: At present, no metric in sklearn.metrics supports the multioutput-multiclass classification task.
The score method invokes sklearn's accuracy metric but this isn't supported for the multi-class, multi-output classification problem you've defined.
It's not clear from your question if you really intend to solve a multi-class, multi-output problem. If that's not your intention, then you should restructure your input arrays.
If on the other hand you really want to solve this kind of problem, you'll simply need to define your own scoring function.
UPDATE
Since you are not solving a multi-class, multi-label problem you should restructure your data so that it looks something like this:
from sklearn.ensemble import RandomForestClassifier
# training data
X = [
[1,2,3,4,5,6,7,8,9],
[1,2,3,4,5,6,7,8,9]
]
y = [0,1]
# fit the model
randomForest.fit(X,y)
# test data
Xtest = [
[1,2,0,4,5,6,0,8,9],
[1,1,3,1,5,0,7,8,9]
]
ytest = [0,1]
output = randomForest.score(Xtest,ytest)
print(output) # 0.5

Categories