I have a problem with my logistic regression function. I'm using the PyCharm IDE and LogisticRegression from the sklearn.linear_model package.
My debugger shows AttributeError: 'tuple' object has no attribute 'fit' (and the same for 'predict').
Code below:
# Question snippet: split the data, standardize the features, fit a
# LogisticRegression and return the result of print_metrics on the test split.
def logistic_regression(df, y):
x_train, x_test, y_train, y_test = train_test_split(
df, y, test_size=0.25, random_state=0)
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
clf = LogisticRegression(random_state=0, solver='sag',
penalty='l2', max_iter=1000, multi_class='multinomial'),
# NOTE(review): the trailing comma on the line above makes clf a one-element
# tuple rather than an estimator, which matches the reported AttributeError.
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
return classification_metrics.print_metrics(y_test, y_pred, 'Logistic regression')
Can anyone help me spot the mistake here? For the other functions I tried, fit and predict seem to work fine.
There is a small mistake in the code, as I mentioned in the comment:
please remove the trailing comma after the LogisticRegression(...) call. With the comma, clf becomes a one-element tuple instead of a model, which is why it has no fit or predict.
Also, there is no such function as classification_metrics.print_metrics,
so I have used metrics.classification_report instead:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
def logistic_regression(df, y):
    """Fit a multinomial logistic regression and return a classification report.

    The data is split 75/25 into training and test parts, the features are
    standardized with a scaler fitted on the training part only, and the
    report is computed on the held-out part.
    """
    split = train_test_split(df, y, test_size=0.25, random_state=0)
    features_train, features_test, target_train, target_test = split

    # Fit the scaler on the training features, then reuse it for the test set.
    scaler = StandardScaler()
    features_train = scaler.fit_transform(features_train)
    features_test = scaler.transform(features_test)

    # No trailing comma after the call: the name must be bound to the
    # estimator itself, not to a tuple containing it.
    model = LogisticRegression(
        random_state=0,
        solver='sag',
        penalty='l2',
        max_iter=1000,
        multi_class='multinomial',
    )
    predictions = model.fit(features_train, target_train).predict(features_test)
    return metrics.classification_report(target_test, predictions)
Function call:
logistic_regression(df, y)
Related
I have this code:
# Question snippet: build a synthetic dataset, train a depth-4 tree model and
# score its predictions on a 30% hold-out split.
X, y = make_classification(n_features=2,n_redundant=0,n_samples=400, random_state=17)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=17)
# NOTE(review): DecisionTree is not defined or imported in this snippet;
# scikit-learn's class is DecisionTreeClassifier -- confirm which one is meant.
clf = DecisionTree(max_depth=4, criterion='gini')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
prob_pred = clf.predict_proba(X_test)
accuracy = accuracy_score(y_test,y_pred)
However, the last line, accuracy = accuracy_score(y_test,y_pred), raises the error "Expected array-like (array or non-string sequence), got None". How can I fix it?
Your code with a minor fix works well:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Synthetic two-feature classification problem with 400 samples.
X, y = make_classification(
    n_samples=400, n_features=2, n_redundant=0, random_state=17
)

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=17
)

# The scikit-learn class is DecisionTreeClassifier, not DecisionTree.
clf = DecisionTreeClassifier(criterion='gini', max_depth=4)
clf.fit(X_train, y_train)

# Predicted labels and per-class probabilities for the held-out samples.
y_pred = clf.predict(X_test)
prob_pred = clf.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
Output:
>>> accuracy
0.8833333333333333
The performance of the model does not increase during the training epoch(s) where the values are sorted by a specific row key. The dataset is balanced and has 40,000 records for binary classification (0, 1).
# Hold out 20% of the data for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
Linear_SVC_classifier = SVC(kernel='linear', random_state=1)  # supervised learning
Linear_SVC_classifier.fit(x_train, y_train)
# SVC_Prediction has to be computed from the fitted model before it can be
# scored; without this line accuracy_score would fail with a NameError.
SVC_Prediction = Linear_SVC_classifier.predict(x_test)
SVC_Accuracy = accuracy_score(y_test, SVC_Prediction)
print("\n\n\nLinear SVM Accuracy: ", SVC_Accuracy)
Apply a count vectorizer to your training data and use a logistic regression model:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Fit the vectorizer on the training documents only, then apply the same
# fitted vectorizer to the test documents.
cv = CountVectorizer()
ctmTr = cv.fit_transform(X_train)
X_test_dtm = cv.transform(X_test)
model = LogisticRegression()
model.fit(ctmTr, y_train)
y_pred_class = model.predict(X_test_dtm)
# accuracy_score needs both the true labels and the predicted labels.
# The variable name and printed label are kept from the question, although
# the model scored here is a logistic regression.
SVC_Accuracy = accuracy_score(y_test, y_pred_class)
print("\n\n\nLinear SVM Accuracy: ", SVC_Accuracy)
The model definition above is roughly 'equivalent' to these statements:
# Fit a linear-kernel SVC on the vectorized training matrix ctmTr.
Linear_SVC_classifier = SVC(kernel='linear', random_state=1)
Linear_SVC_classifier.fit(ctmTr, y_train)
raise NotFittedError(msg % {'name': type(estimator).name})
sklearn.exceptions.NotFittedError: This Lasso instance is not fitted
yet. Call 'fit' with appropriate arguments before using this
estimator.
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
#
# Load the Boston Data Set
#
bh = datasets.load_boston()
X = bh.data
y = bh.target
#
# Create training and test split
#
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
#
# Create an instance of Lasso Regression implementation
#
lasso = Lasso(alpha=1.0)
#
# Fit the Lasso model
#
# NOTE(review): this fits on the test split (X_test, y_test), not on
# X_train / y_train.
lasso.fit(X_test, y_test)
#
# Create the model score
#
#lasso.score(X_test, y_test), lasso.score(X_train, y_train)
# NOTE(review): lasso_reg is a second, separate Lasso instance; fit is never
# called on it before predict, which matches the reported NotFittedError.
lasso_reg = Lasso(normalize=True)
y_pred_lass =lasso_reg.predict(X_test)
print(y_pred_lass)
You've actually created two lasso models. One called lasso which you fit. But after that you create another one lasso_reg = Lasso(normalize=True) which you try to call predict on but that model hasn't been fitted yet. Try this:
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
# NOTE: load_boston and Lasso's `normalize` argument were removed in
# scikit-learn 1.2, so this snippet needs an older release to run as written.
bh = datasets.load_boston()
X = bh.data
y = bh.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Use a single model: create it, fit it, then predict with that same instance.
lasso = Lasso(alpha=1.0, normalize=True)
# Fit on the training split so the test split stays unseen until prediction;
# fitting on X_test and then predicting X_test would leak the evaluation data.
lasso.fit(X_train, y_train)
y_pred_lass = lasso.predict(X_test)
print(y_pred_lass)
As the error says, you have to call lasso_reg.fit(X_test, y_test) before calling lasso_reg.predict(X_test). This will fix the issue.
lasso_reg = Lasso(normalize=True)
# The estimator must be fitted before predict is called. The training split
# is used here rather than the test split, so that the predictions below are
# made on data the model has not seen.
lasso_reg.fit(X_train, y_train)
y_pred_lass = lasso_reg.predict(X_test)
print(y_pred_lass)
# Question snippet: fit a LogisticRegression and score it with accuracy_score.
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
# Placeholder the question asks about: y_pred has to hold the model's
# predictions for the samples that y_test belongs to.
y_pred = none
accuracy = accuracy_score(y_test, y_pred)
print (accuracy)
What should I put in the y_pred = none area? Is there anything wrong with my code?
Normally you would split your data into train and test sets. Below is an example using a synthetic dataset generated with make_classification:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Generate a dataset with make_classification's default arguments and hold
# out a third of it (test_size=0.33) for evaluation.
X, y = make_classification()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
We can fit the model like you did:
# Fit the model on the training split only.
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
Get the prediction on test set and score with actual value:
# y_pred comes from the fitted model's predict on X_test and is compared
# against the true labels y_test.
y_pred = logistic_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
I am trying to run a logistic regression classification with k-fold cross-validation in Python.
My code:
`import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix,roc_auc_score
# Load the dataset and separate the feature columns from the "Class" target.
# NOTE(review): 'xxx.csv' and the column list ending in ... look like
# placeholders for the real file and feature names -- confirm.
data = pd.read_csv('xxx.csv')
X = data[["a","b","c",...]]
y = data["Class"]
def get_predictions(clf, X_train, y_train, X_test):
    """Fit ``clf`` and return its test-set labels and class probabilities.

    The confusion matrix of the fitted model on the training data is printed
    as a side effect.

    Returns:
        A ``(y_pred, y_pred_prob)`` tuple: the output of ``clf.predict`` and
        of ``clf.predict_proba`` on ``X_test``.
    """
    clf.fit(X_train, y_train)
    test_labels = clf.predict(X_test)
    test_probabilities = clf.predict_proba(X_test)
    train_matrix = confusion_matrix(y_train, clf.predict(X_train))
    print('train-set confusion matrix:\n', train_matrix)
    return test_labels, test_probabilities
# Question snippet: 5-fold stratified cross-validation that records one
# ROC AUC score per fold and accumulates the fold probabilities.
skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 0)
pred_test_full=0
cv_score=[]
# i is incremented once per fold but is not otherwise used in this snippet.
i=1
for train_index, test_index in skf.split(X, y):
# NOTE(review): skf.split yields positional indices while .loc selects by
# label; presumably X and y have a default integer index -- confirm.
X_train, y_train = X.loc[train_index], y.loc[train_index]
X_test, y_test = X.loc[test_index], y.loc[test_index]
# log_cfl is fitted here, and get_predictions below fits a second
# LogisticRegression(C=2) on the same training fold.
log_cfl = LogisticRegression(C=2);
log_cfl.fit(X_train, y_train)
y_pred, y_pred_prob = get_predictions(LogisticRegression(C=2), X_train, y_train, X_test)
# The score uses log_cfl.predict(...) labels, not y_pred_prob.
score=roc_auc_score(y_test,log_cfl.predict(X_test))
print('ROC AUC score: ',score)
cv_score.append(score)
# NOTE(review): y_pred_prob has one row per sample of this fold's X_test, so
# the running sum only works while every fold has the same number of rows;
# presumably a fold of a different size triggers the reported broadcasting
# error -- confirm against the fold sizes.
pred_test_full = pred_test_full + y_pred_prob
i+=1`
I get an error at this line of code:
`pred_test_full = pred_test_full + y_pred_prob`
The for loop runs twice; on the third iteration I get the error:
'operands could not be broadcast together with shapes <56962,2> <5696..'
I can't understand what is wrong. Could you help me figure it out?