I am trying to follow a machine-learning tutorial (link omitted), but I am encountering an issue. I was able to run the following code on my MacBook Air; however, it does not work on my Windows machine. I checked other questions with similar titles, but none of them seem to fit my problem.
Why is this happening? How can it be fixed?
My entire code:
# Report the interpreter version and the version of each required library
import sys
import scipy
import numpy
import matplotlib
import pandas
import sklearn

# (message template, version string) pairs, listed in the order they are printed
version_reports = (
    ('Python: {}', sys.version),
    ('scipy: {}', scipy.__version__),
    ('numpy: {}', numpy.__version__),
    ('matplotlib: {}', matplotlib.__version__),
    ('pandas: {}', pandas.__version__),
    ('sklearn: {}', sklearn.__version__),
)
for template, version in version_reports:
    print(template.format(version))
# compare algorithms
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Load dataset
url = "energyFormatted.csv"
# The original call passed names=names, but no column-name list is defined at
# this point in the script. header=None parses the file the same way an
# explicit names list does (the first row is treated as data); only the column
# labels differ, and they are discarded by .values below.
# NOTE(review): assumes the intended names list covered every column — confirm.
dataset = read_csv(url, header=None)
# Split-out validation dataset
array = dataset.values
X = array[:, 0:4]  # columns 0-3 are used as features
y = array[:, 4]  # column 4 is used as the target
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X, y, test_size=0.20, random_state=1, shuffle=True)
# Spot Check Algorithms
models = [
    ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]
# evaluate each model in turn
results = []  # per-model arrays of fold scores
names = []  # model labels, parallel to results
# The splitter is seeded with a fixed random_state, so one instance can be
# shared by every model and still yields the same folds each time.
# NOTE(review): StratifiedKFold needs discrete class labels in Y_train; a
# continuous target makes the cross_val_score call below raise
# "Supported target types are: ('binary', 'multiclass')".
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
The line that's giving me an error:
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
The error itself:
Traceback (most recent call last):
File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\", line 862, in dispatch_one_batch
tasks = self._ready_batches.get(block=False)
File "C:\Users\danie\AppData\Local\Programs\Python\Python39\lib\", line 168, in get
raise Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\danie\AppData\Roaming\JetBrains\PyCharmCE2022.2\scratches\FY23 SCI FAIR\", line 63, in <module>
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=None)
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 515, in cross_val_score
cv_results = cross_validate(
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 266, in cross_validate
results = parallel(
File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\", line 1085, in __call__
if self.dispatch_one_batch(iterator):
File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\", line 873, in dispatch_one_batch
islice = list(itertools.islice(iterator, big_batch_size))
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 266, in <genexpr>
results = parallel(
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 340, in split
for train, test in super().split(X, y, groups):
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 86, in split
for test_index in self._iter_test_masks(X, y, groups):
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 717, in _iter_test_masks
test_folds = self._make_test_folds(X, y)
File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\", line 660, in _make_test_folds
raise ValueError(
ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.
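Accuracy doesn't make sense for scoring a continuous variable. The error indicates that your y values are of type float (or are otherwise continuous). Note that the exception is raised by StratifiedKFold itself, which can only stratify on discrete class labels, so the target column has to contain classes for this tutorial code to work. If your target really is continuous, this is a regression problem: use regression models with a non-stratified splitter such as KFold, and a metric such as sklearn.metrics.mean_squared_error instead of accuracy.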
I am following Müller & Guido's Machine Learning with Python book, and I am trying to run classifications on this dataset.
So far my code looks like this:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
# Read the Churn data into a pandas DataFrame from the CSV file
dataset = pd.read_csv(r'C:\Users\Amalie\IdeaProjects\INFO284\src\Lab2.csv')
# Select the columns used for the model: eight feature columns plus the 'Churn' target.
# The result is still a pandas DataFrame; no conversion to a NumPy array happens here.
dataframe = dataset[['SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines',
'InternetService', 'OnlineSecurity', 'Churn']]
y = dataframe['Churn'] # Target
# The second positional argument (1) is the axis, i.e. drop the 'Churn' column.
# NOTE(review): newer pandas releases may require this as the keyword axis=1 — confirm.
X = dataframe.drop('Churn', 1) # Features ( all other than target column 'Churn' )
# Logistic Regression
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20) # Split into test/training sets
# NOTE(review): fit() converts X_train to a float array, so any feature column that
# still holds strings such as 'No' makes this line raise
# "could not convert string to float" — presumably those columns need to be
# encoded numerically first; confirm against the CSV.
logReg = LogisticRegression(max_iter=100000).fit(X_train, y_train)
# score() reports mean accuracy on the given data and labels
print("Training set score: {:.3f}".format(logReg.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logReg.score(X_test, y_test)))
When I run it, I get this error:
Traceback (most recent call last):
File "C:/Users/Amalie/IdeaProjects/INFO284/src/", line 19, in <module>
logReg = LogisticRegression(max_iter=100000).fit(X_train, y_train)
File "C:\Users\Amalie\IdeaProjects\INFO284\venv\lib\site-packages\sklearn\linear_model\", line 1514, in fit
accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
File "C:\Users\Amalie\IdeaProjects\INFO284\venv\lib\site-packages\sklearn\", line 581, in _validate_data
X, y = check_X_y(X, y, **check_params)
File "C:\Users\Amalie\IdeaProjects\INFO284\venv\lib\site-packages\sklearn\utils\", line 976, in check_X_y
File "C:\Users\Amalie\IdeaProjects\INFO284\venv\lib\site-packages\sklearn\utils\", line 746, in check_array
array = np.asarray(array, order=order, dtype=dtype)
File "C:\Users\Amalie\IdeaProjects\INFO284\venv\lib\site-packages\pandas\core\", line 1993, in __array__
return np.asarray(self._values, dtype=dtype)
ValueError: could not convert string to float: 'No'
Process finished with exit code 1
It says that the problem is with this line
logReg = LogisticRegression(max_iter=100000).fit(X_train, y_train)
I have used the fit()-method before when running other classification problems, but I've never come across this issue before. What am I doing wrong?
I'm trying to calculate the accuracy score of an SVM that uses a Laplacian kernel (as a precomputed kernel). However, I get the error shown below when I try to calculate the accuracy score.
My code :
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
#Load the iris data
iris_data = load_iris()
#Split the data and target
X = iris_data.data
y = iris_data.target
#Convert X and y to a numpy array
X = np.array(X)
y = np.array(y)
#Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, shuffle=True)
#Using Laplacian kernel - kernel matrix between the training samples only
K = np.array(laplacian_kernel(X_train, gamma=.5))
svm = SVC(kernel='precomputed').fit(K, np.ravel(y_train))
#K was built from X_train, so pred_y has one prediction per training sample
pred_y = svm.predict(K)
#Print accuracy score - here is where the error is happening.
print(accuracy_score(y_test, pred_y))
When I run this code, I get the error shown below:
Traceback (most recent call last):
File "/Users/user/Desktop/Research/Src/", line 36, in <module>
print(accuracy_score(y_test, pred_y))
File "/Users/user/miniforge3/envs/user_venv/lib/python3.8/site-packages/sklearn/utils/", line 63, in inner_f
return f(*args, **kwargs)
File "/Users/user/miniforge3/envs/user/lib/python3.8/site-packages/sklearn/metrics/", line 202, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/Users/user/miniforge3/envs/user/lib/python3.8/site-packages/sklearn/metrics/", line 83, in _check_targets
check_consistent_length(y_true, y_pred)
File "/Users/user/miniforge3/envs/user/lib/python3.8/site-packages/sklearn/utils/", line 262, in check_consistent_length
raise ValueError("Found input variables with inconsistent numbers of"
ValueError: Found input variables with inconsistent numbers of samples: [45, 105]
So how can I resolve this error?
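You calculated pred_y from your training inputs, so it has 105 elements, whereas y_test has only 45 elements.
You need to add a step that computes the kernel between the test samples and the training samples and predicts on that instead: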
#user3046211's code
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
#Load the iris data
iris_data = load_iris()
#Split the data and target
X = iris_data.data
y = iris_data.target
#Convert X and y to a numpy array
X = np.array(X)
y = np.array(y)
#Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, shuffle=True)
#Using Laplacian kernel - kernel matrix between the training samples only
K = np.array(laplacian_kernel(X_train, gamma=.5))
svm = SVC(kernel='precomputed').fit(K, np.ravel(y_train))
#Predictions made from the training kernel are training-set predictions,
#so they must be scored against y_train (scoring them against y_test is what
#raised the inconsistent-length error).
pred_y = svm.predict(K)
print(accuracy_score(y_train, pred_y))
#With a precomputed kernel, predicting on new data needs the kernel between
#the test samples (rows) and the training samples (columns).
K_test = np.array(laplacian_kernel(X=X_test, Y=X_train, gamma=.5))
pred_y_test = svm.predict(K_test)
print(accuracy_score(y_test, pred_y_test))
I am trying to get a result out of my code, but I am getting this error instead:
C:\Users\my_is\anaconda3\lib\site-packages\sklearn\model_selection\ FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\my_is\anaconda3\lib\site-packages\sklearn\model_selection\", line 531, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\my_is\anaconda3\lib\site-packages\sklearn\tree\", line 890, in fit
File "C:\Users\my_is\anaconda3\lib\site-packages\sklearn\tree\", line 181, in fit
File "C:\Users\my_is\anaconda3\lib\site-packages\sklearn\utils\", line 172, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'continuous'
Here is my code:
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.tree import DecisionTreeClassifier
# NOTE(review): load_boston is not available in newer scikit-learn releases —
# confirm the installed version still provides it.
data = load_boston()
# Binarize the continuous target: 1 when above the median target value, else 0.
# The median is computed once instead of once per element.
median_target = np.median(data['target'])
c = np.array([1 if y > median_target else 0 for y in data['target']])
X_train, X_test, c_train, c_test = train_test_split(data['data'], c, random_state=0)
tree = DecisionTreeClassifier().fit(X_train, c_train)
#logReg = LogisticRegression().fit(X_train, c_train)
#result = cross_validate(logReg, data.data, data.target, cv=5, return_train_score=True)
# NOTE(review): data.target is the original continuous target, not the
# binarized labels c built above; a classifier rejects it with
# "Unknown label type: 'continuous'".
result = cross_validate(tree, data.data, data.target, cv=5, return_train_score=True)
I am completely new to python and ML, any help is appreciated
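You have a mistake here:
result = cross_validate(tree, data.data, data.target, cv=5, return_train_score=True)
data.target is the original continuous target, which a classifier cannot be fitted on. Pass the binarized labels c instead. It should be:
result = cross_validate(tree, data.data, c, cv=5, return_train_score=True)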
I have a problem when I try to use logistic regression to compute the predicted values t_pred in the penultimate line of my code. The error is:
"Traceback (most recent call last):
line 15, in
t_pred = logreg(X_test)
TypeError: 'LogisticRegression' object is not callable"
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
# NOTE(review): df, t and logreg are not defined in this snippet — presumably
# the loaded DataFrame, its target column and a LogisticRegression() instance;
# confirm against the full script.
X = df1 = df.iloc[:, 1:5]  # columns at positions 1-4 are used as features
X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=0.2, random_state=0)
# Cross-validated predictions on the training split (10 folds)
predicted = cross_val_predict(logreg, X_train, t_train, cv=10)
print(accuracy_score(t_train, predicted))
# Fit on the full training split before predicting on the held-out split
logreg.fit(X_train, t_train)
# NOTE(review): this is the call that raises
# "'LogisticRegression' object is not callable".
t_pred = logreg(X_test)
print(accuracy_score(t_test, t_pred))
This is due to:
t_pred = logreg(X_test)
You need to call a method of the logreg object rather than calling the object itself with the data.
Just as fit() is the method that handles the training data, predict() is the method you need to call to get predictions on new data.
Try this:
t_pred = logreg.predict(X_test)