from sklearn.datasets import make_blobs
# Generate our dataset: 200 two-feature samples drawn around 4 centers
dataset = make_blobs(n_samples=200,centers=4,n_features=2,cluster_std=1.6,random_state=50)
# Keep only the first element of the returned value as the sample points
points = dataset[0]
## print(dataset)
from sklearn.cluster import KMeans
# NOTE: only names from sklearn submodules were imported above; the bare name
# `sklearn` itself is never bound in this snippet, so this line raises NameError.
print(sklearn.__version__)
Isn't it possible to check the sklearn version with print(sklearn.__version__)? Unfortunately, I got an error which says: name 'sklearn' is not defined
You need to import sklearn itself too:
# Bind the top-level package name so its attributes can be accessed directly
import sklearn
# Print the installed scikit-learn version string
print(sklearn.__version__)
Related
I'm trying to use the following:
from fireTS.models import NARX, DirectAutoRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import numpy as np
import scipy
import sklearn
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
However, upon running the first line, I get an error saying:
ModuleNotFoundError: No module named 'sklearn.metrics.regression'
Interestingly, I cannot find anything on the web about this problem (even in the recently asked question in stackoverflow about this 26+ days ago).
Has anyone encountered the same problem and been able to fix it?
EDIT:
SO I FOUND THE FIX.
I went to the library where my firets is located and clicked models.py.
I changed the following:
from sklearn.metrics.regression import r2_score, mean_squared_error
to
from sklearn.metrics import r2_score, mean_squared_error
and voilà, NO MORE ERRORS :)
How to fix this
from sklearn.datasets import load_iris
import os
import math
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import csv
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import seaborn as sns
iris=load_iris()
print(iris.keys())
print(iris['target'].shape)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(iris['data'],iris['target'],random_state=0)
print(y_train.shape)
print(X_train.shape)
iris_dataframe=pd.DataFrame(X_train,columns=iris.feature_names)
knn=KNeighborsClassifier(n_neighbors=1)
y_pred = knn.predict(X_train) //error:NotFittedError: This KNeighborsClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
what is the error?
please help me!!!! I don't know how to fix this
You'll want to start by training your k-nearest-neighbors model (fitting it on the training data): knn.fit(X_train, y_train).
You can then use it to classify your testing data, using the training samples stored during the fitting phase: y_pred = knn.predict(X_test).
I was trying to run the same code as in the scikit-learn user guide on grid search, but it gives an error. Quite surprised.
from sklearn.model_selection import GridSearchCV
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
X,y=make_moons()
calibrated_forest=CalibratedClassifierCV(base_estimator=RandomForestClassifier(n_estimators=10))
paramgrid={'base_estimator_max_depth':[2,4,6,8]}
search=GridSearchCV(calibrated_forest,paramgrid,cv=5)
search.fit(X,y)
Error message as below:
ValueError: Invalid parameter base_estimator_max_depth for estimator CalibratedClassifierCV(base_estimator=RandomForestClassifier(n_estimators=10)). Check the list of available parameters with `estimator.get_params().keys()`.
I tried with Iris data set which also gave the same error as above.
Then I used the make_moons dataset X, y and ran the random forest classifier as below.
clf = RandomForestClassifier(n_estimators=10, max_depth=2)
cross_val_score(clf, X, y, cv=5)
Got the output as below.
array([0.8 , 0.8 , 0.9 , 0.95, 0.95])
This looks strange, and I am not sure what is happening or where I am going wrong. Please help.
Note the double underscore __ between base_estimator and the parameter name:
from sklearn.model_selection import GridSearchCV
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
X,y=make_moons()
# Wrap a random forest inside the calibration meta-estimator
calibrated_forest=CalibratedClassifierCV(base_estimator=RandomForestClassifier(n_estimators=10))
# Address the wrapped estimator's parameter as "<outer param>__<inner param>";
# the separator is a DOUBLE underscore (a single one is rejected as an invalid parameter)
paramgrid={'base_estimator__max_depth':[2,4,6,8]}
# Search the max_depth candidates with 5-fold cross-validation
search=GridSearchCV(calibrated_forest,paramgrid,cv=5)
search.fit(X,y)
GridSearchCV(cv=5,
estimator=CalibratedClassifierCV(base_estimator=RandomForestClassifier(n_estimators=10)),
param_grid={'base_estimator__max_depth': [2, 4, 6, 8]})
I am trying to test a first example using sklearn:
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
X = [[0.44, 0.68], [0.99, 0.23]]
vector = [109.85, 155.72]
predict= [0.49, 0.18]
poly = PolynomialFeatures(degree=2)
X_ = poly.fit_transform(X)
predict_ = poly.fit_transform(predict)
clf = linear_model.LinearRegression()
clf.fit(X_, vector)
print clf.predict(predict_)
But I get these errors:
/usr/lib/python2.7/dist-packages/scipy/sparse/csgraph/__init__.py:148:
RuntimeWarning: numpy.dtype size changed, may indicate binary
incompatibility
from ._shortest_path import shortest_path, floyd_warshall, dijkstra,\
/usr/lib/python2.7/dist-packages/scipy/sparse/csgraph/_validation.py:5:
RuntimeWarning: numpy.dtype size changed, may indicate binary
incompatibility
File "hi.py", line 1, in <module>
from sklearn.preprocessing import PolynomialFeatures
ImportError: cannot import name PolynomialFeatures
python -V --> 2.7.6
Please, how can I deal with these errors?
Bests.
You can check your sklearn version with:
# Import the top-level package so its version attribute is accessible
import sklearn
# Report the installed scikit-learn version
print('Version {}.'.format(sklearn.__version__))
For me it shows:
Version 0.17.1.
Then check (e.g. in the documentation of PolynomialFeatures) which version introduced PolynomialFeatures and upgrade accordingly. If your version is 0.14.1 or below, you will get this error. See this page for more details on how to upgrade: Not able to import PolynomialFeatures, make_pipeline in Scikit-learn (Official: http://scikit-learn.org/stable/install.html)
Today I tried to test the amazing CatBoost library recently published by Yandex, but it shows very poor results even on a toy dataset. I've tried to find the root of my problem, but due to the lack of proper documentation and discussion about the library I can't figure out what's going on. Please help me =)
I'm using Anaconda 3 x64 with Python 3.6.
from sklearn.datasets import make_classification
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, make_scorer
from catboost import CatBoostClassifier
X,y = make_classification( n_classes=2
,n_clusters_per_class=2
,n_features=10
,n_informative=4
,n_repeated=2
,shuffle=True
,random_state=564
,n_samples=10000
)
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size = 0.8)
cb = CatBoostClassifier(depth=3,custom_loss=
['Accuracy','AUC'],
logging_level='Silent',
iterations=500,
od_type='Iter',
od_wait=20)
cb.fit(X_train,y_train,eval_set=(X_test,y_test),plot=True,use_best_model=True)
pred = cb.predict_proba(X_test)[:,1]
tpr,fpr,_=roc_curve(y_score=pred,y_true=y_test)
#just to show the difference
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier().fit(X_train,y_train)
pred_gbc = gbc.predict_proba(X_test)[:,1]
tpr_xgb,fpr_xgb,_=roc_curve(y_score=pred_gbc,y_true=y_test)
plt.plot(tpr,fpr,color='orange')
plt.plot(tpr_xgb,fpr_xgb,color='red')
plt.show()
It was a bug. Be careful and ensure you are using the latest version. The bug was fixed in 0.6.1 version.