ImportError when using the new xgboost-distribution package - python

I'm using the new xgboost-distribution module. I put the following into my file:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from xgboost_distribution import XGBDistribution

if __name__ == '__main__':
    data = load_boston()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model = XGBDistribution(
        distribution="normal",
        n_estimators=500
    )
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        early_stopping_rounds=10
    )
When I run this code, however, it raises an import error inside the xgboost/sklearn.py module:
from xgboost_distribution import XGBDistribution
File "/Users/timgundersen/opt/anaconda3/lib/python3.8/site-packages/xgboost_distribution/__init__.py", line 18, in <module>
from xgboost_distribution.model import XGBDistribution # noqa
File "/Users/timgundersen/opt/anaconda3/lib/python3.8/site-packages/xgboost_distribution/model.py", line 7, in <module>
from xgboost.sklearn import XGBModel, _wrap_evaluation_matrices, xgboost_model_doc
ImportError: cannot import name '_wrap_evaluation_matrices' from 'xgboost.sklearn' (/Users/timgundersen/opt/anaconda3/lib/python3.8/site-packages/xgboost/sklearn.py)
I tried updating sklearn, as well as xgboost and the other modules that sklearn requires.
Why would this be giving me an import error if my computer is updated with all of the correct versions?

Which Python version do you use?
With Python 3.8, it should work with scikit-learn 0.24.1:
pip install scikit-learn==0.24.1
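If it helps to confirm what is actually installed before pinning anything, here is a minimal version-check sketch. Note that xgboost-distribution imports a private helper (_wrap_evaluation_matrices) from xgboost.sklearn, as the traceback shows, so a mismatched xgboost release can also trigger this ImportError:
import sklearn
import xgboost

# print the versions the failing environment is actually using
print("scikit-learn:", sklearn.__version__)
print("xgboost:", xgboost.__version__)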

Related

NameError: name 'X_train' is not defined... tried in Spyder too

I have been trying to run this statement in Colab, but it always shows the NameError:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
Please help me solve this problem.
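As a point of reference, train_test_split can only split arrays that already exist in the session; a minimal, self-contained sketch (with hypothetical toy data standing in for x and y) looks like:
import numpy as np
from sklearn.model_selection import train_test_split

# x and y must be defined before they are passed to train_test_split,
# otherwise the names (and therefore x_train etc.) do not exist
x = np.arange(20).reshape(10, 2)  # hypothetical feature matrix
y = np.arange(10)                 # hypothetical target vector

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
print(x_train.shape, x_test.shape)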

Why is the error "cannot import name 'balanced_accuracy' from 'sklearn.metrics'" more complicated than it should be?

I'm trying to do a multiclass classification project on an imbalanced COVID-19 dataset, using Jupyter notebooks on Kaggle. My code is:
import random
import gc
import os   # needed for os.listdir below
import numpy as np
from numpy import asarray
import itertools
import cv2  # needed for cv2.imread / cv2.resize below
train_dir='/kaggle/input/pandemic2/Training/Training'
test_dir='/kaggle/input/pandemic2/Testing/Testing'
train_covid = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir) if 'COVID' in i]
train_normal = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir) if 'Normal' in i]
train_pneumonia = ['/kaggle/input/pandemic2/Training/Training/{}'.format(i) for i in os.listdir(train_dir)
                   if any(k in i for k in ['MERS', 'SARS', 'Bacterial', 'Chlamydia', 'Influenza', 'Klebsiella',
                                           'Legionella', 'Mycoplasma', 'Pneumocystis', 'Streptococcus', 'Varicella'])]
test_imgs=['/kaggle/input/pandemic2/Testing/Testing/{}'.format(i) for i in os.listdir(test_dir)]
train_imgs=train_covid[:] + train_normal[:] + train_pneumonia[:]
random.shuffle(train_imgs)
del train_covid
del train_normal
del train_pneumonia
gc.collect()
nrows=150
ncolumns=150
channels= 3
def read_and_process_image(list_of_images):
    x = []
    y = []
    for image in list_of_images:
        x.append(cv2.resize(cv2.imread(image, cv2.IMREAD_COLOR), (nrows, ncolumns), interpolation=cv2.INTER_CUBIC))
        if 'Normal' in image:
            y.append(0)
        elif 'COVID' in image:
            y.append(1)
        else:
            y.append(2)
    return x, y
x,y= read_and_process_image(train_imgs)
del train_imgs
gc.collect()
x=np.array(x)
print(x.shape)
y=np.array(y)
print(y.shape)
import sklearn
from keras import layers
from keras import models
from keras import optimizers
from sklearn.model_selection import StratifiedKFold
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import cross_val_score
from sklearn.metrics import balanced_accuracy
from keras.applications import InceptionResNetV2
conv_base= InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(150,150,3))
model=models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))
from keras import optimizers
model.compile(loss='categorical_crossentropy',optimizer=optimizers.Adam(lr=1e-4), metrics= ['categorical_accuracy'])
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
val_datagen=ImageDataGenerator(rescale=1./255)
skf = StratifiedKFold(n_splits=5, shuffle=True,random_state=1)
scores = cross_val_score(model, x, y, cv=5, scoring= "balanced_accuracy")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x,y, stratify=y, test_size=0.20, random_state=2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, stratify=y_train, train_size=0.8, random_state=2)
from keras.utils import to_categorical
y_train = to_categorical(y_train, 3)
print('Shape of training labels is:', y_train.shape)
y_val = to_categorical(y_val, 3)
print('Shape of validation labels is:', y_val.shape)
y_test = to_categorical(y_test, 3)
print('Shape of test labels is:', y_test.shape)
for index, (train_indices, val_indices) in enumerate(skf.split(x, y)):
    print("Training on fold " + str(index + 1) + "/" + str(skf.n_splits))
    # Generate batches from indices
    xtrain, xval = x[train_indices], x[val_indices]
    ytrain, yval = y[train_indices], y[val_indices]
    ntrain = len(x_train)
    nval = len(x_val)
    batch_size = 32
    train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
    val_generator = val_datagen.flow(x_val, y_val, batch_size=batch_size)
    print("Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may take a while...")
    history = model.fit(train_generator,
                        steps_per_epoch=ntrain // batch_size,
                        epochs=80,
                        validation_data=val_generator,
                        validation_steps=nval // batch_size,
                        verbose=2)
print('\nBalanced Accuracy:', mterics.balanced_accurcay*100, '%')
When I run the code, it gives me the following error:
Traceback (most recent call last)
<ipython-input-7-45c4c9070141> in <module>
6 from keras.preprocessing.image import img_to_array, load_img
7 from sklearn.model_selection import cross_val_score
----> 8 from sklearn.metrics import balanced_accuracy
9
10 #Download the model
ImportError: cannot import name 'balanced_accuracy' from 'sklearn.metrics' (/opt/conda/lib/python3.7/site-packages/sklearn/metrics/__init__.py)
I've tried so many solutions, like 1, 2, 3 and 4, but this solution has led me to another complicated issue, because whenever I run the command conda activate myenv it gives me the error:
CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
To initialize your shell, run
$ conda init <SHELL_NAME>
Currently supported shells are:
- bash
- fish
- tcsh
- xonsh
- zsh
- powershell
See 'conda init --help' for more information and options.
IMPORTANT: You may need to close and restart your shell after running 'conda init'.
Tackling this issue by understanding it thoroughly and trying what is provided in this thread led me to the following error message:
/bin/bash: -c: line 0: syntax error near unexpected token `newline' /bin/bash: -c: line 0: `/opt/conda/bin/conda init <bash>'.
I have tried these solutions, 1 and 2, but had no luck!
Then, when I got stuck and felt trapped, I tried to follow the official conda documentation to create a virtual environment with all the needed packages, but I ran into the same error as above, stating that my shell is not properly configured to activate the new virtualenv.
Again, I went back to the simple solutions by reverting to the first revision of my base environment with conda list --revisions and conda install --revision=0 and updating it, but the error persists and still prevents me from using balanced_accuracy and other useful metrics.
I also tried to create a new Jupyter notebook and start from scratch by updating my packages, but they were already updated to the most recent versions.
I believe that I'm running the set of configuration commands in the wrong order, because my Jupyter kernel and environment are a complete mess now.
It would be highly appreciated if someone can guide me to the best practices in setting up the environments for a deep learning task.
BTW: the solutions that suggest manipulating the .bashrc file are a bit confusing to me, and I don't fully understand how that works.
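For reference, recent scikit-learn releases expose this metric as balanced_accuracy_score; the name "balanced_accuracy" is only valid as a scoring string (for example in cross_val_score), not as an importable name. A minimal sketch of computing it, independent of the Keras pipeline above, would be:
from sklearn.metrics import balanced_accuracy_score

y_true = [0, 1, 2, 2, 1, 0]   # hypothetical labels
y_pred = [0, 2, 2, 2, 1, 0]   # hypothetical predictions

# balanced accuracy is the mean of per-class recall, suited to imbalanced data
print('Balanced Accuracy:', balanced_accuracy_score(y_true, y_pred) * 100, '%')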

ResourceWarning: Implicitly cleaning up <Temporary Directory>

I have the following code, in a file called test_build, with a test case that saves a scikit-learn model, along with the X_train, y_train and score data, as a tuple object in a ".pkl" file.
from build import *
import os
import pandas as pd
import sklearn
from sklearn import *
from sklearn.ensemble import RandomForestClassifier  # needed for the model below
import unittest
from sklearn.model_selection import train_test_split
import numpy as np
import tempfile
class TestMachineLearningUtils(unittest.TestCase):
    def test_save_model(self):
        X, y = np.arange(10).reshape((5, 2)), range(5)
        model = RandomForestClassifier(n_estimators=300,
                                       oob_score=True,
                                       n_jobs=-1,
                                       random_state=123)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)
        clf = model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        f = tempfile.TemporaryDirectory(dir=dir_path)
        pkl_file_name = f.name + "/" + "pickle_model.pkl"
        tuple_objects = (clf, X_train, y_train, score)
        path_model = save_model(tuple_objects, pkl_file_name)
        exists_model = os.path.exists(path_model)
        self.assertExists(exists_model, True)

if __name__ == "__main__":
    unittest.main()
This is the content of the save_model function found in the build module I imported in my test file.
def save_model(tuple_objects, model_path):
    pickle.dump(tuple_objects, open(model_path, 'wb'))
    return model_path
The problem I am running into is that I cannot test whether the file is created within a temporary directory. It apparently is created, but, judging from the warning message I receive, it is cleaned up right after it has been created.
C:\Users\User\AppData\Local\Continuum\miniconda3\envs\geoenv\lib\tempfile.py:798: ResourceWarning: Implicitly cleaning up <TemporaryDirectory>
Does anyone know a solution to this problem? How could one suppress the cleanup of a temporary directory created using the tempfile module in Python?
It looks to me like your code does exactly what you want it to do, and you are just confused by the warning. The warning is merely telling you that you should explicitly delete the temporary directory, but that the module is kind enough to do it for you.
To delete the temporary directory yourself, either use it in a context manager or call its cleanup method.
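A minimal, self-contained sketch of the context-manager approach (using a hypothetical placeholder object rather than the scikit-learn model) might look like this; the assertion runs while the directory still exists, and cleanup happens when the with block exits, so no ResourceWarning is emitted:
import os
import pickle
import tempfile

data = {"model": "placeholder"}          # hypothetical object to persist
with tempfile.TemporaryDirectory() as tmp_dir:
    pkl_path = os.path.join(tmp_dir, "pickle_model.pkl")
    with open(pkl_path, "wb") as fh:
        pickle.dump(data, fh)
    assert os.path.exists(pkl_path)      # check while the directory still exists
# on leaving the with block, the directory and its contents are removed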
You simply don't. If you want the directory to outlast its scope, you create something that is not a temporary directory.
Or, more likely when testing: you create the directory in the test setup, fill it, run the test, and tear it down, so that each test is independent.
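A sketch of that setup/teardown pattern with unittest hooks (the test body is a hypothetical stand-in for the real save_model call) could be:
import os
import tempfile
import unittest

class TestWithTempDir(unittest.TestCase):
    def setUp(self):
        # each test gets its own fresh directory
        self._tmp = tempfile.TemporaryDirectory()
        self.tmp_dir = self._tmp.name

    def tearDown(self):
        # explicit cleanup, so nothing is left to be collected implicitly
        self._tmp.cleanup()

    def test_creates_file(self):
        path = os.path.join(self.tmp_dir, "pickle_model.pkl")
        with open(path, "wb") as fh:
            fh.write(b"placeholder")      # stands in for the real save_model call
        self.assertTrue(os.path.exists(path))

if __name__ == "__main__":
    unittest.main()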

Graphviz from sklearn cannot find my file?

So I'm trying to build a decision tree in Python using sklearn.
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import graphviz

cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target,
                                                    stratify=cancer.target, random_state=42)
tree = DecisionTreeClassifier(random_state=0, max_depth=4)
tree.fit(X_train, y_train)

export_graphviz(tree, out_file=r"C:\Users\obaro\OneDrive\Documents\tree.dot", class_names=["malignant", "benign"],
                feature_names=cancer.feature_names, impurity=False, filled=True)

with open(r"C:\Users\obaro\OneDrive\Documents\tree.dot") as f:
    dot_graph = f.read()
display(graphviz.Source(dot_graph))
When I try to run this code in Jupyter, though, I get a FileNotFound error and an ExecutableNotFound error. At first I tried using a relative path, but that didn't work, so I tried using an absolute path. The file WAS created and is in my current home directory, so I'm not sure what's going on here. Any help would be appreciated, thanks.
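For what it's worth, ExecutableNotFound usually means the graphviz Python package cannot locate the Graphviz dot executable on the system PATH (pip installs only the Python bindings, not the binaries). A minimal sketch that skips the intermediate .dot file, assuming the Graphviz binaries are installed (for example via conda install python-graphviz or the Graphviz Windows installer), might look like:
import graphviz
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier, export_graphviz

cancer = load_breast_cancer()
tree = DecisionTreeClassifier(random_state=0, max_depth=4).fit(cancer.data, cancer.target)

dot_data = export_graphviz(tree, out_file=None,          # return the dot source as a string
                           class_names=["malignant", "benign"],
                           feature_names=cancer.feature_names,
                           impurity=False, filled=True)
graphviz.Source(dot_data).render("tree", format="png")   # needs the dot binary on PATH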

ImportError: "numpy openblaspy flavour needed" error displayed even when I have the scipy, numpy and scikit-learn modules installed

I am trying to run the following code:
# Gaussian Naive Bayes
from sklearn import datasets
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
# load the iris datasets
dataset = datasets.load_iris()
# fit a Naive Bayes model to the data
model = GaussianNB()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
I have all the prerequisite modules installed, like numpy, scipy and scikit-learn.
When I run the code, the error displayed is:
Traceback (most recent call last):
File "C:/Users/user/PycharmProjects/sentiment-analysis/ldaex.py", line 4, in <module>
from sklearn import datasets
File "C:\Python27\lib\site-packages\sklearn\__init__.py", line 57, in <module>
from .base import clone
File "C:\Python27\lib\site-packages\sklearn\base.py", line 9, in <module>
from scipy import sparse
File "C:\Python27\lib\site-packages\scipy\__init__.py", line 131, in <module>
raise ImportError("numpy openblaspy flavour needed.")
ImportError: numpy openblaspy flavour needed.
Can anyone let me know what the problem with my modules is?
Also, when I try to run the following program, the same set of errors is displayed:
from sklearn.datasets import fetch_20newsgroups

categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
twenty_train = fetch_20newsgroups(subset='train', categories=categories,
                                  shuffle=True, random_state=42)
print twenty_train.target_names
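As a sanity check, it can help to confirm which BLAS/LAPACK flavour the installed numpy was built against, since the error is raised by a scipy build that expects an OpenBLAS-based numpy (see the traceback above). A minimal sketch would be:
import numpy as np

# show the numpy version and the BLAS/LAPACK libraries it was linked against
print(np.__version__)
np.show_config()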
