XGBoost Bad Allocation - python

When loading an XGBoost model and running it, I get the following error:
XGBoostError: bad allocation
I read that it might be a memory problem, but I have 32 GB of RAM and the model is quite small. I only have 8 GB of free space on my C: drive; could that be causing the problem?
Code below:
def workflow_funnel(embeddings, model, funneldf):
    # Vectorize the cleaned text, then score it with the loaded model
    xvalid_count_source = embeddings.transform(funneldf['cleaned_original_text'].apply(lambda x: np.str_(x)))
    funnel_predictions = model.predict(xvalid_count_source)
    return funnel_predictions

funnel_model = xgboost.Booster(model_file=project_directory + language + funnel_model_load)
Full error:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-47-40524e96ae70> in <module>
1 # Funnel model application
----> 2 funnel_model = xgboost.Booster(model_file = project_directory + language+funnel_model_load)
3
4 funnel_embedding = pickle.load(open(project_directory + language+funnel_vectorizer_load, 'rb'))
5
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in __init__(self, params, cache, model_file)
1324 self.__dict__.update(state)
1325 elif isinstance(model_file, (STRING_TYPES, os.PathLike, bytearray)):
-> 1326 self.load_model(model_file)
1327 elif model_file is None:
1328 pass
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in load_model(self, fname)
2160 fname = os.fspath(os.path.expanduser(fname))
2161 _check_call(_LIB.XGBoosterLoadModel(
-> 2162 self.handle, c_str(fname)))
2163 elif isinstance(fname, bytearray):
2164 buf = fname
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in _check_call(ret)
216 """
217 if ret != 0:
--> 218 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
219
220
XGBoostError: bad allocation
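"bad allocation" on load often has nothing to do with how much RAM you have: a wrong path, a truncated file, or a model written by a different XGBoost version can all surface this way. A minimal sanity check before the load, reusing the path variables from the question:
import os
import xgboost

model_path = project_directory + language + funnel_model_load

# A missing or truncated file can surface as "bad allocation",
# so verify the file before handing it to XGBoost.
assert os.path.isfile(model_path), "Model file not found: " + model_path
assert os.path.getsize(model_path) > 0, "Model file is empty: " + model_path

funnel_model = xgboost.Booster(model_file=model_path)
If the file checks out, re-saving the model with booster.save_model('model.json') under the same XGBoost version used for loading rules out a version mismatch in the binary format.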

Related

ValueError when calling activity_classifier.create(...) method

I am using TuriCreate to create a model that classifies human activity, but I get an error when I run the activity_classifier.create(...) method.
Code
This is what I did:
Load all data:
train_sf = tc.SFrame("data/cleaned_train_sframe")
valid_sf = tc.SFrame("data/cleaned_valid_sframe")
test_sf = tc.SFrame("data/cleaned_test_sframe")
Dividing the SFrame randomly into two smaller SFrames:
train, valid = tc.activity_classifier.util.random_split_by_session(train_sf, session_id='sessionId', fraction=0.9)
Trying to build and train my model:
model = tc.activity_classifier.create(dataset=train_sf,
                                      session_id='sessionId',
                                      target='activity',
                                      features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
                                      prediction_window=50,
                                      validation_set=valid_sf,
                                      max_iterations=20)
Error
The third step raises the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [34], in <cell line: 1>()
----> 1 model = tc.activity_classifier.create(dataset=train_sf,
2 session_id='sessionId',
3 target='activity',
4 features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
5 prediction_window=50,
6 validation_set=valid_sf,
7 max_iterations=20)
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/toolkits/activity_classifier/_activity_classifier.py:200, in create(dataset, session_id, target, features, prediction_window, validation_set, max_iterations, batch_size, verbose, random_seed)
197 options["_show_loss"] = False
198 options["random_seed"] = random_seed
--> 200 model.train(dataset, target, session_id, validation_set, options)
201 return ActivityClassifier(model_proxy=model, name=name)
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/extensions.py:305, in _ToolkitClass.__getattr__.<locals>.<lambda>(*args, **kwargs)
302 return _wrap_function_return(self._tkclass.get_property(name))
303 elif name in self._functions:
304 # is it a function?
--> 305 ret = lambda *args, **kwargs: self.__run_class_function(name, args, kwargs)
306 ret.__doc__ = (
307 "Name: " + name + "\nParameters: " + str(self._functions[name]) + "\n"
308 )
309 try:
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/extensions.py:290, in _ToolkitClass.__run_class_function(self, fnname, args, kwargs)
288 # unwrap it
289 try:
--> 290 ret = self._tkclass.call_function(fnname, argument_dict)
291 except RuntimeError as exc:
292 # Expose C++ exceptions using ToolkitError.
293 raise _ToolkitError(exc)
File cy_model.pyx:35, in turicreate._cython.cy_model.UnityModel.call_function()
File cy_model.pyx:40, in turicreate._cython.cy_model.UnityModel.call_function()
ValueError: stod: no conversion
Does anyone know what the problem could be?
You can get past this issue by setting validation_set to None.
This does mean that you have no validation during training, but at least you can create your model.
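Applied to the call from the question, that change looks like this:
model = tc.activity_classifier.create(dataset=train_sf,
                                      session_id='sessionId',
                                      target='activity',
                                      features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
                                      prediction_window=50,
                                      validation_set=None,  # skip validation to avoid the stod error
                                      max_iterations=20)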

How can I load sklearn data in Jupyter Python 3?

Hey, I have a very short question. I need to load data for my machine learning course, but it does not work for me and I have no idea why. I'm using Jupyter with Python 3.
My Code:
from sklearn.datasets import fetch_covtype
forest = fetch_covtype()
For my friend it works fine under the same conditions. I already tried updating scikit-learn with pip install -U scikit-learn, but it did not solve the problem. I hope somebody can help me.
It creates the following error:
UnboundLocalError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
126 try:
--> 127 X, y
128 except NameError:
UnboundLocalError: local variable 'X' referenced before assignment
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-fb303a92b6ca> in <module>
----> 1 forest =fetch_covtype()
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
127 X, y
128 except NameError:
--> 129 X, y = _refresh_cache([samples_path, targets_path], 9)
130 # TODO: Revert to the following two lines in v0.23
131 # X = joblib.load(samples_path)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in _refresh_cache(files, compress)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in <listcomp>(.0)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load(filename, mmap_mode)
603 return load_compatibility(fobj)
604
--> 605 obj = _unpickle(fobj, filename, mmap_mode)
606
607 return obj
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
527 obj = None
528 try:
--> 529 obj = unpickler.load()
530 if unpickler.compat_mode:
531 warnings.warn("The file '%s' has been generated with a "
/opt/conda/lib/python3.7/pickle.py in load(self)
1083 raise EOFError
1084 assert isinstance(key, bytes_types)
-> 1085 dispatch[key[0]](self)
1086 except _Stop as stopinst:
1087 return stopinst.value
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load_build(self)
353 if isinstance(array_wrapper, NDArrayWrapper):
354 self.compat_mode = True
--> 355 self.stack.append(array_wrapper.read(self))
356
357 # Be careful to register our new method.
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read(self, unpickler)
196 array = self.read_mmap(unpickler)
197 else:
--> 198 array = self.read_array(unpickler)
199
200 # Manage array subclass case
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read_array(self, unpickler)
147 read_size = int(read_count * self.dtype.itemsize)
148 data = _read_bytes(unpickler.file_handle,
--> 149 read_size, "array data")
150 array[i:i + read_count] = \
151 unpickler.np.frombuffer(data, dtype=self.dtype,
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle_utils.py in _read_bytes(fp, size, error_template)
241 if len(data) != size:
242 msg = "EOF: reading %s, expected %d bytes got %d"
--> 243 raise ValueError(msg % (error_template, size, len(data)))
244 else:
245 return data
ValueError: EOF: reading array data, expected 262144 bytes got 209661
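The final ValueError (expected 262144 bytes, got 209661) indicates that the cached download on disk is truncated, which is why the same code works on another machine. A minimal sketch of a likely fix, assuming the dataset cache lives in scikit-learn's default data home (~/scikit_learn_data):
import os
import shutil
from sklearn.datasets import fetch_covtype, get_data_home

# Remove the truncated covertype cache so fetch_covtype re-downloads it.
shutil.rmtree(os.path.join(get_data_home(), "covertype"), ignore_errors=True)

forest = fetch_covtype(download_if_missing=True)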

Cannot find the variable that is input to the ReadVariableOp

I am trying to save a Keras .h5 file containing weights to a TensorFlow .pb file with the following call:
frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in model.keras_model.output])
I keep getting this error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
in <module>
      1 frozen_graph = freeze_session(K.get_session(),
----> 2     output_names=[out.op.name for out in model.keras_model.output])

in freeze_session(session, keep_var_names, output_names, clear_devices)
     26         node.device = ""
     27     frozen_graph = tf.graph_util.convert_variables_to_constants(
---> 28         session, input_graph_def, output_names, freeze_var_names)
     29     return frozen_graph

~/anaconda3/envs/env_name/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    322         'in a future version' if date is None else ('after %s' % date),
    323         instructions)
--> 324     return func(*args, **kwargs)
    325   return tf_decorator.make_decorator(
    326       func, new_func, 'deprecated',

~/anaconda3/envs/env_name/lib/python3.6/site-packages/tensorflow/python/framework/graph_util_impl.py in convert_variables_to_constants(sess, input_graph_def, output_node_names, variable_names_whitelist, variable_names_blacklist)
    300       source_op_name = get_input_name(map_name_to_node[source_op_name])
    301       if map_name_to_node[source_op_name].op != "VarHandleOp":
--> 302         raise ValueError("Cannot find the variable that is an input "
    303                          "to the ReadVariableOp.")
    304

ValueError: Cannot find the variable that is an input to the ReadVariableOp.
I just ran into this same issue. Adding the following, which sets the learning phase to testing mode, was the solution:
import keras.backend as K
K.set_learning_phase(0)
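Note that the call has to happen before the model is built or loaded; otherwise training-only ops are already in the graph. A sketch of the order of operations, where build_model() is a hypothetical stand-in for however the model is actually constructed:
import keras.backend as K

# Set the learning phase to test mode *before* any layers are created,
# so no training-only variables end up in the session graph.
K.set_learning_phase(0)

model = build_model()  # hypothetical: construct or load the Keras model here

frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in model.keras_model.output])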

zipfile extractall raising "BadZipFile: Bad CRC-32 for file" error

This is the file I am trying to open:
https://drive.google.com/file/d/1K2kDBTNXS2ikx9xKmi2Fy0Wsc5u_Lls0/view
It is described here:
https://github.com/armancohan/long-summarization
After adding the file to my Google Drive, this is the code I am using to open it:
from google.colab import drive
drive.mount('/content/gdrive')
import zipfile
zip_ref = zipfile.ZipFile('/content/gdrive/My Drive/arxiv-release.zip', 'r')
zip_ref.extractall('arxiv-release')
zip_ref.close()
This is the error that is raised
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-9-9965160388a1> in <module>()
1
----> 2 zip_ref.extractall('arxiv-release')
3 zip_ref.close()
5 frames
/usr/lib/python3.6/zipfile.py in extractall(self, path, members, pwd)
1522
1523 for zipinfo in members:
-> 1524 self._extract_member(zipinfo, path, pwd)
1525
1526     @classmethod
/usr/lib/python3.6/zipfile.py in _extract_member(self, member, targetpath, pwd)
1577 with self.open(member, pwd=pwd) as source, \
1578 open(targetpath, "wb") as target:
-> 1579 shutil.copyfileobj(source, target)
1580
1581 return targetpath
/usr/lib/python3.6/shutil.py in copyfileobj(fsrc, fdst, length)
77 """copy data from file-like object fsrc to file-like object fdst"""
78 while 1:
---> 79 buf = fsrc.read(length)
80 if not buf:
81 break
/usr/lib/python3.6/zipfile.py in read(self, n)
870 self._offset = 0
871 while n > 0 and not self._eof:
--> 872 data = self._read1(n)
873 if n < len(data):
874 self._readbuffer = data
/usr/lib/python3.6/zipfile.py in _read1(self, n)
960 if self._left <= 0:
961 self._eof = True
--> 962 self._update_crc(data)
963 return data
964
/usr/lib/python3.6/zipfile.py in _update_crc(self, newdata)
888 # Check the CRC if we're at the end of the file
889 if self._eof and self._running_crc != self._expected_crc:
--> 890 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
891
892 def read1(self, n):
BadZipFile: Bad CRC-32 for file 'arxiv-release/train.txt'
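A CRC-32 mismatch almost always means the archive was corrupted or truncated while it was being downloaded or copied to Drive. Before re-downloading, a minimal sketch to confirm which member is damaged:
import zipfile

# testzip() reads every member and returns the name of the first file
# whose CRC fails, or None if the archive is intact.
with zipfile.ZipFile('/content/gdrive/My Drive/arxiv-release.zip') as zf:
    first_bad = zf.testzip()
    print("First corrupt member:", first_bad)
If testzip() reports a bad member, re-download the archive; extraction cannot recover the lost bytes.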

How to resolve unsupported pickle protocol: 4 in python 3.5?

My code:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import pickle
with open('word2vec_model', 'rb') as handle:
    model = pickle.load(handle)
The error that happens:
ValueError                                Traceback (most recent call last)
<ipython-input-2-aa1ad84b16ed> in <module>()
18 #if you do NOT have RAM >= 12GB, use the code below.
19 with open('word2vec_model', 'rb') as handle:
---> 20 model = pickle.load(handle)
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load(file)
1382
1383 def load(file):
-> 1384 return Unpickler(file).load()
1385
1386 def loads(str):
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load(self)
862 while 1:
863 key = read(1)
--> 864 dispatch[key](self)
865 except _Stop, stopinst:
866 return stopinst.value
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load_proto(self)
890 proto = ord(self.read(1))
891 if not 0 <= proto <= 2:
--> 892 raise ValueError, "unsupported pickle protocol: %d" % proto
893 dispatch[PROTO] = load_proto
894
ValueError: unsupported pickle protocol: 4
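The paths in the traceback (Anaconda2, pickle.pyc) show the file is actually being loaded with Python 2, whose pickle module only understands protocols 0-2; protocol 4 requires Python 3.4 or newer. One hedged fix, run on the Python 3 side that created the file, is to re-save it with a protocol Python 2 can read:
import pickle

# Run this with the Python 3 interpreter that originally pickled the model.
with open('word2vec_model', 'rb') as handle:
    model = pickle.load(handle)

# Protocol 2 is the highest protocol Python 2's pickle can read.
with open('word2vec_model_py2', 'wb') as handle:
    pickle.dump(model, handle, protocol=2)
For gensim models specifically, model.save(...) and Word2Vec.load(...) under a matching gensim version are usually more robust than raw pickle.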
