My code:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import pickle
with open('word2vec_model', 'rb') as handle:
    model = pickle.load(handle)
The error that happens:
ValueError                                Traceback (most recent call last)
<ipython-input-2-aa1ad84b16ed> in <module>()
18 #if you do NOT have RAM >= 12GB, use the code below.
19 with open('word2vec_model', 'rb') as handle:
---> 20 model = pickle.load(handle)
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load(file)
1382
1383 def load(file):
-> 1384 return Unpickler(file).load()
1385
1386 def loads(str):
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load(self)
862 while 1:
863 key = read(1)
--> 864 dispatch[key](self)
865 except _Stop, stopinst:
866 return stopinst.value
C:\Users\home pc\Anaconda2\lib\pickle.pyc in load_proto(self)
890 proto = ord(self.read(1))
891 if not 0 <= proto <= 2:
--> 892 raise ValueError, "unsupported pickle protocol: %d" % proto
893 dispatch[PROTO] = load_proto
894
ValueError: unsupported pickle protocol: 4
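The protocol error suggests the pickle was written under Python 3 with protocol 4, which the Python 2 interpreter in Anaconda2 cannot read. A minimal sketch of a fix, assuming you still have access to the Python 3 environment that produced the file (the _py2 filename is just illustrative): load it there and re-save it with protocol 2, the highest protocol Python 2 understands.
# Run in the Python 3 environment that created the file:
import pickle

with open('word2vec_model', 'rb') as handle:
    model = pickle.load(handle)

# Re-serialize with protocol 2 so Python 2 can unpickle it.
with open('word2vec_model_py2', 'wb') as handle:
    pickle.dump(model, handle, protocol=2)
If the model was saved with gensim's own model.save() rather than raw pickle, loading it with Word2Vec.load('word2vec_model') under a matching gensim version may sidestep the problem entirely.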
When loading an XGBoost model and running it I get the following error:
XGBoostError: bad allocation
I read that it might be a memory problem, but I have 32 GB of RAM and the model is quite small. Could the 8 GB of free space on my C: drive be causing the problem?
Code below:
import numpy as np
import xgboost

def workflow_funnel(embeddings, model, funneldf):
    xvalid_count_source = embeddings.transform(funneldf['cleaned_original_text'].apply(lambda x: np.str_(x)))
    funnel_predictions = model.predict(xvalid_count_source)
    return funnel_predictions

funnel_model = xgboost.Booster(model_file=project_directory + language + funnel_model_load)
Full error:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-47-40524e96ae70> in <module>
1 # Funnel model application
----> 2 funnel_model = xgboost.Booster(model_file = project_directory + language+funnel_model_load)
3
4 funnel_embedding = pickle.load(open(project_directory + language+funnel_vectorizer_load, 'rb'))
5
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in __init__(self, params, cache, model_file)
1324 self.__dict__.update(state)
1325 elif isinstance(model_file, (STRING_TYPES, os.PathLike, bytearray)):
-> 1326 self.load_model(model_file)
1327 elif model_file is None:
1328 pass
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in load_model(self, fname)
2160 fname = os.fspath(os.path.expanduser(fname))
2161 _check_call(_LIB.XGBoosterLoadModel(
-> 2162 self.handle, c_str(fname)))
2163 elif isinstance(fname, bytearray):
2164 buf = fname
~\AppData\Roaming\Python\Python37\site-packages\xgboost\core.py in _check_call(ret)
216 """
217 if ret != 0:
--> 218 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
219
220
XGBoostError: bad allocation
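The "bad allocation" message comes from XGBoost's C++ core and, despite the wording, often points to a corrupted or version-incompatible binary model file rather than exhausted RAM. A hedged sketch, assuming the model can be re-exported from the environment that trained it (trained_booster and the filename are illustrative): save it in XGBoost's JSON format, which is portable across versions, and load that instead.
# In the training environment: export to the portable JSON format.
trained_booster.save_model('funnel_model.json')

# In this environment: point the Booster at the JSON file.
import xgboost
funnel_model = xgboost.Booster(model_file='funnel_model.json')
If re-exporting is not possible, checking that the xgboost versions match between the machine that saved the file and this one is the other usual fix.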
I am working on the Extended Isolation Forest for anomaly detection. The GitHub link for the algorithm is here. I am not able to pickle the model after training it. How can I resolve this error?
import os
import joblib
import eif as iso

iso_forest_model = iso.iForest(X, ntrees=100, sample_size=256, ExtensionLevel=1)
joblib.dump(iso_forest_model, os.path.join(models_path, 'extended_isolation_forest.pkl'), compress=9)
Error
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-36-6b85f8a16cb7> in <module>()
6
7 import joblib
----> 8 joblib.dump(iso_forest_model, os.path.join(models_path,'extended_isolation_forest.pkl'), compress=9)
/usr/local/lib/python3.7/dist-packages/joblib/numpy_pickle.py in dump(value, filename, compress, protocol, cache_size)
475 with _write_fileobject(filename, compress=(compress_method,
476 compress_level)) as f:
--> 477 NumpyPickler(f, protocol=protocol).dump(value)
478 elif is_filename:
479 with open(filename, 'wb') as f:
/usr/lib/python3.7/pickle.py in dump(self, obj)
435 if self.proto >= 4:
436 self.framer.start_framing()
--> 437 self.save(obj)
438 self.write(STOP)
439 self.framer.end_framing()
/usr/local/lib/python3.7/dist-packages/joblib/numpy_pickle.py in save(self, obj)
280 return
281
--> 282 return Pickler.save(self, obj)
283
284
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
522 reduce = getattr(obj, "__reduce_ex__", None)
523 if reduce is not None:
--> 524 rv = reduce(self.proto)
525 else:
526 reduce = getattr(obj, "__reduce__", None)
/usr/local/lib/python3.7/dist-packages/eif.cpython-37m-x86_64-linux-gnu.so in eif.iForest.__reduce_cython__()
TypeError: no default __reduce__ due to non-trivial __cinit__
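eif.iForest is a compiled Cython extension type with no __reduce__ method, so neither pickle nor joblib can serialize it directly. A workaround sketch, assuming retraining on load is acceptable: persist the training data and hyperparameters instead of the model object, and rebuild the forest when needed. Note the algorithm is randomized, so a rebuilt forest will produce slightly different scores.
import joblib
import eif as iso

# Save the ingredients rather than the unpicklable Cython object.
joblib.dump({'X': X, 'ntrees': 100, 'sample_size': 256, 'ExtensionLevel': 1},
            'eif_state.pkl', compress=9)

# Later: reload the ingredients and retrain the forest.
state = joblib.load('eif_state.pkl')
iso_forest_model = iso.iForest(state['X'], ntrees=state['ntrees'],
                               sample_size=state['sample_size'],
                               ExtensionLevel=state['ExtensionLevel'])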
I am trying to extract the text from the following PDF.
First, I tried the PyPDF2 library with the following code:
from PyPDF2 import PdfFileReader
pdf = PdfFileReader('March_5_8000/1.pdf',strict=False)
information = pdf.getDocumentInfo()
This throws the following error:
/opt/conda/envs/fastai/lib/python3.6/site-packages/PyPDF2/pdf.py in getObject(self, indirectReference)
1597 if debug: print((" Uncompressed Object", indirectReference.idnum, indirectReference.generation, ":", start))
1598 self.stream.seek(start, 0)
-> 1599 idnum, generation = self.readObjectHeader(self.stream)
1600 if idnum != indirectReference.idnum and self.xrefIndex:
1601 # Xref table probably had bad indexes due to not being zero-indexed
/opt/conda/envs/fastai/lib/python3.6/site-packages/PyPDF2/pdf.py in readObjectHeader(self, stream)
1665 warnings.warn("Superfluous whitespace found in object header %s %s" % \
1666 (idnum, generation), utils.PdfReadWarning)
-> 1667 return int(idnum), int(generation)
1668
1669 def cacheGetIndirectObject(self, generation, idnum):
ValueError: invalid literal for int() with base 10: b'obj'
I also tried using the pdfrw library, but it failed on the same file with a similar parse error:
Code:
from pdfrw import PdfReader
x = PdfReader('March_5_8000/980.pdf')
x.keys()
ERROR:
PdfParseError Traceback (most recent call last)
<ipython-input-6-6d3575671b6c> in <module>
1 from pdfrw import PdfReader
----> 2 x = PdfReader('March_5_8000/980.pdf')
3 x.keys()
/opt/conda/envs/fastai/lib/python3.6/site-packages/pdfrw/pdfreader.py in __init__(self, fname, fdata, decompress, decrypt, password, disable_gc, verbose)
617 while 1:
618 source.obj_offsets = {}
--> 619 trailer, is_stream = self.parsexref(source)
620 prev = trailer.Prev
621 if prev is None:
/opt/conda/envs/fastai/lib/python3.6/site-packages/pdfrw/pdfreader.py in parsexref(self, source)
463 return self.readdict(source), False
464 else:
--> 465 source.exception('Expected "xref" keyword or xref stream object')
466
467 def readpages(self, node):
/opt/conda/envs/fastai/lib/python3.6/site-packages/pdfrw/tokens.py in exception(self, *arg)
227
228 def exception(self, *arg):
--> 229 raise PdfParseError(self.msg(*arg))
PdfParseError: Expected "xref" keyword or xref stream object (line=2238, col=39, token='obj')
How can I solve this?
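Both tracebacks choke on a malformed object header and cross-reference table, which suggests the PDF itself is damaged or non-standard rather than either library being broken. A hedged sketch: pdfminer.six uses a more tolerant parser than PyPDF2 or pdfrw, so it is worth trying before attempting to repair the file.
# Requires: pip install pdfminer.six
from pdfminer.high_level import extract_text

text = extract_text('March_5_8000/1.pdf')
print(text[:500])
If pdfminer.six also fails, rewriting the file with an external repair tool such as qpdf or Ghostscript to rebuild the xref table, then re-parsing the output, is a common fallback.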
Hey, I have a very short question. I need to load data for my machine learning course, but it does not work for me and I have no idea why. I'm using Jupyter with Python 3.
My Code:
from sklearn.datasets import fetch_covtype
forest = fetch_covtype()
For my friend it works fine under the same conditions. I already tried updating scikit-learn with pip install -U scikit-learn, but that did not solve the problem. I hope somebody can help me.
It creates the following error:
UnboundLocalError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
126 try:
--> 127 X, y
128 except NameError:
UnboundLocalError: local variable 'X' referenced before assignment
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-fb303a92b6ca> in <module>
----> 1 forest =fetch_covtype()
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/covtype.py in fetch_covtype(data_home, download_if_missing, random_state, shuffle, return_X_y)
127 X, y
128 except NameError:
--> 129 X, y = _refresh_cache([samples_path, targets_path], 9)
130 # TODO: Revert to the following two lines in v0.23
131 # X = joblib.load(samples_path)
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in _refresh_cache(files, compress)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/sklearn/datasets/base.py in <listcomp>(.0)
928 msg = "sklearn.externals.joblib is deprecated in 0.21"
929 with warnings.catch_warnings(record=True) as warns:
--> 930 data = tuple([joblib.load(f) for f in files])
931
932 refresh_needed = any([str(x.message).startswith(msg) for x in warns])
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load(filename, mmap_mode)
603 return load_compatibility(fobj)
604
--> 605 obj = _unpickle(fobj, filename, mmap_mode)
606
607 return obj
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
527 obj = None
528 try:
--> 529 obj = unpickler.load()
530 if unpickler.compat_mode:
531 warnings.warn("The file '%s' has been generated with a "
/opt/conda/lib/python3.7/pickle.py in load(self)
1083 raise EOFError
1084 assert isinstance(key, bytes_types)
-> 1085 dispatch[key[0]](self)
1086 except _Stop as stopinst:
1087 return stopinst.value
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in load_build(self)
353 if isinstance(array_wrapper, NDArrayWrapper):
354 self.compat_mode = True
--> 355 self.stack.append(array_wrapper.read(self))
356
357 # Be careful to register our new method.
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read(self, unpickler)
196 array = self.read_mmap(unpickler)
197 else:
--> 198 array = self.read_array(unpickler)
199
200 # Manage array subclass case
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in read_array(self, unpickler)
147 read_size = int(read_count * self.dtype.itemsize)
148 data = _read_bytes(unpickler.file_handle,
--> 149 read_size, "array data")
150 array[i:i + read_count] = \
151 unpickler.np.frombuffer(data, dtype=self.dtype,
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle_utils.py in _read_bytes(fp, size, error_template)
241 if len(data) != size:
242 msg = "EOF: reading %s, expected %d bytes got %d"
--> 243 raise ValueError(msg % (error_template, size, len(data)))
244 else:
245 return data
ValueError: EOF: reading array data, expected 262144 bytes got 209661
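The EOF message means the file cached under scikit-learn's data home is truncated (expected 262144 bytes, got 209661), most likely because the first download was interrupted, which would also explain why it works on your friend's machine. A minimal sketch of the usual fix: delete the cached directory and download again. The 'covertype' subdirectory name is an assumption based on scikit-learn's covtype loader; check the actual name under your data home.
import os
import shutil
from sklearn.datasets import get_data_home, fetch_covtype

# Remove the truncated cache, then re-download the dataset.
shutil.rmtree(os.path.join(get_data_home(), 'covertype'), ignore_errors=True)
forest = fetch_covtype(download_if_missing=True)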
This is the file I am trying to open:
https://drive.google.com/file/d/1K2kDBTNXS2ikx9xKmi2Fy0Wsc5u_Lls0/view
It is described here:
https://github.com/armancohan/long-summarization
After adding the file to my Google Drive, this is the code I am using to open it:
from google.colab import drive
drive.mount('/content/gdrive')
import zipfile
zip_ref = zipfile.ZipFile('/content/gdrive/My Drive/arxiv-release.zip', 'r')
zip_ref.extractall('arxiv-release')
zip_ref.close()
This is the error that is raised:
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-9-9965160388a1> in <module>()
1
----> 2 zip_ref.extractall('arxiv-release')
3 zip_ref.close()
/usr/lib/python3.6/zipfile.py in extractall(self, path, members, pwd)
1522
1523 for zipinfo in members:
-> 1524 self._extract_member(zipinfo, path, pwd)
1525
1526 @classmethod
/usr/lib/python3.6/zipfile.py in _extract_member(self, member, targetpath, pwd)
1577 with self.open(member, pwd=pwd) as source, \
1578 open(targetpath, "wb") as target:
-> 1579 shutil.copyfileobj(source, target)
1580
1581 return targetpath
/usr/lib/python3.6/shutil.py in copyfileobj(fsrc, fdst, length)
77 """copy data from file-like object fsrc to file-like object fdst"""
78 while 1:
---> 79 buf = fsrc.read(length)
80 if not buf:
81 break
/usr/lib/python3.6/zipfile.py in read(self, n)
870 self._offset = 0
871 while n > 0 and not self._eof:
--> 872 data = self._read1(n)
873 if n < len(data):
874 self._readbuffer = data
/usr/lib/python3.6/zipfile.py in _read1(self, n)
960 if self._left <= 0:
961 self._eof = True
--> 962 self._update_crc(data)
963 return data
964
/usr/lib/python3.6/zipfile.py in _update_crc(self, newdata)
888 # Check the CRC if we're at the end of the file
889 if self._eof and self._running_crc != self._expected_crc:
--> 890 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
891
892 def read1(self, n):
BadZipFile: Bad CRC-32 for file 'arxiv-release/train.txt'
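A CRC-32 mismatch means the bytes of the archive on Drive do not match their stored checksums, which almost always indicates an incomplete upload or sync rather than a zipfile bug. A small diagnostic sketch: ZipFile.testzip() re-reads every member and returns the name of the first corrupt one, confirming whether the copy itself is bad; if it is, re-download the archive and compare its size against the original before extracting.
import zipfile

with zipfile.ZipFile('/content/gdrive/My Drive/arxiv-release.zip') as zf:
    bad_member = zf.testzip()  # name of the first corrupt member, or None
    if bad_member is None:
        zf.extractall('arxiv-release')
    else:
        print('Corrupt member:', bad_member, '- re-download the archive.')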