Can't import cpplist into Cython?

I'm trying to figure out how to work with lists/arrays in Cython, which seems impossibly complex, so I would prefer to just use C++ lists, as I saw some people do on SO. However, when I run their code I get a gcc compile error in my notebook. Cython data structures are infuriating.
When run alone in a cell I get the error below; I've tried importing with and without the %%cython magic, and both fail:
%%cython
from libcpp.list cimport list as cpplist

def main(int t):
    cdef cpplist[int] temp
    for x in range(t):
        if x > 0:
            temp.push_back(x)

    cdef int N = temp.size()
    cdef list OutputList = N * [0]
    for i in range(N):
        OutputList[i] = temp.front()
        temp.pop_front()

    return OutputList
---------------------------------------------------------------------------
DistutilsExecError Traceback (most recent call last)
/anaconda3/lib/python3.6/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
117 self.spawn(compiler_so + cc_args + [src, '-o', obj] +
--> 118 extra_postargs)
119 except DistutilsExecError as msg:
/anaconda3/lib/python3.6/distutils/ccompiler.py in spawn(self, cmd)
908 def spawn(self, cmd):
--> 909 spawn(cmd, dry_run=self.dry_run)
910
/anaconda3/lib/python3.6/distutils/spawn.py in spawn(cmd, search_path, verbose, dry_run)
35 if os.name == 'posix':
---> 36 _spawn_posix(cmd, search_path, dry_run=dry_run)
37 elif os.name == 'nt':
/anaconda3/lib/python3.6/distutils/spawn.py in _spawn_posix(cmd, search_path, verbose, dry_run)
158 "command %r failed with exit status %d"
--> 159 % (cmd, exit_status))
160 elif os.WIFSTOPPED(status):
DistutilsExecError: command 'gcc' failed with exit status 1
During handling of the above exception, another exception occurred:
CompileError Traceback (most recent call last)
<ipython-input-6-70891eecfa66> in <module>()
----> 1 get_ipython().run_cell_magic('cython', '--cplus', '\n# distutils: language = c++\nfor i in range(10):\n print(i)\n \n\n\n#from libc.math cimport log\nfrom libcpp.list cimport list as cpplist')
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
<decorator-gen-127> in cython(self, line, cell)
/anaconda3/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
/anaconda3/lib/python3.6/site-packages/Cython/Build/IpythonMagic.py in cython(self, line, cell)
327
328 self._build_extension(extension, lib_dir, pgo_step_name='use' if args.pgo else None,
--> 329 quiet=args.quiet)
330
331 module = imp.load_dynamic(module_name, module_path)
/anaconda3/lib/python3.6/site-packages/Cython/Build/IpythonMagic.py in _build_extension(self, extension, lib_dir, temp_dir, pgo_step_name, quiet)
437 if not quiet:
438 old_threshold = distutils.log.set_threshold(distutils.log.DEBUG)
--> 439 build_extension.run()
440 finally:
441 if not quiet and old_threshold is not None:
/anaconda3/lib/python3.6/distutils/command/build_ext.py in run(self)
337
338 # Now actually compile and link everything.
--> 339 self.build_extensions()
340
341 def check_extensions_list(self, extensions):
/anaconda3/lib/python3.6/distutils/command/build_ext.py in build_extensions(self)
446 self._build_extensions_parallel()
447 else:
--> 448 self._build_extensions_serial()
449
450 def _build_extensions_parallel(self):
/anaconda3/lib/python3.6/distutils/command/build_ext.py in _build_extensions_serial(self)
471 for ext in self.extensions:
472 with self._filter_build_errors(ext):
--> 473 self.build_extension(ext)
474
475 #contextlib.contextmanager
/anaconda3/lib/python3.6/distutils/command/build_ext.py in build_extension(self, ext)
531 debug=self.debug,
532 extra_postargs=extra_args,
--> 533 depends=ext.depends)
534
535 # XXX outdated variable, kept here in case third-part code
/anaconda3/lib/python3.6/distutils/ccompiler.py in compile(self, sources, output_dir, macros, include_dirs, debug, extra_preargs, extra_postargs, depends)
572 except KeyError:
573 continue
--> 574 self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
575
576 # Return *all* object filenames, not just the ones we just built.
/anaconda3/lib/python3.6/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
118 extra_postargs)
119 except DistutilsExecError as msg:
--> 120 raise CompileError(msg)
121
122 def create_static_lib(self, objects, output_libname,
CompileError: command 'gcc' failed with exit status 1
In Cython itself, I get "object of type 'None' has no length" (the ONLY error message Cython gives), or "invalid syntax".
Please advise; Cython has me ready to rip my hair out, two days in.
EDITS: I've tried using:
%%cython --cplus
# distutils: language = c++
and I get the same error message.
Also, just running %%cython --cplus gives me the SAME error, with anything in the cell, even a simple print. Something is wrong with my C++ extension setup, I think; how do I resolve it?
In the terminal (using runipy; I don't know how else to run an ipynb from the terminal, aside from compiling via a setup.py and distutils build):
zacharys-mbp:Cython zoakes$ runipy CSTL.ipynb
08/08/2019 08:47:47 PM INFO: Reading notebook CSTL.ipynb
08/08/2019 08:47:49 PM INFO: Running cell:
%load_ext cython
08/08/2019 08:47:49 PM INFO: Cell returned
08/08/2019 08:47:49 PM INFO: Running cell:
%%cython
#distutils: language = c++
from libcpp.list cimport list as cpplist
warning: include path for stdlibc++ headers not found; pass '-stdlib=libc++' on
the command line to use the libc++ standard library instead
[-Wstdlibcxx-not-found]
/Users/zoakes/.ipython/cython/_cython_magic_5a0764b273da2aafc5775e4dd20b1249.cpp:592:10: fatal error:
'ios' file not found
#include "ios"
^~~~~
1 warning and 1 error generated.
08/08/2019 08:47:50 PM INFO: Cell raised uncaught exception:
---------------------------------------------------------------------------
DistutilsExecError Traceback (most recent call last)
/anaconda3/lib/python3.6/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
117 self.spawn(compiler_so + cc_args + [src, '-o', obj] +
--> 118 extra_postargs)
119 except DistutilsExecError as msg:
/anaconda3/lib/python3.6/distutils/ccompiler.py in spawn(self, cmd)
908 def spawn(self, cmd):
--> 909 spawn(cmd, dry_run=self.dry_run)
910
/anaconda3/lib/python3.6/distutils/spawn.py in spawn(cmd, search_path, verbose, dry_run)
35 if os.name == 'posix':
---> 36 _spawn_posix(cmd, search_path, dry_run=dry_run)
37 elif os.name == 'nt':
/anaconda3/lib/python3.6/distutils/spawn.py in _spawn_posix(cmd, search_path, verbose, dry_run)
158 "command %r failed with exit status %d"
--> 159 % (cmd, exit_status))
160 elif os.WIFSTOPPED(status):
DistutilsExecError: command 'gcc' failed with exit status 1
During handling of the above exception, another exception occurred:
CompileError Traceback (most recent call last)
<ipython-input-2-e4f283bb7389> in <module>()
----> 1 get_ipython().run_cell_magic('cython', '', '\n#distutils: language = c++\nfrom libcpp.list cimport list as cpplist')
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
<decorator-gen-127> in cython(self, line, cell)
/anaconda3/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
/anaconda3/lib/python3.6/site-packages/Cython/Build/IpythonMagic.py in cython(self, line, cell)
327
328 self._build_extension(extension, lib_dir, pgo_step_name='use' if args.pgo else None,
--> 329 quiet=args.quiet)
330
331 module = imp.load_dynamic(module_name, module_path)
/anaconda3/lib/python3.6/site-packages/Cython/Build/IpythonMagic.py in _build_extension(self, extension, lib_dir, temp_dir, pgo_step_name, quiet)
437 if not quiet:
438 old_threshold = distutils.log.set_threshold(distutils.log.DEBUG)
--> 439 build_extension.run()
440 finally:
441 if not quiet and old_threshold is not None:
/anaconda3/lib/python3.6/distutils/command/build_ext.py in run(self)
337
338 # Now actually compile and link everything.
--> 339 self.build_extensions()
340
341 def check_extensions_list(self, extensions):
/anaconda3/lib/python3.6/distutils/command/build_ext.py in build_extensions(self)
446 self._build_extensions_parallel()
447 else:
--> 448 self._build_extensions_serial()
449
450 def _build_extensions_parallel(self):
/anaconda3/lib/python3.6/distutils/command/build_ext.py in _build_extensions_serial(self)
471 for ext in self.extensions:
472 with self._filter_build_errors(ext):
--> 473 self.build_extension(ext)
474
475 #contextlib.contextmanager
/anaconda3/lib/python3.6/distutils/command/build_ext.py in build_extension(self, ext)
531 debug=self.debug,
532 extra_postargs=extra_args,
--> 533 depends=ext.depends)
534
535 # XXX outdated variable, kept here in case third-part code
/anaconda3/lib/python3.6/distutils/ccompiler.py in compile(self, sources, output_dir, macros, include_dirs, debug, extra_preargs, extra_postargs, depends)
572 except KeyError:
573 continue
--> 574 self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
575
576 # Return *all* object filenames, not just the ones we just built.
/anaconda3/lib/python3.6/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
118 extra_postargs)
119 except DistutilsExecError as msg:
--> 120 raise CompileError(msg)
121
122 def create_static_lib(self, objects, output_libname,
CompileError: command 'gcc' failed with exit status 1
08/08/2019 08:47:50 PM INFO: Shutdown kernel
08/08/2019 08:47:50 PM WARNING: Exiting with nonzero exit status

I think this is a Mac issue, which limits my ability to help. However, the key error message seems to be:
warning: include path for stdlibc++ headers not found; pass '-stdlib=libc++' on
the command line to use the libc++ standard library instead
[-Wstdlibcxx-not-found]
If you search for (part of) this message it looks like it's related to XCode. At some point Apple switched the compiler from GCC to Clang, and this changed which implementation of the C++ standard library it uses.
I think the best solution is to install "stdlibc++" on XCode. Unfortunately I have no idea how you'd practically do this.
The second-best solution is to add the suggested command-line argument for Cython; I say second-best because it uses a slightly mismatched implementation of the C++ standard library.
%%cython --compile-args=-stdlib=libc++ --link-args=-stdlib=libc++
I'm not sure if it needs to be in both compile and link args or just compile args.
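For reference, here is what a complete cell with those flags applied to the question's function might look like (a sketch only; I can't test it myself without a Mac):
%%cython --cplus --compile-args=-stdlib=libc++ --link-args=-stdlib=libc++
from libcpp.list cimport list as cpplist

def main(int t):
    cdef cpplist[int] temp
    for x in range(t):
        if x > 0:
            temp.push_back(x)

    cdef int N = temp.size()
    cdef list OutputList = N * [0]
    for i in range(N):
        OutputList[i] = temp.front()
        temp.pop_front()

    return OutputList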

Related

GridSearchCV & BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable

I'm running a GridSearchCV on NLP data; this is the code I'm using:
%%time

# Next we can specify the hyperparameters for each model
param_grid = [
    {
        'transformer': list_of_vecs,
        'scaler': [StandardScaler()],
        'model': [LogisticRegression()],
        'model__penalty': ['l1', 'l2'],
        'model__C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
    },
    {
        'transformer': list_of_vecs,
        'scaler': [StandardScaler()],
        'model': [DecisionTreeClassifier()],
        'model__max_depth': [2, 3, 4, 5, 6]
    }
]

# Train the GridSearch
grid = GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1)
fitted_grid = grid.fit(X_train, y_train)
I've already run the GridSearch successfully once with fewer hyperparameters, just to make sure it would run, but after I added a few more model__ parameters I suddenly started getting the error below. It only appears after about an hour of running. Any idea how I can fix this?
exception calling callback for <Future at 0x1da7efdba60 state=finished
raised BrokenProcessPool>
joblib.externals.loky.process_executor._RemoteTraceback: """
Traceback (most recent call last): File
"C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py",
line 407, in _process_worker File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\queues.py", line 117,
in get
res = self._recv_bytes() File "C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
221, in recv_bytes File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
323, in _recv_bytes File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
345, in _get_more_data MemoryError """
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File
"C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\externals\loky_base.py",
line 625, in _invoke_callbacks
callback(self) File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\parallel.py",
line 359, in call
self.parallel.dispatch_next() File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\parallel.py",
line 794, in dispatch_next
if not self.dispatch_one_batch(self._original_iterator): File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\parallel.py",
line 861, in dispatch_one_batch
self._dispatch(tasks) File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\parallel.py",
line 779, in _dispatch
job = self._backend.apply_async(batch, callback=cb) File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib_parallel_backends.py",
line 531, in apply_async
future = self._workers.submit(SafeFunction(func)) File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\externals\loky\reusable_executor.py",
line 177, in submit
return super(_ReusablePoolExecutor, self).submit( File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py",
line 1115, in submit
raise self._flags.broken joblib.externals.loky.process_executor.BrokenProcessPool: A task has
failed to un-serialize. Please ensure that the arguments of the
function are all picklable.
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback: """ Traceback (most recent call last): File "C:\Users\Alfredo\anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py",
line 407, in _process_worker File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\queues.py", line 117,
in get
res = self._recv_bytes() File "C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
221, in recv_bytes File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
323, in _recv_bytes File
"C:\Users\Alfredo\anaconda3\lib\multiprocessing\connection.py", line
345, in _get_more_data MemoryError """
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call
last) in
~\anaconda3\lib\site-packages\sklearn\model_selection_search.py in
fit(self, X, y, groups, **fit_params)
889 return results
890
--> 891 self._run_search(evaluate_candidates)
892
893 # multimetric is determined here because in the case of a callable
~\anaconda3\lib\site-packages\sklearn\model_selection_search.py in
_run_search(self, evaluate_candidates) 1390 def _run_search(self, evaluate_candidates): 1391 """Search all candidates in param_grid"""
-> 1392 evaluate_candidates(ParameterGrid(self.param_grid)) 1393 1394
~\anaconda3\lib\site-packages\sklearn\model_selection_search.py in
evaluate_candidates(candidate_params, cv, more_results)
836 )
837
--> 838 out = parallel(
839 delayed(_fit_and_score)(
840 clone(base_estimator),
~\anaconda3\lib\site-packages\joblib\parallel.py in call(self,
iterable) 1054 1055 with
self._backend.retrieval_context():
-> 1056 self.retrieve() 1057 # Make sure that we get a last message telling us we are done 1058
elapsed_time = time.time() - self._start_time
~\anaconda3\lib\site-packages\joblib\parallel.py in retrieve(self)
933 try:
934 if getattr(self._backend, 'supports_timeout', False):
--> 935 self._output.extend(job.get(timeout=self.timeout))
936 else:
937 self._output.extend(job.get())
~\anaconda3\lib\site-packages\joblib_parallel_backends.py in
wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
~\anaconda3\lib\concurrent\futures_base.py in result(self, timeout)
443 raise CancelledError()
444 elif self._state == FINISHED:
--> 445 return self.__get_result()
446 else:
447 raise TimeoutError()
~\anaconda3\lib\concurrent\futures_base.py in __get_result(self)
388 if self._exception:
389 try:
--> 390 raise self._exception
391 finally:
392 # Break a reference cycle with the exception in self._exception
~\anaconda3\lib\site-packages\joblib\externals\loky_base.py in
_invoke_callbacks(self)
623 for callback in self._done_callbacks:
624 try:
--> 625 callback(self)
626 except BaseException:
627 LOGGER.exception('exception calling callback for %r', self)
~\anaconda3\lib\site-packages\joblib\parallel.py in call(self,
out)
357 with self.parallel._lock:
358 if self.parallel._original_iterator is not None:
--> 359 self.parallel.dispatch_next()
360
361
~\anaconda3\lib\site-packages\joblib\parallel.py in
dispatch_next(self)
792
793 """
--> 794 if not self.dispatch_one_batch(self._original_iterator):
795 self._iterating = False
796 self._original_iterator = None
~\anaconda3\lib\site-packages\joblib\parallel.py in
dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
863
~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self,
batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
~\anaconda3\lib\site-packages\joblib_parallel_backends.py in
apply_async(self, func, callback)
529 def apply_async(self, func, callback=None):
530 """Schedule a func to be run"""
--> 531 future = self._workers.submit(SafeFunction(func))
532 future.get = functools.partial(self.wrap_future_result, future)
533 if callback is not None:
~\anaconda3\lib\site-packages\joblib\externals\loky\reusable_executor.py
in submit(self, fn, *args, **kwargs)
175 def submit(self, fn, *args, **kwargs):
176 with self._submit_resize_lock:
--> 177 return super(_ReusablePoolExecutor, self).submit(
178 fn, *args, **kwargs)
179
~\anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py
in submit(self, fn, *args, **kwargs) 1113 with
self._flags.shutdown_lock: 1114 if self._flags.broken
is not None:
-> 1115 raise self._flags.broken 1116 if self._flags.shutdown: 1117 raise
ShutdownExecutorError(
BrokenProcessPool: A task has failed to un-serialize. Please ensure
that the arguments of the function are all picklable.
You can fix it by removing n_jobs=-1, though then you lose parallel processing, and I am not sure how to fix it while keeping it. Another thing to try is setting pre_dispatch: it controls how many jobs get dispatched during parallel execution, and its default is 2 times n_jobs, so it could be overloading your processing queue. I had a case like yours, set n_jobs=-1 and pre_dispatch='1*n_jobs', and that worked for me.
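A minimal sketch of that suggestion applied to the search above (pipe, param_grid, X_train and y_train are the objects from the question):
from sklearn.model_selection import GridSearchCV

# Dispatch only as many tasks as there are workers instead of the
# default '2*n_jobs', so fewer pending batches sit in memory at once.
grid = GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1, pre_dispatch='1*n_jobs')
fitted_grid = grid.fit(X_train, y_train)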

Failure to parallelize code trying to load the same numpy array with joblib

I am new to the world of parallelization and ran into a very odd bug while running, on several cores, a function that loads the same npy file.
My code is of the form:
import os
import multiprocessing
from pathlib import Path

import numpy as np
from joblib import Parallel, delayed

num_cores = multiprocessing.cpu_count()

mydir = 'path/of/your/choice'
myfile = 'myArray.npy'
mydir = Path(mydir)
myfile = mydir / myfile
os.chdir(mydir)

myarray = np.zeros((12345))
np.save(myfile, myarray)

def foo(myfile, x):
    # function loading myArray and working with it
    arr = np.load(myfile)
    return arr + x

if __name__ == '__main__':
    foo_results = Parallel(n_jobs=num_cores, backend="threading")(
        delayed(foo)(myfile, i) for i in range(10))
In my case, this script would run fine about 40% of the way, then return
--> 17 arr=np.load(mydir/'myArray.npy')
ValueError: cannot reshape array of size 0 into shape (12345,)
What blows my mind is that if I enter %pdb debug mode and actually run arr=np.load(mydir/'myArray.npy'), it works! So I assume the issue stems from all the parallel processes running foo and trying to load the same numpy array at the same time (in debug mode, all the processes are paused and only the code that I execute actually runs).
This very minimal example actually works, presumably because the function is very simple and joblib handles it gracefully, but my real code is too long and complicated to post here. First of all, has anyone encountered a similar issue in the past? If no one manages to identify it, I will post my whole script.
Thanks for your help!
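One way I can think of to test this hypothesis with the threading backend from above is to serialize access to np.load with a lock (a sketch reusing myfile and num_cores from the snippet; note a threading.Lock only helps threads, not separate processes):
import threading

import numpy as np
from joblib import Parallel, delayed

load_lock = threading.Lock()  # shared by all worker threads

def foo_locked(myfile, x):
    # Only one thread may read the file at a time; if the failures
    # disappear, concurrent access to the file was the culprit.
    with load_lock:
        arr = np.load(myfile)
    return arr + x

foo_results = Parallel(n_jobs=num_cores, backend="threading")(
    delayed(foo_locked)(myfile, i) for i in range(10))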
-------------------- EDIT ------------------
Given that there doesn't seem to be an easy answer with the toy code I posted, here are the full error logs. I played around with the backends following @psarka's recommendation, and for some reason the following error arises with the default loky backend (again, no problem running the code in a non-parallel manner):
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/corr.py in ccg_stack(dp, U_src, U_trg, cbin, cwin, normalize, all_to_all, name, sav, again, periods)
541
542 ccg_results=Parallel(n_jobs=num_cores)(\
--> 543 delayed(ccg)(*ccg_inputs[i]) for i in tqdm(range(len(ccg_inputs)), desc=f'Computing ccgs over {num_cores} cores'))
544 for ((i1, u1, i2, u2), CCG) in zip(ccg_ids,ccg_results):
545 if i1==i2:
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in __call__(self, iterable)
1052
1053 with self._backend.retrieval_context():
-> 1054 self.retrieve()
1055 # Make sure that we get a last message telling us we are done
1056 elapsed_time = time.time() - self._start_time
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in retrieve(self)
931 try:
932 if getattr(self._backend, 'supports_timeout', False):
--> 933 self._output.extend(job.get(timeout=self.timeout))
934 else:
935 self._output.extend(job.get())
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
~/miniconda3/envs/npyx/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
~/miniconda3/envs/npyx/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
ValueError: Cannot load file containing pickled data when allow_pickle=False
but the following arises with the threading backend (the one originally used in my question), which is more informative; again, it is possible to actually run train = np.load(Path(dprm,fn)) in debug mode:
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/corr.py in ccg_stack(dp, U_src, U_trg, cbin, cwin, normalize, all_to_all, name, sav, again, periods)
541
542 ccg_results=Parallel(n_jobs=num_cores, backend='threading')(\
--> 543 delayed(ccg)(*ccg_inputs[i]) for i in tqdm(range(len(ccg_inputs)), desc=f'Computing ccgs over {num_cores} cores'))
544 for ((i1, u1, i2, u2), CCG) in zip(ccg_ids,ccg_results):
545 if i1==i2:
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in __call__(self, iterable)
1052
1053 with self._backend.retrieval_context():
-> 1054 self.retrieve()
1055 # Make sure that we get a last message telling us we are done
1056 elapsed_time = time.time() - self._start_time
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in retrieve(self)
931 try:
932 if getattr(self._backend, 'supports_timeout', False):
--> 933 self._output.extend(job.get(timeout=self.timeout))
934 else:
935 self._output.extend(job.get())
~/miniconda3/envs/npyx/lib/python3.7/multiprocessing/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
~/miniconda3/envs/npyx/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/_parallel_backends.py in __call__(self, *args, **kwargs)
593 def __call__(self, *args, **kwargs):
594 try:
--> 595 return self.func(*args, **kwargs)
596 except KeyboardInterrupt as e:
597 # We capture the KeyboardInterrupt and reraise it as
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in __call__(self)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
~/miniconda3/envs/npyx/lib/python3.7/site-packages/joblib-1.0.1-py3.7.egg/joblib/parallel.py in <listcomp>(.0)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/corr.py in ccg(dp, U, bin_size, win_size, fs, normalize, ret, sav, verbose, periods, again, trains)
258 if verbose: print("File {} not found in routines memory.".format(fn))
259 crosscorrelograms = crosscorrelate_cyrille(dp, bin_size, win_size, sortedU, fs, True,
--> 260 periods=periods, verbose=verbose, trains=trains)
261 crosscorrelograms = np.asarray(crosscorrelograms, dtype='float64')
262 if crosscorrelograms.shape[0]<len(U): # no spikes were found in this period
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/corr.py in crosscorrelate_cyrille(dp, bin_size, win_size, U, fs, symmetrize, periods, verbose, trains)
88 U=list(U)
89
---> 90 spike_times, spike_clusters = make_phy_like_spikeClustersTimes(dp, U, periods=periods, verbose=verbose, trains=trains)
91
92 return crosscorr_cyrille(spike_times, spike_clusters, win_size, bin_size, fs, symmetrize)
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/corr.py in make_phy_like_spikeClustersTimes(dp, U, periods, verbose, trains)
46 for iu, u in enumerate(U):
47 # Even lists of strings can be dealt with as integers by being replaced by their indices
---> 48 trains_dic[iu]=trn(dp, u, sav=True, periods=periods, verbose=verbose) # trains in samples
49 else:
50 assert len(trains)>1
/media/maxime/ut_data/Dropbox/NeuroPyxels/npyx/spk_t.py in trn(dp, unit, sav, verbose, periods, again, enforced_rp)
106 if op.exists(Path(dprm,fn)) and not again:
107 if verbose: print("File {} found in routines memory.".format(fn))
--> 108 train = np.load(Path(dprm,fn))
109
110 # if not, compute it
~/miniconda3/envs/npyx/lib/python3.7/site-packages/numpy-1.21.0rc2-py3.7-linux-x86_64.egg/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
443 # Try a pickle
444 if not allow_pickle:
--> 445 raise ValueError("Cannot load file containing pickled data "
446 "when allow_pickle=False")
447 try:
ValueError: Cannot load file containing pickled data when allow_pickle=False
The original error ValueError: cannot reshape array of size 0 into shape (12345,) doesn't show up anymore for some reason.

Jupyter notebook - rpy2 - Cannot find R libraries

I am currently trying to use both R and Python in the same Jupyter Notebook. I successfully installed rpy2; if I try to write something in R (putting %%R at the beginning) everything works, but as soon as I try to use a library, the following error appears:
R[write to console]: Error in library(name of the package) : there is no package
called - name of the package -
If I use the same library in RStudio (not in Jupyter), everything works.
This is the code that is giving me trouble:
import os
os.environ['R_HOME'] = r'C:/PROGRA~1/R/R-40~1.0'
os.environ['path'] += r';C:/PROGRA~1/R/R-40~1.0\bin;'
%load_ext rpy2.ipython
%%R
library(readr)
After this last line the following error appears:
R[write to console]: Error in library(readr) : there is no package called 'readr'
Error in library(readr) : there is no package called 'readr'
--------------------------------------------------------------------------- RRuntimeError Traceback (most recent call
last)
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\ipython\rmagic.py
in eval(self, code)
267 # Need the newline in case the last line in code is a comment.
--> 268 value, visible = ro.r("withVisible({%s\n})" % code)
269 except (ri.embedded.RRuntimeError, ValueError) as exception:
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\robjects_init_.py
in call(self, string)
415 p = rinterface.parse(string)
--> 416 res = self.eval(p)
417 return conversion.rpy2py(res)
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\robjects\functions.py
in call(self, *args, **kwargs)
196 kwargs[r_k] = v
--> 197 return (super(SignatureTranslatedFunction, self)
198 .call(*args, **kwargs))
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\robjects\functions.py
in call(self, *args, **kwargs)
124 new_kwargs[k] = conversion.py2rpy(v)
--> 125 res = super(Function, self).call(*new_args, **new_kwargs)
126 res = conversion.rpy2py(res)
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\rinterface_lib\conversion.py
in _(*args, **kwargs)
43 def _(*args, **kwargs):
---> 44 cdata = function(*args, **kwargs)
45 # TODO: test cdata is of the expected CType
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\rinterface.py in
call(self, *args, **kwargs)
623 if error_occured[0]:
--> 624 raise embedded.RRuntimeError(_rinterface._geterrmessage())
625 return res
RRuntimeError: Error in library(readr) : there is no package called
'readr'
During handling of the above exception, another exception occurred:
RInterpreterError Traceback (most recent call
last)
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\ipython\rmagic.py
in R(self, line, cell, local_ns)
762 else:
--> 763 text_result, result, visible = self.eval(code)
764 text_output += text_result
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\ipython\rmagic.py
in eval(self, code)
271 warning_or_other_msg = self.flush()
--> 272 raise RInterpreterError(code, str(exception),
273 warning_or_other_msg)
RInterpreterError: Failed to parse and evaluate line
'library(readr)\n'. R error message: "Error in library(readr) : there
is no package called 'readr'"
During handling of the above exception, another exception occurred:
PermissionError Traceback (most recent call
last) in
----> 1 get_ipython().run_cell_magic('R', '', 'library(readr)\n')
~\anaconda3\envs\Cattolica2020\lib\site-packages\IPython\core\interactiveshell.py
in run_cell_magic(self, magic_name, line, cell) 2379
with self.builtin_trap: 2380 args = (magic_arg_s,
cell)
-> 2381 result = fn(*args, **kwargs) 2382 return result 2383
in R(self, line, cell, local_ns)
~\anaconda3\envs\Cattolica2020\lib\site-packages\IPython\core\magic.py
in (f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~\anaconda3\envs\Cattolica2020\lib\site-packages\rpy2\ipython\rmagic.py
in R(self, line, cell, local_ns)
782 print(e.err)
783 if tmpd:
--> 784 rmtree(tmpd)
785 return
786 finally:
~\anaconda3\envs\Cattolica2020\lib\shutil.py in rmtree(path,
ignore_errors, onerror)
735 # can't continue even if onerror hook returns
736 return
--> 737 return _rmtree_unsafe(path, onerror)
738
739 # Allow introspection of whether or not the hardening against symlink
~\anaconda3\envs\Cattolica2020\lib\shutil.py in _rmtree_unsafe(path,
onerror)
613 os.unlink(fullname)
614 except OSError:
--> 615 onerror(os.unlink, fullname, sys.exc_info())
616 try:
617 os.rmdir(path)
~\anaconda3\envs\Cattolica2020\lib\shutil.py in _rmtree_unsafe(path,
onerror)
611 else:
612 try:
--> 613 os.unlink(fullname)
614 except OSError:
615 onerror(os.unlink, fullname, sys.exc_info())
PermissionError: [WinError 32] The process cannot access the file because it is
being used by another process:
'C:\Users\User\AppData\Local\Temp\tmp82eo8sb4\Rplots001.png'
I also verified whether the library directories are the same for Jupyter and RStudio, and I obtain the same two directories from both:
[1] "C:/Users/User/Documents/R/win-library/4.0"
[2] "C:/Program Files/R/R-4.0.0/library"
I am currently using R 4.0.0 and Python 3.8.3.
The exception RRuntimeError is normally just forwarding to Python an exception that R generated itself during execution.
The error message says that R does not find the package. If you are really sure that both RStudio and Jupyter use the very same R installation, the difference between the two will come from RStudio being instructed to look for installed R packages in more directories than the R started from Jupyter.
Run the following in RStudio to find out where readr is loaded from:
library(dplyr)
as_data_frame(installed.packages()) %>%
    filter(Package == "readr") %>%
    select(Package, LibPath)
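If the LibPath printed there is missing from the .libPaths() of the R that rpy2 starts, a possible workaround from the Python side is to prepend it before loading the package (a sketch; the user-library path below is the first directory printed in the question, so substitute whatever RStudio actually reports):
import rpy2.robjects as ro

# Prepend the library directory RStudio uses to the embedded R's
# search path, then try loading the package again.
ro.r('.libPaths(c("C:/Users/User/Documents/R/win-library/4.0", .libPaths()))')
ro.r('library(readr)')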

Simple code for phi(k) correlation matrix in Python

I am looking for a simple way (2 or 3 lines of code) to generate a Phi(k) correlation matrix in Python.
That should be possible since pandas_profiling is doing it, and it works fine.
But I want to be able to do it without pandas_profiling which is too heavy and computes things I don't need.
pandas_profiling is using phik library.
I tried the phik library (I didn't find anything else) and I don't understand the error I get:
TypeError: sequence item 0: expected str instance, int found
I have no int in my dataframe. It seems like a bug in phik, but then how does pandas_profiling manage, since it uses phik too? What's happening here? Many thanks.
Here is my code:
import numpy as np
import pandas as pd
import phik
NB_SAMPLES = 200
NB_VARIABLES = 3
rand_mat = np.random.uniform(low=0.5, high=15, size=(NB_SAMPLES,NB_VARIABLES))
df = pd.DataFrame(rand_mat)
df['cat_column'] = pd.cut(df[0], bins=5, labels=['F1','F2','F3','F4','F5'])
print(df)
df.phik_matrix()
Result :
0 1 2 cat_column
0 0.911098 8.549206 9.270484 F1
1 13.591250 9.161498 5.614470 F5
2 3.308305 1.589402 5.394675 F1
3 12.031064 9.968686 7.519628 F5
4 14.427813 1.533533 2.352659 F5
.. ... ... ... ...
195 10.556285 3.541869 4.804826 F4
196 5.721784 11.783908 13.104844 F2
197 7.336637 14.512256 14.993096 F3
198 4.375895 11.881784 1.129816 F2
199 0.519900 6.624423 9.239070 F1
[200 rows x 4 columns]
interval_cols not set, guessing: [0, 1, 2]
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
r = call_item()
File "/opt/conda/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 608, in __call__
return self.func(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 256, in __call__
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/joblib/parallel.py", line 256, in <listcomp>
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.7/site-packages/phik/phik.py", line 162, in _calc_phik
combi = ':'.join(comb)
TypeError: sequence item 0: expected str instance, int found
"""
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
<ipython-input-31-398c72b34799> in <module>
11 df['cat_column'] = pd.cut(df[0], bins=5, labels=['F1','F2','F3','F4','F5'])
12 print(df)
---> 13 df.phik_matrix()
/opt/conda/lib/python3.7/site-packages/phik/phik.py in phik_matrix(df, interval_cols, bins, quantile, noise_correction, dropna, drop_underflow, drop_overflow)
215 data_binned, binning_dict = bin_data(df_clean, cols=interval_cols_clean, bins=bins, quantile=quantile, retbins=True)
216 return phik_from_rebinned_df(data_binned, noise_correction, dropna=dropna, drop_underflow=drop_underflow,
--> 217 drop_overflow=drop_overflow)
218
219
/opt/conda/lib/python3.7/site-packages/phik/phik.py in phik_from_rebinned_df(data_binned, noise_correction, dropna, drop_underflow, drop_overflow)
145
146 phik_list = Parallel(n_jobs=NCORES)(delayed(_calc_phik)(co, data_binned[list(co)], noise_correction)
--> 147 for co in itertools.combinations_with_replacement(data_binned.columns.values, 2))
148
149 phik_overview = create_correlation_overview_table(dict(phik_list))
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1015
1016 with self._backend.retrieval_context():
-> 1017 self.retrieve()
1018 # Make sure that we get a last message telling us we are done
1019 elapsed_time = time.time() - self._start_time
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
907 try:
908 if getattr(self._backend, 'supports_timeout', False):
--> 909 self._output.extend(job.get(timeout=self.timeout))
910 else:
911 self._output.extend(job.get())
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
560 AsyncResults.get from multiprocessing."""
561 try:
--> 562 return future.result(timeout=timeout)
563 except LokyTimeoutError:
564 raise TimeoutError()
/opt/conda/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
/opt/conda/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
TypeError: sequence item 0: expected str instance, int found
Try reinstalling the phik module as follows:
pip install phik==0.10.0
Then your code works, and together with sns.heatmap it renders the phi(k) matrix as a heatmap.
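For completeness, a sketch of the plotting step, using df from the question (assuming seaborn and matplotlib are installed). If you would rather not pin phik, converting the column names to strings may also sidestep the TypeError, since it comes from phik joining integer column names with ':':
import matplotlib.pyplot as plt
import seaborn as sns
import phik  # registers the .phik_matrix() accessor on DataFrames

# Workaround: phik joins column names with ':', which fails on
# integer names, so make them strings first.
df.columns = df.columns.astype(str)

corr = df.phik_matrix()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.show()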

"command 'gcc' failed with exit status 1" when running %%cython in jupyter notebook

I have installed Cython with conda [screenshot: cython installation].
After restarting the kernel, I also loaded it without error using %reload_ext Cython.
However, when I try to run the following code
%%cython

import numpy as np

cdef int a = 0
cdef int g[10]
cdef int i

for i in range(10):
    g[i] = a
    a += i
print(g)
I get the error command 'gcc' failed with exit status 1.
I am fairly new to Cython and have no idea how to solve this, or even what to search for.
The complete error log:
---------------------------------------------------------------------------
DistutilsExecError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.7/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
117 self.spawn(compiler_so + cc_args + [src, '-o', obj] +
--> 118 extra_postargs)
119 except DistutilsExecError as msg:
~/opt/anaconda3/lib/python3.7/distutils/ccompiler.py in spawn(self, cmd)
908 def spawn(self, cmd):
--> 909 spawn(cmd, dry_run=self.dry_run)
910
~/opt/anaconda3/lib/python3.7/distutils/spawn.py in spawn(cmd, search_path, verbose, dry_run)
35 if os.name == 'posix':
---> 36 _spawn_posix(cmd, search_path, dry_run=dry_run)
37 elif os.name == 'nt':
~/opt/anaconda3/lib/python3.7/distutils/spawn.py in _spawn_posix(cmd, search_path, verbose, dry_run)
158 "command %r failed with exit status %d"
--> 159 % (cmd, exit_status))
160 elif os.WIFSTOPPED(status):
DistutilsExecError: command 'gcc' failed with exit status 1
During handling of the above exception, another exception occurred:
CompileError Traceback (most recent call last)
<ipython-input-25-16f694f6508b> in <module>
----> 1 get_ipython().run_cell_magic('cython', '', '\nimport numpy as np\n\ncdef int a = 0\ncdef int g[10]\ncdef int i\n\nfor i in range(10):\n g[i] = a\n a += i\nprint(g)\n')
~/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2357 if getattr(fn, "needs_local_scope", False):
2358 kwargs['local_ns'] = self.user_ns
-> 2359
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
</Users/w849277/opt/anaconda3/lib/python3.7/site-packages/decorator.py:decorator-gen-128> in cython(self, line, cell)
~/opt/anaconda3/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/opt/anaconda3/lib/python3.7/site-packages/Cython/Build/IpythonMagic.py in cython(self, line, cell)
331 extension = None
332 if need_cythonize:
--> 333 extensions = self._cythonize(module_name, code, lib_dir, args, quiet=args.quiet)
334 if extensions is None:
335 # Compilation failed and printed error message
~/opt/anaconda3/lib/python3.7/site-packages/Cython/Build/IpythonMagic.py in _build_extension(self, extension, lib_dir, temp_dir, pgo_step_name, quiet)
441 force=True,
442 )
--> 443 if args.language_level is not None:
444 assert args.language_level in (2, 3)
445 opts['language_level'] = args.language_level
~/opt/anaconda3/lib/python3.7/distutils/command/build_ext.py in run(self)
338
339 # Now actually compile and link everything.
--> 340 self.build_extensions()
341
342 def check_extensions_list(self, extensions):
~/opt/anaconda3/lib/python3.7/distutils/command/build_ext.py in build_extensions(self)
447 self._build_extensions_parallel()
448 else:
--> 449 self._build_extensions_serial()
450
451 def _build_extensions_parallel(self):
~/opt/anaconda3/lib/python3.7/distutils/command/build_ext.py in _build_extensions_serial(self)
472 for ext in self.extensions:
473 with self._filter_build_errors(ext):
--> 474 self.build_extension(ext)
475
476 #contextlib.contextmanager
~/opt/anaconda3/lib/python3.7/distutils/command/build_ext.py in build_extension(self, ext)
532 debug=self.debug,
533 extra_postargs=extra_args,
--> 534 depends=ext.depends)
535
536 # XXX outdated variable, kept here in case third-part code
~/opt/anaconda3/lib/python3.7/distutils/ccompiler.py in compile(self, sources, output_dir, macros, include_dirs, debug, extra_preargs, extra_postargs, depends)
572 except KeyError:
573 continue
--> 574 self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
575
576 # Return *all* object filenames, not just the ones we just built.
~/opt/anaconda3/lib/python3.7/distutils/unixccompiler.py in _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts)
118 extra_postargs)
119 except DistutilsExecError as msg:
--> 120 raise CompileError(msg)
121
122 def create_static_lib(self, objects, output_libname,
CompileError: command 'gcc' failed with exit status 1
Also, my Mac is updated to 10.15.3 Catalina, while my friend's Mac, which stayed on 10.14.6 Mojave, ran the same code without any problems. I have heard of compatibility issues with Anaconda when Catalina first came out, but I don't know whether this has anything to do with my error.
Just answering my own question so I can close this.
Thank you for the two comments on my question! I was able to locate the meaningful error message in my terminal by following the answer provided here: What does CompileError/LinkerError: "command 'gcc' failed with exit status 1" mean, when running %%cython-magic cell in IPython
It turns out the error was
xcrun: error: invalid active developer path (/Library/Developer/CommandLineTools), missing xcrun at: /Library/Developer/CommandLineTools/usr/bin/xcrun
which is resolved simply by running this command in the terminal:
xcode-select --install
For a more detailed solution to the missing xcrun problem, see https://apple.stackexchange.com/questions/254380/why-am-i-getting-an-invalid-active-developer-path-when-attempting-to-use-git-a
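Afterwards you can confirm the Command Line Tools are back in place with:
xcode-select -p
which should print the developer directory, e.g. /Library/Developer/CommandLineTools.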
