rpy2 convert python object to R object in jupyter lab

I'm using the rpy2 library to use both R and Python in the same notebook in JupyterLab. Everything works properly (first I installed R Essentials, then the rpy2 library).
To load the extension I use
%load_ext rpy2.ipython
Before each R cell I use
%%R
To convert an R object to Python, I use
%R -o R_object
And to convert a Python object to R, I use
%R -i python_object
My problem is with the last command (converting from Python to R). I get the following error, which I don't understand:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-90-cc234320b667> in <module>
----> 1 get_ipython().run_line_magic('R', '-i python_object')
~\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2315 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2316 with self.builtin_trap:
-> 2317 result = fn(*args, **kwargs)
2318 return result
2319
<C:\Users\steph\anaconda3\lib\site-packages\decorator.py:decorator-gen-131> in R(self, line, cell, local_ns)
~\anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~\anaconda3\lib\site-packages\rpy2\ipython\rmagic.py in R(self, line, cell, local_ns)
688 raise NameError("name '%s' is not defined" % input)
689 with localconverter(converter) as cv:
--> 690 ro.r.assign(input, val)
691
692 tmpd = self.setup_graphics(args)
~\anaconda3\lib\site-packages\rpy2\robjects\functions.py in __call__(self, *args, **kwargs)
176 v = kwargs.pop(k)
177 kwargs[r_k] = v
--> 178 return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
179
180 pattern_link = re.compile(r'\\link\{(.+?)\}')
~\anaconda3\lib\site-packages\rpy2\robjects\functions.py in __call__(self, *args, **kwargs)
100
101 def __call__(self, *args, **kwargs):
--> 102 new_args = [conversion.py2ri(a) for a in args]
103 new_kwargs = {}
104 for k, v in kwargs.items():
~\anaconda3\lib\site-packages\rpy2\robjects\functions.py in <listcomp>(.0)
100
101 def __call__(self, *args, **kwargs):
--> 102 new_args = [conversion.py2ri(a) for a in args]
103 new_kwargs = {}
104 for k, v in kwargs.items():
~\anaconda3\lib\functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~\anaconda3\lib\site-packages\rpy2\robjects\numpy2ri.py in nonnumpy2ri(obj)
119 # For now, go with the default_converter.
120 # TODO: the conversion system needs an overhaul badly.
--> 121 return ro.default_converter.py2ri(obj)
122 else:
123 # The conversion module was "activated"
~\anaconda3\lib\functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~\anaconda3\lib\site-packages\rpy2\robjects\conversion.py in _py2ri(obj)
58 (ri) objects.
59 """
---> 60 raise NotImplementedError("Conversion 'py2ri' not defined for objects of type '%s'" % str(type(obj)))
61
62 def _py2ro(obj):
NotImplementedError: Conversion 'py2ri' not defined for objects of type '<class 'tuple'>'
Does anyone have an idea what this means?
Thank you in advance,
Stéphane

Actually, I did some tests. One of the biggest benefits of this setup is that you can create or manipulate objects or data in one language and pass them to the other (to do more operations, for example).
Apparently it depends on how I create the object in Python (I'm new to Python, I'm better at R).
But now both
%R -o R_object
and
%R -i python_object
work well. I haven't checked several different object types yet (matrix, etc.). I think I need to use it more to understand it better, but basically it works :)
Have a nice day
Stéphane

%R/%%R is documented. Try:
%load_ext rpy2.ipython
?%%R
There is also a section in the rpy2 documentation dedicated to notebooks:
https://rpy2.github.io/doc/v3.4.x/html/generated_rst/notebooks.html
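As for the original NotImplementedError: rpy2's default converter simply has no py2ri rule for Python tuples, so converting the tuple to a type it does handle before using -i avoids the error. A minimal sketch, assuming NumPy support is available (the numpy2ri frames in the traceback suggest it is); the variable names are illustrative:
import numpy as np
python_tuple = (1, 2, 3)                 # tuples have no conversion rule, hence the error
python_object = np.array(python_tuple)   # NumPy arrays are handled by rpy2's numpy converter
%R -i python_object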

Related

Python Error - NotImplementedError: Conversion 'py2rpy' not defined for objects of type '<class 'ipywidgets.widgets.widget_string.Text'>'

I'm getting the error below from some Python code. Can anyone help me figure out what is going on? I tried to embed the full code, but I kept getting an error saying my code was not properly formatted. I'll try to post a reply with the full code!
NotImplementedError: Conversion 'py2rpy' not defined for objects of type '<class 'ipywidgets.widgets.widget_string.Text'>'
Code:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Input In [122], in <cell line: 1>()
----> 1 get_ipython().run_cell_magic('R', '-i cutoff -i mtry -i ntree -i data -o max_f1 -o max_cut -o max_mtry -o max_ntree -o max_precision -o max_recall', 'library(randomForest)\n\nfrom types import NoneType\nNoneType = type(None)\n\n#from IPython.core.interactiveshell import InteractiveShell\n#InteractiveShell.ast_node_interactivity = "all"\n\n#9-24-2022 SL\n#from rpy2.robjects.conversion import localconverter as lc\n\n#with lc(ro.default_converter + pr.converter):\n# fileName_c = ro.conversion.py2rpy(fileName)\n# url_c = ro.conversion.py2rpy(url)\n#ro.globalenv[\'fileName\'] = fileName_c\n#ro.globalenv[\'url\'] = url_c\n\n#from rpy2.robjects import pandas2ri\n\n\n#pandas2ri.activate()\n#end 9-24-2022 SL\n\ndata$target = factor(data$target)\ncutoff_list = unlist(strsplit("0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9",",")) #cutoff 9-24-2022 SL\nmtry_list = unlist(strsplit("2,3,4,5",",")) #mtry 9-24-2022 SL\nntree_list = unlist(strsplit("100,500,1000",",")) #ntree 9-24-2022 SL\nset.seed(830)\nmax_f1 = 0\nmax_cut = 0\nmax_mtry = 0\nmax_ntree = 0\nmax_precision = 0\nmax_recall = 0\n# a nested for loop for ntree, mtry, cutoff?\nfor (cut in cutoff_list )\n{\n for (Mtry in mtry_list)\n {\n for (Ntree in ntree_list)\n {\n cut = as.numeric(cut) #9-24-2022 SL cut - should be "cutoff"?\n Mtry = as.numeric(mtry) #9-24-2022 SL mtry\n Ntree = as.numeric(NTree) #9-24-2022 SL Ntree\n fit_data <- randomForest(data$target ~ ., data=data, importance=TRUE, proximity=TRUE, cutoff=c(cut, 1-cut), ntree=Ntree, mtry=Mtry)\n true_positives = fit_data$confusion[\'1\',\'1\']\n true_negatives = fit_data$confusion[\'0\',\'0\']\n false_positives = fit_data$confusion[\'0\',\'1\']\n false_negatives = fit_data$confusion[\'1\',\'0\']\n accuracy = (true_positives + true_negatives)/nrow(data)\n precision = true_positives/(true_positives+false_positives)\n recall = true_positives/(true_positives + false_negatives)\n f1_score = 2*((recall*precision)/(precision+recall))\n if (! is.nan(f1_score) && f1_score > max_f1)\n {\n max_f1 = f1_score\n max_cut = cut\n max_mtry = Mtry\n max_ntree = Ntree\n max_precision = precision\n max_recall = recall\n }\n }\n }\n}\n\n')
File ~\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2347, in InteractiveShell.run_cell_magic(self, magic_name, line, cell)
2345 with self.builtin_trap:
2346 args = (magic_arg_s, cell)
-> 2347 result = fn(*args, **kwargs)
2348 return result
File ~\anaconda3\lib\site-packages\rpy2\ipython\rmagic.py:755, in RMagics.R(self, line, cell, local_ns)
753 raise NameError("name '%s' is not defined" % input)
754 with localconverter(converter) as cv:
--> 755 ro.r.assign(input, val)
757 if args.display:
758 try:
File ~\anaconda3\lib\site-packages\rpy2\robjects\functions.py:203, in SignatureTranslatedFunction.__call__(self, *args, **kwargs)
201 v = kwargs.pop(k)
202 kwargs[r_k] = v
--> 203 return (super(SignatureTranslatedFunction, self)
204 .__call__(*args, **kwargs))
File ~\anaconda3\lib\site-packages\rpy2\robjects\functions.py:118, in Function.__call__(self, *args, **kwargs)
116 def __call__(self, *args, **kwargs):
117 cv = conversion.get_conversion()
--> 118 new_args = [cv.py2rpy(a) for a in args]
119 new_kwargs = {}
120 for k, v in kwargs.items():
121 # TODO: shouldn't this be handled by the conversion itself ?
File ~\anaconda3\lib\site-packages\rpy2\robjects\functions.py:118, in <listcomp>(.0)
116 def __call__(self, *args, **kwargs):
117 cv = conversion.get_conversion()
--> 118 new_args = [cv.py2rpy(a) for a in args]
119 new_kwargs = {}
120 for k, v in kwargs.items():
121 # TODO: shouldn't this be handled by the conversion itself ?
File ~\anaconda3\lib\functools.py:888, in singledispatch.<locals>.wrapper(*args, **kw)
884 if not args:
885 raise TypeError(f'{funcname} requires at least '
886 '1 positional argument')
--> 888 return dispatch(args[0].__class__)(*args, **kw)
File ~\anaconda3\lib\site-packages\rpy2\robjects\numpy2ri.py:134, in nonnumpy2rpy(obj)
129 return ro.default_converter.py2rpy(obj)
130 elif original_converter is None:
131 # This means that the conversion module was not "activated".
132 # For now, go with the default_converter.
133 # TODO: the conversion system needs an overhaul badly.
--> 134 return ro.default_converter.py2rpy(obj)
135 else:
136 # The conversion module was "activated"
137 return original_converter.py2rpy(obj)
File ~\anaconda3\lib\functools.py:888, in singledispatch.<locals>.wrapper(*args, **kw)
884 if not args:
885 raise TypeError(f'{funcname} requires at least '
886 '1 positional argument')
--> 888 return dispatch(args[0].__class__)(*args, **kw)
File ~\anaconda3\lib\site-packages\rpy2\robjects\conversion.py:240, in _py2rpy(obj)
238 if isinstance(obj, _rinterface_capi.SupportsSEXP):
239 return obj
--> 240 raise NotImplementedError(
241 "Conversion 'py2rpy' not defined for objects of type '%s'" %
242 str(type(obj))
243 )
NotImplementedError: Conversion 'py2rpy' not defined for objects of type '<class 'ipywidgets.widgets.widget_string.Text'>'
Turns out there were some input parameters that I needed to set in prior steps for them to be stored properly. Once I did that, the above error was resolved.
Fair point, TY Robin! The resolution to my error was that prior steps had a text box widget input/prompt. I needed to press Enter after typing into those textbox widgets. Once I did so, subsequent model-building steps recognized those inputs. Pressing Enter was key.
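In other words, the traceback shows that one of the -i variables was still bound to the ipywidgets Text widget object itself rather than to the string it holds, and rpy2 has no conversion rule for widget objects. A minimal sketch of the distinction (the widget name is illustrative, not from the original code):
import ipywidgets as widgets
cutoff_widget = widgets.Text(description='cutoff')  # assume this was displayed and filled in earlier
cutoff = cutoff_widget.value   # a plain str, which rpy2 can convert
%R -i cutoff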

Unit issue with MetPy's parcel_profile function

I have been working on a program to plot Skew-Ts from Wyoming's weather servers. The issue I am having is that I get an error when attempting to run the parcel_profile function: it says it cannot convert from dimensionless to hectopascals. The pressure array being fed into the function, as well as the temperature and dewpoint data points, have the appropriate units attached, though. To add to my confusion, I have the exact same code on another machine with the same library versions and it runs fine there. Am I missing an obvious problem? Code and relevant library versions are listed below:
import metpy as mp
from metpy.units import units
import metpy.calc as mpcalc
from siphon.simplewebservice.wyoming import WyomingUpperAir
from datetime import datetime
import pandas as pd
import numpy as np
final_time = datetime(2022, 1, 21, 12)
station = 'ABQ'
df = WyomingUpperAir.request_data(final_time, station)
data_dict = {"Press":"", "Temp": "", "Dew_Point": "", "Height":"",
"Mask": "", "Parcel": "", "Idx": "", "U": "", "V": ""}
data_dict['Press'] = df['pressure'].values * units(df.units['pressure'])
data_dict['Temp'] = df['temperature'].values * units(df.units['temperature'])
data_dict['Dew_Point'] = df['dewpoint'].values * units(df.units['dewpoint'])
data_dict['Height'] = df['height'].values * units(df.units['height'])
data_dict['U'] = df['u_wind'].values * units(df.units['u_wind'])
data_dict['V'] = df['v_wind'].values * units(df.units['v_wind'])
data_dict['Parcel'] = mpcalc.parcel_profile(data_dict['Press'],
                                            data_dict['Temp'][0],
                                            data_dict['Dew_Point'][0]).to('degC')
Error:
DimensionalityError Traceback (most recent call last)
C:\Users\####################.py in <module>
----> 1 data_dict['Parcel'] = mpcalc.parcel_profile(data_dict['Press'],
2 data_dict['Temp'][0],
3 data_dict['Dew_Point'][0]).to('degC')
~\anaconda3\envs\Met_World\lib\site-packages\metpy\xarray.py in wrapper(*args, **kwargs)
1214
1215 # Evaluate inner calculation
-> 1216 result = func(*bound_args.args, **bound_args.kwargs)
1217
1218 # Wrap output based on match and match_unit
~\anaconda3\envs\Met_World\lib\site-packages\metpy\units.py in wrapper(*args, **kwargs)
244 'that the function is being called properly.\n') + msg
245 raise ValueError(msg)
--> 246 return func(*args, **kwargs)
247
248 return wrapper
~\anaconda3\envs\Met_World\lib\site-packages\metpy\calc\thermo.py in parcel_profile(pressure, temperature, dewpoint)
737
738 """
--> 739 _, _, _, t_l, _, t_u = _parcel_profile_helper(pressure, temperature, dewpoint)
740 return concatenate((t_l, t_u))
741
~\anaconda3\envs\Met_World\lib\site-packages\metpy\calc\thermo.py in _parcel_profile_helper(pressure, temperature, dewpoint)
892
893 # If the pressure profile doesn't make it to the lcl, we can stop here
--> 894 if _greater_or_close(np.nanmin(pressure), press_lcl):
895 return (press_lower[:-1], press_lcl, units.Quantity(np.array([]), press_lower.units),
896 temp_lower[:-1], temp_lcl, units.Quantity(np.array([]), temp_lower.units))
~\anaconda3\envs\Met_World\lib\site-packages\metpy\calc\tools.py in _greater_or_close(a, value, **kwargs)
738
739 """
--> 740 return (a > value) | np.isclose(a, value, **kwargs)
741
742
~\anaconda3\envs\Met_World\lib\site-packages\pint\quantity.py in __array_ufunc__(self, ufunc, method, *inputs, **kwargs)
1721 )
1722
-> 1723 return numpy_wrap("ufunc", ufunc, inputs, kwargs, types)
1724
1725 def __array_function__(self, func, types, args, kwargs):
~\anaconda3\envs\Met_World\lib\site-packages\pint\numpy_func.py in numpy_wrap(func_type, func, args, kwargs, types)
919 if name not in handled or any(is_upcast_type(t) for t in types):
920 return NotImplemented
--> 921 return handled[name](*args, **kwargs)
~\anaconda3\envs\Met_World\lib\site-packages\pint\numpy_func.py in implementation(*args, **kwargs)
284 if input_units == "all_consistent":
285 # Match all input args/kwargs to same units
--> 286 stripped_args, stripped_kwargs = convert_to_consistent_units(
287 *args, pre_calc_units=first_input_units, **kwargs
288 )
~\anaconda3\envs\Met_World\lib\site-packages\pint\numpy_func.py in convert_to_consistent_units(pre_calc_units, *args, **kwargs)
105 """
106 return (
--> 107 tuple(convert_arg(arg, pre_calc_units=pre_calc_units) for arg in args),
108 {
109 key: convert_arg(arg, pre_calc_units=pre_calc_units)
~\anaconda3\envs\Met_World\lib\site-packages\pint\numpy_func.py in <genexpr>(.0)
105 """
106 return (
--> 107 tuple(convert_arg(arg, pre_calc_units=pre_calc_units) for arg in args),
108 {
109 key: convert_arg(arg, pre_calc_units=pre_calc_units)
~\anaconda3\envs\Met_World\lib\site-packages\pint\numpy_func.py in convert_arg(arg, pre_calc_units)
87 return arg
88 else:
---> 89 raise DimensionalityError("dimensionless", pre_calc_units)
90 elif _is_quantity(arg):
91 return arg.m
DimensionalityError: Cannot convert from 'dimensionless' to 'hectopascal'
Libraries used:
python 3.9.7
metpy 1.1.0
pandas 1.2.4
numpy 1.22.0
xarray 0.20.2
My first guess is that this is a problem with multiplying whatever e.g. df['u_wind'].values is returning by units. While it's a nicer syntax, the more robust way is to use the Quantity constructor:
data_dict['Press'] = units.Quantity(df['pressure'].values, units(df.units['pressure']))
You can shorten all of that, though, and avoid calling Quantity() manually by using MetPy's helper metpy.units.pandas_dataframe_to_unit_arrays (a module-level function, not a method on the unit registry):
from metpy.units import pandas_dataframe_to_unit_arrays
data_dict = pandas_dataframe_to_unit_arrays(df)
If you want the column names you were originally using, you can change them with df.rename().
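For context, a minimal sketch of how the question's code might look with that helper; the column names ('pressure', 'temperature', 'dewpoint') follow the df accesses already shown in the question:
from metpy.units import pandas_dataframe_to_unit_arrays
data_dict = pandas_dataframe_to_unit_arrays(df)  # each column becomes a pint-wrapped array
parcel = mpcalc.parcel_profile(data_dict['pressure'],
                               data_dict['temperature'][0],
                               data_dict['dewpoint'][0]).to('degC')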

Overriding * imports globally for jupyter

I'm running JupyterLab on Windows and fastai.vision.utils.verify_images(fns) is giving me problems because it calls fastcore.parallel.parallel with the default n_workers=8. There are many ways around it, but I was trying to figure out a code block that I could drop into any notebook so that all underlying calls to parallel run with n_workers=1.
I tried the following cell:
import fastcore
import sys
_fastcore = fastcore
_parallel = lambda *args, **kwargs: fastcore.parallel.parallel(*args, **kwargs, n_workers=1)
_fastcore.parallel.parallel = _parallel
sys.modules['fastcore'] = _fastcore
fastcore.parallel.parallel
printing
<function __main__.<lambda>(*args, **kwargs)>
but when I try running verify_images it still fails as if the patch never happened
---------------------------------------------------------------------------
BrokenProcessPool Traceback (most recent call last)
<ipython-input-37-f1773f2c9e62> in <module>
3 # from mock import patch
4 # with patch('fastcore.parallel.parallel') as _parallel:
----> 5 failed = verify_images(fns)
6 # failed = L(fns[i] for i,o in enumerate(_parallel(verify_image, fns)) if not o)
7 failed
~\anaconda3\lib\site-packages\fastai\vision\utils.py in verify_images(fns)
59 def verify_images(fns):
60 "Find images in `fns` that can't be opened"
---> 61 return L(fns[i] for i,o in enumerate(parallel(verify_image, fns)) if not o)
62
63 # Cell
~\anaconda3\lib\site-packages\fastcore\parallel.py in parallel(f, items, n_workers, total, progress, pause, threadpool, timeout, chunksize, *args, **kwargs)
121 if total is None: total = len(items)
122 r = progress_bar(r, total=total, leave=False)
--> 123 return L(r)
124
125 # Cell
~\anaconda3\lib\site-packages\fastcore\foundation.py in __call__(cls, x, *args, **kwargs)
95 def __call__(cls, x=None, *args, **kwargs):
96 if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 97 return super().__call__(x, *args, **kwargs)
98
99 # Cell
~\anaconda3\lib\site-packages\fastcore\foundation.py in __init__(self, items, use_list, match, *rest)
103 def __init__(self, items=None, *rest, use_list=False, match=None):
104 if (use_list is not None) or not is_array(items):
--> 105 items = listify(items, *rest, use_list=use_list, match=match)
106 super().__init__(items)
107
~\anaconda3\lib\site-packages\fastcore\basics.py in listify(o, use_list, match, *rest)
54 elif isinstance(o, list): res = o
55 elif isinstance(o, str) or is_array(o): res = [o]
---> 56 elif is_iter(o): res = list(o)
57 else: res = [o]
58 if match is not None:
~\anaconda3\lib\concurrent\futures\process.py in _chain_from_iterable_of_lists(iterable)
482 careful not to keep references to yielded objects.
483 """
--> 484 for element in iterable:
485 element.reverse()
486 while element:
~\anaconda3\lib\concurrent\futures\_base.py in result_iterator()
609 # Careful not to keep a reference to the popped future
610 if timeout is None:
--> 611 yield fs.pop().result()
612 else:
613 yield fs.pop().result(end_time - time.monotonic())
~\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
437 raise CancelledError()
438 elif self._state == FINISHED:
--> 439 return self.__get_result()
440 else:
441 raise TimeoutError()
~\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
386 def __get_result(self):
387 if self._exception:
--> 388 raise self._exception
389 else:
390 return self._result
BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
I suspect it has to do with fastai.vision.utils using a * import from fastcore. Is there a way to achieve what I want?
Since the parallel function has already been imported into the fastai.vision.utils module, the correct way is to monkeypatch that module rather than fastcore.parallel:
... # your code for custom `parallel` function goes here
import fastai.vision.utils
fastai.vision.utils.parallel = _parallel # assign your custom function here
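Putting both pieces together, a minimal self-contained sketch; the wrapper mirrors the one from the question, but sets the keyword inside kwargs so a caller that passes its own n_workers doesn't trigger a duplicate-argument TypeError:
from fastcore.parallel import parallel as _orig_parallel
import fastai.vision.utils

def _parallel(*args, **kwargs):
    kwargs['n_workers'] = 1          # force single-worker execution regardless of the caller
    return _orig_parallel(*args, **kwargs)

# Patch the name that verify_images actually resolves, not fastcore.parallel
fastai.vision.utils.parallel = _parallel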

Dask.Series .mean().compute() results in "TypeError: _sum() got an unexpected keyword argument 'skipna' "

I do not grasp how the dask module uses lazy evaluation under the hood.
In the minimal example below, the numbers stand in for a significantly larger data set, hence the float64 (overflow concerns).
What is the error in this "dask-like" syntax, and why does it produce this (seemingly unrelated?) error?
import pandas as pd
import numpy as np
from dask import dataframe as dd
df = pd.DataFrame({'foo': [[1,2,3], [4,5,6]]})
ddf = dd.from_pandas(df, npartitions=2)
In pandas I would do
In[1]: df['foo'].apply(np.float64).mean()
Out[1]: array([2.5, 3.5, 4.5])
which is the same as
In[2]: ddf['foo'].apply(np.float64, meta=('foo','f8')).compute().mean()
Out[2]: array([2.5, 3.5, 4.5])
since this first evaluates the pandas.Series and then calculates the mean.
To make the whole calculation lazy, I tried
In[3]: ddf['foo'].apply(np.float64, meta=('foo','f8')).mean().compute()
but I do not understand the TypeError.
The stack trace:
TypeError Traceback (most recent call last)
<ipython-input-6-8704408e68ef> in <module>
----> 1 ddf['foo'].apply(np.float64, meta=('foo','f8')).mean().compute()
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
277 dask.base.compute
278 """
--> 279 (result,) = compute(self, traverse=False, **kwargs)
280 return result
281
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
559 postcomputes.append(x.__dask_postcompute__())
560
--> 561 results = schedule(dsk, keys, **kwargs)
562 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
563
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
74 pools[thread][num_workers] = pool
75
---> 76 results = get_async(
77 pool.apply_async,
78 len(pool._pool),
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
485 _execute_task(task, data) # Re-execute locally
486 else:
--> 487 raise_exception(exc, tb)
488 res, worker_id = loads(res_info)
489 state["cache"][key] = res
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in reraise(exc, tb)
315 if exc.__traceback__ is not tb:
316 raise exc.with_traceback(tb)
--> 317 raise exc
318
319
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/utils.py in apply(func, args, kwargs)
29 def apply(func, args, kwargs=None):
30 if kwargs:
---> 31 return func(*args, **kwargs)
32 else:
33 return func(*args)
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/dataframe/core.py in _reduction_aggregate(x, aca_aggregate, **kwargs)
6289 if isinstance(x, list):
6290 x = pd.Series(x)
-> 6291 return aca_aggregate(x, **kwargs)
6292
6293
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/utils.py in __call__(self, obj, *args, **kwargs)
895
896 def __call__(self, obj, *args, **kwargs):
--> 897 return getattr(obj, self.method)(*args, **kwargs)
898
899 def __reduce__(self):
TypeError: _sum() got an unexpected keyword argument 'skipna'
If you could give me a hint about where to find the answer, it would be really helpful!
Python 3.8.6
Numpy 1.19.4
Dask 2021.01.0
Pandas 1.1.4
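No answer is included in this excerpt, but the traceback offers a hint: the final frame calls getattr(obj, self.method)(*args, **kwargs) with skipna, and the failing _sum() looks like NumPy's internal sum implementation, which, unlike pandas', does not accept skipna; the per-partition results here are NumPy arrays rather than pandas objects, so dask's Series aggregation path gets a type it didn't expect. One possible workaround, a sketch of my own rather than anything from the thread, is to keep the data as a dask array, whose reductions stay on the NumPy path; note that it materializes the column into a (2, 3) array up front:
import numpy as np
import pandas as pd
import dask.array as da

df = pd.DataFrame({'foo': [[1, 2, 3], [4, 5, 6]]})
arr = da.from_array(np.stack(df['foo'].to_numpy()), chunks=(1, 3))  # one row per chunk
arr.mean(axis=0).compute()   # array([2.5, 3.5, 4.5])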

NameError: name 'onset_to_death' is not defined. Works in Py2 but not Py3

I'm taking an online Python course (EpiSkills, which uses the Jupyter notebook) that was written in Python 2.7, and I'm on Python 3.6.4, so I have run into a few compatibility issues along the way. Most of the time I've been able to stumble through, but I can't figure out this one, so I was hoping someone might be able to help.
I start with the following packages:
import pandas as pd
import epipy
import seaborn as sns
%pylab inline
import statsmodels.api as sm
from scipy import stats
import numpy as np
And I use the following code to create a model and a pandas Series:
multivar_model = sm.formula.glm('age ~ onset_to_hospital + onset_to_death + sex',
                                data=my_data).fit()
new_data = pd.Series([6, 8, 'male'], index=['onset_to_hospital', 'onset_to_death', 'sex'])
When I run the following code, it throws the error I've attached below:
multivar_model.predict(new_data)
The intended output is meant to be this:
array([ 60.6497459])
I know that a lot of NameErrors occur because something has been specified in the local, not global, environment, but I'm unsure how to correct it in this instance. Any help is much appreciated.
Thanks!
C
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
116 try:
--> 117 return f(*args, **kwargs)
118 except Exception as e:
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>()
NameError: name 'onset_to_death' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
<ipython-input-79-e0364e267da7> in <module>()
----> 1 multivar_model.predict(new_data)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\statsmodels\base\model.py in predict(self, exog, transform, *args, **kwargs)
774 exog_index = exog.index
775 exog = dmatrix(self.model.data.design_info.builder,
--> 776 exog, return_type="dataframe")
777 if len(exog) < len(exog_index):
778 # missing values, rows have been dropped
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
167 return build_design_matrices(design_infos, data,
168 NA_action=NA_action,
--> 169 return_type=return_type)
170 else:
171 # No builders, but maybe we can still get matrices
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in build_design_matrices(design_infos, data, NA_action, return_type, dtype)
886 for factor_info in six.itervalues(design_info.factor_infos):
887 if factor_info not in factor_info_to_values:
--> 888 value, is_NA = _eval_factor(factor_info, data, NA_action)
889 factor_info_to_isNAs[factor_info] = is_NA
890 # value may now be a Series, DataFrame, or ndarray
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in _eval_factor(factor_info, data, NA_action)
61 def _eval_factor(factor_info, data, NA_action):
62 factor = factor_info.factor
---> 63 result = factor.eval(factor_info.state, data)
64 # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask
65 if factor_info.type == "numerical":
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
122 origin)
123 # Use 'exec' to hide this syntax from the Python 2 parser:
--> 124 exec("raise new_exc from e")
125 else:
126 # In python 2, we just let the original exception escape -- better
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in <module>()
PatsyError: Error evaluating factor: NameError: name 'onset_to_death' is not defined
age ~ onset_to_hospital + onset_to_death + sex
^^^^^^^^^^^^^^
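No resolution is included in this excerpt, so the following is only a guess on my part, not something from the course or the thread: since patsy fails to look up onset_to_death while rebuilding the design matrix from the bare Series, handing predict() a one-row DataFrame (whose column labels patsy can resolve like variable names) is worth trying:
# hypothetical workaround: wrap the Series in a one-row DataFrame
new_df = pd.DataFrame([new_data])   # columns: onset_to_hospital, onset_to_death, sex
multivar_model.predict(new_df)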
