I have a function that plots a line, something like this:
def tmp_plot(*args, **kwargs):
    """Plot the fixed points (1, 1) through (5, 5) with ``plt.plot``.

    Any positional and keyword arguments are forwarded to ``plt.plot``
    unchanged, after the x and y data.
    """
    plt.plot([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], *args, **kwargs)
When I call it, passing line as a keyword argument like this:
tmp_plot(line = '-')
I get this error:
TypeError: set_lineprops() got multiple values for keyword argument 'line'
but it works fine with the color argument.
I'm using matplotlib 1.4.3 and python 2.7.7
Any clues?
You can see where Matplotlib adds its own line argument in the Traceback below. This means your own keyword argument is a duplicate of Matplotlib's own one in the set_lineprops call:
In [1]: import matplotlib.pyplot as plt
In [2]: plt.plot([1,2,3], [1,4,9], line='-')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-82-f298702afcfe> in <module>()
----> 1 plt.plot([1,2,3], [1,4,9], line='-')
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/pyplot.py in plot(*args, **kwargs)
2985 ax.hold(hold)
2986 try:
-> 2987 ret = ax.plot(*args, **kwargs)
2988 draw_if_interactive()
2989 finally:
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in plot(self, *args, **kwargs)
4137 lines = []
4138
-> 4139 for line in self._get_lines(*args, **kwargs):
4140 self.add_line(line)
4141 lines.append(line)
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _grab_next_args(self, *args, **kwargs)
317 return
318 if len(remaining) <= 3:
--> 319 for seg in self._plot_args(remaining, kwargs):
320 yield seg
321 return
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _plot_args(self, tup, kwargs)
305 ncx, ncy = x.shape[1], y.shape[1]
306 for j in range(max(ncx, ncy)):
--> 307 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
308 ret.append(seg)
309 return ret
/Users/xnx/anaconda/envs/py33/lib/python3.3/site-packages/matplotlib/axes.py in _makeline(self, x, y, kw, kwargs)
257 **kw
258 )
--> 259 self.set_lineprops(seg, **kwargs)
260 return seg
261
TypeError: set_lineprops() got multiple values for argument 'line'
In any case, perhaps you meant ls or linestyle instead of line?
In [83]: plt.plot([1,2,3], [1,4,9], ls='-')
Out[83]: [<matplotlib.lines.Line2D at 0x10ed65610>]
My guess is that matplotlib's internals unpack their own dictionary of parameters in addition to the caller-provided ones without stripping out duplicates, so both you and the matplotlib internals end up supplying a keyword parameter of the same name via two parallel routes.
Related
I just started to use Ray for handling the data.
I tried to run the code below:
import ray
ray.init(num_cpus = 16, ignore_reinit_error=True, object_store_memory = 10**10)
@ray.remote
def PROB_SCORES(words):
    """Return the last softmax score minus the first for ``words``.

    ``words`` is tokenized with ``tokenizer``, run through ``model``, and the
    first row of the first output is converted to a NumPy array and passed
    through ``softmax``.

    NOTE(review): ``tokenizer`` and ``model`` are looked up outside this
    function, so they are presumably serialized together with the remote
    function. The ValueError quoted after this snippet complains about such
    an implicit capture and suggests ``ray.put()`` -- confirm.
    """
    encoded_tweet = tokenizer(words, return_tensors='pt')
    output = model(**encoded_tweet)
    # Convert the output tensor into probabilities.
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    prob_scores = scores[0] * (-1) + scores[-1] * 1  # neg to -1, pos to 1
    return prob_scores
ray.get([PROB_SCORES.remote(Tweets['Content'])]) # 'Tweets' is the dataframe which contains data.
However, I get an error message like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
c:\Users\HERO\OneDrive\Github\[i-MAES]\[TextAnalysis]\TextAnalysis\NLTK_STUDY\2. Senti.ipynb Cell 11' in <cell line: 1>()
----> 1 ray.get([PROB_SCORES.remote(S8['Content'])])
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\remote_function.py:111, in RemoteFunction.__init__.<locals>._remote_proxy(*args, **kwargs)
109 @wraps(function)
110 def _remote_proxy(*args, **kwargs):
--> 111 return self._remote(args=args, kwargs=kwargs, **self._default_options)
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\util\tracing\tracing_helper.py:303, in _tracing_task_invocation.<locals>._invocation_remote_span(self, args, kwargs, *_args, **_kwargs)
301 if kwargs is not None:
302 assert "_ray_trace_ctx" not in kwargs
--> 303 return method(self, args, kwargs, *_args, **_kwargs)
305 assert "_ray_trace_ctx" not in kwargs
307 tracer = _opentelemetry.trace.get_tracer(__name__)
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\remote_function.py:213, in RemoteFunction._remote(self, args, kwargs, **task_options)
210 raise TypeError(msg) from e
212 self._last_export_session_and_job = worker.current_session_and_job
--> 213 worker.function_actor_manager.export(self)
215 kwargs = {} if kwargs is None else kwargs
216 args = [] if args is None else args
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\_private\function_manager.py:205, in FunctionActorManager.export(self, remote_function)
...
653 ray_constants.FUNCTION_SIZE_ERROR_THRESHOLD // (1024 * 1024),
654 )
--> 655 raise ValueError(error)
ValueError: The remote function __main__.PROB_SCORES is too large (476 MiB > FUNCTION_SIZE_ERROR_THRESHOLD=95 MiB). Check that its definition is not implicitly capturing a large array or other object in scope. Tip: use ray.put() to put large objects in the Ray object store.
I have no idea why this problem is happening. The 'Tweets' dataframe consumes the memory of only about 5MB.
Any help would be a lifesaver.
I'm taking an online python course (EpiSkills, which uses the Jupyter notebook) that was written in Python 2.7, and I'm on Python 3.6.4 so I have run into a few compatibility issues along the way. Most of the time I've been able to stumble through, but can't figure out this one, so was hoping someone might be able to help.
I start with the following packages:
import pandas as pd
import epipy
import seaborn as sns
%pylab inline
import statsmodels.api as sm
from scipy import stats
import numpy as np
And use the following code to create a pandas series and model:
# Fit a GLM of age on the two onset intervals and sex. The formula's last
# term ("sex") is taken from the formula echoed in the PatsyError further down.
multivar_model = sm.formula.glm(
    'age ~ onset_to_hospital + onset_to_death + sex',
    data=my_data,
).fit()
new_data = pd.Series([6, 8, 'male'], index=['onset_to_hospital', 'onset_to_death', 'sex'])
When I pass this to the following code, it throws the error that I've attached:
multivar_model.predict(new_data)
NameError part1
NameError part2
The intended output is meant to be this:
array([ 60.6497459])
I know that a lot of NameErrors are because something has been specified in the local, not global, environment but I'm unsure how to correct it in this instance. Any help is much appreciated.
Thanks!
C
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
116 try:
--> 117 return f(*args, **kwargs)
118 except Exception as e:
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>()
NameError: name 'onset_to_death' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
<ipython-input-79-e0364e267da7> in <module>()
----> 1 multivar_model.predict(new_data)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\statsmodels\base\model.py in predict(self, exog, transform, *args, **kwargs)
774 exog_index = exog.index
775 exog = dmatrix(self.model.data.design_info.builder,
--> 776 exog, return_type="dataframe")
777 if len(exog) < len(exog_index):
778 # missing values, rows have been dropped
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
167 return build_design_matrices(design_infos, data,
168 NA_action=NA_action,
--> 169 return_type=return_type)
170 else:
171 # No builders, but maybe we can still get matrices
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in build_design_matrices(design_infos, data, NA_action, return_type, dtype)
886 for factor_info in six.itervalues(design_info.factor_infos):
887 if factor_info not in factor_info_to_values:
--> 888 value, is_NA = _eval_factor(factor_info, data, NA_action)
889 factor_info_to_isNAs[factor_info] = is_NA
890 # value may now be a Series, DataFrame, or ndarray
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in _eval_factor(factor_info, data, NA_action)
61 def _eval_factor(factor_info, data, NA_action):
62 factor = factor_info.factor
---> 63 result = factor.eval(factor_info.state, data)
64 # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask
65 if factor_info.type == "numerical":
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
122 origin)
123 # Use 'exec' to hide this syntax from the Python 2 parser:
--> 124 exec("raise new_exc from e")
125 else:
126 # In python 2, we just let the original exception escape -- better
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in <module>()
PatsyError: Error evaluating factor: NameError: name 'onset_to_death' is not defined
age ~ onset_to_hospital + onset_to_death + sex
^^^^^^^^^^^^^^
I can extract an autocorrelation value for a specific lag time with this:
df.rolling(window = 10).apply(lambda x: acf(x, nlags = 5)[5]).plot()
However since acf is actually doing all the calculations anyway, I'd like to get all the results calculated, not just a single one. The idea would be that I could then unpack this single returned array/list into a bunch of columns and plot each one separately but not run through acf so many unnecessary times. So I tried:
df.rolling(window = 10).apply(lambda x: list(acf(x, nlags = 5)))
This throws the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-63-e5f337100eb5> in <module>()
----> 1 df.rolling(window = 10).apply(lambda x: list(acf(x, nlags = 5)))
/Users/a/anaconda3/lib/python3.5/site-packages/pandas/core/window.py in apply(self, func, args, kwargs)
861 @Appender(_shared_docs['apply'])
862 def apply(self, func, args=(), kwargs={}):
--> 863 return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
864
865 @Substitution(name='rolling')
/Users/a/anaconda3/lib/python3.5/site-packages/pandas/core/window.py in apply(self, func, args, kwargs)
619
620 return self._apply(f, func, args=args, kwargs=kwargs,
--> 621 center=False)
622
623 def sum(self, **kwargs):
/Users/a/anaconda3/lib/python3.5/site-packages/pandas/core/window.py in _apply(self, func, name, window, center, check_minp, how, **kwargs)
556
557 if values.ndim > 1:
--> 558 result = np.apply_along_axis(calc, self.axis, values)
559 else:
560 result = calc(values)
/Users/a/anaconda3/lib/python3.5/site-packages/numpy/lib/shape_base.py in apply_along_axis(func1d, axis, arr, *args, **kwargs)
89 outshape = asarray(arr.shape).take(indlist)
90 i.put(indlist, ind)
---> 91 res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
92 # if res is a number, then we have a smaller output array
93 if isscalar(res):
/Users/a/anaconda3/lib/python3.5/site-packages/pandas/core/window.py in calc(x)
553
554 def calc(x):
--> 555 return func(x, window, min_periods=self.min_periods)
556
557 if values.ndim > 1:
/Users/a/anaconda3/lib/python3.5/site-packages/pandas/core/window.py in f(arg, window, min_periods)
616 minp = _use_window(min_periods, window)
617 return algos.roll_generic(arg, window, minp, offset, func, args,
--> 618 kwargs)
619
620 return self._apply(f, func, args=args, kwargs=kwargs,
pandas/algos.pyx in pandas.algos.roll_generic (pandas/algos.c:51581)()
TypeError: a float is required
Does this mean apply style operations with rolling can only handle floats? At least for groupby I have often had occasion to return lists or sets, but perhaps rolling is not so flexible?
To plot acf results you may want to try tsaplots.plot_acf():
from statsmodels.graphics import tsaplots
tsaplots.plot_acf(x, lags = 5, alpha = 0.05)
I'm getting this error "ValueError: invalid literal for float(): 17/08/2015". This is the last row in the file I'm reading and it follows the same format as the others. The code for the script is below.
I'm wondering: is the error actually occurring throughout the file, but only being flagged for this row because it's the last one to fail, if that makes sense to anyone?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the rankings CSV and plot ranking position against date.
# NOTE(review): parse_dates is given a bare string here; the ValueError below
# shows the date values reaching matplotlib as strings such as '17/08/2015',
# so the column was presumably not parsed -- confirm against the pandas docs.
rankings = pd.read_csv('data/rankingunitsdata.csv', parse_dates='date')
rankings.plot('date','rankingpos')
# Raw column values for the manual matplotlib plot below.
x = rankings.date.values
y = rankings.rankingpos.values
plt.plot(x,y, 'o')
# NOTE(review): x holds the dates and y the ranking positions, so these two
# axis labels look swapped -- confirm.
plt.xlabel('Ranking Position')
plt.ylabel('Date')
plt.show()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-15-b6d9eb0809d3> in <module>()
----> 1 plt.plot(x,y, 'o')
2 plt.xlabel('Ranking Position')
3 plt.ylabel('Date')
4 plt.show()
C:\Anaconda3\lib\site-packages\matplotlib\pyplot.py in plot(*args, **kwargs)
3097 ax.hold(hold)
3098 try:
-> 3099 ret = ax.plot(*args, **kwargs)
3100 draw_if_interactive()
3101 finally:
C:\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, *args, **kwargs)
1372
1373 for line in self._get_lines(*args, **kwargs):
-> 1374 self.add_line(line)
1375 lines.append(line)
1376
C:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in add_line(self, line)
1502 line.set_clip_path(self.patch)
1503
-> 1504 self._update_line_limits(line)
1505 if not line.get_label():
1506 line.set_label('_line%d' % len(self.lines))
C:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _update_line_limits(self, line)
1513 Figures out the data limit of the given line, updating self.dataLim.
1514 """
-> 1515 path = line.get_path()
1516 if path.vertices.size == 0:
1517 return
C:\Anaconda3\lib\site-packages\matplotlib\lines.py in get_path(self)
872 """
873 if self._invalidy or self._invalidx:
--> 874 self.recache()
875 return self._path
876
C:\Anaconda3\lib\site-packages\matplotlib\lines.py in recache(self, always)
573 x = ma.asarray(xconv, np.float_)
574 else:
--> 575 x = np.asarray(xconv, np.float_)
576 x = x.ravel()
577 else:
C:\Anaconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
472
473 """
--> 474 return array(a, dtype, copy=False, order=order)
475
476 def asanyarray(a, dtype=None, order=None):
ValueError: could not convert string to float: '17/08/2015'
The error occurs because you are trying to plot some stuff with dates as strings on the x-axis while plt.plot() expects numerical values. Hence it fails when it tries to convert '17/08/2015' to a float, which cannot work.
You need to convert your x-values to datetime objects and then use plt.plot_date, which is for example demonstrated here.
In Seaborn, you can use FacetGrid to set up data-aware grids on which to plot. You can then use the map or map_dataframe methods to plot to those grids.
I am having trouble correctly specifying a user-defined plot function that works with map or map_dataframe. In this example I use the errorbar function, to which I want to pass the error values as a 2xN array-like. In my example (taken from @mwaskom's answer here) the errors are symmetrical -- but imagine I have a situation where they are not.
In [255]:
from scipy import stats
tips_all = sns.load_dataset("tips")
tips_grouped = tips_all.groupby(["smoker", "size"])
tips = tips_grouped.mean()
tips["error_min"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips["error_max"] = tips_grouped.total_bill.apply(stats.sem) * 1.96
tips.reset_index(inplace=True)
tips
Out[255]:
smoker size total_bill tip error_min error_max
0 No 1 8.660000 1.415000 2.763600 2.763600
1 No 2 15.342333 2.489000 0.919042 0.919042
2 No 3 21.009615 3.069231 2.680447 2.680447
3 No 4 27.769231 4.195769 3.303131 3.303131
4 No 5 30.576667 5.046667 11.620808 11.620808
5 No 6 34.830000 5.225000 9.194360 9.194360
6 Yes 1 5.825000 1.460000 5.399800 5.399800
7 Yes 2 17.955758 2.709545 1.805528 1.805528
8 Yes 3 28.191667 4.095000 6.898186 6.898186
9 Yes 4 30.609091 3.992727 5.150063 5.150063
10 Yes 5 29.305000 2.500000 2.263800 2.263800
Define my error bar function, that takes data and indexes the error columns to produce the 2xN array:
In [256]:
def my_errorbar(*args, **kwargs):
    """Draw error bars for two columns of the ``data`` keyword argument.

    ``args[0]`` and ``args[1]`` name the x and y columns of ``data``. The
    ``error_min`` and ``error_max`` columns are stacked into a 2xN array,
    printed, and passed as ``yerr``.

    NOTE: ``data`` is only read here, so it is still inside ``kwargs`` when
    they are forwarded to ``plt.errorbar``. The traceback that follows ends
    in ``set_data`` in matplotlib's lines.py.
    """
    data = kwargs['data']
    errors = np.vstack([data['error_min'],
                        data['error_max']])
    print(errors)
    plt.errorbar(data[args[0]],
                 data[args[1]],
                 yerr=errors,
                 **kwargs);
Call using map_dataframe (because my function gets the data as a kwarg):
In [257]:
g = sns.FacetGrid(tips, col="smoker", size=5)
g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
[[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]
[ 2.7636 0.9190424 2.68044722 3.30313068 11.62080751
9.19436049]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-257-dc8b35ec70ec> in <module>()
1 g = sns.FacetGrid(tips, col="smoker", size=5)
----> 2 g.map_dataframe(my_errorbar, "size", "total_bill", marker="o")
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
509
510 # Draw the plot
--> 511 self._facet_plot(func, ax, args, kwargs)
512
513 # Finalize the annotations and layout
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
527
528 # Draw the plot
--> 529 func(*plot_args, **plot_kwargs)
530
531 # Sort out the supporting information
<ipython-input-256-62202c841233> in my_errorbar(*args, **kwargs)
9 data[args[1]],
10 yerr=errors,
---> 11 **kwargs);
12
13
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/pyplot.py in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, **kwargs)
2764 barsabove=barsabove, lolims=lolims, uplims=uplims,
2765 xlolims=xlolims, xuplims=xuplims,
-> 2766 errorevery=errorevery, capthick=capthick, **kwargs)
2767 draw_if_interactive()
2768 finally:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
2859
2860 if not barsabove and plot_line:
-> 2861 l0, = self.plot(x, y, fmt, **kwargs)
2862
2863 if ecolor is None:
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_axes.py in plot(self, *args, **kwargs)
1371 lines = []
1372
-> 1373 for line in self._get_lines(*args, **kwargs):
1374 self.add_line(line)
1375 lines.append(line)
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _grab_next_args(self, *args, **kwargs)
302 return
303 if len(remaining) <= 3:
--> 304 for seg in self._plot_args(remaining, kwargs):
305 yield seg
306 return
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
290 ncx, ncy = x.shape[1], y.shape[1]
291 for j in xrange(max(ncx, ncy)):
--> 292 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
293 ret.append(seg)
294 return ret
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in _makeline(self, x, y, kw, kwargs)
242 **kw
243 )
--> 244 self.set_lineprops(seg, **kwargs)
245 return seg
246
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/axes/_base.py in set_lineprops(self, line, **kwargs)
184 raise TypeError('There is no line property "%s"' % key)
185 func = getattr(line, funcName)
--> 186 func(val)
187
188 def set_patchprops(self, fill_poly, **kwargs):
/Users/x/miniconda3/envs/default/lib/python3.4/site-packages/matplotlib/lines.py in set_data(self, *args)
557 """
558 if len(args) == 1:
--> 559 x, y = args[0]
560 else:
561 x, y = args
ValueError: too many values to unpack (expected 2)
I don't understand the reason for the failure here. Note that the plot function gets something, because a plot of the first grid of errorbars is produced. I assume I'm not passing the **kwargs dictionary on correctly.
In general, I would find it really helpful if the tutorial for Seaborn contained one or two examples of user-defined plot functions passed to map or map_dataframe.
This is @mwaskom's answer, and it works a treat (see comments):
Just change the my_errorbar function so that it pops the data out of the keyword dict:
def my_errorbar(*args, **kwargs):
    """Draw error bars for two columns of the ``data`` keyword argument.

    ``args[0]`` and ``args[1]`` name the x and y columns of ``data``. The
    ``error_min`` and ``error_max`` columns are stacked into a 2xN array,
    printed, and passed as ``yerr``. All remaining keyword arguments are
    forwarded to ``plt.errorbar``.
    """
    # Pop (rather than index) so that 'data' is not forwarded to plt.errorbar.
    data = kwargs.pop('data')
    errors = np.vstack([data['error_min'],
                        data['error_max']])
    print(errors)
    plt.errorbar(data[args[0]],
                 data[args[1]],
                 yerr=errors,
                 **kwargs)