Pandas sum up to a specific column value - python

This little piece of code is giving me more trouble than it should, and I would appreciate any help. Thank you in advance for taking a look at this for me.
I am trying to sum up the price and the volume over the previous dtt data points, where dtt is a column I added that holds this value (computed from the previous 22 data points). The formula I am trying to represent is:
vwap = [ sum(i=1..dtt) (price_i * volume_i) ] / [ sum(i=1..dtt) volume_i ]
Here is my code:
# Import necessary libraries
import numpy as np
import pandas as pd
import os
# Load SPY dataset
spy_data = pd.read_csv('SPY.csv', parse_dates=['Date'])
# Compute Daily Return
spy_data['daily_ret'] = (spy_data['Adj Close'] - (spy_data['Adj Close']).shift(1)) / ((spy_data['Adj Close']).shift(1)) * 100
spy_data['daily_ret'] = spy_data['daily_ret'].fillna(0.0)
# calculate Annualized Volatility
rsquare = (spy_data['daily_ret']) ** 2
spy_data['annualized_volatility']=(np.sqrt(rsquare.rolling(252).sum() / 251) * np.sqrt(252))
spy_data['annualized_volatility'] = spy_data['annualized_volatility'].fillna(0)
spy_shares = 889112600
# Calculate Days to Trade
spy_data['dtt'] = spy_shares / (spy_data['Volume'].rolling(22).sum()/22)
spy_data['dtt'] = spy_data['dtt'].fillna(1).astype(int)
# Calculate VWAP
#numerator is equal to the sum of the price * volume of the latest DTT
numerator = spy_data.loc[0:spy_data['dtt'], 'Adj Close'].sum()#*spy_data.loc[0:spy_data['dtt'], 'Volume']
#denominator = spy_data.loc[0:spy_data['dtt'], 'Volume']
#spy_data['vwap'] = numerator / denominator
print(spy_data)
I commented out the other lines because I was trying to work through the problem one step at a time. The price column that I need is Adj Close.
No matter what I try in terms of slicing, I keep getting an error. This is the current one:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-68-ade975138e12> in <module>
7 # Calculate VWAP
8 #numerator is equal to the sum of the price * volume of the latest DTT
----> 9 numerator = spy_data.loc[0:spy_data['dtt']].sum()#*spy_data[0:spy_data['dtt'], 'volume']
10 #denominator = spy_data.loc[0:spy_data['dtt'], 'Volume']
11 #spy_data['vwap'] = numerator / denominator
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1408
1409 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1410 return self._getitem_axis(maybe_callable, axis=axis)
1411
1412 def _is_scalar_access(self, key: Tuple):
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1770 if isinstance(key, slice):
1771 self._validate_key(key, axis)
-> 1772 return self._get_slice_axis(key, axis=axis)
1773 elif com.is_bool_indexer(key):
1774 return self._getbool_axis(key, axis=axis)
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
1438 labels = obj._get_axis(axis)
1439 indexer = labels.slice_indexer(
-> 1440 slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name
1441 )
1442
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind)
5025 slice(1, 3)
5026 """
-> 5027 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5028
5029 # return a slice
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
5245 end_slice = None
5246 if end is not None:
-> 5247 end_slice = self.get_slice_bound(end, "right", kind)
5248 if end_slice is None:
5249 end_slice = len(self)
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
5155 # we need to look up the label
5156 try:
-> 5157 slc = self.get_loc(label)
5158 except KeyError as err:
5159 try:
~/.local/lib/python3.6/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
377 except ValueError:
378 raise KeyError(key)
--> 379 return super().get_loc(key, method=method, tolerance=tolerance)
380
381 #Appender(_index_shared_docs["get_indexer"])
~/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2888 )
2889 try:
-> 2890 return self._engine.get_loc(key)
2891 except KeyError:
2892 return self._engine.get_loc(self._maybe_cast_indexer(key))
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
TypeError: '0 1
1 1
2 1
3 1
4 1
..
4694 9
4695 9
4696 10
4697 10
4698 11
Name: dtt, Length: 4699, dtype: int64' is an invalid key
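For what it's worth, here is a minimal sketch of one way I would try to compute this (my own assumption about the intent, not code from the original post): the error happens because the whole spy_data['dtt'] Series is used as a slice bound, so instead I would take each row's scalar dtt value and slice positionally with iloc. This continues from the spy_data frame built above.
# Sketch (assumption): for each row i, sum Adj Close * Volume and Volume
# over the trailing dtt[i] rows (including row i), then divide.
def row_vwap(df, i):
    n = int(df['dtt'].iloc[i])
    window = df.iloc[max(0, i - n + 1): i + 1]
    return (window['Adj Close'] * window['Volume']).sum() / window['Volume'].sum()

spy_data['vwap'] = [row_vwap(spy_data, i) for i in range(len(spy_data))]
print(spy_data[['Adj Close', 'Volume', 'dtt', 'vwap']].tail())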

Related

KeyError: 96 when using TimeseriesGenerator

I have data, which has 2 input columns and 42 output columns. Here is what my code looks like:
data_columns=["value","average"]
prediction_columns=[]
for i in range(43):
    prediction_columns.append("s"+str(i))
from keras.preprocessing.sequence import TimeseriesGenerator
n_input = 24*4 #how many samples/rows/timesteps to look in the past in order to forecast the next sample
n_features= len(prediction_columns)#X_train.shape[1] # how many predictors/Xs/features we have to predict y
b_size = 7 # Number of timeseries samples in each batch
generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
print(generator[0][0].shape)
This fails with the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File /usr/lib/python3.10/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 96
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [38], in <cell line: 8>()
5 b_size = 7 # Number of timeseries samples in each batch
6 generator = TimeseriesGenerator(columns[data_columns], columns[prediction_columns], length=n_input, batch_size=b_size)
----> 8 print(generator[0][0].shape)
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in __getitem__(self, index)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/keras/preprocessing/sequence.py:176, in <listcomp>(.0)
172 rows = np.random.randint(
173 self.start_index, self.end_index + 1, size=self.batch_size
174 )
175 else:
--> 176 i = self.start_index + self.batch_size * self.stride * index
177 rows = np.arange(
178 i,
179 min(i + self.batch_size * self.stride, self.end_index + 1),
180 self.stride,
181 )
183 samples = np.array(
184 [
185 self.data[row - self.length : row : self.sampling_rate]
186 for row in rows
187 ]
188 )
File /usr/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File /usr/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 96
I thought that maybe I'm providing invalid column names, but both columns[data_columns].head() and columns[prediction_columns].head() execute without issues.
The shape of columns is (42749, 45).
What could be the source of the problem?
The problem went away after I converted the DataFrames to NumPy arrays:
generator = TimeseriesGenerator(work[data_columns].to_numpy(), work[prediction_columns].to_numpy(), length=n_input, batch_size=b_size)
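A small illustration of why the conversion helps (my own sketch, not from the original post): indexing a DataFrame with a bare integer is treated as a column-label lookup, while the same index on a NumPy array selects a row, which is what TimeseriesGenerator expects.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(200).reshape(100, 2), columns=["value", "average"])
print(df.to_numpy()[96])  # row 96 of the underlying array -- works
# print(df[96])           # raises KeyError: 96, because 96 is not a column label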

Pandas read_csv - non-printable character (columns not recognized)

Could someone tell me what non-printable character I have in my code that makes Python not recognize the column names in my DataFrame?
import pandas as pd
data_olymp = pd.read_csv("Olympics_data.csv", sep=";")
Here is the traceback of the error when I try to group by team_name:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-103-ae95f10f5210> in <module>
30 # print(type(réponse1))
31 # print(len(winter_games_bronze_won))
---> 32 print(data_olymp.loc[" winter_games_bronze_won"] == 9)
~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
893
894 maybe_callable = com.apply_if_callable(key, self.obj)
--> 895 return self._getitem_axis(maybe_callable, axis=axis)
896
897 def _is_scalar_access(self, key: Tuple):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1122 # fall thru to straight lookup
1123 self._validate_key(key, axis)
-> 1124 return self._get_label(key, axis=axis)
1125
1126 def _get_slice_axis(self, slice_obj: slice, axis: int):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
1071 def _get_label(self, label, axis: int):
1072 # GH#5667 this will fail if the label is not present in the axis.
-> 1073 return self.obj.xs(label, axis=axis)
1074
1075 def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
~\anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3737 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3738 else:
-> 3739 loc = index.get_loc(key)
3740
3741 if isinstance(loc, np.ndarray):
~\anaconda3\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
352 except ValueError as err:
353 raise KeyError(key) from err
--> 354 raise KeyError(key)
355 return super().get_loc(key, method=method, tolerance=tolerance)
356
KeyError: ' winter_games_bronze_won'
The file looks like this:
team_name; summer_games_played; summer_games_gold_won; summer_games_silver_won; summer_games_bronze_won; summer_games_medals_won; winter_games_played; winter_games_gold_won; winter_games_silver_won; winter_games_bronze_won; winter_games_medals_won; total_games_played
Canada (CAN);13;0;0;2;2;0;0;0;0;0;13
United States (USA);12;5;2;8;15;3;0;0;0;0;15
Russia (RUS);23;18;24;28;70;18;0;0;0;0;41
Key errors are raised when you try to access a key that is not in a dictionary. Working with Pandas, it is much the same thing: .loc is trying to locate a label that is not found in the DataFrame.
Looking at your code and the traceback, my assumption is that you are getting the error because you are looking up winter_games_bronze_won with the space at the beginning. Try removing the spaces before winter_games_bronze_won and see what happens.
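A brief sketch of that suggestion (my code, not part of the original answer): skipinitialspace=True tells read_csv to drop the space that follows each ";", so the column names come in clean; alternatively, strip the names after loading.
import pandas as pd

data_olymp = pd.read_csv("Olympics_data.csv", sep=";", skipinitialspace=True)
# or, after loading:
# data_olymp.columns = data_olymp.columns.str.strip()

# Assuming the goal was to filter rows on that column (rather than look up a row label with .loc):
print(data_olymp[data_olymp["winter_games_bronze_won"] == 9])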

loc() function won't read datetime date.today() to split a pandas DataFrame (KeyError)

I have a pandas DataFrame and want to extract data by date.
If I use the loc() function with the date as a string, like
data_today = data.loc["2020-07-20"]
it works and shows the right data. But if I use a datetime it won't work.
import datetime as dt
from datetime import datetime , timedelta
date_today = dt.date.today() - dt.timedelta(days=5)
print(date_today)
data_today = data.loc["date_today"]
data_today
If I print date_today, the format matches the DataFrame's, e.g. "2020-07-20".
Thanks for the help.
Here is the full code and error message:
import datetime as dt
from datetime import datetime , timedelta
data = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv" , index_col = "date")
data.head()
date_today = dt.date.today() - dt.timedelta(days=5)
print(date_today)
data_today = data.loc["date_today"]
data_today
and the error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._maybe_get_bool_indexer()
KeyError: 'date_today'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-179-6abf305e8190> in <module>
5 print(date_today)
6
----> 7 data_today = data.loc["date_today"]
8 data_today
~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1765
1766 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1767 return self._getitem_axis(maybe_callable, axis=axis)
1768
1769 def _is_scalar_access(self, key: Tuple):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1962 # fall thru to straight lookup
1963 self._validate_key(key, axis)
-> 1964 return self._get_label(key, axis=axis)
1965
1966
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
622 raise IndexingError("no slices here, handle elsewhere")
623
--> 624 return self.obj._xs(label, axis=axis)
625
626 def _get_loc(self, key: int, axis: int):
~\anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3535 loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
3536 else:
-> 3537 loc = self.index.get_loc(key)
3538
3539 if isinstance(loc, np.ndarray):
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._maybe_get_bool_indexer()
KeyError: 'date_today'
Your code is mostly fine. You need to convert the index to date, and also use date_today as a variable rather than a string:
import datetime as dt
import pandas as pd
from datetime import datetime, timedelta

data = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv", index_col="date")
data.index = pd.to_datetime(data.index)
date_today = dt.date.today() - dt.timedelta(days=5)
data_today = data.loc[date_today]
print(data_today.head())
The output is:
iso_code continent location total_cases new_cases total_deaths new_deaths total_cases_per_million new_cases_per_million total_deaths_per_million ... aged_70_older \
date ...
2020-07-21 AFG Asia Afghanistan 35615.0 140.0 1186.0 5.0 914.886 3.596 30.466 ... 1.337
2020-07-21 ALB Europe Albania 4171.0 81.0 113.0 1.0 1449.371 28.147 39.266 ... 8.643
...
Your code should work fine; you just need to format date_today properly:
data.loc[date_today.strftime("%Y-%m-%d")]
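A tiny illustration of the underlying point (a hypothetical example of mine, not from either answer): with quotes, .loc searches the index for the literal label "date_today"; without quotes, it uses the value held by the variable.
import pandas as pd

# Hypothetical frame indexed by date strings, like the CSV after index_col="date".
data = pd.DataFrame({"new_cases": [1, 2, 3]},
                    index=["2020-07-19", "2020-07-20", "2020-07-21"])
date_today = "2020-07-20"

# data.loc["date_today"]   # KeyError: 'date_today' -- that literal string is not in the index
print(data.loc[date_today])  # uses the variable's value and finds the 2020-07-20 row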

Error when applying a normalize function in Python

I have 400 columns and I am trying to do row-wise min-max normalization. For the first 200 points I want to scale the values between 0 and 500, and for the next two hundred points I want to scale them between 0 and 10.
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(500,1000,size=(5, 400)))
def normalize(ds, value):
    normalizedds = []
    normalizedds.extend((ds[:value] - np.min(ds[:value])) / np.max(ds[:value] - np.min(ds[:value])) * 500)
    normalizedds.extend(ds[value:value*2] / np.max(ds[value:value*2]) * 10)
    return normalizedds
normalizeddsList = pd.DataFrame.from_records(df.apply(normalize, value=200, axis=1))
I get the following error!
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5166 try:
-> 5167 return self._searchsorted_monotonic(label, side)
5168 except ValueError:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in _searchsorted_monotonic(self, label, side)
5127
-> 5128 raise ValueError("index must be monotonic increasing or decreasing")
5129
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
----> 1 scaledCardList = pd.DataFrame.from_records(originalCardList.apply(scale, pointCount=200, axis=1))
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\apply.py in f(x)
110
111 def f(x):
--> 112 return func(x, *args, **kwds)
113
114 else:
in scale(card, pointCount)
1 def scale(card, pointCount):
2 scaledCard = []
----> 3 scaledCard.extend((card[:pointCount] - np.min(card[:pointCount])) / np.max(card[:pointCount] - np.min(card[:pointCount])) * 10000)
4 scaledCard.extend(card[pointCount:pointCount*2] / np.max(card[pointCount:pointCount*2]) * 100)
5 return scaledCard
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in getitem(self, key)
1111 key = check_bool_indexer(self.index, key)
1112
-> 1113 return self._get_with(key)
1114
1115 def _get_with(self, key):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py in _get_with(self, key)
1116 # other: fancy integer or otherwise
1117 if isinstance(key, slice):
-> 1118 indexer = self.index._convert_slice_indexer(key, kind="getitem")
1119 return self._get_values(indexer)
1120 elif isinstance(key, ABCDataFrame):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in _convert_slice_indexer(self, key, kind)
395
396 # translate to locations
--> 397 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
398
399 def _format_native_types(
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind)
5032 slice(1, 3)
5033 """
-> 5034 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
5035
5036 # return a slice
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind)
5252 end_slice = None
5253 if end is not None:
-> 5254 end_slice = self.get_slice_bound(end, "right", kind)
5255 if end_slice is None:
5256 end_slice = len(self)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5168 except ValueError:
5169 # raise the original KeyError
-> 5170 raise err
5171
5172 if isinstance(slc, np.ndarray):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5162 # we need to look up the label
5163 try:
-> 5164 slc = self.get_loc(label)
5165 except KeyError as err:
5166 try:
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\numeric.py in get_loc(self, key, method, tolerance)
477 except (TypeError, NotImplementedError):
478 pass
--> 479 return super().get_loc(key, method=method, tolerance=tolerance)
480
481 #cache_readonly
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: (200.0, 'occurred at index 0')
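One possible fix, offered as a sketch rather than a definitive answer: with apply(axis=1) each row arrives as a Series indexed by the column labels, so ds[:value] treats the bound as a label lookup; slicing positionally with iloc (or working on the underlying NumPy values) sidesteps the KeyError.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randint(500, 1000, size=(5, 400)))

def normalize(ds, value):
    # Positional slices, mirroring the formulas from the question.
    first = ds.iloc[:value].to_numpy(dtype=float)
    second = ds.iloc[value:value * 2].to_numpy(dtype=float)
    scaled_first = (first - first.min()) / (first.max() - first.min()) * 500
    scaled_second = second / second.max() * 10
    return list(scaled_first) + list(scaled_second)

normalizeddsList = pd.DataFrame(df.apply(normalize, value=200, axis=1).tolist())
print(normalizeddsList.shape)  # (5, 400)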

Facet Grid not working for categorical variables

I get an error when trying to create a Facet Grid with Seaborn. I have 3 categorical variables: Gender, Day of the Week, Color. I want to understand the direct correlation of all values within each category to each other.
Gender: Female, Male
Day of the Week: Mo,Tue,Wed,Thu,Fri,Sat,Sun
Color: Red, Green.
g = sns.FacetGrid(tips, col="Gender", row="Color")
g = g.map(plt.hist, "Day of the Week")
display()
I get an error:
KeyError-Traceback (most recent call last)
<command-206114> in <module>()
2 tips = sns.load_dataset("tips")
3
----> 4 g = sns.FacetGrid(tips, col="Gender", row="Color")
5 g = g.map(plt.hist, "Day of the week")
6 display()
/databricks/python/lib/python3.5/site-packages/seaborn/axisgrid.py in __init__(self, data, row, col, hue, col_wrap, sharex, sharey, size, aspect, palette, row_order, col_order, hue_order, hue_kws, dropna, legend_out, despine, margin_titles, xlim, ylim, subplot_kws, gridspec_kws)
240 row_names = []
241 else:
--> 242 row_names = utils.categorical_order(data[row], row_order)
243
244 if col is None:
/databricks/python/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/databricks/python/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/databricks/python/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/databricks/python/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
/databricks/python/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
KeyError: 'Color'
Does anybody know why this is?
The variable "Color" is the issue here: it is either misspelled or not a column in your DataFrame (the KeyError means the lookup could not find it). And if you are plotting two categorical variables, try using a bar chart.
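As a follow-up sketch (my code, not from the original answer): the seaborn tips dataset has no "Gender", "Color", or "Day of the Week" columns; its categorical columns are "sex", "smoker", "day", and "time", and the grid works once the names passed to FacetGrid actually exist in the DataFrame.
import seaborn as sns
import matplotlib.pyplot as plt

tips = sns.load_dataset("tips")
print(tips.columns.tolist())  # check the real column names first

# Facet on two categorical columns that do exist and plot a numeric column in each panel.
g = sns.FacetGrid(tips, col="sex", row="smoker")
g.map(plt.hist, "total_bill")
plt.show()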
