Performing Dickey-Fuller test in Python - python

I'm trying to perform the Dickey-Fuller test in part of the code and this error is displayed:
TypeError: 'str' object cannot be interpreted as an integer
When I try the same test in another part of the code, it works fine.
The only difference I noticed is in the arrangement of the data as you can see in this print (https://i.stack.imgur.com/czUB1.jpg)
Where the "table" marked in red is the one that does not work.
Code:
from statsmodels.tsa.seasonal import seasonal_decompose
# Split the log-scaled series into its trend, seasonal and residual parts.
decomposition = seasonal_decompose(indexedDataset_logScale)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
# Draw the original series and the three components as four stacked subplots.
plt.subplot(411)
plt.plot(indexedDataset_logScale, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
# decomposedLogData is the same object as residual (no copy), so the
# in-place dropna below also removes the missing values from residual.
decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
# NOTE(review): per the traceback, the object passed here is a Series, so
# test_stationarity's timeseries['#Passengers'] is looked up in the index
# rather than in the columns.
test_stationarity(decomposedLogData)
Complete traceback:
Results of Dickey-Fuller Test
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4410 try:
-> 4411 return libindex.get_value_at(s, key)
4412 except IndexError:
pandas\_libs\index.pyx in pandas._libs.index.get_value_at()
pandas\_libs\index.pyx in pandas._libs.index.get_value_at()
pandas\_libs\util.pxd in pandas._libs.util.get_value_at()
pandas\_libs\util.pxd in pandas._libs.util.validate_indexer()
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
650 try:
--> 651 value = Index.get_value(self, series, key)
652 except KeyError:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4418 else:
-> 4419 raise e1
4420 except Exception:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine._date_check_type()
KeyError: '#Passengers'
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas\_libs\tslibs\parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1373 else:
-> 1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1375
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ParserError("Unknown string format: %s", timestr)
650
ParserError: Unknown string format: #Passengers
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
659 try:
--> 660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value_maybe_box(self, series, key)
674 elif not isinstance(key, Timestamp):
--> 675 key = Timestamp(key)
676 values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
pandas\_libs\tslibs\timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-18-f3dd1636e820> in <module>
23 decomposedLogData = residual
24 decomposedLogData.dropna(inplace=True)
---> 25 test_stationarity(decomposedLogData)
26
<ipython-input-12-099228b0a850> in test_stationarity(timeseries)
16 #Perform Dickey-Fuller test:
17 print('Results of Dickey-Fuller Test:')
---> 18 dftest = adfuller(timeseries['#Passengers'], autolag='AIC')
19 dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
20 for key,value in dftest[4].items():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
--> 662 raise KeyError(key)
663 else:
664 return com.maybe_box(self, value, series, key)
KeyError: '#Passengers'
Would you please help me with this question?
Thank you.
Marcelo

In the function "test_stationarity", adfuller is called on timeseries['#Passengers'], so the timeseries argument must have a column named '#Passengers'.
But the output of seasonal_decompose comes back without a column name.
My solution was to convert decomposedLogData to a DataFrame and give it the '#Passengers' column name back. Worked for me.
# The residual produced by seasonal_decompose has no '#Passengers' column,
# but test_stationarity() indexes timeseries['#Passengers'].
# Drop the missing values (without mutating `residual` itself) and wrap the
# result in a one-column DataFrame that carries the expected column name.
decomposedLogData = pd.DataFrame({"#Passengers": residual.dropna()})
# The function defined in the question is test_stationarity, not
# test_stationary.
test_stationarity(decomposedLogData)
If you've already found more elegant solution, please share :)

Related

The above exception was the direct cause of the following exception - ValueError: 10 is not in range

I am trying to run the code below to load a dataset into a PyTorch Dataset class with a custom collate function and map over it, but I am getting an error. The dataset consists of 123061 samples, so in the code below I have used only 10 of them. If I use the whole dataset, I get ValueError: 123061 is not in range. Where exactly am I going wrong?
class Dataclass(Dataset):
    """Dataset of drug pairs; each item is [solute graph, solvent graph, label].

    Wraps a DataFrame with 'Drug1_SMILES', 'Drug2_SMILES' and 'label'
    columns and converts both SMILES strings of a row into graphs on access.
    """

    def __init__(self, dataset):
        """Store the DataFrame the items are read from."""
        self.dataset = dataset

    def __len__(self):
        """Return the number of rows (samples) in the wrapped DataFrame."""
        return len(self.dataset)

    @staticmethod
    def _smiles_to_graph(smiles):
        """Pass a SMILES string through Chem.AddHs and build its graph."""
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        return get_graph_from_smile(Chem.MolToSmiles(mol))

    def __getitem__(self, idx):
        """Return [solute_graph, solvent_graph, label] for the row at position idx.

        Raises:
            IndexError: if idx is outside the range of available rows.
        """
        # Positional lookup instead of label-based .loc:
        #  * it works for any slice of the frame, whatever its index labels;
        #  * it raises IndexError past the last row. Plain iteration over an
        #    object that only defines __getitem__ (e.g. zip(*tg)) asks for
        #    0, 1, 2, ... and stops only on IndexError; .loc raised KeyError
        #    there, which surfaced as "N is not in range".
        row = self.dataset.iloc[idx]
        solute_graph = self._smiles_to_graph(row['Drug1_SMILES'])
        solvent_graph = self._smiles_to_graph(row['Drug2_SMILES'])
        delta_g = row['label']
        # Return the label too: callers unpack three fields per sample.
        return [solute_graph, solvent_graph, delta_g]
# Wrap only the first ten rows of train_df.
tg = Dataclass(train_df[:10])
# Iterate over tg and transpose the per-sample items into three parallel
# lists, so every item has to provide three fields.
solute_graphs, solvent_graphs, labels = map(list, zip(*tg))
Error
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
384 try:
--> 385 return self._range.index(new_key)
386 except ValueError as err:
ValueError: 10 is not in range
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
6 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
385 return self._range.index(new_key)
386 except ValueError as err:
--> 387 raise KeyError(key) from err
388 raise KeyError(key)
389 return super().get_loc(key, method=method, tolerance=tolerance)
KeyError: 10

Periodic KeyError in Pandas

I want to replace the empty values in the dataframe using random already existing values, while maintaining the weights so that the correlation does not suffer and the data is not lost.
def nan_fill_random(column_name, nan):
    """Overwrite, in place, every entry equal to `nan` with a random other entry.

    `column_name` is accessed by the integers 0 .. len(column_name) - 1 and
    the replacement is drawn with random.choice from the entries that differ
    from `nan`. Nothing is returned.

    NOTE: random.choice picks an integer below len(seq) and evaluates
    seq[that integer]; on the filtered Series this is a label lookup, which
    is where the intermittent KeyError in the traceback comes from.
    """
    for pos in range(len(column_name)):
        if column_name[pos] != nan:
            continue
        candidates = column_name[column_name != nan]
        column_name[pos] = random.choice(candidates)
I wrote a function, but it periodically throws a KeyError: and the value has different numbers, I assume indexes. Also, when you restart the cell, it can either disappear or be updated.
nan_fill_random(data['education'], 'unknown')
Here is the error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 14563
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4720/2723938638.py in <module>
----> 1 nan_fill_random(data['education'], 'unknown')
~\AppData\Local\Temp/ipykernel_4720/1980306790.py in nan_fill_random(column_name, nan)
2 for i in range(len(column_name)):
3 if column_name[i] == nan:
----> 4 column_name[i] = random.choice(column_name[column_name != nan])
5 else:
6 continue
W:\ProgramData\Anaconda3\lib\random.py in choice(self, seq)
344 """Choose a random element from a non-empty sequence."""
345 # raises IndexError if seq is empty
--> 346 return seq[self._randbelow(len(seq))]
347
348 def shuffle(self, x, random=None):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 14563
def nan_fill_random(column_name, nan):
    """Return `column_name` with every `nan` placeholder replaced at random.

    Each entry equal to `nan` is replaced by a value drawn uniformly from
    the column's other distinct values; all other entries are kept.

    Args:
        column_name: the pandas Series to clean.
        nan: the placeholder value that marks a missing entry
            (for example 'unknown').

    Returns:
        The original Series object when it holds no placeholder, or nothing
        but placeholders (there is nothing to draw from); otherwise a new
        Series with the placeholders filled in.
    """
    distinct_values = set(column_name)
    if nan not in distinct_values:
        return column_name
    distinct_values.discard(nan)
    if not distinct_values:
        # Only placeholders present: random.choice would fail on an empty list.
        return column_name
    # Build the candidate list once instead of once per element.
    candidates = list(distinct_values)
    return column_name.apply(
        lambda x: x if x != nan else random.choice(candidates)
    )

Why am I getting odd errors on applying dataframe lambda?

I have a dataframe where I am trying to create a new column based on applying a lambda to two existing columns. The dataframe uses a datetimeindex and each column is either float or int and there are no missing or null values:
closeunadj qtr_timedelta
date
2021-05-18 128.75 107
2021-05-19 130.21 108
2021-05-20 132.15 109
2021-05-21 132.30 110
2021-05-24 133.34 113
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1021 entries, 2017-05-01 to 2021-05-24
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 closeunadj 1021 non-null float64
1 qtr_timedelta 1021 non-null int64
dtypes: float64(1), int64(1)
memory usage: 63.9 KB
The lambda calculates a simple shifted return on the first column using a changing shift count value from the second column.
# Intended: return of 'closeunadj' relative to its value 'qtr_timedelta'
# rows earlier, stored in a new 'qtr_gwth' column.
# NOTE(review): the traceback shows x is a Series with a DatetimeIndex when
# the lambda runs, so x['closeunadj'] is looked up among the dates -
# presumably because apply() runs column-wise by default; confirm.
final_merge['qtr_gwth'] = final_merge[['closeunadj',
'qtr_timedelta']].apply(lambda x: x['closeunadj'] / x['closeunadj'].shift(x['qtr_timedelta']) - 1)
However, I get the following errors (in traceback order) which don’t make sense to me since the dataframe values are either datetimeindex, float, or int (as outlined above). ‘closeunadj’ is float64, index is datetimeindex, and there are no str objects.
TypeError: 'str' object cannot be interpreted as an integer
KeyError: 'closeunadj'
ParserError: Unknown string format: closeunadj
ValueError: could not convert string to Timestamp
KeyError: 'closeunadj'
I have tried recasting the columns to float and int, recasting the datetimeindex, recreating the dataframe and using a different lambda syntax, all to no avail. Any help or fix greatly appreciated!
This is the full traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4410 try:
-> 4411 return libindex.get_value_at(s, key)
4412 except IndexError:
pandas/_libs/index.pyx in pandas._libs.index.get_value_at()
pandas/_libs/index.pyx in pandas._libs.index.get_value_at()
pandas/_libs/util.pxd in pandas._libs.util.get_value_at()
pandas/_libs/util.pxd in pandas._libs.util.validate_indexer()
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
650 try:
--> 651 value = Index.get_value(self, series, key)
652 except KeyError:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4418 else:
-> 4419 raise e1
4420 except Exception:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine._date_check_type()
KeyError: 'closeunadj'
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
~/opt/anaconda3/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(timestr, parserinfo, **kwargs)
1373 else:
-> 1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1375
~/opt/anaconda3/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ParserError("Unknown string format: %s", timestr)
650
ParserError: Unknown string format: closeunadj
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
659 try:
--> 660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value_maybe_box(self, series, key)
674 elif not isinstance(key, Timestamp):
--> 675 key = Timestamp(key)
676 values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-55-ac0ccaa452bc> in <module>
1 # Use close price change from date of last results
----> 2 final_merge['qtr_gwth'] = final_merge[['closeunadj',
3 'qtr_timedelta']].apply(lambda x : x['closeunadj'] / x['closeunadj'].shift(x['qtr_timedelta']) - 1)
4
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
6876 kwds=kwds,
6877 )
-> 6878 return op.get_result()
6879
6880 def applymap(self, func) -> "DataFrame":
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py in apply_standard(self)
293
294 try:
--> 295 result = libreduction.compute_reduction(
296 values, self.f, axis=self.axis, dummy=dummy, labels=labels
297 )
pandas/_libs/reduction.pyx in pandas._libs.reduction.compute_reduction()
pandas/_libs/reduction.pyx in pandas._libs.reduction.Reducer.get_result()
<ipython-input-55-ac0ccaa452bc> in <lambda>(x)
1 # Use close price change from date of last results
2 final_merge['qtr_gwth'] = final_merge[['closeunadj',
----> 3 'qtr_timedelta']].apply(lambda x : x['closeunadj'] / x['closeunadj'].shift(x['qtr_timedelta']) - 1)
4
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
--> 662 raise KeyError(key)
663 else:
664 return com.maybe_box(self, value, series, key)
KeyError: 'closeunadj'

Enrichment Analysis with GSEAPY

I am trying to run an enrichment analysis with gseapy enrichr on a list of gene names that look like the following:
0 RAB4B
1 TIGAR
2 RNF44
3 DNAH3
4 RPL23A
5 ARL8B
6 CALB2
7 MFSD3
8 PIGV
9 ZNF708
Name: 0, dtype: object
I am using the following code:
# Run Enrichr on the gene list.
# If you are only interested in the dataframe that enrichr returns, set no_plot=True.
# List, DataFrame and Series inputs are supported.
enr = gseapy.enrichr(gene_list = glist2,
gene_sets=['ARCHS4_Cell-lines', 'KEGG_2016','KEGG_2013', 'GO_Cellular_Component_2018', 'GO_Cellular_Component_AutoRIF', 'GO_Cellular_Component_AutoRIF_Predicted_zscore', 'GO_Molecular_Function_2018', 'GO_Molecular_Function_AutoRIF', 'GO_Molecular_Function_AutoRIF_Predicted_zscore'],
organism='Human', # don't forget to set the organism to the one you want, e.g. Yeast
description='test_name',
outdir='test/enrichr_kegg',
# no_plot=True,
cutoff=1 # test dataset; use a lower value from range(0,1)
)
However, I am receiving the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Adjusted P-value'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-78-dad3e0840d86> in <module>
9 outdir='test/enrichr_kegg',
10 # no_plot=True,
---> 11 cutoff=1 # test dataset, use lower value from range(0,1)
12 )
~/venv/lib/python3.7/site-packages/gseapy/enrichr.py in enrichr(gene_list, gene_sets, organism, description, outdir, background, cutoff, format, figsize, top_term, no_plot, verbose)
500 # set organism
501 enr.set_organism()
--> 502 enr.run()
503
504 return enr
~/venv/lib/python3.7/site-packages/gseapy/enrichr.py in run(self)
418 top_term=self.__top_term, color='salmon',
419 title=self._gs,
--> 420 ofname=outfile.replace("txt", self.format))
421 if msg is not None : self._logger.warning(msg)
422 self._logger.info('Done.\n')
~/venv/lib/python3.7/site-packages/gseapy/plot.py in barplot(df, column, title, cutoff, top_term, figsize, color, ofname, **kwargs)
498 if colname in ['Adjusted P-value', 'P-value']:
499 # check if any values in `df[colname]` can't be coerced to floats
--> 500 can_be_coerced = df[colname].map(isfloat)
501 if np.sum(~can_be_coerced) > 0:
502 raise ValueError('some value in %s could not be typecast to `float`'%colname)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'Adjusted P-value'
It seems that everything is running fine before calculating the adjusted p values. Also, when I insert my gene names into sites like Biomart, I get returns on the values that I input, but I don't know where I'm going wrong with the Adjusted P - Values in my code. Can anyone point me in the right direction? Thanks
How many genes do you have in your gene list? I had the same issue. My gene list has about 22000 genes; I picked only the top 5000 genes, and that solved the problem. Of course, you can change that number as you wish.
Here is my code:
import gseapy
# Query Enrichr with only the first 5000 entries of the gene list.
enr_res = gseapy.enrichr(gene_list=glist[:5000],
organism='human',
gene_sets=['GO_Biological_Process_2018','KEGG_2019_Human','WikiPathways_2019_Human','GO_Biological_Process_2017b'],
description='pathway',
cutoff = 0.5)

Receiving Key Error = 0 while calculating the polarity in Python

I have two columns - text and title for news articles.
Data looks fine, apologize for a printscreen, just to show the structure.
But it gives me a weird error when I try to calculate the polarity.
# Collect one polarity score per article text.
polarity = []
# Walk the 'text' column only, addressing each entry by the integers
# 0 .. number of rows - 1.
for i in range(jordan_df['text'].shape[0]):
    blob = TextBlob(jordan_df['text'][i])
    polarity.append(blob.sentiment.polarity)
# Put the texts and their scores together.
polarity_data = {'article_text': jordan_df['text'], 'article_polarity': polarity}
The weird thing that this code works, when I change jordan_df to some_df with the same structure.
Error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method,
tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
**KeyError: 0**
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
3 frames
<ipython-input-186-edab50678cab> in <module>()
9 # Creare for loop for Text column only
10 for i in range(len(jordan_df['text'])):
---> 11 polarity.append(TextBlob(jordan_df['text'][i]).sentiment.polarity)
12
13 # Put data together
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _get_value(self, label, takeable)
988
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
992
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method,
tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2901
2902 if tolerance is not None:
Add this line in your code:
polarity = []
# Add this line: renumber the index from 0 so that jordan_df['text'][i]
# finds a row for every i produced by the loop below.
jordan_df.reset_index(drop=True, inplace=True)
# Walk the 'text' column only and collect one polarity score per article.
for i in range(jordan_df['text'].shape[0]):
    blob = TextBlob(jordan_df['text'][i])
    polarity.append(blob.sentiment.polarity)
# Put the texts and their scores together.
polarity_data = {'article_text': jordan_df['text'], 'article_polarity': polarity}
You have probably filtered the data, which changed the index of your jordan_df. You can see in head() of your jordan_df that the index starts at 7.
That's why you get a KeyError on key 0,
i.e. when i=0 in jordan_df['text'][i]

Categories