How to enable season selection as JJAS instead of JJA in xarray

How to enable season selection as JJAS instead of JJA in xarray - python

I am relatively new to Python and programming and have been trying to make some initial plots of precipitation data for the Indian subcontinent, specifically for the Indian summer monsoon over the period of June, July, August and September. I have managed to understand some of the code in a tutorial to obtain the plot for JJA shown below, but I am failing to modify it suitably to show the season as JJAS instead of JJA. Simply substituting JJAS in place of JJA of course yielded the error
KeyError: 'JJAS'
I have seen one solution to this on the same forum, but I am unable to adapt it to my code. I would be extremely grateful for any advice on this. Thank you!
Below is the code
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import cmocean
accesscm2_pr_file = r'C:\Users\uSER\Desktop\DissTrack1\ESGF data files\pr_Amon_CAMS-CSM1-0_historical_r1i1p1f1_gn_185001-201412.nc'
dset = xr.open_dataset(accesscm2_pr_file)
clim = dset['pr'].groupby('time.season').mean('time', keep_attrs=True)
clim.data = clim.data * 86400
clim.attrs['units'] = 'mm/day'
fig = plt.figure(figsize=[12,5])
ax = fig.add_subplot(111, projection=ccrs.PlateCarree(central_longitude=180))
clim.sel(season='JJAS').plot.contourf(ax=ax,
levels=np.arange(0, 13.5, 1.5),
extend='max',
transform=ccrs.PlateCarree(),
cbar_kwargs={'label': clim.units},
cmap=cmocean.cm.haline_r)
ax.coastlines()
plt.show()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'JJAS'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16124/3658430410.py in <module>
15 fig = plt.figure(figsize=[12,5])
16 ax = fig.add_subplot(111, projection=ccrs.PlateCarree(central_longitude=180))
---> 17 clim.sel(season='JJAS').plot.contourf(ax=ax,
18 levels=np.arange(0, 13.5, 1.5),
19 extend='max',
~\anaconda3\lib\site-packages\xarray\core\dataarray.py in sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
1269 Dimensions without coordinates: points
1270 """
-> 1271 ds = self._to_temp_dataset().sel(
1272 indexers=indexers,
1273 drop=drop,
~\anaconda3\lib\site-packages\xarray\core\dataset.py in sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
2363 """
2364 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel")
-> 2365 pos_indexers, new_indexes = remap_label_indexers(
2366 self, indexers=indexers, method=method, tolerance=tolerance
2367 )
~\anaconda3\lib\site-packages\xarray\core\coordinates.py in remap_label_indexers(obj, indexers, method, tolerance, **indexers_kwargs)
419 }
420
--> 421 pos_indexers, new_indexes = indexing.remap_label_indexers(
422 obj, v_indexers, method=method, tolerance=tolerance
423 )
~\anaconda3\lib\site-packages\xarray\core\indexing.py in remap_label_indexers(data_obj, indexers, method, tolerance)
272 coords_dtype = data_obj.coords[dim].dtype
273 label = maybe_cast_to_coords_dtype(label, coords_dtype)
--> 274 idxr, new_idx = convert_label_indexer(index, label, dim, method, tolerance)
275 pos_indexers[dim] = idxr
276 if new_idx is not None:
~\anaconda3\lib\site-packages\xarray\core\indexing.py in convert_label_indexer(index, label, index_name, method, tolerance)
189 indexer = index.get_loc(label_value)
190 else:
--> 191 indexer = index.get_loc(label_value, method=method, tolerance=tolerance)
192 elif label.dtype.kind == "b":
193 indexer = label
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'JJAS'

Indeed, grouping by "time.season" will only split your data into "DJF", "MAM", "JJA", and "SON". For other combinations of months you will need to define your own mask(s) to apply when taking a mean. For "JJAS" I often use something like this:
jjas = dset.time.dt.month.isin(range(6, 10))
clim = dset.sel(time=jjas).mean("time")

Related

Making a sns.pairplot using scikit wine dataset

This seems simple enough, but I can't find a solution online.
I am trying to create an sns.pairplot in Python. I have downloaded the wine dataset, kept the features that I need, and run the plot.
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_wine
# Load the wine dataset
wine = datasets.load_wine()
wine = list(zip(wine.data, wine.target))
wine = load_wine()
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
wine = load_wine
data = load_wine()
df = pd.DataFrame(data.data, columns=data.feature_names)
#This is the code that should run the plot
b=sns.pairplot(df, vars = df.columns[1 :], hue = "target", height = 2.5)
But I get this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2894 try:
-> 2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'target'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-108-1107acc27949> in <module>
----> 1 b=sns.pairplot(df, vars = df.columns[1 :], hue = "target", height = 2.5)
2
3 plt.show()
~\anaconda3\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\anaconda3\lib\site-packages\seaborn\axisgrid.py in pairplot(data, hue, hue_order, palette, vars, x_vars, y_vars, kind, diag_kind, markers, height, aspect, corner, dropna, plot_kws, diag_kws, grid_kws, size)
1923 # Set up the PairGrid
1924 grid_kws.setdefault("diag_sharey", diag_kind == "hist")
-> 1925 grid = PairGrid(data, vars=vars, x_vars=x_vars, y_vars=y_vars, hue=hue,
1926 hue_order=hue_order, palette=palette, corner=corner,
1927 height=height, aspect=aspect, dropna=dropna, **grid_kws)
~\anaconda3\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\anaconda3\lib\site-packages\seaborn\axisgrid.py in __init__(self, data, hue, hue_order, palette, hue_kws, vars, x_vars, y_vars, corner, diag_sharey, height, aspect, layout_pad, despine, dropna, size)
1212 index=data.index)
1213 else:
-> 1214 hue_names = categorical_order(data[hue], hue_order)
1215 if dropna:
1216 # Filter NA from the list of unique hue names
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2900 if self.columns.nlevels > 1:
2901 return self._getitem_multilevel(key)
-> 2902 indexer = self.columns.get_loc(key)
2903 if is_integer(indexer):
2904 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
-> 2897 raise KeyError(key) from err
2898
2899 if tolerance is not None:
KeyError: 'target'
The solution linked to this question: How to convert a Scikit-learn dataset to a Pandas dataset unfortunately doesn't seem to work here.
I also tried 'class' instead of target. Could it be that the 'zip' function isn't working correctly above, so the program can't identify 'target'?
Thank you in advance!

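From what you typed, it works like this. Note that df is built only from data.data, so it has no "target" column; that is why hue = "target" raises the KeyError, and it is removed below.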
from sklearn.datasets import load_iris
wine = load_wine
data = load_wine()
df = pd.DataFrame(data.data, columns=data.feature_names)
#This is the code that should run the plot
b=sns.pairplot(df, vars = df.columns[1 :], height = 2.5)
The question is which features you want to highlight, and why.
You cut alcohol from the list, so the target simply won't be aligned.
The second thing is that this is a feature-wise pairplot, not a target/class one.
So, all in all, I don't understand what you are trying to do here.

Pandas KeyError, accessing column

I am trying to run this code:
(this will download the MNIST dataset to %HOME directory!)
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()
X, y = mnist["data"], mnist["target"]
import matplotlib as mpl
import matplotlib.pyplot as plt
some_digit = X[0] # **ERROR LINE** <---------
some_digit_image = some_digit.reshape(28, 28)
plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
plt.axis("off")
plt.show()
I have this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-45-d5d685fca2de> in <module>
2 import matplotlib.pyplot as plt
3 import numpy as np
----> 4 some_digit = X[0]
5 some_digit_image = some_digit.reshape(28, 28)
6 plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
~/.local/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0
Code example is from this book: Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow
I tried X.iloc[0], but it's also not working.

From your dataframe pic, there is no column header named 0. If you want to access a column by index, you can use .iloc, which is primarily integer-position based:
df.iloc[:, 0]
Or access by column header list
df[df.columns[0]]

Enrichment Analysis with GSEAPY

I am trying to run an enrichment analysis with gseapy enrichr on a list of gene names that look like the following:
0 RAB4B
1 TIGAR
2 RNF44
3 DNAH3
4 RPL23A
5 ARL8B
6 CALB2
7 MFSD3
8 PIGV
9 ZNF708
Name: 0, dtype: object
I am using the following code:
# run enrichr
# if you are only intrested in dataframe that enrichr returned, please set no_plot=True
# list, dataframe, series inputs are supported
enr = gseapy.enrichr(gene_list = glist2,
gene_sets=['ARCHS4_Cell-lines', 'KEGG_2016','KEGG_2013', 'GO_Cellular_Component_2018', 'GO_Cellular_Component_AutoRIF', 'GO_Cellular_Component_AutoRIF_Predicted_zscore', 'GO_Molecular_Function_2018', 'GO_Molecular_Function_AutoRIF', 'GO_Molecular_Function_AutoRIF_Predicted_zscore'],
organism='Human', # don't forget to set organism to the one you desired! e.g. Yeast
description='test_name',
outdir='test/enrichr_kegg',
# no_plot=True,
cutoff=1 # test dataset, use lower value from range(0,1)
)
However, I am receiving the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Adjusted P-value'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-78-dad3e0840d86> in <module>
9 outdir='test/enrichr_kegg',
10 # no_plot=True,
---> 11 cutoff=1 # test dataset, use lower value from range(0,1)
12 )
~/venv/lib/python3.7/site-packages/gseapy/enrichr.py in enrichr(gene_list, gene_sets, organism, description, outdir, background, cutoff, format, figsize, top_term, no_plot, verbose)
500 # set organism
501 enr.set_organism()
--> 502 enr.run()
503
504 return enr
~/venv/lib/python3.7/site-packages/gseapy/enrichr.py in run(self)
418 top_term=self.__top_term, color='salmon',
419 title=self._gs,
--> 420 ofname=outfile.replace("txt", self.format))
421 if msg is not None : self._logger.warning(msg)
422 self._logger.info('Done.\n')
~/venv/lib/python3.7/site-packages/gseapy/plot.py in barplot(df, column, title, cutoff, top_term, figsize, color, ofname, **kwargs)
498 if colname in ['Adjusted P-value', 'P-value']:
499 # check if any values in `df[colname]` can't be coerced to floats
--> 500 can_be_coerced = df[colname].map(isfloat)
501 if np.sum(~can_be_coerced) > 0:
502 raise ValueError('some value in %s could not be typecast to `float`'%colname)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'Adjusted P-value'
It seems that everything is running fine before calculating the adjusted p values. Also, when I insert my gene names into sites like Biomart, I get returns on the values that I input, but I don't know where I'm going wrong with the Adjusted P - Values in my code. Can anyone point me in the right direction? Thanks

How many genes do you have in your gene list? I had the same issue. My gene list has about 22000 genes. I picked only the top 5000 genes, and then the problem was solved. Of course, you can change that number as you wish.
Here is my code:
import gseapy
enr_res = gseapy.enrichr(gene_list=glist[:5000],
organism='human',
gene_sets=['GO_Biological_Process_2018','KEGG_2019_Human','WikiPathways_2019_Human','GO_Biological_Process_2017b'],
description='pathway',
cutoff = 0.5)

Receiving Key Error = 0 while calculating the polarity in Python

I have two columns - text and title - for news articles.
The data looks fine; apologies for the screenshot, it is just to show the structure.
But it gives me a weird error when I try to calculate the polarity.
# Create
polarity = []
# Creare for loop for Text column only
for i in range(len(jordan_df['text'])):
polarity.append(TextBlob(jordan_df['text'][i]).sentiment.polarity)
# Put data together
polarity_data = {'article_text':jordan_df['text'], 'article_polarity': polarity}
The weird thing is that this code works when I change jordan_df to some_df with the same structure.
Error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method,
tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
**KeyError: 0**
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
3 frames
<ipython-input-186-edab50678cab> in <module>()
9 # Creare for loop for Text column only
10 for i in range(len(jordan_df['text'])):
---> 11 polarity.append(TextBlob(jordan_df['text'][i]).sentiment.polarity)
12
13 # Put data together
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _get_value(self, label, takeable)
988
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
992
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method,
tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2901
2902 if tolerance is not None:

Add this line in your code:
polarity = []
jordan_df.reset_index(drop=True,inplace = True) #add this line
# Creare for loop for Text column only
for i in range(len(jordan_df['text'])):
polarity.append(TextBlob(jordan_df['text'][i]).sentiment.polarity)
# Put data together
polarity_data = {'article_text':jordan_df['text'], 'article_polarity': polarity}
You have probably filtered the data, which has changed the index of your jordan_df. You can see in head() of your jordan_df that the index starts at 7.
That's why you get a KeyError on key 0,
i.e. when i=0 in jordan_df['text'][i].

Cannot find data from index in python

I am trying to show the image at a specific index using matplotlib, but it is giving me an error and I do not understand why. I am trying to get index 0 of the MNIST data, reshape it to 28 by 28 pixels, and then show that image with the plt.show() function.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
some_digit = X[0]
some_digit_image = some_digit.reshape(28, 28)
plt.imshow(
some_digit_image,
cmap = matplotlib.cm.binary,
interpolation="nearest")
plt.axis("off")
plt.show()
KeyError Traceback (most recent call last)
~/Machinelearning/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-35-246778f0802e> in <module>
3 import matplotlib.pyplot as plt
4
----> 5 some_digit = X[0]
6 some_digit_image = some_digit.reshape(28, 28)
7
~/Machinelearning/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/Machinelearning/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0

Change
import numpy as np
x = [36000]
to
np.array(x.iloc[36000])

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to enable season selection as JJAS instead of JJA in xarray - python

Related

Making a sns.pairplot using scikit wine dataset

Pandas KeyError, accessing column

Enrichment Analysis with GSEAPY

Receiving Key Error = 0 while calculating the polarity in Python

Cannot find data from index in python

Categories

Resources