How to move end plot on the first row downwards - python

I'm trying to move the radar plot at the end of the first row downwards so that it sits midway between the first and second rows. I have no idea where to even start with this. I've added an image showing the desired location of the plot.
Some code to reproduce the problem is here:
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def make_spider(row, title, color, ax=None):
    """Draw one closed, filled radar ("spider") polygon for a row of ``radar_dfs``.

    Parameters
    ----------
    row : int
        Positional index of the row of the module-level ``radar_dfs`` to plot.
    title : str
        Title shown above the axes.
    color : str
        Matplotlib colour used for the outline and the translucent fill.
    ax : matplotlib axes, optional
        Axes to draw on (the callers pass polar axes). Defaults to the
        current axes.
    """
    if ax is None:
        # The signature allows ax=None, but the body used to fail on ax.plot.
        ax = plt.gca()
    n_categories = len(radar_dfs.columns)
    # One angle per category plus a final 2*pi so the outline ends where it began.
    angles = np.arange(n_categories + 1) / n_categories * 2 * np.pi
    values = radar_dfs.iloc[row].values.flatten().tolist()
    values += values[:1]  # repeat the first value to close the polygon
    ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
    ax.fill(angles, values, color=color, alpha=0.4)
    # `title` was accepted but never used, so the caller's label was dropped.
    ax.set_title(title)
# Data behind the radar charts: nine named measures, each holding one value for the keys "0" to "4".
radar_dfs = pd.DataFrame.from_dict({"no_rooms":{"0":-0.3470532925,"1":-0.082144001,"2":-0.082144001,"3":-0.3470532925,"4":-0.3470532925},"total_area":{"0":-0.1858487321,"1":-0.1685491141,"2":-0.1632483955,"3":-0.1769700284,"4":-0.0389887094},"car_park_spaces":{"0":-0.073703681,"1":-0.073703681,"2":-0.073703681,"3":-0.073703681,"4":-0.073703681},"house_price":{"0":-0.2416123064,"1":-0.2841806825,"2":-0.259622004,"3":-0.3529449824,"4":-0.3414842657},"pop_density":{"0":-0.1271390651,"1":-0.3105853643,"2":-0.2316607937,"3":-0.3297832328,"4":-0.4599021194},"business_rate":{"0":-0.1662745006,"1":-0.1426329043,"2":-0.1577528867,"3":-0.163560133,"4":-0.1099718326},"noqual_pc":{"0":-0.0251535462,"1":-0.1540641646,"2":-0.0204666924,"3":-0.0515740013,"4":-0.0445135996},"level4qual_pc":{"0":-0.0826103951,"1":-0.1777759951,"2":-0.114263357,"3":-0.1787044751,"4":-0.2709496389},"badhealth_pc":{"0":-0.105481688,"1":-0.1760349683,"2":-0.128215043,"3":-0.1560577648,"4":-0.1760349683}})
# 2 x 3 grid of polar axes that all share one radial (y) scale.
fig, axes = plt.subplots(2, 3, subplot_kw=dict(polar=True), sharey=True,
                         figsize=(28, 20))
palette = ['#79BD9A', '#69D2E7', '#F38630', '#547980', '#EDC951']
labels = ['A', 'B', 'C', 'D', 'E']
# Despite its name, `row_one` holds every axes of the grid, flattened row by row.
row_one = axes.flatten()
# zip() stops after the five labels, so the sixth axes is never drawn on.
for row, (ax, letter, col) in enumerate(zip(row_one, labels, palette)):
    make_spider(row=row, title='Group ' + str(letter), color=col, ax=ax)
# Remove the unused sixth axes (second row, third column).
fig.delaxes(axes[1][2])
plt.subplots_adjust(wspace=.4, hspace=.3)
plt.show()
Does anybody have any advice?

One way to do this is to create each subplot with plt.subplot2grid() and position it manually. A useful trick is to double the number of grid rows and make each chart span two of them (rowspan=2): a chart can then start on an odd-numbered grid row, which places it halfway between the original rows.
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def make_spider(row, title, color, ax=None):
    """Draw one closed, filled radar ("spider") polygon for a row of ``radar_dfs``.

    Parameters
    ----------
    row : int
        Positional index of the row of the module-level ``radar_dfs`` to plot.
    title : str
        Title shown above the axes.
    color : str
        Matplotlib colour used for the outline and the translucent fill.
    ax : matplotlib axes, optional
        Axes to draw on (the callers pass polar axes). Defaults to the
        current axes.
    """
    if ax is None:
        # The signature allows ax=None, but the body used to fail on ax.plot.
        ax = plt.gca()
    n_categories = len(radar_dfs.columns)
    # One angle per category plus a final 2*pi so the outline ends where it began.
    angles = np.arange(n_categories + 1) / n_categories * 2 * np.pi
    values = radar_dfs.iloc[row].values.flatten().tolist()
    values += values[:1]  # repeat the first value to close the polygon
    ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
    ax.fill(angles, values, color=color, alpha=0.4)
    # `title` was accepted but never used, so the caller's label was dropped.
    ax.set_title(title)
# Data behind the radar charts: nine named measures, each holding one value for the keys "0" to "4".
radar_dfs = pd.DataFrame.from_dict({"no_rooms":{"0":-0.3470532925,"1":-0.082144001,"2":-0.082144001,"3":-0.3470532925,"4":-0.3470532925},"total_area":{"0":-0.1858487321,"1":-0.1685491141,"2":-0.1632483955,"3":-0.1769700284,"4":-0.0389887094},"car_park_spaces":{"0":-0.073703681,"1":-0.073703681,"2":-0.073703681,"3":-0.073703681,"4":-0.073703681},"house_price":{"0":-0.2416123064,"1":-0.2841806825,"2":-0.259622004,"3":-0.3529449824,"4":-0.3414842657},"pop_density":{"0":-0.1271390651,"1":-0.3105853643,"2":-0.2316607937,"3":-0.3297832328,"4":-0.4599021194},"business_rate":{"0":-0.1662745006,"1":-0.1426329043,"2":-0.1577528867,"3":-0.163560133,"4":-0.1099718326},"noqual_pc":{"0":-0.0251535462,"1":-0.1540641646,"2":-0.0204666924,"3":-0.0515740013,"4":-0.0445135996},"level4qual_pc":{"0":-0.0826103951,"1":-0.1777759951,"2":-0.114263357,"3":-0.1787044751,"4":-0.2709496389},"badhealth_pc":{"0":-0.105481688,"1":-0.1760349683,"2":-0.128215043,"3":-0.1560577648,"4":-0.1760349683}})
palette = ['#79BD9A', '#69D2E7', '#F38630', '#547980', '#EDC951']
labels = ['A', 'B', 'C', 'D', 'E']
fig = plt.figure(figsize=(28, 20))
# (row, col) of each chart's top-left cell in a 4 x 3 grid. Every chart spans
# two grid rows, so the chart starting on row 1 sits halfway between the
# charts starting on row 0 and those starting on row 2.
position = [(0, 0), (0, 1), (1, 2), (2, 0), (2, 1)]
axes = []
for row, (letter, col, loc) in enumerate(zip(labels, palette, position)):
    ax = plt.subplot2grid(
        (4, 3), loc, rowspan=2, colspan=1, polar=True,
        # Axes are created one at a time here, so every axes after the
        # first is told to share its y scale with the first one.
        sharey=axes[0] if axes else None,
    )
    axes.append(ax)
    make_spider(row=row, title='Group ' + str(letter), color=col, ax=ax)
plt.subplots_adjust(wspace=.4, hspace=.3)
plt.show()
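With this solution, sharey has to be set manually for each subplot. A cleaner alternative may be to link the axes after creating them. Older Matplotlib versions allowed ax.get_shared_y_axes().join(*axes), but modifying that grouper was deprecated in Matplotlib 3.6; on current versions, call ax.sharey(axes[0]) on each additional axes instead (see the Matplotlib documentation on shared axes).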
Hope it helped.

Related

Line chart to surface chart

[UPDATE: Sorry for not providing the piece where the author of the codes create example data. I have updated the codes]
I found an example of a 3D line chart that does what I need (the colour changes with the level along the z dimension). However, instead of lines, I want a surface plot. How can I change the code to produce a 3D surface plot?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation, rc
from matplotlib.cm import get_cmap
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.font_manager import FontProperties
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d.art3d import Line3DCollection
# Example data: 783 random daily returns for 9 series, with a row of 100s
# stacked on top before the cumulative product is taken.
index_returns = np.random.normal(loc=1e-4, scale=5e-3, size=(783, 9))
index_returns = np.vstack((np.zeros(shape=(1, 9)) + 100, index_returns))
index_prices = np.cumprod(1 + index_returns, axis=0)
window = 261
# Return over the trailing `window` rows: price[i] / price[i - window] - 1,
# computed for every i >= window in one vectorised step instead of a loop.
rolling_returns = index_prices[window:] / index_prices[:-window] - 1
index = pd.date_range('2019-01-01', periods=rolling_returns.shape[0], freq='B')
columns = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
df = pd.DataFrame(rolling_returns, index=index, columns=columns)
# create the figure
fig = plt.figure(figsize=(14.4, 9))
ax = fig.add_subplot(111, projection='3d')
fig.patch.set_alpha(1)
# get the cmap to use
cmap = get_cmap('RdYlGn')
# get the slice based on data frame
current_slice = df.values[:261, :]
index_names = df.columns
index_dates = df.index
n_dates, n_series = current_slice.shape
# Continuous norm to map data values to colors. It is the same for every
# series, so it is created once rather than on each loop iteration.
norm = plt.Normalize(-0.19, 0.19)
# x coordinates (row positions) are shared by all series
x = np.arange(n_dates)
# list holding the lines
lines = []
# for each index...
for i in range(n_series):
    # get the coordinates: series i is drawn at y = i
    y = np.full(n_dates, i)
    z = current_slice[:, i]
    # create points and segments to color
    points = np.array([x, y, z]).T.reshape(-1, 1, 3)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = Line3DCollection(segments, cmap=cmap, norm=norm, zorder=n_series - i)
    # Set the values used for colormapping
    lc.set_array(z)
    lc.set_linewidth(2)
    # colour picked from the series' last value
    lc.set_color(cmap(z[-1] * 2.5 + 0.5))
    lc.set_label(index_names[i])
    lines.append(ax.add_collection(lc))
# add the legend
ax.legend(
    loc='center right', bbox_to_anchor=(1.1, 0.46), fancybox=True,
    facecolor=(.95, .95, .95, 1), framealpha=1, shadow=False, frameon=True,
    ncol=1, columnspacing=0, prop={'family': 'DejaVu Sans Mono'},
)
# z axis: the values, labelled as percentages
ax.set_zlabel('Rolling Equity 1Y', labelpad=10)
ax.set_zlim(-0.39, 0.39)
ax.set_zticklabels(
    [' ' * 3 + '{:.0%}'.format(val) for val in ax.get_zticks()],
    fontdict={'verticalalignment': 'center', 'horizontalalignment': 'center'},
)
# x axis: row positions labelled with the matching dates
ax.set_xlabel('Date', labelpad=30)
ax.set_xlim(0, n_dates - 1)
ax.set_xticklabels(
    [index_dates[int(val)].strftime('%m/%y') for val in ax.get_xticks()[:-1]] + [''],
    rotation=0,
    fontdict={'verticalalignment': 'top', 'horizontalalignment': 'center'},
)
# y axis: one tick per series, labelled with its column name
ax.set_yticks(np.arange(n_series))
ax.set_yticklabels(
    [index_names[i] for i in range(n_series)],
    rotation=-15,
    fontdict={'verticalalignment': 'center', 'horizontalalignment': 'left'},
)
# show the plot
plt.show()

boxplot show max and min fliers results in TypeError: 'AxesSubplot' object is not subscriptable

I am preparing box plots with a whisker interval of [2,98]. The issue is that I am working with air quality data and have a large range of data points, so the outliers take up the entire figure and overshadow the boxplots. I would like to plot the max and min outliers only and have tried the method from Matplotlib boxplot show only max and min fliers, however, I get an error message that says TypeError: 'AxesSubplot' object is not subscriptable.
Here is my code:
# Question code, kept as posted: it reproduces the reported TypeError.
fig,ax = plt.subplots(1, figsize=(8,6))
g = sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98))
# Per the reported error, g is an 'AxesSubplot', which cannot be subscripted,
# so the next line is the one that fails.
fliers = g['fliers']
for fly in fliers:
# NOTE(review): get_data is referenced without being called here - confirm fly.get_data() was intended.
fdata=fly.get_data
fly.set_data([fdata[0][0],fdata[0][-1],fdata[1][0],fdata[1][-1]])
# Label the three boxes and format the y axis.
xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(3), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)',fontsize=15)
ax.set_ylim(0,105)
plt.show()
Here's some sample data:
# Sample data: ten values for each of the three columns T1, T2 and T3.
mda8 = pd.DataFrame({
'T1':[35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
'T2':[28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
'T3':[34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})
I need help with plotting the max and min outliers only and am open to doing another method besides the one that I tried here.
EDIT here's the link to my csv file https://drive.google.com/file/d/1E3A0UAYCbSN53JXtfsbrA4i_Phci_JWf/view?usp=sharing
A possible approach could be:
hide the outliers plotted by seaborn.boxplot by passing showfliers = False parameter:
# showfliers = False hides the outlier markers that seaborn would otherwise draw.
sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98), showfliers = False)
get the list of outliers for each column, find maximum and minimum and plot only them:
# Compute the fliers with the same percentile whiskers as the boxplot
# (whis=(2, 98)). boxplot_stats defaults to whis=1.5, which would classify a
# different set of points as outliers than the ones beyond the drawn whiskers.
outliers = {
    col: list(boxplot_stats(mda8[col], whis=(2, 98))[0]['fliers'])
    for col in mda8.columns
}
# Keep only the smallest and largest outlier of each column (empty if none).
min_max_outliers = {
    col: [np.min(fliers), np.max(fliers)] if fliers else []
    for col, fliers in outliers.items()
}
# Box i is drawn at x = i, so both markers of column i share that x position.
for i, extremes in enumerate(min_max_outliers.values()):
    if extremes:
        ax.scatter([i, i], extremes, marker='d', facecolor='black')
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.cbook import boxplot_stats
mda8 = pd.DataFrame({'T1': [35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
                     'T2': [28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
                     'T3': [34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})
fig, ax = plt.subplots(1, figsize=(8, 6))
# Draw the boxes without seaborn's own outlier markers.
sns.boxplot(data=mda8, orient='v', width=0.7, whis=(2, 98), showfliers=False)
# Compute the fliers with the same percentile whiskers as the boxplot
# (whis=(2, 98)). boxplot_stats defaults to whis=1.5, which would classify a
# different set of points as outliers than the ones beyond the drawn whiskers.
outliers = {
    col: list(boxplot_stats(mda8[col], whis=(2, 98))[0]['fliers'])
    for col in mda8.columns
}
# Keep only the smallest and largest outlier of each column (empty if none).
min_max_outliers = {
    col: [np.min(fliers), np.max(fliers)] if fliers else []
    for col, fliers in outliers.items()
}
# Box i is drawn at x = i, so both markers of column i share that x position.
for i, extremes in enumerate(min_max_outliers.values()):
    if extremes:
        ax.scatter([i, i], extremes, marker='d', facecolor='black')
xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(len(xvalues)), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)', fontsize=15)
ax.set_ylim(0, 105)
plt.show()
EDIT
Working with the data you provided, if I plot them as they are:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the CSV and discard its three date columns in one step.
mda8 = pd.read_csv(r'data/MDA8_allregions.csv').drop(
    columns=['date', 'date.1', 'date.2']
)
fig, ax = plt.subplots(1, figsize=(8, 6))
# Box plot of every remaining column, with the outlier markers shown.
sns.boxplot(
    data=mda8,
    orient='v',
    width=0.7,
    whis=(2, 98),
    showfliers=True,
)
plt.show()
I get:
Starting from the code above, I set the parameter showfliers = False in order to hide the outliers.
Then, as suggested by JohanC in the comment, a simpler way to plot outliers is to plot min and max for each column:
# Mark only the smallest and largest value of each column; box i sits at x = i.
for i, col in enumerate(mda8.columns):
    ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker='d', facecolor='black')
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the CSV and discard its three date columns.
mda8 = pd.read_csv(r'data/MDA8_allregions.csv')
mda8 = mda8.drop(['date', 'date.1', 'date.2'], axis=1)
fig, ax = plt.subplots(1, figsize=(8, 6))
# Draw the boxes without seaborn's own outlier markers.
sns.boxplot(data=mda8, orient='v', width=0.7, whis=(2, 98), showfliers=False)
# Mark only the smallest and largest value of each column; box i sits at x = i.
for i, col in enumerate(mda8.columns):
    ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker='d', facecolor='black')
plt.show()

How to assign color to error bar caps? [Matplotlib]

I am new to Matplotlib and would like to assign colors to error bar caps. In my data (attached), the mean values are in the column 'numbers' and the standard deviation (the error) is in the column 'sd'. I grouped the data by 'strain' (4 categories: mc, mut1, etc.), and each strain is drawn as a line in its own color. The code below works, BUT when I use "capsize" to add caps it throws an error...
I want the caps to have the same color as lines (from color vector "c"), any way? Thanks!
The file is https://anonfiles.com/d8A7m4F5o0/mutdp_csv
# Question code, kept as posted: the errorbar call below is the one that fails.
muts = pd.read_csv('mutdp.csv')
# Subset of the rows.
# NOTE(review): muts[1:28] selects positions 1 through 27 (27 rows), while the
# colour list built for errorbar has 4 * 7 = 28 entries - confirm the slice.
gr=muts[1:28]
fig, ax = plt.subplots(figsize=(12,9.9))
# One colour per strain.
c=['b','y','r','g']
# Mean of 'numbers' per (time, strain), reshaped to one column per strain.
grstrain=gr.groupby(['time','strain']).mean()['numbers'].unstack()
grstrain.plot.line(ax=ax, style=['-o','-o','-o','-o'],color=c, ls = '--', linewidth=2.7)
# Error bars: according to the question, adding "capsize" here raises an error.
# color is given a list (each entry of c repeated 7 times) rather than one colour.
ax.errorbar(gr.time, gr.numbers, yerr=gr.sd, ls='', color=[i for i in c for _i in range(7)], capsize=3, capthick=3)
# Logarithmic y scale with fixed limits.
plt.yscale('log')
plt.ylim(0.04, 3)
# Display the figure (nothing is written to disk here).
plt.show()
As ax.errorbar only accepts one fixed color, it could be called in a loop, once for each color. The following code creates some random data to show how the loop could be written:
from matplotlib import pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
# Random stand-in data: 4 strains x 7 time points, with a mean and an sd each.
gr = pd.DataFrame({'time': np.tile(range(0, 14, 2), 4),
                   'strain': np.repeat(['mc', 'mut1', 'mut2', 'mut3'], 7),
                   'numbers': 0.1 + np.random.uniform(-0.01, 0.06, 28).cumsum(),
                   'sd': np.random.uniform(0.01, 0.05, 28)})
fig, ax = plt.subplots(figsize=(12, 9.9))
colors = ['b', 'y', 'r', 'g']
# Mean of 'numbers' per (time, strain), reshaped to one column per strain.
grstrain = gr.groupby(['time', 'strain'])['numbers'].mean().unstack()
grstrain.plot.line(ax=ax, style=['-o', '-o', '-o', '-o'], color=colors, ls='--', linewidth=2.7)
# errorbar takes a single colour per call, so it is called once per strain.
# Iterating over grstrain.columns pairs each strain with the same colour that
# its line received in the plot above.
for strain, color in zip(grstrain.columns, colors):
    grs = gr[gr.strain == strain]
    ax.errorbar(grs.time, grs.numbers, yerr=grs.sd, ls='', color=color, capsize=3, capthick=3)
plt.yscale('log')
plt.ylim(0.04, 3)
plt.show()

Scatter plots in Pandas: Plot by category with different color and shape combinations

I would like to plot a data-set by its categories, using geometric shapes such as circle, triangle and square to represent category 1 and colors to represent category 2. The output would have varying combination of the geometric shapes and colors and the legend would list the attributes of the categories separately i.e.:
circle = a
triangle = b
square = c
red = I
green = II
blue = III
Looking for solutions I found following posts which would only give solutions for one specific geometric shape having one specific color.
How to plot by category with different markers
How to plot by category
I tried to work something out with the code from one of the posts but without success.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Reproducible random sample: 10 points, each tagged with two categories.
np.random.seed(1983)
num = 10
x, y = np.random.random((2, num))
cat1 = np.random.choice(['a', 'b', 'c'], num)
cat2 = np.random.choice(['I', 'II', 'III'], num)
df = pd.DataFrame(dict(x=x, y=y, cat1=cat1, cat2=cat2))
# One group per (cat1, cat2) combination present in the data.
groups = df.groupby(['cat1', 'cat2'])
fig, ax = plt.subplots()
# Every group is drawn with the same 'o' marker and a default colour; this is
# the behaviour the question wants to change.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend()
plt.show()
you can try this code block
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Lookup tables: cat1 decides the marker shape, cat2 decides the colour.
marker_dict = {'a': 'o', 'b': '^', 'c': 's'}
color_dict = {'I': 'red', 'II': 'green', 'III': 'blue'}
# Reproducible random sample: 10 points, each tagged with two categories.
np.random.seed(1983)
num = 10
x, y = np.random.random((2, num))
cat1 = np.random.choice(['a', 'b', 'c'], num)
cat2 = np.random.choice(['I', 'II', 'III'], num)
df = pd.DataFrame(dict(x=x, y=y, cat1=cat1, cat2=cat2))
# One group per (cat1, cat2) combination present in the data.
groups = df.groupby(['cat1', 'cat2'])
fig, ax = plt.subplots()
ax.margins(0.05)
for name, group in groups:
    # The group key is the (cat1, cat2) pair.
    cat1_value, cat2_value = name
    ax.plot(group.x, group.y, marker=marker_dict[cat1_value], linestyle='',
            ms=12, label=name, color=color_dict[cat2_value])
ax.legend()
plt.show()
Hope it helps.

Plot a line between prediction and ground_truth point in matplotlib

I have two pandas Series, ground_truth and prediction. I already plot all prediction points and all ground_truth points. What I want to do now is draw a line between each prediction point and its ground_truth point, so that each line connects the prediction point (x1, y1) to the ground_truth point (x2, y2). For a better understanding I attached an image; the black lines (drawn in Paint) are what I want to produce.
This is what I already have:
# Question code: plots the predictions and the ground truth as two marker series.
fig, ax = plt.subplots()
# NOTE(review): the 'ro' format string and color = 'g' name different colours - confirm which is intended.
ax.plot(pred,'ro', label='Prediction', color = 'g')
ax.plot(GT,'^', label='Ground Truth', color = 'r' )
plt.xlabel('a')
plt.ylabel('b')
plt.title('test')
plt.xticks(np.arange(-1, 100, 5))
# NOTE(review): the style is selected after the artists were created - confirm it takes effect as intended.
plt.style.use('ggplot')
plt.legend()
plt.show()
I guess the easiest and most understandable solution is to plot the respective lines between pred and GT in a loop.
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams['legend.numpoints'] = 1
# generate some random data: each ground truth is its prediction shifted up
# or down by a random amount
pred = np.random.rand(10) * 70
GT = pred + (np.random.randint(8, 40, size=len(pred)) * 2. * (np.random.randint(2, size=len(pred)) - .5))
fig, ax = plt.subplots(figsize=(6, 4))
# plot a black vertical line between the
# ith prediction and the ith ground truth (both sit at x = i)
for i, (p, g) in enumerate(zip(pred, GT)):
    ax.plot([i, i], [p, g], c="k", linewidth=0.5)
ax.plot(pred, 'o', label='Prediction', color='g')
ax.plot(GT, '^', label='Ground Truth', color='r')
# leave one unit of space on each side, whatever the number of points
ax.set_xlim((-1, len(pred)))
plt.xlabel('a')
plt.ylabel('b')
plt.title('test')
plt.legend()
plt.show()
You can plot each line as a separate plot. You could make a loop and call plot for each line connecting the two points. However you could also give the plot(x, y, ...) two 2d arrays as arguments. Each column in x will correspond to the same column in y and are represented by a line in the plot. So you'll need to generate these two. It could look something like this:
# Build two 2 x L arrays: column i holds the two endpoints of segment i.
L = len(pred)
positions = np.arange(L)
t = np.vstack([positions, positions])  # both endpoints of segment i are at x = i
endpoints = np.vstack([pred, GT])      # row 0: predictions, row 1: ground truth
# plot() draws one line per column when given 2-D x and y.
ax.plot(t, endpoints, '-k')
You can achieve this using matplotlib errorbar (http://matplotlib.org/1.2.1/examples/pylab_examples/errorbar_demo.html), with the idea of drawing error bars around the average of the two lines you are plotting:
Here is a minimal example to show my idea:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Select the style before the figure is created so that it applies to it;
# the original call came after everything had already been drawn.
plt.style.use('ggplot')
# example data
x = np.arange(0.1, 10, 0.5)
y1 = pd.Series(np.exp(-x), index=x)
y2 = pd.Series(np.exp(-x) + np.sin(x), index=x)
# The midpoint of the two series and half their distance: an error bar of
# +/- err around avg_line therefore spans exactly from y1 to y2.
avg_line = (y1 + y2) * 0.5
err = (avg_line - y1).abs()
fig, ax = plt.subplots(1)
y1.plot(marker='o', label='Prediction', color='g', linestyle='', ax=ax)
y2.plot(marker='^', label='Ground Truth', color='r', linestyle='', ax=ax)
# fmt='none' draws only the bars, not a marker at the midpoint.
ax.errorbar(x, avg_line.values, yerr=err.values, fmt='none', ecolor='k', barsabove=False, capthick=0)
ax.legend()
plt.show()
Hope this solves your problem.

Categories