Inset plots with simple plt plots - python

I am trying to add an inset plot for the Holding_cost and Backlogging_cost lines of the graph below. When I searched for inset plots, I saw that many of the examples use subplots with (ax, figure) definitions. Since I have some code that is specific to the plt. interface, I do not want to migrate this figure to a subplot version. Is there a way to embed the inset plot under these circumstances?
from matplotlib import rc
import matplotlib.pyplot as plt
from labellines import labelLine, labelLines
# Script: plot the annual sourcing cost components against k and mark the
# minimum of the total cost.  Expects k, ksize, Cost_sum, Build_cost,
# Downtime_cost, Purchasing_cost, Holding_cost and Backlogging_cost to be
# defined beforehand (the data tuple is given separately).

# Render all text through LaTeX.  The preamble is passed as one string:
# list values for this rcParam were deprecated in Matplotlib 3.3 and newer
# releases expect a plain string.
rc('text', usetex=True)
plt.rcParams['text.latex.preamble'] = (
    r"\usepackage{amsmath}"
    r"\usepackage[mathscr]{euscript}"
)

plt.figure(figsize=(8, 8))

# One curve per cost component.  The plotting order matters: the in-line
# labels below address the curves by their index in get_lines().
plt.plot(range(ksize), Cost_sum, '-k', label='Total', linewidth=3)
plt.plot(range(ksize), Build_cost, '-', c='seagreen', label='Build', linewidth=3)
plt.plot(range(ksize), Downtime_cost, '-', c='indigo', label='Downtime', linewidth=3)
plt.plot(range(ksize), Purchasing_cost, '-', c='crimson', label='Purchasing', linewidth=3)
plt.plot(range(ksize), Holding_cost, '-', c='orange', label='Holding', linewidth=3)
plt.plot(range(ksize), Backlogging_cost, '-', c='turquoise', label='Backlogging', linewidth=3)

# Open star at (k, min total cost), annotated as k*, plus an extra x tick at k.
plt.plot([k], [min(Cost_sum)], 'k', marker='*', mfc='none', markersize=14)
plt.text(k*0.98, min(Cost_sum)*1.04, r'$k^*$')
plt.xticks(list(plt.xticks()[0]) + [k])

# In-line labels.  Curves 0-4 are placed individually at hand-picked x
# positions; every remaining line is handed to labelLines in one call.
lines = plt.gca().get_lines()
labelLine(lines[4], 11, label=lines[4].get_label(), align=False, fontsize=16, zorder=2.5, va='bottom')
labelLine(lines[0], 15, label=lines[0].get_label(), align=False, fontsize=16, zorder=2.5)
labelLine(lines[1], 15, label=lines[1].get_label(), align=False, fontsize=16, zorder=2.5)
labelLine(lines[2], 15, label=lines[2].get_label(), align=False, fontsize=16, zorder=2.5)
labelLine(lines[3], 13, label=lines[3].get_label(), align=False, fontsize=16, zorder=2.5)
labelLines(lines[5:], zorder=2.5, align=False, fontsize=16, xvals=(15, 20), va='top')

# Dashed vertical guide from the bottom of the axes up to the minimum.
plt.plot([k, k], [-max(Cost_sum)*0.05, min(Cost_sum)], '--k', dashes=(8, 6))

# Axis formatting.
plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.rc('font', size=20)
plt.xlim(left=-1, right=ksize*1.01)
plt.ylim(bottom=-max(Cost_sum)*0.05, top=max(Cost_sum)*1.05)
plt.xlabel(r'$k$', fontsize=20)
# Raw string: "\$" is not a valid Python escape sequence, but LaTeX needs
# the backslash to print a literal dollar sign.
plt.ylabel(r'Annual Sourcing Cost (\$)', fontsize=20)
plt.show()
Here is the data to reproduce the attached figure.
(k,ksize,Cost_sum,Build_cost,Downtime_cost,Purchasing_cost,Holding_cost,Backlogging_cost) = (5, 21, [4566185.84958104, 3851831.0572200846, 3565535.616370524, 3395959.546010131, 3335310.1252832105, 3329129.801671144, 3348670.023232867, 3396492.9660517015, 3466368.5698340936, 3554060.0211574556, 3651792.419066521, 3758257.621291115, 3869866.130740931, 3985036.500754009, 4102434.354938996, 4221042.0030950345, 4340432.879293293, 4460167.615843281, 4580058.451828075, 4700015.099763582, 4819998.516231451], [0, 177222.2678574566, 358430.74126313033, 498693.32509609073, 625885.403095768, 746604.6981917183, 873034.7125363777, 975063.4720472957, 1049769.8471082426, 1145978.9429791428, 1199143.42108458, 1240852.8140335542, 1269882.4509229255, 1287853.138858912, 1308162.0194890695, 1311076.0940421396, 1312594.5371365794, 1312594.537136579, 1312594.537136579, 1312594.5371365792, 1312594.537136579], [0, 141129.06486431422, 198208.5540829755, 222573.40844582563, 259146.4212700642, 287560.0302637707, 320215.10890425567, 349656.43658995605, 368483.16599147196, 395395.4583382164, 410688.11784972803, 419781.9418066965, 432130.2205095227, 444005.25280786975, 451695.2201472967, 454782.15603979095, 456688.3693602677, 456423.1059102573, 456313.94189504976, 456270.5898305575, 456254.00629842596], [4541171.853867372, 3388943.963490517, 2745797.4942291463, 2296277.8906481406, 1953109.7381211254, 1678321.807949813, 1420580.3170326485, 1217326.2795553817, 1074452.1288599337, 919861.1429883543, 829741.2108432015, 765975.1717142115, 716516.4480671097, 682606.0612729619, 652138.545832702, 644789.7371737638, 640812.2765522903, 640812.2765522903, 640812.2765522903, 640812.2765522903, 640812.2765522903], [22393.016948892902, 21960.667293075123, 20795.12848139708, 16300.577174067963, 15257.966173544293, 14798.161857120422, 13090.785835092029, 12744.159873509096, 12026.756694177322, 11342.598968663648, 10753.439568233127, 10274.278741187029, 10018.226634046483, 9465.36734027374, 9350.039444208951, 
9312.956511300557, 9259.41257276402, 9259.41257276402, 9259.41257276402, 9259.41257276402, 9259.41257276402], [2620.9787647758053, 2575.0937147202317, 2303.69831387141, 2114.3446460037326, 1910.5966227084793, 1845.1034087196194, 1749.098924491782, 1702.6179855582488, 1636.671180267143, 1481.8778830783633, 1466.2297207771976, 1373.4149954678835, 1318.7846073251453, 1106.6804739931142, 1088.530025718579, 1081.0593280402318, 1078.2836713916126, 1078.2836713916126, 1078.2836713916126, 1078.2836713916126, 1078.2836713916126])

Related

Is it possible to plot multiple buffers in python

I'm rather new to coding, so sorry if my question is stupid, but I can't find a solution anywhere.
My question is whether you can plot multiple buffers on top of each other, with multiple colors. I'm trying to make a map where I would like buffers showing the 20, 30 and 50 km ranges from a coordinate. My attempt so far looks like this:
# Build a point GeoDataFrame from the x/y columns of df, tagged with CRS EPSG:25832.
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y), crs="EPSG:25832")
# NOTE(review): plain assignment does not copy -- gdf30 (and gdf20 below) are
# further names for the very same GeoDataFrame object as gdf.
gdf30=gdf
gdf30['geometry'] = gdf30.geometry.buffer(30*1000)
gdf20=gdf
# NOTE(review): because of the aliasing, this buffers the geometry that was
# already buffered by 30*1000 above, and overwrites it for all three names.
# Presumably an independent copy per radius (e.g. gdf.copy()) was intended.
gdf20['geometry'] = gdf20.geometry.buffer(20*1000)
# Base map, reprojected to EPSG 25832 like the points.
Map = geopandas.read_file("Map_DK_SWE.gpkg")
Map = Map.to_crs(25832)
fig,ax=plt.subplots()
Map.plot(ax=ax,color='white', edgecolor='black')
# Crop the view to the area of interest.
ax.set_ylim([6000000, 6500000])
ax.set_xlim([400000, 850000])
# Draw both buffer layers on the shared axes; zorder controls the stacking.
gdf30.plot(ax=ax, color='blue',zorder=2)
gdf20.plot(ax=ax, color='green',zorder=1)
[This is what I get from the code][1]
I don't know exactly what your issue is, since I can't see your plot, but you can do it like this:
from matplotlib import pyplot as plt
import geopandas as gpd
# Sample layers shipped with geopandas: city points and country polygons.
cities = gpd.read_file(gpd.datasets.get_path('naturalearth_cities'))
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Select the point of interest once; every buffer is derived from it.
centroid = cities[cities.name == 'Tokyo']

f, ax = plt.subplots()

# plot basemap
world.plot(edgecolor='k', facecolor='w', ax=ax)

# plot buffers, largest first so the smaller ones are drawn on top
for radius, colour, name in ((3, 'r', 'buffer 1'),
                             (2, 'b', 'buffer 2'),
                             (1, 'g', 'buffer 3')):
    centroid.geometry.buffer(radius).plot(color=colour, label=name, ax=ax, alpha=.5)

# plot original coordinates
centroid.plot(marker='X', color='r', ax=ax)

# crop map to extent
ax.set_xlim(120, 145)
ax.set_ylim(25, 50)
plt.show()

Seaborn violinplots: how do I obtain line paths for violin edges?

I am using Seaborn to plot two violin plots, and I would like to obtain the X-coordinates of the edges of the violins, so that I can subtract one from the other to find the difference between the two KDEs/distributions. I suspect it has to do with the properties of the matplotlib.collections.PolyCollection objects, but I have had difficulty navigating the documentation for this, so I apologize that I do not have much code to attach. I'm including the code for my current violin plot in case that helps:
import pandas as pd
import seaborn as sns
observed_results = [11.10283128625, 6.031906445000001, 4.625099850384, 4.371541683749999, 4.188776438315, 4.169933187839999, 4.147271982216, 3.137545605726, 2.727390606468, 2.706991071933, 2.483074510875, 2.470624399684, 2.45608460474, 2.413902898276, 2.390530982763, 2.347653087613, 2.3049660823, 2.173520711313, 2.114398085409, 2.072213409552, 1.96126510972, 1.768724290017, 1.722913211104, 1.715972042575, 1.71293343376, 1.686909025847, 1.546933962564, 1.520621928225, 1.50428319008, 1.4944074417, 1.409957657136, 1.40292975245, 1.3157577856, 1.3078804375, 1.2974806016, 1.288403682732, 1.236493409437, 1.225900768752, 1.222094652926, 1.202655483344, 1.109818003441, 1.108678687017, 1.103788138352, 1.066656041116, 0.9799193812, 0.9729697610879998, 0.9532061536159999, 0.908066599737, 0.8847958337999999, 0.8698585519490001, 0.859714675264, 0.8422146736200001, 0.8229930509580001, 0.79569571686, 0.79170962662, 0.7855221024799999, 0.775488805524, 0.7690100510069999, 0.765153773568, 0.677797640336, 0.62878587992, 0.6278724034559998, 0.619961861949, 0.555740507912, 0.5458990340579999, 0.495263271752, 0.4744517001510001, 0.453783299787, 0.426952628919, 0.4079525625, 0.4030645275, 0.401266962432, 0.3807181439999999, 0.3313936548, 0.2707379496719999, 0.256952683998, 0.248430471184, 0.242090703552, 0.235644786034, 0.214602373804, 0.199521746592, 0.1951300125, 0.173116351962, 0.172562334309, 0.156660445172, 0.145336979139, 0.1190036898, 0.114659525376, 0.094888354288, 0.06696725615, 0.0305541639, 0.023464003706, 0.021922131125]
observed_labels = ["Observed" for n in observed_results]
expected_results = [4.5217885770490405, 3.828641396489095, 3.4231762883809305, 3.1354942159291497, 2.91235066461494, 2.7300291078209855, 2.575878427993727, 2.4423470353692043, 2.324563999712821, 2.2192034840549946, 2.12389330425067, 2.03688192726104, 1.9568392195875037, 1.8827312474337816, 1.8137383759468302, 1.749199854809259, 1.6885752329928243, 1.6314168191528757, 1.5773495978825998, 1.5260563034950494, 1.4772661393256175, 1.4307461236907244, 1.3862943611198906, 1.3437347467010947, 1.3029127521808397, 1.2636920390275583, 1.2259517110447113, 1.1895840668738362, 1.1544927470625663, 1.120591195386885, 1.087801372563894, 1.0560526742493137, 1.02528101558256, 0.995428052432879, 0.9664405155596267, 0.9382696385929304, 0.9108706644048158, 0.8842024173226546, 0.858226930919394, 0.832909122935104, 0.8082165103447325, 0.7841189587656721, 0.760588461355478, 0.7375989431307791, 0.7151260872787205, 0.6931471805599453, 0.6716409753389818, 0.6505875661411494, 0.6299682789384137, 0.6097655716208943, 0.5899629443247145, 0.570544858467613, 0.5514966634969185, 0.5328045304847661, 0.5144553918165694, 0.496436886313891, 0.4787373092144902, 0.46134556650262093, 0.44425113314332093, 0.4274440148269396, 0.4109147128757291, 0.39465419200394874, 0.37865385065750773, 0.3629054936893685, 0.34740130715340317, 0.3321338350226148, 0.31709595765807425, 0.3022808718729337, 0.2876820724517809, 0.2732933349996814, 0.2591087000077249, 0.2451224580329851, 0.2313291359006492, 0.21772348384487053, 0.20430046351272993, 0.19105523676270922, 0.17798315519535654, 0.16507975035944858, 0.1523407245820189, 0.13976194237515874, 0.1273394223766015, 0.11506932978478723, 0.10294796925244237, 0.09097177820572676, 0.07913732055872386, 0.06744128079553265, 0.055880458394456614, 0.04445176257083381, 0.03315220731690051, 0.02197890671877523, 0.010929070532190317, -0.0]
expected_labels = ["Expected" for n in expected_results]
# plt.show() is called below, but pyplot was missing from this snippet's imports.
import matplotlib.pyplot as plt

# Long-form table: one row per value, tagged with the condition it belongs to.
all_results = observed_results + expected_results
all_labels = observed_labels + expected_labels
df_longform = pd.DataFrame(data={"Z-score": all_results, 'Condition': all_labels})

# One violin per condition, without inner annotations, cut at the data range.
ax = sns.violinplot(x='Condition', y="Z-score", data=df_longform, inner=None,
                    scale='area', bw=0.3, width=0.8, saturation=1,
                    linewidth=1, cut=0)

# Inspect the collection artists seaborn attached to the axes.
print(ax.collections)
plt.show()
Please let me know if anything about this question is unclear, or if there is anything else I forgot to provide here. I can't figure this out for the life of me, so any advice is really appreciated.
Here is a comparison between:
calculating and visualizing the two distributions via a kde plot and their intersection
simulating a violinplot given the kde curves
a boxenplot, which may be another valuable way to compare the distributions.
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import seaborn as sns
import pandas as pd
observed_results = [11.10283128625, 6.031906445000001, 4.625099850384, 4.371541683749999, 4.188776438315, 4.169933187839999, 4.147271982216, 3.137545605726, 2.727390606468, 2.706991071933, 2.483074510875, 2.470624399684, 2.45608460474, 2.413902898276, 2.390530982763, 2.347653087613, 2.3049660823, 2.173520711313, 2.114398085409, 2.072213409552, 1.96126510972, 1.768724290017, 1.722913211104, 1.715972042575, 1.71293343376, 1.686909025847, 1.546933962564, 1.520621928225, 1.50428319008, 1.4944074417, 1.409957657136, 1.40292975245, 1.3157577856, 1.3078804375, 1.2974806016, 1.288403682732, 1.236493409437, 1.225900768752, 1.222094652926, 1.202655483344, 1.109818003441, 1.108678687017, 1.103788138352, 1.066656041116, 0.9799193812, 0.9729697610879998, 0.9532061536159999, 0.908066599737, 0.8847958337999999, 0.8698585519490001, 0.859714675264, 0.8422146736200001, 0.8229930509580001, 0.79569571686, 0.79170962662, 0.7855221024799999, 0.775488805524, 0.7690100510069999, 0.765153773568, 0.677797640336, 0.62878587992, 0.6278724034559998, 0.619961861949, 0.555740507912, 0.5458990340579999, 0.495263271752, 0.4744517001510001, 0.453783299787, 0.426952628919, 0.4079525625, 0.4030645275, 0.401266962432, 0.3807181439999999, 0.3313936548, 0.2707379496719999, 0.256952683998, 0.248430471184, 0.242090703552, 0.235644786034, 0.214602373804, 0.199521746592, 0.1951300125, 0.173116351962, 0.172562334309, 0.156660445172, 0.145336979139, 0.1190036898, 0.114659525376, 0.094888354288, 0.06696725615, 0.0305541639, 0.023464003706, 0.021922131125]
expected_results = [4.5217885770490405, 3.828641396489095, 3.4231762883809305, 3.1354942159291497, 2.91235066461494, 2.7300291078209855, 2.575878427993727, 2.4423470353692043, 2.324563999712821, 2.2192034840549946, 2.12389330425067, 2.03688192726104, 1.9568392195875037, 1.8827312474337816, 1.8137383759468302, 1.749199854809259, 1.6885752329928243, 1.6314168191528757, 1.5773495978825998, 1.5260563034950494, 1.4772661393256175, 1.4307461236907244, 1.3862943611198906, 1.3437347467010947, 1.3029127521808397, 1.2636920390275583, 1.2259517110447113, 1.1895840668738362, 1.1544927470625663, 1.120591195386885, 1.087801372563894, 1.0560526742493137, 1.02528101558256, 0.995428052432879, 0.9664405155596267, 0.9382696385929304, 0.9108706644048158, 0.8842024173226546, 0.858226930919394, 0.832909122935104, 0.8082165103447325, 0.7841189587656721, 0.760588461355478, 0.7375989431307791, 0.7151260872787205, 0.6931471805599453, 0.6716409753389818, 0.6505875661411494, 0.6299682789384137, 0.6097655716208943, 0.5899629443247145, 0.570544858467613, 0.5514966634969185, 0.5328045304847661, 0.5144553918165694, 0.496436886313891, 0.4787373092144902, 0.46134556650262093, 0.44425113314332093, 0.4274440148269396, 0.4109147128757291, 0.39465419200394874, 0.37865385065750773, 0.3629054936893685, 0.34740130715340317, 0.3321338350226148, 0.31709595765807425, 0.3022808718729337, 0.2876820724517809, 0.2732933349996814, 0.2591087000077249, 0.2451224580329851, 0.2313291359006492, 0.21772348384487053, 0.20430046351272993, 0.19105523676270922, 0.17798315519535654, 0.16507975035944858, 0.1523407245820189, 0.13976194237515874, 0.1273394223766015, 0.11506932978478723, 0.10294796925244237, 0.09097177820572676, 0.07913732055872386, 0.06744128079553265, 0.055880458394456614, 0.04445176257083381, 0.03315220731690051, 0.02197890671877523, 0.010929070532190317, -0.0]
# numpy is used below (np.linspace, np.minimum) but was missing from this
# snippet's imports, which made it fail with a NameError.
import numpy as np

x0 = observed_results
x1 = expected_results

# One Gaussian KDE per sample.
kde0 = gaussian_kde(x0, bw_method=0.3)
kde1 = gaussian_kde(x1, bw_method=0.3)

# Common evaluation grid spanning both samples.
xmin = min(min(x0), min(x1))
xmax = max(max(x0), max(x1))
dx = 0.2 * (xmax - xmin)  # add a 20% margin, as the kde is wider than the data
xmin -= dx
xmax += dx
x = np.linspace(xmin, xmax, 500)

# Densities on the grid; their pointwise minimum is the overlapping region.
kde0_x = kde0(x)
kde1_x = kde1(x)
inters_x = np.minimum(kde0_x, kde1_x)
fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(15, 7))

# Area under the overlap curve, shown as a percentage in both legends.
area_inters_x = np.trapz(inters_x, x)
overlap_suffix = f': {area_inters_x * 100:.1f} %'

# Left panel: both densities as filled curves plus the hatched overlap.
for density, colour, name in ((kde0_x, 'b', 'Observed'),
                              (kde1_x, 'orange', 'Expected')):
    ax1.plot(x, density, color=colour, label=name)
    ax1.fill_between(x, density, 0, color=colour, alpha=0.2)
ax1.plot(x, inters_x, color='r')
ax1.fill_between(x, inters_x, 0, facecolor='none', edgecolor='r', hatch='xx', label='intersection')
handles, labels = ax1.get_legend_handles_labels()
labels[2] += overlap_suffix  # third legend entry is the intersection
ax1.legend(handles, labels)
ax1.set_ylim(ymin=0)
ax1.set_title("kde plot with intersection")

# Middle panel: the same densities mirrored around x=0 at half width,
# drawn vertically to imitate a violin plot.
for density, colour, name in ((kde0_x, 'b', 'Observed'),
                              (kde1_x, 'r', 'Expected')):
    half_width = density / 2
    ax2.plot(half_width, x, color=colour, label=name)
    ax2.plot(-half_width, x, color=colour)
    ax2.fill_betweenx(x, -half_width, half_width, color=colour, alpha=0.2)
half_overlap = inters_x / 2
ax2.plot(half_overlap, x, color='k')
ax2.plot(-half_overlap, x, color='k')
ax2.fill_betweenx(x, -half_overlap, half_overlap, facecolor='none', edgecolor='k', hatch='oo', label='intersection')
handles, labels = ax2.get_legend_handles_labels()
labels[2] += overlap_suffix
ax2.legend(handles, labels)
ax2.set_title("simulated violinplot with intersection")

# Right panel: boxenplot of the raw values in long form.
condition_column = ["Observed"] * len(observed_results) + ["Expected"] * len(expected_results)
df_longform = pd.DataFrame(data={"Z-score": observed_results + expected_results,
                                 "Condition": condition_column})
sns.boxenplot(x="Condition", y="Z-score", data=df_longform, ax=ax3)
ax3.set_title("sns.boxenplot")

plt.tight_layout()
plt.show()

How to add a mean line to a seaborn stripplot or swarmplot

I have a rather simple strip plot with vertical data.
# Strip plot of distance per detection method, drawn with small grey markers.
planets = sns.load_dataset("planets")
sns.stripplot(data=planets, x="method", y="distance", size=4, color=".7")
plt.xticks(ha="right", rotation=45)
plt.show()
I want to plot the mean of each x-element (method) as a small horizontal bar similar to what you get with:
# Box plot with fliers, box and caps switched off and both whisker
# percentiles set to 50.
hidden_parts = dict(showfliers=False, showbox=False, showcaps=False)
sns.boxplot(data=planets, x="method", y="distance", whis=[50, 50], **hidden_parts)
But without the vertical lines (with whis=[50,50] just spots) for the first / third quartile and showing mean instead of median. Maybe there is a more elegant solution not involving a Boxplot.
Thanks in advance.
Boxplot objects are defined in matplotlib.pyplot.boxplot
showmeans=True
meanline=True makes a line instead of a marker
meanprops={'color': 'k', 'ls': '-', 'lw': 2} sets the color, style and width of the line.
See matplotlib.lines.Line2D for other line properties.
medianprops={'visible': False} makes the median line not visible
whiskerprops={'visible': False} makes the whisker line not visible
zorder=10 places the line on the top layer
Tested in matplotlib v3.4.2 and seaborn v0.11.1
import seaborn as sns
import matplotlib.pyplot as plt
# load the dataset
planets = sns.load_dataset("planets")

# grey strip plot on a logarithmic y axis
p = sns.stripplot(x="method", y="distance", data=planets, size=4, color=".7")
plt.xticks(rotation=45, ha="right")
p.set(yscale='log')

# plot the mean line: a boxplot on the same axes with every element hidden
# except the mean, which is drawn as a solid black line
mean_line_only = dict(
    showmeans=True,
    meanline=True,
    meanprops={'color': 'k', 'ls': '-', 'lw': 2},
    medianprops={'visible': False},
    whiskerprops={'visible': False},
    showfliers=False,
    showbox=False,
    showcaps=False,
)
sns.boxplot(x="method", y="distance", data=planets, zorder=10, ax=p, **mean_line_only)
plt.show()
Works similarly with a seaborn.swarmplot
Here's a solution using ax.hlines, finding the means with groupby and drawing them in a list comprehension:
import seaborn as sns
import matplotlib.pyplot as plt
# load the dataset
planets = sns.load_dataset("planets")

# strip plot underneath (zorder=1), logarithmic y axis
p = sns.stripplot(x="method", y="distance", data=planets, size=4, color=".7", zorder=1)
plt.xticks(rotation=45, ha="right")
p.set(yscale='log')

# mean distance per method, kept in order of first appearance (sort=False)
group_means = planets.groupby('method', sort=False)['distance'].mean()

# one short horizontal bar per group, centred on its position 0, 1, 2, ...
mean_bars = [p.hlines(mean, pos - .25, pos + .25, zorder=2)
             for pos, mean in enumerate(group_means)]
Output:
Here's another hack that is similar to the boxplot idea but requires less overriding: draw a pointplot but with a confidence interval of width 0, and activate the errorbar "caps" to get a horizontal line with a parametrizable width:
planets = sns.load_dataset("planets")

# Shared plot specification for both layers.
spec = {"x": "method", "y": "distance", "data": planets}

sns.stripplot(size=4, color=".7", **spec)
# Point plot with unjoined points, scale 0 and ci 0; capsize sets the
# width of the cap drawn for each group.
sns.pointplot(join=False, ci=0, capsize=.7, scale=0, **spec)
plt.xticks(rotation=45, ha="right")
One downside that is evident here is that bootstrapping gets skipped for groups with a single observation, so you don't get a mean line there. This may or may not be a problem in an actual application.
Another trick would be to do the groupby yourself and then draw a scatterplot with a very wide vertical line marker:
planets = sns.load_dataset("planets")
variables = {"x": "method", "y": "distance"}

sns.stripplot(data=planets, size=4, color=".7", **variables)

# Mean distance per method as a regular two-column frame.
method_means = planets.groupby("method")["distance"].mean().reset_index()

# A tiny "|" marker drawn with a very large line width.
sns.scatterplot(data=method_means, marker="|", s=2, linewidth=25, **variables)
plt.xticks(rotation=45, ha="right")

How to make a bubble graph using seaborn

import matplotlib.pyplot as plt
import numpy as np
# data
x = ["IEEE", "Elsevier", "Others"]
y = [7, 6, 2]
import seaborn as sns

# Create the figure before drawing: calling plt.figure() after plotting
# opens a new, empty figure instead of resizing the one already drawn.
plt.figure(figsize=(10, 4))

# One bubble per publisher.  The label gives the legend an entry to show.
plt.scatter(x, y, s=300, c="blue", alpha=0.4, linewidth=3, label="Papers")
plt.ylabel("No. of Papers")

# The legend is built from labelled artists, so it must come after them.
plt.legend()
I want to make a graph as shown in the image. I am not sure how to provide data for both journal and conference categories. (Currently, I just include one). Also, I am not sure how to add different colors for each category.
You can try this code snippet for your problem.
- I modified your data format; I suggest you use pandas for
data visualization.
- I added one more field to visualize the data more efficiently.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
# data: publisher, paper count and publication type, one entry per bubble
x = ["IEEE", "Elsevier", "Others", "IEEE", "Elsevier", "Others"]
y = [7, 6, 2, 5, 4, 3]
z = ["conference", "journal", "conference", "journal", "conference", "journal"]

# create pandas dataframe
data_list = pd.DataFrame(dict(x_axis=x, y_axis=y, category=z))

# change size of data points
minsize = min(data_list['y_axis'])
maxsize = max(data_list['y_axis'])
size_range = (minsize*100, maxsize*100)

# scatter plot
sns.catplot(data=data_list, x="x_axis", y="y_axis", kind="swarm",
            hue="category", sizes=size_range)
plt.grid()
How to create the graph with correct bubble sizes and with no overlap
Seaborn stripplot and swarmplot (or sns.catplot(kind=strip or kind=swarm)) provide the handy dodge argument which prevents the bubbles from overlapping. The only downside is that the size argument applies a single size to all bubbles and the sizes argument (as used in the other answer) is of no use here. They do not work like the s and size arguments of scatterplot. Therefore, the size of each bubble must be edited after generating the plot:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import seaborn as sns # v 0.11.0
# Create sample data
x = ['IEEE', 'Elsevier', 'Others', 'IEEE', 'Elsevier', 'Others']
y = np.array([7, 6, 3, 7, 1, 3])
z = ['conference', 'conference', 'conference', 'journal', 'journal', 'journal']
df = pd.DataFrame(dict(organisation=x, count=y, category=z))

# Create seaborn stripplot (swarmplot can be used the same way)
ax = sns.stripplot(data=df, x='organisation', y='count', hue='category', dodge=True)

# Adjust the size of the bubbles: each collection is resized in proportion
# to the y-value of its first point.  The last two collections are left
# alone, as in the original snippet.
# NOTE(review): the [:-2] slice presumably skips legend helpers created by
# the seaborn version noted at the imports -- confirm on other versions.
for coll in ax.collections[:-2]:
    bubble_y = coll.get_offsets()[0][1]  # separate name: do not clobber the data array y
    coll.set_sizes([100*bubble_y])

# Format figure size, spines and grid
ax.figure.set_size_inches(7, 5)
ax.grid(axis='y', color='black', alpha=0.2)
ax.grid(axis='x', which='minor', color='black', alpha=0.2)
ax.spines['bottom'].set(position='zero', color='black', alpha=0.2)
sns.despine(left=True)

# Format ticks: minor x ticks halfway between categories act as separators
ax.tick_params(axis='both', length=0, pad=10, labelsize=12)
ax.tick_params(axis='x', which='minor', length=25, width=0.8, color=[0, 0, 0, 0.2])
major_xticks = ax.get_xticks()
minor_xticks = [tick+0.5 for tick in major_xticks if tick != major_xticks[-1]]
ax.set_xticks(minor_xticks, minor=True)
ax.set_yticks(range(0, df['count'].max()+2))

# Edit labels and legend
ax.set_xlabel('Organisation', labelpad=15, size=12)
ax.set_ylabel('No. of Papers', labelpad=15, size=12)
ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left', frameon=False)
Alternatively, you can use scatterplot with the convenient s argument (or size) and then edit the space between the bubbles to reproduce the effect of the missing dodge argument (note that the x_jitter argument seems to have no effect). Here is an example using the same data as before and without all the extra formatting:
# Create seaborn scatterplot with size argument
ax = sns.scatterplot(data=df, x='organisation', y='count',
                     hue='category', s=100*df['count'])
ax.figure.set_size_inches(7, 5)
ax.margins(0.2)

# Dodge bubbles: the first nunique() points are shifted left by 0.15 and
# the following nunique() points right by 0.15.  The offsets are modified
# in place on the array returned by get_offsets().
bubbles = ax.collections[0].get_offsets()
signs = np.repeat([-1, 1], df['organisation'].nunique())
for bubble, sign in zip(bubbles, signs):
    bubble[0] += sign*0.15
As a side note, I recommend that you consider other types of plots for this data. A grouped bar chart:
# Reshape to one row per organisation and one column per category, then draw bars.
df.pivot(index='organisation', columns='category').plot.bar()
Or a balloon plot (aka categorical bubble plot):
# Bubble size comes from the 'count' column; a bare name `count` is not defined.
sns.scatterplot(data=df, x='organisation', y='category', s=100*df['count']).margins(0.4)
Why? In the bubble graph, the counts are displayed using 2 visual attributes, i) the y-coordinate location and ii) the bubble size. Only one of them is really necessary.

Problems with long lists as input of set_xticklabels (Misaligned)

I am following the NMT with attention (https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb) tutorial and I am applying it to my own use case. Unfortunately, when I try to plot the attention weights, I get alignment problems on the x-axis if the input is too long (e.g. 14 tokens instead of 7).
In this code block, the plotting works as expected:
import numpy as np
from matplotlib import pyplot as plt
def plot_attention():
    """Show a 2x7 attention matrix with token labels on both axes.

    Tick labels are assigned without fixing the tick positions first; this
    is the behaviour the question is about and is kept unchanged here.
    """
    attention = np.array([[7.78877574e-10, 4.04739769e-10, 6.65854022e-05, 1.63362725e-04,
                           2.85054208e-04, 8.50252633e-04, 4.58042100e-02],
                          [9.23501700e-02, 5.69618285e-01, 1.80586591e-01, 9.78111699e-02,
                           2.71992851e-02, 9.59911197e-03, 2.54837354e-03]])
    sentence = ['<start>', 'hace', 'mucho', 'frio', 'aqui', '.', '<end>']
    predicted_sentence = ['it', 's']
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')
    fontdict = {'fontsize': 14}
    # The leading '' entry pads the label list by one position.
    ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)
    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)
    plt.show()


plot_attention()
but with more elements in the list "sentence", it seems to misalign:
def plot_attention():
    """Show a 2x14 attention matrix with token labels on both axes.

    Same as the 7-token version, but with the input repeated twice.  Tick
    labels are still assigned without fixing the tick positions; this is
    the behaviour the question is about and is kept unchanged here.
    """
    attention = np.array([[7.78877574e-10, 4.04739769e-10, 6.65854022e-05, 1.63362725e-04,
                           2.85054208e-04, 8.50252633e-04, 4.58042100e-02, 7.78877574e-10, 4.04739769e-10, 6.65854022e-05, 1.63362725e-04,
                           2.85054208e-04, 8.50252633e-04, 4.58042100e-02],
                          [9.23501700e-02, 5.69618285e-01, 1.80586591e-01, 9.78111699e-02,
                           2.71992851e-02, 9.59911197e-03, 2.54837354e-03, 7.78877574e-10, 4.04739769e-10, 6.65854022e-05, 1.63362725e-04,
                           2.85054208e-04, 8.50252633e-04, 4.58042100e-02]])
    sentence = ['<start>', 'hace', 'mucho', 'frio', 'aqui', '.', '<end>', '<start>', 'hace', 'mucho', 'frio', 'aqui', '.', '<end>']
    predicted_sentence = ['it', 's']
    fig = plt.figure(figsize=(20,10))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')
    fontdict = {'fontsize': 14}
    # The leading '' entry pads the label list by one position.
    ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)
    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)
    plt.show()


plot_attention()
I expect the x-axis to be perfectly aligned and that all elements of the x-axis are shown (not every second one as it is right now)
The problem is that you are only setting the tick-labels without specifying the positions of the ticks. Whenever you modify the tick labels, you should always first set the tick positions. So, do the following in your code
# Fix the tick positions first: one tick per token, at 0 .. len-1.
ax.set_xticks(range(len(sentence)))
ax.set_yticks(range(len(predicted_sentence)))
# Then assign exactly one label per tick (no leading '' placeholder needed).
ax.set_xticklabels(sentence, fontdict=fontdict, rotation=90)
ax.set_yticklabels(predicted_sentence, fontdict=fontdict)

Categories