Seaborn annotate lineplot of projected world population - python

"""
I'm trying to reproduce a plot showing the world population growth from 1950 to 2100.
Ideally, I'd like to show two different colors under the lineplot: darkgreen from 1950 to 2019, because these are actual data, and lightgreen for the projected data (2019 to 2100).
I'd like to annotate specific points corresponding to 1950, 1987, 2019 and 2050. I tried using markers=True, but it failed.
I'm looking for something like the following plot (without the annual growth rate in red)
Thank you in advance for helping me out.
"""
data = {'Year': {0: 1950,
1: 1951,
2: 1952,
3: 1953,
4: 1954,
5: 1955,
6: 1956,
7: 1957,
8: 1958,
9: 1959,
10: 1960,
11: 1961,
12: 1962,
13: 1963,
14: 1964,
15: 1965,
16: 1966,
17: 1967,
18: 1968,
19: 1969,
20: 1970,
21: 1971,
22: 1972,
23: 1973,
24: 1974,
25: 1975,
26: 1976,
27: 1977,
28: 1978,
29: 1979,
30: 1980,
31: 1981,
32: 1982,
33: 1983,
34: 1984,
35: 1985,
36: 1986,
37: 1987,
38: 1988,
39: 1989,
40: 1990,
41: 1991,
42: 1992,
43: 1993,
44: 1994,
45: 1995,
46: 1996,
47: 1997,
48: 1998,
49: 1999,
50: 2000,
51: 2001,
52: 2002,
53: 2003,
54: 2004,
55: 2005,
56: 2006,
57: 2007,
58: 2008,
59: 2009,
60: 2010,
61: 2011,
62: 2012,
63: 2013,
64: 2014,
65: 2015,
66: 2016,
67: 2017,
68: 2018,
69: 2019,
70: 2020,
71: 2091,
72: 2092,
73: 2093,
74: 2094,
75: 2095,
76: 2096,
77: 2097,
78: 2098,
79: 2099,
80: 2100},
'billion': {0: 2.5,
1: 2.6,
2: 2.6,
3: 2.7,
4: 2.7,
5: 2.8,
6: 2.8,
7: 2.9,
8: 2.9,
9: 3.0,
10: 3.0,
11: 3.1,
12: 3.2,
13: 3.2,
14: 3.3,
15: 3.3,
16: 3.4,
17: 3.5,
18: 3.6,
19: 3.6,
20: 3.7,
21: 3.8,
22: 3.9,
23: 3.9,
24: 4.0,
25: 4.1,
26: 4.2,
27: 4.2,
28: 4.3,
29: 4.4,
30: 4.5,
31: 4.5,
32: 4.6,
33: 4.7,
34: 4.8,
35: 4.9,
36: 5.0,
37: 5.1,
38: 5.1,
39: 5.2,
40: 5.3,
41: 5.4,
42: 5.5,
43: 5.6,
44: 5.7,
45: 5.7,
46: 5.8,
47: 5.9,
48: 6.0,
49: 6.1,
50: 6.1,
51: 6.2,
52: 6.3,
53: 6.4,
54: 6.5,
55: 6.5,
56: 6.6,
57: 6.7,
58: 6.8,
59: 6.9,
60: 7.0,
61: 7.0,
62: 7.1,
63: 7.2,
64: 7.3,
65: 7.4,
66: 7.5,
67: 7.5,
68: 7.6,
69: 7.7,
70: 7.8,
71: 10.8,
72: 10.8,
73: 10.8,
74: 10.8,
75: 10.9,
76: 10.9,
77: 10.9,
78: 10.9,
79: 10.9,
80: 10.9}}
# Imports must run before the first use of `pd`; in the original paste they
# came after `pd.DataFrame(data)`, which fails with a NameError when the
# snippet is executed top to bottom.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# `data` is the {'Year': ..., 'billion': ...} dict literal defined above.
df = pd.DataFrame(data)
print(df)

fig, ax = plt.subplots(figsize=(10, 8))
sns.lineplot(x='Year', y='billion', data=df, ax=ax, color='b')
ax.set_ylim([2, 11])

# Shade the area under the population curve in a single color.
plt.fill_between(df['Year'].values, df['billion'].values, color='lightgreen')

# Hand-placed text labels for the milestone years.
plt.text(1950, 2.5, '2.5 Billion\nin 1950', horizontalalignment='left')
plt.text(1987, 5, '5 Billion\nin 1987', horizontalalignment='right')
plt.text(2019, 7.7, '7.7 Billion\nin 2019', horizontalalignment='right')
plt.text(2050, 9.7, '9.7 Billion\nin 2050', horizontalalignment='right')

# Remove the frame on every side except the bottom, and hide the y axis.
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)  # hiding y spine
plt.gca().axes.get_yaxis().set_visible(False)  # hiding y axis
ax.spines['right'].set_visible(False)

plt.show()
plt.close()
"""
This is what I got so far
"""

You can fill between years using where=:
# Dark green under the curve up to 2019 (actual data), light green from 2019
# onwards (projected data); `where=` restricts each fill to the matching years.
ax.fill_between(df['Year'], df['billion'], color='darkgreen', where=df['Year'] <= 2019)
ax.fill_between(df['Year'], df['billion'], color='lightgreen', where=df['Year'] >= 2019)
You can interpolate values for the years with np.interp():
# Years to highlight. 2050 is not present in the data (it jumps from 2020 to
# 2091), so the y values are obtained with np.interp() rather than a lookup.
marked_years = [1950, 1987, 2019, 2050]
ax.scatter(marked_years, np.interp(marked_years, df['Year'], df['billion']), marker='o', color='black', s=50)
In a similar way the texts could be placed:
# Label every marked year with its interpolated population value. Labels for
# years before 1970 are left-aligned, the others right-aligned.
for year, value in zip(marked_years, np.interp(marked_years, df['Year'], df['billion'])):
    ax.text(year, value, f'{value:.1f} Billion\nin {year}\n', ha='left' if year < 1970 else 'right', va='bottom')
Optionally you set tick marks for the x-axis every 10 years, and leave out the padding:
# Place a major tick every 10 years on the x-axis.
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax.margins(x=0, tight=True)  # zero padding for the x-axis

Related

Add a Line to Bar Chart Plotly Python

I created a stacked bar chart and need to add a horizontal line but it doesn't show
What can be the problem?
Below is the code
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from itertools import chain
from itertools import zip_longest

# creating array for days of the week: one label per weekday of the two
# weeks, each repeated 18 times (9 time slots x 2 space types)
day_labels = [
    f'{day}. W{week}'
    for week in (1, 2)
    for day in ('Mon', 'Tue', 'Wed', 'Thu', 'Fri')
]
dates = np.repeat(day_labels, 18)

# creating array for desk and meeting space population
# NOTE(review): `df3` is not created anywhere in this snippet - presumably it
# is pd.DataFrame(df3_dictionary) built from the dict shown below; confirm.
x = df3['Occupancy x Hour'].to_numpy()
y = df3['Population x Hour'].to_numpy()

# interleave the two arrays (x0, y0, x1, y1, ...) into a plain Python list;
# the '' fill value only pads when the arrays differ in length and is dropped
values_array = list(
    filter(
        lambda value: value != '',
        chain.from_iterable(zip_longest(x, y, fillvalue='')),
    )
)

# creating dictionary for the figure
df = pd.DataFrame(
    dict(
        day=dates,
        time=['9am','9am','10am','10am','11am','11am','12pm','12pm','1pm','1pm','2pm','2pm','3pm','3pm','4pm','4pm','5pm','5pm']*10,
        type=["Desk", "Meeting"]*90,
        numbers=values_array,
    )
)

# creating the bar chart
fig = go.Figure()
fig.update_layout(
    template="simple_white",
    xaxis=dict(title_text="Time"),
    yaxis=dict(title_text="Population"),
    barmode="stack",
)
colors = ["Blue", "LimeGreen"]

# here adding time and day of the week on x-axis: one stacked trace per
# space type, with [day, time] passed as a two-level x
for r, c in zip(df.type.unique(), colors):
    plot_df = df[df.type == r]
    fig.add_trace(
        go.Bar(x=[plot_df.day, plot_df.time], y=plot_df.numbers, name=r, marker_color=c),
    )
fig
df3_dictionary = {'Week': {0: 1.0,
1: 1.0,
2: 1.0,
3: 1.0,
4: 1.0,
5: 1.0,
6: 1.0,
7: 1.0,
8: 1.0,
9: 1.0,
10: 1.0,
11: 1.0,
12: 1.0,
13: 1.0,
14: 1.0,
15: 1.0,
16: 1.0,
17: 1.0,
18: 1.0,
19: 1.0,
20: 1.0,
21: 1.0,
22: 1.0,
23: 1.0,
24: 1.0,
25: 1.0,
26: 1.0,
27: 1.0,
28: 1.0,
29: 1.0,
30: 1.0,
31: 1.0,
32: 1.0,
33: 1.0,
34: 1.0,
35: 1.0,
36: 1.0,
37: 1.0,
38: 1.0,
39: 1.0,
40: 1.0,
41: 1.0,
42: 1.0,
43: 1.0,
44: 1.0,
45: 2.0,
46: 2.0,
47: 2.0,
48: 2.0,
49: 2.0,
50: 2.0,
51: 2.0,
52: 2.0,
53: 2.0,
54: 2.0,
55: 2.0,
56: 2.0,
57: 2.0,
58: 2.0,
59: 2.0,
60: 2.0,
61: 2.0,
62: 2.0,
63: 2.0,
64: 2.0,
65: 2.0,
66: 2.0,
67: 2.0,
68: 2.0,
69: 2.0,
70: 2.0,
71: 2.0,
72: 2.0,
73: 2.0,
74: 2.0,
75: 2.0,
76: 2.0,
77: 2.0,
78: 2.0,
79: 2.0,
80: 2.0,
81: 2.0,
82: 2.0,
83: 2.0,
84: 2.0,
85: 2.0,
86: 2.0,
87: 2.0,
88: 2.0,
89: 2.0},
'Day': {0: 'Monday',
1: 'Monday',
2: 'Monday',
3: 'Monday',
4: 'Monday',
5: 'Monday',
6: 'Monday',
7: 'Monday',
8: 'Monday',
9: 'Tuesday',
10: 'Tuesday',
11: 'Tuesday',
12: 'Tuesday',
13: 'Tuesday',
14: 'Tuesday',
15: 'Tuesday',
16: 'Tuesday',
17: 'Tuesday',
18: 'Wednesday',
19: 'Wednesday',
20: 'Wednesday',
21: 'Wednesday',
22: 'Wednesday',
23: 'Wednesday',
24: 'Wednesday',
25: 'Wednesday',
26: 'Wednesday',
27: 'Thursday',
28: 'Thursday',
29: 'Thursday',
30: 'Thursday',
31: 'Thursday',
32: 'Thursday',
33: 'Thursday',
34: 'Thursday',
35: 'Thursday',
36: 'Friday',
37: 'Friday',
38: 'Friday',
39: 'Friday',
40: 'Friday',
41: 'Friday',
42: 'Friday',
43: 'Friday',
44: 'Friday',
45: 'Monday',
46: 'Monday',
47: 'Monday',
48: 'Monday',
49: 'Monday',
50: 'Monday',
51: 'Monday',
52: 'Monday',
53: 'Monday',
54: 'Tuesday',
55: 'Tuesday',
56: 'Tuesday',
57: 'Tuesday',
58: 'Tuesday',
59: 'Tuesday',
60: 'Tuesday',
61: 'Tuesday',
62: 'Tuesday',
63: 'Wednesday',
64: 'Wednesday',
65: 'Wednesday',
66: 'Wednesday',
67: 'Wednesday',
68: 'Wednesday',
69: 'Wednesday',
70: 'Wednesday',
71: 'Wednesday',
72: 'Thursday',
73: 'Thursday',
74: 'Thursday',
75: 'Thursday',
76: 'Thursday',
77: 'Thursday',
78: 'Thursday',
79: 'Thursday',
80: 'Thursday',
81: 'Friday',
82: 'Friday',
83: 'Friday',
84: 'Friday',
85: 'Friday',
86: 'Friday',
87: 'Friday',
88: 'Friday',
89: 'Friday'},
'Time': {0: '9am',
1: '10am',
2: '11am',
3: '12pm',
4: '1pm',
5: '2pm',
6: '3pm',
7: '4pm',
8: '5pm',
9: '9am',
10: '10am',
11: '11am',
12: '12pm',
13: '1pm',
14: '2pm',
15: '3pm',
16: '4pm',
17: '5pm',
18: '9am',
19: '10am',
20: '11am',
21: '12pm',
22: '1pm',
23: '2pm',
24: '3pm',
25: '4pm',
26: '5pm',
27: '9am',
28: '10am',
29: '11am',
30: '12pm',
31: '1pm',
32: '2pm',
33: '3pm',
34: '4pm',
35: '5pm',
36: '9am',
37: '10am',
38: '11am',
39: '12pm',
40: '1pm',
41: '2pm',
42: '3pm',
43: '4pm',
44: '5pm',
45: '9am',
46: '10am',
47: '11am',
48: '12pm',
49: '1pm',
50: '2pm',
51: '3pm',
52: '4pm',
53: '5pm',
54: '9am',
55: '10am',
56: '11am',
57: '12pm',
58: '1pm',
59: '2pm',
60: '3pm',
61: '4pm',
62: '5pm',
63: '9am',
64: '10am',
65: '11am',
66: '12pm',
67: '1pm',
68: '2pm',
69: '3pm',
70: '4pm',
71: '5pm',
72: '9am',
73: '10am',
74: '11am',
75: '12pm',
76: '1pm',
77: '2pm',
78: '3pm',
79: '4pm',
80: '5pm',
81: '9am',
82: '10am',
83: '11am',
84: '12pm',
85: '1pm',
86: '2pm',
87: '3pm',
88: '4pm',
89: '5pm'},
'Occupancy x Hour': {0: 1378.0,
1: 1369.0,
2: 1372.0,
3: 1261.0,
4: 1087.0,
5: 1355.0,
6: 1383.0,
7: 1325.0,
8: 1050.0,
9: 1313.0,
10: 1347.0,
11: 1323.0,
12: 1202.0,
13: 1033.0,
14: 1237.0,
15: 1324.0,
16: 1352.0,
17: 1108.0,
18: 1217.0,
19: 1276.0,
20: 1365.0,
21: 1204.0,
22: 977.0,
23: 1199.0,
24: 1331.0,
25: 1293.0,
26: 1159.0,
27: 1220.0,
28: 1327.0,
29: 1354.0,
30: 1257.0,
31: 982.0,
32: 1199.0,
33: 1218.0,
34: 1271.0,
35: 1101.0,
36: 1139.0,
37: 1207.0,
38: 1259.0,
39: 1189.0,
40: 903.0,
41: 1171.0,
42: 1193.0,
43: 1239.0,
44: 899.0,
45: 1220.0,
46: 1357.0,
47: 1336.0,
48: 1188.0,
49: 1032.0,
50: 1261.0,
51: 1330.0,
52: 1267.0,
53: 1074.0,
54: 1301.0,
55: 1337.0,
56: 1329.0,
57: 1247.0,
58: 970.0,
59: 1233.0,
60: 1271.0,
61: 1246.0,
62: 1063.0,
63: 1210.0,
64: 1288.0,
65: 1331.0,
66: 1220.0,
67: 948.0,
68: 1273.0,
69: 1289.0,
70: 1329.0,
71: 1153.0,
72: 1213.0,
73: 1248.0,
74: 1272.0,
75: 1190.0,
76: 890.0,
77: 1199.0,
78: 1284.0,
79: 1233.0,
80: 1102.0,
81: 1110.0,
82: 1210.0,
83: 1175.0,
84: 1083.0,
85: 807.0,
86: 1101.0,
87: 1188.0,
88: 1181.0,
89: 857.0},
'Population x Hour': {0: 339.0,
1: 516.0,
2: 564.0,
3: 616.0,
4: 637.0,
5: 548.0,
6: 582.0,
7: 527.0,
8: 341.0,
9: 457.0,
10: 711.0,
11: 731.0,
12: 685.0,
13: 747.0,
14: 735.0,
15: 723.0,
16: 657.0,
17: 388.0,
18: 497.0,
19: 703.0,
20: 690.0,
21: 758.0,
22: 759.0,
23: 745.0,
24: 686.0,
25: 633.0,
26: 374.0,
27: 465.0,
28: 588.0,
29: 648.0,
30: 603.0,
31: 663.0,
32: 687.0,
33: 613.0,
34: 597.0,
35: 262.0,
36: 333.0,
37: 477.0,
38: 521.0,
39: 417.0,
40: 443.0,
41: 513.0,
42: 456.0,
43: 389.0,
44: 159.0,
45: 388.0,
46: 584.0,
47: 646.0,
48: 635.0,
49: 640.0,
50: 643.0,
51: 592.0,
52: 496.0,
53: 282.0,
54: 431.0,
55: 677.0,
56: 646.0,
57: 623.0,
58: 717.0,
59: 674.0,
60: 604.0,
61: 553.0,
62: 348.0,
63: 539.0,
64: 724.0,
65: 651.0,
66: 635.0,
67: 729.0,
68: 600.0,
69: 611.0,
70: 596.0,
71: 353.0,
72: 456.0,
73: 673.0,
74: 639.0,
75: 690.0,
76: 655.0,
77: 616.0,
78: 663.0,
79: 576.0,
80: 340.0,
81: 394.0,
82: 518.0,
83: 527.0,
84: 475.0,
85: 466.0,
86: 452.0,
87: 421.0,
88: 288.0,
89: 181.0}}
This code gives this result
Then I try to add a line, but it doesn't show:
# finding minimum of the population: x and y are the NumPy arrays taken from
# the 'Occupancy x Hour' and 'Population x Hour' columns, summed element-wise
tot_popul = x+y
min(tot_popul)
# here, I am trying to plot a horizontal line at that minimum, but it doesn't show
fig.add_hline(y=min(tot_popul))
fig.show()
I need a horizontal line with minimum values

pandas' qcut possibly rounding?

I'm trying to use pandas' qcut to bin my values in quantile-based buckets.
However, when doing so, it's just giving me whole numbers and does not match what I'm expecting.
I'm expecting something along the following - in particular not whole numbers:
Above was calculated with Excel's QUARTILE.EXC() using the exact same data.
Pandas however is just giving me the bins 1,2,3,4.
Any ideas? Here is the code:
import pandas as pd
data = {0: 2.75,
1: 2.875,
2: 3.5,
3: 3.875,
4: 3.125,
5: 2.25,
6: 2.125,
7: 3.375,
8: 3.75,
9: 1.875,
10: 3.125,
11: 2.625,
12: 1.25,
13: 2.625,
14: 2.25,
15: 3.125,
16: 3.375,
17: 2.25,
18: 2.25,
19: 3.125,
20: 3.375,
21: 2.5,
22: 3.375,
23: 3.5,
24: 3.125,
25: 3.0,
26: 2.125,
27: 3.125,
28: 2.375,
29: 2.375,
30: 2.75,
31: 3.0,
32: 2.625,
33: 2.0,
34: 2.75,
35: 3.25,
36: 3.0,
37: 1.5,
38: 3.5,
39: 2.375,
40: 3.375,
41: 2.625,
42: 3.0,
43: 2.5,
44: 2.625,
45: 2.875,
46: 2.25,
47: 2.5,
48: 1.125,
49: 1.625,
50: 1.375,
51: 2.125,
52: 1.625,
53: 2.125,
54: 1.0,
55: 1.5,
56: 1.25,
57: 3.125,
58: 1.125,
59: 1.75}
df = pd.Series(data).to_frame('values')
n_bins = 4
# labels=range(1, n_bins+1) names the bins 1..n_bins, so 'qcutbins' holds the
# bin numbers (cast to float) rather than the interval each value falls into.
df['qcutbins'] = pd.qcut(df['values'], q=n_bins, labels=range(1,n_bins+1)).astype('float64')
# Describes the bin numbers themselves, not the original values per bin.
df.groupby(['qcutbins'])['qcutbins'].describe()[['min','max']].sort_values(by='max').reset_index(drop=True)
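It looks like you want something like this instead. Drop `labels=` so that each bin keeps its interval edges, then aggregate the original values per bin: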
df = pd.Series(data).to_frame('values')
n_bins = 4
# Without labels=, each row is tagged with the quantile interval it falls into.
df['qcutbins'] = pd.qcut(df['values'], q=n_bins)
# Smallest and largest original value per bin. The aggregations are given by
# name because passing the builtins min/max is deprecated in recent pandas.
df.groupby("qcutbins").agg(["min", "max"])
values
min max
qcutbins
(0.999, 2.125] 1.00 2.125
(2.125, 2.625] 2.25 2.625
(2.625, 3.125] 2.75 3.125
(3.125, 3.875] 3.25 3.875

Plotting respecting the x-axis values scale

I generate the following plot:
By the following code:
data = {'BestFit_rej_ratio': {0: 0.1975987994, 1: 0.2006003002, 2: 0.1790895448, 3: 0.2216108054, 4: 0.1785892946, 5: 0.1890945473, 6: 0.1780890445, 7: 0.1780890445, 8: 0.2016008004, 9: 0.1900950475, 10: 0.1985992996, 11: 0.2031015508, 12: 0.2046023012, 13: 0.2071035518, 14: 0.1750875438, 15: 0.2166083042, 16: 0.1725862931, 17: 0.188094047, 18: 0.1870935468, 19: 0.1895947974, 20: 0.004502251126, 21: 0.006503251626, 22: 0.005002501251, 23: 0.006503251626, 24: 0.008004002001, 25: 0.006003001501, 26: 0.00300150075, 27: 0.005502751376, 28: 0.0100050025, 29: 0.005002501251, 30: 0.006003001501, 31: 0.005502751376, 32: 0.007503751876, 33: 0.005502751376, 34: 0.005502751376, 35: 0.005502751376, 36: 0.007503751876, 37: 0.005002501251, 38: 0.004002001001, 39: 0.009004502251, 40: 0.4172086043, 41: 0.4322161081, 42: 0.4017008504, 43: 0.4247123562, 44: 0.4292146073, 45: 0.4077038519, 46: 0.4282141071, 47: 0.4637318659, 48: 0.4392196098, 49: 0.4172086043, 50: 0.4187093547, 51: 0.4057028514, 52: 0.4287143572, 53: 0.4242121061, 54: 0.4347173587, 55: 0.4307153577, 56: 0.4102051026, 57: 0.4437218609, 58: 0.4212106053, 59: 0.4172086043}, 'MDP_rej_ratio': {0: 0.1660830415, 1: 0.1605802901, 2: 0.152076038, 3: 0.1885942971, 4: 0.152076038, 5: 0.1565782891, 6: 0.1445722861, 7: 0.1570785393, 8: 0.1705852926, 9: 0.1605802901, 10: 0.1740870435, 11: 0.1670835418, 12: 0.1805902951, 13: 0.1740870435, 14: 0.1460730365, 15: 0.1810905453, 16: 0.1425712856, 17: 0.1580790395, 18: 0.1455727864, 19: 0.1590795398, 20: 0.001500750375, 21: 0.00300150075, 22: 0.002501250625, 23: 0.002501250625, 24: 0.0020010005, 25: 0.002501250625, 26: 0.0020010005, 27: 0.001500750375, 28: 0.004002001001, 29: 0.00300150075, 30: 0.0020010005, 31: 0.0, 32: 0.004002001001, 33: 0.0005002501251, 34: 0.0020010005, 35: 0.0, 36: 0.004502251126, 37: 0.002501250625, 38: 0.001500750375, 39: 0.004002001001, 40: 0.3851925963, 41: 0.3851925963, 42: 0.4097048524, 43: 0.3756878439, 44: 0.4112056028, 45: 0.4212106053, 46: 0.3791895948, 
47: 0.4127063532, 48: 0.4432216108, 49: 0.4152076038, 50: 0.3871935968, 51: 0.4197098549, 52: 0.3896948474, 53: 0.4107053527, 54: 0.4062031016, 55: 0.4252126063, 56: 0.4112056028, 57: 0.3931965983, 58: 0.4372186093, 59: 0.4157078539}, 'Q-Learning_rej_ratio': {0: 0.1790895448, 1: 0.1645822911, 2: 0.1545772886, 3: 0.1905952976, 4: 0.1510755378, 5: 0.1595797899, 6: 0.148074037, 7: 0.1575787894, 8: 0.1715857929, 9: 0.1590795398, 10: 0.1690845423, 11: 0.168084042, 12: 0.180090045, 13: 0.1785892946, 14: 0.1495747874, 15: 0.1815907954, 16: 0.1435717859, 17: 0.1685842921, 18: 0.1505752876, 19: 0.1670835418, 20: 0.001500750375, 21: 0.00300150075, 22: 0.002501250625, 23: 0.002501250625, 24: 0.0020010005, 25: 0.002501250625, 26: 0.0020010005, 27: 0.001500750375, 28: 0.004002001001, 29: 0.00300150075, 30: 0.0020010005, 31: 0.0, 32: 0.004002001001, 33: 0.0005002501251, 34: 0.0020010005, 35: 0.0, 36: 0.004502251126, 37: 0.002501250625, 38: 0.001500750375, 39: 0.004002001001, 40: 0.3856928464, 41: 0.4167083542, 42: 0.3786893447, 43: 0.4187093547, 44: 0.4157078539, 45: 0.392196098, 46: 0.4032016008, 47: 0.4452226113, 48: 0.4217108554, 49: 0.3876938469, 50: 0.4192096048, 51: 0.388194097, 52: 0.4122061031, 53: 0.4152076038, 54: 0.4172086043, 55: 0.4137068534, 56: 0.3956978489, 57: 0.4342171086, 58: 0.4082041021, 59: 0.4032016008}, 'Parametrized_factor': {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 6: 1.0, 7: 1.0, 8: 1.0, 9: 1.0, 10: 1.0, 11: 1.0, 12: 1.0, 13: 1.0, 14: 1.0, 15: 1.0, 16: 1.0, 17: 1.0, 18: 1.0, 19: 1.0, 20: 0.2, 21: 0.2, 22: 0.2, 23: 0.2, 24: 0.2, 25: 0.2, 26: 0.2, 27: 0.2, 28: 0.2, 29: 0.2, 30: 0.2, 31: 0.2, 32: 0.2, 33: 0.2, 34: 0.2, 35: 0.2, 36: 0.2, 37: 0.2, 38: 0.2, 39: 0.2, 40: 2.0, 41: 2.0, 42: 2.0, 43: 2.0, 44: 2.0, 45: 2.0, 46: 2.0, 47: 2.0, 48: 2.0, 49: 2.0, 50: 2.0, 51: 2.0, 52: 2.0, 53: 2.0, 54: 2.0, 55: 2.0, 56: 2.0, 57: 2.0, 58: 2.0, 59: 2.0}}
data2 = pd.DataFrame(data)

# figure size
plt.figure(figsize=(12, 8))

# One point plot per method, all drawn against the same 'Parametrized_factor' x column.
ax = sns.pointplot(y="BestFit_rej_ratio", x="Parametrized_factor", data=data2, linestyles='-.', color='g', capsize=.1, scale=.2, errwidth=.5)
ax = sns.pointplot(y="MDP_rej_ratio", x="Parametrized_factor", data=data2, linestyles='-', color='r', capsize=.12, scale=.2, errwidth=.5)
ax = sns.pointplot(y="Q-Learning_rej_ratio", x="Parametrized_factor", data=data2, linestyles=':', color='k', capsize=.15, scale=.5, errwidth=.5)

# NOTE(review): the second legend() call below creates a new legend, so the
# bbox_to_anchor placement requested here presumably does not survive - confirm.
ax.legend(bbox_to_anchor=(1.15, 1), loc='upper left')
labels = ax.legend(['BestFit', 'MDP', 'Q-Learning'])
colors = ['green', 'red', 'black']

# Color each legend entry to match its curve.
for legend_text, color in zip(labels.get_texts(), colors):
    legend_text.set_color(color)
plt.setp(ax.get_legend().get_texts(), fontsize='12')

# axis labels
ax.set_ylabel('Rejection ratio')
ax.set_xlabel('Parametrized factor')
plt.show()
The problem is that the plot does not respect the scale of the x-axis values (Parametrized_factor).
How can I solve it?

Plotting variance scale on y-axis for PCA in Python

I am trying to run a PCA, but I cannot plot the variance properly on the y-axis.
I have data, which I exported for you
{1: {0: 242.0, 1: 290.0, 2: 340.0, 3: 363.0, 4: 430.0, 5: 450.0, 6: 500.0, 7: 390.0, 8: 450.0, 9: 500.0, 10: 475.0, 11: 500.0, 12: 500.0, 13: 600.0, 14: 600.0, 15: 700.0, 16: 700.0, 17: 610.0, 18: 650.0, 19: 575.0, 20: 685.0, 21: 620.0, 22: 680.0, 23: 700.0, 24: 725.0, 25: 720.0, 26: 714.0, 27: 850.0, 28: 1000.0, 29: 920.0, 30: 955.0, 31: 925.0, 32: 975.0, 33: 950.0, 34: 40.0, 35: 69.0, 36: 78.0, 37: 87.0, 38: 120.0, 39: 0.0, 40: 110.0, 41: 120.0, 42: 150.0, 43: 145.0, 44: 160.0, 45: 140.0, 46: 160.0, 47: 169.0, 48: 161.0, 49: 200.0, 50: 180.0, 51: 290.0, 52: 272.0, 53: 390.0, 54: 6.7, 55: 7.5, 56: 7.0, 57: 9.7, 58: 9.8, 59: 8.7, 60: 10.0, 61: 9.9, 62: 9.8, 63: 12.2, 64: 13.4, 65: 12.2, 66: 19.7, 67: 19.9, 68: 200.0, 69: 300.0, 70: 300.0, 71: 300.0, 72: 430.0, 73: 345.0, 74: 456.0, 75: 510.0, 76: 540.0, 77: 500.0, 78: 567.0, 79: 770.0, 80: 950.0, 81: 1250.0, 82: 1600.0, 83: 1550.0, 84: 1650.0}, 2: {0: 23.2, 1: 24.0, 2: 23.9, 3: 26.3, 4: 26.5, 5: 26.8, 6: 26.8, 7: 27.6, 8: 27.6, 9: 28.5, 10: 28.4, 11: 28.7, 12: 29.1, 13: 29.4, 14: 29.4, 15: 30.4, 16: 30.4, 17: 30.9, 18: 31.0, 19: 31.3, 20: 31.4, 21: 31.5, 22: 31.8, 23: 31.9, 24: 31.8, 25: 32.0, 26: 32.7, 27: 32.8, 28: 33.5, 29: 35.0, 30: 35.0, 31: 36.2, 32: 37.4, 33: 38.0, 34: 12.9, 35: 16.5, 36: 17.5, 37: 18.2, 38: 18.6, 39: 19.0, 40: 19.1, 41: 19.4, 42: 20.4, 43: 20.5, 44: 20.5, 45: 21.0, 46: 21.1, 47: 22.0, 48: 22.0, 49: 22.1, 50: 23.6, 51: 24.0, 52: 25.0, 53: 29.5, 54: 9.3, 55: 10.0, 56: 10.1, 57: 10.4, 58: 10.7, 59: 10.8, 60: 11.3, 61: 11.3, 62: 11.4, 63: 11.5, 64: 11.7, 65: 12.1, 66: 13.2, 67: 13.8, 68: 30.0, 69: 31.7, 70: 32.7, 71: 34.8, 72: 35.5, 73: 36.0, 74: 40.0, 75: 40.0, 76: 40.1, 77: 42.0, 78: 43.2, 79: 44.8, 80: 48.3, 81: 52.0, 82: 56.0, 83: 56.0, 84: 59.0}, 3: {0: 25.4, 1: 26.3, 2: 26.5, 3: 29.0, 4: 29.0, 5: 29.7, 6: 29.7, 7: 30.0, 8: 30.0, 9: 30.7, 10: 31.0, 11: 31.0, 12: 31.5, 13: 32.0, 14: 32.0, 15: 33.0, 16: 33.0, 17: 33.5, 18: 33.5, 19: 34.0, 20: 34.0, 21: 34.5, 22: 35.0, 23: 35.0, 24: 35.0, 25: 
35.0, 26: 36.0, 27: 36.0, 28: 37.0, 29: 38.5, 30: 38.5, 31: 39.5, 32: 41.0, 33: 41.0, 34: 14.1, 35: 18.2, 36: 18.8, 37: 19.8, 38: 20.0, 39: 20.5, 40: 20.8, 41: 21.0, 42: 22.0, 43: 22.0, 44: 22.5, 45: 22.5, 46: 22.5, 47: 24.0, 48: 23.4, 49: 23.5, 50: 25.2, 51: 26.0, 52: 27.0, 53: 31.7, 54: 9.8, 55: 10.5, 56: 10.6, 57: 11.0, 58: 11.2, 59: 11.3, 60: 11.8, 61: 11.8, 62: 12.0, 63: 12.2, 64: 12.4, 65: 13.0, 66: 14.3, 67: 15.0, 68: 32.3, 69: 34.0, 70: 35.0, 71: 37.3, 72: 38.0, 73: 38.5, 74: 42.5, 75: 42.5, 76: 43.0, 77: 45.0, 78: 46.0, 79: 48.0, 80: 51.7, 81: 56.0, 82: 60.0, 83: 60.0, 84: 63.4}, 4: {0: 30.0, 1: 31.2, 2: 31.1, 3: 33.5, 4: 34.0, 5: 34.7, 6: 34.5, 7: 35.0, 8: 35.1, 9: 36.2, 10: 36.2, 11: 36.2, 12: 36.4, 13: 37.2, 14: 37.2, 15: 38.3, 16: 38.5, 17: 38.6, 18: 38.7, 19: 39.5, 20: 39.2, 21: 39.7, 22: 40.6, 23: 40.5, 24: 40.9, 25: 40.6, 26: 41.5, 27: 41.6, 28: 42.6, 29: 44.1, 30: 44.0, 31: 45.3, 32: 45.9, 33: 46.5, 34: 16.2, 35: 20.3, 36: 21.2, 37: 22.2, 38: 22.2, 39: 22.8, 40: 23.1, 41: 23.7, 42: 24.7, 43: 24.3, 44: 25.3, 45: 25.0, 46: 25.0, 47: 27.2, 48: 26.7, 49: 26.8, 50: 27.9, 51: 29.2, 52: 30.6, 53: 35.0, 54: 10.8, 55: 11.6, 56: 11.6, 57: 12.0, 58: 12.4, 59: 12.6, 60: 13.1, 61: 13.1, 62: 13.2, 63: 13.4, 64: 13.5, 65: 13.8, 66: 15.2, 67: 16.2, 68: 34.8, 69: 37.8, 70: 38.8, 71: 39.8, 72: 40.5, 73: 41.0, 74: 45.5, 75: 45.5, 76: 45.8, 77: 48.0, 78: 48.7, 79: 51.2, 80: 55.1, 81: 59.7, 82: 64.0, 83: 64.0, 84: 68.0}, 5: {0: 38.4, 1: 40.0, 2: 39.8, 3: 38.0, 4: 36.6, 5: 39.2, 6: 41.1, 7: 36.2, 8: 39.9, 9: 39.3, 10: 39.4, 11: 39.7, 12: 37.8, 13: 40.2, 14: 41.5, 15: 38.8, 16: 38.8, 17: 40.5, 18: 37.4, 19: 38.3, 20: 40.8, 21: 39.1, 22: 38.1, 23: 40.1, 24: 40.0, 25: 40.3, 26: 39.8, 27: 40.6, 28: 44.5, 29: 40.9, 30: 41.1, 31: 41.4, 32: 40.6, 33: 37.9, 34: 25.6, 35: 26.1, 36: 26.3, 37: 25.3, 38: 28.0, 39: 28.4, 40: 26.7, 41: 25.8, 42: 23.5, 43: 27.3, 44: 27.8, 45: 26.2, 46: 25.6, 47: 27.7, 48: 25.9, 49: 27.6, 50: 25.4, 51: 30.4, 52: 28.0, 53: 27.1, 54: 16.1, 55: 17.0, 56: 
14.9, 57: 18.3, 58: 16.8, 59: 15.7, 60: 16.9, 61: 16.9, 62: 16.7, 63: 15.6, 64: 18.0, 65: 16.5, 66: 18.9, 67: 18.1, 68: 16.0, 69: 15.1, 70: 15.3, 71: 15.8, 72: 18.0, 73: 15.6, 74: 16.0, 75: 15.0, 76: 17.0, 77: 14.5, 78: 16.0, 79: 15.0, 80: 16.2, 81: 17.9, 82: 15.0, 83: 15.0, 84: 15.9}, 6: {0: 13.4, 1: 13.8, 2: 15.1, 3: 13.3, 4: 15.1, 5: 14.2, 6: 15.3, 7: 13.4, 8: 13.8, 9: 13.7, 10: 14.1, 11: 13.3, 12: 12.0, 13: 13.9, 14: 15.0, 15: 13.8, 16: 13.5, 17: 13.3, 18: 14.8, 19: 14.1, 20: 13.7, 21: 13.3, 22: 15.1, 23: 13.8, 24: 14.8, 25: 15.0, 26: 14.1, 27: 14.9, 28: 15.5, 29: 14.3, 30: 14.3, 31: 14.9, 32: 14.7, 33: 13.7, 34: 14.0, 35: 13.9, 36: 13.7, 37: 14.3, 38: 16.1, 39: 14.7, 40: 14.7, 41: 13.9, 42: 15.2, 43: 14.6, 44: 15.1, 45: 13.3, 46: 15.2, 47: 14.1, 48: 13.6, 49: 15.4, 50: 14.0, 51: 15.4, 52: 15.6, 53: 15.3, 54: 9.7, 55: 10.0, 56: 9.9, 57: 11.5, 58: 10.3, 59: 10.2, 60: 9.8, 61: 8.9, 62: 8.7, 63: 10.4, 64: 9.4, 65: 9.1, 66: 13.6, 67: 11.6, 68: 9.7, 69: 11.0, 70: 11.3, 71: 10.1, 72: 11.3, 73: 9.7, 74: 9.5, 75: 9.8, 76: 11.2, 77: 10.2, 78: 10.0, 79: 10.5, 80: 11.2, 81: 11.7, 82: 9.6, 83: 9.6, 84: 11.0}}
Import libraries
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
Data given above, but this is the code
fishes = pd.read_csv("fish.csv", header=None, index_col=False, skiprows=1, usecols=range(1,7))
fishes.head()
Create scaler
scaler = StandardScaler()
Create a PCA instance
pca = PCA()
Create pipeline
pipeline = make_pipeline(scaler, pca)
Fit the pipeline to 'samples'
pipeline.fit(fishes)
Plot the explained variances
# One bar per principal component, indexed 0 .. n_components_ - 1.
features = range(pca.n_components_)
# Bar heights are pca.explained_variance_ as-is; use
# pca.explained_variance_ratio_ instead if the bars should sum to 1.
plt.bar(features, pca.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(features)
plt.show()
My current output is this, which does not make sense.
If I understand correctly, the PCA variances on the y-axis should add up to 100%. With this scale, my first three components do not seem to explain much. Even if 1 here means 10%, the total is still not 100%.
Either I did something wrong (unlikely) or I need to adjust the scale for y-axis manually? Where is my mistake? Thanks.
Instead of plotting pca.explained_variance_, try plotting:
pca.explained_variance_ratio_
These values sum to 1 (i.e. 100%). pca.explained_variance_ holds the absolute variance of each component, whose scale depends on your data; pca.explained_variance_ratio_ expresses each one as a fraction of the total variance.

Python - Assigning Multiple Keys for a Single Value

I'd like to be able to assign the following keys to these values in Python:
Numbers 01 - 10 : 5.01
Numbers 11 - 20 : 7.02
Numbers 21 - 30 : 9.03
Numbers 31 - 40 : 11.04
Numbers 41 - 50 : 15.00
Numbers 51 - 60 : 17.08
Numbers 61 - 70 : 19.15
I know that this is possible:
# Keys 1-10 all map to 5.01.
rates = dict.fromkeys(range(1, 11), 5.01)
# Keys 11-20 all map to 7.02.
rates.update(dict.fromkeys(range(11, 21), 7.02))
# ...etc
and that's okay. However, is there a way to do this in one line or one initializer list in Python?
Use a dictionary comprehension and an initial mapping:
# Initial mapping: first key of each 10-number range -> its value.
# The question asks for 17.08 for 51-60.
numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
# Expand every start key into the ten consecutive keys start .. start + 9.
numbers = {k: v for start, v in numbers.items() for k in range(start, start + 10)}
Demo:
>>> from pprint import pprint
>>> numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
>>> numbers = {k: v for start, v in numbers.items() for k in range(start, start + 10)}
>>> pprint(numbers)
{1: 5.01,
2: 5.01,
3: 5.01,
4: 5.01,
5: 5.01,
6: 5.01,
7: 5.01,
8: 5.01,
9: 5.01,
10: 5.01,
11: 7.02,
12: 7.02,
13: 7.02,
14: 7.02,
15: 7.02,
16: 7.02,
17: 7.02,
18: 7.02,
19: 7.02,
20: 7.02,
21: 9.03,
22: 9.03,
23: 9.03,
24: 9.03,
25: 9.03,
26: 9.03,
27: 9.03,
28: 9.03,
29: 9.03,
30: 9.03,
31: 11.04,
32: 11.04,
33: 11.04,
34: 11.04,
35: 11.04,
36: 11.04,
37: 11.04,
38: 11.04,
39: 11.04,
40: 11.04,
41: 15.0,
42: 15.0,
43: 15.0,
44: 15.0,
45: 15.0,
46: 15.0,
47: 15.0,
48: 15.0,
49: 15.0,
50: 15.0,
51: 17.08,
52: 17.08,
53: 17.08,
54: 17.08,
55: 17.08,
56: 17.08,
57: 17.08,
58: 17.08,
59: 17.08,
60: 17.08,
61: 19.15,
62: 19.15,
63: 19.15,
64: 19.15,
65: 19.15,
66: 19.15,
67: 19.15,
68: 19.15,
69: 19.15,
70: 19.15}
The dictionary expression produces both a key and a value for each iteration of the loops. There are two loops in that expression, and you need to read them from left to right as nested in that order. Written out as a non-comprehension set of loops, you'd get:
# Initial mapping: first key of each 10-number range -> its value.
# The question asks for 17.08 for 51-60.
numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
output = {}
# loop over the (key, value) pairs in the numbers dictionary
for start, v in numbers.items():
    # every key in start .. start + 9 receives the same value
    for k in range(start, start + 10):
        output[k] = v
numbers = output
Essentially the keys in the original numbers dictionary are turned into ranges to form 10 new keys in the output dictionary, all with the same value.

Categories