Related
I created a stacked bar chart and need to add a horizontal line but it doesn't show
What can be the problem?
Below is the code
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# Build the day label for every bar: 2 weeks x 5 weekdays, with each label
# repeated 18 times so it lines up with the 18 per-day entries (9 hourly
# slots x 2 space types) used when the chart data frame is assembled.
day_labels = [
    f"{day}. W{week}"
    for week in (1, 2)
    for day in ("Mon", "Tue", "Wed", "Thu", "Fri")
]
# np.repeat repeats each element in place: 18 x 'Mon. W1', then 18 x 'Tue. W1', ...
dates = np.repeat(day_labels, 18)
#creating array for desk and meeting space population
from itertools import chain
from itertools import zip_longest
# Pull the two per-hour series out of df3 as NumPy arrays.
# NOTE(review): df3 is not defined in this snippet; presumably it is built from
# the df3_dictionary literal shown further down — confirm.
x = df3['Occupancy x Hour'].to_numpy()
y = df3['Population x Hour'].to_numpy()
# Interleave the two arrays element by element (x[0], y[0], x[1], y[1], ...)
# into one flat Python list. zip_longest pads the shorter input with '' and the
# filter removes that padding again, so inputs of unequal length do not raise.
values_array = list(filter(lambda x: x != '', chain.from_iterable(zip_longest(x, y, fillvalue = ''))))
# Assemble the long-format frame the chart is drawn from: one row per
# (day, time, type) combination with its value in "numbers".
df = pd.DataFrame(
dict(
day=dates,
# 9 hourly labels, each listed twice (once per type), repeated 10 times
time=['9am','9am','10am','10am','11am','11am','12pm','12pm','1pm','1pm','2pm','2pm','3pm','3pm','4pm','4pm','5pm','5pm']*10,
# alternates in the same order as the interleaved values: x -> "Desk", y -> "Meeting"
type=["Desk", "Meeting"]*90,
numbers=values_array,
)
)
# Create an empty figure and configure it for stacked bars.
fig = go.Figure()
fig.update_layout(
    template="simple_white",
    xaxis=dict(title_text="Time"),
    yaxis=dict(title_text="Population"),
    barmode="stack",
)
colors = ["Blue", "LimeGreen"]
# Add one bar trace per space type. Passing [day, time] as x produces a
# two-level (multicategory) x-axis with the time nested under the day.
for space_type, color in zip(df.type.unique(), colors):
    plot_df = df[df.type == space_type]
    fig.add_trace(
        go.Bar(
            x=[plot_df.day, plot_df.time],
            y=plot_df.numbers,
            name=space_type,
            marker_color=color,
        ),
    )
# Bare expression: displays the figure when it is the last line of a notebook cell.
fig
df3_dictionary = {'Week': {0: 1.0,
1: 1.0,
2: 1.0,
3: 1.0,
4: 1.0,
5: 1.0,
6: 1.0,
7: 1.0,
8: 1.0,
9: 1.0,
10: 1.0,
11: 1.0,
12: 1.0,
13: 1.0,
14: 1.0,
15: 1.0,
16: 1.0,
17: 1.0,
18: 1.0,
19: 1.0,
20: 1.0,
21: 1.0,
22: 1.0,
23: 1.0,
24: 1.0,
25: 1.0,
26: 1.0,
27: 1.0,
28: 1.0,
29: 1.0,
30: 1.0,
31: 1.0,
32: 1.0,
33: 1.0,
34: 1.0,
35: 1.0,
36: 1.0,
37: 1.0,
38: 1.0,
39: 1.0,
40: 1.0,
41: 1.0,
42: 1.0,
43: 1.0,
44: 1.0,
45: 2.0,
46: 2.0,
47: 2.0,
48: 2.0,
49: 2.0,
50: 2.0,
51: 2.0,
52: 2.0,
53: 2.0,
54: 2.0,
55: 2.0,
56: 2.0,
57: 2.0,
58: 2.0,
59: 2.0,
60: 2.0,
61: 2.0,
62: 2.0,
63: 2.0,
64: 2.0,
65: 2.0,
66: 2.0,
67: 2.0,
68: 2.0,
69: 2.0,
70: 2.0,
71: 2.0,
72: 2.0,
73: 2.0,
74: 2.0,
75: 2.0,
76: 2.0,
77: 2.0,
78: 2.0,
79: 2.0,
80: 2.0,
81: 2.0,
82: 2.0,
83: 2.0,
84: 2.0,
85: 2.0,
86: 2.0,
87: 2.0,
88: 2.0,
89: 2.0},
'Day': {0: 'Monday',
1: 'Monday',
2: 'Monday',
3: 'Monday',
4: 'Monday',
5: 'Monday',
6: 'Monday',
7: 'Monday',
8: 'Monday',
9: 'Tuesday',
10: 'Tuesday',
11: 'Tuesday',
12: 'Tuesday',
13: 'Tuesday',
14: 'Tuesday',
15: 'Tuesday',
16: 'Tuesday',
17: 'Tuesday',
18: 'Wednesday',
19: 'Wednesday',
20: 'Wednesday',
21: 'Wednesday',
22: 'Wednesday',
23: 'Wednesday',
24: 'Wednesday',
25: 'Wednesday',
26: 'Wednesday',
27: 'Thursday',
28: 'Thursday',
29: 'Thursday',
30: 'Thursday',
31: 'Thursday',
32: 'Thursday',
33: 'Thursday',
34: 'Thursday',
35: 'Thursday',
36: 'Friday',
37: 'Friday',
38: 'Friday',
39: 'Friday',
40: 'Friday',
41: 'Friday',
42: 'Friday',
43: 'Friday',
44: 'Friday',
45: 'Monday',
46: 'Monday',
47: 'Monday',
48: 'Monday',
49: 'Monday',
50: 'Monday',
51: 'Monday',
52: 'Monday',
53: 'Monday',
54: 'Tuesday',
55: 'Tuesday',
56: 'Tuesday',
57: 'Tuesday',
58: 'Tuesday',
59: 'Tuesday',
60: 'Tuesday',
61: 'Tuesday',
62: 'Tuesday',
63: 'Wednesday',
64: 'Wednesday',
65: 'Wednesday',
66: 'Wednesday',
67: 'Wednesday',
68: 'Wednesday',
69: 'Wednesday',
70: 'Wednesday',
71: 'Wednesday',
72: 'Thursday',
73: 'Thursday',
74: 'Thursday',
75: 'Thursday',
76: 'Thursday',
77: 'Thursday',
78: 'Thursday',
79: 'Thursday',
80: 'Thursday',
81: 'Friday',
82: 'Friday',
83: 'Friday',
84: 'Friday',
85: 'Friday',
86: 'Friday',
87: 'Friday',
88: 'Friday',
89: 'Friday'},
'Time': {0: '9am',
1: '10am',
2: '11am',
3: '12pm',
4: '1pm',
5: '2pm',
6: '3pm',
7: '4pm',
8: '5pm',
9: '9am',
10: '10am',
11: '11am',
12: '12pm',
13: '1pm',
14: '2pm',
15: '3pm',
16: '4pm',
17: '5pm',
18: '9am',
19: '10am',
20: '11am',
21: '12pm',
22: '1pm',
23: '2pm',
24: '3pm',
25: '4pm',
26: '5pm',
27: '9am',
28: '10am',
29: '11am',
30: '12pm',
31: '1pm',
32: '2pm',
33: '3pm',
34: '4pm',
35: '5pm',
36: '9am',
37: '10am',
38: '11am',
39: '12pm',
40: '1pm',
41: '2pm',
42: '3pm',
43: '4pm',
44: '5pm',
45: '9am',
46: '10am',
47: '11am',
48: '12pm',
49: '1pm',
50: '2pm',
51: '3pm',
52: '4pm',
53: '5pm',
54: '9am',
55: '10am',
56: '11am',
57: '12pm',
58: '1pm',
59: '2pm',
60: '3pm',
61: '4pm',
62: '5pm',
63: '9am',
64: '10am',
65: '11am',
66: '12pm',
67: '1pm',
68: '2pm',
69: '3pm',
70: '4pm',
71: '5pm',
72: '9am',
73: '10am',
74: '11am',
75: '12pm',
76: '1pm',
77: '2pm',
78: '3pm',
79: '4pm',
80: '5pm',
81: '9am',
82: '10am',
83: '11am',
84: '12pm',
85: '1pm',
86: '2pm',
87: '3pm',
88: '4pm',
89: '5pm'},
'Occupancy x Hour': {0: 1378.0,
1: 1369.0,
2: 1372.0,
3: 1261.0,
4: 1087.0,
5: 1355.0,
6: 1383.0,
7: 1325.0,
8: 1050.0,
9: 1313.0,
10: 1347.0,
11: 1323.0,
12: 1202.0,
13: 1033.0,
14: 1237.0,
15: 1324.0,
16: 1352.0,
17: 1108.0,
18: 1217.0,
19: 1276.0,
20: 1365.0,
21: 1204.0,
22: 977.0,
23: 1199.0,
24: 1331.0,
25: 1293.0,
26: 1159.0,
27: 1220.0,
28: 1327.0,
29: 1354.0,
30: 1257.0,
31: 982.0,
32: 1199.0,
33: 1218.0,
34: 1271.0,
35: 1101.0,
36: 1139.0,
37: 1207.0,
38: 1259.0,
39: 1189.0,
40: 903.0,
41: 1171.0,
42: 1193.0,
43: 1239.0,
44: 899.0,
45: 1220.0,
46: 1357.0,
47: 1336.0,
48: 1188.0,
49: 1032.0,
50: 1261.0,
51: 1330.0,
52: 1267.0,
53: 1074.0,
54: 1301.0,
55: 1337.0,
56: 1329.0,
57: 1247.0,
58: 970.0,
59: 1233.0,
60: 1271.0,
61: 1246.0,
62: 1063.0,
63: 1210.0,
64: 1288.0,
65: 1331.0,
66: 1220.0,
67: 948.0,
68: 1273.0,
69: 1289.0,
70: 1329.0,
71: 1153.0,
72: 1213.0,
73: 1248.0,
74: 1272.0,
75: 1190.0,
76: 890.0,
77: 1199.0,
78: 1284.0,
79: 1233.0,
80: 1102.0,
81: 1110.0,
82: 1210.0,
83: 1175.0,
84: 1083.0,
85: 807.0,
86: 1101.0,
87: 1188.0,
88: 1181.0,
89: 857.0},
'Population x Hour': {0: 339.0,
1: 516.0,
2: 564.0,
3: 616.0,
4: 637.0,
5: 548.0,
6: 582.0,
7: 527.0,
8: 341.0,
9: 457.0,
10: 711.0,
11: 731.0,
12: 685.0,
13: 747.0,
14: 735.0,
15: 723.0,
16: 657.0,
17: 388.0,
18: 497.0,
19: 703.0,
20: 690.0,
21: 758.0,
22: 759.0,
23: 745.0,
24: 686.0,
25: 633.0,
26: 374.0,
27: 465.0,
28: 588.0,
29: 648.0,
30: 603.0,
31: 663.0,
32: 687.0,
33: 613.0,
34: 597.0,
35: 262.0,
36: 333.0,
37: 477.0,
38: 521.0,
39: 417.0,
40: 443.0,
41: 513.0,
42: 456.0,
43: 389.0,
44: 159.0,
45: 388.0,
46: 584.0,
47: 646.0,
48: 635.0,
49: 640.0,
50: 643.0,
51: 592.0,
52: 496.0,
53: 282.0,
54: 431.0,
55: 677.0,
56: 646.0,
57: 623.0,
58: 717.0,
59: 674.0,
60: 604.0,
61: 553.0,
62: 348.0,
63: 539.0,
64: 724.0,
65: 651.0,
66: 635.0,
67: 729.0,
68: 600.0,
69: 611.0,
70: 596.0,
71: 353.0,
72: 456.0,
73: 673.0,
74: 639.0,
75: 690.0,
76: 655.0,
77: 616.0,
78: 663.0,
79: 576.0,
80: 340.0,
81: 394.0,
82: 518.0,
83: 527.0,
84: 475.0,
85: 466.0,
86: 452.0,
87: 421.0,
88: 288.0,
89: 181.0}}
This code gives this result
Then I try to add a horizontal line, but it doesn't show:
#finding minimum of the population
# Element-wise sum of the two arrays, i.e. the total height of each stacked bar.
tot_popul = x+y
# Bare expression: only echoes the value in a notebook/REPL; it has no other effect.
min(tot_popul)
#here, I am trying to plot minimum line but it doesn't show
# NOTE(review): the bars are drawn on a two-level (multicategory) x-axis, and
# add_hline reportedly does not render on such an axis. A line shape that spans
# the plot width via xref="paper" (fig.add_shape) may be a workaround — confirm.
fig.add_hline(y=min(tot_popul))
fig.show()
I need a horizontal line with minimum values
I'm trying to use pandas' qcut to bin my values into quantile-based buckets.
However, when doing so, it's just giving me whole numbers and does not match what I'm expecting.
I'm expecting something along the following - in particular not whole numbers:
Above was calculated with Excel's QUARTILE.EXC() using the exact same data.
Pandas however is just giving me the bins 1,2,3,4.
Any ideas? Here is the code:
import pandas as pd
data = {0: 2.75,
1: 2.875,
2: 3.5,
3: 3.875,
4: 3.125,
5: 2.25,
6: 2.125,
7: 3.375,
8: 3.75,
9: 1.875,
10: 3.125,
11: 2.625,
12: 1.25,
13: 2.625,
14: 2.25,
15: 3.125,
16: 3.375,
17: 2.25,
18: 2.25,
19: 3.125,
20: 3.375,
21: 2.5,
22: 3.375,
23: 3.5,
24: 3.125,
25: 3.0,
26: 2.125,
27: 3.125,
28: 2.375,
29: 2.375,
30: 2.75,
31: 3.0,
32: 2.625,
33: 2.0,
34: 2.75,
35: 3.25,
36: 3.0,
37: 1.5,
38: 3.5,
39: 2.375,
40: 3.375,
41: 2.625,
42: 3.0,
43: 2.5,
44: 2.625,
45: 2.875,
46: 2.25,
47: 2.5,
48: 1.125,
49: 1.625,
50: 1.375,
51: 2.125,
52: 1.625,
53: 2.125,
54: 1.0,
55: 1.5,
56: 1.25,
57: 3.125,
58: 1.125,
59: 1.75}
df = pd.Series(data).to_frame('values')
n_bins = 4
# labels=range(1, n_bins + 1) names the quantile bins 1..n_bins, so 'qcutbins'
# holds the bin number of each row rather than the bin's interval edges.
df['qcutbins'] = pd.qcut(df['values'], q=n_bins, labels=range(1, n_bins + 1)).astype('float64')
# Per-bin min/max of the bin numbers themselves, ordered by max.
df.groupby(['qcutbins'])['qcutbins'].describe()[['min', 'max']].sort_values(by='max').reset_index(drop=True)
It looks like you want something like this instead:
df = pd.Series(data).to_frame('values')
n_bins = 4
# Without labels=, qcut keeps each quantile bin's interval as the category.
df['qcutbins'] = pd.qcut(df['values'], q=n_bins)
# Smallest and largest value that fell into each bin. The aggregations are
# given as string names because recent pandas versions warn when the builtins
# min/max are passed to agg.
df.groupby("qcutbins").agg(["min", "max"])
values
min max
qcutbins
(0.999, 2.125] 1.00 2.125
(2.125, 2.625] 2.25 2.625
(2.625, 3.125] 2.75 3.125
(3.125, 3.875] 3.25 3.875
I generate the following plot:
By the following code:
data = {'BestFit_rej_ratio': {0: 0.1975987994, 1: 0.2006003002, 2: 0.1790895448, 3: 0.2216108054, 4: 0.1785892946, 5: 0.1890945473, 6: 0.1780890445, 7: 0.1780890445, 8: 0.2016008004, 9: 0.1900950475, 10: 0.1985992996, 11: 0.2031015508, 12: 0.2046023012, 13: 0.2071035518, 14: 0.1750875438, 15: 0.2166083042, 16: 0.1725862931, 17: 0.188094047, 18: 0.1870935468, 19: 0.1895947974, 20: 0.004502251126, 21: 0.006503251626, 22: 0.005002501251, 23: 0.006503251626, 24: 0.008004002001, 25: 0.006003001501, 26: 0.00300150075, 27: 0.005502751376, 28: 0.0100050025, 29: 0.005002501251, 30: 0.006003001501, 31: 0.005502751376, 32: 0.007503751876, 33: 0.005502751376, 34: 0.005502751376, 35: 0.005502751376, 36: 0.007503751876, 37: 0.005002501251, 38: 0.004002001001, 39: 0.009004502251, 40: 0.4172086043, 41: 0.4322161081, 42: 0.4017008504, 43: 0.4247123562, 44: 0.4292146073, 45: 0.4077038519, 46: 0.4282141071, 47: 0.4637318659, 48: 0.4392196098, 49: 0.4172086043, 50: 0.4187093547, 51: 0.4057028514, 52: 0.4287143572, 53: 0.4242121061, 54: 0.4347173587, 55: 0.4307153577, 56: 0.4102051026, 57: 0.4437218609, 58: 0.4212106053, 59: 0.4172086043}, 'MDP_rej_ratio': {0: 0.1660830415, 1: 0.1605802901, 2: 0.152076038, 3: 0.1885942971, 4: 0.152076038, 5: 0.1565782891, 6: 0.1445722861, 7: 0.1570785393, 8: 0.1705852926, 9: 0.1605802901, 10: 0.1740870435, 11: 0.1670835418, 12: 0.1805902951, 13: 0.1740870435, 14: 0.1460730365, 15: 0.1810905453, 16: 0.1425712856, 17: 0.1580790395, 18: 0.1455727864, 19: 0.1590795398, 20: 0.001500750375, 21: 0.00300150075, 22: 0.002501250625, 23: 0.002501250625, 24: 0.0020010005, 25: 0.002501250625, 26: 0.0020010005, 27: 0.001500750375, 28: 0.004002001001, 29: 0.00300150075, 30: 0.0020010005, 31: 0.0, 32: 0.004002001001, 33: 0.0005002501251, 34: 0.0020010005, 35: 0.0, 36: 0.004502251126, 37: 0.002501250625, 38: 0.001500750375, 39: 0.004002001001, 40: 0.3851925963, 41: 0.3851925963, 42: 0.4097048524, 43: 0.3756878439, 44: 0.4112056028, 45: 0.4212106053, 46: 0.3791895948, 
47: 0.4127063532, 48: 0.4432216108, 49: 0.4152076038, 50: 0.3871935968, 51: 0.4197098549, 52: 0.3896948474, 53: 0.4107053527, 54: 0.4062031016, 55: 0.4252126063, 56: 0.4112056028, 57: 0.3931965983, 58: 0.4372186093, 59: 0.4157078539}, 'Q-Learning_rej_ratio': {0: 0.1790895448, 1: 0.1645822911, 2: 0.1545772886, 3: 0.1905952976, 4: 0.1510755378, 5: 0.1595797899, 6: 0.148074037, 7: 0.1575787894, 8: 0.1715857929, 9: 0.1590795398, 10: 0.1690845423, 11: 0.168084042, 12: 0.180090045, 13: 0.1785892946, 14: 0.1495747874, 15: 0.1815907954, 16: 0.1435717859, 17: 0.1685842921, 18: 0.1505752876, 19: 0.1670835418, 20: 0.001500750375, 21: 0.00300150075, 22: 0.002501250625, 23: 0.002501250625, 24: 0.0020010005, 25: 0.002501250625, 26: 0.0020010005, 27: 0.001500750375, 28: 0.004002001001, 29: 0.00300150075, 30: 0.0020010005, 31: 0.0, 32: 0.004002001001, 33: 0.0005002501251, 34: 0.0020010005, 35: 0.0, 36: 0.004502251126, 37: 0.002501250625, 38: 0.001500750375, 39: 0.004002001001, 40: 0.3856928464, 41: 0.4167083542, 42: 0.3786893447, 43: 0.4187093547, 44: 0.4157078539, 45: 0.392196098, 46: 0.4032016008, 47: 0.4452226113, 48: 0.4217108554, 49: 0.3876938469, 50: 0.4192096048, 51: 0.388194097, 52: 0.4122061031, 53: 0.4152076038, 54: 0.4172086043, 55: 0.4137068534, 56: 0.3956978489, 57: 0.4342171086, 58: 0.4082041021, 59: 0.4032016008}, 'Parametrized_factor': {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 6: 1.0, 7: 1.0, 8: 1.0, 9: 1.0, 10: 1.0, 11: 1.0, 12: 1.0, 13: 1.0, 14: 1.0, 15: 1.0, 16: 1.0, 17: 1.0, 18: 1.0, 19: 1.0, 20: 0.2, 21: 0.2, 22: 0.2, 23: 0.2, 24: 0.2, 25: 0.2, 26: 0.2, 27: 0.2, 28: 0.2, 29: 0.2, 30: 0.2, 31: 0.2, 32: 0.2, 33: 0.2, 34: 0.2, 35: 0.2, 36: 0.2, 37: 0.2, 38: 0.2, 39: 0.2, 40: 2.0, 41: 2.0, 42: 2.0, 43: 2.0, 44: 2.0, 45: 2.0, 46: 2.0, 47: 2.0, 48: 2.0, 49: 2.0, 50: 2.0, 51: 2.0, 52: 2.0, 53: 2.0, 54: 2.0, 55: 2.0, 56: 2.0, 57: 2.0, 58: 2.0, 59: 2.0}}
data2 = pd.DataFrame(data)
# figure size
plt.figure(figsize=(12, 8))
# One point plot per algorithm; each call returns the axes it drew on.
ax = sns.pointplot(y="BestFit_rej_ratio", x="Parametrized_factor", data=data2, linestyles='-.', color='g', capsize=.1, scale=.2, errwidth=.5)
ax = sns.pointplot(y="MDP_rej_ratio", x="Parametrized_factor", data=data2, linestyles='-', color='r', capsize=.12, scale=.2, errwidth=.5)
ax = sns.pointplot(y="Q-Learning_rej_ratio", x="Parametrized_factor", data=data2, linestyles=':', color='k', capsize=.15, scale=.5, errwidth=.5)
# NOTE(review): the next ax.legend(...) call creates a new legend, so the
# bbox_to_anchor placement requested here presumably has no lasting effect — confirm.
ax.legend(bbox_to_anchor=(1.15, 1), loc='upper left')
labels = ax.legend(['BestFit', 'MDP', 'Q-Learning'])
colors = ['green', 'red', 'black']
# Colour each legend entry to match the line it describes.
for legend_text, text_color in zip(labels.get_texts(), colors):
    legend_text.set_color(text_color)
# font size for the legend text
plt.setp(ax.get_legend().get_texts(), fontsize='12')
ax.set_ylabel('Rejection ratio')
ax.set_xlabel('Parametrized factor')
plt.show()
The problem is that the plot does not respect the scale of the x-axis values (Parametrized_factor).
How can I solve it?
I am trying to do a PCA analysis, but I cannot plot the variance properly on the y-axis.
I have data, which I exported for you
{1: {0: 242.0, 1: 290.0, 2: 340.0, 3: 363.0, 4: 430.0, 5: 450.0, 6: 500.0, 7: 390.0, 8: 450.0, 9: 500.0, 10: 475.0, 11: 500.0, 12: 500.0, 13: 600.0, 14: 600.0, 15: 700.0, 16: 700.0, 17: 610.0, 18: 650.0, 19: 575.0, 20: 685.0, 21: 620.0, 22: 680.0, 23: 700.0, 24: 725.0, 25: 720.0, 26: 714.0, 27: 850.0, 28: 1000.0, 29: 920.0, 30: 955.0, 31: 925.0, 32: 975.0, 33: 950.0, 34: 40.0, 35: 69.0, 36: 78.0, 37: 87.0, 38: 120.0, 39: 0.0, 40: 110.0, 41: 120.0, 42: 150.0, 43: 145.0, 44: 160.0, 45: 140.0, 46: 160.0, 47: 169.0, 48: 161.0, 49: 200.0, 50: 180.0, 51: 290.0, 52: 272.0, 53: 390.0, 54: 6.7, 55: 7.5, 56: 7.0, 57: 9.7, 58: 9.8, 59: 8.7, 60: 10.0, 61: 9.9, 62: 9.8, 63: 12.2, 64: 13.4, 65: 12.2, 66: 19.7, 67: 19.9, 68: 200.0, 69: 300.0, 70: 300.0, 71: 300.0, 72: 430.0, 73: 345.0, 74: 456.0, 75: 510.0, 76: 540.0, 77: 500.0, 78: 567.0, 79: 770.0, 80: 950.0, 81: 1250.0, 82: 1600.0, 83: 1550.0, 84: 1650.0}, 2: {0: 23.2, 1: 24.0, 2: 23.9, 3: 26.3, 4: 26.5, 5: 26.8, 6: 26.8, 7: 27.6, 8: 27.6, 9: 28.5, 10: 28.4, 11: 28.7, 12: 29.1, 13: 29.4, 14: 29.4, 15: 30.4, 16: 30.4, 17: 30.9, 18: 31.0, 19: 31.3, 20: 31.4, 21: 31.5, 22: 31.8, 23: 31.9, 24: 31.8, 25: 32.0, 26: 32.7, 27: 32.8, 28: 33.5, 29: 35.0, 30: 35.0, 31: 36.2, 32: 37.4, 33: 38.0, 34: 12.9, 35: 16.5, 36: 17.5, 37: 18.2, 38: 18.6, 39: 19.0, 40: 19.1, 41: 19.4, 42: 20.4, 43: 20.5, 44: 20.5, 45: 21.0, 46: 21.1, 47: 22.0, 48: 22.0, 49: 22.1, 50: 23.6, 51: 24.0, 52: 25.0, 53: 29.5, 54: 9.3, 55: 10.0, 56: 10.1, 57: 10.4, 58: 10.7, 59: 10.8, 60: 11.3, 61: 11.3, 62: 11.4, 63: 11.5, 64: 11.7, 65: 12.1, 66: 13.2, 67: 13.8, 68: 30.0, 69: 31.7, 70: 32.7, 71: 34.8, 72: 35.5, 73: 36.0, 74: 40.0, 75: 40.0, 76: 40.1, 77: 42.0, 78: 43.2, 79: 44.8, 80: 48.3, 81: 52.0, 82: 56.0, 83: 56.0, 84: 59.0}, 3: {0: 25.4, 1: 26.3, 2: 26.5, 3: 29.0, 4: 29.0, 5: 29.7, 6: 29.7, 7: 30.0, 8: 30.0, 9: 30.7, 10: 31.0, 11: 31.0, 12: 31.5, 13: 32.0, 14: 32.0, 15: 33.0, 16: 33.0, 17: 33.5, 18: 33.5, 19: 34.0, 20: 34.0, 21: 34.5, 22: 35.0, 23: 35.0, 24: 35.0, 25: 
35.0, 26: 36.0, 27: 36.0, 28: 37.0, 29: 38.5, 30: 38.5, 31: 39.5, 32: 41.0, 33: 41.0, 34: 14.1, 35: 18.2, 36: 18.8, 37: 19.8, 38: 20.0, 39: 20.5, 40: 20.8, 41: 21.0, 42: 22.0, 43: 22.0, 44: 22.5, 45: 22.5, 46: 22.5, 47: 24.0, 48: 23.4, 49: 23.5, 50: 25.2, 51: 26.0, 52: 27.0, 53: 31.7, 54: 9.8, 55: 10.5, 56: 10.6, 57: 11.0, 58: 11.2, 59: 11.3, 60: 11.8, 61: 11.8, 62: 12.0, 63: 12.2, 64: 12.4, 65: 13.0, 66: 14.3, 67: 15.0, 68: 32.3, 69: 34.0, 70: 35.0, 71: 37.3, 72: 38.0, 73: 38.5, 74: 42.5, 75: 42.5, 76: 43.0, 77: 45.0, 78: 46.0, 79: 48.0, 80: 51.7, 81: 56.0, 82: 60.0, 83: 60.0, 84: 63.4}, 4: {0: 30.0, 1: 31.2, 2: 31.1, 3: 33.5, 4: 34.0, 5: 34.7, 6: 34.5, 7: 35.0, 8: 35.1, 9: 36.2, 10: 36.2, 11: 36.2, 12: 36.4, 13: 37.2, 14: 37.2, 15: 38.3, 16: 38.5, 17: 38.6, 18: 38.7, 19: 39.5, 20: 39.2, 21: 39.7, 22: 40.6, 23: 40.5, 24: 40.9, 25: 40.6, 26: 41.5, 27: 41.6, 28: 42.6, 29: 44.1, 30: 44.0, 31: 45.3, 32: 45.9, 33: 46.5, 34: 16.2, 35: 20.3, 36: 21.2, 37: 22.2, 38: 22.2, 39: 22.8, 40: 23.1, 41: 23.7, 42: 24.7, 43: 24.3, 44: 25.3, 45: 25.0, 46: 25.0, 47: 27.2, 48: 26.7, 49: 26.8, 50: 27.9, 51: 29.2, 52: 30.6, 53: 35.0, 54: 10.8, 55: 11.6, 56: 11.6, 57: 12.0, 58: 12.4, 59: 12.6, 60: 13.1, 61: 13.1, 62: 13.2, 63: 13.4, 64: 13.5, 65: 13.8, 66: 15.2, 67: 16.2, 68: 34.8, 69: 37.8, 70: 38.8, 71: 39.8, 72: 40.5, 73: 41.0, 74: 45.5, 75: 45.5, 76: 45.8, 77: 48.0, 78: 48.7, 79: 51.2, 80: 55.1, 81: 59.7, 82: 64.0, 83: 64.0, 84: 68.0}, 5: {0: 38.4, 1: 40.0, 2: 39.8, 3: 38.0, 4: 36.6, 5: 39.2, 6: 41.1, 7: 36.2, 8: 39.9, 9: 39.3, 10: 39.4, 11: 39.7, 12: 37.8, 13: 40.2, 14: 41.5, 15: 38.8, 16: 38.8, 17: 40.5, 18: 37.4, 19: 38.3, 20: 40.8, 21: 39.1, 22: 38.1, 23: 40.1, 24: 40.0, 25: 40.3, 26: 39.8, 27: 40.6, 28: 44.5, 29: 40.9, 30: 41.1, 31: 41.4, 32: 40.6, 33: 37.9, 34: 25.6, 35: 26.1, 36: 26.3, 37: 25.3, 38: 28.0, 39: 28.4, 40: 26.7, 41: 25.8, 42: 23.5, 43: 27.3, 44: 27.8, 45: 26.2, 46: 25.6, 47: 27.7, 48: 25.9, 49: 27.6, 50: 25.4, 51: 30.4, 52: 28.0, 53: 27.1, 54: 16.1, 55: 17.0, 56: 
14.9, 57: 18.3, 58: 16.8, 59: 15.7, 60: 16.9, 61: 16.9, 62: 16.7, 63: 15.6, 64: 18.0, 65: 16.5, 66: 18.9, 67: 18.1, 68: 16.0, 69: 15.1, 70: 15.3, 71: 15.8, 72: 18.0, 73: 15.6, 74: 16.0, 75: 15.0, 76: 17.0, 77: 14.5, 78: 16.0, 79: 15.0, 80: 16.2, 81: 17.9, 82: 15.0, 83: 15.0, 84: 15.9}, 6: {0: 13.4, 1: 13.8, 2: 15.1, 3: 13.3, 4: 15.1, 5: 14.2, 6: 15.3, 7: 13.4, 8: 13.8, 9: 13.7, 10: 14.1, 11: 13.3, 12: 12.0, 13: 13.9, 14: 15.0, 15: 13.8, 16: 13.5, 17: 13.3, 18: 14.8, 19: 14.1, 20: 13.7, 21: 13.3, 22: 15.1, 23: 13.8, 24: 14.8, 25: 15.0, 26: 14.1, 27: 14.9, 28: 15.5, 29: 14.3, 30: 14.3, 31: 14.9, 32: 14.7, 33: 13.7, 34: 14.0, 35: 13.9, 36: 13.7, 37: 14.3, 38: 16.1, 39: 14.7, 40: 14.7, 41: 13.9, 42: 15.2, 43: 14.6, 44: 15.1, 45: 13.3, 46: 15.2, 47: 14.1, 48: 13.6, 49: 15.4, 50: 14.0, 51: 15.4, 52: 15.6, 53: 15.3, 54: 9.7, 55: 10.0, 56: 9.9, 57: 11.5, 58: 10.3, 59: 10.2, 60: 9.8, 61: 8.9, 62: 8.7, 63: 10.4, 64: 9.4, 65: 9.1, 66: 13.6, 67: 11.6, 68: 9.7, 69: 11.0, 70: 11.3, 71: 10.1, 72: 11.3, 73: 9.7, 74: 9.5, 75: 9.8, 76: 11.2, 77: 10.2, 78: 10.0, 79: 10.5, 80: 11.2, 81: 11.7, 82: 9.6, 83: 9.6, 84: 11.0}}
Import libraries
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
Data given above, but this is the code
fishes = pd.read_csv("fish.csv", header=None, index_col=False, skiprows=1, usecols=range(1,7))
fishes.head()
Create scaler
scaler = StandardScaler()
Create a PCA instance
pca = PCA()
Create pipeline
pipeline = make_pipeline(scaler, pca)
Fit the pipeline to 'samples'
pipeline.fit(fishes)
Plot the explained variances
# One bar per principal component kept by the fitted PCA.
features = range(pca.n_components_)
# explained_variance_ holds each component's variance in absolute terms, not as
# a share of the total, so the bars are not expected to add up to 1 (100%).
plt.bar(features, pca.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
# Put a tick under every component index.
plt.xticks(features)
plt.show()
My current output is this, which does not make sense.
If I understand correctly, the PCA variance on the y-axis should add up to 100%. On this scale, my first three factors do not explain much. Even if 1 here = 10%, the total is still not 100%.
Either I did something wrong (unlikely) or I need to adjust the scale for y-axis manually? Where is my mistake? Thanks.
Instead of plotting pca.explained_variance_, try plotting:
pca.explained_variance_ratio_
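This will sum up to 1. The explained variance always accounts for 100% of the total, but `explained_variance_` reports each component's variance in absolute terms, so its values depend on your data; `explained_variance_ratio_` divides each of them by the total variance, which is why the ratios add up to 1 (100%).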
I'd like to be able to assign the following keys to these values in Python:
Numbers 01 - 10 : 5.01
Numbers 11 - 20 : 7.02
Numbers 21 - 30 : 9.03
Numbers 31 - 40 : 11.04
Numbers 41 - 50 : 15.00
Numbers 51 - 60 : 17.08
Numbers 61 - 70 : 19.15
I know that this is possible:
# Start with keys 1-10, then merge in one further range of keys per rate.
rates = dict.fromkeys(range(1, 11), 5.01)
rates.update(dict.fromkeys(range(11, 21), 7.02))
# ...etc
and that's okay. However, is there a way to do this in one line or one initializer list in Python?
Use a dictionary comprehension and an initial mapping:
# Map the first key of each block of ten to that block's value ...
numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
# ... then expand every start key into the ten consecutive keys start..start+9.
numbers = {k: v for start, v in numbers.items() for k in range(start, start + 10)}
Demo:
>>> from pprint import pprint
>>> numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
>>> numbers = {k: v for start, v in numbers.items() for k in range(start, start + 10)}
>>> pprint(numbers)
{1: 5.01,
2: 5.01,
3: 5.01,
4: 5.01,
5: 5.01,
6: 5.01,
7: 5.01,
8: 5.01,
9: 5.01,
10: 5.01,
11: 7.02,
12: 7.02,
13: 7.02,
14: 7.02,
15: 7.02,
16: 7.02,
17: 7.02,
18: 7.02,
19: 7.02,
20: 7.02,
21: 9.03,
22: 9.03,
23: 9.03,
24: 9.03,
25: 9.03,
26: 9.03,
27: 9.03,
28: 9.03,
29: 9.03,
30: 9.03,
31: 11.04,
32: 11.04,
33: 11.04,
34: 11.04,
35: 11.04,
36: 11.04,
37: 11.04,
38: 11.04,
39: 11.04,
40: 11.04,
41: 15.0,
42: 15.0,
43: 15.0,
44: 15.0,
45: 15.0,
46: 15.0,
47: 15.0,
48: 15.0,
49: 15.0,
50: 15.0,
51: 17.08,
52: 17.08,
53: 17.08,
54: 17.08,
55: 17.08,
56: 17.08,
57: 17.08,
58: 17.08,
59: 17.08,
60: 17.08,
61: 19.15,
62: 19.15,
63: 19.15,
64: 19.15,
65: 19.15,
66: 19.15,
67: 19.15,
68: 19.15,
69: 19.15,
70: 19.15}
The dictionary expression produces both a key and a value for each iteration of the loops. There are two loops in that expression, and you need to read them from left to right as nested in that order. Written out as a non-comprehension set of loops, you'd get:
# Start key of each block of ten -> the value shared by that block.
numbers = {1: 5.01, 11: 7.02, 21: 9.03, 31: 11.04, 41: 15.0, 51: 17.08, 61: 19.15}
output = {}
# loop over the (key, value) pairs in the numbers dictionary
for start, v in numbers.items():
    # expand the start key into the ten consecutive keys start..start+9
    for k in range(start, start + 10):
        output[k] = v
numbers = output
Essentially the keys in the original numbers dictionary are turned into ranges to form 10 new keys in the output dictionary, all with the same value.