This is a follow up question from this question.
The data:
df1 = pd.DataFrame.from_dict({('group', ''): {0: 'A',
1: 'A',
2: 'A',
3: 'A',
4: 'A',
5: 'A',
6: 'A',
7: 'A',
8: 'B',
9: 'B',
10: 'B',
11: 'B',
12: 'B',
13: 'B',
14: 'B',
15: 'B',
16: 'C',
17: 'C',
18: 'C',
19: 'C',
20: 'C',
21: 'C',
22: 'C',
23: 'C',
24: 'D',
25: 'D',
26: 'D',
27: 'D',
28: 'D',
29: 'D',
30: 'D'},
('category', ''): {0: 'Apple',
1: 'Amazon',
2: 'Google',
3: 'Netflix',
4: 'Facebook',
5: 'Uber',
6: 'Tesla',
7: 'total',
8: 'Apple',
9: 'Amazon',
10: 'Google',
11: 'Netflix',
12: 'Facebook',
13: 'Uber',
14: 'Tesla',
15: 'total',
16: 'Apple',
17: 'Amazon',
18: 'Google',
19: 'Netflix',
20: 'Facebook',
21: 'Uber',
22: 'Tesla',
23: 'total',
24: 'Apple',
25: 'Amazon',
26: 'Google',
27: 'Netflix',
28: 'Uber',
29: 'Tesla',
30: 'total'},
(pd.Timestamp('2021-06-28 00:00:00'), 'total_orders'): {0: 88.0,
1: 66.0,
2: 191.0,
3: 558.0,
4: 12.0,
5: 4.0,
6: 56.0,
7: 975.0,
8: 90.0,
9: 26.0,
10: 49.0,
11: 250.0,
12: 7.0,
13: 2.0,
14: 44.0,
15: 468.0,
16: 36.0,
17: 52.0,
18: 94.0,
19: 750.0,
20: 10.0,
21: 0.0,
22: 52.0,
23: 994.0,
24: 16.0,
25: 22.0,
26: 5.0,
27: 57.0,
28: 3.0,
29: 33.0,
30: 136.0},
(pd.Timestamp('2021-06-28 00:00:00'), 'total_sales'): {0: 4603.209999999999,
1: 2485.059999999998,
2: 4919.39999999998,
3: 6097.77,
4: 31.22,
5: 155.71,
6: 3484.99,
7: 17237.35999999996,
8: 561.54,
9: 698.75,
10: 1290.13,
11: 4292.68000000001,
12: 947.65,
13: 329.0,
14: 2889.65,
15: 9989.4,
16: 330.8899999999994,
17: 2076.26,
18: 2982.270000000004,
19: 11978.62000000002,
20: 683.0,
21: 0.0,
22: 3812.16999999999,
23: 20963.21000000002,
24: 234.4900000000002,
25: 896.1,
26: 231.0,
27: 893.810000000001,
28: 129.0,
29: 1712.329999999998,
30: 4106.729999999996},
(pd.Timestamp('2021-07-05 00:00:00'), 'total_orders'): {0: 109.0,
1: 48.0,
2: 174.0,
3: 592.0,
4: 13.0,
5: 5.0,
6: 43.0,
7: 984.0,
8: 62.0,
9: 13.0,
10: 37.0,
11: 196.0,
12: 8.0,
13: 1.0,
14: 3.0,
15: 30.0,
16: 76.0,
17: 5.0,
18: 147.0,
19: 88.0,
20: 8.0,
21: 1.0,
22: 78.0,
23: 1248.0,
24: 1.0,
25: 18.0,
26: 23.0,
27: 83.0,
28: 0.0,
29: 29.0,
30: 154.0},
(pd.Timestamp('2021-07-05 00:00:00'), 'total_sales'): {0: 3453.02,
1: 17868.730000000003,
2: 44707.82999999999,
3: 61425.99,
4: 1261.0,
5: 1914.6000000000001,
6: 24146.09,
7: 154777.25999999998,
8: 6201.489999999999,
9: 5513.960000000001,
10: 9645.87,
11: 25086.785,
12: 663.0,
13: 448.61,
14: 26332.7,
15: 73892.415,
16: 556.749999999999,
17: 1746.859999999997,
18: 4103.219999999994,
19: 15571.52000000008,
20: 86.0,
21: 69.0,
22: 5882.759999999995,
23: 26476.11000000004,
24: 53.0,
25: 801.220000000001,
26: 684.56,
27: 1232.600000000002,
28: 0.0,
29: 15902.1,
30: 43943.48},
(pd.Timestamp('2021-07-12 00:00:00'), 'total_orders'): {0: 32.0,
1: 15.0,
2: 89.0,
3: 239.0,
4: 2.0,
5: 3.0,
6: 20.0,
7: 400.0,
8: 0.0,
9: 0.0,
10: 0.0,
11: 0.0,
12: 0.0,
13: 0.0,
14: 0.0,
15: 0.0,
16: 21.0,
17: 14.0,
18: 58.0,
19: 281.0,
20: 3.0,
21: 3.0,
22: 33.0,
23: 413.0,
24: 7.0,
25: 6.0,
26: 4.0,
27: 13.0,
28: 0.0,
29: 18.0,
30: 48.0},
(pd.Timestamp('2021-07-12 00:00:00'), 'total_sales'): {0: 2147.7000000000003,
1: 4767.3,
2: 2399.300000000003,
3: 3137.440000000002,
4: 178.0,
5: 866.61,
6: 10639.03,
7: 73235.38,
8: 0.0,
9: 0.0,
10: 0.0,
11: 0.0,
12: 0.0,
13: 0.0,
14: 0.0,
15: 0.0,
16: 220.94,
17: 727.5199999999995,
18: 2500.96999999999,
19: 4414.00999999998,
20: 15.0,
21: 196.71,
22: 2170.1,
23: 9745.24999999997,
24: 126.55,
25: 290.2,
26: 146.01,
27: 233.0,
28: 0.0,
29: 973.18,
30: 1658.940000000002}}).set_index(['group','category'])
df2 = pd.DataFrame.from_dict({'group': {0: 'total_full',
1: 'total_full',
2: 'A',
3: 'A',
4: 'B',
5: 'B',
6: 'C',
7: 'C',
8: 'D',
9: 'D',
10: 'Apple_total',
11: 'Apple_total',
12: 'A',
13: 'A',
14: 'B',
15: 'B',
16: 'C',
17: 'C',
18: 'D',
19: 'D',
20: 'Amazon_total',
21: 'Amazon_total',
22: 'A',
23: 'A',
24: 'B',
25: 'B',
26: 'C',
27: 'C',
28: 'D',
29: 'D',
30: 'Google_total',
31: 'Google_total',
32: 'A',
33: 'A',
34: 'B',
35: 'B',
36: 'C',
37: 'C',
38: 'D',
39: 'D',
40: 'Facebook_total',
41: 'Facebook_total',
42: 'A',
43: 'A',
44: 'B',
45: 'B',
46: 'C',
47: 'C',
48: 'D',
49: 'D',
50: 'Netflix_total',
51: 'Netflix_total',
52: 'A',
53: 'A',
54: 'B',
55: 'B',
56: 'C',
57: 'C',
58: 'D',
59: 'D',
60: 'Tesla_total',
61: 'Tesla_total',
62: 'A',
63: 'A',
64: 'B',
65: 'B',
66: 'C',
67: 'C',
68: 'D',
69: 'D',
70: 'Uber_total',
71: 'Uber_total',
72: 'A',
73: 'A',
74: 'B',
75: 'B',
76: 'C',
77: 'C',
78: 'D',
79: 'D'},
'category': {0: 'total_full',
1: 'total_full',
2: 'group_total',
3: 'group_total',
4: 'group_total',
5: 'group_total',
6: 'group_total',
7: 'group_total',
8: 'group_total',
9: 'group_total',
10: 'Apple_total',
11: 'Apple_total',
12: 'Apple',
13: 'Apple',
14: 'Apple',
15: 'Apple',
16: 'Apple',
17: 'Apple',
18: 'Apple',
19: 'Apple',
20: 'Amazon_total',
21: 'Amazon_total',
22: 'Amazon',
23: 'Amazon',
24: 'Amazon',
25: 'Amazon',
26: 'Amazon',
27: 'Amazon',
28: 'Amazon',
29: 'Amazon',
30: 'Google_total',
31: 'Google_total',
32: 'Google',
33: 'Google',
34: 'Google',
35: 'Google',
36: 'Google',
37: 'Google',
38: 'Google',
39: 'Google',
40: 'Facebook_total',
41: 'Facebook_total',
42: 'Facebook',
43: 'Facebook',
44: 'Facebook',
45: 'Facebook',
46: 'Facebook',
47: 'Facebook',
48: 'Facebook',
49: 'Facebook',
50: 'Netflix_total',
51: 'Netflix_total',
52: 'Netflix',
53: 'Netflix',
54: 'Netflix',
55: 'Netflix',
56: 'Netflix',
57: 'Netflix',
58: 'Netflix',
59: 'Netflix',
60: 'Tesla_total',
61: 'Tesla_total',
62: 'Tesla',
63: 'Tesla',
64: 'Tesla',
65: 'Tesla',
66: 'Tesla',
67: 'Tesla',
68: 'Tesla',
69: 'Tesla',
70: 'Uber_total',
71: 'Uber_total',
72: 'Uber',
73: 'Uber',
74: 'Uber',
75: 'Uber',
76: 'Uber',
77: 'Uber',
78: 'Uber',
79: 'Uber'},
'type': {0: 'Sales_1',
1: 'Sales_2',
2: 'Sales_1',
3: 'Sales_2',
4: 'Sales_1',
5: 'Sales_2',
6: 'Sales_1',
7: 'Sales_2',
8: 'Sales_1',
9: 'Sales_2',
10: 'Sales_1',
11: 'Sales_2',
12: 'Sales_1',
13: 'Sales_2',
14: 'Sales_1',
15: 'Sales_2',
16: 'Sales_1',
17: 'Sales_2',
18: 'Sales_1',
19: 'Sales_2',
20: 'Sales_1',
21: 'Sales_2',
22: 'Sales_1',
23: 'Sales_2',
24: 'Sales_1',
25: 'Sales_2',
26: 'Sales_1',
27: 'Sales_2',
28: 'Sales_1',
29: 'Sales_2',
30: 'Sales_1',
31: 'Sales_2',
32: 'Sales_1',
33: 'Sales_2',
34: 'Sales_1',
35: 'Sales_2',
36: 'Sales_1',
37: 'Sales_2',
38: 'Sales_1',
39: 'Sales_2',
40: 'Sales_1',
41: 'Sales_2',
42: 'Sales_1',
43: 'Sales_2',
44: 'Sales_1',
45: 'Sales_2',
46: 'Sales_1',
47: 'Sales_2',
48: 'Sales_1',
49: 'Sales_2',
50: 'Sales_1',
51: 'Sales_2',
52: 'Sales_1',
53: 'Sales_2',
54: 'Sales_1',
55: 'Sales_2',
56: 'Sales_1',
57: 'Sales_2',
58: 'Sales_1',
59: 'Sales_2',
60: 'Sales_1',
61: 'Sales_2',
62: 'Sales_1',
63: 'Sales_2',
64: 'Sales_1',
65: 'Sales_2',
66: 'Sales_1',
67: 'Sales_2',
68: 'Sales_1',
69: 'Sales_2',
70: 'Sales_1',
71: 'Sales_2',
72: 'Sales_1',
73: 'Sales_2',
74: 'Sales_1',
75: 'Sales_2',
76: 'Sales_1',
77: 'Sales_2',
78: 'Sales_1',
79: 'Sales_2'},
'2021-06-28': {0: 67.5277641202152,
1: 82.7854700135998,
2: 21.50082266792856,
3: 22.03644997199996,
4: 64.460440147,
5: 10.1060499896,
6: 65.1530371974946,
7: 50.6429700519999,
8: 56.413464107792045,
9: 0,
10: 17.48074540313092,
11: 26.8376199976,
12: 52.172,
13: 61.16600000040001,
14: 20.9447844,
15: 40.69122000000001,
16: 83.55718929717925,
17: 14.98039999719995,
18: 20.806771705951697,
19: np.nan,
20: 18.3766353690825,
21: 12.82565001479992,
22: 52.425508769690694,
23: 25.661999978399994,
24: 17.88071596,
25: 24.384659998799997,
26: 91.10086982794643,
27: 12.77899003759993,
28: 16.969540811445366,
29: np.nan,
30: 18.8795397517309,
31: 26.73017999840005,
32: 53.52039700062155,
33: 58.81199999639999,
34: 12.1243325,
35: 24.0544100028,
36: 55.94068246571674,
37: 133.86376999920006,
38: 7.294127785392621,
39: np.nan,
40: 6.07807089184563,
41: 7.27483001599998,
42: 2.300470581874837,
43: 30.71300000639998,
44: 5.810764652,
45: 12.333119997600003,
46: 25.475930745418292,
47: 64.228710012,
48: 9.490904912552498,
49: np.nan,
50: 8.184780211399392,
51: 24.59321999400001,
52: 6.807138946302334,
53: 12.0879999972,
54: 0.869207661,
55: 0.324,
56: 0.5084336040970575,
57: 12.181219996800007,
58: 0,
59: np.nan,
60: 9.293956915067886,
61: 11.171379993599999,
62: 6.384936971649232,
63: 3.657999996,
64: 0.913782413,
65: 1.9992000012000002,
66: 1.5322078073061867,
67: 5.514179996399999,
68: 0.4630297231124678,
69: np.nan,
70: 36.23403557795798,
71: 53.35258999919999,
72: 21.890370397789923,
73: 9.937449997200002,
74: 5.916852561,
75: 6.319439989199998,
76: 7.03772344983066,
77: 37.095700012799995,
78: 1.3890891693374032,
79: np.nan},
'2021-07-05': {0: 65.4690491915759,
1: 98.5235100112003,
2: 21.4573181155924,
3: 241.06741999679997,
4: 67.481716829,
5: 11.60325000040002,
6: 27.5807099999998,
7: 65.8528400140003,
8: 58.949304246983736,
9: 0.0,
10: 185.65887577993723,
11: 318.9965699964001,
12: 54.517,
13: 66.55265999039996,
14: 21.92632044,
15: 43.67116000320002,
16: 87.47349898707688,
17: 208.7727500028001,
18: 21.742056352860352,
19: np.nan,
20: 16.6038963173654,
21: 25.28952001920013,
22: 54.7820864335212,
23: 36.75802000560001,
24: 18.71872129,
25: 30.1634600016,
26: 95.37075040035738,
27: 138.3680400120001,
28: 17.73233819348684,
29: np.nan,
30: 14.80302342121337,
31: 251.83851001200003,
32: 55.926190956481534,
33: 72.4443400032,
34: 12.69221484,
35: 26.032340003999998,
36: 58.56261169338368,
37: 153.36183000480003,
38: 7.622005931348156,
39: np.nan,
40: 72.24367956241771,
41: 14.83083001279999,
42: 29.5726042895728,
43: 38.723000005199985,
44: 6.083562133,
45: 12.845630001599998,
46: 26.66998281055652,
47: 63.26220000600001,
48: 9.917530329288393,
49: np.nan,
50: 8.555606693927,
51: 23.802009994800002,
52: 7.113126469779095,
53: 7.206999998399999,
54: 0.910216433,
55: 1.4089999991999997,
56: 0.5322637911479053,
57: 15.186009997200001,
58: 0.0,
59: np.nan,
60: 9.716385738295367,
61: 14.7327399948,
62: 6.671946105284065,
63: 5.691999996,
64: 0.956574175,
65: 1.0203399996,
66: 1.6040220980113027,
67: 8.020399999199999,
68: 0.4838433599999999,
69: np.nan,
70: 37.88758167841983,
71: 59.03332998119994,
72: 22.874363860953647,
73: 13.690399997999998,
74: 6.194107518,
75: 6.4613199911999954,
76: 7.367580219466185,
77: 38.881609991999944,
78: 1.4515300799999995,
79: np.nan},
'2021-07-12': {0: 607.2971827405001,
1: 88.9671100664001,
2: 21.26749278974862,
3: 17.1524199804,
4: 64.471138092,
5: 89.84481002279999,
6: 26.2044999999998,
7: 51.9698800632001,
8: 5.354051858751745,
9: 0.0,
10: 177.42361595891452,
11: 287.5395700032,
12: 52.117,
13: 47.388199995600004,
14: 20.94835038,
15: 41.4250800048,
16: 83.57340667555117,
17: 198.72629000280003,
18: 20.784858903363354,
19: np.nan,
20: 178.323907459086,
21: 185.83897002839998,
22: 52.37029646474982,
23: 27.87144997800001,
24: 17.88339044,
25: 23.645340010799984,
26: 91.11855133792106,
27: 134.3221800396,
28: 16.95166921641509,
29: np.nan,
30: 128.82813286243115,
31: 192.6867300156,
32: 53.46403160619618,
33: 41.412320006399995,
34: 12.1261155,
35: 11.840830002000002,
36: 55.95153983444301,
37: 139.43358000720002,
38: 7.286445921791947,
39: np.nan,
40: 69.04410667683521,
41: 93.877410018,
42: 28.270665735943805,
43: 27.512680004399986,
44: 5.811656147,
45: 5.2319800032,
46: 25.480875296710053,
47: 61.132750010400024,
48: 9.480909497181356,
49: np.nan,
50: 8.178601399067174,
51: 17.6743199976,
52: 6.7999699585309585,
53: 6.131999998799999,
54: 0.870099156,
55: 0.6185600004,
56: 0.5085322845362154,
57: 10.923759998400003,
58: 0.0,
59: np.nan,
60: 9.287042311133577,
61: 19.966500000000007,
62: 6.378212628950804,
63: 6.524999997600001,
64: 0.913782413,
65: 1.9303400016,
66: 1.5325051891827732,
67: 11.511160000800006,
68: 0.4625420799999998,
69: np.nan,
70: 36.21177607303267,
71: 51.3836100036,
72: 21.86731639537707,
73: 10.310769999600003,
74: 5.917744056,
75: 5.152679999999999,
76: 7.039089381655591,
77: 35.920160003999996,
78: 1.3876262399999995,
79: np.nan}}).set_index(['group','category','type'])
f = lambda x: pd.to_datetime(x)
df = (df1.merge(df2.rename(columns=f).unstack(), left_index=True, right_index=True)
.sort_index(axis=1))
I am trying to add a column last_week_sales to each multiindex column.
So the total_sales from the previous week becomes last_week_sales in the following week in every multiindex column except for the first week as there are no data of the previous week.
What I've tried:
pd.merge(df.shift(-4, axis = 1).xs('total_sales', level=1,drop_level=False))
I am not sure how I can select only the second index, few answers suggested using .xs() but I am not sure how I would merge it on the date column, I tried shifting everything by -4 as then I get the desired week shift, but maybe there are better ways of achieving this.
Right now df.columns return
MultiIndex([('2021-06-28', 'Sales_1'),
('2021-06-28', 'Sales_2'),
('2021-06-28', 'total_orders'),
('2021-06-28', 'total_sales'),
('2021-07-05', 'Sales_1'),
('2021-07-05', 'Sales_2'),
('2021-07-05', 'total_orders'),
('2021-07-05', 'total_sales'),
('2021-07-12', 'Sales_1'),
('2021-07-12', 'Sales_2'),
('2021-07-12', 'total_orders'),
('2021-07-12', 'total_sales')])
And I would like to have:
MultiIndex([('2021-06-28', 'Sales_1'),
('2021-06-28', 'Sales_2'),
('2021-06-28', 'total_orders'),
('2021-06-28', 'total_sales'),
('2021-06-28', 'last_week_sales')
('2021-07-05', 'Sales_1'),
('2021-07-05', 'Sales_2'),
('2021-07-05', 'total_orders'),
('2021-07-05', 'total_sales'),
('2021-07-05', 'last_week_sales')
('2021-07-12', 'Sales_1'),
('2021-07-12', 'Sales_2'),
('2021-07-12', 'total_orders'),
('2021-07-12', 'total_sales'),
('2021-07-12', 'last_week_sales')])
Let us do in steps
Select the cross section of dataframe using loc
Shift the cross section along the columns axis
Rename total_sales to last_week_sales and combine the cross section with df
s = df.loc[:, (slice(None), 'total_sales')].shift(axis=1)
s = s.rename({'total_sales': 'last_week_sales'}, axis=1, level=1)
df.combine_first(s)
Problem:
I have two pandas dataframes of ~3GB each, to join on 1) postcode & 2) every combination of house_identifier variables until either a row has found a join (in a for loop) or all variable-to-variable combinations have failed for the row.
After two columns have joined a row in the loop, that row will be appended to a separate list and removed from the dataframes. postcode is a non-unique index.
Table columns (not hirarchical)
dataset_1 has these variables:
postcode, house_identifier_1, house_identifier_2, house_identifier_3, id
dataset_2 has these variables:
postcode, house_identifier_a, house_identifier_b, id_2
Column combinations to join in a loop:
table_1_variables = ['number_x', 'number_y', 'number_z']
table_2_variables = ['number_a', 'number_b']
for i in table_1_variables:
for j in table_2_variables:
To join the tables efficiently a stratgy seems to be to first join on the indexes (postcodes), and then on the non-indexed columns. However it seems like this would create a very large intermediary join which would push an 8GB memory over limits and the syntax is also unclear between combining (left_index=True, right_index=True, left_on=, right_on=)
Meanwhile, indexing/reindexing and then sorting_index inside a loop seems very inefficient.
Is there a better way to join or merge theese efficiently?
Example of intersects:
{'id': {27: '{582D0636-8DEF-8F22-E053-6C04A8C01BAC}',
41: '{D9E869FE-7B55-4C36-AC43-695B9033A13B}',
33: '{93E6821E-554E-40FD-E053-6B04A8C0C1DF}',
1: '{288DCE29-0589-E510-E050-A8C06205480E}',
48: '{3A23DDD5-A0E8-41D2-A514-5B09385C301F}',
52: '{CEB16957-F7FA-4D1B-B45F-A390214735BC}',
13: '{404A5AF3-9B20-CD2B-E050-A8C063055C7B}',
16: '{64342BFD-FD07-422C-E053-6C04A8C0FB8A}',
57: '{29A8E769-8A10-4477-9494-FF55EF5FAE4B}',
10: '{404A5AF3-0B58-CD2B-E050-A8C063055C7B}',
21: '{55BDCAE6-0C10-521D-E053-6B04A8C0DD7A}',
31: '{5C676A02-1781-4152-950C-6E5CA2CBC487}',
7: '{68FEB20B-142E-38DA-E053-6C04A8C051AE}',
45: '{8F1B26BD-673F-53DB-E053-6C04A8C03649}',
12: '{2F115F7A-8F81-4124-9FD4-FB76E742B2C1}',
36: '{344AB2D7-4B59-4AB4-8F52-75B29BE8C509}',
20: '{965B6D91-D4B6-95E4-E053-6C04A8C07729}',
56: '{59872FD9-F39D-4BB9-95F6-91E002D948B1}',
22: '{6141DFF0-973F-4FEC-A582-7F310B566031}'},
'id_2': {27: 10002277489,
41: 64023255,
33: 10007367447,
1: 22229221,
48: 10033235735,
52: 100062162615,
13: 50103744,
16: 10022903998,
57: 12015624,
10: 12154940,
21: 10024247587,
31: 100041193990,
7: 10008230730,
45: 10091640210,
12: 202107394,
36: 5062293,
20: 48114659,
56: 10001311242,
22: 10000443154},
'postcode': {27: 'lu72la',
41: 'cf626nt',
33: 'hr40aq',
1: 'bn32pd',
48: 'sg13ae',
52: 'gu97jx',
13: 'ct202ef',
16: 'bh14rn',
57: 'ub24af',
10: 'w55bu',
21: 'po302dp',
31: 'tq148aq',
7: 'e82ag',
45: 'ch47ew',
12: 'ha90ae',
36: 'nw34tt',
20: 'sw192rw',
56: 'so143hw',
22: 'se218hp'},
'house_identifier_1': {27: '76',
41: 'flat6',
33: '49',
1: 'flat10',
48: '145',
52: '31',
13: 'flat19',
16: 'flat7',
57: '76',
10: 'flat1',
21: 'flat1',
31: 'flat43',
7: 'flata',
45: '8',
12: '42',
36: 'flat9',
20: 'flat43',
56: 'flat156',
22: 'flat2'},
'house_identifier_2': {27: 'eastdock',
41: 'courtlands',
33: 'watkinscourt',
1: 'ascothouse',
48: 'monumentcourt',
52: 'sumnercourt',
13: '22-24',
16: '77',
57: 'osterleyviews',
10: '55-59',
21: '138',
31: 'leandercourt',
7: '130',
45: 'greenbankhall',
12: 'danescourt',
36: 'holmefieldcourt',
20: 'bennetscourtyard',
56: 'oceanaboulevard',
22: '124f'},
'house_identifier_3': {27: 'eastdock',
41: 'courtlands',
33: 'watkinscourt',
1: 'ascothouse',
48: 'monumentcourt',
52: 'sumnercourt',
13: None,
16: None,
57: 'osterleyviews',
10: None,
21: None,
31: 'leandercourt',
7: None,
45: 'greenbankhall',
12: 'danescourt',
36: 'holmefieldcourt',
20: 'bennetscourtyard',
56: 'oceanaboulevard',
22: None},
'house_identifier_a': {27: None,
41: None,
33: None,
1: '18-20',
48: None,
52: None,
13: '22-24',
16: '77',
57: None,
10: '55-59',
21: '138',
31: None,
7: '130',
45: None,
12: None,
36: None,
20: None,
56: None,
22: '124f'},
'house_identifier_b': {27: '76',
41: 'flat6',
33: '49',
1: 'flat10',
48: '145',
52: '31',
13: 'flat19',
16: 'flat7',
57: '76',
10: 'flat1',
21: 'flat1',
31: 'flat43',
7: 'flata',
45: '8',
12: '42',
36: 'flat9',
20: 'flat43',
56: 'flat156',
22: 'flat2'}}
Is there a way to visualize the results below?
29% of the results has accuracy >= 90%
59% of the results has accuracy >= 80%
88% of the results has accuracy >= 70%
98% of the results has accuracy >= 60%
2 entries with 59% and 57%
Below is the code with test result (accuracy) and rolling percentage
df1 = pd.DataFrame( {'Test Results': {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 6: 1.0, 7: 1.0, 8: 1.0, 9: 1.0, 10: 1.0, 11: 1.0, 12: 1.0, 13: 1.0, 14: 1.0, 15: 1.0, 16: 1.0, 17: 1.0, 18: 1.0, 19: 1.0, 20: 0.9452757159999999, 21: 0.9450125009999999, 22: 0.941873717, 23: 0.9336287990000001, 24: 0.932937845, 25: 0.928728552, 26: 0.9217334709999999, 27: 0.9212251640000001, 28: 0.903416839, 29: 0.900519655, 30: 0.8946950090000001, 31: 0.894315668, 32: 0.893705918, 33: 0.8911087870000001, 34: 0.88341769, 35: 0.875316697, 36: 0.873369453, 37: 0.8724485759999999, 38: 0.870855835, 39: 0.8682100159999999, 40: 0.866413987, 41: 0.866225758, 42: 0.86607366, 43: 0.861137576, 44: 0.8570423090000001, 45: 0.855989443, 46: 0.8363952240000001, 47: 0.835153771, 48: 0.831573322, 49: 0.8297349890000001, 50: 0.827174152, 51: 0.82533598, 52: 0.8244730259999999, 53: 0.8231749909999999, 54: 0.821542323, 55: 0.81994456, 56: 0.8193151340000001, 57: 0.817544275, 58: 0.8043684740000001, 59: 0.804280654, 60: 0.804033467, 61: 0.8024588559999999, 62: 0.799949648, 63: 0.7998811370000001, 64: 0.7993836879999999, 65: 0.7978083340000001, 66: 0.794987453, 67: 0.7942367459999999, 68: 0.788060922, 69: 0.7833455859999999, 70: 0.783237411, 71: 0.781810415, 72: 0.77947656, 73: 0.775430937, 74: 0.769843183, 75: 0.763548283, 76: 0.76318018, 77: 0.75891367, 78: 0.7588845790000001, 79: 0.758045414, 80: 0.7545263740000001, 81: 0.7514679670000001, 82: 0.74697755, 83: 0.74597623, 84: 0.743042806, 85: 0.740900129, 86: 0.740844543, 87: 0.7246525220000001, 88: 0.7133019070000001, 89: 0.7124335999999999, 90: 0.703856728, 91: 0.694475843, 92: 0.68836286, 93: 0.683978596, 94: 0.679002735, 95: 0.664945699, 96: 0.662761826, 97: 0.649950991, 98: 0.638550239, 99: 0.60593566, 100: 0.603891537, 101: 0.602900777, 102: 0.594660442, 103: 0.565978017}, 'Rolling Percentage': {0: 0.009615385, 1: 0.019230768999999998, 2: 0.028846154, 3: 0.038461537999999997, 4: 0.048076923, 5: 0.057692308, 6: 0.067307692, 7: 0.076923077, 8: 0.086538462, 9: 0.096153846, 10: 0.10576923099999999, 11: 0.115384615, 12: 0.125, 13: 0.134615385, 14: 0.144230769, 15: 0.153846154, 16: 0.16346153800000002, 17: 0.173076923, 18: 0.182692308, 19: 0.192307692, 20: 0.201923077, 21: 0.21153846199999998, 22: 0.22115384600000002, 23: 0.23076923100000002, 24: 0.240384615, 25: 0.25, 26: 0.259615385, 27: 0.269230769, 28: 0.278846154, 29: 0.288461538, 30: 0.298076923, 31: 0.307692308, 32: 0.317307692, 33: 0.326923077, 34: 0.33653846200000004, 35: 0.346153846, 36: 0.355769231, 37: 0.365384615, 38: 0.375, 39: 0.384615385, 40: 0.394230769, 41: 0.403846154, 42: 0.41346153799999996, 43: 0.423076923, 44: 0.43269230799999997, 45: 0.44230769200000003, 46: 0.451923077, 47: 0.46153846200000004, 48: 0.471153846, 49: 0.480769231, 50: 0.490384615, 51: 0.5, 52: 0.509615385, 53: 0.519230769, 54: 0.528846154, 55: 0.538461538, 56: 0.548076923, 57: 0.557692308, 58: 0.567307692, 59: 0.576923077, 60: 0.586538462, 61: 0.596153846, 62: 0.605769231, 63: 0.615384615, 64: 0.625, 65: 0.634615385, 66: 0.644230769, 67: 0.653846154, 68: 0.663461538, 69: 0.673076923, 70: 0.682692308, 71: 0.692307692, 72: 0.701923077, 73: 0.711538462, 74: 0.721153846, 75: 0.7307692309999999, 76: 0.740384615, 77: 0.75, 78: 0.759615385, 79: 0.7692307690000001, 80: 0.778846154, 81: 0.788461538, 82: 0.798076923, 83: 0.807692308, 84: 0.817307692, 85: 0.826923077, 86: 0.836538462, 87: 0.846153846, 88: 0.8557692309999999, 89: 0.865384615, 90: 0.875, 91: 0.884615385, 92: 0.8942307690000001, 93: 0.903846154, 94: 0.913461538, 95: 0.923076923, 96: 0.932692308, 97: 0.942307692, 98: 0.951923077, 99: 0.961538462, 100: 0.971153846, 101: 0.9807692309999999, 102: 0.990384615, 103: 1.0}} )