How to add text on each rectangle in a collection using matplotlib? - python

I am using matplotlib for plotting and convenient visualization of some graphs in xy coordinates.
I need to highlight some regions - and I use rectangles for this.
But I would like to add some text on each rectangle so that I can tell those regions apart. How can I do this using patches, given that I have a lot of objects in the plot?
Here is the code I use to plot rectangles:
# sample data for rectangles visualization
windows_df = pd.DataFrame( {'window_index_num': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}, 'left_pulse_num': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}, 'right_pulse_num': {0: 2, 1: 3, 2: 4, 3: 5, 4: 6, 5: 7, 6: 8, 7: 9, 8: 10, 9: 11}, 'idx_of_left_pulse': {0: 0, 1: 4036, 2: 4080, 3: 4107, 4: 4368, 5: 4491, 6: 4529, 7: 4624, 8: 4626, 9: 4639}, 'idx_of_right_pulse': {0: 4080, 1: 4107, 2: 4368, 3: 4491, 4: 4529, 5: 4624, 6: 4626, 7: 4639, 8: 4679, 9: 4781}, 'left_pulse_pos_in_E': {0: 10.002042118364418, 1: 40.29395464818188, 2: 41.19356816747343, 3: 41.76060061888303, 4: 47.90221207147802, 5: 51.27679395217831, 6: 52.39165780468267, 7: 55.37561818764979, 8: 55.47294132608167, 9: 55.99635666692289}, 'right_pulse_pos_in_E': {0: 41.19356816747343, 1: 41.76060061888303, 2: 47.90221207147802, 3: 51.27679395217831, 4: 52.39165780468267, 5: 55.37561818764979, 6: 55.47294132608167, 7: 55.99635666692289, 8: 57.33777021469516, 9: 60.984834434908144}, 'idx_window_left_border': {0: 0, 1: 3990, 2: 4058, 3: 4093, 4: 4237, 5: 4429, 6: 4510, 7: 4576, 8: 4625, 9: 4632}, 'idx_window_right_border': {0: 4094, 1: 4238, 2: 4430, 3: 4510, 4: 4577, 5: 4625, 6: 4633, 7: 4659, 8: 4730, 9: 4792}, 'left_win_pos_in_E': {0: 10.002042118364418, 1: 39.38459790393702, 2: 40.74003692229216, 3: 41.46513255508269, 4: 44.66179219947279, 5: 49.53272998148, 6: 51.82972979173252, 7: 53.82159300113625, 8: 55.40803086073492, 9: 55.76645477820397}, 'right_win_pos_in_E': {0: 41.48613320837913, 1: 44.6852679849016, 2: 49.56014983071213, 3: 51.82972979173252, 4: 53.85265044341121, 5: 55.40803086073492, 6: 55.79921126600202, 7: 56.66110947958804, 8: 59.119140585251095, 9: 61.39880967219205}, 'window_width': {0: 4095, 1: 249, 2: 373, 3: 418, 4: 341, 5: 197, 6: 124, 7: 84, 8: 106, 9: 161}, 'window_width_in_E': {0: 31.48409109001471, 1: 5.300670080964579, 2: 8.820112908419965, 3: 10.364597236649828, 4: 9.190858243938415, 5: 5.875300879254915, 6: 3.9694814742695, 7: 
2.8395164784517917, 8: 3.7111097245161773, 9: 5.632354893988079}, 'sum_pulses_duration_in_E': {0: 0.5157099691135514, 1: 0.5408987779694527, 2: 0.6869248977656355, 3: 0.7304908951030242, 4: 0.7269657511683718, 5: 0.537271616198268, 6: 0.7609034761658222, 7: 0.6178183490930067, 8: 0.8269277926972265, 9: 0.5591109437337494}, 'sum_pulse_sq': {0: 3.7944375922206044, 1: 3.8756992116858715, 2: 2.9661915477796663, 3: 3.070559830941317, 4: 3.0597037730539385, 5: 10.2020204659669, 6: 45.77535573608872, 7: 45.87630607524008, 8: 39.10335270063814, 9: 3.437205923490125}, 'pulse_to_window_rate': {0: 0.01638001769335214, 1: 0.10204347180781788, 2: 0.07788164447530765, 3: 0.0704794290047244, 4: 0.0790966122938326, 5: 0.09144580460471718, 6: 0.1916883807363909, 7: 0.2175787158769594, 8: 0.22282493757444324, 9: 0.09926770493999569}, 'max_height_in_window': {0: 20.815950580921104, 1: 20.815950580921104, 2: 5.324888970962656, 3: 5.324888970962656, 4: 5.14075603114903, 5: 86.81228155905252, 6: 110.06755904473022, 7: 110.06755904473022, 8: 110.06755904473022, 9: 14.735092268739246}, 'min_height_in_window': {0: -0.011928180619527797, 1: 1.6172637244080776, 2: 1.6172637244080776, 3: 0.8658702248969847, 4: 0.8658702248969847, 5: 0.8658702248969847, 6: 1.8476229914953515, 7: 2.918666252051556, 8: 3.2397786967451707, 9: 2.4893555139463266}, 'windows_sq': {0: 655.3712842149647, 1: 110.33848645112575, 2: 46.96612194869083, 3: 55.19032951390669, 4: 47.24795994896218, 5: 510.0482741740266, 6: 436.911136546121, 7: 312.538647650477, 8: 408.4727887246568, 9: 82.9932690531994}} )
# Draw every window of `windows_df` as a translucent rectangle and label its
# left and right edges with the window index.
# Expects `plt` (matplotlib.pyplot) and `windows_df` to be defined already.
fig_w, axs_w = plt.subplots()
# theoretical cross-section
# axs_w.plot(df_wo_NANS['E'], df_wo_NANS['theo_cs'], marker = "o", markersize = 1, linewidth = 1.0, alpha=0.6, color = 'green', label = 'Theo Cross Section')
axs_w.grid(color='grey', linestyle='--', linewidth=0.2)

# window rectangles
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

boxes = []
for row in windows_df.itertuples():
    left = row.left_win_pos_in_E
    right = row.right_win_pos_in_E
    bottom = row.min_height_in_window
    top = row.max_height_in_window
    # lower-left corner, width, height
    boxes.append(Rectangle((left, bottom), row.window_width_in_E, top - bottom))
    label = str(row.Index)
    # mark of the start of the current window: left edge, at the top
    axs_w.text(
        left,
        top,
        label,
        horizontalalignment='center',
        verticalalignment='center',
        fontsize=5,
    )
    # mark of the end of the current window: right edge, at top + 0.5*bottom
    axs_w.text(
        right,
        top + 0.5*bottom,
        label + 'e',
        horizontalalignment='center',
        verticalalignment='center',
        fontsize=5,
    )

# A single PatchCollection draws all rectangles with one shared style.
pc = PatchCollection(boxes, facecolor='y', alpha=0.2, edgecolor='black')
axs_w.add_collection(pc)
# add_collection() does not reliably rescale the view by itself (older
# Matplotlib releases only update the data limits), and text artists never
# affect the limits, so request the rescale explicitly; otherwise the
# rectangles lie outside the default 0..1 view.
axs_w.autoscale_view()
I added the text marks using a loop, but is it possible to do this with patches and collections to make the code more efficient?

Related

How to subtract two dates based on a filter on two other columns

I am new in Python and I am struggling to reshape my dataFrame.
For a particular client (contact_id), I want to add a new date column that subtracts the DTHR_OPERATION date for 'TYPE_OPER_VALIDATION = 1' from the DTHR_OPERATION date for 'TYPE_OPER_VALIDATION = 3'.
If 'TYPE_OPER_VALIDATION' is equal to 3 and there is less than an hour between those two dates, I want to add a string such as 'connection' in the new column.
I get the error "'Series' object has no attribute 'total_seconds'" when I try to check whether the time difference is less than or equal to an hour. I tried many solutions I found on the Internet, but I always seem to run into a data type issue.
Here is my code snippet:
# Operations of type 1 and of type 3, each reduced to the contact id, the
# operation type and the operation timestamp.
df_oper_one = merged_table.loc[(merged_table['TYPE_OPER_VALIDATION']==1),['contact_id','TYPE_OPER_VALIDATION','DTHR_OPERATION']]
df_oper_three = merged_table.loc[(merged_table['TYPE_OPER_VALIDATION']==3),['contact_id','TYPE_OPER_VALIDATION','DTHR_OPERATION']]
# One label ('connection' or 'null') is appended per iterated contact_id.
connection = []
for row in merged_table['contact_id']:
# NOTE(review): the loop variable `row` is never used in the condition below,
# which operates on whole DataFrames/Series rather than on a single contact.
# NOTE(review): `.total_seconds()` is applied to the Series returned by the
# second pd.to_datetime(...) call, before the subtraction takes place; a Series
# has no such method (a timedelta Series exposes it as `.dt.total_seconds()`),
# which matches the reported AttributeError.
if (df_validation.loc[(df_validation['TYPE_OPER_VALIDATION']==3)]) & ((pd.to_datetime(df_oper_three['DTHR_OPERATION'],format='%Y-%m-%d %H:%M:%S') - pd.to_datetime(df_oper_one['DTHR_OPERATION'],format='%Y-%m-%d %H:%M:%S').total_seconds()) <= 3600): connection.append('connection')
# if diff_date.total_seconds() <= 3600: connection.append('connection')
else: connection.append('null')
# Store the collected labels in a new 'connection' column.
merged_table['connection'] = pd.Series(connection)
Hello Nicolas and welcome to Stack Overflow. Please remember to always include sample data to reproduce your issue. Here is sample data to reproduce part of your dataframe:
# Sample data: two contacts, each with a series of timestamped validation
# operations of type 1 or type 3.
df = pd.DataFrame({
    'Id contact': ['cf2e79bc-8cac-ec11-9840-000d3ab078e6']*12 + ['865c5edf-c7ac-ec11-9840-000d3ab078e6']*10,
    'DTHR OPERATION': [
        '11/10/2022 07:07', '11/10/2022 07:29', '11/10/2022 15:47', '11/10/2022 16:22',
        '11/10/2022 16:44', '11/10/2022 18:06', '12/10/2022 07:11', '12/10/2022 07:25',
        '12/10/2022 17:21', '12/10/2022 18:04', '13/10/2022 07:09', '13/10/2022 18:36',
        '14/09/2022 17:59', '15/09/2022 09:34', '15/09/2022 19:17', '16/09/2022 08:31',
        '16/09/2022 19:18', '17/09/2022 06:41', '17/09/2022 11:19', '17/09/2022 15:48',
        '17/09/2022 16:13', '17/09/2022 17:07',
    ],
    'lastname': ['BOUALAMI']*12 + ['VERVOORT']*10,
    'TYPE_OPER_VALIDATION': [1, 3, 1, 3, 3, 3, 1, 3, 1, 3, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3],
})
# The strings are day-first ('13/10/2022' cannot be month 13). Without an
# explicit format pandas guesses month-first from the first value, which either
# swaps day and month or fails outright, so spell the format out.
df['DTHR OPERATION'] = pd.to_datetime(df['DTHR OPERATION'], format='%d/%m/%Y %H:%M')
I would recommend creating a new table to more easily accomplish your task:
# Pair every type-3 operation with every type-1 operation of the same contact.
key_cols = ['Id contact', 'DTHR OPERATION']
type3_ops = df.loc[df['TYPE_OPER_VALIDATION'] == 3, key_cols]
type1_ops = df.loc[df['TYPE_OPER_VALIDATION'] == 1, key_cols]
df2 = type3_ops.merge(type1_ops, on='Id contact', suffixes=('_type3', '_type1'))
Then find the time difference:
# Time from the type-1 operation to the paired type-3 operation, in seconds
# (negative when the type-3 operation came first).
elapsed = df2['DTHR OPERATION_type3'] - df2['DTHR OPERATION_type1']
df2['seconds'] = elapsed.dt.total_seconds()
Finally, flag connections of an hour or less:
# A connection is a type-3 operation at most one hour AFTER the type-1
# operation. A plain `<= 3600` would also flag every negative difference
# (type 3 before type 1), so bound the range from below as well.
df2['connection'] = np.where(df2['seconds'].between(0, 3600), 'yes', 'no')
Hope this helps!
Sure, here is the information you are looking for:
df_contact = pd.DataFrame{'contact_id': {0: '865C5EDF-C7AC-EC11-9840', 1: '9C9690B1-F8AC-EC11', 2: '4DD27359-14AF-EC11-9840', 3: '0091373E-E7F4-4170-BCAC'}, 'birthdate': {0: Timestamp('2005-05-19
00:00:00'), 1: Timestamp('1982-01-28 00:00:00'), 2: Timestamp('1997-05-15 00:00:00'), 3: Timestamp('2005-03-22 00:00:00')}, 'fullname': {0: 'Laura VERVO', 1: 'Mélanie ALBE', 2: 'Eric VANO', 3: 'Jean Docq'}, 'lastname': {0: 'VERVO', 1: 'ALBE', 2: 'VANO', 3: 'Docq'}, 'age': {0: 17, 1: 40, 2: 25, 3: 17}}
df_validation = pd.dataframe{'validation_id': {0: 8263835881, 1: 8263841517, 2: 8263843376, 3: 8263843377, 4: 8263843381, 5: 8263843382, 6: 8263863088, 7: 8263863124, 8: 8263868113, 9: 8263868123}, 'LIBEL_LONG_PRODUIT_TITRE': {0: 'Mens NEXT 12-17', 1: 'Ann NEXT 25-64%B', 2: 'Ann EXPRESS CBLANCHE', 3: 'Multi 8 NEXT', 4: 'Ann EXPRESS 18-24', 5: 'SNCB+TEC NEXT ABO', 6: 'Ann EXPRESS 18-24', 7: 'Ann EXPRESS 12-17%B', 8: '1 jour EX Réfugié', 9: 'Ann EXPRESS 2564%B'}, 'DTHR_OPERATION':
{0: Timestamp('2022-10-01 00:02:02'), 1: Timestamp('2022-10-01 00:22:45'), 2: Timestamp('2022-10-01 00:02:45'), 3: Timestamp('2022-10-01 00:02:49'), 4: Timestamp('2022-10-01 00:07:03'), 5: Timestamp('2022-10-01 00:07:06'), 6: Timestamp('2022-10-01 00:07:40'), 7: Timestamp('2022-10-01 00:31:51'), 8: Timestamp('2022-10-01 00:03:33'), 9: Timestamp('2022-10-01 00:07:40')}, 'TYPE_OPER_VALIDATION': {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 3, 7: 3, 8: 2, 9: 1}, 'NUM_SERIE_SUPPORT': {0: '2040121921', 1: '2035998914', 2: '2034456458', 3: '14988572652829627697', 4: '2035956003', 5: '2033613155', 6: '2040119429', 7: '2036114867', 8: '14988572650230713650', 9: '2040146199'}}
{'support_id': {0: '8D3A331D-3E86-EC11-93B0', 1: '44863926-3E86-EC11-93B0', 2: '45863926-3E86-EC11-93B0', 3: '46863926-3E86-EC11-93B0', 4: '47863926-3E86-EC11-93B0', 5: 'E3863926-3E86-EC11-93B0', 6: '56873926-3E86-EC11', 7: 'E3CE312C-3E86-EC11-93B0', 8: 'F3CE312C-3E86-EC11-93B0', 9: '3CCF312C-3E86-EC11-93B0'}, 'bd_linkedcustomer': {0: '15CCC384-C4AD-EC11', 1: '9D27061D-14AE-EC11-9840', 2: '74CAE68F-D4AC-EC11-9840', 3: '18F5FE1A-58AC-EC11-983F', 4: None, 5: '9FBDA103-2FAD-EC11-9840', 6: 'EEA1FB63-75AC-EC11-9840', 7: 'F150EC3D-0DAD-EC11-9840', 8: '111DE8C4-CAAC-EC11-9840', 9: None}, 'bd_supportserialnumber': {0: '44884259', 1: '2036010559', 2: '62863150', 3: '2034498160', 4: '62989611', 5: '2036094315', 6: '2033192919', 7: '2036051529', 8: '2036062236', 9: '2033889172'}}
df_support = pd.dataframe{'support_id': {0: '8D3A331D-3E86-EC11-93B0', 1: '44863926-3E86-EC11', 2: '45863926-3E86-EC11-93B0', 3: '46863926-3E86-EC11-93B0', 4: '47863926-3E86-EC11-93B0', 5: 'E3863926-3E86-EC11-93B0', 6: '56873926-3E86-EC11-93B0', 7: 'E3CE312C-3E86-EC11-93B0', 8: 'F3CE312C-3E86-EC11-93B0', 9: '3CCF312C-3E86-EC11-93B0'}, 'bd_linkedcustomer': {0: '15CCC384-C4AD-EC11-9840', 1: '9D27061D-14AE-EC11-9840', 2: '74CAE68F-D4AC-EC11-9840', 3: '18F5FE1A-58AC-EC11-983F', 4: None, 5: '9FBDA103-2FAD-EC11', 6: 'EEA1FB63-75AC-EC11-9840', 7: 'F150EC3D-0DAD-EC11-9840', 8: '111DE8C4-CAAC-EC11-9840', 9: None}, 'bd_supportserialnumber': {0: '44884259', 1: '2036010559', 2: '62863150', 3: '2034498160', 4: '62989611', 5: '2036094315', 6: '2033192919', 7: '2036051529', 8: '2036062236', 9: '2033889172'}}
df2 = pd.dataframe{'support_id': {0: '4BE73E8C-B8F9-EC11-BB3D', 1: '4BE73E8C-B8F9-EC11-BB3D', 2: '4BE73E8C-B8F9-EC11-BB3D', 3: '4BE73E8C-B8F9-EC11-BB3D', 4: '4BE73E8C-B8F9-EC11-BB3D', 5: '4BE73E8C-B8F9-EC11-BB3D', 6: '4BE73E8C-B8F9-EC11', 7: '4BE73E8C-B8F9-EC11-BB3D', 8: '4BE73E8C-B8F9-EC11-BB3D', 9: '4BE73E8C-B8F9-EC11-BB3D'}, 'bd_linkedcustomer': {0: '9C9690B1-F8AC-EC11-9840', 1: '9C9690B1-F8AC-EC11-9840', 2: '9C9690B1-F8AC-EC11-9840', 3: '9C9690B1-F8AC-EC11-9840', 4: '9C9690B1-F8AC-EC11-9840',
5: '9C9690B1-F8AC-EC11-9840', 6: '9C9690B1-F8AC-EC11-9840', 7: '9C9690B1-F8AC-EC11-9840', 8: '9C9690B1-F8AC-EC11-9840', 9: '9C9690B1-F8AC-EC11-9840'}, 'bd_supportserialnumber': {0: '2036002771', 1: '2036002771', 2: '2036002771', 3: '2036002771', 4: '2036002771', 5: '2036002771', 6: '2036002771', 7: '2036002771', 8: '2036002771', 9: '2036002771'}, 'contact_id': {0: '9C9690B1-F8AC-EC11-9840', 1: '9C9690B1-F8AC-EC11-9840', 2: '9C9690B1-F8AC-EC11-9840', 3: '9C9690B1-F8AC-EC11-9840', 4: '9C9690B1-F8AC-EC11-9840', 5: '9C9690B1-F8AC-EC11-9840', 6: '9C9690B1-F8AC-EC11-9840', 7: '9C9690B1-F8AC-EC11-9840', 8: '9C9690B1-F8AC-EC11-9840', 9: '9C9690B1-F8AC-EC11-9840'}, 'birthdate': {0: Timestamp('1982-01-28 00:00:00'), 1: Timestamp('1982-01-28 00:00:00'), 2: Timestamp('1982-01-28 00:00:00'), 3: Timestamp('1982-01-28 00:00:00'), 4: Timestamp('1982-01-28 00:00:00'), 5: Timestamp('1982-01-28 00:00:00'), 6: Timestamp('1982-01-28 00:00:00'), 7: Timestamp('1982-01-28 00:00:00'), 8: Timestamp('1982-01-28 00:00:00'), 9: Timestamp('1982-01-28 00:00:00')}, 'fullname': {0: 'Mélanie ALBE', 1: 'Mélanie ALBE', 2: 'Mélanie ALBE', 3: 'Mélanie ALBE', 4: 'Mélanie ALBE', 5: 'Mélanie ALBE', 6: 'Mélanie ALBE', 7: 'Mélanie ALBE', 8: 'Mélanie ALBE', 9: 'Mélanie ALBE'}, 'lastname': {0: 'ALBE', 1: 'ALBE', 2: 'ALBE', 3: 'ALBE', 4: 'ALBE', 5: 'ALBE', 6: 'ALBE', 7: 'ALBE', 8: 'ALBE', 9: 'ALBE'}, 'age': {0: 40, 1: 40, 2: 40, 3: 40, 4: 40, 5: 40, 6: 40, 7: 40, 8: 40, 9: 40}, 'validation_id': {0: 8264573419, 1: 8264574166, 2: 8264574345, 3: 8264676975, 4: 8265441741, 5: 8272463799, 6: 8272471694, 7: 8274368291, 8: 8274397366, 9: 8277077728}, 'LIBEL_LONG_PRODUIT_TITRE': {0: 'Ann NEXT 25-64', 1: 'Ann NEXT 25-64', 2: 'Ann NEXT 25-64', 3: 'Ann NEXT 25-64', 4: 'Ann NEXT 25-64', 5: 'Ann NEXT 25-64', 6: 'Ann NEXT 25-64', 7: 'Ann NEXT 25-64', 8: 'Ann NEXT 25-64', 9: 'Ann NEXT 25-64'}, 'DTHR_OPERATION': {0: Timestamp('2022-10-01 08:30:18'), 1: Timestamp('2022-10-01 12:23:34'), 2: Timestamp('2022-10-01 07:47:46'), 3: 
Timestamp('2022-10-01 13:11:54'), 4: Timestamp('2022-10-01 12:35:02'), 5: Timestamp('2022-10-04 08:34:23'), 6: Timestamp('2022-10-04 08:04:50'), 7: Timestamp('2022-10-04 17:17:47'), 8: Timestamp('2022-10-04 15:20:29'), 9: Timestamp('2022-10-05 07:54:14')}, 'TYPE_OPER_VALIDATION': {0: 3, 1: 1, 2: 1, 3: 3, 4: 3, 5: 3, 6: 1, 7: 1, 8: 1, 9: 1}, 'NUM_SERIE_SUPPORT': {0: '2036002771', 1: '2036002771', 2: '2036002771', 3: '2036002771', 4: '2036002771', 5: '2036002771', 6: '2036002771', 7: '2036002771', 8: '2036002771', 9: '2036002771'}}
df3 = pd.dataframe{'contact_id': {0: '9C9690B1-F8AC-EC11-9840', 1: '9C9690B1-F8AC-EC11-9840', 2: '9C9690B1-F8AC-EC11-9840', 3: '9C9690B1-F8AC-EC11-9840', 4: '9C9690B1-F8AC-EC11-9840', 5: '9C9690B1-F8AC-EC11-9840', 6: '9C9690B1-F8AC-EC11-9840', 7: '9C9690B1-F8AC-EC11-9840', 8: '9C9690B1-F8AC-EC11-9840', 9: '9C9690B1-F8AC-EC11-9840'}, 'DTHR_OPERATION_type3': {0: Timestamp('2022-10-01 08:30:18'), 1: Timestamp('2022-10-01 08:30:18'), 2: Timestamp('2022-10-01 08:30:18'), 3: Timestamp('2022-10-01 08:30:18'), 4: Timestamp('2022-10-01 08:30:18'), 5: Timestamp('2022-10-01 08:30:18'), 6: Timestamp('2022-10-01 08:30:18'), 7: Timestamp('2022-10-01 08:30:18'), 8: Timestamp('2022-10-01 08:30:18'), 9: Timestamp('2022-10-01 08:30:18')}, 'DTHR_OPERATION_type1': {0: Timestamp('2022-10-01 12:23:34'), 1: Timestamp('2022-10-01 07:47:46'), 2: Timestamp('2022-10-04 08:04:50'), 3: Timestamp('2022-10-04 17:17:47'), 4: Timestamp('2022-10-04 15:20:29'), 5: Timestamp('2022-10-05 07:54:14'), 6: Timestamp('2022-10-05 18:22:42'), 7: Timestamp('2022-10-06 08:14:28'), 8: Timestamp('2022-10-06 18:19:33'), 9: Timestamp('2022-10-08 07:46:45')}, 'seconds': {0: -13996.0, 1: 2552.0, 2: -257672.00000000003, 3: -290849.0, 4: -283811.0, 5: -343436.0, 6: -381144.0, 7: -431050.0, 8: -467355.00000000006, 9: -602187.0}, 'first_connection': {0: 'no', 1: 'yes', 2: 'no', 3: 'no', 4: 'no', 5: 'no', 6: 'no', 7: 'no', 8: 'no', 9: 'no'}}
df4 = pd.dataframe{'contact_id': {0: '9C9690B1-F8AC-EC11-9840', 1: '9C9690B1-F8AC-EC11-9840', 2: '9C9690B1-F8AC-EC11-9840', 3: '9C9690B1-F8AC-EC11-9840', 4: '9C9690B1-F8AC-EC11-9840', 5: '9C9690B1-F8AC-EC11-9840', 6: '9C9690B1-F8AC-EC11-9840', 7: '9C9690B1-F8AC-EC11-9840', 8: '9C9690B1-F8AC-EC11-9840', 9: '9C9690B1-F8AC-EC11-9840'}, 'DTHR_OPERATION_type3': {0: Timestamp('2022-10-01 08:30:18'), 1: Timestamp('2022-10-01 08:30:18'), 2: Timestamp('2022-10-01 08:30:18'), 3: Timestamp('2022-10-01 08:30:18'), 4: Timestamp('2022-10-01 08:30:18'), 5: Timestamp('2022-10-01 08:30:18'), 6: Timestamp('2022-10-01 08:30:18'), 7: Timestamp('2022-10-01 08:30:18'), 8: Timestamp('2022-10-01 08:30:18'), 9: Timestamp('2022-10-01 08:30:18')}, 'DTHR_OPERATION_type3bis': {0: Timestamp('2022-10-01 08:30:18'), 1: Timestamp('2022-10-01 13:11:54'), 2: Timestamp('2022-10-01 12:35:02'), 3: Timestamp('2022-10-04 08:34:23'), 4: Timestamp('2022-10-05 08:27:04'), 5: Timestamp('2022-10-05 19:05:29'), 6: Timestamp('2022-10-06 08:34:21'), 7: Timestamp('2022-10-06 18:37:56'), 8: Timestamp('2022-10-06 19:08:30'), 9: Timestamp('2022-10-08 13:01:13')}, 'seconds_type3': {0: 0.0, 1: -16896.0, 2: -14684.000000000002, 3: -259445.00000000003, 4: -345406.0, 5: -383711.0, 6: -432243.0, 7: -468458.00000000006, 8: -470292.00000000006, 9: -621055.0}, 'second_or_more_connection': {0: 'no', 1: 'no', 2: 'no', 3: 'no', 4: 'no', 5: 'no', 6: 'no', 7: 'no', 8: 'no', 9: 'no'}}
The desired result is a dF5 with the following columns [['contact_id', 'fullname', 'validation_id', 'LIBEL_LONG_PRODUIT_TITRE', 'TYPE_OPER_VALIDATION']] as well as this new column dF5['connection']. Don't hesitate to reach out if you need further information or clarification. Many thanks for your support :)

Efficient way to apply function with multiple operations on dataframe row

I have a pandas dataframe that looks like this:
X[m] Y[m] Z[m] ... beta newx newy
0 1.439485 0.087100 0.029771 ... 0.063807 1439 87
1 1.439485 0.089729 0.029121 ... 0.065871 1439 89
2 1.439485 0.091992 0.030059 ... 0.067653 1439 91
3 1.439485 0.082073 0.030721 ... 0.059883 1439 82
4 1.439485 0.084095 0.028952 ... 0.061458 1439 84
5 1.439485 0.085937 0.028019 ... 0.062897 1439 85
There are hundreds of thousands of such rows, and I have multiple dataframes like this. X and Y are coordinates on a plane (Z is not important) that has been rotated 45 degrees to the right around its middle point. I need to put all points back in their original place, i.e. -45 degrees from their current location. The columns newx and newy currently hold the coordinates before the change; I want to overwrite these two columns with the new coordinates. Since I know the coordinates of the middle point and of the point itself, the middle-to-point angle (alpha) and the middle-to-fixed-point angle (beta), I can use the approach presented on Mathematics SO. I have translated that code to Python like this:
# Recompute newx/newy for every row from the middle point (`mid`), the point
# itself (x, y) and the two angles alpha and beta.
# Expects `df`, `mid` and `math` to be defined already. Writes go through
# `df.at[i, ...]`, which addresses rows by label, so this assumes the default
# 0..n-1 index (as the original `df.newx[i]` did).
for i in range(len(df)):
    row = df.iloc[i]  # fetch the row once; only newx/newy are written below
    # NOTE(review): exact float equality with pi/2 is kept from the original;
    # it only matches angles that are bit-for-bit equal to these constants.
    if row.alpha == math.pi/2 or row.alpha == 3*math.pi/2:
        # `df.at` writes into the frame itself; the chained `df.newx[i] = ...`
        # may write to a temporary copy and silently leave `df` unchanged.
        df.at[i, 'newx'] = mid
        df.at[i, 'newy'] = int(math.tan(row.beta*(row.x-mid)+mid))
    elif row.beta == math.pi/2 or row.beta == 3*math.pi/2:
        # newx keeps its current value -- it is already set
        df.at[i, 'newy'] = int(math.tan(row.alpha*(mid-row.x)+mid))
    else:
        m0 = math.tan(row.alpha)
        m1 = math.tan(row.beta)
        x = ((m0 * row.x - m1 * mid) - (row.y - mid)) / (m0 - m1)
        df.at[i, 'newx'] = int(x)
        df.at[i, 'newy'] = int(m0 * (x - row.x) + row.y)
Although this does what I need and moves each point to the correct position, it is extremely slow, and I have too many files to process this way. I know that there are much faster methods, such as vectorization, apply and list comprehensions. However, I can't figure out how to use them with this function.
Here are first 10 lines as dictionary:
{'X[m]': {0: 1.439484727008419, 1: 1.439484727008419, 2: 1.439484727008419, 3: 1.439484727008419, 4: 1.439484727008419, 5: 1.439484727008419, 6: 1.439484727008419, 7: 1.439484727008419, 8: 1.439484727008419, 9: 1.439484727008419}, 'Y[m]': {0: 0.08709958190841899, 1: 0.08972904270841897, 2: 0.091991981408419, 3: 0.08207325440841898, 4: 0.08409548540841899, 5: 0.08593746080841899, 6: 0.09416210370841899, 7: 0.08874029660841898, 8: 0.09168940400841899, 9: 0.09434491760841898}, 'Z[m]': {0: 0.029770726299999998, 1: 0.0291213803, 2: 0.030058834700000002, 3: 0.0307212565, 4: 0.028951926200000002, 5: 0.0280194897, 6: 0.030717188500000003, 7: 0.026446931099999998, 8: 0.0269318204, 9: 0.0273838975}, 'Velocity[ms^-1]': {0: ['-1.67570162e+00', '-2.59946979e-15', '-2.54510192e-15'], 1: ['-1.63915336e+00', '-2.54277343e-15', '-2.48959140e-15'], 2: ['-1.69191790e+00', '-2.62462561e-15', '-2.56973173e-15'], 3: ['-1.72920227e+00', '-2.68246377e-15', '-2.62636012e-15'], 4: ['-1.62961555e+00', '-2.52797767e-15', '-2.47510523e-15'], 5: ['-1.57713342e+00', '-2.44656340e-15', '-2.39539372e-15'], 6: ['-1.72897375e+00', '-2.68210929e-15', '-2.62601305e-15'], 7: ['-1.48862195e+00', '-2.30925809e-15', '-2.26096006e-15'], 8: ['-1.51591396e+00', '-2.35159534e-15', '-2.30241195e-15'], 9: ['-1.54135919e+00', '-2.39106792e-15', '-2.34105888e-15']}, 'L': {0: 0.9582306809661671, 1: 0.9564957485824027, 2: 0.9550059224371557, 3: 0.9615583774318917, 4: 0.9602177760259737, 5: 0.9589987519260235, 6: 0.9535800607266656, 7: 0.9571476500665267, 8: 0.9552049510914844, 9: 0.953460072490227}, 'x': {0: 1439, 1: 1439, 2: 1439, 3: 1439, 4: 1439, 5: 1439, 6: 1439, 7: 1439, 8: 1439, 9: 1439}, 'y': {0: 87, 1: 89, 2: 91, 3: 82, 4: 84, 5: 85, 6: 94, 7: 88, 8: 91, 9: 94}, 'alpha': {0: -0.7215912027987663, 1: -0.719527331916007, 2: -0.7177451479100487, 3: -0.7255156166536015, 4: -0.7239399868865558, 5: -0.7225009735356016, 6: -0.7160308360594005, 7: -0.7203042790640757, 8: -0.7179837655204843, 9: -0.7158861861473951}, 
'beta': {0: 0.06380696059868196, 1: 0.06587083148144124, 2: 0.06765301548739955, 3: 0.05988254674384674, 4: 0.06145817651089247, 5: 0.06289718986184667, 6: 0.06936732733804774, 7: 0.0650938843333726, 8: 0.06741439787696402, 9: 0.0695119772500532}, 'newx': {0: 1439, 1: 1439, 2: 1439, 3: 1439, 4: 1439, 5: 1439, 6: 1439, 7: 1439, 8: 1439, 9: 1439}, 'newy': {0: 87, 1: 89, 2: 91, 3: 82, 4: 84, 5: 85, 6: 94, 7: 88, 8: 91, 9: 94}}
I suspect that the way mid is used in your code may be causing you problems. Is mid numeric? Are the x- and y-coordinates of your middle point the same value?
@OP, please confirm that your variable names, as compared to your linked source, are as I have translated them:
| linked name | your name |
|---|---|
| a0 | beta |
| a1 | alpha |
| (x0, y0) | (df.x, df.y) |
| (x1, y1) | (mid, mid) |
Update: this answer shares some ideas with @mitoRibo's answer, but I re-translated from OP's linked source and suspect OP made some transcription errors, noted in the comments. Both of us used a strategy of "selectively calculate newx/newy using masking, where the masks are equivalent to the if/elif/else conditions provided".
#setup
import pandas as pd
import numpy as np
import math
df = pd.DataFrame({'X[m]': {0: 1.439484727008419, 1: 1.439484727008419, 2: 1.439484727008419, 3: 1.439484727008419, 4: 1.439484727008419, 5: 1.439484727008419, 6: 1.439484727008419, 7: 1.439484727008419, 8: 1.439484727008419, 9: 1.439484727008419}, 'Y[m]': {0: 0.08709958190841899, 1: 0.08972904270841897, 2: 0.091991981408419, 3: 0.08207325440841898, 4: 0.08409548540841899, 5: 0.08593746080841899, 6: 0.09416210370841899, 7: 0.08874029660841898, 8: 0.09168940400841899, 9: 0.09434491760841898}, 'Z[m]': {0: 0.029770726299999998, 1: 0.0291213803, 2: 0.030058834700000002, 3: 0.0307212565, 4: 0.028951926200000002, 5: 0.0280194897, 6: 0.030717188500000003, 7: 0.026446931099999998, 8: 0.0269318204, 9: 0.0273838975}, 'Velocity[ms^-1]': {0: ['-1.67570162e+00', '-2.59946979e-15', '-2.54510192e-15'], 1: ['-1.63915336e+00', '-2.54277343e-15', '-2.48959140e-15'], 2: ['-1.69191790e+00', '-2.62462561e-15', '-2.56973173e-15'], 3: ['-1.72920227e+00', '-2.68246377e-15', '-2.62636012e-15'], 4: ['-1.62961555e+00', '-2.52797767e-15', '-2.47510523e-15'], 5: ['-1.57713342e+00', '-2.44656340e-15', '-2.39539372e-15'], 6: ['-1.72897375e+00', '-2.68210929e-15', '-2.62601305e-15'], 7: ['-1.48862195e+00', '-2.30925809e-15', '-2.26096006e-15'], 8: ['-1.51591396e+00', '-2.35159534e-15', '-2.30241195e-15'], 9: ['-1.54135919e+00', '-2.39106792e-15', '-2.34105888e-15']}, 'L': {0: 0.9582306809661671, 1: 0.9564957485824027, 2: 0.9550059224371557, 3: 0.9615583774318917, 4: 0.9602177760259737, 5: 0.9589987519260235, 6: 0.9535800607266656, 7: 0.9571476500665267, 8: 0.9552049510914844, 9: 0.953460072490227}, 'x': {0: 1439, 1: 1439, 2: 1439, 3: 1439, 4: 1439, 5: 1439, 6: 1439, 7: 1439, 8: 1439, 9: 1439}, 'y': {0: 87, 1: 89, 2: 91, 3: 82, 4: 84, 5: 85, 6: 94, 7: 88, 8: 91, 9: 94}, 'alpha': {0: -0.7215912027987663, 1: -0.719527331916007, 2: -0.7177451479100487, 3: -0.7255156166536015, 4: -0.7239399868865558, 5: -0.7225009735356016, 6: -0.7160308360594005, 7: -0.7203042790640757, 8: -0.7179837655204843, 9: 
-0.7158861861473951}, 'beta': {0: 0.06380696059868196, 1: 0.06587083148144124, 2: 0.06765301548739955, 3: 0.05988254674384674, 4: 0.06145817651089247, 5: 0.06289718986184667, 6: 0.06936732733804774, 7: 0.0650938843333726, 8: 0.06741439787696402, 9: 0.0695119772500532}})
# Vectorized version of the row loop: build one boolean mask per if/elif/else
# branch and fill newx/newy for the rows of each mask.
# Expects `df` (set up above) and `mid` (the middle-point coordinate used in
# the question) to be defined already.

# make the new columns
# if any of the values are still np.nan when we're done, something went wrong
df['newx'] = np.nan
df['newy'] = np.nan

# Float angles are compared against pi/2 and 3pi/2 with a tolerance.
EPSILON = 1e-6
# windows = ((pi/2 - EPSILON, pi/2 + EPSILON), (3pi/2 - EPSILON, 3pi/2 + EPSILON))
# `Series.between(left, right)` tests left <= value <= right, so each window
# must list its lower bound first; with the bounds reversed no row ever matches.
windows = tuple(tuple(d*math.pi/2 + s*EPSILON for s in (-1, 1)) for d in (1, 3))

alpha_cond = df.alpha.between(*windows[0]) | df.alpha.between(*windows[1])
# mirror the `elif`: rows taken by the alpha branch are excluded here
beta_cond = (df.beta.between(*windows[0]) | df.beta.between(*windows[1])) & ~alpha_cond
neither = ~alpha_cond & ~beta_cond

# Handle `alpha near pi/2 or 3pi/2`:
c1 = df.loc[alpha_cond]
df.loc[alpha_cond, 'newx'] = mid
# Differences from OP's loop for this branch:
#   - `tan` is applied to the angle only, not to the whole expression
#   - `df.x - mid` became `mid - df.x`
#   - the offset is `df.y` instead of `mid`
df.loc[alpha_cond, 'newy'] = (np.tan(c1.beta) * (mid - c1.x) + c1.y).astype(int)

# Handle `beta near pi/2 or 3pi/2`:
c2 = df.loc[beta_cond]
df.loc[beta_cond, 'newx'] = c2.x
# Differences from OP's loop for this branch:
#   - `tan` is applied to the angle only, not to the whole expression
#   - `mid - df.x` became `df.x - mid`
df.loc[beta_cond, 'newy'] = (np.tan(c2.alpha) * (c2.x - mid) + mid).astype(int)

# Handle the remainder:
c3 = df.loc[neither]
m0 = np.tan(c3.alpha)
m1 = np.tan(c3.beta)
t = ((m0 * c3.x - m1 * mid) - (c3.y - mid)) / (m0 - m1)
df.loc[neither, 'newx'] = t.astype(int)
df.loc[neither, 'newy'] = (m0 * (t - c3.x) + c3.y).astype(int)
Same approach as @Joshua Voskamp, but I still wanted to share
import pandas as pd
import numpy as np
import math
df = pd.DataFrame({'X[m]': {0: 1.439484727008419, 1: 1.439484727008419, 2: 1.439484727008419, 3: 1.439484727008419, 4: 1.439484727008419, 5: 1.439484727008419, 6: 1.439484727008419, 7: 1.439484727008419, 8: 1.439484727008419, 9: 1.439484727008419}, 'Y[m]': {0: 0.08709958190841899, 1: 0.08972904270841897, 2: 0.091991981408419, 3: 0.08207325440841898, 4: 0.08409548540841899, 5: 0.08593746080841899, 6: 0.09416210370841899, 7: 0.08874029660841898, 8: 0.09168940400841899, 9: 0.09434491760841898}, 'Z[m]': {0: 0.029770726299999998, 1: 0.0291213803, 2: 0.030058834700000002, 3: 0.0307212565, 4: 0.028951926200000002, 5: 0.0280194897, 6: 0.030717188500000003, 7: 0.026446931099999998, 8: 0.0269318204, 9: 0.0273838975}, 'Velocity[ms^-1]': {0: ['-1.67570162e+00', '-2.59946979e-15', '-2.54510192e-15'], 1: ['-1.63915336e+00', '-2.54277343e-15', '-2.48959140e-15'], 2: ['-1.69191790e+00', '-2.62462561e-15', '-2.56973173e-15'], 3: ['-1.72920227e+00', '-2.68246377e-15', '-2.62636012e-15'], 4: ['-1.62961555e+00', '-2.52797767e-15', '-2.47510523e-15'], 5: ['-1.57713342e+00', '-2.44656340e-15', '-2.39539372e-15'], 6: ['-1.72897375e+00', '-2.68210929e-15', '-2.62601305e-15'], 7: ['-1.48862195e+00', '-2.30925809e-15', '-2.26096006e-15'], 8: ['-1.51591396e+00', '-2.35159534e-15', '-2.30241195e-15'], 9: ['-1.54135919e+00', '-2.39106792e-15', '-2.34105888e-15']}, 'L': {0: 0.9582306809661671, 1: 0.9564957485824027, 2: 0.9550059224371557, 3: 0.9615583774318917, 4: 0.9602177760259737, 5: 0.9589987519260235, 6: 0.9535800607266656, 7: 0.9571476500665267, 8: 0.9552049510914844, 9: 0.953460072490227}, 'x': {0: 1439, 1: 1439, 2: 1439, 3: 1439, 4: 1439, 5: 1439, 6: 1439, 7: 1439, 8: 1439, 9: 1439}, 'y': {0: 87, 1: 89, 2: 91, 3: 82, 4: 84, 5: 85, 6: 94, 7: 88, 8: 91, 9: 94}, 'alpha': {0: -0.7215912027987663, 1: -0.719527331916007, 2: -0.7177451479100487, 3: -0.7255156166536015, 4: -0.7239399868865558, 5: -0.7225009735356016, 6: -0.7160308360594005, 7: -0.7203042790640757, 8: -0.7179837655204843, 9: 
-0.7158861861473951}, 'beta': {0: 0.06380696059868196, 1: 0.06587083148144124, 2: 0.06765301548739955, 3: 0.05988254674384674, 4: 0.06145817651089247, 5: 0.06289718986184667, 6: 0.06936732733804774, 7: 0.0650938843333726, 8: 0.06741439787696402, 9: 0.0695119772500532}, 'newx': {0: 1439, 1: 1439, 2: 1439, 3: 1439, 4: 1439, 5: 1439, 6: 1439, 7: 1439, 8: 1439, 9: 1439}, 'newy': {0: 87, 1: 89, 2: 91, 3: 82, 4: 84, 5: 85, 6: 94, 7: 88, 8: 91, 9: 94}})
# Vectorized version of the row loop: one boolean mask per if/elif/else branch.
# Expects `df` (set up above) to be defined already.
mid = 0  # not sure what mid value should be
near_threshold = 0.001  # tolerance for "angle equals pi/2 or 3pi/2"

alpha_near_half_pi = df.alpha.sub(math.pi/2).abs().le(near_threshold)
alpha_near_three_half_pi = df.alpha.sub(3*math.pi/2).abs().le(near_threshold)
beta_near_half_pi = df.beta.sub(math.pi/2).abs().le(near_threshold)
beta_near_three_half_pi = df.beta.sub(3*math.pi/2).abs().le(near_threshold)

cond1 = alpha_near_half_pi | alpha_near_three_half_pi
cond2 = beta_near_half_pi | beta_near_three_half_pi
cond2 = cond2 & (~cond1)  # if cond1 is true, we don't want to do cond2
cond3 = ~(cond1 | cond2)  # if neither cond1 nor cond2, then we are in cond3

# Process cond1 rows
c1 = df.loc[cond1]
df.loc[cond1, 'newx'] = mid
# truncate to int like the loop's int(...) and like the cond3 branch below
df.loc[cond1, 'newy'] = np.tan(c1.beta*(c1.x-mid)+mid).astype(int)

# Process cond2 rows (newx keeps its current value)
c2 = df.loc[cond2]
df.loc[cond2, 'newy'] = np.tan(c2.alpha*(mid-c2.x)+mid).astype(int)

# Process cond3 rows
c3 = df.loc[cond3]
m0 = np.tan(c3.alpha)
m1 = np.tan(c3.beta)
# the second term is `y - mid`, as in the question's loop (`y - y` would
# always be 0 and drop the term entirely)
x = ((m0 * c3.x - m1 * mid) - (c3.y - mid)) / (m0 - m1)
df.loc[cond3, 'newx'] = x.astype(int)
df.loc[cond3, 'newy'] = (m0 * (x - c3.x) + c3.y).astype(int)

df

Given multiple dictionaries, can I get the lowest (or highest) value for each key?

I'm not sure how to approach this problem but given the following dict:
{'diff': {0: 358438.3179047619, 1: 2877912.924419369, 2: 822017.9039274186, 3: 4914425.223282051, 4: 574184.9971827588, 5: 7432268.5341428565, 6: 1111639.5132252753, 7: 1322861.412610346, 8: 1179799.2592362808, 9: 87556.64146904761}}
{'diff': {0: 292811.4124761905, 1: 2831096.9336261265, 2: 760006.755798387, 3: 4868369.423293451, 4: 509515.30310344836, 5: 7390444.080714285, 6: 1028933.0801098899, 7: 1240273.4906724147, 8: 1138039.7093932922, 9: 43618.81660000001}}
{'diff': {0: 393148.40700238093, 1: 2923931.0134306327, 2: 878450.4552137096, 3: 4962539.102763245, 4: 660218.1550965513, 5: 7483527.590967346, 6: 1223029.819152747, 7: 1372622.6893804593, 8: 1202322.4719079277, 9: 113611.58858809523}}
{'diff': {0: 386402.65016666666, 1: 2916900.423062612, 2: 870947.0239475806, 3: 4954526.795990028, 4: 652106.3039551723, 5: 7475754.573836735, 6: 1212934.2664368134, 7: 1365836.4194977009, 8: 1196003.2297920743, 9: 108039.20073571429}}
{'diff': {0: 349975.29688095255, 1: 2876674.3017342356, 2: 827975.0650000006, 3: 4913329.426507118, 4: 605245.163706897, 5: 7431737.75197959, 6: 1154341.8745934067, 7: 1325611.1466724137, 8: 1167062.6884146344, 9: 78813.5207857143}}
{'diff': {0: 389236.3094642856, 1: 2919969.395930179, 2: 873295.801427419, 3: 4957163.9330507135, 4: 653377.0037568965, 5: 7479596.044428572, 6: 1214463.8978571433, 7: 1366351.4634890805, 8: 1200255.7743564018, 9: 112641.91081666667}}
{'diff': {0: 391681.69095, 1: 2921278.030853604, 2: 874417.996964516, 3: 4960328.984978635, 4: 658758.8998741381, 5: 7484168.382208164, 6: 1218278.5344219788, 7: 1367466.964590805, 8: 1200111.4596570123, 9: 113533.64980238095}}
{'diff': {0: 355994.5180714284, 1: 2882303.7541306294, 2: 835458.8338790324, 3: 4919442.302396014, 4: 610290.0786551724, 5: 7441912.343979592, 6: 1164700.055917583, 7: 1327737.2043103438, 8: 1169616.6454146332, 9: 81680.70286904761}}
{'diff': {0: 379893.7180714286, 1: 2913403.720793244, 2: 865857.7399225802, 3: 4948973.331188316, 4: 643761.719862069, 5: 7468621.204883674, 6: 1209897.9149901094, 7: 1359244.204440804, 8: 1192828.6090381108, 9: 104051.28336904761}}
{'diff': {0: 390466.6839142858, 1: 2923088.262698646, 2: 877156.1510145164, 3: 4962513.822048144, 4: 659759.9533551724, 5: 7483875.484744897, 6: 1222339.2461901105, 7: 1369121.0132643674, 8: 1201501.5448817061, 9: 113458.57306428571}}
{'diff': {0: 301792.62588095234, 1: 2854027.945333335, 2: 804759.8740564514, 3: 4876267.124210826, 4: 584088.0599310346, 5: 7378153.378530612, 6: 1133044.61306044, 7: 1291385.6421149436, 8: 1139054.1821890248, 9: 38275.36907142856}}
{'diff': {0: 387509.1658071429, 1: 2919049.8491373872, 2: 874219.6323653222, 3: 4955459.435102557, 4: 656559.3065396551, 5: 7476533.654826531, 6: 1217855.0112197807, 7: 1366842.1718931037, 8: 1198388.2114634141, 9: 108848.47544047613}}
{'diff': {0: 377328.5187738094, 1: 2907686.5556463962, 2: 861963.8367822578, 3: 4942903.962752138, 4: 642356.8619948275, 5: 7463152.797857141, 6: 1203804.631930769, 7: 1356454.3497155162, 8: 1189309.752909755, 9: 99476.27148809524}}
{'diff': {0: 352355.7500238095, 1: 2887318.768563064, 2: 841642.5822338712, 3: 4925029.717854701, 4: 621227.5312931032, 5: 7443790.6748775495, 6: 1183558.6595329673, 7: 1333697.2241666662, 8: 1172889.8671798778, 9: 81039.74188095241}}
{'diff': {0: 396255.3198571428, 1: 2926250.0441639633, 2: 880795.3943693547, 3: 4965277.919590886, 4: 663756.0494362068, 5: 7487330.063967346, 6: 1225360.306425824, 7: 1374148.3940419543, 8: 1204383.4553957311, 9: 117133.45492380955}}
{'diff': {0: 397275.22611428564, 1: 2928138.3937932434, 2: 882549.978358064, 3: 4967271.384024783, 4: 665063.7757241379, 5: 7489353.048779594, 6: 1227848.7195598893, 7: 1375612.8537936783, 8: 1205968.5550199081, 9: 118622.92846666674}}
{'diff': {0: 370638.9714999999, 1: 2901794.814063063, 2: 854231.343169355, 3: 4941840.968413107, 4: 636963.8949827587, 5: 7462906.844836734, 6: 1198474.5955769236, 7: 1349199.7593390818, 8: 1181772.3528810989, 9: 94418.88628571431}}
{'diff': {0: 399605.39451595227, 1: 2930519.3274677014, 2: 884866.901809758, 3: 4970067.843492109, 4: 668209.9673794828, 5: 7492181.322271633, 6: 1230438.6087753302, 7: 1377940.4613927014, 8: 1207999.2446168917, 9: 120766.5979569048}}
{'diff': {0: 394437.6273380953, 1: 2926444.621315316, 2: 880587.8419403222, 3: 4965842.826658971, 4: 663091.0379724137, 5: 7487427.719579593, 6: 1226653.0014609892, 7: 1373195.8957902302, 8: 1204177.9670981697, 9: 116665.88206190476}}
{'diff': {0: 343177.5738333332, 1: 2872438.88899099, 2: 824308.511145161, 3: 4901171.498498574, 4: 594996.441275862, 5: 7417912.784775511, 6: 1150261.9712527473, 7: 1323742.7629367814, 8: 1160229.847768293, 9: 70927.47897619048}}
{'diff': {0: 388380.7712333334, 1: 2919408.214353603, 2: 872090.4287451615, 3: 4957030.500496009, 4: 653652.7608896552, 5: 7478706.309169387, 6: 1210682.681124176, 7: 1365970.9027482753, 8: 1199850.2533893296, 9: 112281.24546190478}}
{'diff': {0: 397734.3032357143, 1: 2928578.1657990995, 2: 883142.2520741936, 3: 4967757.699845867, 4: 666066.6204482759, 5: 7489537.145657143, 6: 1228560.9616604394, 7: 1376307.6271563205, 8: 1206326.1817006094, 9: 118790.83264523809}}
{'diff': {0: 382516.58267857146, 1: 2915073.5680945935, 2: 869510.7518991937, 3: 4954054.122938737, 4: 651079.4144172415, 5: 7474328.030612243, 6: 1213478.4472478013, 7: 1363524.8940068972, 8: 1194637.4700762194, 9: 106180.7426238095}}
{'diff': {0: 395288.79071904765, 1: 2925967.5104626133, 2: 880203.2222838707, 3: 4964695.553390598, 4: 663391.0626017239, 5: 7486925.765212244, 6: 1224951.42912967, 7: 1373539.6626603457, 8: 1204264.0033243895, 9: 116594.66418571424}}
{'diff': {0: 397971.03177380946, 1: 2928499.596860811, 2: 882838.8586330643, 3: 4967899.803066952, 4: 665805.6550189656, 5: 7489992.297071429, 6: 1227996.4815417586, 7: 1376172.2323091957, 8: 1206288.5885036567, 9: 119011.36759404762}}
{'diff': {0: 381045.4717000001, 1: 2915758.7289301776, 2: 868614.6180701618, 3: 4952364.463031057, 4: 649488.6040396551, 5: 7473145.036408164, 6: 1211084.349763737, 7: 1359986.3620787358, 8: 1195206.9199817067, 9: 106315.4963142857}}
{'diff': {0: 396112.6919309524, 1: 2927023.3355063056, 2: 881553.0804177421, 3: 4965659.387115391, 4: 664581.2356241376, 5: 7487379.988112247, 6: 1226928.2231780214, 7: 1375081.4878034485, 8: 1204924.4063521333, 9: 117568.09009999997}}
{'diff': {0: 398791.83564142865, 1: 2929904.1134937378, 2: 884268.6928083871, 3: 4969186.882990996, 4: 667453.0766655172, 5: 7491095.65462857, 6: 1229856.0675498352, 7: 1377319.7854759197, 8: 1207308.6914243596, 9: 119888.01348333333}}
{'diff': {0: 361949.4825238095, 1: 2896682.3701126124, 2: 848862.5437822583, 3: 4928751.334897436, 4: 630220.5688913792, 5: 7450946.972428572, 6: 1187394.4575274729, 7: 1340303.0000775873, 8: 1179480.5218445128, 9: 87392.22891666667}}
{'diff': {0: 315083.9590238095, 1: 2875386.155256756, 2: 791020.9478145165, 3: 4919627.580308269, 4: 527800.8608620691, 5: 7418705.913040818, 6: 1038398.8350329669, 7: 1278070.7195321836, 8: 1177528.7164743906, 9: 74112.39018833333}}
{'diff': {0: 372749.6816428571, 1: 2896460.682382884, 2: 847202.8589435485, 3: 4930094.333652413, 4: 625970.5209655174, 5: 7459734.096877551, 6: 1180734.9652692305, 7: 1343552.4978419545, 8: 1181099.5920807915, 9: 96045.81380238094}}
{'diff': {0: 344613.2344047619, 1: 2879554.2198873875, 2: 832675.4379758065, 3: 4903486.329074071, 4: 607037.1537931036, 5: 7421040.479510205, 6: 1162536.2775054947, 7: 1323128.7494655175, 8: 1167686.5103018305, 9: 71892.5343452381}}
{'diff': {0: 384315.38414285716, 1: 2912584.135851351, 2: 868077.15266129, 3: 4948184.254348999, 4: 649068.8833655174, 5: 7468412.446755102, 6: 1210267.5453626376, 7: 1363363.9941091961, 8: 1193581.5346847544, 9: 105967.95393333334}}
{'diff': {0: 301436.47307142866, 1: 2814021.3400585586, 2: 745901.4201774193, 3: 4840460.746145298, 4: 474416.3661724136, 5: 7368423.201306123, 6: 956427.8998351648, 7: 1251712.683614943, 8: 1124866.8502560984, 9: 39000.96757142855}}
{'diff': {0: 350845.38038095244, 1: 2877748.116752253, 2: 830765.2659354841, 3: 4904564.910629633, 4: 609597.3515431035, 5: 7433322.860551022, 6: 1165465.3320219782, 7: 1328359.751505749, 8: 1164311.8769268298, 9: 75226.42738095239}}
{'diff': {0: 273791.3171666667, 1: 2806945.4198468463, 2: 722071.8942903227, 3: 4835423.34654701, 4: -74570571675091.14, 5: 7366856.7167755095, 6: 949178.8157032969, 7: 1234820.710781609, 8: 1113428.221018293, 9: 19590.516166666657}}
Each dictionary has the keys 0-9 (in my actual case it is larger). I want an outcome where there is one dictionary with the lowest value for each key across all the different dictionaries.
So if we have:
{'diff': {0: 5, 1: 4, 2: 3, 3: 43, 4: -34, 5: 43, 6: 65, 7: 543, 8: 23, 9: 23}}
{'diff': {0: 6, 1: 3, 2: 8, 3: 78, 4: -23, 5: 54, 6: 76, 7: 43, 8: 234, 9: 54}}
Then I would expect:
{'diff': {0: 5, 1: 3, 2: 3, 3: 43, 4: -23, 5: 43, 6: 65, 7: 43, 8: 23, 9: 23}}
update: when you print the list of dicts, you get:
[{'diff': {0: 358438.3179047619, 1: 2877912.924419369, 2: 822017.9039274186, 3: 4914425.223282051, 4: 574184.9971827588, 5: 7432268.5341428565, 6: 1111639.5132252753, 7: 1322861.412610346, 8: 1179799.2592362808, 9: 87556.64146904761}}, {'diff': {0: 292811.4124761905, 1: 2831096.9336261265, 2: 760006.755798387, 3: 4868369.423293451, 4: 509515.30310344836, 5: 7390444.080714285, 6: 1028933.0801098899, 7: 1240273.4906724147, 8: 1138039.7093932922, 9: 43618.81660000001}}, {'diff': {0: 393148.40700238093, 1: 2923931.0134306327, 2: 878450.4552137096, 3: 4962539.102763245, 4: 660218.1550965513, 5: 7483527.590967346, 6: 1223029.819152747, 7: 1372622.6893804593, 8: 1202322.4719079277, 9: 113611.58858809523}}, {'diff': {0: 386402.65016666666, 1: 2916900.423062612, 2: 870947.0239475806, 3: 4954526.795990028, 4: 652106.3039551723, 5: 7475754.573836735, 6: 1212934.2664368134, 7: 1365836.4194977009, 8: 1196003.2297920743, 9: 108039.20073571429}}, {'diff': {0: 349975.29688095255, 1: 2876674.3017342356, 2: 827975.0650000006, 3: 4913329.426507118, 4: 605245.163706897, 5: 7431737.75197959, 6: 1154341.8745934067, 7: 1325611.1466724137, 8: 1167062.6884146344, 9: 78813.5207857143}}, {'diff': {0: 389236.3094642856, 1: 2919969.395930179, 2: 873295.801427419, 3: 4957163.9330507135, 4: 653377.0037568965, 5: 7479596.044428572, 6: 1214463.8978571433, 7: 1366351.4634890805, 8: 1200255.7743564018, 9: 112641.91081666667}}, {'diff': {0: 391681.69095, 1: 2921278.030853604, 2: 874417.996964516, 3: 4960328.984978635, 4: 658758.8998741381, 5: 7484168.382208164, 6: 1218278.5344219788, 7: 1367466.964590805, 8: 1200111.4596570123, 9: 113533.64980238095}}, {'diff': {0: 355994.5180714284, 1: 2882303.7541306294, 2: 835458.8338790324, 3: 4919442.302396014, 4: 610290.0786551724, 5: 7441912.343979592, 6: 1164700.055917583, 7: 1327737.2043103438, 8: 1169616.6454146332, 9: 81680.70286904761}}, {'diff': {0: 379893.7180714286, 1: 2913403.720793244, 2: 865857.7399225802, 3: 4948973.331188316, 4: 643761.719862069, 
5: 7468621.204883674, 6: 1209897.9149901094, 7: 1359244.204440804, 8: 1192828.6090381108, 9: 104051.28336904761}}, {'diff': {0: 390466.6839142858, 1: 2923088.262698646, 2: 877156.1510145164, 3: 4962513.822048144, 4: 659759.9533551724, 5: 7483875.484744897, 6: 1222339.2461901105, 7: 1369121.0132643674, 8: 1201501.5448817061, 9: 113458.57306428571}}, {'diff': {0: 301792.62588095234, 1: 2854027.945333335, 2: 804759.8740564514, 3: 4876267.124210826, 4: 584088.0599310346, 5: 7378153.378530612, 6: 1133044.61306044, 7: 1291385.6421149436, 8: 1139054.1821890248, 9: 38275.36907142856}}, {'diff': {0: 387509.1658071429, 1: 2919049.8491373872, 2: 874219.6323653222, 3: 4955459.435102557, 4: 656559.3065396551, 5: 7476533.654826531, 6: 1217855.0112197807, 7: 1366842.1718931037, 8: 1198388.2114634141, 9: 108848.47544047613}}, {'diff': {0: 377328.5187738094, 1: 2907686.5556463962, 2: 861963.8367822578, 3: 4942903.962752138, 4: 642356.8619948275, 5: 7463152.797857141, 6: 1203804.631930769, 7: 1356454.3497155162, 8: 1189309.752909755, 9: 99476.27148809524}}, {'diff': {0: 352355.7500238095, 1: 2887318.768563064, 2: 841642.5822338712, 3: 4925029.717854701, 4: 621227.5312931032, 5: 7443790.6748775495, 6: 1183558.6595329673, 7: 1333697.2241666662, 8: 1172889.8671798778, 9: 81039.74188095241}}, {'diff': {0: 396255.3198571428, 1: 2926250.0441639633, 2: 880795.3943693547, 3: 4965277.919590886, 4: 663756.0494362068, 5: 7487330.063967346, 6: 1225360.306425824, 7: 1374148.3940419543, 8: 1204383.4553957311, 9: 117133.45492380955}}, {'diff': {0: 397275.22611428564, 1: 2928138.3937932434, 2: 882549.978358064, 3: 4967271.384024783, 4: 665063.7757241379, 5: 7489353.048779594, 6: 1227848.7195598893, 7: 1375612.8537936783, 8: 1205968.5550199081, 9: 118622.92846666674}}, {'diff': {0: 370638.9714999999, 1: 2901794.814063063, 2: 854231.343169355, 3: 4941840.968413107, 4: 636963.8949827587, 5: 7462906.844836734, 6: 1198474.5955769236, 7: 1349199.7593390818, 8: 1181772.3528810989, 9: 94418.88628571431}}, 
{'diff': {0: 399605.39451595227, 1: 2930519.3274677014, 2: 884866.901809758, 3: 4970067.843492109, 4: 668209.9673794828, 5: 7492181.322271633, 6: 1230438.6087753302, 7: 1377940.4613927014, 8: 1207999.2446168917, 9: 120766.5979569048}}, {'diff': {0: 394437.6273380953, 1: 2926444.621315316, 2: 880587.8419403222, 3: 4965842.826658971, 4: 663091.0379724137, 5: 7487427.719579593, 6: 1226653.0014609892, 7: 1373195.8957902302, 8: 1204177.9670981697, 9: 116665.88206190476}}, {'diff': {0: 343177.5738333332, 1: 2872438.88899099, 2: 824308.511145161, 3: 4901171.498498574, 4: 594996.441275862, 5: 7417912.784775511, 6: 1150261.9712527473, 7: 1323742.7629367814, 8: 1160229.847768293, 9: 70927.47897619048}}, {'diff': {0: 388380.7712333334, 1: 2919408.214353603, 2: 872090.4287451615, 3: 4957030.500496009, 4: 653652.7608896552, 5: 7478706.309169387, 6: 1210682.681124176, 7: 1365970.9027482753, 8: 1199850.2533893296, 9: 112281.24546190478}}, {'diff': {0: 397734.3032357143, 1: 2928578.1657990995, 2: 883142.2520741936, 3: 4967757.699845867, 4: 666066.6204482759, 5: 7489537.145657143, 6: 1228560.9616604394, 7: 1376307.6271563205, 8: 1206326.1817006094, 9: 118790.83264523809}}, {'diff': {0: 382516.58267857146, 1: 2915073.5680945935, 2: 869510.7518991937, 3: 4954054.122938737, 4: 651079.4144172415, 5: 7474328.030612243, 6: 1213478.4472478013, 7: 1363524.8940068972, 8: 1194637.4700762194, 9: 106180.7426238095}}, {'diff': {0: 395288.79071904765, 1: 2925967.5104626133, 2: 880203.2222838707, 3: 4964695.553390598, 4: 663391.0626017239, 5: 7486925.765212244, 6: 1224951.42912967, 7: 1373539.6626603457, 8: 1204264.0033243895, 9: 116594.66418571424}}, {'diff': {0: 397971.03177380946, 1: 2928499.596860811, 2: 882838.8586330643, 3: 4967899.803066952, 4: 665805.6550189656, 5: 7489992.297071429, 6: 1227996.4815417586, 7: 1376172.2323091957, 8: 1206288.5885036567, 9: 119011.36759404762}}, {'diff': {0: 381045.4717000001, 1: 2915758.7289301776, 2: 868614.6180701618, 3: 4952364.463031057, 4: 
649488.6040396551, 5: 7473145.036408164, 6: 1211084.349763737, 7: 1359986.3620787358, 8: 1195206.9199817067, 9: 106315.4963142857}}, {'diff': {0: 396112.6919309524, 1: 2927023.3355063056, 2: 881553.0804177421, 3: 4965659.387115391, 4: 664581.2356241376, 5: 7487379.988112247, 6: 1226928.2231780214, 7: 1375081.4878034485, 8: 1204924.4063521333, 9: 117568.09009999997}}, {'diff': {0: 398791.83564142865, 1: 2929904.1134937378, 2: 884268.6928083871, 3: 4969186.882990996, 4: 667453.0766655172, 5: 7491095.65462857, 6: 1229856.0675498352, 7: 1377319.7854759197, 8: 1207308.6914243596, 9: 119888.01348333333}}, {'diff': {0: 361949.4825238095, 1: 2896682.3701126124, 2: 848862.5437822583, 3: 4928751.334897436, 4: 630220.5688913792, 5: 7450946.972428572, 6: 1187394.4575274729, 7: 1340303.0000775873, 8: 1179480.5218445128, 9: 87392.22891666667}}, {'diff': {0: 315083.9590238095, 1: 2875386.155256756, 2: 791020.9478145165, 3: 4919627.580308269, 4: 527800.8608620691, 5: 7418705.913040818, 6: 1038398.8350329669, 7: 1278070.7195321836, 8: 1177528.7164743906, 9: 74112.39018833333}}, {'diff': {0: 372749.6816428571, 1: 2896460.682382884, 2: 847202.8589435485, 3: 4930094.333652413, 4: 625970.5209655174, 5: 7459734.096877551, 6: 1180734.9652692305, 7: 1343552.4978419545, 8: 1181099.5920807915, 9: 96045.81380238094}}, {'diff': {0: 344613.2344047619, 1: 2879554.2198873875, 2: 832675.4379758065, 3: 4903486.329074071, 4: 607037.1537931036, 5: 7421040.479510205, 6: 1162536.2775054947, 7: 1323128.7494655175, 8: 1167686.5103018305, 9: 71892.5343452381}}, {'diff': {0: 384315.38414285716, 1: 2912584.135851351, 2: 868077.15266129, 3: 4948184.254348999, 4: 649068.8833655174, 5: 7468412.446755102, 6: 1210267.5453626376, 7: 1363363.9941091961, 8: 1193581.5346847544, 9: 105967.95393333334}}, {'diff': {0: 301436.47307142866, 1: 2814021.3400585586, 2: 745901.4201774193, 3: 4840460.746145298, 4: 474416.3661724136, 5: 7368423.201306123, 6: 956427.8998351648, 7: 1251712.683614943, 8: 1124866.8502560984, 9: 
39000.96757142855}}, {'diff': {0: 350845.38038095244, 1: 2877748.116752253, 2: 830765.2659354841, 3: 4904564.910629633, 4: 609597.3515431035, 5: 7433322.860551022, 6: 1165465.3320219782, 7: 1328359.751505749, 8: 1164311.8769268298, 9: 75226.42738095239}}, {'diff': {0: 273791.3171666667, 1: 2806945.4198468463, 2: 722071.8942903227, 3: 4835423.34654701, 4: -74570571675091.14, 5: 7366856.7167755095, 6: 949178.8157032969, 7: 1234820.710781609, 8: 1113428.221018293, 9: 19590.516166666657}}]
This is a classical reduce problem, so one approach is to use the built-in function functools.reduce:
from functools import reduce
def min_(x, y, key="diff"):
    """Return the key-wise minimum of two nested dictionaries.

    Both ``x`` and ``y`` must have the shape ``{key: {k: value, ...}}``.
    The result has the same shape, so the function can be used directly as
    the reducer in ``functools.reduce(min_, list_of_dicts)``.

    Args:
        x: First dictionary (the accumulator when used with ``reduce``).
        y: Second dictionary.
        key: Name of the outer key holding the inner mapping to compare.

    Returns:
        ``{key: {k: min(x[key][k], y[key][k]), ...}}``. An inner key that is
        present in only one of the two inputs keeps its single value instead
        of raising ``KeyError`` or being dropped.

    Raises:
        KeyError: If ``key`` is missing from ``x`` or ``y``.
    """
    left, right = x[key], y[key]
    # Keys of x come first (in x's order) so the result order matches x for
    # inputs that share the same keys.
    merged = {
        ki: min(xi, right[ki]) if ki in right else xi
        for ki, xi in left.items()
    }
    # Carry over keys that appear only in y.
    for ki, yi in right.items():
        merged.setdefault(ki, yi)
    return {key: merged}
# Fold min_ pairwise over the list of dictionaries: the running result is combined with
# each next dictionary in turn. `data` is the list of dicts from the question.
res = reduce(min_, data)
print(res)
Output (for the given data)
{'diff': {0: 273791.3171666667, 1: 2806945.4198468463, 2: 722071.8942903227, 3: 4835423.34654701, 4: -74570571675091.14, 5: 7366856.7167755095, 6: 949178.8157032969, 7: 1234820.710781609, 8: 1113428.221018293, 9: 19590.516166666657}}
As an alternative, you could use pandas, as below:
import pandas as pd
# assuming data is a list of dictionaries with the same format of the question
# Each inner "diff" dict becomes one row of the DataFrame (its keys become the columns);
# min() then takes the minimum of every column and to_dict() turns it back into {key: minimum}.
res = {"diff": pd.DataFrame(data=[d["diff"] for d in data]).min().to_dict()}
print(res)
Output (using pandas)
{'diff': {0: 273791.3171666667, 1: 2806945.4198468463, 2: 722071.8942903227, 3: 4835423.34654701, 4: -74570571675091.14, 5: 7366856.7167755095, 6: 949178.8157032969, 7: 1234820.710781609, 8: 1113428.221018293, 9: 19590.516166666657}}
Note that pandas is a (heavy) third-party library that needs to be installed.
You can do it with a dictionary comprehension that calls min() across all the dictionaries in the list.
# For every key of the first dictionary's 'diff' mapping, take the minimum of that key's
# value across all dictionaries in list_of_dicts.
# Assumes list_of_dicts is non-empty and that every dictionary contains all of those keys.
result = {'diff':
{key: min(item['diff'][key] for item in list_of_dicts)
for key in list_of_dicts[0]['diff']}
}
A combination of zip and map can do this for you:
Input:
dicts = [ {'diff': {0: 358438.3179047619, 1: 2877912.924419369, 2: 822017.9039274186, 3: 4914425.223282051, 4: 574184.9971827588, 5: 7432268.5341428565, 6: 1111639.5132252753, 7: 1322861.412610346, 8: 1179799.2592362808, 9: 87556.64146904761}},
{'diff': {0: 292811.4124761905, 1: 2831096.9336261265, 2: 760006.755798387, 3: 4868369.423293451, 4: 509515.30310344836, 5: 7390444.080714285, 6: 1028933.0801098899, 7: 1240273.4906724147, 8: 1138039.7093932922, 9: 43618.81660000001}},
{'diff': {0: 393148.40700238093, 1: 2923931.0134306327, 2: 878450.4552137096, 3: 4962539.102763245, 4: 660218.1550965513, 5: 7483527.590967346, 6: 1223029.819152747, 7: 1372622.6893804593, 8: 1202322.4719079277, 9: 113611.58858809523}},
{'diff': {0: 386402.65016666666, 1: 2916900.423062612, 2: 870947.0239475806, 3: 4954526.795990028, 4: 652106.3039551723, 5: 7475754.573836735, 6: 1212934.2664368134, 7: 1365836.4194977009, 8: 1196003.2297920743, 9: 108039.20073571429}},
{'diff': {0: 349975.29688095255, 1: 2876674.3017342356, 2: 827975.0650000006, 3: 4913329.426507118, 4: 605245.163706897, 5: 7431737.75197959, 6: 1154341.8745934067, 7: 1325611.1466724137, 8: 1167062.6884146344, 9: 78813.5207857143}},
{'diff': {0: 389236.3094642856, 1: 2919969.395930179, 2: 873295.801427419, 3: 4957163.9330507135, 4: 653377.0037568965, 5: 7479596.044428572, 6: 1214463.8978571433, 7: 1366351.4634890805, 8: 1200255.7743564018, 9: 112641.91081666667}},
{'diff': {0: 391681.69095, 1: 2921278.030853604, 2: 874417.996964516, 3: 4960328.984978635, 4: 658758.8998741381, 5: 7484168.382208164, 6: 1218278.5344219788, 7: 1367466.964590805, 8: 1200111.4596570123, 9: 113533.64980238095}},
{'diff': {0: 355994.5180714284, 1: 2882303.7541306294, 2: 835458.8338790324, 3: 4919442.302396014, 4: 610290.0786551724, 5: 7441912.343979592, 6: 1164700.055917583, 7: 1327737.2043103438, 8: 1169616.6454146332, 9: 81680.70286904761}},
{'diff': {0: 379893.7180714286, 1: 2913403.720793244, 2: 865857.7399225802, 3: 4948973.331188316, 4: 643761.719862069, 5: 7468621.204883674, 6: 1209897.9149901094, 7: 1359244.204440804, 8: 1192828.6090381108, 9: 104051.28336904761}},
{'diff': {0: 390466.6839142858, 1: 2923088.262698646, 2: 877156.1510145164, 3: 4962513.822048144, 4: 659759.9533551724, 5: 7483875.484744897, 6: 1222339.2461901105, 7: 1369121.0132643674, 8: 1201501.5448817061, 9: 113458.57306428571}},
{'diff': {0: 301792.62588095234, 1: 2854027.945333335, 2: 804759.8740564514, 3: 4876267.124210826, 4: 584088.0599310346, 5: 7378153.378530612, 6: 1133044.61306044, 7: 1291385.6421149436, 8: 1139054.1821890248, 9: 38275.36907142856}},
{'diff': {0: 387509.1658071429, 1: 2919049.8491373872, 2: 874219.6323653222, 3: 4955459.435102557, 4: 656559.3065396551, 5: 7476533.654826531, 6: 1217855.0112197807, 7: 1366842.1718931037, 8: 1198388.2114634141, 9: 108848.47544047613}},
{'diff': {0: 377328.5187738094, 1: 2907686.5556463962, 2: 861963.8367822578, 3: 4942903.962752138, 4: 642356.8619948275, 5: 7463152.797857141, 6: 1203804.631930769, 7: 1356454.3497155162, 8: 1189309.752909755, 9: 99476.27148809524}},
{'diff': {0: 352355.7500238095, 1: 2887318.768563064, 2: 841642.5822338712, 3: 4925029.717854701, 4: 621227.5312931032, 5: 7443790.6748775495, 6: 1183558.6595329673, 7: 1333697.2241666662, 8: 1172889.8671798778, 9: 81039.74188095241}},
{'diff': {0: 396255.3198571428, 1: 2926250.0441639633, 2: 880795.3943693547, 3: 4965277.919590886, 4: 663756.0494362068, 5: 7487330.063967346, 6: 1225360.306425824, 7: 1374148.3940419543, 8: 1204383.4553957311, 9: 117133.45492380955}},
{'diff': {0: 397275.22611428564, 1: 2928138.3937932434, 2: 882549.978358064, 3: 4967271.384024783, 4: 665063.7757241379, 5: 7489353.048779594, 6: 1227848.7195598893, 7: 1375612.8537936783, 8: 1205968.5550199081, 9: 118622.92846666674}},
{'diff': {0: 370638.9714999999, 1: 2901794.814063063, 2: 854231.343169355, 3: 4941840.968413107, 4: 636963.8949827587, 5: 7462906.844836734, 6: 1198474.5955769236, 7: 1349199.7593390818, 8: 1181772.3528810989, 9: 94418.88628571431}},
{'diff': {0: 399605.39451595227, 1: 2930519.3274677014, 2: 884866.901809758, 3: 4970067.843492109, 4: 668209.9673794828, 5: 7492181.322271633, 6: 1230438.6087753302, 7: 1377940.4613927014, 8: 1207999.2446168917, 9: 120766.5979569048}},
{'diff': {0: 394437.6273380953, 1: 2926444.621315316, 2: 880587.8419403222, 3: 4965842.826658971, 4: 663091.0379724137, 5: 7487427.719579593, 6: 1226653.0014609892, 7: 1373195.8957902302, 8: 1204177.9670981697, 9: 116665.88206190476}},
{'diff': {0: 343177.5738333332, 1: 2872438.88899099, 2: 824308.511145161, 3: 4901171.498498574, 4: 594996.441275862, 5: 7417912.784775511, 6: 1150261.9712527473, 7: 1323742.7629367814, 8: 1160229.847768293, 9: 70927.47897619048}},
{'diff': {0: 388380.7712333334, 1: 2919408.214353603, 2: 872090.4287451615, 3: 4957030.500496009, 4: 653652.7608896552, 5: 7478706.309169387, 6: 1210682.681124176, 7: 1365970.9027482753, 8: 1199850.2533893296, 9: 112281.24546190478}},
{'diff': {0: 397734.3032357143, 1: 2928578.1657990995, 2: 883142.2520741936, 3: 4967757.699845867, 4: 666066.6204482759, 5: 7489537.145657143, 6: 1228560.9616604394, 7: 1376307.6271563205, 8: 1206326.1817006094, 9: 118790.83264523809}},
{'diff': {0: 382516.58267857146, 1: 2915073.5680945935, 2: 869510.7518991937, 3: 4954054.122938737, 4: 651079.4144172415, 5: 7474328.030612243, 6: 1213478.4472478013, 7: 1363524.8940068972, 8: 1194637.4700762194, 9: 106180.7426238095}},
{'diff': {0: 395288.79071904765, 1: 2925967.5104626133, 2: 880203.2222838707, 3: 4964695.553390598, 4: 663391.0626017239, 5: 7486925.765212244, 6: 1224951.42912967, 7: 1373539.6626603457, 8: 1204264.0033243895, 9: 116594.66418571424}},
{'diff': {0: 397971.03177380946, 1: 2928499.596860811, 2: 882838.8586330643, 3: 4967899.803066952, 4: 665805.6550189656, 5: 7489992.297071429, 6: 1227996.4815417586, 7: 1376172.2323091957, 8: 1206288.5885036567, 9: 119011.36759404762}},
{'diff': {0: 381045.4717000001, 1: 2915758.7289301776, 2: 868614.6180701618, 3: 4952364.463031057, 4: 649488.6040396551, 5: 7473145.036408164, 6: 1211084.349763737, 7: 1359986.3620787358, 8: 1195206.9199817067, 9: 106315.4963142857}},
{'diff': {0: 396112.6919309524, 1: 2927023.3355063056, 2: 881553.0804177421, 3: 4965659.387115391, 4: 664581.2356241376, 5: 7487379.988112247, 6: 1226928.2231780214, 7: 1375081.4878034485, 8: 1204924.4063521333, 9: 117568.09009999997}},
{'diff': {0: 398791.83564142865, 1: 2929904.1134937378, 2: 884268.6928083871, 3: 4969186.882990996, 4: 667453.0766655172, 5: 7491095.65462857, 6: 1229856.0675498352, 7: 1377319.7854759197, 8: 1207308.6914243596, 9: 119888.01348333333}},
{'diff': {0: 361949.4825238095, 1: 2896682.3701126124, 2: 848862.5437822583, 3: 4928751.334897436, 4: 630220.5688913792, 5: 7450946.972428572, 6: 1187394.4575274729, 7: 1340303.0000775873, 8: 1179480.5218445128, 9: 87392.22891666667}},
{'diff': {0: 315083.9590238095, 1: 2875386.155256756, 2: 791020.9478145165, 3: 4919627.580308269, 4: 527800.8608620691, 5: 7418705.913040818, 6: 1038398.8350329669, 7: 1278070.7195321836, 8: 1177528.7164743906, 9: 74112.39018833333}},
{'diff': {0: 372749.6816428571, 1: 2896460.682382884, 2: 847202.8589435485, 3: 4930094.333652413, 4: 625970.5209655174, 5: 7459734.096877551, 6: 1180734.9652692305, 7: 1343552.4978419545, 8: 1181099.5920807915, 9: 96045.81380238094}},
{'diff': {0: 344613.2344047619, 1: 2879554.2198873875, 2: 832675.4379758065, 3: 4903486.329074071, 4: 607037.1537931036, 5: 7421040.479510205, 6: 1162536.2775054947, 7: 1323128.7494655175, 8: 1167686.5103018305, 9: 71892.5343452381}},
{'diff': {0: 384315.38414285716, 1: 2912584.135851351, 2: 868077.15266129, 3: 4948184.254348999, 4: 649068.8833655174, 5: 7468412.446755102, 6: 1210267.5453626376, 7: 1363363.9941091961, 8: 1193581.5346847544, 9: 105967.95393333334}},
{'diff': {0: 301436.47307142866, 1: 2814021.3400585586, 2: 745901.4201774193, 3: 4840460.746145298, 4: 474416.3661724136, 5: 7368423.201306123, 6: 956427.8998351648, 7: 1251712.683614943, 8: 1124866.8502560984, 9: 39000.96757142855}},
{'diff': {0: 350845.38038095244, 1: 2877748.116752253, 2: 830765.2659354841, 3: 4904564.910629633, 4: 609597.3515431035, 5: 7433322.860551022, 6: 1165465.3320219782, 7: 1328359.751505749, 8: 1164311.8769268298, 9: 75226.42738095239}},
{'diff': {0: 273791.3171666667, 1: 2806945.4198468463, 2: 722071.8942903227, 3: 4835423.34654701, 4: -74570571675091.14, 5: 7366856.7167755095, 6: 949178.8157032969, 7: 1234820.710781609, 8: 1113428.221018293, 9: 19590.516166666657}}]
Code and output:
# zip(*...) groups the (key, value) pairs of all dictionaries position by position.
# max() compares the pairs of each group as tuples, so when the keys in a group are
# identical it returns the pair with the largest value; dict() rebuilds the mapping.
result = {'diff':dict(map(max,zip(*(d['diff'].items() for d in dicts))))}
print(result)
{'diff': {0: 399605.39451595227, 1: 2930519.3274677014,
2: 884866.901809758, 3: 4970067.843492109,
4: 668209.9673794828, 5: 7492181.322271633,
6: 1230438.6087753302, 7: 1377940.4613927014,
8: 1207999.2446168917, 9: 120766.5979569048}}
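Note that this assumes that all 10 keys are always present and in the same order in every dictionary. (Replace max with min in the line above to get the lowest value for each key instead of the highest.)
If the keys are not always present or not in the same order, you could do this: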
# Only the keys 0-9 are considered; a key missing from a dictionary counts as 0 in the comparison.
result = {'diff':{k:max(d['diff'].get(k,0) for d in dicts) for k in range(10)}}

How to update a seaborn line plot with ipywidgets checkboxes?

I am struggling with the ipywidgets module.
I am trying to make a plot where you can toggle lines off/on with checkboxes based on a province.
# Draw Total_reported against Date_of_report on a single axes, one line per province (hue).
fig, ax = plt.subplots(figsize=(10,10))
sns.lineplot(data=df5, x="Date_of_report", y="Total_reported", hue="Province", ax=ax)
# One checkbox per distinct province, labelled with the province name.
provinces = df5["Province"].unique()
chk = [widgets.Checkbox(description=a) for a in provinces]
def updatePlot(**kwargs):
    """Print the current state of every control as a list of (name, value) pairs.

    Used as the callback of ``widgets.interact``, which passes one keyword
    argument per control: the keyword is the checkbox description and the
    value is its current setting.

    Args:
        **kwargs: Mapping of control name to its current value.
    """
    print(list(kwargs.items()))
# Pass every checkbox to interact as a keyword argument (description -> current value);
# updatePlot is then called with those same keyword names.
widgets.interact(updatePlot, **{c.description: c.value for c in chk})
As you can see, I can draw the checkboxes, and it prints out the status of the boxes,
but I don't know how to update the seaborn line plot so that when you select, say, Drenthe, it only shows the line for Drenthe.
here is the dataframe as a dict:
{'Date_of_report': {0: Timestamp('2020-03-13 10:00:00'), 1: Timestamp('2020-03-13 10:00:00'), 2: Timestamp('2020-03-13 10:00:00'), 3: Timestamp('2020-03-13 10:00:00'), 4: Timestamp('2020-03-13 10:00:00'), 5: Timestamp('2020-03-13 10:00:00'), 6: Timestamp('2020-03-13 10:00:00'), 7: Timestamp('2020-03-13 10:00:00'), 8: Timestamp('2020-03-13 10:00:00'), 9: Timestamp('2020-03-13 10:00:00')}, 'Province': {0: 'Drenthe', 1: 'Flevoland', 2: 'Friesland', 3: 'Gelderland', 4: 'Groningen', 5: 'Limburg', 6: 'Noord-Brabant', 7: 'Noord-Holland', 8: 'Overijssel', 9: 'Utrecht'}, 'Total_reported': {0: 14, 1: 7, 2: 8, 3: 64, 4: 4, 5: 71, 6: 377, 7: 66, 8: 18, 9: 83}, 'Hospital_admission': {0: 0, 1: 3, 2: 2, 3: 9, 4: 1, 5: 17, 6: 65, 7: 4, 8: 0, 9: 7}, 'Deceased': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 3, 6: 5, 7: 0, 8: 0, 9: 0}}

Plotly: Dodge overlapping points on scatterplot categorical axis

I am trying to use plotly to compare the coefficients of regression models using error bars for the confidence intervals. I used the following code to plot it, using the variable as a categorical y axis in a scatter plot. The problem is that the points are overlapping, and I'd like to dodge them, as happens in bar charts when you set barmode='group'. If I had a numerical axis I could manually dodge them, but I can't do that.
# Scatter plot with the coefficient on x and the variable name ('index') on a categorical y axis,
# coloured by model; 'uerr' / 'lerr' give the error-bar lengths in the positive / negative x direction.
fig = px.scatter(
df, y='index', x='coef', text='label', color='model',
error_x_minus='lerr', error_x='uerr',
hover_data=['coef', 'pvalue', 'lower', 'upper']
)
# Place each point's text label above its marker.
fig.update_traces(textposition='top center')
# Reverse the direction of the y axis.
fig.update_yaxes(autorange="reversed")
Using facets I get almost the result I want, but some of the labels go off-plot and are not visible:
# Same data, but with one facet row per variable ('index') and the model on the y axis,
# so the points of different models no longer share a y position.
fig = px.scatter(
df, y='model', x='coef', text='label', color='model',
facet_row='index',
error_x_minus='lerr', error_x='uerr',
hover_data=['coef', 'pvalue', 'lower', 'upper']
)
# Place each point's text label above its marker.
fig.update_traces(textposition='top center')
# Hide the y axes.
fig.update_yaxes(visible=False)
# Keep only the part of each annotation text after the last "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
Does anybody have an idea or workaround for either dodging the points in the first case or displaying the labels in the second case?
Thanks in advance.
PS: Here's the random fake dataframe I made to generate the plots:
# Fake regression results used by the plots: twelve rows keyed 0-11, six
# terms ('const', 'x1'..'x5') for each of the two models ('OLS', 'QuantReg').
# 'lerr' / 'uerr' are passed to px.scatter as error_x_minus / error_x, and
# 'label' is the text drawn next to each point.
df = pd.DataFrame({'coef': {0: 1.0018729737113143,
1: 0.9408864645423858,
2: 0.29796556981484884,
3: -0.6844053575764955,
4: -0.13689631932690113,
5: 0.1473096200402363,
6: 0.9564712505670716,
7: 0.956099003887811,
8: 0.33319108930207175,
9: -0.7022778825729681,
10: -0.1773916842612131,
11: 0.09485417304851751},
'index': {0: 'const',
1: 'x1',
2: 'x2',
3: 'x3',
4: 'x4',
5: 'x5',
6: 'const',
7: 'x1',
8: 'x2',
9: 'x3',
10: 'x4',
11: 'x5'},
'label': {0: '1.002***',
1: '0.941***',
2: '0.298***',
3: '-0.684***',
4: '-0.137',
5: '0.147',
6: '0.956***',
7: '0.956***',
8: '0.333***',
9: '-0.702***',
10: '-0.177',
11: '0.095'},
'lerr': {0: 0.19788416996400904,
1: 0.19972987383410545,
2: 0.0606849959013587,
3: 0.1772734289533593,
4: 0.1988122854078155,
5: 0.21870366703236832,
6: 0.2734783191688098,
7: 0.2760291042678362,
8: 0.08386739920069491,
9: 0.2449940255063039,
10: 0.27476098595116555,
11: 0.3022511162310027},
'lower': {0: 0.8039888037473053,
1: 0.7411565907082803,
2: 0.23728057391349014,
3: -0.8616787865298547,
4: -0.33570860473471664,
5: -0.07139404699213203,
6: 0.6829929313982618,
7: 0.6800698996199748,
8: 0.24932369010137684,
9: -0.947271908079272,
10: -0.45215267021237865,
11: -0.2073969431824852},
'model': {0: 'OLS',
1: 'OLS',
2: 'OLS',
3: 'OLS',
4: 'OLS',
5: 'OLS',
6: 'QuantReg',
7: 'QuantReg',
8: 'QuantReg',
9: 'QuantReg',
10: 'QuantReg',
11: 'QuantReg'},
'pvalue': {0: 1.4211692095019375e-16,
1: 4.3583690618389965e-15,
2: 6.278403727223468e-16,
3: 1.596372747840846e-11,
4: 0.17483151363955116,
5: 0.18433051296752084,
6: 4.877385844808361e-10,
7: 6.665860891682504e-10,
8: 5.476882838731488e-12,
9: 1.4240852942202845e-07,
10: 0.20303143985022934,
11: 0.5347222575215599},
'uerr': {0: 0.19788416996400904,
1: 0.19972987383410556,
2: 0.06068499590135873,
3: 0.1772734289533593,
4: 0.19881228540781554,
5: 0.21870366703236832,
6: 0.27347831916880994,
7: 0.2760291042678362,
8: 0.08386739920069491,
9: 0.2449940255063039,
10: 0.27476098595116555,
11: 0.3022511162310027},
'upper': {0: 1.1997571436753234,
1: 1.1406163383764913,
2: 0.35865056571620757,
3: -0.5071319286231362,
4: 0.0619159660809144,
5: 0.3660132870726046,
6: 1.2299495697358815,
7: 1.2321281081556472,
8: 0.41705848850276667,
9: -0.4572838570666642,
10: 0.09736930168995245,
11: 0.3971052892795202}})
You were very close to a working solution with your second attempt. Just make more room for your labels by passing these extra arguments to px.scatter:
height=600, width=800
And then place the labels for the traces named 'OLS' within the boundaries of each subplot with:
# Move only the labels of the traces named 'OLS' below their markers; the
# selector restricts the update to those traces and leaves the others as-is.
fig.update_traces(textposition='bottom center', selector=dict(name='OLS'))
Plot:
Complete code:
import plotly.express as px
import pandas as pd
# Fake regression results used by the plot below: twelve rows keyed 0-11,
# six terms ('const', 'x1'..'x5') for each of the two models ('OLS',
# 'QuantReg'). 'lerr' / 'uerr' feed the error bars and 'label' is the text
# drawn next to each point.
df = pd.DataFrame({'coef': {0: 1.0018729737113143,
1: 0.9408864645423858,
2: 0.29796556981484884,
3: -0.6844053575764955,
4: -0.13689631932690113,
5: 0.1473096200402363,
6: 0.9564712505670716,
7: 0.956099003887811,
8: 0.33319108930207175,
9: -0.7022778825729681,
10: -0.1773916842612131,
11: 0.09485417304851751},
'index': {0: 'const',
1: 'x1',
2: 'x2',
3: 'x3',
4: 'x4',
5: 'x5',
6: 'const',
7: 'x1',
8: 'x2',
9: 'x3',
10: 'x4',
11: 'x5'},
'label': {0: '1.002***',
1: '0.941***',
2: '0.298***',
3: '-0.684***',
4: '-0.137',
5: '0.147',
6: '0.956***',
7: '0.956***',
8: '0.333***',
9: '-0.702***',
10: '-0.177',
11: '0.095'},
'lerr': {0: 0.19788416996400904,
1: 0.19972987383410545,
2: 0.0606849959013587,
3: 0.1772734289533593,
4: 0.1988122854078155,
5: 0.21870366703236832,
6: 0.2734783191688098,
7: 0.2760291042678362,
8: 0.08386739920069491,
9: 0.2449940255063039,
10: 0.27476098595116555,
11: 0.3022511162310027},
'lower': {0: 0.8039888037473053,
1: 0.7411565907082803,
2: 0.23728057391349014,
3: -0.8616787865298547,
4: -0.33570860473471664,
5: -0.07139404699213203,
6: 0.6829929313982618,
7: 0.6800698996199748,
8: 0.24932369010137684,
9: -0.947271908079272,
10: -0.45215267021237865,
11: -0.2073969431824852},
'model': {0: 'OLS',
1: 'OLS',
2: 'OLS',
3: 'OLS',
4: 'OLS',
5: 'OLS',
6: 'QuantReg',
7: 'QuantReg',
8: 'QuantReg',
9: 'QuantReg',
10: 'QuantReg',
11: 'QuantReg'},
'pvalue': {0: 1.4211692095019375e-16,
1: 4.3583690618389965e-15,
2: 6.278403727223468e-16,
3: 1.596372747840846e-11,
4: 0.17483151363955116,
5: 0.18433051296752084,
6: 4.877385844808361e-10,
7: 6.665860891682504e-10,
8: 5.476882838731488e-12,
9: 1.4240852942202845e-07,
10: 0.20303143985022934,
11: 0.5347222575215599},
'uerr': {0: 0.19788416996400904,
1: 0.19972987383410556,
2: 0.06068499590135873,
3: 0.1772734289533593,
4: 0.19881228540781554,
5: 0.21870366703236832,
6: 0.27347831916880994,
7: 0.2760291042678362,
8: 0.08386739920069491,
9: 0.2449940255063039,
10: 0.27476098595116555,
11: 0.3022511162310027},
'upper': {0: 1.1997571436753234,
1: 1.1406163383764913,
2: 0.35865056571620757,
3: -0.5071319286231362,
4: 0.0619159660809144,
5: 0.3660132870726046,
6: 1.2299495697358815,
7: 1.2321281081556472,
8: 0.41705848850276667,
9: -0.4572838570666642,
10: 0.09736930168995245,
11: 0.3971052892795202}})
# One facet row per variable, with the models as y categories inside each
# facet. The explicit height/width gives the text labels more room.
fig = px.scatter(
    df, y='model', x='coef', text='label', color='model',
    facet_row='index',
    error_x_minus='lerr', error_x='uerr',
    hover_data=['coef', 'pvalue', 'lower', 'upper'],
    height=600, width=800,
)
# Default label position for every trace: above the marker.
fig.update_traces(textposition='top center')
fig.update_yaxes(visible=False)
# Keep only the part of each facet annotation that follows the last "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# Override the position for the traces named 'OLS' only, so their labels are
# drawn below the marker; the selector leaves all other traces untouched.
fig.update_traces(textposition='bottom center', selector=dict(name='OLS'))
fig.show()

Categories