Insert list and slice in single dict comprehension - python

I'm trying to build a dict comprehension that does an insert and takes a slice.
Does anybody know how to do this, or even if this is possible at all?
I'm trying to get the same output in cprd with a dict comprehension, as in newd with a for loop.
Code (Python 3.6.1)
# Initializations
hline = "-"*80
h = ['H1', 'H2', 'H3', 'H4']
d = {'A': [['Y1', 'Y2', 'Y3', 'Y4'], [-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [['Y1', 'Y2', 'Y3', 'Y4'], [-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [['Y1', 'Y2', 'Y3', 'Y4'], [-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
print(f"h = {h}")
print(f"d = {d}")
print(hline)
# Without dict/list comprehension
newd = {}
for key,value in d.items():
value.insert(1,h)
newd[key] = value[1:]
print(f"newd = {newd}")
print(hline)
# Re-Initializations
d = {'A': [['Y1', 'Y2', 'Y3', 'Y4'], [-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [['Y1', 'Y2', 'Y3', 'Y4'], [-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [['Y1', 'Y2', 'Y3', 'Y4'], [-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
# Tryout with dict comprehension
cprd = {key:value[1:] for key,value in d.items()}
print(f"cprd = {cprd}")
print(hline)
Output
h = ['H1', 'H2', 'H3', 'H4']
d = {'A': [['Y1', 'Y2', 'Y3', 'Y4'], [-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [['Y1', 'Y2', 'Y3', 'Y4'], [-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [['Y1', 'Y2', 'Y3', 'Y4'], [-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
--------------------------------------------------------------------------------
newd = {'A': [['H1', 'H2', 'H3', 'H4'], [-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [['H1', 'H2', 'H3', 'H4'], [-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [['H1', 'H2', 'H3', 'H4'], [-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
--------------------------------------------------------------------------------
cprd = {'A': [[-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [[-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [[-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
--------------------------------------------------------------------------------

You can use list concatenation to create the desired values:
{key:[h]+value[1:] for key,value in d.items()}
# {'A': [['H1', 'H2', 'H3', 'H4'], [-3.4, 15.9, 'NA', 6.0], [-3.4, 4.2, -7.4, 6.3], [22.7, 7.4, 2.8, 'NA']], 'B': [['H1', 'H2', 'H3', 'H4'], [-45.8, -10.7, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']], 'C': [['H1', 'H2', 'H3', 'H4'], [-10.5, 32.8, 'NA', 'NA'], [5.4, 12.7, 19.2, 20.3], [22.7, 7.4, 2.8, 'NA']]}
Note that:
it returns the exact same data as newd
it does not mutate d
In your example, d was changed after having defined newd. Is it a bug or a feature? :)

Related

python, create a series of lists from two other lists with index

HELLO thank you in advance for your help, I've been trying to learn python on my own over the last few months!
I have two lists of lists:
countries_list = [['Canada'], ['China'], ['Finland'], ...]
ratios = [[10.2, 10.3, 11.4, 12.0], [8.2, 8.1, 9.0, 9.1], [15.4, 15.5, 15.8, 16.0], ...]
I want to merge the lists together according to the indices. For example, countries[0] = ['Canada'] and ratios[0] = [10.2, 10.3, 11.4, 12.0]. I want to use the indices to create this final list:
final_list = [[10.2, 10.3, 11.4, 12.0, 'Canada'], [8.2, 8.1, 9.0, 9.1,'China'], [15.4, 15.5, 15.8, 16.0, 'Finland']...]
this is the code I've come up with for now:
final_list = []
for countries in countries_list:
for ratio_list in ratios:
current_ratios = []
for r in ratio_list:
current_ratios.append(r)
current_ratios.append(countries)
rows_list.append(current_ratios)
print(rows_list)
this is the output:
[[9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Eswatini'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Bahamas'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Jamaica'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Chad'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Kenya'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Mali'], [9.8, 10.3, 10.9, 11.4, 12.0, 12.6, 14.8, 19.2, 25.2, 'Guyana'] ...]
As you can see, it is kinda close to the desired outcome, but the ratios are always the same. The nested loops are very confusing to me and I find myself wondering what the ordering is and just what's happening here in general.
You can use zip() + list-comprehension:
# Sample inputs: one single-element country list for each row of ratios.
countries_list = [["Canada"], ["China"], ["Finland"]]
ratios = [
    [10.2, 10.3, 11.4, 12.0],
    [8.2, 8.1, 9.0, 9.1],
    [15.4, 15.5, 15.8, 16.0],
]

# zip() walks both lists in lockstep, so each ratio row is paired with the
# country at the same index; the country is placed after the row's ratios.
out = [
    list(ratio_row) + list(country)
    for country, ratio_row in zip(countries_list, ratios)
]
print(out)
Prints:
[
[10.2, 10.3, 11.4, 12.0, "Canada"],
[8.2, 8.1, 9.0, 9.1, "China"],
[15.4, 15.5, 15.8, 16.0, "Finland"],
]

Replace dictionary keys from values of another dictionary

I have three dictionaries:
packed_items = {0: [0, 3],
2: [1],
1: [2]}
trucks_dict = {0: [9.5, 5.5, 5.5],
1: [13.0, 5.5, 7.0],
2: [16.0, 6.0, 7.0]}
items_dict = {0: [4.6, 4.3, 4.3],
1: [4.6, 4.3, 4.3],
2: [6.0, 5.6, 9.0],
3: [8.75, 5.6, 6.6]}
packed_items consists of trucks as keys and values as list of items. I want to change my packed_dict such that it gives me output in this format
packed_dict = {[9.5, 5.5, 5.5]:[[4.6, 4.3, 4.3],[8.75, 5.6, 6.6]]
[16.0, 6.0, 7.0]:[[4.6, 4.3, 4.3]]
[13.0, 5.5, 7.0]:[[6.0, 5.6, 9.0]]}
Basically I want to replace my keys in packed_items with the values in trucks_dict, and values in packed_items with values in items_dict.
By converting your list keys to tuples, you can do that with something like:
Code:
# Group item dimensions under the dimensions of the truck carrying them.
# The truck's list is converted to a tuple because dict keys must be hashable.
result = {}
for truck_id, item_ids in packed_items.items():
    for item_id in item_ids:
        truck_dims = tuple(trucks_dict[truck_id])
        if truck_dims not in result:
            result[truck_dims] = []
        result[truck_dims].append(items_dict[item_id])
Test Code:
# truck id -> ids of the items packed on that truck
packed_items = {
    0: [0, 3],
    2: [1],
    1: [2],
}
# truck id -> truck dimensions
trucks_dict = {
    0: [9.5, 5.5, 5.5],
    1: [13.0, 5.5, 7.0],
    2: [16.0, 6.0, 7.0],
}
# item id -> item dimensions
items_dict = {
    0: [4.6, 4.3, 4.3],
    1: [4.6, 4.3, 4.3],
    2: [6.0, 5.6, 9.0],
    3: [8.75, 5.6, 6.6],
}

# Group item dimensions under the dimensions of the truck carrying them.
# The truck's list is converted to a tuple because dict keys must be hashable.
result = {}
for truck_id, item_ids in packed_items.items():
    for item_id in item_ids:
        truck_dims = tuple(trucks_dict[truck_id])
        if truck_dims not in result:
            result[truck_dims] = []
        result[truck_dims].append(items_dict[item_id])
print(result)
Results:
{(9.5, 5.5, 5.5): [[4.6, 4.3, 4.3], [8.75, 5.6, 6.6]],
(16.0, 6.0, 7.0): [[4.6, 4.3, 4.3]],
(13.0, 5.5, 7.0): [[6.0, 5.6, 9.0]]
}
You cannot have lists as dictionary keys because they are unhashable.
Because you asked for string keys, you can do:
from collections import defaultdict

# truck id -> ids of the items packed on that truck
packed_items = {0: [0, 3],
                2: [1],
                1: [2]}
# truck id -> truck dimensions
trucks_dict = {0: [9.5, 5.5, 5.5],
               1: [13.0, 5.5, 7.0],
               2: [16.0, 6.0, 7.0]}
# item id -> item dimensions
items_dict = {0: [4.6, 4.3, 4.3],
              1: [4.6, 4.3, 4.3],
              2: [6.0, 5.6, 9.0],
              3: [8.75, 5.6, 6.6]}

# packed_items decides which items belong to which truck, so the grouping is
# driven by it: look up the truck's dimensions (stringified, since lists are
# unhashable) and collect the dimensions of each item assigned to that truck.
d = defaultdict(list)
for truck_id, item_ids in packed_items.items():
    truck_key = str(trucks_dict[truck_id])
    for item_id in item_ids:
        d[truck_key].append(items_dict[item_id])
# Convert to a plain dict so the printed form has no defaultdict wrapper.
print(dict(d))
# {'[9.5, 5.5, 5.5]': [[4.6, 4.3, 4.3], [8.75, 5.6, 6.6]], '[16.0, 6.0, 7.0]': [[4.6, 4.3, 4.3]], '[13.0, 5.5, 7.0]': [[6.0, 5.6, 9.0]]}
You can use a dict comprehension to map the lists in trucks_dict to items in items_dict. The lists have to be converted to tuples so that they can be hashable as keys:
{tuple(trucks_dict[k]): [items_dict[i] for i in l] for k, l in packed_items.items()}
This returns:
{(9.5, 5.5, 5.5): [[4.6, 4.3, 4.3], [8.75, 5.6, 6.6]],
(13.0, 5.5, 7.0): [[6.0, 5.6, 9.0]],
(16.0, 6.0, 7.0): [[4.6, 4.3, 4.3]]}

Filtering a nested list by values in a second nested list

I have two sets of nested lists that contain similar information:
listA = [('1.140038401', 1096, 3.75, 3.9, 47999, 1.95, 2.06, 58805, 3.85, 4.6), ('1.140334422', 1096, 1.24, 1.37, 56301, 10.5, 90.0, 58805, 5.9, 10.5), ('1.139782842', 1141, 2.22, 2.24, 25422, 3.75, 3.85, 58805, 3.4, 3.45), ('1.140334234', 1141, 5.4, 8.6, 48224, 1.59, 1.79, 58805, 3.95, 5.4), ('1.140333219', 1703, 2.78, 3.3, 48461, 2.58, 3.05, 58805, 3.25, 4.0), ('1.139782512', 1703, 1.98, 2.0, 78984, 4.9, 5.0, 58805, 3.35, 3.4), ('1.140039878', 18567, 5.0, 6.8, 1096, 1.7, 1.8, 58805, 3.75, 4.5), ('1.139782182', 18567, 2.24, 2.28, 62683, 3.95, 4.1, 58805, 3.2, 3.25), ('1.140333386', 25422, 2.44, 2.86, 58943, 2.98, 3.65, 58805, 3.2, 3.95), ('1.140040066', 47999, 1.6, 1.71, 55190, 4.8, 7.4, 58805, 4.1, 6.0), ('1.140039300', 48224, 1.23, 1.25, 78984, 19.5, 22.0, 58805, 6.8, 7.4), ('1.139783364', 48351, 2.16, 2.22, 55190, 3.9, 4.1, 58805, 3.4, 3.5), ('1.140333033', 48351, 2.18, 2.42, 56323, 3.5, 4.2, 58805, 3.35, 3.95), ('1.140038782', 48461, 1.95, 2.12, 1141, 4.0, 4.8, 58805, 3.55, 4.3), ('1.139783176', 48461, 1.79, 1.81, 69720, 5.4, 5.5, 58805, 3.85, 3.95), ('1.140333894', 48756, 2.2, 2.64, 56299, 3.25, 4.3, 58805, 3.2, 4.3), ('1.139783552', 52689, 7.2, 7.6, 48224, 1.55, 1.57, 58805, 4.4, 4.6), ('1.140040261', 52689, 6.2, 7.2, 48351, 1.61, 1.71, 58805, 3.85, 4.8), ('1.140334059', 55190, 1.32, 1.46, 52689, 9.2, 50.0, 58805, 4.9, 7.6), ('1.140038590', 56299, 2.62, 2.94, 56343, 2.88, 3.35, 58805, 3.0, 3.2), ('1.139782347', 56299, 2.92, 2.98, 58943, 2.9, 2.94, 58805, 3.15, 3.2), ('1.140039520', 56301, 2.2, 2.42, 1703, 3.6, 4.2, 58805, 3.25, 3.7), ('1.139783007', 56301, 2.4, 2.46, 56343, 3.3, 3.4, 58805, 3.4, 3.45), ('1.140039686', 56323, 1.25, 1.31, 25422, 12.0, 40.0, 58805, 6.2, 9.8), ('1.139782677', 56323, 1.25, 1.26, 48756, 14.5, 15.0, 58805, 7.4, 7.6), ('1.140333722', 56343, 1.84, 2.06, 18567, 4.5, 6.0, 58805, 3.45, 4.3), ('1.140038951', 58943, 1.83, 1.93, 69720, 4.5, 5.5, 58805, 3.65, 4.2), ('1.140039119', 62683, 2.36, 2.58, 48756, 3.25, 3.7, 58805, 3.25, 3.4), 
('1.140334610', 69720, 10.5, 90.0, 47999, 1.24, 1.37, 58805, 5.9, 10.5), ('1.140333552', 78984, 2.78, 3.3, 62683, 2.6, 3.1, 58805, 3.15, 3.95)]
listB = [(1096, 47999, '3.90', '1.95', '4.15'), (1096, 56301, '1.28', '10.50', '5.50'), (1141, 25422, '2.20', '3.70', '3.44'), (1141, 48224, '5.50', '1.60', '4.00'), (1703, 48461, '2.80', '2.60', '3.25'), (1703, 78984, '1.97', '5.00', '3.30'), (18567, 1096, '5.25', '1.70', '4.04'), (18567, 62683, '2.20', '4.02', '3.25'), (25422, 58943, '2.40', '3.00', '3.20'), (47999, 55190, '1.64', '5.82', '4.30'), (48224, 78984, '1.23', '19.00', '7.00'), (48351, 55190, '2.16', '3.88', '3.40'), (48351, 56323, '2.20', '3.40', '3.35'), (48461, 1141, '1.97', '4.00', '3.75'), (48461, 69720, '1.75', '5.40', '3.95'), (48756, 56299, '2.30', '3.30', '3.20'), (52689, 48351, '6.25', '1.65', '4.05'), (55190, 52689, '1.35', '9.50', '5.00'), (56299, 56343, '2.75', '3.00', '3.10'), (56299, 58943, '2.92', '2.84', '3.10'), (56301, 1703, '2.25', '3.69', '3.29'), (56301, 56343, '2.38', '3.37', '3.40'), (56323, 25422, '1.25', '13.00', '6.60'), (56323, 48756, '1.25', '14.50', '7.55'), (56343, 18567, '1.83', '4.50', '3.40'), (58943, 69720, '1.83', '4.80', '3.70'), (62683, 48756, '2.40', '3.31', '3.27'), (69720, 47999, '10.50', '1.28', '5.50'), (78984, 62683, '2.80', '2.65', '3.10')]
I'm trying to produce a new nested list (or amend the existing one) so that it contains all elements of listA in which:
Elements [1] and [4] in listA match elements [0] and [1] in listB respectively.
So far, I've tried this code:
newListA=[]
for i in range(0, len(listB)):
for j in range(0, len(listB)):
if listA[i][1] == listB[j][0] and listA[i][4] == listB[j][1]:
newListA.append(listA[j])
This doesn't produce the desired result, which should be a list containing all elements of listA except one:
('1.139783552', 52689, 7.2, 7.6, 48224, 1.55, 1.57, 58805, 4.4, 4.6)
To do this with reasonable time efficiency, create a set of tuples with the relevant items from listB:
# Index listB by its first two fields so each membership check is a set lookup.
listb_hash = {(row[0], row[1]) for row in listB}
Then create your new list:
# Keep the listA rows whose fields [1] and [4] form a pair present in listb_hash.
new_list = list(filter(lambda row: (row[1], row[4]) in listb_hash, listA))
print(len(new_list), len(listA))
Output:
29 30
And here is a pretty-printed new_list:
[('1.140038401', 1096, 3.75, 3.9, 47999, 1.95, 2.06, 58805, 3.85, 4.6),
('1.140334422', 1096, 1.24, 1.37, 56301, 10.5, 90.0, 58805, 5.9, 10.5),
('1.139782842', 1141, 2.22, 2.24, 25422, 3.75, 3.85, 58805, 3.4, 3.45),
('1.140334234', 1141, 5.4, 8.6, 48224, 1.59, 1.79, 58805, 3.95, 5.4),
('1.140333219', 1703, 2.78, 3.3, 48461, 2.58, 3.05, 58805, 3.25, 4.0),
('1.139782512', 1703, 1.98, 2.0, 78984, 4.9, 5.0, 58805, 3.35, 3.4),
('1.140039878', 18567, 5.0, 6.8, 1096, 1.7, 1.8, 58805, 3.75, 4.5),
('1.139782182', 18567, 2.24, 2.28, 62683, 3.95, 4.1, 58805, 3.2, 3.25),
('1.140333386', 25422, 2.44, 2.86, 58943, 2.98, 3.65, 58805, 3.2, 3.95),
('1.140040066', 47999, 1.6, 1.71, 55190, 4.8, 7.4, 58805, 4.1, 6.0),
('1.140039300', 48224, 1.23, 1.25, 78984, 19.5, 22.0, 58805, 6.8, 7.4),
('1.139783364', 48351, 2.16, 2.22, 55190, 3.9, 4.1, 58805, 3.4, 3.5),
('1.140333033', 48351, 2.18, 2.42, 56323, 3.5, 4.2, 58805, 3.35, 3.95),
('1.140038782', 48461, 1.95, 2.12, 1141, 4.0, 4.8, 58805, 3.55, 4.3),
('1.139783176', 48461, 1.79, 1.81, 69720, 5.4, 5.5, 58805, 3.85, 3.95),
('1.140333894', 48756, 2.2, 2.64, 56299, 3.25, 4.3, 58805, 3.2, 4.3),
('1.140040261', 52689, 6.2, 7.2, 48351, 1.61, 1.71, 58805, 3.85, 4.8),
('1.140334059', 55190, 1.32, 1.46, 52689, 9.2, 50.0, 58805, 4.9, 7.6),
('1.140038590', 56299, 2.62, 2.94, 56343, 2.88, 3.35, 58805, 3.0, 3.2),
('1.139782347', 56299, 2.92, 2.98, 58943, 2.9, 2.94, 58805, 3.15, 3.2),
('1.140039520', 56301, 2.2, 2.42, 1703, 3.6, 4.2, 58805, 3.25, 3.7),
('1.139783007', 56301, 2.4, 2.46, 56343, 3.3, 3.4, 58805, 3.4, 3.45),
('1.140039686', 56323, 1.25, 1.31, 25422, 12.0, 40.0, 58805, 6.2, 9.8),
('1.139782677', 56323, 1.25, 1.26, 48756, 14.5, 15.0, 58805, 7.4, 7.6),
('1.140333722', 56343, 1.84, 2.06, 18567, 4.5, 6.0, 58805, 3.45, 4.3),
('1.140038951', 58943, 1.83, 1.93, 69720, 4.5, 5.5, 58805, 3.65, 4.2),
('1.140039119', 62683, 2.36, 2.58, 48756, 3.25, 3.7, 58805, 3.25, 3.4),
('1.140334610', 69720, 10.5, 90.0, 47999, 1.24, 1.37, 58805, 5.9, 10.5),
('1.140333552', 78984, 2.78, 3.3, 62683, 2.6, 3.1, 58805, 3.15, 3.95)]

Converting dataframe into sub-list or dictionaries

I have the data in tabular format (rows and columns) which I read into a dataframe (Data1) :
Name D Score
0 Angelica D1 3.5
1 Angelica D2 2.0
2 Bill D1 2.0
3 Chan D3 1.0
......
I am able to convert it into a list using:
Data2 = Data1.values.tolist()
and get the below output:
[
['Angelica', 'D1', 3.5], ['Angelica', 'D2', 2.0],
['Bill', 'D1', 2.0], ['Bill', 'D2', 3.5],
['Chan', 'D8', 1.0], ['Chan', 'D3', 3.0], ['Chan', 'D4', 5.0],
['Dan', 'D4', 3.0], ['Dan', 'D5', 4.5], ['Dan', 'D6', 4.0]
]
What I want is, the output to be like this:
{
'Angelica': {'D1': 3.5, 'D2': 2.0} ,
'Bill': {'D1': 2.0, 'D2': 3.5}
'Chan': {'D8': 1.0, 'D3': 3.0, 'D4': 5.0 }
'Dan': {'D4': 3.0, 'D5': 4.5, 'D6': 4.0}
}
How can I achieve this in Python?
You can use a dictionary comprehension after grouping the df by the Name column:
import pandas as pd

df = pd.DataFrame([
    {'Name': 'Angela', 'Score': 3.5, 'D': 'D1'},
    {'Name': 'Angela', 'Score': 2.0, 'D': 'D2'},
    {'Name': 'Bill', 'Score': 2.0, 'D': 'D1'},
    {'Name': 'Chan', 'Score': 1.0, 'D': 'D3'},
])
print(df)
#      Name  Score   D
# 0  Angela    3.5  D1
# 1  Angela    2.0  D2
# 2    Bill    2.0  D1
# 3    Chan    1.0  D3

# groupby('Name').groups maps each name to the index labels of its rows.
# Rows are looked up by label with .loc (DataFrame.ix was removed in
# pandas 1.0), pairing each row's D value with its Score.
data2 = {
    name: {df.loc[label, 'D']: df.loc[label, 'Score'] for label in labels}
    for name, labels in df.groupby('Name').groups.items()
}
# data2 == {'Angela': {'D1': 3.5, 'D2': 2.0}, 'Bill': {'D1': 2.0}, 'Chan': {'D3': 1.0}}
You can zip up the values from each group after grouping by Name:
import pandas as pd

rows = [
    ['Angelica', 'D1', 3.5], ['Angelica', 'D2', 2.0],
    ['Bill', 'D1', 2.0], ['Bill', 'D2', 3.5],
    ['Chan', 'D8', 1.0], ['Chan', 'D3', 3.0], ['Chan', 'D4', 5.0],
    ['Dan', 'D4', 3.0], ['Dan', 'D5', 4.5], ['Dan', 'D6', 4.0],
]
columns = ["Name", "D", "Score"]
df = pd.DataFrame(rows, columns=columns)

# Iterating a groupby yields (group key, sub-frame) pairs; zipping a group's
# "D" and "Score" columns pairs every D label with its score.  The result is
# bound to `data` so it can be inspected afterwards.
data = {
    name: dict(zip(group["D"], group["Score"]))
    for name, group in df.groupby("Name")
}
# data == {'Angelica': {'D1': 3.5, 'D2': 2.0},
#          'Bill': {'D1': 2.0, 'D2': 3.5},
#          'Chan': {'D8': 1.0, 'D3': 3.0, 'D4': 5.0},
#          'Dan': {'D4': 3.0, 'D5': 4.5, 'D6': 4.0}}
from collections import defaultdict

# One inner dict per name, created automatically on first access.
result = defaultdict(dict)
for name, label, score in Data2:
    result[name][label] = score

How to improve the speed of my selection process, python

Edit: Due to errors in my code, I updated the question with my oldest, but working, code.
I get a list of speed recordings from a database, and I want to find the max speed in that list. Sounds easy enough, but I got some requirements for any max speed to count:
If the max speed is over a certain level, it has to have more than a certain number of records to be recognized as maximum speed. The reason for this logic is that I want the max speed under normal conditions, not just an error or one time occurrence. I also have a constraint that a speed has to be over a certain limit to be counted, for the same reason.
Here is the example on a speed array:
v = [8.0, 1.3, 0.7, 0.8, 0.9, 1.1, 14.9, 14.0, 14.1, 14.2, 14.3, 13.8, 13.9, 13.7, 13.6, 13.5, 13.4, 15.7, 15.8, 15.0, 15.3, 15.4, 15.5, 15.6, 15.2, 12.8, 12.7, 12.6, 8.7, 8.8, 8.6, 9.0, 8.5, 8.4, 8.3, 0.1, 0.0, 16.4, 16.5, 16.7, 16.8, 17.0, 17.1, 17.8, 17.7, 17.6, 17.4, 17.5, 17.3, 17.9, 18.2, 18.3, 18.1, 18.0, 18.4, 18.5, 18.6, 19.0, 19.1, 18.9, 19.2, 19.3, 19.9, 20.1, 19.8, 20.0, 19.7, 19.6, 19.5, 20.2, 20.3, 18.7, 18.8, 17.2, 16.9, 11.5, 11.2, 11.3, 11.4, 7.1, 12.9, 14.4, 13.1, 13.2, 12.5, 12.1, 12.2, 13.0, 0.2, 3.6, 7.4, 4.6, 4.5, 4.3, 4.0, 9.4, 9.6, 9.7, 5.8, 5.7, 7.3, 2.1, 0.4, 0.3, 16.1, 11.9, 12.0, 11.7, 11.8, 10.0, 10.1, 9.8, 15.1, 14.7, 14.8, 10.2, 10.3, 1.2, 9.9, 1.9, 3.4, 14.6, 0.6, 5.1, 5.2, 7.5, 19.4, 10.7, 10.8, 10.9, 0.5, 16.3, 16.2, 16.0, 16.6, 12.4, 11.0, 1.7, 1.6, 2.4, 11.6, 3.9, 3.8, 14.5, 11.1]
This is my code to find what I define as the true maximum speed:
from collections import Counter
while max(speeds)>30:
speeds.remove(max(speeds))
nwsp = []
for s in speeds:
nwsp.append(np.floor(s))
count = Counter(nwsp)
while speeds and max(speeds)>14 and count[np.floor(max(speeds))]<10:
speeds.remove(max(speeds))
while speeds and max(speeds)<5:
speeds.remove(max(speeds))
if speeds:
print max(speeds)
return max(speeds)
else:
return False
Result with v as shown over: 19.9
The reason that I make nwsp is that it doesn't matter to me if, e.g., 19.6 is only found 9 times - if any number inside the same integer, e.g. 19.7, is found 3 times as well, then 19.6 will be valid.
How can I rewrite/optimize this code so the selection process is quicker? I already removed the max(speeds) and instead sorted the list and referenced the largest element using speeds[-1].
Sorry for not adding any unit to my speeds.
Your code is just slow because you call max and remove over and over and over again and each of those calls costs time proportional to the length of the list. Any reasonable solution will be much faster.
If you know that False can't happen, then this suffices:
speeds = [8.0, 1.3, 0.7, 0.8, 0.9, 1.1, 14.9, 14.0, 14.1, 14.2, 14.3, 13.8, 13.9, 13.7, 13.6, 13.5, 13.4, 15.7, 15.8, 15.0, 15.3, 15.4, 15.5, 15.6, 15.2, 12.8, 12.7, 12.6, 8.7, 8.8, 8.6, 9.0, 8.5, 8.4, 8.3, 0.1, 0.0, 16.4, 16.5, 16.7, 16.8, 17.0, 17.1, 17.8, 17.7, 17.6, 17.4, 17.5, 17.3, 17.9, 18.2, 18.3, 18.1, 18.0, 18.4, 18.5, 18.6, 19.0, 19.1, 18.9, 19.2, 19.3, 19.9, 20.1, 19.8, 20.0, 19.7, 19.6, 19.5, 20.2, 20.3, 18.7, 18.8, 17.2, 16.9, 11.5, 11.2, 11.3, 11.4, 7.1, 12.9, 14.4, 13.1, 13.2, 12.5, 12.1, 12.2, 13.0, 0.2, 3.6, 7.4, 4.6, 4.5, 4.3, 4.0, 9.4, 9.6, 9.7, 5.8, 5.7, 7.3, 2.1, 0.4, 0.3, 16.1, 11.9, 12.0, 11.7, 11.8, 10.0, 10.1, 9.8, 15.1, 14.7, 14.8, 10.2, 10.3, 1.2, 9.9, 1.9, 3.4, 14.6, 0.6, 5.1, 5.2, 7.5, 19.4, 10.7, 10.8, 10.9, 0.5, 16.3, 16.2, 16.0, 16.6, 12.4, 11.0, 1.7, 1.6, 2.4, 11.6, 3.9, 3.8, 14.5, 11.1]
from collections import Counter

# Number of readings falling into each whole-number speed bucket.
count = Counter(map(int, speeds))
# A reading qualifies when it lies within 5..30 and is either no higher than
# 14 or sits in a bucket holding at least 10 readings.  print() is called as a
# function so the snippet also runs on Python 3.
print(max(s for s in speeds
          if 5 <= s <= 30 and (s <= 14 or count[int(s)] >= 10)))
If the False case can happen, this would be one way:
speeds = [8.0, 1.3, 0.7, 0.8, 0.9, 1.1, 14.9, 14.0, 14.1, 14.2, 14.3, 13.8, 13.9, 13.7, 13.6, 13.5, 13.4, 15.7, 15.8, 15.0, 15.3, 15.4, 15.5, 15.6, 15.2, 12.8, 12.7, 12.6, 8.7, 8.8, 8.6, 9.0, 8.5, 8.4, 8.3, 0.1, 0.0, 16.4, 16.5, 16.7, 16.8, 17.0, 17.1, 17.8, 17.7, 17.6, 17.4, 17.5, 17.3, 17.9, 18.2, 18.3, 18.1, 18.0, 18.4, 18.5, 18.6, 19.0, 19.1, 18.9, 19.2, 19.3, 19.9, 20.1, 19.8, 20.0, 19.7, 19.6, 19.5, 20.2, 20.3, 18.7, 18.8, 17.2, 16.9, 11.5, 11.2, 11.3, 11.4, 7.1, 12.9, 14.4, 13.1, 13.2, 12.5, 12.1, 12.2, 13.0, 0.2, 3.6, 7.4, 4.6, 4.5, 4.3, 4.0, 9.4, 9.6, 9.7, 5.8, 5.7, 7.3, 2.1, 0.4, 0.3, 16.1, 11.9, 12.0, 11.7, 11.8, 10.0, 10.1, 9.8, 15.1, 14.7, 14.8, 10.2, 10.3, 1.2, 9.9, 1.9, 3.4, 14.6, 0.6, 5.1, 5.2, 7.5, 19.4, 10.7, 10.8, 10.9, 0.5, 16.3, 16.2, 16.0, 16.6, 12.4, 11.0, 1.7, 1.6, 2.4, 11.6, 3.9, 3.8, 14.5, 11.1]
from collections import Counter

# Number of readings falling into each whole-number speed bucket.
count = Counter(map(int, speeds))
# Readings within 5..30 that are either no higher than 14 or sit in a bucket
# holding at least 10 readings.
valids = [s for s in speeds
          if 5 <= s <= 30 and (s <= 14 or count[int(s)] >= 10)]
# max() raises on an empty sequence, hence the explicit False fallback.
# print() is called as a function so the snippet also runs on Python 3.
print(max(valids) if valids else False)
Or sort and use next, which can take your False as default:
speeds = [8.0, 1.3, 0.7, 0.8, 0.9, 1.1, 14.9, 14.0, 14.1, 14.2, 14.3, 13.8, 13.9, 13.7, 13.6, 13.5, 13.4, 15.7, 15.8, 15.0, 15.3, 15.4, 15.5, 15.6, 15.2, 12.8, 12.7, 12.6, 8.7, 8.8, 8.6, 9.0, 8.5, 8.4, 8.3, 0.1, 0.0, 16.4, 16.5, 16.7, 16.8, 17.0, 17.1, 17.8, 17.7, 17.6, 17.4, 17.5, 17.3, 17.9, 18.2, 18.3, 18.1, 18.0, 18.4, 18.5, 18.6, 19.0, 19.1, 18.9, 19.2, 19.3, 19.9, 20.1, 19.8, 20.0, 19.7, 19.6, 19.5, 20.2, 20.3, 18.7, 18.8, 17.2, 16.9, 11.5, 11.2, 11.3, 11.4, 7.1, 12.9, 14.4, 13.1, 13.2, 12.5, 12.1, 12.2, 13.0, 0.2, 3.6, 7.4, 4.6, 4.5, 4.3, 4.0, 9.4, 9.6, 9.7, 5.8, 5.7, 7.3, 2.1, 0.4, 0.3, 16.1, 11.9, 12.0, 11.7, 11.8, 10.0, 10.1, 9.8, 15.1, 14.7, 14.8, 10.2, 10.3, 1.2, 9.9, 1.9, 3.4, 14.6, 0.6, 5.1, 5.2, 7.5, 19.4, 10.7, 10.8, 10.9, 0.5, 16.3, 16.2, 16.0, 16.6, 12.4, 11.0, 1.7, 1.6, 2.4, 11.6, 3.9, 3.8, 14.5, 11.1]
# Number of readings falling into each whole-number speed bucket.
count = Counter(map(int, speeds))
# Scan from fastest to slowest; next() returns the first qualifying reading,
# or the default False when the generator yields nothing.  print() is called
# as a function so the snippet also runs on Python 3.
print(next((s for s in reversed(sorted(speeds))
            if 5 <= s <= 30 and (s <= 14 or count[int(s)] >= 10)),
           False))
Instead of Counter, you could also use groupby:
speeds = [8.0, 1.3, 0.7, 0.8, 0.9, 1.1, 14.9, 14.0, 14.1, 14.2, 14.3, 13.8, 13.9, 13.7, 13.6, 13.5, 13.4, 15.7, 15.8, 15.0, 15.3, 15.4, 15.5, 15.6, 15.2, 12.8, 12.7, 12.6, 8.7, 8.8, 8.6, 9.0, 8.5, 8.4, 8.3, 0.1, 0.0, 16.4, 16.5, 16.7, 16.8, 17.0, 17.1, 17.8, 17.7, 17.6, 17.4, 17.5, 17.3, 17.9, 18.2, 18.3, 18.1, 18.0, 18.4, 18.5, 18.6, 19.0, 19.1, 18.9, 19.2, 19.3, 19.9, 20.1, 19.8, 20.0, 19.7, 19.6, 19.5, 20.2, 20.3, 18.7, 18.8, 17.2, 16.9, 11.5, 11.2, 11.3, 11.4, 7.1, 12.9, 14.4, 13.1, 13.2, 12.5, 12.1, 12.2, 13.0, 0.2, 3.6, 7.4, 4.6, 4.5, 4.3, 4.0, 9.4, 9.6, 9.7, 5.8, 5.7, 7.3, 2.1, 0.4, 0.3, 16.1, 11.9, 12.0, 11.7, 11.8, 10.0, 10.1, 9.8, 15.1, 14.7, 14.8, 10.2, 10.3, 1.2, 9.9, 1.9, 3.4, 14.6, 0.6, 5.1, 5.2, 7.5, 19.4, 10.7, 10.8, 10.9, 0.5, 16.3, 16.2, 16.0, 16.6, 12.4, 11.0, 1.7, 1.6, 2.4, 11.6, 3.9, 3.8, 14.5, 11.1]
from itertools import groupby

# groupby() over the descending readings, keyed by int, yields one group per
# run of readings sharing an integer part; within a group the first element
# is the largest reading and len() is the bucket's population.
groups = (list(group) for _, group in groupby(reversed(sorted(speeds)), int))
# print() is called as a function so the snippet also runs on Python 3.
print(next((s[0] for s in groups
            if 5 <= s[0] <= 30 and (s[0] <= 14 or len(s) >= 10)),
           False))
Just in case all of these look odd to you, here's one close to your original. Just looking at the speeds from fastest to slowest and returning the first that matches the requirements:
def f(speeds):
    """Return the highest speed that meets the requirements, or False.

    A speed qualifies when it lies within 5..30 and is either no higher than
    14 or shares its integer part with at least 10 readings in ``speeds``.
    """
    per_integer = Counter(int(s) for s in speeds)
    # Descending order: the first qualifying reading is the answer.
    for candidate in sorted(speeds)[::-1]:
        if not 5 <= candidate <= 30:
            continue
        if candidate <= 14 or per_integer[int(candidate)] >= 10:
            return candidate
    return False
Btw, your definition of "the true maximum speed" seems rather odd to me. How about just looking at a certain percentile? Maybe like this:
# Reading at the 90% position of the sorted list; print() is called as a
# function so the line also runs on Python 3.
print(sorted(speeds)[len(speeds) * 9 // 10])
I'm not sure if this is faster, but it is shorter, and I think it achieves your requirements. It uses Counter.
from collections import Counter
import math


def valid(item):
    """Tell whether a (floored speed, count) pair may be the maximum speed.

    ``item`` is a ``(speed, count)`` pair as produced by ``Counter.items()``
    over floored speeds.  The speed must lie within 5..30 (the lower bound is
    the question's "has to be over a certain limit" requirement), and any
    speed above 13 must have been recorded at least 10 times.
    """
    speed, count = item
    return 5 <= speed <= 30 and (speed <= 13 or count >= 10)


speeds = [4,3,1,3,4,5,6,7,14,16,18,19,20,34,5,4,3,2,12,58,14,14,14]
# Collapse every reading to its whole-number bucket before counting.
speeds = [math.floor(s) for s in speeds]
counts = Counter(speeds)
# Tuples compare by their first element first, so max() picks the highest
# valid speed; default=False covers the case where no speed qualifies.
max_valid_speed = max(filter(valid, counts.items()), default=False)
Result: max_valid_speed == (12,1)
Using your sort idea we can start at the end of the list at the numbers less than 30, returning on the first number that matched the criteria or returning False:
from collections import Counter


def f(speeds):
    """Return the highest speed that meets the requirements, or False.

    Mirrors the rules of the question's loop-based code:
    * readings outside 5..30 are ignored (30 itself is kept);
    * a reading above 14 only counts when at least 10 in-range readings
      share its integer part;
    * readings of 14 or less count unconditionally.
    """
    # Keep only in-range readings, fastest first.
    in_range = sorted((s for s in speeds if 5 <= s <= 30), reverse=True)
    per_integer = Counter(int(s) for s in in_range)
    for speed in in_range:
        # Highest readings are visited first, so the first match is the max.
        if speed <= 14 or per_integer[int(speed)] >= 10:
            return speed
    # No reading satisfied the requirements.
    return False
Output for your list v:
In [70]: f(v)
Out[70]: 19.9
Without sorting you could use a dict; what your data is like will decide which approach is best. It will work for all cases, including an empty list:
def f_dict(speeds):
    """Return the highest speed that meets the requirements, or False.

    Works without sorting: in-range readings are counted per integer part in
    a dict, then the largest qualifying reading is selected.

    Rules, matching the question's loop-based code:
    * readings outside 5..30 are ignored (30 itself is kept);
    * a reading above 14 only counts when at least 10 in-range readings
      share its integer part;
    * readings of 14 or less count unconditionally.

    Returns False when ``speeds`` is empty or no reading qualifies.
    """
    in_range = [s for s in speeds if 5 <= s <= 30]
    # integer part -> number of in-range readings with that integer part
    per_integer = defaultdict(int)
    for speed in in_range:
        per_integer[int(speed)] += 1
    qualifying = (s for s in in_range
                  if s <= 14 or per_integer[int(s)] >= 10)
    # default= makes max() return False instead of raising on no candidates.
    return max(qualifying, default=False)
Output:
In [95]: f_dict(v)
Out[95]: 19.9

Categories