Dictionary to classify speed of objects in a list - python

I'm having difficulty using a dictionary to classify the speeds of objects.
Input:
Object_dict={"Airbus 380":{"Country":"France,Germany,Spain,UK","Top
Speed(Mach)":0.89},"Concorde":{"Country":"France,UK","Top
Speed(Mach)":2.01}, "Boeing X-43":{"Country": "USA","Top
Speed(Mach)":9.6}}
Output:
Objects_by_Mach={"Subsonic":["Airbus 380"],"Transonic":[],"Supersonic":["Concorde"],"Hypersonic":["Boeing X-43"]}
This is my code:
Mach_scale = {"Subsonic": 0,
"Transonic": 1,
"Supersonic":5,
"Hypersonic":5 ,
}
#Subsonic object has speed of Mach<0
#Transsonic object has speed of Mach=1
#Supersonic object has speed of 1<Mach<5
#Hypersonic object has speed of Mach>5
def mach_speeds(dict1):
    Objects_by_Mach = {}
    for object, data in dict1.items():
        for value in data["Top Speed(Mach)"]:
            Subsonic = [object for object in dict1 if value <= Mach_scale["Transonic"] and value > Mach_scale["Subsonic"] in dict1["Top Speed(Mach)"] in dict1.values()]
            Transonic = [object for object in dict1 if value == Mach_scale["Transonic"] in Mach_scale["Top Speed(Mach)"] in dict1.values()]
            Supersonic = [object for object in dict1 if value <= Mach_scale["Supersonic"] and value > Mach_scale["Transonic"] in dict1["Top Speed(Mach)"] in dict1.values()]
            Hypersonic = [object for object in dict1 if value > Mach_scale["Hypersonic"] in dict1["Top Speed(Mach)"] in dict1.values()]
    return Objects_by_Mach.update({"Subsonic": Subsonic, "Transonic": Transonic, "Supersonic": Supersonic, "Hypersonic": Hypersonic})
print(mach_speeds(Object_dict))
Thanks in advance again fellow SO'ers.

You can generalise and therefore shorten your code by specifying ranges for the Mach scale. The values used here may not be correct but can be easily adjusted to suit.
Mach_scale = {"Subsonic": (0.0, 0.8),
"Transonic": (0.8, 1.2),
"Supersonic": (1.2, 5.0),
"Hypersonic": (5.0, 10.0),
"High-hypersonic": (10.0, float('inf'))
}
Object_dict = {"Airbus 380": {"Country": "France,Germany,Spain,UK", "Top Speed(Mach)": 0.89},
"Concorde": {"Country": "France,UK", "Top Speed(Mach)": 2.01},
"Boeing X-43": {"Country": "USA", "Top Speed(Mach)": 9.6}}
result = dict()

def getmach(m):
    for k, v in Mach_scale.items():
        if m >= v[0] and m < v[1]:
            return k

for k, v in Object_dict.items():
    result.setdefault(getmach(v['Top Speed(Mach)']), []).append(k)

print(result)
Output:
{'Transonic': ['Airbus 380'], 'Supersonic': ['Concorde'], 'Hypersonic': ['Boeing X-43']}
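As a side note, the same range lookup can be done with binary search; a minimal equivalent sketch using the standard bisect module, with the same boundaries as Mach_scale above:
import bisect

bounds = [0.8, 1.2, 5.0, 10.0]
names = ["Subsonic", "Transonic", "Supersonic", "Hypersonic", "High-hypersonic"]

def getmach(m):
    # bisect_right counts how many boundaries lie at or below m
    return names[bisect.bisect_right(bounds, m)]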

First of all, please note that this question is very specific and will most likely help only you. We love questions that are general and will help as many people as possible!
There are some things in your code that are considered bad practice and are problematic.
First, it looks like the indentation is not correct.
Second, don't use the built-in name object - use a different variable name.
Also notice that in each iteration you are creating a new list; it looks like you want to update it, not create a new one.
I would try something like the following code:
airplanes = {
    "Airbus 380": {"Country": "France,Germany,Spain,UK", "Top Speed(Mach)": 0.89},
    "Concorde": {"Country": "France,UK", "Top Speed(Mach)": 2.01},
    "Boeing X-43": {"Country": "USA", "Top Speed(Mach)": 9.6}
}

mach_scale = {"Subsonic": 0.8,
              "Transonic": 1.2,
              "Supersonic": 5,
              "Hypersonic": 5
              }

"""
subsonic speed - below 0.8 mach
transonic speed - between 0.8 - 1.2 mach
supersonic speed - between 1.2 - 5 mach
hypersonic speed - above 5 mach
"""

def mach_speeds(airplane_data):
    subsonic, transonic, supersonic, hypersonic = [], [], [], []
    for plane, data in airplane_data.items():
        top_speed = data["Top Speed(Mach)"]
        if top_speed <= mach_scale["Subsonic"]:
            subsonic.append(plane)
        elif top_speed <= mach_scale["Transonic"]:
            transonic.append(plane)
        elif top_speed <= mach_scale["Supersonic"]:
            supersonic.append(plane)
        else:
            hypersonic.append(plane)

    result = {}
    result["Subsonic"] = subsonic
    result["Transonic"] = transonic
    result["Supersonic"] = supersonic
    result["Hypersonic"] = hypersonic
    return result

if __name__ == "__main__":
    print(mach_speeds(airplanes))
Output:
{'Subsonic': [], 'Transonic': ['Airbus 380'], 'Supersonic': ['Concorde'], 'Hypersonic': ['Boeing X-43']}

Related

Fastest way to iterate permutation with value guidelines

I have an array of dicts, and I need every combination of its elements such that no id value repeats within a combination and the ratio values sum to 1.0.
So the results would be:
results = [
    [
        {
            'id': 1,
            'ratio': .01
        },
        {
            'id': 2,
            'ratio': .99
        },
    ],
    [
        {
            'id': 1,
            'ratio': .50
        },
        {
            'id': 2,
            'ratio': .50
        },
    ],
    [ ... ],
    [ ... ],
]
For example:
_array_list = [
    {
        'id': 1,
        'ratio': .01
    },
    {
        'id': 1,
        'ratio': .02
    },
    ....
    {
        'id': 2,
        'ratio': .01
    },
    {
        'id': 3,
        'ratio': .02
    }
    ...
]
Each id has ratios from .01 to 1.0 in steps of .01.
I then do the following to get each possible combination
(there is a reason for this, but I am leaving out the parts that have nothing to do with the issue):
from itertools import combinations

unique_list_count = 2  # (this is the number of distinct ids)
all_combos = []
iter_count = 0
_iter_count = 0
for i in range(1, unique_list_count + 1):
    for combo in combinations(_array_list, i):
        _iter_count += 1
        ids = []
        # if iter_count > 1:
        #     break
        for c in combo:
            ids.append(c['id'])
        is_id_duplicate = len(ids) != len(set(ids))
        if is_id_duplicate is False:
            # make sure only appending full values
            if sum(v['ratio'] for v in combo) == 1.0:
                iter_count += 1
                print(iter_count, _iter_count)
                all_combos.append(list(combo))
I'm not sure if this is a good way or if I can even make it better, but it works. The issue is that when I have 5 IDs, each with 100 dictionaries, it will do about 600,000,000 combinations and take about 20 minutes.
Is there a way to do this more efficiently and faster?
You could use the below code. The advantage of using it is that it won't consider cases with repeating ids:
import itertools
from math import isclose

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s)+1))

def combosSumToOneById(inDictArray):
    results = []
    uniqueIds = {d['id'] for d in inDictArray}
    valuesDict = {id: [d['ratio'] for d in inDictArray if d['id'] == id] for id in uniqueIds}
    for idCombo in powerset(uniqueIds):
        for valueCombo in itertools.product(*[v for k, v in valuesDict.items() if k in idCombo]):
            if isclose(sum(valueCombo), 1.0):
                results.append([{'id': xid, 'ratio': xv} for xid, xv in zip(idCombo, valueCombo)])
    return results
I tested it on the below input
_array_list = [
    {
        'id': '1',
        'ratio': .1
    },
    {
        'id': '1',
        'ratio': .2
    },
    {
        'id': '2',
        'ratio': .9
    },
    {
        'id': '2',
        'ratio': .8
    },
    {
        'id': '3',
        'ratio': .8
    }]
combosSumToOneById(_array_list)
Returns: [[{'id': '1', 'ratio': 0.1}, {'id': '2', 'ratio': 0.9}], [{'id': '1', 'ratio': 0.2}, {'id': '2', 'ratio': 0.8}], [{'id': '1', 'ratio': 0.2}, {'id': '3', 'ratio': 0.8}]]
You should test whether the performance really exceeds that of the previous approach.
Please note that I modified the code to check isclose(sum, 1.0) rather than sum == 1. Since we are summing floating-point values, there will most likely be some error from the representation of the numbers, which is why this condition seems more appropriate.
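A quick demonstration of why a tolerance is needed:
>>> from math import isclose
>>> 0.1 + 0.2
0.30000000000000004
>>> 0.1 + 0.2 == 0.3
False
>>> isclose(0.1 + 0.2, 0.3)
True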
Until someone who understands the algorithm better than I do comes along, I don't think there's any way of speeding that up with the data types you have.
With the algorithm you are currently using:
can you pre-sort your data and filter some branches out that way?
is the ratio sum test more likely to fail than the duplicate test? If so, move it above.
drop the print (obviously)
avoid the cast to list from tuple when appending
And then use a multiprocessing.Pool() to use all your CPUs at once. Since this is CPU-bound it will get you a reasonable speed-up; a rough sketch follows.
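This is only a sketch of the idea, not your exact code: check_combo and find_combos are hypothetical names, and the filtering logic is just your two tests moved into a worker function.
from itertools import chain, combinations
from multiprocessing import Pool

def check_combo(combo):
    # worker: return the combo as a list if it passes both tests, else None
    ids = [c['id'] for c in combo]
    if len(ids) != len(set(ids)):
        return None
    if sum(c['ratio'] for c in combo) != 1.0:
        return None
    return list(combo)

def find_combos(array_list, unique_list_count):
    candidates = chain.from_iterable(
        combinations(array_list, i) for i in range(1, unique_list_count + 1))
    # call this under "if __name__ == '__main__':" so worker processes can start safely
    with Pool() as pool:
        # a large chunksize keeps inter-process overhead manageable
        return [c for c in pool.imap_unordered(check_combo, candidates, chunksize=10000)
                if c is not None]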
But I'm sure there is a more efficient way of doing this. You haven't said how you're getting your data, but if you can represent it as an array it might be vectorisable, which will be orders of magnitude faster.
I assume the general case where not each id has all values in [0.01, 1.0].
There are 3 main optimisations you can make and they all aim to instantly drop branches that are guaranteed to not satisfy your conditions.
1. Split the ratios of each id in a dictionary
This way you instantly avoid pointless combinations, e.g., [{'id': 1, 'ratio': 0.01}, {'id': 1, 'ratio': 0.02}]. It also makes it easier to try combinations between ids. So, instead of having everything in a flat list of dicts, reorganise the data in the following form:
# if your ids are 0-based consecutive integer numbers, a list of lists works too
array_list = {
    1: [0.01, 0.02, ...],
    2: [0.01, 0.02, ...],
    3: [...],
}
2. For an N-size pairing, you have N-1 degrees of freedom
If you're searching for a triplet and you already have (0.54, 0.33, _), you don't have to search all possible values for the last id. There is only one that can satisfy the condition sum(ratios) == 1.0.
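As a tiny illustration, with made-up ratio sets already converted to integer percentages (see the integer conversion below):
# hypothetical percentage sets for two ids
ratios_1 = {1, 54, 33}
ratios_2 = {46, 25}
# for each value of id 1 there is exactly one candidate for id 2,
# so a set-membership test replaces the whole inner loop
pairs = [(a, 100 - a) for a in ratios_1 if 100 - a in ratios_2]
print(pairs)  # [(54, 46)]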
3. You can further restrict the possible value range of each id based on the min/max values of the others.
Say you have 3 ids and they all have all the values in [0.01, 0.44]. It is pointless to try any combinations for (0.99, _, _), because the minimum sum for the last two ids is 0.02. Therefore, the maximum value that the first id can explore is 0.98 (well, 0.44 in this example but you get my drift). Similarly, if the maximum sum of the last two ids is 0.88, there is no reason to explore values below 0.12 for the first id. A special case of this is where the sum of the minimum value of all ids is more than 1.0 (or the max < 1.0), in which case you can instantly drop this combination and move on.
Using integers instead of floats
You are blessed in dealing only with some discrete values, so you're better off converting everything to integers. The first reason is to avoid any headaches with floating arithmetic. Case in point, did you know that your code misses some combinations exactly due to these inaccuracies?
And since you will be generating your own value ranges due to optimisation #3, it's much simpler to do for i in range(12, 99) than some roundabout way to generate all values in [0.12, .99) while making sure everything is properly rounded off at the second decimal digit AND THEN properly added together and checked against some tolerance value close to 1.0.
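For instance, a quick REPL check of why truncation alone goes wrong (round first, as the code below does):
>>> 0.29 * 100
28.999999999999996
>>> int(0.29 * 100)
28
>>> int(round(0.29 * 100))
29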
Code
from collections import defaultdict
import itertools as it

def combined_sum(values):
    def _combined_sum(values, comb, partial_sum, depth, mins, maxs):
        if depth == len(values) - 1:
            required = 100 - partial_sum
            if required in values[-1]:
                yield comb + (required,)
        else:
            min_value = mins[depth+1]
            max_value = maxs[depth+1]
            start_value = max(min(values[depth]), 100 - partial_sum - max_value)
            end_value = max(1, 100 - partial_sum - min_value)
            for value in range(start_value, end_value+1):
                if value in values[depth]:
                    yield from _combined_sum(values, comb+(value,), partial_sum+value, depth+1, mins, maxs)

    # precompute all the partial min/max sums, because we will be using them a lot
    mins = [sum(min(v) for v in values[i:]) for i in range(len(values))]
    maxs = [sum(max(v) for v in values[i:]) for i in range(len(values))]
    if mins[0] > 100 or maxs[0] < 100:
        return []
    return _combined_sum(values, tuple(), 0, 0, mins, maxs)

def asset_allocation(array_list, max_size):
    # a set is preferred here because we will be checking a lot whether
    # a value is in the iterable, which is faster in a set than in a tuple/list
    collection = defaultdict(set)
    for d in array_list:
        collection[d['id']].add(int(round(d['ratio'] * 100)))

    all_combos = []
    for i in range(1, max_size+1):
        for ids in it.combinations(collection.keys(), i):
            values = [collection[ID] for ID in ids]
            for group in combined_sum(values):
                all_combos.append([{'id': ID, 'ratio': ratio/100} for ID, ratio in zip(ids, group)])
    return all_combos

array_list = [{'id': ID, 'ratio': ratio/100}
              for ID in (1, 2, 3, 4, 5)
              for ratio in range(1, 101)
              ]
max_size = 5
result = asset_allocation(array_list, max_size)
This finishes in 14-15 seconds on my machine.
For comparison, for 3 ids this finishes in 0.007 seconds, and Gabor's solution, which effectively implements only optimisation #1, finishes in 0.18 seconds. For 4 ids it's .43 s and 18.45 s respectively. For 5 ids I stopped timing his solution after a few minutes, but it was expected to take at least 10 minutes.
If you are dealing with the case where all ids have all the values in [0.01, 1.0] and you insist on having the specific output indicated in your question, the above approach is still optimal. However, if you are okay with generating the output in a different format, you can do better.
For a specific group size, e.g., singles, pairs, triplets, etc., generate all the partitions that add up to 100 using the stars and bars approach. That way, instead of generating (1, 99), (2, 98), etc., for each pair of ids, i.e., (1, 2), (1, 3) and (2, 3), you do this only once.
I've modified the code from here to not allow for 0 in any partition.
import itertools as it

def partitions(n, k):
    for c in it.combinations(range(1, n), k-1):
        yield tuple(b-a for a, b in zip((0,)+c, c+(n,)))

def asset_allocation(ids, max_size):
    all_combos = []
    for k in range(1, max_size+1):
        id_comb = tuple(it.combinations(ids, k))
        p = tuple(partitions(100, k))
        all_combos.append((id_comb, p))
    return all_combos

ids = (1, 2, 3, 4, 5)
result = asset_allocation(ids, 5)
This finishes much faster, takes up less space, and also allows you to home in on all the combinations for singles, pairs, etc., individually. Now, if you were to take the product of id_comb and p to generate the output in your question, you'd lose all that time saved. In fact, it'd come out a bit slower than the general method from above, but at least this piece of code is still more compact.
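For intuition, a quick check of what the partition generator produces:
>>> list(partitions(5, 2))
[(1, 4), (2, 3), (3, 2), (4, 1)]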

Iterate over part of tuple key in Python dictionary

I am working on an optimization project. I have a series of dictionaries with tuples as keys, and another dictionary (a decision variable with Gurobi) whose keys are the first elements of the tuples in the other dictionaries. I need to be able to do the following:
data1 = {(place, person): q}
data2 = {person: s}
x = {place: var}
qx = {k: x[k]*data1[k] for k in x}
total1 = {}
for key, value in qx.items():
    person = key[1]
    if person in total1:
        total1[person] = total1[person] + value
    else:
        total1[person] = value

total2 = {k: total1[k]/data2[k] for k in total1}
(Please note that the data1, data2, and x dictionaries are very large, 10,000+ distinct place/person pairs).
This same process works when I use the raw data in place of the decision variable, which uses the same (place, person) key. Unfortunately, my variable within the Gurobi model itself must be a dictionary and it cannot contain the person key value.
Is there any way to iterate over just the first value in the tuple key?
EDIT:
Here are some sample values (sensitive data, so placeholder values):
data1 = {(1, a): 28, (1, c): 57, (2, b): 125}
data2 = {a: 7.8, b: 8.5, c: 8.4}
x = {1: 0.002, 2: 0.013}
Values in data1 are all integers, data2 are hours, and x are small decimals.
Outputs in total2 should look similar to the following (assuming there are many other rows for each person):
total2 = {a: 0.85, b: 1.2, c: 1.01}
This code is essentially calculating a "productivity score" for each person. The decision variable, x, is looking only at each individual place for business purposes, so it cannot include the person identifiers. Also, the Gurobi package is very limiting about how things can be formatted, so I have not found a way to even use the tuple key for x.
Generally, the most efficient way to aggregate values into bins is to use a for loop and store the values in a dictionary, as you did with total1 in your example. In the code below, I have fixed your qx line so it runs, but I don't know if this matches your intention. I also used total1.setdefault to streamline the code a little:
a, b, c = 'a', 'b', 'c'
data1 = {(1, a): 28, (1, c): 57, (2, b): 125}
data2 = {a: 7.8, b: 8.5, c: 8.4}
x = {1: 0.002, 2: 0.013}

qx = {(place, person): x[place] * value for (place, person), value in data1.items()}

total1 = {}
for (place, person), value in qx.items():
    total1.setdefault(person, 0.0)
    total1[person] += value

total2 = {k: total1[k] / data2[k] for k in total1}
print(total2)
# {'a': 0.0071794871794871795, 'c': 0.013571428571428571, 'b': 0.19117647058823528}
But this doesn't produce the result you asked for. I can't tell at a glance how you get the result you showed, but this may help you move in the right direction.
It might also be easier to read if you moved the qx logic into the loop, like this:
total1 = {}
for (place, person), value in data1.items():
    total1.setdefault(person, 0.0)
    total1[person] += x[place] * value

total2 = {k: total1[k] / data2[k] for k in total1}
Or, if you want to do this often, it might be worth creating a cross-reference between persons and their matching places, as @martijn-pieters suggested (note, you still need a for loop to do the initial cross-referencing):
# create a list of valid places for each person
places_for_person = {}
for place, person in data1:
    places_for_person.setdefault(person, [])
    places_for_person[person].append(place)

# now do the calculation
total2 = {
    person:
        sum(
            data1[place, person] * x[place]
            for place in places_for_person[person]
        ) / data2[person]
    for person in data2
}
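The cross-referencing loop can also be written with collections.defaultdict, which drops the setdefault call:
from collections import defaultdict

# same cross-reference as above; defaultdict creates the lists on first access
places_for_person = defaultdict(list)
for place, person in data1:
    places_for_person[person].append(place)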
For creating a new dictionary removing the tuple:
a, b, c = "a", "b", "c"
data1 = {(1, a): 28, (1, c): 57, (2, b): 125}
total = list()
spot = 0
for a in data1:
total.append(list(a[1])) # Add new Lists to list "total" containing the Key values
total[spot].append(data1[a]) # Add Values to Keys judging from their spot in the list
spot += 1 # to keep the spot in correct place in lists
total = dict(total) # convert it to dictionary
print(total)
Output:
{'a': 28, 'c': 57, 'b': 125}
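Since each person appears in only one tuple key in this sample, the same result also falls out of a one-line dict comprehension:
# assumes each person appears in at most one key, as in the sample data
total = {person: value for (place, person), value in data1.items()}
print(total)
# {'a': 28, 'c': 57, 'b': 125}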

Finding largest areas in dictionary

I'm writing a function where I go through a dictionary. The dictionary contains artists as keys and their paintings as values. I need to find the painting in the dictionary that has the largest area, and if there are several with equal area they should all be returned as a list of tuples.
Example Dictionary:
{
    'A, Jr.': [("One", 1400, 10, 20.5, "oil paint", "Austria"), ("Three", 1400, 100.0, 100.0, "oil paint", "France"), ("Twenty", 1410, 50.0, 200.0, "oil paint", "France")],
    'X': [("Eight", 1460, 100.0, 20.0, "oil paint", "France"), ("Six", 1465, 10.0, 23.0, "oil paint", "France"), ("Ten", 1465, 12.0, 15.0, "oil paint", "Austria"), ("Thirty", 1466, 30.0, 30.0, "watercolor", "Germany")],
    'M': [("One, Two", 1500, 10.0, 10.0, "panel", "Germany")]
}
Basically the four digit number is the year that the painting or work of art was created and the next two numbers are the length and width. I need to return the values that have the largest area when multiplying the lengths and widths. So for the above dictionary the function find_largest should return
find_largest(dictionary2())
[('A, Jr.', 'Three'), ('A, Jr.', 'Twenty')]
Since 100 * 100 = 10,000 for the "Three" painting and 50 * 200 = 10,000 for the "Twenty" painting they are both returned as tuples within a list.
Does anyone have advice on how to do this? I have started code below but I don't think it's the right approach for this.
def find_largest(dictionary):
    matches = {}
    for key, the_list in db.items():
        for record in the_list:
            value = record[4]
            if dictionary in record:
                if key in matches:
                    max(the_list)
                    max(lst, key=lambda tupl: tupl[2]*tupl[3])
                    matches[key].append(record)
                else:
                    matches[key] = [record]
    return matches
This is basically my code from an earlier function with a few significant changes. This basic framework has worked for a few of my goals. I added max(matches), but I realize this isn't doing much unless the function multiplies the lengths and widths and then looks for the max. If anyone has advice, it would be helpful.
It would probably be easier to just keep track of your current max instead:
data = {
    'A, Jr.': [("One", 1400, 10, 20.5, "oil paint", "Austria"), ("Three", 1400, 100.0, 100.0, "oil paint", "France"), ("Twenty", 1410, 50.0, 200.0, "oil paint", "France")],
    'X': [("Eight", 1460, 100.0, 20.0, "oil paint", "France"), ("Six", 1465, 10.0, 23.0, "oil paint", "France"), ("Ten", 1465, 12.0, 15.0, "oil paint", "Austria"), ("Thirty", 1466, 30.0, 30.0, "watercolor", "Germany")],
    'M': [("One, Two", 1500, 10.0, 10.0, "panel", "Germany")]
}

def find_largest(d):
    matches = []
    max_value = 0
    for key in d:
        for record in d[key]:
            value = record[2] * record[3]
            if value > max_value:
                matches = [(key, record[0])]
                max_value = value
            elif value == max_value:
                matches.append((key, record[0]))
    return matches
# Output
>>> find_largest(data)
[('A, Jr.', 'Three'), ('A, Jr.', 'Twenty')]
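If you would rather lean on max() than manual bookkeeping, here is a two-pass sketch of the same idea:
def find_largest(d):
    # flatten to (artist, title, area) triples, then keep every tie for the max
    flat = [(artist, rec[0], rec[2] * rec[3]) for artist, recs in d.items() for rec in recs]
    biggest = max(area for _, _, area in flat)
    return [(artist, title) for artist, title, area in flat if area == biggest]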

Comparing the elements in two JSON dicts and getting the difference as a ratio or percentage

I have a JSON object and I am working on some data manipulation. I want to get the difference as a ratio so I can more accurately rank the elements in my dict.
[{condition: functional, location:Sydney }, {condition:functional, location: Adelaide}, {condition:broken, location:Sydney}]
I can get the number of points where the location is not functional like so:
filter(lambda x: x['condition']!='functional', json_obj)
But I would like to return this as a percentage ratio.
You can try Counter and defaultdict as below:
from collections import Counter, defaultdict

d = [{'condition': 'functional', 'location': 'Sydney'}, {'condition': 'functional', 'location': 'Adelaide'}, {'condition': 'broken', 'location': 'Sydney'}]
cities = [j['location'] for j in d]

# initialize data
data = defaultdict(float)
for city in cities:
    data[city] = 0

# count occurrences of each city as a Counter dictionary
counters = Counter((i['location'] for i in d))

# do the calculation
for i in d:
    if i['condition'] == 'functional':
        inc = (counters[i['location']]*100)/len(d)
        data[i['location']] += float(inc)
    elif i['condition'] == 'broken':
        dec = (counters[i['location']]*100)/len(d)
        data[i['location']] -= float(dec)
    else:
        raise Exception("Error")

print {k: "{0}%".format(v) for k, v in data.items()}
Output:
{'Sydney': '0.0%', 'Adelaide': '33.0%'}
It's easy:
a = [{'condition': 'functional', 'location': 'Sydney'}, {'condition': 'functional', 'location': 'Adelaide'}, {'condition': 'broken', 'location': 'Sydney'}]
b = filter(lambda x: x['condition'] != 'functional', a)
all_locations = [item['location'] for item in b]
result = {}
for location in all_locations:
    if location not in result.keys():
        result[location] = all_locations.count(location)*100/float(len(all_locations))
print result
It will return the percentage for every location.
Is this what you want? This compares the elements in two JSON dicts and gets the difference as a ratio, as you ask for in the title. But reading the question body, it's not really clear what you want to do.
This assumes that both dictionaries have the same keys.
def dictionary_similarity(d1, d2):
    return sum(d1[key] == d2[key] for key in d1) / float(len(d1))

dictionary_similarity(
    {'condition': 'functional', 'location': 'Sydney'},
    {'condition': 'functional', 'location': 'Adelaide'},
)
0.5

Merging python lists based on a 'similar' float value

I have a list (containing tuples) and I want to merge its entries when the first elements are within a maximum distance of each other (i.e., when the delta is < 0.05). I have the following list as an example:
[(0.0, 0.9811758192941256), (1.00422, 0.9998252466431066), (0.0, 0.9024831978342827), (2.00425, 0.9951777494430947)]
This should yield something like:
[(0.0, 1.883659017),(1.00422, 0.9998252466431066),(2.00425,0.9951777494430947)]
I am thinking that I can use something similar to this question (Merge nested list items based on a repeating value), although a lot of other questions yield a similar answer. The only problem that I see there is that they use collections.defaultdict or itertools.groupby, which require exact matching of the element. An important addition here is that I want the first element of a merged tuple to be the weighted mixture of the elements. For example:
If (1.001, 80) and (0.99, 20) are matched, then the result should be (0.9988, 100).
Is something similar possible but with the matching based on value difference and not exact match?
What I was trying myself (but don't really like the look of it) is:
Res = 0.05
combos = itertools.combinations(lst, 2)
for i in combos:
    if i[0][0] > i[1][0]-Res and i[0][0] < i[1][0]+Res:
        newValue = ...
-- UPDATE --
Based on some comments and Dawg's answer, I tried the following approach:
for fv, v in total:
    k = round(fv, 2)
    data[k] = data.get(k, 0)+v
using the following list (actual data example, instead of short example list):
total = [(0.0, 0.11630591852564721), (1.00335, 0.25158664272201053), (2.0067, 0.2707487305913156), (3.0100499999999997, 0.19327075057473678), (4.0134, 0.10295042331357719), (5.01675, 0.04364856520231155), (6.020099999999999, 0.015342958201863783), (0.0, 0.9811758192941256), (1.00422, 0.018649427348981), (0.0, 0.9024831978342827), (2.00425, 0.09269455160881204), (0.0, 0.6944298762418107), (0.99703, 0.2536959281304138), (1.99406, 0.045877927988415786)]
which then yields problems with values such as 2.0067 (rounded to 2.01) and 1.99406 (rounded to 1.99), where the total difference is 0.01264 (far below 0.05, the value that I had in mind as a 'limit' for now, though it should stay adjustable). Rounding the values to 1 decimal place is also not an option, since that would result in a window of ~0.09 with values such as 2.04999 and 1.95001, which both yield 2.0 in that case.
The exact output was:
{0.0: 2.694394811895866, 1.0: 0.5239319982014053, 4.01: 0.10295042331357719, 5.02: 0.04364856520231155, 2.0: 0.09269455160881204, 1.99: 0.045877927988415786, 3.01: 0.19327075057473678, 6.02: 0.015342958201863783, 2.01: 0.2707487305913156}
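The boundary problem in miniature:
>>> round(2.0067, 2), round(1.99406, 2)
(2.01, 1.99)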
accum = list()
data = [(0.0, 0.9811758192941256), (1.00422, 0.9998252466431066), (0.0, 0.9024831978342827), (2.00425, 0.9951777494430947)]
EPSILON = 0.05

newdata = {d: True for d in data}
for k, v in data:
    if not newdata[(k, v)]:
        continue
    newdata[(k, v)] = False  # use each piece of data only once
    keys, values = [k*v], [v]
    for kk, vv in [d for d in data if newdata[d]]:
        if abs(k-kk) < EPSILON:
            keys.append(kk*vv)
            values.append(vv)
            newdata[(kk, vv)] = False
    accum.append((sum(keys)/sum(values), sum(values)))
You can round the float values then use setdefault:
li = [(0.0, 0.9811758192941256), (1.00422, 0.9998252466431066), (0.0, 0.9024831978342827), (2.00425, 0.9951777494430947)]

data = {}
for fv, v in li:
    k = round(fv, 5)
    data.setdefault(k, 0)
    data[k] += v

print data
# {0.0: 1.8836590171284082, 2.00425: 0.9951777494430947, 1.00422: 0.9998252466431066}
If you want some more complex comparison (other than fixed rounding) you can create a hashable object based on the epsilon value you want and use the same method from there.
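As a minimal sketch of that idea (one possible scheme; note that two close values straddling a bucket boundary will still land in different buckets):
class ApproxKey(object):
    """Buckets floats into EPSILON-wide bins so nearby keys compare equal."""
    EPSILON = 0.05

    def __init__(self, value):
        self.value = value
        self.bin = int(value / self.EPSILON)

    def __hash__(self):
        return hash(self.bin)

    def __eq__(self, other):
        return self.bin == other.bin

# usage: data.setdefault(ApproxKey(fv), 0) merges keys that share a bin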
As pointed out in the comments, this works too:
data = {}
for fv, v in li:
    k = round(fv, 5)
    data[k] = data.get(k, 0)+v
