# Allowed [minimum, maximum] range for each kind of reading.
valid = {'Temp': [10, 55], 'rain_percent': [49, 100], 'humidity': [30, 50]}
# The readings to validate.
data = {'Temp': 30.45, 'rain_percent': 80.56}

# Both bounds are inclusive; a message is printed only when a reading falls
# outside its range.
min_temp, max_temp = valid['Temp']
if not (min_temp <= data['Temp'] <= max_temp):
    print("Bad Temp")

min_rain, max_rain = valid['rain_percent']
if not (min_rain <= data['rain_percent'] <= max_rain):
    print("It's not going to rain")
This is what I'm doing with the two dictionaries I have. I know that this check can be improved. Since both dictionaries (valid and data) share the same keys, there must be a better way of implementing this check. Can anyone help me with this?
Thanks a lot.
If I understand the question correctly, you're trying to check if each value data[k] is in the range defined by the 2-element list/tuple valid[k].
Try using a for loop and dict.items() to iterate through data and compare each value to the corresponding range in valid:
# Allowed [low, high] range for each kind of reading.
valid = {'Temp': [10, 55], 'rain_percent': [49, 100], 'humidity': [30, 50]}
data = {'Temp': 30.45, 'rain_percent': 80.56, 'humidity': 70}

# Compare every reading with the range stored under the same key in `valid`.
for key, val in data.items():
    # Named low/high so the builtins min() and max() are not shadowed.
    low, high = valid[key]
    if low <= val <= high:
        print("%s=%g is in the valid range (%g-%g)" % (key, val, low, high))
    else:
        print("%s=%g is out of valid range (%g-%g)" % (key, val, low, high))
In the case of the example data values I gave, it will print this:
rain_percent=80.56 is in the valid range (49-100)
Temp=30.45 is in the valid range (10-55)
humidity=70 is out of valid range (30-50)
This answer builds on @Dan's.
It could be the case that you want to add other parameters to your 'valid' dictionary such as avg, standard deviation, etc and many more data points such as air_pressure, wind_speed, visibility, etc.
Especially in the case where you have many more data points(temp, humidity, etc.) and many more parameters and labels (min, max, 'high temp,' 'low temp,' etc.), you would want your 'valid' dictionary to be more descriptive. You can then write general functions that are more flexible and descriptive depending on the depth of your 'valid' dictionary.
Here's an example. Let's now call the 'valid' dictionary 'parameters.'
# Per-reading configuration: inclusive 'min'/'max' bounds, an 'avg' reference
# value, and the labels printed by the check functions.
parameters = {
    'temp': {
        'min': 10,
        'max': 55,
        'avg': 40,
        'stddev': 10,
        'in_range_label': "Good Temp",
        'out_range_label': "Bad Temp",
        'above_average_label': "Above average temp",
        'below_average_label': "Below average temp",
    },
    'rain_percent': {
        'min': 49,
        'max': 100,
        'avg': 75,
        'in_range_label': "Going to rain",
        'out_range_label': "Not going to rain",
        'above_average_label': "Above average rain",
        'below_average_label': "Below average rain",
    },
    'humidity': {
        'min': 30,
        'max': 50,
        'avg': 45,
        'in_range_label': "Humid",
        'out_range_label': "Not humid",
        # Was "Above average hemp": a typo that would be printed verbatim.
        'above_average_label': "Above average humidity",
        'below_average_label': "Below average humidity",
    },
}
data = {'temp': 30.45, 'rain_percent': 80.56 }
def check_min_max(data, parameters):
    """Print whether each reading in ``data`` lies inside its configured range.

    For every ``key, value`` pair in ``data`` one line of the form
    ``key=value, label`` is printed. The label is
    ``parameters[key]['in_range_label']`` when
    ``parameters[key]['min'] <= value <= parameters[key]['max']`` (both bounds
    inclusive) and ``parameters[key]['out_range_label']`` otherwise.

    Raises:
        KeyError: if a key of ``data`` has no entry in ``parameters``, or the
            entry lacks a field that is needed.
    """
    for name, value in data.items():
        spec = parameters[name]
        # Bounds are read into an expression, not into variables called
        # min/max, so the builtins stay usable.
        in_range = spec['min'] <= value <= spec['max']
        label = spec['in_range_label'] if in_range else spec['out_range_label']
        print('{}={}, {}'.format(name, value, label))
def check_avg(data, parameters):
    """Print whether each reading in ``data`` is above its configured average.

    For every ``key, value`` pair in ``data`` one line of the form
    ``key=value, label`` is printed. The label is
    ``parameters[key]['above_average_label']`` when
    ``value > parameters[key]['avg']`` and
    ``parameters[key]['below_average_label']`` otherwise. A value exactly
    equal to the average therefore gets the "below" label.

    Raises:
        KeyError: if a key of ``data`` has no entry in ``parameters``, or the
            entry lacks a field that is needed.
    """
    for name, value in data.items():
        spec = parameters[name]
        if value > spec['avg']:
            label = spec['above_average_label']
        else:
            label = spec['below_average_label']
        print('{}={}, {}'.format(name, value, label))
check_min_max(data, parameters)
check_avg(data, parameters)
>>>
rain_percent=80.56, Going to rain
temp=30.45, Good Temp
rain_percent=80.56, Above average rain
temp=30.45, Below average temp
Related
I have the following list
count = 3.5, price = 2500
count = 3, price = 400
count = 2, price = 3000
count = 3.5, price = 750
count = 2, price = 500
I want to find the average price for all where the count is the same. For example:
count = 2, price = 3000
count = 2, price = 500
3000 + 500 = 3500
3500/2 = 1750
Avg for 'count 2' is 1750
Here's my code so far
# NOTE(review): `dictionary_database_list`, `count_list` and the `collections`
# import are not defined in this excerpt - confirm they exist in the
# surrounding code.
# Collect the "average" field of every record.
avg_list = [value["average"] for value in dictionary_database_list]
# Tally how many times each value occurs in count_list.
counter_obj = collections.Counter(count_list)
print ("AVG:")
# Prints each distinct value with its number of occurrences; no average of
# prices is computed here.
for i in counter_obj:
print (i, counter_obj[i])
I'll admit I'm not 100% clear on what you're looking for here, but I'll give it a shot:
A good strategy when you want to iterate over a list of "things" and accumulate some kind of information about "the same kind of thing" is to use a hash table. In Python, we usually use a dict for algorithms that require a hash table.
To collect enough information to get the average price for each item in your list, we need:
a) the total number of items with a specific "count"
b) the total price of items with a specific "count"
So let's build a data structure that maps a "count" to a dict containing "total items" and "total price" for the item with that "count".
Let's take our input in the format:
item_list = [
{'count': 3.5, 'price': 2500},
{'count': 3, 'price': 400},
{'count': 2, 'price': 3000},
{'count': 3.5, 'price': 750},
{'count': 2, 'price': 500},
]
Now let's map the info about "total items" and "total price" in a dict called items_by_count:
for item in item_list:
count, price = item['count'], item['price']
items_by_count[count]['total_items'] += 1
items_by_count[count]['total_price'] += price
But wait! items_by_count[count] will throw a KeyError if count isn't already in the dict. This is a good use case for defaultdict. Let's define the default value of a count we've never seen before as 0 total price, and 0 total items:
from collections import defaultdict
items_by_count = defaultdict(lambda: {
'total_items': 0,
'total_price': 0
})
Now our code won't throw an exception every time we see a new value for count.
Finally, we need to actually take the average. Let's get the information we need in another dict, mapping count to average price. This is a good use case for a dict comprehension:
# Map each count to its average price. dict.items() is used because
# dict.iteritems() does not exist on Python 3.
{count: item['total_price'] / item['total_items']
 for count, item in items_by_count.items()}
This iterates over the items_by_count dict and creates the new dict that we want.
Putting it all together:
from collections import defaultdict


def get_average_price(item_list):
    """Return the average price for each distinct ``count`` in ``item_list``.

    Args:
        item_list: iterable of dicts, each with a ``'count'`` and a
            ``'price'`` entry.

    Returns:
        A dict mapping every ``count`` value seen to the sum of the prices of
        the items with that count divided by the number of such items. An
        empty input yields an empty dict.

    Raises:
        KeyError: if an item lacks ``'count'`` or ``'price'``.

    Note:
        The average uses ``/``. On Python 3 that is true division; on
        Python 2, integer prices would be truncated.
    """
    # A count seen for the first time starts with zero items and zero price.
    items_by_count = defaultdict(lambda: {
        'total_items': 0,
        'total_price': 0,
    })

    for item in item_list:
        totals = items_by_count[item['count']]
        totals['total_items'] += 1
        totals['total_price'] += item['price']

    # dict.items() works on Python 2 and 3; iteritems() was Python 2 only.
    return {count: totals['total_price'] / totals['total_items']
            for count, totals in items_by_count.items()}
If we pass in our example input dict, this function returns:
{3.5: 1625, 2: 1750, 3: 400}
Which is hopefully the output you want! Be cautious of gotchas like float division in your particular Python version.
You need to iterate over your items
See documentation
avg(dictionary.values()) is probably what you want
I have List of dictionaries like:
Stock=[
{'ID':1,'color':'red','size':'L','material':'cotton','weight':100,'length':300,'location':'China'},
{'ID':2,'color':'green','size':'M','material':'cotton','weight':200,'length':300,'location':'China'},
{'ID':3,'color':'blue','size':'L','material':'cotton','weight':100,'length':300,'location':'China'}
]
And other list of dictionaries like:
# Costs are written as strings ('1$'): a bare 1$ is not a valid Python literal.
# Each record is followed by a comma so the list parses.
Prices = [
    {'color': 'red', 'size': 'L', 'material': 'cotton', 'weight': 100, 'length': 300, 'location': 'China'},
    {'color': 'blue', 'size': 'S', 'weight': 500, 'length': 150, 'location': 'USA', 'cost': '1$'},
    {'color': 'pink', 'size': 'L', 'material': 'cotton', 'location': 'China', 'cost': '5$'},
    {'cost': '5$', 'color': 'blue', 'size': 'L', 'material': 'silk', 'weight': 100, 'length': 300},
]
So I need to find the 'cost' for each record in Stock from Prices. But there may be situations where no entry in Prices matches a Stock record 100%; in that case I need the most similar entry and its 'cost'.
output=[{'ID':1,'cost':1$},{'ID':2,'cost':5$},...]
Please suggest the optimal solution for this task. I think it should be something like a loop from the highest to the lowest degree of match: first try to find a record with the maximum number of matching fields, and if none is found, try a less strict matching condition.
how about this
Stock = [
    {'ID': 1, 'color': 'red', 'size': 'L', 'material': 'cotton', 'weight': 100, 'length': 300, 'location': 'China'},
    {'ID': 2, 'color': 'green', 'size': 'M', 'material': 'cotton', 'weight': 200, 'length': 300, 'location': 'China'},
    {'ID': 3, 'color': 'blue', 'size': 'L', 'material': 'cotton', 'weight': 100, 'length': 300, 'location': 'China'},
]
Prices = [
    {'color': 'red', 'size': 'L', 'material': 'cotton', 'weight': 100, 'length': 300, 'location': 'China'},
    {'cost': '2$', 'color': 'blue', 'size': 'S', 'weight': 500, 'length': 150, 'location': 'USA'},
    {'cost': '5$', 'color': 'pink', 'size': 'L', 'material': 'cotton', 'location': 'China'},
    {'cost': '15$', 'color': 'blue', 'size': 'L', 'material': 'silk', 'weight': 100, 'length': 300},
]

# Entries without a 'cost' cannot answer the lookup, so discard them first.
Prices = [entry for entry in Prices if "cost" in entry]

result = []
for stock_item in Stock:
    stock_pairs = set(stock_item.items())
    # The closest price entry is the one sharing the most (key, value) pairs
    # with this stock record; on a tie max() keeps the earliest entry.
    closest = max(
        Prices,
        key=lambda price: sum(pair in stock_pairs for pair in price.items()),
        default=None,
    )
    if closest:
        result.append({"ID": stock_item["ID"], "cost": closest["cost"]})
print(result)
#[{'ID': 1, 'cost': '5$'}, {'ID': 2, 'cost': '5$'}, {'ID': 3, 'cost': '15$'}]
To find the most similar entry, I first transform the stock dict into a set of (key, value) pairs, then use max with a lambda as its key function to find the price entry whose items have the largest intersection with the stock record being checked.
it reminds me of fuzzy or neural network solutions,
[on python2]
anyway , here is a Numpy solution, :
Stock=[
{'ID':1,'color':'red','size':'L','material':'cotton','weight':100,'length':300,'location':'China'},
{'ID':2,'color':'green','size':'M','material':'cotton','weight':200,'length':300,'location':'China'},
{'ID':3,'color':'blue','size':'L','material':'cotton','weight':100,'length':300,'location':'China'}
]
Prices = [
    {'color': 'red', 'size': 'L', 'material': 'cotton', 'weight': 100, 'length': 300, 'location': 'China'},
    {'cost': 2, 'color': 'blue', 'size': 'S', 'weight': 500, 'length': 150, 'location': 'USA'},
    {'cost': 5, 'color': 'pink', 'size': 'L', 'material': 'cotton', 'location': 'China'},
    {'cost': 15, 'color': 'blue', 'size': 'L', 'material': 'silk', 'weight': 100, 'length': 300},
]
import numpy as np

# Drop records that carry no 'cost'; they cannot answer a price lookup.
# The list is rebuilt (in place, via slice assignment) instead of calling
# remove() inside a loop over the same list, which skips the element that
# follows each removal. `'cost' in p` replaces dict.has_key(), which no longer
# exists on Python 3.
Prices[:] = [p for p in Prices if 'cost' in p]
def numerize(lst_of_dics):
    """Encode each record as a flat list of 18 numbers (3 per field).

    The fields are visited in the fixed order color, size, material, weight,
    length, location:

    * color, material, location: code points of the first three characters;
    * size: code point of the single character, repeated three times;
    * weight, length: the value itself, repeated three times.

    A field that is missing, or whose value cannot be encoded this way (for
    example a string shorter than three characters, or a size longer than one
    character), contributes ``[0, 0, 0]``.

    Args:
        lst_of_dics: iterable of dict records.

    Returns:
        A list with one 18-element list per input record.
    """
    fields = ('color', 'size', 'material', 'weight', 'length', 'location')
    first_three_chars = ('color', 'material', 'location')

    encoded = []
    for record in lst_of_dics:
        row = []
        for name in fields:
            try:
                value = record[name]
                if name in first_three_chars:
                    triple = [ord(value[0]), ord(value[1]), ord(value[2])]
                elif name == 'size':
                    triple = [ord(value)] * 3
                else:
                    triple = [value] * 3
            except (KeyError, IndexError, TypeError):
                # Only the expected failures (absent key, too-short string,
                # value ord()/indexing cannot handle) mean "unknown"; anything
                # else is no longer silently swallowed by a bare except.
                triple = [0, 0, 0]
            row += triple
        encoded.append(row)
    return encoded
St = numerize(Stock)
Pr = np.array(numerize(Prices))
output = []
# Pair each encoded row with its source record so the reported ID is the
# record's own 'ID' rather than its position in the list.
for stock, s in zip(Stock, St):
    # Repeat the stock row once per price row so both arrays have Pr's shape.
    s0 = np.reshape(s * Pr.shape[0], Pr.shape)
    # Absolute difference per encoded component.
    s1 = abs(Pr - s0)
    # Keep a difference only where both sides are non-zero (numerize encodes
    # an unknown field as zeros).
    both_known = Pr.astype('bool') * s0.astype('bool')
    s2 = s1 * both_known
    # A field that is zero on either side does not count as a match: charge a
    # flat penalty of 25 per component instead.
    s21 = np.logical_not(both_known) * 25
    s2 = s2 + s21
    # Total distance from this stock record to each price row.
    s3 = np.sum(s2, 1)
    # Index of the first price row with the smallest distance.
    s4 = np.where(s3 == np.min(s3))[0][0]
    c = Prices[s4]['cost']
    output.append({'ID': stock['ID'], 'cost': c})
print(output)
That gives me the following results (with many assumptions):
[{'cost': 15, 'ID': 1}, {'cost': 5, 'ID': 2}, {'cost': 15, 'ID': 3}]
Note, that this is correct comparison result based on Values and Kinds of properties
please up vote and check the answer if it satisfies you..
I'm writing a function where I go through a dictionary. The dictionary contains artists as keys and their paintings as values. I need to find the painting in a dictionary that has the largest area and if there are two that have equal area they should be returned as a list of tuples.
Example Dictionary:
{
'A, Jr.':[("One",1400,10,20.5,"oil paint","Austria"),("Three",1400,100.0,100.0,"oil paint","France"),("Twenty",1410,50.0,200.0,"oil paint","France")],
'X':[("Eight",1460, 100.0, 20.0, "oil paint","France"),("Six",1465,10.0, 23.0, "oil paint", "France"),("Ten",1465,12.0,15.0,"oil paint","Austria"),("Thirty",1466,30.0,30.0,"watercolor","Germany")],
'M':[("One, Two", 1500, 10.0, 10.0, "panel","Germany")]
}
Basically the four digit number is the year that the painting or work of art was created and the next two numbers are the length and width. I need to return the values that have the largest area when multiplying the lengths and widths. So for the above dictionary the function find_largest should return
find_largest(dictionary2())
[('A, Jr.', 'Three'), ('A, Jr.', 'Twenty')]
Since 100 * 100 = 10,000 for the "Three" painting and 50 * 200 = 10,000 for the "Twenty" painting they are both returned as tuples within a list.
Does anyone have advice on how to do this? I have started code below but I don't think its the right approach for this.
# Work-in-progress attempt from the question; kept as posted.
def find_largest(dictionary):
matches = {}
# NOTE(review): `db` is not defined in this function - presumably the
# `dictionary` argument was intended; confirm.
for key, the_list in db.items():
for record in the_list:
# Assigned but not used in the lines shown.
value = record[4]
# Tests whether the argument equals one of the elements of the record tuple.
if dictionary in record:
if key in matches:
# The result of this max() call is discarded.
max(the_list)
# NOTE(review): `lst` is not defined here, and the result of this call is
# discarded as well. The key multiplies elements 2 and 3 of each tuple.
max(lst, key=lambda tupl: tupl[2]*tupl[3])
matches[key].append(record)
else:
matches[key] = [record]
return matches
This is basically my code from an earlier function with a few significant changes. This basic framework has worked for a few of my goals. I added max(matches) but I realize this isn't doing much unless the function multiplies the lengths and widths and then looks for the max. If anyone has advice it would be helpful
It would probably be easier to just keep track of your current max instead
data = {
'A, Jr.':[("One",1400,10,20.5,"oil paint","Austria"),("Three",1400,100.0,100.0,"oil paint","France"),("Twenty",1410,50.0,200.0,"oil paint","France")],
'X':[("Eight",1460, 100.0, 20.0, "oil paint","France"),("Six",1465,10.0, 23.0, "oil paint", "France"),("Ten",1465,12.0,15.0,"oil paint","Austria"),("Thirty",1466,30.0,30.0,"watercolor","Germany")],
'M':[("One, Two", 1500, 10.0, 10.0, "panel","Germany")]
}
def find_largest(d):
    """Return ``(key, title)`` pairs for the records with the largest area.

    ``d`` maps a key to a list of records. A record's title is its element 0
    and its area is element 2 multiplied by element 3. Every record whose
    area equals the largest area found is returned, in iteration order. The
    running maximum starts at 0.
    """
    best_area = 0
    winners = []
    for artist, records in d.items():
        for record in records:
            area = record[2] * record[3]
            entry = (artist, record[0])
            if area > best_area:
                # New maximum: earlier winners are no longer the largest.
                best_area, winners = area, [entry]
            elif area == best_area:
                winners.append(entry)
    return winners
# Output
>>> find_largest(data)
[('A, Jr.', 'Three'), ('A, Jr.', 'Twenty')]
I have a JSON object and I am working on some data manipulation. I want to get the difference as a ratio so I can more accurately rank the elements in my dict.
[{condition: functional, location:Sydney }, {condition:functional, location: Adelaide}, {condition:broken, location:Sydney}]
I can get the number of points where the location is not functional like so:
filter(lambda x: x['condition']!='functional', json_obj)
But I would like to return this as a percentage ratio.
You can try Counter and defaultdict as below-
from collections import Counter, defaultdict

d = [{'condition': 'functional', 'location': 'Sydney'}, {'condition': 'functional', 'location': 'Adelaide'}, {'condition': 'broken', 'location': 'Sydney'}]
cities = [j['location'] for j in d]

# Start every city at zero.
data = defaultdict(float)
for city in cities:
    data[city] = 0

# Number of records per city.
counters = Counter(cities)

# A functional record adds its city's share of all records (as a whole-number
# percentage) to that city's score; a broken record subtracts it.
for i in d:
    # Floor division gives the same whole-number percentage on Python 2 and 3,
    # which is what the output shown below the snippet reflects.
    share = (counters[i['location']] * 100) // len(d)
    if i['condition'] == 'functional':
        data[i['location']] += float(share)
    elif i['condition'] == 'broken':
        data[i['location']] -= float(share)
    else:
        raise Exception("Error")
print({k: "{0}%".format(v) for k, v in data.items()})
Output-
{'Sydney': '0.0%', 'Adelaide': '33.0%'}
It's easy:
a = [{'condition': 'functional', 'location': 'Sydney'}, {'condition': 'functional', 'location': 'Adelaide'}, {'condition': 'broken', 'location': 'Sydney'}]
# Records that are not functional. Built as a list because filter() returns a
# one-shot iterator on Python 3.
b = [x for x in a if x['condition'] != 'functional']
all_locations = [item['location'] for item in b]
# location -> percentage of the non-functional records found at that location.
result = {}
for location in all_locations:
    if location not in result:
        result[location] = all_locations.count(location) * 100 / float(len(all_locations))
print(result)
It will return the percentage for every location.
Is this what you want? This compares the elements of two JSON dicts and gets the difference as a ratio, as you ask for in the title. But reading the question body, it is not really clear what you want to do.
This assumes that both dictionaries have the same keys.
def dictionary_similarity(d1, d2):
    """Return the fraction of ``d1``'s keys whose values are equal in ``d2``.

    Every key of ``d1`` is looked up in both dictionaries, so ``d2`` must
    contain all of them. The result is the number of equal values divided by
    ``len(d1)``, as a float.
    """
    comparisons = [d1[key] == d2[key] for key in d1]
    equal_count = sum(comparisons)
    return equal_count / float(len(d1))
dictionary_similarity(
{'condition': 'functional', 'location': 'Sydney' },
{'condition': 'functional', 'location': 'Adelaide'},)
0.5
I have a list of dictionaries. Let's say it is:
total = [{"date": "2014-03-01", "value": 200}, {"date": "2014-03-02", "value": 100}, {"date": "2014-03-03", "value": 400}]
I need to get the maximum, minimum and average value from it. I can get the max and min values with the code below:
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(min(d['value'] for d in total))
print(max(d['value'] for d in total))
But now I need to get the average value from it. How do I do that?
Just divide the sum of values by the length of the list:
print(sum(d['value'] for d in total) / len(total))
Note that in Python 2, dividing one integer by another returns an integer. This means that the average of [5, 5, 0, 0] will be 2 instead of 2.5. If you need a more precise result, convert the sum with float() first:
print(float(sum(d['value'] for d in total)) / len(total))
I needed a more general implementation of the same thing to work on the whole dictionary. So here is one simple option:
def dict_mean(dict_list):
    """Return the per-key mean over a list of dictionaries.

    The keys are taken from the first dictionary; every other dictionary must
    contain them too.

    Args:
        dict_list: list of dicts with numeric values.

    Returns:
        A dict mapping each key of ``dict_list[0]`` to the sum of that key's
        values across all dictionaries divided by ``len(dict_list)``. An empty
        ``dict_list`` yields an empty dict.

    Raises:
        KeyError: if a later dictionary lacks one of the first one's keys.
    """
    # Nothing to average; without this guard dict_list[0] raises IndexError.
    if not dict_list:
        return {}
    count = len(dict_list)
    return {key: sum(d[key] for d in dict_list) / count
            for key in dict_list[0]}
Testing:
dicts = [{"X": 5, "value": 200}, {"X": -2, "value": 100}, {"X": 3, "value": 400}]
dict_mean(dicts)
{'X': 2.0, 'value': 233.33333333333334}
reduce(lambda x, y: x + y, [d['value'] for d in total]) / len(total)
catavaran's answer is easier; you don't need a lambda.
An improvement on dsalaj's answer if the values are numeric lists instead:
def dict_mean(dict_list):
    """Return the per-key element-wise mean over a list of dictionaries.

    The keys are taken from the first dictionary; every other dictionary must
    contain them too. For each key, the values from all dictionaries are
    passed as a list to ``np.mean(..., axis=0)``.

    Args:
        dict_list: list of dicts whose values are numeric lists.
            NOTE(review): presumably all values under one key share the same
            length - confirm against callers.

    Returns:
        A dict mapping each key of ``dict_list[0]`` to the result of
        ``np.mean``. An empty ``dict_list`` yields an empty dict.
    """
    # Nothing to average; without this guard dict_list[0] raises IndexError.
    if not dict_list:
        return {}
    return {key: np.mean([d[key] for d in dict_list], axis=0)
            for key in dict_list[0]}