Merging list of dictionary in python - python

I have a list of dictionaries in python. Now how do i merge these dictionaries into single entity in python.
Example dictionary is
input_dictionary = [{"name":"kishore", "playing":["cricket","basket ball"]},
{"name":"kishore", "playing":["volley ball","cricket"]},
{"name":"kishore", "playing":["cricket","hockey"]},
{"name":"kishore", "playing":["volley ball"]},
{"name":"xyz","playing":["cricket"]}]
output shouled be:
[{"name":"kishore", "playing":["cricket","basket ball","volley ball","hockey"]},{"name":"xyz","playing":["cricket"]}]

Using itertools.groupby:
input_dictionary = [{"name":"kishore", "playing":["cricket","basket ball"]},
{"name":"kishore", "playing":["volley ball","cricket"]},
{"name":"kishore", "playing":["cricket","hockey"]},
{"name":"kishore", "playing":["volley ball"]},
{"name":"xyz","playing":["cricket"]}]
import itertools
import operator
by_name = operator.itemgetter('name')
result = []
for name, grp in itertools.groupby(sorted(input_dictionary, key=by_name), key=by_name):
playing = set(itertools.chain.from_iterable(x['playing'] for x in grp))
# If order of `playing` is important use `collections.OrderedDict`
# playing = collections.OrderedDict.fromkeys(itertools.chain.from_iterable(x['playing'] for x in grp))
result.append({'name': name, 'playing': list(playing)})
print(result)
output:
[{'playing': ['volley ball', 'basket ball', 'hockey', 'cricket'], 'name': 'kishore'}, {'playing': ['cricket'], 'name': 'xyz'}]

toutput = {}
for entry in input_dictionary:
if entry['name'] not in toutput: toutput[entry['name']] = []
for p in entry['playing']:
if p not in toutput[entry['name']]:
toutput[entry['name']].append(p)
output = list({'name':n, 'playing':l} for n,l in toutput.items())
Produces:
[{'name': 'kishore', 'playing': ['cricket', 'basket ball', 'volley ball', 'hockey']}, {'name': 'xyz', 'playing': ['cricket']}]
Or, using sets:
from collections import defaultdict
toutput = defaultdict(set)
for entry in input_dictionary:
toutput[entry['name']].update(entry['playing'])
output = list({'name':n, 'playing':list(l)} for n,l in toutput.items())

This is basically a slight variant of #perreal's answer (the answer before the defaultdict version was added, I mean!)
merged = {}
for d in input_dictionary:
merged.setdefault(d["name"], set()).update(d["playing"])
output = [{"name": k, "playing": list(v)} for k,v in merged.items()]

from collections import defaultdict
result = defaultdict(set)
[result[k[1]].update(v[1]) for k,v in [d.items() for d in input_dictionary]]
print [{'name':k, 'playing':v} for k,v in result.items()]

Related

Create a dictionary where the keys are values of dictionaries inside lists in a dictionary and the values are the number of times they appear

I have this dictionary of lists of dictionaries (I cannot change the structure for the work):
dict_countries = {'gb': [{'datetime': '1955-10-10 17:00:00', 'city': 'chester'},
{'datetime': '1974-10-10 23:00:00', 'city': 'chester'}],
'us': [{'datetime': '1955-10-10 17:00:00', 'city': 'hudson'}]
}
And the function:
def Seen_in_the_city(dict_countries:dict,)-> dict:
city_dict = {}
for each_country in dict_countries.values():
for each_sight in each_country:
citi = each_sight["city"]
if citi in city_dict.keys():
city_dict[each_sight["city"]] =+1
else:
city_dict[citi] =+1
return city_dict
I get:
{'chester': 1,'hudson': 1}
instead of
{'chester': 2,'hudson': 1}
You can try using Counter (a subclass of dict) from the collections module in the Python Standard Library:
from collections import Counter
c = Counter()
for key in dict_countries:
for d in dict_countries[key]:
c.update(v for k, v in d.items() if k == 'city')
print(c)
Output
Counter({'chester': 2, 'hudson': 1})
Try:
output = dict()
for country, cities in dict_countries.items():
for city in cities:
if city["city"] not in output:
output[city["city"]] = 0
output[city["city"]] += 1
You don't need to say +1 in order to add a positive number. Also in the if citi statement, += 1 means adding 1 to the existing value (1+1) where as =+1 is basically saying giving it a value of 1 once again.
if citi in city_dict.keys():
city_dict[each_sight["city"]] +=1
else:
city_dict[citi] = 1
You can use groupby from itertools
from itertools import groupby
print({i: len(list(j)[0]) for i,j in groupby(dict_countries.values(), key=lambda x: x[0]["city"])})
If you don't want additional imports (not that you shouldn't use Counter) here's another way:
dict_countries = {'gb': [{'datetime': '1955-10-10 17:00:00', 'city': 'chester'},
{'datetime': '1974-10-10 23:00:00', 'city': 'chester'}],
'us': [{'datetime': '1955-10-10 17:00:00', 'city': 'hudson'}]
}
def Seen_in_the_city(dict_countries:dict,)-> dict:
city_dict = {}
for each_country in dict_countries.values():
for each_sight in each_country:
citi = each_sight["city"]
city_dict[citi] = city_dict.get(citi, 0) + 1
return city_dict
print(Seen_in_the_city(dict_countries))

multiple separator in a string python

text="Brand.*/Smart Planet.#/Color.*/Yellow.#/Type.*/Sandwich Maker.#/Power Source.*/Electrical."
I have this kind of string. I am facing the problem which splits it to 2 lists. Output will be approximately like this :
name = ['Brand','Color','Type','Power Source']
value = ['Smart Plane','Yellow','Sandwich Maker','Electrical']
Is there any solution for this.
name = []
value = []
text = text.split('.#/')
for i in text:
i = i.split('.*/')
name.append(i[0])
value.append(i[1])
This is one approach using re.split and list slicing.
Ex:
import re
text="Brand.*/Smart Planet.#/Color.*/Yellow.#/Type.*/Sandwich Maker.#/Power Source.*/Electrical."
data = [i for i in re.split("[^A-Za-z\s]+", text) if i]
name = data[::2]
value = data[1::2]
print(name)
print(value)
Output:
['Brand', 'Color', 'Type', 'Power Source']
['Smart Planet', 'Yellow', 'Sandwich Maker', 'Electrical']
You can use regex to split the text, and populate the lists in a loop.
Using regex you protect your code from invalid input.
import re
name, value = [], []
for ele in re.split(r'\.#\/', text):
k, v = ele.split('.*/')
name.append(k)
value.append(v)
>>> print(name, val)
['Brand', 'Color', 'Type', 'Power Source'] ['Smart Planet', 'Yellow', 'Sandwich Maker', 'Electrical.']
text="Brand.*/Smart Planet.#/Color.*/Yellow.#/Type.*/Sandwich Maker.#/Power Source.*/Electrical."
name=[]
value=[]
word=''
for i in range(len(text)):
temp=i
if text[i]!='.' and text[i]!='/' and text[i]!='*' and text[i]!='#':
word=word+''.join(text[i])
elif temp+1<len(text) and temp+2<=len(text):
if text[i]=='.' and text[temp+1]=='*' and text[temp+2]=='/':
name.append(word)
word=''
elif text[i]=='.' and text[temp+1]=='#' and text[temp+2]=='/':
value.append(word)
word=''
else:
value.append(word)
print(name)
print(value)
this will be work...

How to insert data into an array of dictionaries in an order without missing data via regex

This is my code:
I'm trying to use the following code to insert data into an array of dictionaries but unable to insert properly.
Code:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if re.match("module_name", k):
module['name'] = v
if re.match("module_serial", k):
module['serial'] = v
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Expected output:
[{'name':'CH1.FM5', serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Actual output:
['serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Run it here: https://repl.it/repls/WetSteelblueRange
Please note that the order of the data test_list cannot be altered as it comes via an external API so I used regex. Any ideas would be appreciated.
Your code relies on the wrong assumption that keys are ordered and that the serial will always follow the name. The proper solution here is to use a dict (actually a collections.defaultdict to make things easier) to collect and regroup the values you're interested in based on the module number (the final '-N' in the key). Note that you don't need regexps here - Python string already provide the necessary operations for this task:
from collections import defaultdict
def parse_subdevice_modules(row):
modules = defaultdict(dict)
for k, v in row.items():
# first get rid of what we're not interested in
if not v:
continue
if not k.startswith("module_"):
continue
# retrieve the key number (last char) with
# negative string indexing:
key_num = k[-1]
# retrieve the useful part of the key ("name" or "serial")
# by splitting the string:
key_name = k.split("_")[1].split("-")[0]
# and now we just have to store this in our defaultdict
modules[key_num][key_name] = v
# and return only the values.
# NB: in py2.x you don't need the call to `list`,
# you can just return `modules.values()` directly
modules = list(modules.values())
return modules
test_list = {
'profile': '', 'chassis_name': '123', 'supplier_order_num': '',
'device_type': 'mass_storage', 'device_subtype': 'flashblade',
'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5',
'module_name-2': 'CH1.FM6', 'rack_total_pos': '',
'asset_tag': '002000027493', 'module_serial-2': 'PSUXA12345681',
'purchase_order': '0004530869', 'build': 'Test_Build_for_SNOW',
'po_line_num': '00190', 'mac_address': '', 'position': '7',
'model': 'FB-528TB-10X52.8TB', 'manufacturer': 'PureStorage',
'rack': 'Test_Rack_2', 'serial': 'PMPAM1842147D', 'name': 'FB02'
}
print(parse_subdevice_modules(test_list))
You can do somthing like this also.
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6',
'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules_list = []
for key, value in row.items():
if not value or key.startswith('module_name'):
continue
if key.startswith('module_serial'):
module_name_key = f'module_name-{key.split("-")[-1]}'
modules_list.append({'serial': value, 'name': row[module_name_key]})
return modules_list
print(parse_subdevice_modules(test_list))
Output:
[{'serial': 'PSUXA12345680', 'name': 'CH1.FM5'}, {'serial': 'PSUXA12345681', 'name': 'CH1.FM6'}]
You would need to check if module contains 2 elements and append it to modules:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if k.startswith('module_name'):
module['name'] = v
elif k.startswith("module_serial"):
module['serial'] = v
if len(module) == 2:
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Returns:
[{'serial': 'PSUXA12345680'}, {'name': 'CH1.FM5'}, {'name': 'CH1.FM6'}, {'serial': 'PSUXA12345681'}]

i want to save a list in dictionary in a way that it will have a key and attribute

please help
value = 'http://localhost:8001/issues/load?project_name=react&since=2016-03-24&until=2017-03-25&state=closed&sort=created&direction=asc&per_page=100&labels=Type:%20Bug'
hashing = hash(value)
words = value.split('&')
for data in words:
words2 = data.split('=')
print(words2)
Since words2 has each split into two like:
['http://localhost:8001/issues/load?project_name', 'react']
['since', '2016-03-24']
['until', '2017-03-25']
Use that to add values to a dictionary:
>>> key_vals = {}
>>> for data in words:
... words2 = data.split('=')
... key_vals[words2[0]] = words2[1]
...
>>> pprint.pprint(key_vals)
{'direction': 'asc',
'http://localhost:8001/issues/load?project_name': 'react',
'labels': 'Type:%20Bug',
'per_page': '100',
'since': '2016-03-24',
'sort': 'created',
'state': 'closed',
'until': '2017-03-25'}
And the assignment to key_vals can be reduced to:
key_vals = {key: val for (key, val) in [data.split('=') for data in words]}

Creating lists from the dictionary or just simply sort it

I have the following code:
import os
import pprint
file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
worker_dict = {}
k = 1
for line in open(file_path,'r'):
split_line = line.split()
worker = 'worker{}'.format(k)
worker_name = '{}_{}'.format(worker, 'name')
worker_yob = '{}_{}'.format(worker, 'yob')
worker_job = '{}_{}'.format(worker, 'job')
worker_salary = '{}_{}'.format(worker, 'salary')
worker_dict[worker_name] = ' '.join(split_line[0:2])
worker_dict[worker_yob] = ' '.join(split_line[2:3])
worker_dict[worker_job] = ' '.join(split_line[3:4])
worker_dict[worker_salary] = ' '.join(split_line[4:5])
k += 1
else:
print('Error: Invalid file path')
File:
John Snow 1967 CEO 3400$
Adam Brown 1954 engineer 1200$
Output from worker_dict:
{
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
And I want to sort data by worker name and after that by salary. So my idea was to create a separate list with salaries and worker names to sort. But I have problems with filling it, maybe there is a more elegant way to solve my problem?
import os
import pprint
file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
worker_dict = {}
k = 1
with open(file_path,'r') as file:
content=file.read().splitlines()
res=[]
for i in content:
val = i.split()
name = [" ".join([val[0],val[1]]),]#concatenate first name and last name
i=name+val[2:] #prepend name
res.append(i) #append modified value to new list
res.sort(key=lambda x: x[3])#sort by salary
print res
res.sort(key=lambda x: x[0])#sort by name
print res
Output:
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
d = {
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
from itertools import zip_longest
#re-group:
def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
#re-order:
res = []
for group in list(grouper(d.values(), 4)):
reorder = [1,2,0,3]
res.append([ group[i] for i in reorder])
#sort:
res.sort(key=lambda x: (x[1], x[2]))
output:
[['Adam Brown', '1200$', 'engineer', '1954'],
['John Snow', '3400$', 'CEO', '1967']]
Grouper is defined and explained in itertools. I've grouped your dictionary by records pertaining to each worker, returned it as a reordered list of lists. As lists, I sort them by the name and salary. This is solution is modular: it distinctly groups, re-orders and sorts.
I recommend to store the workers in a different format, for example .csv, then you could use csv.DictReader and put it into a list of dictionaries (this would also allow you to use jobs, names, etc. with more words like "tomb raider").
Note that you have to convert the year of birth and salary to ints or floats to sort them correctly, otherwise they would get sorted lexicographically as in a real world dictionary (book) because they are strings, e.g.:
>>> sorted(['100', '11', '1001'])
['100', '1001', '11']
To sort the list of dicts you can use operator.itemgetter as the key argument of sorted, instead of a lambda function, and just pass the desired key to itemgetter.
The k variable is useless, because it's just the len of the list.
The .csv file:
"name","year of birth","job","salary"
John Snow,1967,CEO,3400$
Adam Brown,1954,engineer,1200$
Lara Croft,1984,tomb raider,5600$
The .py file:
import os
import csv
from operator import itemgetter
from pprint import pprint
file_path = input('Please, enter the path to the file: ')
if os.path.exists(file_path):
with open(file_path, 'r', newline='') as f:
worker_list = list(csv.DictReader(f))
for worker in worker_list:
worker['salary'] = int(worker['salary'].strip('$'))
worker['year of birth'] = int(worker['year of birth'])
pprint(worker_list)
pprint(sorted(worker_list, key=itemgetter('name')))
pprint(sorted(worker_list, key=itemgetter('salary')))
pprint(sorted(worker_list, key=itemgetter('year of birth')))
You still need some error handling, if a int conversion fails, or just let the program crash.

Categories