Group List of Dicts by Key in Python - python

Looking for a Pythonic way to iterate over a list of Dicts and group them by a certain key.
E.g. a list like this should be grouped by position
[
{'Name': 'Bradley Greer', 'Position': 'Software Engineer', 'Office': 'London', 'Age': '41', 'Start date': '2012/10/13', 'Salary': '$132,000'},
{'Name': 'Brenden Wagner', 'Position': 'Software Engineer', 'Office': 'San Francisco', 'Age': '28', 'Start date': '2011/06/07', 'Salary': '$206,850'},
{'Name': 'Bruno Nash', 'Position': 'Software Engineer', 'Office': 'London', 'Age': '38', 'Start date': '2011/05/03', 'Salary': '$163,500'},
{'Name': 'Cara Stevens', 'Position': 'Sales Assistant', 'Office': 'New York', 'Age': '46', 'Start date': '2011/12/06', 'Salary': '$145,600'},
{'Name': 'Donna Snider', 'Position': 'Customer Support', 'Office': 'New York', 'Age': '27', 'Start date': '2011/01/25', 'Salary': '$112,000'},
{'Name': 'Doris Wilder', 'Position': 'Sales Assistant', 'Office': 'Sydney', 'Age': '23', 'Start date': '2010/09/20', 'Salary': '$85,600'},
{'Name': 'Gavin Joyce', 'Position': 'Sales Assistant', 'Office': 'Edinburgh', 'Age': '42', 'Start date': '2010/12/22', 'Salary': '$92,575'},
{'Name': 'Herrod Chandler', 'Position': 'Sales Assistant', 'Office': 'San Francisco', 'Age': '59', 'Start date': '2012/08/06', 'Salary': '$137,500'}
]
would result in something like this
[
{
'Position': 'Software Engineer',
'Items': [
{'Name': 'Bradley Greer', 'Position': 'Software Engineer', 'Office': 'London', 'Age': '41', 'Start date': '2012/10/13', 'Salary': '$132,000'},
{'Name': 'Brenden Wagner', 'Position': 'Software Engineer', 'Office': 'San Francisco', 'Age': '28', 'Start date': '2011/06/07', 'Salary': '$206,850'},
{'Name': 'Bruno Nash', 'Position': 'Software Engineer', 'Office': 'London', 'Age': '38', 'Start date': '2011/05/03', 'Salary': '$163,500'},
]
},
{
'Position': 'Sales Assistant',
'Items': [
{'Name': 'Cara Stevens', 'Position': 'Sales Assistant', 'Office': 'New York', 'Age': '46', 'Start date': '2011/12/06', 'Salary': '$145,600'},
{'Name': 'Doris Wilder', 'Position': 'Sales Assistant', 'Office': 'Sydney', 'Age': '23', 'Start date': '2010/09/20', 'Salary': '$85,600'},
{'Name': 'Gavin Joyce', 'Position': 'Sales Assistant', 'Office': 'Edinburgh', 'Age': '42', 'Start date': '2010/12/22', 'Salary': '$92,575'},
{'Name': 'Herrod Chandler', 'Position': 'Sales Assistant', 'Office': 'San Francisco', 'Age': '59', 'Start date': '2012/08/06', 'Salary': '$137,500'}
]
},
{
'Position': 'Customer Support',
'Items': [
{'Name': 'Donna Snider', 'Position': 'Customer Support', 'Office': 'New York', 'Age': '27', 'Start date': '2011/01/25', 'Salary': '$112,000'},
]
}
]

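You can use itertools.groupby. Note that groupby only groups consecutive items, so the list must first be sorted by the grouping key.
Here items is your input list: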
import itertools
from operator import itemgetter

# itertools.groupby only merges *adjacent* items that share a key, so the
# input has to be sorted by that key first; otherwise a position that occurs
# in several separate runs (e.g. 'Sales Assistant' in the sample data) would
# be emitted as several groups.  sorted() is stable, so the original relative
# order inside each group is preserved.  Groups come out in sorted key order.
by_position = itemgetter('Position')
output = [
    {'Position': position, 'Items': list(members)}
    for position, members in itertools.groupby(
        sorted(items, key=by_position), key=by_position
    )
]

If you want a one-liner, here it is (note that it rescans the whole list once per distinct key, so it gets slower than the previous solution as the number of groups grows):
people = []  # Replace with your list of dicts
key = "Position"  # The key to group by

# dict.fromkeys() de-duplicates the key values while keeping first-seen order
# (iterating a set would yield the groups in arbitrary order).  Each group is
# then collected with its own pass over `people`.  The group list is stored
# under "Items", matching the output shape requested in the question.
output = [
    {key: k, "Items": [person for person in people if person[key] == k]}
    for k in dict.fromkeys(person[key] for person in people)
]

Related

Combining three different list collection of dictionary having same value in key name “firstname” and “lastname” in python

I have three different lists of dictionaries, as shown below; all three share the same first name and last name fields. I need to combine these lists into a single one without duplicating the first name and last name, i.e. for each first name / last name pair I want one dictionary that merges the matching entries from the three lists:
list one
[{'First Name': 'Justin',
'lastName': 'Walker',
'Age (Years)': '29',
'Sex': 'Male',
'Vehicle Make': 'Toyota',
'Vehicle Model': 'Continental',
'Vehicle Year': '2012',
'Vehicle Type': 'Sedan'},
{'First Name': 'Maria',
'lastName': 'Jones',
'Age (Years)': '66',
'Sex': 'Female',
'Vehicle Make': 'Mitsubishi',
'Vehicle Model': 'Yukon XL 2500',
'Vehicle Year': '2014',
'Vehicle Type': 'Van/Minivan'},
{'First Name': 'Samantha',
'lastName': 'Norman',
'Age (Years)': '19',
'Sex': 'Female',
'Vehicle Make': 'Aston Martin',
'Vehicle Model': 'Silverado 3500 HD Regular Cab',
'Vehicle Year': '1995',
'Vehicle Type': 'SUV'}
list two
[{'firstName': 'Justin',
'lastName': 'Walker',
'age': 71,
'iban': 'GB43YKET96816855547287',
'credit_card_number': '2221597849919620',
'credit_card_security_code': '646',
'credit_card_start_date': '03/18',
'credit_card_end_date': '06/26',
'address_main': '462 Marilyn radial',
'address_city': 'Lynneton',
'address_postcode': 'W4 0GW'},
{'firstName': 'Maria',
'lastName': 'Jones',
'age': 91,
'iban': 'GB53QKRK45175204753504',
'credit_card_number': '4050437758955103343',
'credit_card_security_code': '827',
'credit_card_start_date': '11/21',
'credit_card_end_date': '01/27',
'address_main': '366 Brenda radial',
'address_city': 'Ritafurt',
'address_postcode': 'NE85 1RG'}]
list three
{'firstName': 'Justin',
'lastName': 'Walker',
'age': '64',
'sex': 'Male',
'retired': 'False',
'dependants': '2',
'marital_status': 'single',
'salary': '56185',
'pension': '0',
'company': 'Hudson PLC',
'commute_distance': '14.1',
'address_postcode': 'G2J 0FH'},
{'firstName': 'Maria',
'lastName': 'Jones',
'age': '69',
'sex': 'Female',
'retired': 'False',
'dependants': '1',
'marital_status': 'divorced',
'salary': '36872',
'pension': '0',
'company': 'Wall, Reed and Whitehouse',
'commute_distance': '10.47',
'address_postcode': 'TD95 7FL'}
This is what I am trying, but I am stuck:
for i in range(0,2):
dict1 = list_one[i]
dict2 = list_two[i]
dict3 = list_three[i]
combine_file = list_three.copy()
for k, v in dict1.items():
if k == "firstname" or "lastname":
for k1, v1 in combine_file.items():
if dict1.get(k) == combine_file.v1:
This is what I'm expecting
print(combine_file)
{'firstName': 'Justin',
'lastName': 'Walker',
'age': '64',
'sex': 'Male',
'retired': 'False',
'dependants': '2',
'marital_status': 'single',
'salary': '56185',
'pension': '0',
'company': 'Hudson PLC',
'commute_distance': '14.1',
'iban': 'GB43YKET96816855547287',
'credit_card_number': '2221597849919620',
'credit_card_security_code': '646',
'credit_card_start_date': '03/18',
'credit_card_end_date': '06/26',
'address_main': '462 Marilyn radial',
'address_city': 'Lynneton',
'address_postcode': 'W4 0GW',
'Vehicle Make': 'Mitsubishi',
'Vehicle Model': 'Yukon XL 2500',
'Vehicle Year': '2014',
'Vehicle Type': 'Van/Minivan'},
{'firstName': 'Maria',
'lastName': 'Jones',
'age': '69',
'sex': 'Female',
'retired': 'False',
'dependants': '1',
'marital_status': 'divorced',
'salary': '36872',
'pension': '0',
'company': 'Wall, Reed and Whitehouse',
'commute_distance': '10.47',
'iban': 'GB53QKRK45175204753504',
'credit_card_number': '4050437758955103343',
'credit_card_security_code': '827',
'credit_card_start_date': '11/21',
'credit_card_end_date': '01/27',
'address_main': '366 Brenda radial',
'address_city': 'Ritafurt',
'address_postcode': 'NE85 1RG',
'Vehicle Make': 'Aston Martin',
'Vehicle Model': 'Silverado 3500 HD Regular Cab',
'Vehicle Year': '1995',
'Vehicle Type': 'SUV'}
Create a new dictionary keyed on a composite of the first and last name (the first name is stored under either 'firstName' or 'First Name', depending on the list); then you can do this:
# Maps "<first name>_<last name>" to the merged record for that person.
master = {}
for _list in list_1, list_2, list_3:
    for d in _list:
        # The first name is stored under 'firstName' in some lists and under
        # 'First Name' in others.
        if not (firstname := d.get('firstName')):
            firstname = d['First Name']
        name_key = f'{firstname}_{d["lastName"]}'
        # Merge this record into the person's combined dict; a later list
        # overwrites earlier values for any key they share.
        master.setdefault(name_key, {}).update(d)
print(list(master.values()))
Python's dict.update() functionality might be what you are looking for.
For example:
# Two small dicts that share the key 'c'.
dict1 = dict(a=0, b=1, c=2)
dict2 = dict(c=0, d=1, e=2)
# Copy every key/value pair of dict1 into dict2, replacing dict2's value for
# any key that both dicts have.
dict2.update(dict1)
dict2 is now:
{'c': 2, 'd': 1, 'e': 2, 'a': 0, 'b': 1}
Notice how 'c' was overwritten with the updated value from dict1.
You shouldn't merge dictionaries that belong to different people, but if you run through your lists beforehand you can collect, for each person, the set of dictionaries that belong to them.
You can create a new dictionary, called people, and then iterate through your lists of dictionaries and extract the person's name from those dictionaries and turn it into a key in the new "people" dictionary.
If that person's name is not in people yet, you can add that dictionary, so that people[name] points to that dictionary.
If people[name] does exist, then you can use the people[name].update() function on the new dictionary to add the new values.
After this process, you will have a dictionary whose keys are the names of people and the values point to a dictionary containing those people's attributes.

Remove duplicates from list of lists by column value

I have a list of dicts that looks like this; it has been sorted so that, among rows with duplicate IDs, the one I want to keep comes first.
[
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
What is the simplest way to remove rows that have duplicate IDs but always keep the first one? In this instance the end result would look like this...
[
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
I know I can remove duplicates from a list by converting it to a set with set(my_list), but in this instance I want to remove the rows that are duplicates by ID.
Since you already have the list sorted properly, a simple way to do this is to use itertools.groupby to grab the first element of each group in a list comprehension:
from itertools import groupby

l = [
    {'id': '23', 'type': 'car', 'price': '445'},
    {'id': '23', 'type': 'car', 'price': '78'},
    {'id': '23', 'type': 'car', 'price': '34'},
    {'id': '125', 'type': 'truck', 'price': '998'},
    {'id': '125', 'type': 'truck', 'price': '722'},
    {'id': '125', 'type': 'truck', 'price': '100'},
    {'id': '87', 'type': 'bike', 'price': '50'},
]
# groupby() yields one (key, group) pair per run of consecutive rows with the
# same id, so taking the first row of each group keeps the first occurrence.
[next(rows) for _, rows in groupby(l, key=lambda row: row['id'])]
# [{'id': '23', 'type': 'car', 'price': '445'},
#  {'id': '125', 'type': 'truck', 'price': '998'},
#  {'id': '87', 'type': 'bike', 'price': '50'}]
I would probably convert to Pandas DataFrame and then use drop_duplicates
import pandas as pd

data = [
    {'id': '23', 'type': 'car', 'price': '445'},
    {'id': '23', 'type': 'car', 'price': '78'},
    {'id': '23', 'type': 'car', 'price': '34'},
    {'id': '125', 'type': 'truck', 'price': '998'},
    {'id': '125', 'type': 'truck', 'price': '722'},
    {'id': '125', 'type': 'truck', 'price': '100'},
    {'id': '87', 'type': 'bike', 'price': '50'},
]
# Build the frame and drop later rows that repeat an already-seen id
# (drop_duplicates keeps the first occurrence by default).
df = pd.DataFrame(data).drop_duplicates(subset=['id'])
print(df.to_dict(orient='records'))
# Output
# [{'id': '23', 'type': 'car', 'price': '445'},
#  {'id': '125', 'type': 'truck', 'price': '998'},
#  {'id': '87', 'type': 'bike', 'price': '50'}]
Here's an answer that involves no external modules or unnecessary manipulation of the data:
# De-duplicate the rows of `data` by 'id', keeping the first row seen for each id.
data = [
{'id': '23', 'type': 'car', 'price': '445'},
{'id': '23', 'type': 'car', 'price': '78'},
{'id': '23', 'type': 'car', 'price': '34'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '125', 'type': 'truck', 'price': '722'},
{'id': '125', 'type': 'truck', 'price': '100'},
{'id': '87', 'type': 'bike', 'price': '50'},
]
# ids of the rows that have already been kept
seen = set()
# set.add() returns None, so `not seen.add(...)` is always True; it is only
# evaluated when the id is new (short-circuit `and`) and records that id as a
# side effect.
result = [row for row in data if row['id'] not in seen and not seen.add(row['id'])]
print(result)
Result:
[{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'}]
Note that the `not seen.add(row['id'])` part of the list comprehension is always True, because set.add() returns None. It's just a way of recording that an id has been seen by adding it to the seen set.
Let's take the name of the given list as data.
unique_ids = []   # ids in first-seen order
seen_ids = set()  # the same ids, kept in a set for O(1) membership tests (ids must be hashable)
result = []
for item in data:
    item_id = item["id"]
    # Keep only the first row encountered for each id.
    if item_id not in seen_ids:
        seen_ids.add(item_id)
        unique_ids.append(item_id)
        result.append(item)
print(result)
The result will be,
[{'id': '23', 'type': 'car', 'price': '445'},
{'id': '125', 'type': 'truck', 'price': '998'},
{'id': '87', 'type': 'bike', 'price': '50'}]

How to create python dataframe from nested json dictionary with increasing key

I have a json file with the following structure:
{'0': {'transaction': [{'transaction_key': '406.l.657872.tr.374',
'transaction_id': '374',
'type': 'add/drop',
'status': 'successful',
'timestamp': '1639593953'},
{'players': {'0': {'player': [[{'player_key': '406.p.100006'},
{'player_id': '100006'},
{'name': {'full': 'Dallas',
'first': 'Dallas',
'last': '',
'ascii_first': 'Dallas',
'ascii_last': ''}},
{'editorial_team_abbr': 'Dal'},
{'display_position': 'DEF'},
{'position_type': 'DT'}],
{'transaction_data': [{'type': 'add',
'source_type': 'freeagents',
'destination_type': 'team',
'destination_team_key': '406.l.657872.t.10',
'destination_team_name': 'Team 1'}]}]},
'1': {'player': [[{'player_key': '406.p.24793'},
{'player_id': '24793'},
{'name': {'full': 'Julio Jones',
'first': 'Julio',
'last': 'Jones',
'ascii_first': 'Julio',
'ascii_last': 'Jones'}},
{'editorial_team_abbr': 'Ten'},
{'display_position': 'WR'},
{'position_type': 'O'}],
{'transaction_data': {'type': 'drop',
'source_type': 'team',
'source_team_key': '406.l.657872.t.10',
'source_team_name': 'Team 1',
'destination_type': 'waivers'}}]},
'count': 2}}]},
'1': {'transaction': [{'transaction_key': '406.l.657872.tr.373',
'transaction_id': '373',
'type': 'add/drop',
'status': 'successful',
'timestamp': '1639575496'},
{'players': {'0': {'player': [[{'player_key': '406.p.32722'},
{'player_id': '32722'},
{'name': {'full': 'Cam Akers',
'first': 'Cam',
'last': 'Akers',
'ascii_first': 'Cam',
'ascii_last': 'Akers'}},
{'editorial_team_abbr': 'LAR'},
{'display_position': 'RB'},
{'position_type': 'O'}],
{'transaction_data': [{'type': 'add',
'source_type': 'freeagents',
'destination_type': 'team',
'destination_team_key': '406.l.657872.t.5',
'destination_team_name': 'Team 2'}]}]},
'1': {'player': [[{'player_key': '406.p.100007'},
{'player_id': '100007'},
{'name': {'full': 'Denver',
'first': 'Denver',
'last': '',
'ascii_first': 'Denver',
'ascii_last': ''}},
{'editorial_team_abbr': 'Den'},
{'display_position': 'DEF'},
{'position_type': 'DT'}],
{'transaction_data': {'type': 'drop',
'source_type': 'team',
'source_team_key': '406.l.657872.t.5',
'source_team_name': 'Team 2',
'destination_type': 'waivers'}}]},
'count': 2}}]},
'2': {'transaction': [{'transaction_key': '406.l.657872.tr.372',
'transaction_id': '372',
'type': 'add/drop',
'status': 'successful',
'timestamp': '1639575448'},
{'players': {'0': {'player': [[{'player_key': '406.p.33413'},
{'player_id': '33413'},
{'name': {'full': 'Travis Etienne',
'first': 'Travis',
'last': 'Etienne',
'ascii_first': 'Travis',
'ascii_last': 'Etienne'}},
{'editorial_team_abbr': 'Jax'},
{'display_position': 'RB'},
{'position_type': 'O'}],
{'transaction_data': [{'type': 'add',
'source_type': 'freeagents',
'destination_type': 'team',
'destination_team_key': '406.l.657872.t.5',
'destination_team_name': 'Team 2'}]}]},
'1': {'player': [[{'player_key': '406.p.24815'},
{'player_id': '24815'},
{'name': {'full': 'Mark Ingram II',
'first': 'Mark',
'last': 'Ingram II',
'ascii_first': 'Mark',
'ascii_last': 'Ingram II'}},
{'editorial_team_abbr': 'NO'},
{'display_position': 'RB'},
{'position_type': 'O'}],
{'transaction_data': {'type': 'drop',
'source_type': 'team',
'source_team_key': '406.l.657872.t.5',
'source_team_name': 'Team 2',
'destination_type': 'waivers'}}]},
'count': 2}}]}
These are transactions for a fantasy football league and I'd like to organize each transaction into a dataframe, however I'm running into issues normalizing the data. I figure I'd need to begin a loop, but am slightly stuck in the mud and would appreciate if anyone has any suggestions. Thank You.
Ideally, I'm looking to summarize each transaction with the following dataframe structure:
transaction_id type added pos_1 dropped pos_2 timestamp
374 add/drop Dallas DEF Julio Jones WR 1639593953
373 add/drop Cam Akers RB Denver DEF 1639575496
372 add/drop Travis Etienne RB Mark Ingram II RB 1639575448

Get specific the nested key/values based on a condition from python nested dictionary

I'm stuck parsing the below python nested dictionary based on the nested key. I want to filter a key's value and return all the nested key/values related to that.
{ 'US': { 'Washington': {'Seattle': {1: {'name': 'John', 'age': '27', 'gender': 'Male'}}},
'Florida': {'some city': {2: {'name': 'Marie', 'age': '22', 'gender': 'Female'}}},
'Ohio': {'some city': {3: {'name': 'Luna', 'age': '24', 'gender': 'Female', 'married': 'No'}}},
'Nevada': {'some city': {4: {'name': 'Peter', 'age': '29', 'gender': 'Male', 'married': 'Yes'}}}}}
For instance, filtering on gender "Male" should return the below:
US
Washington
Seattle
1
name:John
age: 27
US
Nevada
somecity
4
name:Peter
age: 29
married: Yes
Can you please suggest the best way to parse it. I tried to use contains within a loop that doesn't seem to work.
We can recursively explore the dict structure, keeping track of the path of keys at each point. When we reach a dict containing the target value, we yield the path and the content of the dict.
We can use this generator:
def recursive_search(dct, target, path=None):
    """Yield one summary string for every nested dict that has *target* as a value.

    *dct* is walked depth-first.  When a dict directly contains *target*
    among its values, a single string is yielded: the keys leading to that
    dict followed by its ``key:value`` pairs, all separated by spaces.  Such
    a dict is not searched any deeper.

    Args:
        dct: The (possibly nested) dict to search.
        target: The value to look for among each dict's values.
        path: Keys (as strings) leading from the root to *dct*; callers
            normally leave this as ``None``.

    Yields:
        One string per matching dict, in traversal order.
    """
    if path is None:
        path = []
    if target in dct.values():
        fields = [f'{key}:{value}' for key, value in dct.items()]
        # Join path and fields in one go so that a match in the top-level
        # dict (empty path) does not start with a stray space.
        yield ' '.join(path + fields)
    else:
        for key, value in dct.items():
            if isinstance(value, dict):
                yield from recursive_search(value, target, path + [str(key)])
this way:
data = {
    'US': {
        'Washington': {'Seattle': {1: {'name': 'John', 'age': '27', 'gender': 'Male'}}},
        'Florida': {'some city': {2: {'name': 'Marie', 'age': '22', 'gender': 'Female'}}},
        'Ohio': {'some city': {3: {'name': 'Luna', 'age': '24', 'gender': 'Female', 'married': 'No'}}},
        'Nevada': {'some city': {4: {'name': 'Peter', 'age': '29', 'gender': 'Male', 'married': 'Yes'}}},
    }
}
# Print one line per record that has 'Male' among its values.
for match in recursive_search(data, 'Male'):
    print(match)
# US Washington Seattle 1 name:John age:27 gender:Male
# US Nevada some city 4 name:Peter age:29 gender:Male married:Yes
This code will work, provided the data always has exactly this nesting depth:
a_dict = {
    'US': {
        'Washington': {'Seattle': {1: {'name': 'John', 'age': '27', 'gender': 'Male'}}},
        'Florida': {'some city': {2: {'name': 'Marie', 'age': '22', 'gender': 'Female'}}},
        'Ohio': {'some city': {3: {'name': 'Luna', 'age': '24', 'gender': 'Female', 'married': 'No'}}},
        'Nevada': {'some city': {4: {'name': 'Peter', 'age': '29', 'gender': 'Male', 'married': 'Yes'}}},
    }
}

# The loops hard-code the four levels of nesting in a_dict; the innermost
# values are the records that get filtered.
matches = []
for k, v in a_dict.items():
    for k1, v1 in v.items():
        for k2, v2 in v1.items():
            for k3, v3 in v2.items():
                # .get() skips records without a 'gender' field instead of
                # raising KeyError.
                if v3.get("gender") == "Male":
                    # f-strings also cope with non-string field values.
                    details = " ".join(f"{k4}:{v4}" for k4, v4 in v3.items())
                    matches.append(f"{k} {k1} {k2} {k3} {details}")
for line in matches:
    print(line)

merge the dictionaries of sub fields into a single dictionary

Imagine I have the following list of dictionaries. For every record (row of data), I want to merge the dictionaries of sub-fields into a single dictionary, so that in the end I have a list of dictionaries, one per record.
Data = [{'Name': 'bob', 'age': '40'},
{'Name': 'tom', 'age': '30'},
{'Country': 'US', 'City': 'Boston'},
{'Country': 'US', 'City': 'New York'},
{'Email': 'bob#fake.com', 'Phone': 'bob phone'},
{'Email': 'tom#fake.com', 'Phone': 'none'}]
Output = [
{'Name': 'bob', 'age': '40', 'Country': 'US', 'City': 'Boston', 'Email': 'bob#fake.com', 'Phone': 'bob phone'},
{'Name': 'tom', 'age': '30', 'Country': 'US', 'City': 'New York', 'Email': 'tom#fake.com', 'Phone': 'none'}
]
Related: How do I merge a list of dicts into a single dict?
I understand you know which dictionary relates to Bob and which dictionary relates to Tom by their position: dictionaries at even positions relate to Bob, while dictionaries at odd positions relate to Tom.
You can check whether a number is odd or even using % 2:
Data = [
    {'Name': 'bob', 'age': '40'},
    {'Name': 'tom', 'age': '30'},
    {'Country': 'US', 'City': 'Boston'},
    {'Country': 'US', 'City': 'New York'},
    {'Email': 'bob#fake.com', 'Phone': 'bob phone'},
    {'Email': 'tom#fake.com', 'Phone': 'none'},
]
bob_dict = {}
tom_dict = {}
for i, d in enumerate(Data):
    # Records alternate: even positions belong to Bob, odd positions to Tom.
    if i % 2 == 0:
        bob_dict.update(d)
    else:
        tom_dict.update(d)
Output = [bob_dict, tom_dict]
Or alternatively:
Output = [{}, {}]
for i, d in enumerate(Data):
    # i % 2 is 0 at even positions and 1 at odd positions, so it doubles as
    # the index of the person's dict in Output.
    Output[i % 2].update(d)
This second approach is not only shorter to write, it's also faster to execute and easier to scale if you have more than 2 people.
Splitting the list into more than 2 dictionaries
k = 4  # number of dictionaries you want
Data = [
    {'Name': 'Alice', 'age': '40'},
    {'Name': 'Bob', 'age': '30'},
    {'Name': 'Charlie', 'age': '30'},
    {'Name': 'Diane', 'age': '30'},
    {'Country': 'US', 'City': 'Boston'},
    {'Country': 'US', 'City': 'New York'},
    {'Country': 'UK', 'City': 'London'},
    {'Country': 'UK', 'City': 'Oxford'},
    {'Email': 'alice#fake.com', 'Phone': 'alice phone'},
    {'Email': 'bob#fake.com', 'Phone': '12345'},
    {'Email': 'charlie#fake.com', 'Phone': '0000000'},
    {'Email': 'diane#fake.com', 'Phone': 'none'},
]
# One independent dict per person (a comprehension, so the dicts are distinct objects).
Output = [{} for _ in range(k)]
for i, d in enumerate(Data):
    # Every k-th record belongs to the same person.
    Output[i % k].update(d)
# Output = [
#   {'Name': 'Alice', 'age': '40', 'Country': 'US', 'City': 'Boston', 'Email': 'alice#fake.com', 'Phone': 'alice phone'},
#   {'Name': 'Bob', 'age': '30', 'Country': 'US', 'City': 'New York', 'Email': 'bob#fake.com', 'Phone': '12345'},
#   {'Name': 'Charlie', 'age': '30', 'Country': 'UK', 'City': 'London', 'Email': 'charlie#fake.com', 'Phone': '0000000'},
#   {'Name': 'Diane', 'age': '30', 'Country': 'UK', 'City': 'Oxford', 'Email': 'diane#fake.com', 'Phone': 'none'}
# ]
Additionally, instead of hardcoding k = 4:
If you know the number of fields but not the number of people, you can compute k by dividing the initial number of dictionaries by the number of dictionary types:
# One entry per kind of dictionary found in Data.
fields = ['Name', 'Country', 'Email']
n_kinds = len(fields)
# Data must hold the same number of dictionaries for every kind.
assert len(Data) % n_kinds == 0
# Number of people = total dictionaries / kinds of dictionary.
k = len(Data) // n_kinds
Or alternatively, you can compute k by counting how many occurrences of the 'Name' field you have:
# Count the dictionaries that carry a 'Name' key (assumes exactly one per person).
k = len([d for d in Data if 'Name' in d])

Categories