My dictionary (myd) is below:
{ 'Owner': [ { 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Owner' }, { 'id': '2', 'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': 'Owner' } ], 'Manager': [ { 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Manager' } ] }
Extract each record's id and use it as the outer key.
Put the entire resulting dictionary under a new key called 'employee'.
If the same id appears under two different role keys, merge those records into one.
For example, id=1 is present as both Owner and Manager, so its output will have role: ['Owner', 'Manager'].
Expected output:
{ 'employee': { '1': { 'email': 'john#nif.com', 'name': 'John', 'role': [ 'Owner', 'Manager' ] }, '2': { 'email': 'work#nif.com', 'name': 'Work', 'role': [ 'Owner' ] } } }
# Asker's attempt: visit every record stored under every top-level key.
# NOTE(review): `event` is not defined in this snippet — presumably it is the
# input dictionary shown above; confirm. The loop nesting is reconstructed
# from a paste that lost its indentation.
emp = {}
for key,val in event.items():
    for each in val:
        # [{'employee': key, **val} for key, val in event.items()] if event else []
        # Both assignments write to the same two keys of `emp` on every
        # iteration, so only the last record visited remains afterwards.
        emp['employee'] = each['id']
        emp['name'] = each['name']
I would like to do this using native Python methods.
Here's an attempt that does not use any third-party library:
# Sample input: role name -> list of employee records.
myd = {
    'Owner': [
        {'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Owner'},
        {'id': '2', 'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': 'Owner'},
    ],
    'Manager': [
        {'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Manager'},
    ],
}

# Merge the records by employee id, collecting every role an id appears with.
empl_dict = {}
for employees in myd.values():
    for emp in employees:
        # Copy the remaining fields instead of pop()-ing them, so that the
        # records inside `myd` keep their 'id' and 'role' keys.
        details = {
            field: value
            for field, value in emp.items()
            if field not in ('id', 'role')
        }
        merged = empl_dict.setdefault(emp['id'], {})
        merged.update(details)
        # 'role' is created after the first update() so it stays the last key,
        # which keeps the printed output in the same order as before.
        merged.setdefault('role', []).append(emp['role'])

all_employees = {'employee': empl_dict}
print(all_employees)
results in:
{'employee': {'1': {'name': 'John', 'contactEmail': 'john#nif.com', 'role': ['Owner', 'Manager']}, '2': {'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': ['Owner']}}}
You can use pandas to achieve this
Converting to pandas dataframe followed by groupby on contactEmail and aggregating results in required manner
# NOTE(review): `a` (the input dictionary) and `pd` (pandas) are not defined
# in this snippet — they must already be in scope.
# Stack the per-key record lists into one DataFrame; the extra `key` column
# records which top-level key of `a` each row came from.
df = pd.concat([pd.DataFrame(v).assign(key=k) for k,v in a.items()])
# One row per contactEmail: roles gathered into a list, first `name` kept.
# reset_index().T.to_dict() then produces {row_number: {column: value}}, so
# the result is keyed by row position rather than by the records' 'id'.
res = df.groupby('contactEmail').agg({'role':list,'name':'first'}).reset_index().T.to_dict()
{'employee':res}
out:
{'employee': {0: {'contactEmail': 'john#nif.com',
'role': ['Owner', 'Manager'],
'name': 'John'},
1: {'contactEmail': 'work#nif.com', 'role': ['Owner'], 'name': nan}}}
Edit:
If you want to achieve this in plain Python (without pandas):
# Merge the records of `a` (role name -> list of records) by record id.
# The role list is built from the top-level keys of `a`; all other fields
# come from the first record seen for each id.
# `a` itself is left untouched: each merged entry is a fresh dict, so the
# input records are neither modified nor shared with `res`.
res = {}
for role_name, records in a.items():
    for rec in records:
        rec_id = rec['id']
        if rec_id in res:
            res[rec_id]['role'].append(role_name)
        else:
            merged = {field: value for field, value in rec.items() if field != 'id'}
            merged['role'] = [role_name]
            res[rec_id] = merged
{'employee': res}
Out:
{'employee': {'1': {'name': 'John',
'contactEmail': 'john#nif.com',
'role': ['Owner', 'Manager']},
'2': {'contactName': 'Work',
'contactEmail': 'work#nif.com',
'role': ['Owner']}}}
Related
I have a response from my Dynamo DB as:
{
'subject': 'Mathematics',
'course_id': '123',
'Term': 'Second',
'stats':
{'student_stats':
[
{'student_id': '234',
'registration_id': '321'},
{'student_id': '987',
'registration_id': '456'}
]
},
}
Where the partition key is 'subject' and the sort key is 'course_id'.
I would like to update the 'student_stats' for student with 'student_id' as '234' to passed.
The output should look like this:
{
'subject': 'Mathematics',
'course_id': '123',
'Term': 'Second',
'stats':
{'student_stats':
[
{'student_id': '234',
'registration_id': '321',
'status': 'passed'},
{'student_id': '987',
'registration_id': '456'}
]
},
}
This is my implementation so far:
dynamodb = boto3.resource(
    'dynamodb',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=ACCESS_SECRET
)
table = dynamodb.Table(TABLE_NAME)

# Position of the target student inside stats.student_stats. An update
# expression can only address a list element by index, so the index has to be
# known up front (read the item first if it is not).
student_index = 0

# Each placeholder in ExpressionAttributeNames stands for exactly ONE path
# segment. A value such as 'stats.student_stats' is taken as a single
# top-level attribute whose name contains a dot, so the nested path must be
# written in the expression itself: #stats.#student_stats[i].#status.
# The ConditionExpression makes the write fail (ClientError with code
# ConditionalCheckFailedException) rather than mark the wrong student if the
# element at that index is not student '234'.
response = table.update_item(
    Key={'subject': 'Mathematics', 'course_id': '123'},
    UpdateExpression="SET #stats.#student_stats[{i}].#status = :s".format(
        i=student_index
    ),
    ConditionExpression="#stats.#student_stats[{i}].#student_id = :sid".format(
        i=student_index
    ),
    ExpressionAttributeNames={
        '#stats': 'stats',
        '#student_stats': 'student_stats',
        '#status': 'status',
        '#student_id': 'student_id',
    },
    ExpressionAttributeValues={
        ':s': 'passed',
        ':sid': '234',
    },
    ReturnValues="UPDATED_NEW"
)
Let's say that I have a Dictionary like this
# Sample input: teams whose members are either users or references to other
# teams ('type': 'group', 'id': <team id>).
dict1 = [
    {
        'Name': 'Team1',
        'id': '1',
        'Members': [
            {'type': 'user', 'id': '11'},
            {'type': 'user', 'id': '12'},
        ],
    },
    {
        'Name': 'Team2',
        'id': '2',
        'Members': [
            {'type': 'group', 'id': '1'},
            {'type': 'user', 'id': '21'},
        ],
    },
    {
        'Name': 'Team3',
        'id': '3',
        'Members': [
            {'type': 'group', 'id': '2'},
        ],
    },
]
and I want to get an output that can replace all the groups and nested groups with all distinct users.
In this case the output should look like this:
# Expected output: every group reference replaced by the distinct users it
# (transitively) contains.
dict2 = [
    {
        'Name': 'Team1',
        'id': '1',
        'Members': [
            {'type': 'user', 'id': '11'},
            {'type': 'user', 'id': '12'},
        ],
    },
    {
        'Name': 'Team2',
        'id': '2',
        'Members': [
            {'type': 'user', 'id': '11'},
            {'type': 'user', 'id': '12'},
            {'type': 'user', 'id': '21'},
        ],
    },
    {
        'Name': 'Team3',
        'id': '3',
        'Members': [
            {'type': 'user', 'id': '11'},
            {'type': 'user', 'id': '12'},
            {'type': 'user', 'id': '21'},
        ],
    },
]
Now let's assume that I have a large dataset to perform these actions on. (approx 20k individual groups)
What would be the best way to code this? I am attempting recursion, but I am not sure about how to search through the dictionary and lists in this manner such that it doesn't end up using too much memory
I do not think you need recursion. Looping is enough.
I think you can simply go through each team's Members, fetch the users of every member that is of type group, and make the result unique. Then you can replace the value of Members with distinct_users.
You might have a dictionary for groups like:
# Example lookup table: group id -> direct members (users and/or groups).
group_dict = {
    '1': [
        {'type': 'user', 'id': '11'},
        {'type': 'user', 'id': '12'},
    ],
    '2': [
        {'type': 'user', 'id': '11'},
        {'type': 'user', 'id': '12'},
        {'type': 'user', 'id': '21'},
    ],
    '3': [
        {'type': 'group', 'id': '1'},
        {'type': 'group', 'id': '2'},
        {'type': 'group', 'id': '3'},  # recursive
    ],
    # ... further groups go here
}
You can try:
def users_in_group(group_id, groups=None):
    """Return the users reachable from group *group_id*, expanding nested groups.

    Args:
        group_id: key of the group to expand.
        groups: mapping of group id -> list of member dicts (each with a
            'type' of 'user' or 'group' and an 'id'). Defaults to the
            module-level ``group_dict``.

    Returns:
        A list of the user member dicts found, in discovery order. Every
        group is expanded at most once, but the same user may still appear
        more than once when it belongs to several of the visited groups.

    Raises:
        KeyError: if *group_id* or a nested group id is missing from *groups*.
    """
    if groups is None:
        groups = group_dict

    users = []
    groups_fetched = set()  # ids already expanded, so cycles terminate
    groups_to_fetch = [group_id]  # explicit stack instead of recursion
    while groups_to_fetch:
        current_id = groups_to_fetch.pop()
        if current_id in groups_fetched:
            continue
        groups_fetched.add(current_id)
        for member in groups[current_id]:
            if member['type'] == 'group':
                # An already expanded group is skipped here; it must never
                # fall through to the user branch below.
                if member['id'] not in groups_fetched:
                    groups_to_fetch.append(member['id'])
            else:  # 'user' type
                users.append(member)
    return users
def distinct_users_in(members):
    """Expand *members* into a list of users with no repeated id.

    Members of type 'group' are replaced by the users returned from
    ``users_in_group``; any other member is taken to be a user itself.
    The first occurrence of each user id is kept, in encounter order.
    """
    seen_ids = set()
    unique = []

    def _keep(candidate):
        # Record the candidate only the first time its id shows up.
        if candidate['id'] in seen_ids:
            return
        unique.append(candidate)
        seen_ids.add(candidate['id'])

    for entry in members:
        if entry['type'] != 'group':
            _keep(entry)
            continue
        for found in users_in_group(entry['id']):
            _keep(found)
    return unique
# Build dict2 from fresh per-team dicts so that dict1 keeps its original
# group/user membership; a plain `dict2 = dict1` would only alias the list
# and overwrite the Members of dict1 as well.
dict2 = [
    dict(element, Members=distinct_users_in(element['Members']))
    for element in dict1
]
Each Members value is re-assigned to the distinct users returned by distinct_users_in.
The function takes Members and, for each member of type group, fetches that group's users; a member of type user is itself a user. As the (fetched) users are appended to distinct_users, their ids are used to guarantee uniqueness.
users_in_group uses two collections: groups_to_fetch and groups_fetched. The former is a stack used to fetch all the groups nested in a group without recursion. The latter prevents an already fetched group from being fetched again; without it, the loop could run forever.
Finally, if your data are already in memory, this approach should work without exhausting memory.
I need to extract 2 values from this list of dictionary and store it as a key-value pair.
Here I attached sample data..Where I need to extract "Name" and "Service" from this input and store it as a dictionary. Where "Name" is Key and corresponding "Service" is its value.
Input:
response = {
'Roles': [
{
'Path': '/',
'Name': 'Heera',
'Age': '25',
'Policy': 'Policy1',
'Start_Month': 'January',
'PolicyDocument':
{
'Date': '2012-10-17',
'Statement': [
{
'id': '',
'RoleStatus': 'New_Joinee',
'RoleType': {
'Service': 'Service1'
},
'Action': ''
}
]
},
'Duration': 3600
},
{
'Path': '/',
'Name': 'Prem',
'Age': '40',
'Policy': 'Policy2',
'Start_Month': 'April',
'PolicyDocument':
{
'Date': '2018-11-27',
'Statement': [
{
'id': '',
'RoleStatus': 'Senior',
'RoleType': {
'Service': ''
},
'Action': ''
}
]
},
'Duration': 2600
},
]
}
From this input, I need output as a dictionary type.
Output Format: { Name : Service }
Output:
{ "Heera":"Service1","Prem" : " "}
My try:
Role_name =[]
response = {#INPUT WHICH I SPECIFIED ABOVE#}
roles = response['Roles']
for role in roles:
Role_name.append(role['Name'])
print(Role_name)
I need to pair the name with its corresponding service. Any help would be really appreciable.
Thanks in advance.
You just have to write one long expression that reaches down to the key 'Service'.
Also, you have a syntax error in the lines 'Start_Month': 'January') and 'Start_Month': 'April'): each closing parenthesis has no matching opening one.
Fix that and run the following.
This is the code:
# Map each role's name to the service named in its first policy statement.
output_dict = {
    entry["Name"]: entry['PolicyDocument']['Statement'][0]['RoleType']['Service']
    for entry in response['Roles']
}
print(output_dict)
Output:
{'Heera': 'Service1', 'Prem': ''}
You just have to do like this:
# One single-entry dict {name: service} per role, taken from the role's
# first policy statement.
liste = [
    {entry['Name']: entry['PolicyDocument']['Statement'][0]['RoleType']['Service']}
    for entry in response['Roles']
]
print(liste)
It seems your input data is structured kind of strange and I am not sure what the ) are doing next to the months since they make things invalid but here is a working script assuming you removed the parenthesis from your input.
# Sample input with the stray parentheses removed.
response = {
    'Roles': [
        {
            'Path': '/',
            'Name': 'Heera',
            'Age': '25',
            'Policy': 'Policy1',
            'Start_Month': 'January',
            'PolicyDocument': {
                'Date': '2012-10-17',
                'Statement': [
                    {
                        'id': '',
                        'RoleStatus': 'New_Joinee',
                        'RoleType': {'Service': 'Service1'},
                        'Action': '',
                    },
                ],
            },
            'Duration': 3600,
        },
        {
            'Path': '/',
            'Name': 'Prem',
            'Age': '40',
            'Policy': 'Policy2',
            'Start_Month': 'April',
            'PolicyDocument': {
                'Date': '2018-11-27',
                'Statement': [
                    {
                        'id': '',
                        'RoleStatus': 'Senior',
                        'RoleType': {'Service': ''},
                        'Action': '',
                    },
                ],
            },
            'Duration': 2600,
        },
    ],
}


def _first_service(role):
    """Return the service named in the first policy statement of *role*."""
    return role['PolicyDocument']['Statement'][0]['RoleType']['Service']


# Role name -> service of that role's first statement.
output = {role['Name']: _first_service(role) for role in response['Roles']}
print(output)
This should give you what you want in a variable called role_services:
# Role name -> service, visiting every statement of every role. When a role
# has several statements (or a name occurs twice), the last value wins.
role_services = {
    entry['Name']: statement['RoleType']['Service']
    for entry in response['Roles']
    for statement in entry['PolicyDocument']['Statement']
}
It will ensure you'll go through all of the statements within that data structure but be aware you'll overwrite key-value pairs as you traverse the response, if they exist in more than a single entry!
A reference on for loops which might be helpful, illustrates using if statements within them which can help you to extend this to check if items already exist!
Hope that helps
I have two list of dict.
students = [{'lastname': 'JAKUB', 'id': '92051048757', 'name': 'BAJOREK'},
{'lastname': 'MARIANNA', 'id': '92051861424', 'name': 'SLOTARZ'}, {'lastname':
'SZYMON', 'id': '92052033215', 'name': 'WNUK'}, {'lastname': 'WOJCIECH', 'id':
'92052877491', 'name': 'LESKO'}]
And
house = [{'id_pok': '2', 'id': '92051048757'}, {'id_pok': '24', 'id': '92051861424'}]
How can I find the elements of students whose id does not exist in the house list of dicts?
Output
output = [{'lastname':
'SZYMON', 'id': '92052033215', 'name': 'WNUK'}]
I try do that
# Asker's attempt (uses the Python 2 `print` statement).
for student in students:
    for home in house:
        # The test runs once per (student, home) pair, so a student is
        # printed for every house entry whose id differs from theirs.
        if student['id'] != home['id']:
            print student
But this only prints the list repeatedly.
The reason your code doesn't work is that if there's any house_id which doesn't match a student_id, the student will be printed. You'd need some more logic or the any function:
# Print every student whose id matches no entry in `house`.
for student in students:
    if any(student['id'] == home['id'] for home in house):
        continue  # this student has a house entry
    print(student)
It outputs:
{'lastname': 'SZYMON', 'id': '92052033215', 'name': 'WNUK'}
{'lastname': 'WOJCIECH', 'id': '92052877491', 'name': 'LESKO'}
A more efficient solution would be to keep a set of house_ids, and find students whose id isn't included in this set:
students = [
    {'lastname': 'JAKUB', 'id': '92051048757', 'name': 'BAJOREK'},
    {'lastname': 'MARIANNA', 'id': '92051861424', 'name': 'SLOTARZ'},
    {'lastname': 'SZYMON', 'id': '92052033215', 'name': 'WNUK'},
    {'lastname': 'WOJCIECH', 'id': '92052877491', 'name': 'LESKO'},
]
house = [
    {'id_pok': '2', 'id': '92051048757'},
    {'id_pok': '24', 'id': '92051861424'},
]

# Collect the house ids once so each membership test is a set lookup.
house_ids = {entry['id'] for entry in house}
# Keep, in their original order, the students without a house entry.
result = list(filter(lambda person: person['id'] not in house_ids, students))
print(result)
It outputs:
[{'lastname': 'SZYMON', 'id': '92052033215', 'name': 'WNUK'}, {'lastname': 'WOJCIECH', 'id': '92052877491', 'name': 'LESKO'}]
Note that 2 students match your description.
The reason a set is used is that it allows much faster lookup than a list.
student_ids = {d.get('id') for d in students}
house_ids = {d.get('id') for d in house}
# Plain difference, not `^`: the symmetric difference would also return
# house ids that belong to no student.
ids_not_in_house = student_ids - house_ids
students = [
    {'lastname': 'JAKUB', 'id': '92051048757', 'name': 'BAJOREK'},
    {'lastname': 'MARIANNA', 'id': '92051861424', 'name': 'SLOTARZ'},
    {'lastname': 'SZYMON', 'id': '92052033215', 'name': 'WNUK'},
    {'lastname': 'WOJCIECH', 'id': '92052877491', 'name': 'LESKO'},
]
house = [
    {'id_pok': '2', 'id': '92051048757'},
    {'id_pok': '24', 'id': '92051861424'},
]

# Ids present among the students but absent from the house entries.
s = set(item['id'] for item in students)
h = set(item['id'] for item in house)
not_in_house_ids = s - h

# Walk `students` again so the result keeps the original record order.
not_in_house_items = []
for x in students:
    if x['id'] in not_in_house_ids:
        not_in_house_items.append(x)
print (not_in_house_items)
>>>[{'name': 'WNUK', 'lastname': 'SZYMON', 'id': '92052033215'}, {'name': 'LESKO', 'lastname': 'WOJCIECH', 'id': '92052877491'}]
I am having difficulty creating a function that will produce a family tree in JSON format.
An example of a two parent, two offspring tree can be seen here:
{
"children": [
{
"id": 409,
"name": "Joe Bloggs",
"no_parent": "true"
},
{
"children": [
{
"children": [],
"id": 411,
"name": "Alice Bloggs"
},
{
"children": [],
"id": 412,
"name": "John Bloggs"
}
],
"hidden": "true",
"id": "empty_node_id_9",
"name": "",
"no_parent": "true"
},
{
"children": [],
"id": 410,
"name": "Sarah Smith",
"no_parent": "true"
}
],
"hidden": "true",
"id": "year0",
"name": ""
}
Joe Bloggs is married to Sarah Smith, with children Alice Bloggs and John Bloggs. The empty nodes exist purely to handle vertices in the tree-map diagram (see jsfiddle below).
The above example should help explain the syntax. A more complex tree can be found on this jsfiddle: http://jsfiddle.net/cyril123/0vbtvoon/22/
The JSON associated with the jsfiddle can be found from lines 34 to lines 101.
I am having difficulty writing a function that recursively produces the JSON for a family tree. I begin with a person object that represents the oldest member of the family. The function should then check for marriages, children, and so on, and continue until the tree is complete, returning the JSON.
My code involves a person class as well as an associated marriage class. I have appropriate methods such as an id for each person, a get_marriage() function, get_children() methods, etc. I am wondering what the best way to go about this is.
My attempt at a recursive function can be found below. The methods/functions involved etc are not detailed but their purpose should be self-explanatory. Many thanks.
def root_nodes(people, first_node=False): #begin by passing in oldest family member and first_node=True
    """Build the nested family-tree structure (dicts and tuples) for *people*.

    NOTE(review): the nesting below was reconstructed from a paste that had
    lost its indentation — confirm it against the original source. The code
    uses the Python 2 ``print`` statement.

    Args:
        people: a single person object or a list of them; a non-list value
            is wrapped in a one-element list.
        first_node: pass True on the outermost call so that the hidden
            'year0' root node is emitted first.

    Returns:
        The global ``obj`` accumulator, or ``current_obj`` directly as soon
        as a person with a marriage record is processed. Implicitly None
        when *people* is an empty list.

    Relies on names that are not defined in this block: ``rootPeople``,
    ``get_marriage``, ``empty_node`` and the globals ``obj``,
    ``current_obj`` and ``people_used``.
    """
    global obj, current_obj, people_used
    # Prints the length of str(obj) whenever something has been accumulated.
    if obj is not None: print len(str(obj))
    if type(people) != list:
        people = [people]
    for x in people:
        if x in rootPeople and first_node: #handles the beginning of the JSON with an empty 'root' starting node.
            first_node = False
            # Recurses on the same list with first_node left at its default
            # (False), so the recursive call takes the else branch below.
            obj = {'name': "", 'id': 'year0', 'hidden': 'true', 'children': root_nodes(people)}
            return obj
        else:
            marriage_info = get_marriage(x)
            if marriage_info is None: #if person is not married
                current_obj = {'name': x.get_name(), 'id': x.get_id(), 'children': []}
                people_used.append(x)
            else:
                partners = marriage_info.get_members()
                husband, wife = partners[0].get_name(), partners[1].get_name()
                husband_id, wife_id = marriage_info.husband.get_id(), marriage_info.wife.get_id()
                marriage_year = marriage_info.year
                children = marriage_info.get_children()
                people_used.append(partners[0])
                people_used.append(partners[1])
                # Every branch below builds a 3-tuple: husband node, hidden
                # connector node holding the couple's children, wife node.
                # The branches differ only in which spouse nodes carry
                # 'no_parent': 'true'.
                # NOTE(review): presumably empty_node() returns a string
                # suffix for the connector id — confirm.
                if partners[0].get_parents() == ['None', 'None'] or partners[1].get_parents() == ['None', 'None']:
                    if partners[0].get_parents() == ['None', 'None'] and partners[1].get_parents() == ['None', 'None']:
                        current_obj = {'name': str(husband), 'id': husband_id, 'no_parent': 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
                    if partners[0].get_parents() == ['None', 'None'] and partners[1].get_parents() != ['None', 'None']:
                        current_obj = {'name': str(husband), 'id': husband_id, 'no_parent': 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
                    if partners[0].get_parents() != ['None', 'None'] and partners[1].get_parents() == ['None', 'None']:
                        current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
                else:
                    # Both partners have parents listed: a spouse is flagged
                    # 'no_parent' when none of their parents is in people_used.
                    if not any((True for x in partners[0].get_parents() if x in people_used)):
                        current_obj = {'name': str(husband), 'id': husband_id, 'no_parent' : 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
                    elif not any((True for x in partners[1].get_parents() if x in people_used)):
                        current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
                    else:
                        current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
                # NOTE(review): returning here leaves the for loop, so the
                # remaining entries of `people` are never processed once a
                # person with a marriage record is reached.
                return current_obj
            # Accumulate the unmarried person's node; a second and later
            # node turns obj into a nested 2-tuple (obj, current_obj).
            if obj is None:
                obj = current_obj
            else:
                obj = obj, current_obj
            # Return once the last element of the list has been handled.
            if people.index(x) == len(people)-1:
                return obj
Even though the function above is badly written - it is almost successful. The only instance where it fails is if one child is married, then the other children are missed out from the JSON. This is because obj is returned without going to the next iteration in the for loop. Any suggestions on how to fix this would be appreciated.