Let's say that I have a Dictionary like this
dict1 = [{
'Name': 'Team1',
'id': '1',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
]
},
{
'Name': 'Team2',
'id': '2',
'Members': [
{
'type': 'group'
'id': '1'
},
{
'type': 'user',
'id': '21'
}
]
},
{
'Name': 'Team3',
'id': '3',
'Members': [
{
'type': 'group'
'id': '2'
}
]
}]
and I want to get an output that can replace all the groups and nested groups with all distinct users.
In this case the output should look like this:
dict2 = [{
'Name': 'Team1',
'id': '1',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
]
},
{
'Name': 'Team2',
'id': '2',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
{
'type': 'user',
'id': '21'
}
]
},
{
'Name': 'Team3',
'id': '3',
'Members': [
{
'type': 'user',
'id: '11'
},
{
'type': 'user',
'id': '12'
}
{
'type': 'user',
'id': '21'
}
]
}]
Now let's assume that I have a large dataset to perform these actions on. (approx 20k individual groups)
What would be the best way to code this? I am attempting recursion, but I am not sure about how to search through the dictionary and lists in this manner such that it doesn't end up using too much memory
I do not think you need recursion. Looping is enough.
I think you can simply evaluate each Memberss, fetch users if group type, and make them unique. Then you can simply replace Members's value with distinct_users.
You might have a dictionary for groups like:
group_dict = {
'1': [
{'type': 'user', 'id': '11'},
{'type': 'user', 'id': '12'}
],
'2': [
{'type': 'user', 'id': '11'},
{'type': 'user', 'id': '12'},
{'type': 'user', 'id': '21'}
],
'3': [
{'type': 'group', 'id': '1'},
{'type': 'group', 'id': '2'},
{'type': 'group', 'id': '3'} # recursive
]
...
}
You can try:
def users_in_group(group_id):
users = []
groups_to_fetch = []
for user_or_group in group_dict[group_id]:
if user_or_group['type'] == 'group':
groups_to_fetch.append(user_or_group)
else: # 'user' type
users.append(user_or_group)
groups_fetched = set() # not to loop forever
while groups_to_fetch:
group = groups_to_fetch.pop()
if group['id'] not in groups_fetched:
groups_fetched.add(group['id'])
for user_or_group in group_dict[group['id']]:
if user_or_group['type'] == 'group' and user_or_group['id'] not in groups_fetched:
groups_to_fetch.append(user_or_group)
else: # 'user' type
users.append(user_or_group)
return users
def distinct_users_in(members):
distinct_users = []
def add(user):
if user['id'] not in user_id_set:
distinct_users.append(user)
user_id_set.add(user['id'])
user_id_set = set()
for member in members:
if member['type'] == 'group':
for user in users_in_group(member['id']):
add(user)
else: # 'user'
user = member
add(user)
return distinct_users
dict2 = dict1 # or `copy.deepcopy`
for element in dict2:
element['Members'] = distinct_users_in(element['Members'])
Each Members is re-assigned by distinct_users returned by the corresponding function.
The function takes Members and fetches users from each if member type. If user type, member itself is a user. While (fetched) users are appended to distinct_user, you can use their ids for uniquity.
When you fetch users_in_group, you can use two lists; groups_to_fetch and groups_fetched. The former is a stack to recursively fetch all groups in a group. The latter is not to fetch an already fetched group again. Or, it could loop forever.
Finally, if your data are already in memory, this approach may not exhaust memory and work.
Related
Myd is below
{ 'Owner': [ { 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Owner' }, { 'id': '2', 'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': 'Owner' } ], 'Manager': [ { 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Manager' } ] }
Extract id to outside
Add entire dictionary into a new key called 'employee'
For the same key role are there in two different keys merge to one
id=1 role is present as Owner and Manager, output will role:['Manager', 'Owner']
Expected out
{ 'employee': { '1': { 'email': 'john#nif.com', 'name': 'John', 'role': [ 'Owner', 'Manager' ] }, '2': { 'email': 'work#nif.com', 'name': 'Work', 'role': [ 'Owner' ] } } }
emp = {}
for key,val in event.items():
for each in val:
# [{'employee': key, **val} for key, val in event.items()] if event else []
emp['employee'] = each['id']
emp['name'] = each['name']
using python native method
Here's a try without using third party lib:
myd = {
'Owner': [
{ 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Owner' },
{ 'id': '2', 'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': 'Owner' }
],
'Manager': [ { 'id': '1', 'name': 'John', 'contactEmail': 'john#nif.com', 'role': 'Manager' } ]
}
empl_dict = {}
for employees in myd.values():
for emp in employees:
emp_id = emp.pop('id')
emp_role = emp.pop('role')
empl_dict[emp_id] = empl_dict.get(emp_id, {})
empl_dict[emp_id].update(emp)
empl_dict[emp_id]['role'] = empl_dict[emp_id].get('role', [])
empl_dict[emp_id]['role'].append(emp_role)
all_employees = {'employee': empl_dict}
print(all_employees)
results in:
{'employee': {'1': {'name': 'John', 'contactEmail': 'john#nif.com', 'role': ['Owner', 'Manager']}, '2': {'contactName': 'Work', 'contactEmail': 'work#nif.com', 'role': ['Owner']}}}
You can use pandas to achieve this
Converting to pandas dataframe followed by groupby on contactEmail and aggregating results in required manner
df = pd.concat([pd.DataFrame(v).assign(key=k) for k,v in a.items()])
res = df.groupby('contactEmail').agg({'role':list,'name':'first'}).reset_index().T.to_dict()
{'employee':res}
out:
{'employee': {0: {'contactEmail': 'john#nif.com',
'role': ['Owner', 'Manager'],
'name': 'John'},
1: {'contactEmail': 'work#nif.com', 'role': ['Owner'], 'name': nan}}}
Edit:
if you want to achieve this in python
for OM in a.keys():
for ids in a[OM]:
ids['role'] = [OM]
total_recs = sum(list(a.values()),[])
res = {}
for rec in total_recs:
ID = rec['id']
if ID not in res.keys():
rec.pop('id')
res[ID] = rec
else:
rec.pop('id')
res[ID]['role'].extend(rec['role'])
{'employee':res}
Out:
{'employee': {'1': {'name': 'John',
'contactEmail': 'john#nif.com',
'role': ['Owner', 'Manager']},
'2': {'contactName': 'Work',
'contactEmail': 'work#nif.com',
'role': ['Owner']}}}
I need to extract 2 values from this list of dictionary and store it as a key-value pair.
Here I attached sample data..Where I need to extract "Name" and "Service" from this input and store it as a dictionary. Where "Name" is Key and corresponding "Service" is its value.
Input:
response = {
'Roles': [
{
'Path': '/',
'Name': 'Heera',
'Age': '25',
'Policy': 'Policy1',
'Start_Month': 'January',
'PolicyDocument':
{
'Date': '2012-10-17',
'Statement': [
{
'id': '',
'RoleStatus': 'New_Joinee',
'RoleType': {
'Service': 'Service1'
},
'Action': ''
}
]
},
'Duration': 3600
},
{
'Path': '/',
'Name': 'Prem',
'Age': '40',
'Policy': 'Policy2',
'Start_Month': 'April',
'PolicyDocument':
{
'Date': '2018-11-27',
'Statement': [
{
'id': '',
'RoleStatus': 'Senior',
'RoleType': {
'Service': ''
},
'Action': ''
}
]
},
'Duration': 2600
},
]
}
From this input, I need output as a dictionary type.
Output Format: { Name : Service }
Output:
{ "Heera":"Service1","Prem" : " "}
My try:
Role_name =[]
response = {#INPUT WHICH I SPECIFIED ABOVE#}
roles = response['Roles']
for role in roles:
Role_name.append(role['Name'])
print(Role_name)
I need to pair the name with its corresponding service. Any help would be really appreciable.
Thanks in advance.
You just have to write a long line which can reach till the key 'Service'.
And you a syntax error in line Start_Month': 'January') and 'Start_Month': 'April'). You can't have one unclosed brackets.
Fix it and run the following.
This is the code:
output_dict = {}
for r in response['Roles']:
output_dict[r["Name"]] = r['PolicyDocument']['Statement'][0]['RoleType']['Service']
print(output_dict)
Output:
{'Heera': 'Service1', 'Prem': ''}
You just have to do like this:
liste = []
for role in response['Roles']:
liste.append(
{
role['Name']:role['PolicyDocument']['Statement'][0]['RoleType']['Service'],
}
)
print(liste)
It seems your input data is structured kind of strange and I am not sure what the ) are doing next to the months since they make things invalid but here is a working script assuming you removed the parenthesis from your input.
response = {
'Roles': [
{
'Path': '/',
'Name': 'Heera',
'Age': '25',
'Policy': 'Policy1',
'Start_Month': 'January',
'PolicyDocument':
{
'Date': '2012-10-17',
'Statement': [
{
'id': '',
'RoleStatus': 'New_Joinee',
'RoleType': {
'Service': 'Service1'
},
'Action': ''
}
]
},
'Duration': 3600
},
{
'Path': '/',
'Name': 'Prem',
'Age': '40',
'Policy': 'Policy2',
'Start_Month': 'April',
'PolicyDocument':
{
'Date': '2018-11-27',
'Statement': [
{
'id': '',
'RoleStatus': 'Senior',
'RoleType': {
'Service': ''
},
'Action': ''
}
]
},
'Duration': 2600
},
]
}
output = {}
for i in response['Roles']:
output[i['Name']] = i['PolicyDocument']['Statement'][0]['RoleType']['Service']
print(output)
This should give you what you want in a variable called role_services:
role_services = {}
for role in response['Roles']:
for st in role['PolicyDocument']['Statement']:
role_services[role['Name']] = st['RoleType']['Service']
It will ensure you'll go through all of the statements within that data structure but be aware you'll overwrite key-value pairs as you traverse the response, if they exist in more than a single entry!
A reference on for loops which might be helpful, illustrates using if statements within them which can help you to extend this to check if items already exist!
Hope that helps
I am grabbing sort of a complex MongoDB document with Python (v3.5) and I should update some values in it which are scattered all around the object and have no particular pattern in the structure and save it back to a different MongoDB collection. The object looks like this:
# after json.loads(mongo_db_document) my dict looks like this
notification = {
'_id': '570f934f45213b0d14b1256f',
'key': 'receipt',
'label': 'Delivery Receipt',
'version': '0.0.1',
'active': True,
'children': [
{
'key': 'started',
'label': 'Started',
'children': [
'date',
'time',
'offset'
]
},
{
'key': 'stop',
'label': 'Ended',
'children': [
'date',
'time',
'offset'
]
},
{
'label': '1. Particulars',
'template': 'formGroup',
'children': [
{
'children': [
{
'key': 'name',
'label': '2.1 Name',
'value': '********** THIS SHOULD BE UPDATED **********',
'readonly': 'true'
},
{
'key': 'ims_id',
'label': '2.2 IMS Number',
'value': '********** THIS SHOULD BE UPDATED **********',
'readonly': 'true'
}
]
},
{
'children': [
{
'key': 'type',
'readonly': '********** THIS SHOULD BE UPDATED **********',
'label': '2.3 Type',
'options': [
{
'label': 'Passenger',
'value': 'A37'
},
{
'label': 'Cargo',
'value': 'A35'
},
{
'label': 'Other',
'value': '********** THIS SHOULD BE UPDATED **********'
}
]
}
]
}
]
},
{
'template': 'formGroup',
'key': 'waste',
'label': '3. Waste',
'children': [
{
'label': 'Waste',
'children': [
{
'label': 'Plastics',
'key': 'A',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
},
{
'label': 'B. Oil',
'key': 'B',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
},
{
'label': 'C. Operational',
'key': 'C',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
}
]
}
]
},
{
'template': 'formRow',
'children': [
'empty',
'signature'
]
}
],
'filter': {
'timestamp_of_record': [
'date',
'time',
'offset'
]
}
}
My initial idea was to put placeholders (like $var_name) in places where I need to update values, and load the string with Python's string.Template, but that approach unfortunately breaks lots of stuff to other users of the same MongoDB document for some reason.
Is there a solution to simply modify this kind of object without "hardcoding" path to find the values I need to update?
There's this small script that I had written a couple years ago - I used it to find entries in some very long and unnerving JSONs. Admittedly it's not beautiful, but it might help in your case, perhaps?
You can find the script on Bitbucket, here (and here is the code).
Unfortunately it's not documented; at the time I wasn't really believing other people would use it, I guess.
Anyways, if you'd like to try it, save the script in your working directory and then use something like this:
from RecursiveSearch import Retriever
def alter_data(json_data, key, original, newval):
'''
Alter *all* values of said keys
'''
retr = Retriever(json_data)
for item_no, item in enumerate(retr.__track__(key)): # i.e. all 'value'
# Pick parent objects with a last element False in the __track__() result,
# indicating that `key` is either a dict key or a set element
if not item[-1]:
parent = retr.get_parent(key, item_no)
try:
if parent[key] == original:
parent[key] = newval
except TypeError:
# It's a set, this is not the key you're looking for
pass
if __name__ == '__main__':
alter_data(notification, key='value',
original = '********** THIS SHOULD BE UPDATED **********',
newval = '*UPDATED*')
Unfortunately as I said the script isn't well documented, so if you want to try it and need more info, I'll be glad to provide it.
Not sure if I understood correctly, but this will dynamically find all keys "value" and "readonly" and print out the paths to address the fields.
def findem(data, trail):
if isinstance(data, dict):
for k in data.keys():
if k in ('value', 'readonly'):
print("{}['{}']".format(trail, k))
else:
findem(data[k], "{}['{}']".format(trail, k))
elif isinstance(data, list):
for k in data:
findem(k, '{}[{}]'.format(trail, data.index(k)))
if __name__ == '__main__':
findem(notification, 'notification')
notification['children'][2]['children'][0]['children'][0]['readonly']
notification['children'][2]['children'][0]['children'][0]['value']
notification['children'][2]['children'][0]['children'][1]['readonly']
notification['children'][2]['children'][0]['children'][1]['value']
notification['children'][2]['children'][1]['children'][0]['readonly']
notification['children'][2]['children'][1]['children'][0]['options'][0]['value']
notification['children'][2]['children'][1]['children'][0]['options'][1]['value']
notification['children'][2]['children'][1]['children'][0]['options'][2]['value']
notification['children'][3]['children'][0]['children'][0]['value']
notification['children'][3]['children'][0]['children'][1]['value']
notification['children'][3]['children'][0]['children'][2]['value']
Add another list to the JSON object. Each item in that list would be a list of keys that lead to the values to be changed. An example for one such list is: ['children', 2, 'children', 'children', 0, 'value'].
Then, to access the value you could use a loop:
def change(json, path, newVal):
cur = json
for key in path[:-1]:
cur = cur[key]
cur[path[-1]] = newVal
path = notification['paths'][0]
#path, for example, could be ['children', 2, 'children', 'children', 0, 'value']
newVal = 'what ever you want'
change(notification, path, newVal)
first of all i'm new to mongo so I don't know much and i cannot just remove duplicate rows due to some dependencies.
I have following data stored in mongo
{'id': 1, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 2, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 3, 'key': 'pehnvosjijipehnvosjijipehnvosjijipehnvosjijipehnvosjiji', 'name': 'some name', 'country': 'IN'},
{'id': 4, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'},
{'id': 5, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'}
you can see some of the rows are duplicate with different id
as long as it will take to solve this issue from input I must tackle it on output.
I need the data in the following way:
{'id': 1, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 3, 'key': 'pehnvosjijipehnvosjijipehnvosjijipehnvosjijipehnvosjiji', 'name': 'some name', 'country': 'IN'},
{'id': 4, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'}
My query
keys = db.collection.distinct('key', {})
all_data = db.collection.find({'key': {$in: keys}})
As you can see it takes two queries for a same result set Please combine it to one as the database is very large
I might also create a unique key on the key but the value is so long (152 characters) that it will not help me.
Or it will??
You need to use the aggregation framework for this. There are multiple ways to do this, the solution below uses the $$ROOT variable to get the first document for each group:
db.data.aggregate([{
"$sort": {
"_id": 1
}
}, {
"$group": {
"_id": "$key",
"first": {
"$first": "$$ROOT"
}
}
}, {
"$project": {
"_id": 0,
"id":"$first.id",
"key":"$first.key",
"name":"$first.name",
"country":"$first.country"
}
}])
I am having difficulty creating a function that will produce a family tree in JSON format.
An example of a two parent, two offspring tree can be seen here:
{
"children": [
{
"id": 409,
"name": "Joe Bloggs",
"no_parent": "true"
},
{
"children": [
{
"children": [],
"id": 411,
"name": "Alice Bloggs"
},
{
"children": [],
"id": 412,
"name": "John Bloggs"
}
],
"hidden": "true",
"id": "empty_node_id_9",
"name": "",
"no_parent": "true"
},
{
"children": [],
"id": 410,
"name": "Sarah Smith",
"no_parent": "true"
}
],
"hidden": "true",
"id": "year0",
"name": ""
}
Joe Bloggs is married to Sarah Smith, with children Alice Bloggs and John Bloggs. The empty nodes exist purely to handle vertices in the tree-map diagram (see jsfiddle below).
The above example should help explain the syntax. A more complex tree can be found on this jsfiddle: http://jsfiddle.net/cyril123/0vbtvoon/22/
The JSON associated with the jsfiddle can be found from lines 34 to lines 101.
I am having difficulty writing a function that recursively produces the JSON for a family tree. I begin with a person class that represents the oldest member of the family. The function would then checks for marriages, for children etc and continues until the tree is complete, returning the json.
My code involves a person class as well as an associated marriage class. I have appropriate methods such as ids for each person, get_marriage() function, get_children() methods etc. I am wondering the best way to go about this is.
My attempt at a recursive function can be found below. The methods/functions involved etc are not detailed but their purpose should be self-explanatory. Many thanks.
def root_nodes(people, first_node=False): #begin by passing in oldest family member and first_node=True
global obj, current_obj, people_used
if obj is not None: print len(str(obj))
if type(people) != list:
people = [people]
for x in people:
if x in rootPeople and first_node: #handles the beginning of the JSON with an empty 'root' starting node.
first_node = False
obj = {'name': "", 'id': 'year0', 'hidden': 'true', 'children': root_nodes(people)}
return obj
else:
marriage_info = get_marriage(x)
if marriage_info is None: #if person is not married
current_obj = {'name': x.get_name(), 'id': x.get_id(), 'children': []}
people_used.append(x)
else:
partners = marriage_info.get_members()
husband, wife = partners[0].get_name(), partners[1].get_name()
husband_id, wife_id = marriage_info.husband.get_id(), marriage_info.wife.get_id()
marriage_year = marriage_info.year
children = marriage_info.get_children()
people_used.append(partners[0])
people_used.append(partners[1])
if partners[0].get_parents() == ['None', 'None'] or partners[1].get_parents() == ['None', 'None']:
if partners[0].get_parents() == ['None', 'None'] and partners[1].get_parents() == ['None', 'None']:
current_obj = {'name': str(husband), 'id': husband_id, 'no_parent': 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
if partners[0].get_parents() == ['None', 'None'] and partners[1].get_parents() != ['None', 'None']:
current_obj = {'name': str(husband), 'id': husband_id, 'no_parent': 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
if partners[0].get_parents() != ['None', 'None'] and partners[1].get_parents() == ['None', 'None']:
current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
else:
if not any((True for x in partners[0].get_parents() if x in people_used)):
current_obj = {'name': str(husband), 'id': husband_id, 'no_parent' : 'true'}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
elif not any((True for x in partners[1].get_parents() if x in people_used)):
current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'no_parent': 'true', 'children': []}
else:
current_obj = {'name': str(husband), 'id': husband_id}, {'name': '', 'id': 'empty_node_id_' + empty_node(), 'no_parent': 'true', 'hidden': 'true', 'children': root_nodes(children)}, {'name': str(wife), 'id': wife_id, 'children': []}
return current_obj
if obj is None:
obj = current_obj
else:
obj = obj, current_obj
if people.index(x) == len(people)-1:
return obj
Even though the function above is badly written - it is almost successful. The only instance where it fails is if one child is married, then the other children are missed out from the JSON. This is because obj is returned without going to the next iteration in the for loop. Any suggestions on how to fix this would be appreciated.