Updating complex JSON object in Python - python

I am grabbing sort of a complex MongoDB document with Python (v3.5) and I should update some values in it which are scattered all around the object and have no particular pattern in the structure and save it back to a different MongoDB collection. The object looks like this:
# after json.loads(mongo_db_document) my dict looks like this
notification = {
'_id': '570f934f45213b0d14b1256f',
'key': 'receipt',
'label': 'Delivery Receipt',
'version': '0.0.1',
'active': True,
'children': [
{
'key': 'started',
'label': 'Started',
'children': [
'date',
'time',
'offset'
]
},
{
'key': 'stop',
'label': 'Ended',
'children': [
'date',
'time',
'offset'
]
},
{
'label': '1. Particulars',
'template': 'formGroup',
'children': [
{
'children': [
{
'key': 'name',
'label': '2.1 Name',
'value': '********** THIS SHOULD BE UPDATED **********',
'readonly': 'true'
},
{
'key': 'ims_id',
'label': '2.2 IMS Number',
'value': '********** THIS SHOULD BE UPDATED **********',
'readonly': 'true'
}
]
},
{
'children': [
{
'key': 'type',
'readonly': '********** THIS SHOULD BE UPDATED **********',
'label': '2.3 Type',
'options': [
{
'label': 'Passenger',
'value': 'A37'
},
{
'label': 'Cargo',
'value': 'A35'
},
{
'label': 'Other',
'value': '********** THIS SHOULD BE UPDATED **********'
}
]
}
]
}
]
},
{
'template': 'formGroup',
'key': 'waste',
'label': '3. Waste',
'children': [
{
'label': 'Waste',
'children': [
{
'label': 'Plastics',
'key': 'A',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
},
{
'label': 'B. Oil',
'key': 'B',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
},
{
'label': 'C. Operational',
'key': 'C',
'inputType': 'number',
'inputAttributes': {
'min': 0
},
'value': '********** THIS SHOULD BE UPDATED **********'
}
]
}
]
},
{
'template': 'formRow',
'children': [
'empty',
'signature'
]
}
],
'filter': {
'timestamp_of_record': [
'date',
'time',
'offset'
]
}
}
My initial idea was to put placeholders (like $var_name) in places where I need to update values, and load the string with Python's string.Template, but that approach unfortunately breaks lots of stuff to other users of the same MongoDB document for some reason.
Is there a solution to simply modify this kind of object without "hardcoding" path to find the values I need to update?

There's this small script that I had written a couple years ago - I used it to find entries in some very long and unnerving JSONs. Admittedly it's not beautiful, but it might help in your case, perhaps?
You can find the script on Bitbucket, here (and here is the code).
Unfortunately it's not documented; at the time I wasn't really believing other people would use it, I guess.
Anyways, if you'd like to try it, save the script in your working directory and then use something like this:
from RecursiveSearch import Retriever
def alter_data(json_data, key, original, newval):
'''
Alter *all* values of said keys
'''
retr = Retriever(json_data)
for item_no, item in enumerate(retr.__track__(key)): # i.e. all 'value'
# Pick parent objects with a last element False in the __track__() result,
# indicating that `key` is either a dict key or a set element
if not item[-1]:
parent = retr.get_parent(key, item_no)
try:
if parent[key] == original:
parent[key] = newval
except TypeError:
# It's a set, this is not the key you're looking for
pass
if __name__ == '__main__':
alter_data(notification, key='value',
original = '********** THIS SHOULD BE UPDATED **********',
newval = '*UPDATED*')
Unfortunately as I said the script isn't well documented, so if you want to try it and need more info, I'll be glad to provide it.

Not sure if I understood correctly, but this will dynamically find all keys "value" and "readonly" and print out the paths to address the fields.
def findem(data, trail):
if isinstance(data, dict):
for k in data.keys():
if k in ('value', 'readonly'):
print("{}['{}']".format(trail, k))
else:
findem(data[k], "{}['{}']".format(trail, k))
elif isinstance(data, list):
for k in data:
findem(k, '{}[{}]'.format(trail, data.index(k)))
if __name__ == '__main__':
findem(notification, 'notification')
notification['children'][2]['children'][0]['children'][0]['readonly']
notification['children'][2]['children'][0]['children'][0]['value']
notification['children'][2]['children'][0]['children'][1]['readonly']
notification['children'][2]['children'][0]['children'][1]['value']
notification['children'][2]['children'][1]['children'][0]['readonly']
notification['children'][2]['children'][1]['children'][0]['options'][0]['value']
notification['children'][2]['children'][1]['children'][0]['options'][1]['value']
notification['children'][2]['children'][1]['children'][0]['options'][2]['value']
notification['children'][3]['children'][0]['children'][0]['value']
notification['children'][3]['children'][0]['children'][1]['value']
notification['children'][3]['children'][0]['children'][2]['value']

Add another list to the JSON object. Each item in that list would be a list of keys that lead to the values to be changed. An example for one such list is: ['children', 2, 'children', 'children', 0, 'value'].
Then, to access the value you could use a loop:
def change(json, path, newVal):
cur = json
for key in path[:-1]:
cur = cur[key]
cur[path[-1]] = newVal
path = notification['paths'][0]
#path, for example, could be ['children', 2, 'children', 'children', 0, 'value']
newVal = 'what ever you want'
change(notification, path, newVal)

Related

How can I use the batchupdate function of the google form api with python to modify the url and answers?

Here is an example of my form. How can I use python to modify the url and answer for the first question, as I am not familiar with using batchupdate?
I can use "get" to retrieve information from the form.
{'formId': '1q4pJMDtiLxQ2cjmLXxowqJ5VPfI68bUUo',
'info': {'title': 'PIXEL ', 'documentTitle': 'daily'},
'settings': {'quizSettings': {'isQuiz': True}},
'revisionId': '00000067',
'responderUri': 'https://docs.google.com/forms/d/e/1FAIpQLScap6ZdpOnWIyxWqZNXjlfWW9DgPe-Wv_CUtziWw/viewform',
'items': [{'itemId': '7c0ddb37', 'pageBreakItem': {}},
{'itemId': '2870b06c', 'videoItem': {'video': {'youtubeUri': 'www.youtube.com/watch?v=Lt5HqPvM-eI', 'properties': {'alignment': 'LEFT', 'width': 320}}}},
{'itemId': '381aedf6', 'questionGroupItem': {'questions': [{'questionId': '4d7f011e', 'required': True, 'rowQuestion': {'title': 'pick'}}], 'grid': {'columns': {'type': 'RADIO', 'options': [{'value': '1'}, {'value': '2'}, {'value': '3'}]}}}, 'title': 'pay'},
{'itemId': '0f9dc00b', 'title': 'number', 'questionItem': {'question': {'questionId': '39523976', 'required': True,
'grading': {'correctAnswers': {'answers': [{'value':'1115'}]}}, 'textQuestion': {}}}},
{'itemId': '0a12a42e', 'pageBreakItem': {}},
{'itemId': '19640fea', 'videoItem': {'video': {'youtubeUri': 'www.youtube.com/watch?v=Lt5HqPvM-eI', 'properties': {'alignment': 'LEFT', 'width': 320}}}},
{'itemId': '685ba545', 'questionGroupItem': {'questions': [{'questionId': '044f9f9b', 'required': True, 'rowQuestion': {'title': 'pick'}}], 'grid': {'columns': {'type': 'RADIO', 'options': [{'value': '1'}, {'value': '2'}, {'value': '3'}]}}}, 'title': 'pay'},
{'itemId': '6a9d1b88', 'title': 'number', 'questionItem': {'question': {'questionId': '2199beb0', 'required': True,
'grading': {'correctAnswers': {'answers': [{'value': '1115'}]}}, 'textQuestion': {}}}}]}
The official documentation has too few examples for me to understand how to apply it to my form.
update = {
"requests": [{
"updateItem": {
"item": {
"title": "Homework video",
"description": "Quizzes in Google Forms",
"videoItem": {
"video": {
"youtubeUri": "https://www.youtube.com/watch?v=Lt5HqPvM-eI"
}
}
},"location": {
"index": 0},
"updateMask": "description,youtubeUri"
}
}]
}
question_setting = service.forms().batchUpdate(
formId=form_id, body=update).execute()
From your following reply,
I want to update the youtubeUri item and use a new URL. How can I do this? i have two question use the video,how do i update the first question URL ?
I understood your question is as follows.
You want to update youtubeUri of 1st question in Google Forms using googleapis for python.
In this case, how about the following sample script?
Sample script:
service = # Please use your client
formId = "###" # Please set your Google Form ID.
after = "https://www.youtube.com/watch?v=###" # Please set YouTube URL you want to replace. In this sample, the existing URL is changed to this URL.
res = service.forms().get(formId=formId).execute()
itemIds = [[i, e["itemId"]] for i, e in enumerate(res.get("items")) if "videoItem" in e]
topItem = itemIds[0] # From your question, `youtubeUri` of the 1st question.
req = {
"requests": [
{
"updateItem": {
"item": {
"itemId": topItem[1],
"videoItem": {
"video": {
"youtubeUri": after,
}
},
},
"location": {"index": topItem[0]},
"updateMask": "videoItem.video.youtubeUri",
}
}
]
}
service.forms().batchUpdate(formId=formId, body=req).execute()
When this script is run, first, all items are retrieved. And, the item IDs including youtubeUri are retrieved. And, using the 1st item ID, the value of youtubeUri is changed to the value of after you set.
Note:
In this sample script, it supposes that you have already been able to get and out values to Google Form using Google Form API. Please be careful about this.
Reference:
Method: forms.batchUpdate

Python match nested structure

Im trying to define some logic, that verifies everything in one nested dictionary belongs to another nested nested dictionary.
Ie:
official_data = {
'Name': 'John Smith',
'ID': 123123232,
'Family': [
{'Name': 'Sarah Smith','ID': 12312323},
{'Name': 'Joe Smith','ID': 12312324}
{'Name': 'Tim Smith','ID': 12312325}
{'Name': 'Sally Smith','ID': 12312326}
],
'Info': {
'InfoList': [
{'text': ['Personal Info Message']},
{'text': ['Secondary Message']}
]
}
}
sample_data = {
'Family': [
{"Name": 'Joe Smith'}
],
'Info': {
'InfoList': [
{'text': ['Secondary Message']}
]
}
}
matches(official_data, sample_data) # True, because everything in sample data exists in official_data, despite official_data having MORE values.
different_sample = {
'Info': {
'InfoList': [{}]
}
}
matches(official_data, different_sample) # True, because the structure of Dict -> Dict -> List -> Dict exists
bad_data = {'ID': 54242343}
matches(official_data, bad_data) # False, because the ID of bad_data is not the ID of official_data
other_bad_data = {
'Info': {
'InfoList': {}
}
}
matches(official_data, other_bad_data) # False, because InfoList is a list in official data
I have a feeling such logic SHOULD be easy to implement, or has already been implemented and is in wide use, but I am struggling to find what i want, and implementing it on my own becomes complicated, with recursive solutions and casting lists into sets in order to make sure order is ignored.
Im wondering if im missing something obvious, or if this logic is actually really niche and would have to be designed from scratch.

How can I implement this recursion in python?

Let's say that I have a Dictionary like this
dict1 = [{
'Name': 'Team1',
'id': '1',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
]
},
{
'Name': 'Team2',
'id': '2',
'Members': [
{
'type': 'group'
'id': '1'
},
{
'type': 'user',
'id': '21'
}
]
},
{
'Name': 'Team3',
'id': '3',
'Members': [
{
'type': 'group'
'id': '2'
}
]
}]
and I want to get an output that can replace all the groups and nested groups with all distinct users.
In this case the output should look like this:
dict2 = [{
'Name': 'Team1',
'id': '1',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
]
},
{
'Name': 'Team2',
'id': '2',
'Members': [
{
'type': 'user',
'id': '11'
},
{
'type': 'user',
'id': '12'
}
{
'type': 'user',
'id': '21'
}
]
},
{
'Name': 'Team3',
'id': '3',
'Members': [
{
'type': 'user',
'id: '11'
},
{
'type': 'user',
'id': '12'
}
{
'type': 'user',
'id': '21'
}
]
}]
Now let's assume that I have a large dataset to perform these actions on. (approx 20k individual groups)
What would be the best way to code this? I am attempting recursion, but I am not sure about how to search through the dictionary and lists in this manner such that it doesn't end up using too much memory
I do not think you need recursion. Looping is enough.
I think you can simply evaluate each Memberss, fetch users if group type, and make them unique. Then you can simply replace Members's value with distinct_users.
You might have a dictionary for groups like:
group_dict = {
'1': [
{'type': 'user', 'id': '11'},
{'type': 'user', 'id': '12'}
],
'2': [
{'type': 'user', 'id': '11'},
{'type': 'user', 'id': '12'},
{'type': 'user', 'id': '21'}
],
'3': [
{'type': 'group', 'id': '1'},
{'type': 'group', 'id': '2'},
{'type': 'group', 'id': '3'} # recursive
]
...
}
You can try:
def users_in_group(group_id):
users = []
groups_to_fetch = []
for user_or_group in group_dict[group_id]:
if user_or_group['type'] == 'group':
groups_to_fetch.append(user_or_group)
else: # 'user' type
users.append(user_or_group)
groups_fetched = set() # not to loop forever
while groups_to_fetch:
group = groups_to_fetch.pop()
if group['id'] not in groups_fetched:
groups_fetched.add(group['id'])
for user_or_group in group_dict[group['id']]:
if user_or_group['type'] == 'group' and user_or_group['id'] not in groups_fetched:
groups_to_fetch.append(user_or_group)
else: # 'user' type
users.append(user_or_group)
return users
def distinct_users_in(members):
distinct_users = []
def add(user):
if user['id'] not in user_id_set:
distinct_users.append(user)
user_id_set.add(user['id'])
user_id_set = set()
for member in members:
if member['type'] == 'group':
for user in users_in_group(member['id']):
add(user)
else: # 'user'
user = member
add(user)
return distinct_users
dict2 = dict1 # or `copy.deepcopy`
for element in dict2:
element['Members'] = distinct_users_in(element['Members'])
Each Members is re-assigned by distinct_users returned by the corresponding function.
The function takes Members and fetches users from each if member type. If user type, member itself is a user. While (fetched) users are appended to distinct_user, you can use their ids for uniquity.
When you fetch users_in_group, you can use two lists; groups_to_fetch and groups_fetched. The former is a stack to recursively fetch all groups in a group. The latter is not to fetch an already fetched group again. Or, it could loop forever.
Finally, if your data are already in memory, this approach may not exhaust memory and work.

How to extract values from list and store it as dictionary(key-value pair)?

I need to extract 2 values from this list of dictionary and store it as a key-value pair.
Here I attached sample data..Where I need to extract "Name" and "Service" from this input and store it as a dictionary. Where "Name" is Key and corresponding "Service" is its value.
Input:
response = {
'Roles': [
{
'Path': '/',
'Name': 'Heera',
'Age': '25',
'Policy': 'Policy1',
'Start_Month': 'January',
'PolicyDocument':
{
'Date': '2012-10-17',
'Statement': [
{
'id': '',
'RoleStatus': 'New_Joinee',
'RoleType': {
'Service': 'Service1'
},
'Action': ''
}
]
},
'Duration': 3600
},
{
'Path': '/',
'Name': 'Prem',
'Age': '40',
'Policy': 'Policy2',
'Start_Month': 'April',
'PolicyDocument':
{
'Date': '2018-11-27',
'Statement': [
{
'id': '',
'RoleStatus': 'Senior',
'RoleType': {
'Service': ''
},
'Action': ''
}
]
},
'Duration': 2600
},
]
}
From this input, I need output as a dictionary type.
Output Format: { Name : Service }
Output:
{ "Heera":"Service1","Prem" : " "}
My try:
Role_name =[]
response = {#INPUT WHICH I SPECIFIED ABOVE#}
roles = response['Roles']
for role in roles:
Role_name.append(role['Name'])
print(Role_name)
I need to pair the name with its corresponding service. Any help would be really appreciable.
Thanks in advance.
You just have to write a long line which can reach till the key 'Service'.
And you a syntax error in line Start_Month': 'January') and 'Start_Month': 'April'). You can't have one unclosed brackets.
Fix it and run the following.
This is the code:
output_dict = {}
for r in response['Roles']:
output_dict[r["Name"]] = r['PolicyDocument']['Statement'][0]['RoleType']['Service']
print(output_dict)
Output:
{'Heera': 'Service1', 'Prem': ''}
You just have to do like this:
liste = []
for role in response['Roles']:
liste.append(
{
role['Name']:role['PolicyDocument']['Statement'][0]['RoleType']['Service'],
}
)
print(liste)
It seems your input data is structured kind of strange and I am not sure what the ) are doing next to the months since they make things invalid but here is a working script assuming you removed the parenthesis from your input.
response = {
'Roles': [
{
'Path': '/',
'Name': 'Heera',
'Age': '25',
'Policy': 'Policy1',
'Start_Month': 'January',
'PolicyDocument':
{
'Date': '2012-10-17',
'Statement': [
{
'id': '',
'RoleStatus': 'New_Joinee',
'RoleType': {
'Service': 'Service1'
},
'Action': ''
}
]
},
'Duration': 3600
},
{
'Path': '/',
'Name': 'Prem',
'Age': '40',
'Policy': 'Policy2',
'Start_Month': 'April',
'PolicyDocument':
{
'Date': '2018-11-27',
'Statement': [
{
'id': '',
'RoleStatus': 'Senior',
'RoleType': {
'Service': ''
},
'Action': ''
}
]
},
'Duration': 2600
},
]
}
output = {}
for i in response['Roles']:
output[i['Name']] = i['PolicyDocument']['Statement'][0]['RoleType']['Service']
print(output)
This should give you what you want in a variable called role_services:
role_services = {}
for role in response['Roles']:
for st in role['PolicyDocument']['Statement']:
role_services[role['Name']] = st['RoleType']['Service']
It will ensure you'll go through all of the statements within that data structure but be aware you'll overwrite key-value pairs as you traverse the response, if they exist in more than a single entry!
A reference on for loops which might be helpful, illustrates using if statements within them which can help you to extend this to check if items already exist!
Hope that helps

Max IOPS of EBS Volume from Last 2 Weeks

I want to retrieve max IOPS utilized by EBS volume in the last 2 weeks. I am using cloudwatch get_metric_data function to obtain data about metric VolumeReadOps and VolumeWriteOps. I am using following code to get VolumeReadOps and VolumeWriteOps and then trying to calculate MaxIOPS:
This is the function to get metric values:
def cloudwatch_metric_value(CWsession,NameSpace,ResourceIdentifier,vStat,vUnit,vMetricName,vPeriod):
"""
Function that returns metric value of cloudwatch for a given resource and metric Name
"""
if NameSpace=='EBS':
responseCW = CWsession.get_metric_data(
MetricDataQueries=[
{
'Id': 'string',
'MetricStat': {
'Metric': {
'Namespace': 'AWS/EBS',
'MetricName': vMetricName,
'Dimensions': [
{
'Name': 'VolumeId',
'Value': ResourceIdentifier
},
]
},
'Period': vPeriod,
'Stat': vStat,
'Unit': vUnit
},
'ReturnData': True
},
],
StartTime=vStartTime,
EndTime=vEndTime,
)
vValue=responseCW['MetricDataResults'][0]['Values']
vTimeStamps=responseCW['MetricDataResults'][0]['Timestamps']
index, value = max(enumerate(vValue), key=operator.itemgetter(1))
metric_value=value
metric_time=vTimeStamps[index]
return metric_time,metric_value
From main, it is called like following:
metric_time,metric_value = cloudwatch_metric_value(cloudwatch,'EBS',v['VolumeId'],'Sum','Count','VolumeReadOps',300)
vReadIOPS=metric_value
metric_time,metric_value = cloudwatch_metric_value(cloudwatch,'EBS',v['VolumeId'],'Sum','Count','VolumeWriteOps',300)
vWriteIOPS=metric_value
vTotalIOPS=round((vReadIOPS+vWriteIOPS)/300)
I understand that IOPS are calculated by diving the ReadOps/Write with duration. The values I get from this code for MaxIOPS for a given volume doesn't match with the values I see for same in cloudwatch console. Please advise if I am doing this in right way?
Thanks.
Ok, I was able to fix and here is the working function:
def cloudwatch_metric_value(CWsession,NameSpace,ResourceIdentifier,vStat,vUnit,vPeriod):
"""
Function that returns metric value of cloudwatch for a given resource and metric Name
"""
if NameSpace=='EBS':
responseCW = CWsession.get_metric_data(
MetricDataQueries=[
{
'Id': 'string1',
'MetricStat': {
'Metric': {
'Namespace': 'AWS/EBS',
'MetricName': 'VolumeReadOps',
'Dimensions': [
{
'Name': 'VolumeId',
'Value': ResourceIdentifier
},
]
},
'Period': vPeriod,
'Stat': vStat,
'Unit': vUnit
},
'ReturnData': True
},
{
'Id': 'string2',
'MetricStat': {
'Metric': {
'Namespace': 'AWS/EBS',
'MetricName': 'VolumeWriteOps',
'Dimensions': [
{
'Name': 'VolumeId',
'Value': ResourceIdentifier
},
]
},
'Period': vPeriod,
'Stat': vStat,
'Unit': vUnit
},
'ReturnData': True
},
],
StartTime=vStartTime,
EndTime=vEndTime,
)
vReadValue=responseCW['MetricDataResults'][0]['Values']
vReadTimeStamps=responseCW['MetricDataResults'][0]['Timestamps']
vWriteValue=responseCW['MetricDataResults'][1]['Values']
vWriteTimeStamps=responseCW['MetricDataResults'][1]['Timestamps']
vReadWriteValue = [vReadValue[i]+vWriteValue[i] for i in range(len(vWriteValue))]
if vReadWriteValue:
metric_value = max(vReadWriteValue)
metric_time = vReadTimeStamps[vReadWriteValue.index(metric_value)]
metric_value = metric_value / 300
else:
metric_value=1
metric_time=date_t
return metric_time,metric_value

Categories