Iterating nested json in python - python

I have JSON in this format. How can I print each value, descending into the nested objects? The structure of the JSON and the key names can vary, so I need a generic solution.
# Sample document: one scalar, one list of objects and two levels of nested objects.
output = {
    'name': 'StackOverflow',
    'competitors': [
        {'competitor': 'bing', 'link': 'bing.com'},
        {'competitor': 'google', 'link': 'google.com'},
    ],
    'acquisition': {
        'acquired_day': 16,
        'acquired_month': 12,
        'acquired_year': 2013,
        'acquiring_company': {'name': 'Viggle', 'permalink': 'viggle'},
    },
}

You can use isinstance to check if something is a dict or a list. Something like this may work, but I haven't checked it.
# Sample document used to demonstrate the traversal.
output = {
    'name': 'StackOverflow',
    'competitors': [
        {'competitor': 'bing', 'link': 'bing.com'},
        {'competitor': 'google', 'link': 'google.com'},
    ],
    'acquisition': {
        'acquired_day': 16,
        'acquired_month': 12,
        'acquired_year': 2013,
        'acquiring_company': {'name': 'Viggle', 'permalink': 'viggle'},
    },
}


def traverse(obj):
    """Print every dict key and every leaf value inside *obj*, depth first.

    Dicts print ``dict_key <key>`` for each key and then recurse into the
    value; lists recurse into each element; anything else is printed as
    ``value <obj>``.
    """
    if isinstance(obj, dict):
        # items() works on both Python 2 and 3; iteritems() exists only on
        # Python 2 and raises AttributeError on Python 3.
        for key, value in obj.items():
            print('dict_key', key)
            traverse(value)
    elif isinstance(obj, list):
        for value in obj:
            traverse(value)
    else:
        print('value', obj)


traverse(output)

Related

Add random values to json data in Python

I am new to Python. I want to read a JSON file and replace its values with random values. The JSON contains nested objects too. I am unable to solve this.
sample.json
{"name": "Kash","age": 12,"loc": {"loc1":"Uk","loc2":"Usa"}}
import json
import random
# Load the sample document; the context manager closes the file as soon as
# parsing finishes instead of leaving the handle open.
with open("sample.json") as f:
    data = json.load(f)
def iterate(dictionary):
    """Overwrite the values of *dictionary* with random ints in [1, 10).

    The dictionary is printed after every assignment and returned at the end.

    NOTE: each value is replaced *before* the nested-dict check, so a key
    that held a dict ends up holding an int. The old nested dict is still
    randomised, but it is no longer reachable from its parent.
    """
    for key in dictionary:
        value = dictionary[key]
        dictionary[key] = random.randrange(1, 10)
        print(dictionary)
        if isinstance(value, dict):
            iterate(value)
    return dictionary
# Randomise the loaded document in place; the returned dict is not used here.
iterate(data)
Output I Got
{'name': 8, 'age': 12, 'loc': {'loc1': 'tc', 'loc2': 'cbe'}}
{'name': 8, 'age': 6, 'loc': {'loc1': 'tc', 'loc2': 'cbe'}}
{'name': 8, 'age': 6, 'loc': 9}
{'loc1': 5, 'loc2': 'cbe'}
{'loc1': 5, 'loc2': 1}
===========================================
Output Expected
{"name": 15,"age": 85,"loc": {"loc1":52,"loc2":36}}
dictionary[key] = random.randrange(1,10)
This is performing:
dictionary['loc'] = some_number
So you lose the nested dict that was already there.
You only want to modify keys that do not have a dict as a value.
def iterate(dictionary):
    """Replace every non-dict value, at any depth, with a random int in [1, 10).

    Nested dicts are kept and processed recursively. The input is modified
    in place and also returned.
    """
    for key in dictionary:
        current = dictionary[key]
        if not isinstance(current, dict):
            dictionary[key] = random.randrange(1, 10)
            continue
        # Keep the nested dict itself; only randomise what is inside it.
        iterate(current)
    return dictionary
If your JSON can contain lists - you will need to handle that case too.

Handling response from DynamoDB - Python

I'm receiving following data from DynamoDB as a response to client scan API call.
# Raw scan result: every attribute value is wrapped in a one-key dict whose
# key names the value's type ('S' in this sample).
Data = [
    {
        'id': {'S': '5'},
        'FirstName': {'S': 'Prashanth'},
        'LastName': {'S': 'Wadeyar'},
        'ClientName': {'S': 'Test'},
    },
]
I want to handle this response and get the output as
{'FirstName':'Prashanth', 'LastName': 'Wadeyar', 'ClientName': 'Test'}
I can handle it by separating it like
# Pull the typed wrapper of one attribute (e.g. {'S': 'Prashanth'}) out of
# each item. The response shown above is bound to the name `Data`.
for field_obj in Data:
    obj = field_obj['FirstName']
But to get the value of FirstName, the type key ('S' here) may differ for each attribute, e.g. boolean, string, list, etc.
Is there an easy way to get the key and value pairs?
If you don't want to bring external dependencies you can use Table class as described here.
import boto3
# Use the resource-level Table object instead of the low-level client.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('users')
# Fetch a single item by key.
# NOTE(review): presumably 'username' and 'last_name' together form the
# table's primary key; confirm against the table schema.
response = table.get_item(
Key={
'username': 'janedoe',
'last_name': 'Doe'
}
)
# The fetched item is stored under the 'Item' key of the response.
item = response['Item']
print(item)
Expected Output:
{u'username': u'janedoe',
u'first_name': u'Jane',
u'last_name': u'Doe',
u'account_type': u'standard_user',
u'age': Decimal('25')}
But my personal preference every time I hear about Python and DynamoDB is to use PynamoDB: https://pynamodb.readthedocs.io/en/latest/ which is sort of an ORM for DynamoDB.
Above answer from Mayank seems to work well, but I also wanted to map fields I want to return in a list. Hence I mapped field and then was able to get the result I was looking for
# Attributes to extract from every item, in output order.
FIELD_MAP = ['firstName', 'lastName', 'clientName']

# Accumulates one {field: value} dict per extracted attribute; it must exist
# before the loop appends to it.
db_result = []
for field_obj in response['Items']:
    for key in FIELD_MAP:
        typed_value = field_obj.get(key)
        # Skip attributes that are missing (or empty) on this item.
        if typed_value:
            # typed_value looks like {'S': 'Prashanth'}; drop the type
            # descriptor and keep only the payload.
            for value in typed_value.values():
                db_result.append({key: value})
print(db_result)
Something like this could work, where you don't need to care about the internal keys(S in your case):
# Flatten every item into a single dict, ignoring the type descriptor
# ('S', 'N', ...) that wraps each attribute value.
d = {}
for item in Data:
    for k, v in item.items():
        if k != 'id' and isinstance(v, dict):
            # In the data shown each wrapper holds exactly one
            # {type: value} pair, so this keeps that single value.
            for val in v.values():
                d[k] = val

print(d)
# {'FirstName': 'Prashanth', 'LastName': 'Wadeyar', 'ClientName': 'Test'}

Accessing nested object values from MongoDB

I have a collection in MongoDB of nested objects (basically a tree structure). I am trying to access certain "id" and "user_id" values in "children".
The collection looks like this:
Image of a tree object in MongoDB
When I query "children" I get, e.g., this as output:
[{'children': [{'children': [{'children': [{'id': 737992252891537408, 'user_id': 3240396143}], 'id': 737991958161940480, 'user_id': 3240396143}], 'id': 737986305481682944, 'user_id': 56377143}], 'id': 737979183599652864, 'user_id': 3240396143}], 'id': 737978059291234304, 'user_id': 3240396143}]}
How do I efficiently access all the "id"'s with the 'user_id' = 56377143? I cannot seem to get all of them when it is nested too deep.
I tried using a for loop like this but it does not output all the 'id's which match the 'user_id's
# Name of the key under which each node stores its list of child nodes.
val= "children"
# Collects the ids appended by get_value().
lst_rt= []
# NOTE(review): never used in the code shown.
lst_ids = []
# Follow the first child at every level and record the id of a node whose
# user_id is 56377143.
# NOTE(review): the snippet's indentation was lost; the nesting of the
# statements below is inferred, so confirm against the original post.
def get_value(mydict, keys):
# NOTE(review): indexing a dict with 0 looks up the *key* 0, which the sample
# documents do not appear to contain, so this branch would likely raise KeyError.
if type(mydict) == dict:
print(mydict[0]['user_id'], mydict[0]['id'], "TEST")
return get_value(mydict[keys], val)
# Only the first element (mydict[0]) is ever examined, so siblings after the
# first are never visited. Descent continues only while that first element
# has the wanted user_id.
if type(mydict) == list and keys in mydict[0] and mydict[0]['user_id'] == 56377143 :
print(mydict[0]['id'],mydict[0]['user_id'], 'COND')
return get_value(mydict[0][keys], val)
# Reached when the branch above did not apply, e.g. the first element has no
# `keys` entry: record its id if the user_id matches.
# NOTE(review): if neither branch applies, the function presumably returns None.
elif mydict[0]['user_id'] == 56377143 and mydict[0]['id'] != None:
print(mydict[0]['id'], mydict[0]['user_id'])
lst_rt.append(int(mydict[0]['id']))
return mydict[0]['id']
# Run the search on the children of every root document.
for x in root_tweets:
print(get_value(x['children'], val))

Unable to convert data types when looping through a list of dictionaries

I am trying to loop through a list of dictionaries and convert their datatype based on a reference to another configuration dictionary which contains the datatypes I want to convert to.
The config dictionary is the following:
# Target type for each result field, given as a type *name* (a string).
search_results_config = {
    'id': 'int',
    'description': 'string',
    'page': 'int',
    'position': 'int',
    'title': 'string',
    'type': 'int',
    'typedescription': 'string',
    'url': 'string',
}
The list of dictionaries that I am actually looping through, top_rank_data, whose values I want to convert, looks like the following:
{
'description': 'Churchill contents insurance covers the things that matter most in your home. We offer cover of up to £50,000 as\xa0',
'position': 18, 'page': 2, 'title': 'Contents insurance | Home Insurance | Churchill UK', 'type': '0',
'typedescription': 'organic', 'url': 'https://www.churchill.com/home-insurance/options/contents'}, {
'description': 'Compare contents insurance and how to cut the cost of home contents insurance cover for your personal possessions\xa0',
'position': 19, 'page': 2, 'title': 'Contents Insurance - compare cheap contents insurance', 'type': '0',
'typedescription': 'organic', 'url': 'https://www.uswitch.com/home-insurance/contents-insurance/'}
Code below is:
for row in top_rank_data:
    for item in row:
        # NOTE: `key` and `value` come from search_results_config, so `value`
        # is the configured type name ('int' / 'string'), not the row's data.
        # new_value is also never written back to `row`.
        for key, value in search_results_config.items():
            new_value = None
            config_type = search_results_config[key]
            if config_type == 'string':
                new_value = str(value) or ''
            if config_type == 'int':
                new_value = int(value) or 9
So I expect the value of any key to change data type as per the search_results_config dictionary. Instead I only get back string datatype for all, so I presume the if config_type statements are not working. Any help much appreciated!
Additional function which is generating data:
# Collect the organic results from the first two pages of every downloaded
# JSON file into top_rank_data.
# Raw string: '\d' is not a valid escape sequence, and newer Python versions
# warn about it. The resulting path text is unchanged.
path = r'C:\downloaded'
for filename in glob.glob(os.path.join(path, '*.json')):
    with open(filename, encoding='utf-8', mode='r') as currentFile:
        data = currentFile.read()
    # Parsing happens after the file is closed; only the text is needed.
    rank_data = json.loads(data)["rankdata"]
    for entry in rank_data:
        if entry["page"] <= 2 and entry["typedescription"] == "organic":
            top_rank_data.append(entry)
this is a version to do that:
# Target type for each result field, stored as the callable that performs the
# conversion (int / str) rather than as a type name.
search_results_config = {
    'id': int,
    'description': str,
    'page': int,
    'position': int,
    'title': str,
    'type': int,
    'typedescription': str,
    'url': str,
}

# Two sample search results; note that 'type' arrives as a string.
items = (
    {
        'description': 'Churchill contents insurance covers the things that matter most in your home. We offer cover of up to £50,000 as\xa0',
        'position': 18,
        'page': 2,
        'title': 'Contents insurance | Home Insurance | Churchill UK',
        'type': '0',
        'typedescription': 'organic',
        'url': 'https://www.churchill.com/home-insurance/options/contents',
    },
    {
        'description': 'Compare contents insurance and how to cut the cost of home contents insurance cover for your personal possessions\xa0',
        'position': 19,
        'page': 2,
        'title': 'Contents Insurance - compare cheap contents insurance',
        'type': '0',
        'typedescription': 'organic',
        'url': 'https://www.uswitch.com/home-insurance/contents-insurance/',
    },
)


def convert(dct):
    """Return a new dict with each value of *dct* cast by its configured type.

    The caster for every key is looked up in ``search_results_config``; a key
    that is missing there raises KeyError.
    """
    converted = {}
    for key, value in dct.items():
        caster = search_results_config[key]
        converted[key] = caster(value)
    return converted


for dct in items:
    print(convert(dct))
note that search_results_config directly contains the types (i.e. int instead of 'int') that are used in order to convert your data.
you can also add a default type (i used str in the example below) for keys that do not exist in search_results_config:
def convert(dct):
    """Return a new dict with each value of *dct* cast by its configured type.

    Keys that have no entry in ``search_results_config`` are converted
    with ``str``.
    """
    converted = {}
    for key, value in dct.items():
        caster = search_results_config.get(key, str)
        converted[key] = caster(value)
    return converted
Try this Approach:
# Build a converted copy of every row; values that cannot be converted are
# kept unchanged.
New_List = []
for dictionary in top_rank_data:
    sub_dict = {}
    for key, value in dictionary.items():
        target_type = search_results_config[key]
        try:
            new_val = target_type(value)
        except (TypeError, ValueError):
            # Catch only conversion failures. A bare `except:` would also
            # swallow KeyboardInterrupt and unrelated programming errors.
            new_val = value
        sub_dict[key] = new_val
    New_List.append(sub_dict)

Python Get dictionary value with a key list

Considering I have
# Per-user profile data, keyed by user name.
users = {
    'Sam': {
        'age': 19,
        'location': 'Flodira',
        'country': 'United States',
    },
    'Max': {
        'age': 16,
        'location': 'Sydney',
        'country': 'Australia',
    },
}
def getUserValue(query):
    """Look up *query* as a single key of the module-level ``users`` dict.

    NOTE: a list is unhashable, so passing a key path such as
    ['Sam', 'location'] raises TypeError instead of walking the path.
    """
    return users[query]


getUserValue(['Sam', 'location'])
How could I do this? I know I could directly do users['Sam']['location']; what I would like to do is get Sam's location via a list object.
Here's a generic solution that handles different levels of nesting:
def getUserValue(users, query):
    """Follow the keys in *query*, one nesting level per key, through *users*.

    Returns whatever is stored at the end of the path. An empty *query*
    returns *users* itself.
    """
    node = users
    for step in query:
        node = node[step]
    return node
# Walk the key path ['Sam', 'location'] through the users mapping.
print(getUserValue(users, ['Sam', 'location']))
# Output: Flodira
def getUserValue(query):
    """Return ``users[name][field]`` for a two-element *query* ``[name, field]``.

    Asserts that *query* has exactly two elements.
    """
    assert len(query) == 2
    user_name, field = query[0], query[1]
    return users[user_name][field]
Is that what you're looking for?
You could solve this problem with recursion:
def getByList(d, l):
    """Recursively follow the keys in *l* through the nested mapping *d*.

    Each call consumes the first key and recurses on the rest. Once no keys
    remain, the object reached so far is returned.
    """
    if not l:
        return d
    return getByList(d[l[0]], l[1:])


def getUserValue(query):
    """Resolve the key path *query* against the module-level ``users`` dict."""
    return getByList(users, query)
For a more general solution than Jared's, you could use list unpacking with a recursive function..
def getUserValue(dic, lis):
    """Recursively follow the keys in *lis* through the nested mapping *dic*.

    Args:
        dic: the (nested) mapping to search.
        lis: sequence of keys, outermost first.

    Returns:
        The object found at the end of the key path. An empty *lis* returns
        *dic* itself rather than raising IndexError.

    Raises:
        KeyError: if a key on the path is missing.
    """
    if not lis:
        return dic
    # List unpacking: peel off the first key and recurse on the remainder.
    head, *rest = lis
    return getUserValue(dic[head], rest)
How about the following?
# Per-user profile data, keyed by user name.
users = {
    'Sam': {
        'age': 19,
        'location': 'Flodira',
        'country': 'United States',
    },
    'Max': {
        'age': 16,
        'location': 'Sydney',
        'country': 'Australia',
    },
}


def getUserValue(my_args):
    """Follow the key path *my_args* through the module-level ``users`` dict.

    The path is only read, never modified, so the caller's list is still
    intact afterwards and can be reused for another lookup. Popping keys off
    the front would empty the caller's list.

    Raises:
        KeyError: if a key on the path is missing.
    """
    item = users
    for key in my_args:
        item = item[key]
    return item


print(getUserValue(['Sam', 'location']))  # prints -> Flodira
It keeps treating item as a dictionary until every key in the passed list has been used.
You could use plain old reduce and operator.getitem
import operator
# reduce is a builtin only on Python 2; functools.reduce works on both 2 and 3.
from functools import reduce


def getUserValue(dic, lis):
    """Follow the keys in *lis* through the nested mapping *dic*.

    Folds ``operator.getitem`` over the key path, starting from *dic*.
    An empty *lis* returns *dic* itself.
    """
    return reduce(operator.getitem, lis, dic)

Categories