Writing a function that parses a nested json file in python - python

I have a json file that looks something like this:
{
"mAutomaticTestCompleted": true,
"mAutomaticTestList": [
{
"mName": "acceleratorEntity",
"mTestStatus": true,
"mX": 3.8043518,
"mY": 8.114105,
"mZ": -3.3895721
},
{
"mName": "barometerEntity",
"mTestStatus": false,
"mValue": 0
}]
}
There are actually lots of fields like mAutomaticTestlist, all of them are lists of objects that look just like that.
I need to write a function that takes device_name and JSON itself as its arguments and returns the value of the mTestStatus field.
Here's my attempt:
def hasPassed(device_name, data):
if isinstance(data, dict):
for key, value in data.items():
if not isinstance(value, dict) and not isinstance(value, list):
if key == 'mName' and value == device_name:
return data['mTestStatus']
else:
return hasPassed(device_name, value)
elif isinstance(data, list):
for element in data:
return hasPassed(device_name, element)
The problem with this function is that it doesn't go over the whole JSON object.
EDIT:
So I would like my function to work this way:
hasPassed('barometerEntity', json_obj)
would return False cos that's the value of the 'mTestStatus' corresponding the device_name (which is barometerEntity in this case).

You have wrong code formatting, should be:
def hasPassed(device_name, data):
    """Return the ``mTestStatus`` of the entry whose ``mName`` is *device_name*.

    *data* is a parsed JSON value: dicts, lists and scalars, nested to any
    depth.  The first matching entry met in depth-first order decides the
    result.  ``False`` is returned when no entry matches, so the function
    never returns ``None``.

    Raises ``KeyError`` if the matching entry has no ``mTestStatus`` field.
    """
    # Private sentinel: lets the search tell "found an entry whose status is
    # False" apart from "nothing in this branch", so it can stop at the first
    # match instead of scanning the rest of the document.
    missing = object()

    def find(node):
        if isinstance(node, dict):
            if 'mName' in node and node['mName'] == device_name:
                return node['mTestStatus']
            children = node.values()
        elif isinstance(node, list):
            children = node
        else:
            return missing
        for child in children:
            status = find(child)
            if status is not missing:
                return status
        return missing

    status = find(data)
    return False if status is missing else status
Last 3 lines -- move to the left.
And there should be a return at the end of the function for the case where your data is neither a list nor a dict -- otherwise your function will return None and may crash something.
>>> hasPassed( 'barometerEntity', a)
False
>>> hasPassed( 'acceleratorEntity', a)
True
>>>

Try this:
import json
def hasPassed(device_name, data):
    """Return the ``mTestStatus`` of *device_name* from ``mAutomaticTestList``.

    *data* may be the raw JSON text or the already-parsed document (a dict).
    Returns ``None`` when no entry in the list carries that ``mName``.
    """
    # Only decode when handed text; a parsed document is used as-is so the
    # function works for both calling styles.
    if isinstance(data, (str, bytes, bytearray)):
        document = json.loads(data)
    else:
        document = data
    for entry in document["mAutomaticTestList"]:
        if entry["mName"] == device_name:
            return entry["mTestStatus"]
    return None

Related

iterate a dict and pass as function parameter

I have this function what returns an assignment
def insertData(
Model:object,
data_entry:dict
)->any:
for k, v in data_entry.items():
if isinstance(v, list):
return getattr(Model, k).in_(v)
else:
return getattr(Model, k)== v
and then this function is called here so that it can pass those filter conditions to the query:
def get_vehicles(
db:Session,
skip: int = 0,
limit: int= 100,
query:Query=None
)-> Union[list[Vehicle], list[None]]:
real_query = get_only_passed_values(query)
if real_query:
return db.query(Vehicle).filter(
insertData(Vehicle, real_query)
).offset(skip).limit(limit).all()
else:
return db.query(Vehicle).offset(skip).limit(limit).all()
I need to filter on multiple columns of the database table, but this only filters on the first argument passed.
e.g:
return db.query(Vehicle).filter(Vehicle.color == 'red',Vehicle.brand == 'BMW').offset(skip).limit(limit).all()

How can I get the specified key value in a nested dictionary in a most effective way?

There is a nested dictionary like:
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxxx.jpg",
...
"child_attachments":{
"picture":"xxxxx.jpg",
...
}
}
...
}
The problem is at every level of the dictionary, the key picture may exist, how can I get the picture's value in a most effective way?
Here's my trial, but failed:
def get_picture_url(data):
for key, value in data.items():
if key == "picture":
return data[key]
else:
if isinstance(value, dict):
return get_picture_url(value)
get_picture_url(data_dict)
This should work for the general case of an arbitrarily nested dictionary with JSON-like structure:
def get_picture(data):
# you can remove this case if the
# input doesn't contain lists
if isinstance(data, list):
ans = []
for e in data:
ans += get_picture(e)
return ans
elif not isinstance(data, dict):
return []
else:
ans = []
for k, v in data.items():
if k == 'picture':
ans.append(v)
else:
ans += get_picture(v)
return ans
It'll traverse all levels of the data structure, looking for keys named 'picture' and accumulating all of their values in a single output list. If you're sure that there are no lists in the input, we can simplify the solution a bit:
def get_picture(data):
ans = []
if isinstance(data, dict):
for k, v in data.items():
if k == 'picture':
ans.append(v)
else:
ans += get_picture(v)
return ans
Either way, it works as expected for your sample input:
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxx.jpg",
"child_attachments":{
"picture":"xxxxx.jpg"
}
}
}
get_picture(data_dict)
=> ['xxx.jpg', 'xxxx.jpg', 'xxxxx.jpg']
You are not checking the returned value of the recursive call to get_picture_url.
This should give you the top most picture in your dict:
def get_picture_url(data, picture_key="picture"):
    """Return the top-most value stored under *picture_key* in nested dicts.

    The current level is checked first; only if it has no non-None value for
    *picture_key* are the nested dict values searched, depth-first.  Returns
    ``None`` when *data* is not a dict or the key is not found anywhere.
    """
    if not isinstance(data, dict):
        return None
    picture_url = data.get(picture_key)
    if picture_url is not None:
        return picture_url
    for value in data.values():
        # Forward picture_key explicitly; otherwise the nested search would
        # silently fall back to the default "picture" key.
        picture_url = get_picture_url(value, picture_key)
        if picture_url is not None:
            return picture_url
    return None

How can I read a dictionary with a list?

How can I read a list inside a dictionary and try to change string numbers to digits? For example:
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
I use this to change dictionary number strings to integers:
{k:int(v) if v.isdigit() else v for k,v in obj.items()}
But it doesn't work, so I was trying something like this:
for objs in obj:
if objs.isdigit():
k:int(v)
else:
for k,v in objs.items():
print k
But this fails as well.
this seems like a good problem for recursion
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
def fix_ints(obj):
    """Return a copy of *obj* with every integer-like string turned into an int.

    Dicts are rebuilt as dicts and lists/tuples as lists, recursively.
    Strings that ``int()`` cannot parse are reported and kept as they are.
    Any other value (ints, floats, None, ...) is reported and returned
    unchanged rather than being replaced by ``None``.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    if isinstance(obj, string_types):
        try:
            return int(obj)
        except ValueError:
            print("I cant Make %r an int" % obj)
            return obj
    elif isinstance(obj, (list, tuple)):
        return [fix_ints(item) for item in obj]
    elif isinstance(obj, dict):
        return dict((key, fix_ints(value)) for key, value in obj.items())
    else:
        print("I have no idea what to do with %r" % obj)
        # Hand the value back: falling off the end here would silently turn
        # every non-string leaf into None.
        return obj
new_obj = fix_ints(obj)
print new_obj
note that python does not support tail recursion so if this data structure goes very deep (greater than 1k levels of nesting) then recursion may not be appropriate ...
of course you can also do silly string tricks with it
import json,re
new_obj = json.loads(re.sub("\"(\d+)\"","\\1",json.dumps(obj)))
(although really you should do it like I do in my first example ... this second method is really just for fun)
String to number:
def int_it(obj):
    """Return ``int(obj)`` for a digit-only string, otherwise *obj* unchanged."""
    if obj.isdigit():
        try:
            return int(obj)
        except ValueError:
            # str.isdigit() also accepts characters such as superscript
            # digits that int() cannot parse; leave those untouched.
            return obj
    return obj
Dict to number (regardless of the number of nested dicts or lists):
class Convert(object):
    """Turn digit-only string values inside a dict into ints, in place.

    The converted structure is available as ``.obj``.  A non-dict argument
    is stored untouched.
    """

    def __init__(self, obj):
        self.obj = obj
        if isinstance(obj, dict):
            self.handle_dict(obj)

    def handle_dict(self, obj):
        """Convert the values of *obj*, descending into lists and dicts."""
        for key, value in obj.items():
            if isinstance(value, str) and value.isdigit():
                self.obj[key] = int_it(value)
            elif isinstance(value, list):
                ins = HandleList(value)
                self.obj[key] = ins.obj
            elif isinstance(value, dict):
                # Recurse into the nested dict itself; passing obj.items()
                # would store an items view in place of the nested dict.
                ins = Convert(value)
                self.obj[key] = ins.obj
        return obj
List to numbers, regardless of the number of nested lists or dicts.
class HandleList(object):
    """Turn digit-only strings inside a list into ints, in place.

    Nested lists are processed recursively and nested dicts are handed to
    ``Convert``.  The converted list is available as ``.obj``.
    """

    def __init__(self, obj):
        self.obj = obj
        self.handle_list(obj)

    def handle_list(self, obj):
        """Convert the items of *obj* and return it."""
        for index, item in enumerate(obj):
            if isinstance(item, list):
                # Recurse on the sub-list as a whole so that scalar elements
                # inside it are handled too, and store by index assignment
                # (list.index() only searches, it never writes).
                obj[index] = HandleList(item).obj
            elif isinstance(item, str):
                obj[index] = int_it(item)
            elif isinstance(item, dict):
                Convert(item)  # mutates the dict in place
        return obj
output = Convert(values)
print(output.obj)
Returns:
{
'amarillo': 'xxx',
'naranja': [{'naranja_1': 1, 'naranja_2': 2}],
'rojo': [{'rojo_b': 2, 'rojo_a': 1}],
'azul': 4
}
Given the input:
values = {
'azul':'4',
'rojo': [
{'rojo_a':'1',
'rojo_b':'2'
}
],
'amarillo':'xxx',
'naranja': [
{'naranja_1':'1',
'naranja_2':'2'
}
]
}

How to filter by keys through a nested dictionary in a pythonic way

Try to filter a nested dictionary. My solution is clunky, was hoping to see if there is a better method something using comprehensions. Only interested in the dictionary and lists for this example.
_dict_key_filter() will filter the keys of a nested dictionary or a list of nested dictionaries. Anything not in the obj_filter will be ignored on all nested levels.
obj : can be a dictionary or a list of dictionaries.
obj_filter: has to be a list of filter values
def _dict_key_filter(self, obj, obj_filter):
    """Keep only the branches of *obj* that lead to a key in *obj_filter*.

    *obj* is a dict or a list of (nested) dicts; *obj_filter* is a collection
    of key names.  A matching key keeps a deep copy of its whole value; other
    keys are kept only if something below them matches.  Returns the filtered
    dict or list, or ``None`` when nothing matched (or *obj* is a scalar).
    """
    if isinstance(obj, dict):
        retdict = {}
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for key, value in obj.items():
            if key in obj_filter:
                retdict[key] = copy.deepcopy(value)
            elif isinstance(value, (dict, list)):
                child = self._dict_key_filter(value, obj_filter)
                if child:
                    retdict[key] = child
        return retdict if retdict else None
    elif isinstance(obj, list):
        retlist = []
        # Iterate the argument, not the builtin ``list`` type.
        for value in obj:
            child = self._dict_key_filter(value, obj_filter)
            if child:
                retlist.append(child)
        return retlist if retlist else None
    else:
        return None
Example#
dict1 = {'test1': {'test2':[1,2]}, 'test3': [{'test6': 2},
{'test8': {'test9': 23}}], 'test4':{'test5': 5}}
filter = ['test5' , 'test9']
return = _dict_key_filter(dict1, filter)
return value would be {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 5}}
It's a really old question. I came across a similar problem recently.
It may be obvious, but you are dealing with a tree in which each node has an arbitrary number of children. You want to cut the subtrees that do not contain some items as nodes (not leaves). To achieve this, you are using a custom DFS: the main function returns either a subtree or None. If the value is None then you "cut" the branch.
First of all, the function dict_key_filter returns a (non empty) dict, a (non empty) list, or None if no filter key was found in the branch.
To reduce complexity, you could return a sequence in every case: an empty sequence if no filter key was found, and a non empty sequence if you are still searching or you found the leaf of the tree. Your code would look like:
def dict_key_filter(obj, obj_filter):
if isinstance(obj, dict):
retdict = {}
...
return retdict # empty or not
elif isinstance(obj, list):
retlist = []
...
return retlist # empty or not
else:
return [] # obvioulsy empty
This was the easy part. Now we have to fill the dots.
The list case
Let's begin with the list case, since it is the easier to refactor:
retlist = []
for value in obj:
    # Recurse with dict_key_filter itself and keep only non-empty subtrees.
    child = dict_key_filter(value, obj_filter)
    if child:
        retlist.append(child)
We can translate this into a simple list comprehension:
retlist = [dict_key_filter(value, obj_filter) for value in obj if dict_key_filter(value, obj_filter)]
The drawback is that dict_key_filter is evaluated twice. We can avoid this with a little trick (see https://stackoverflow.com/a/15812866):
retlist = [subtree for subtree in (dict_key_filter(value, obj_filter) for value in obj) if subtree]
The inner expression (dict_key_filter(value, obj_filter) for value in obj) is a generator that calls dict_key_filter once per value. But we can even do better if we build a closure of dict_key_filter:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
...
retlist = list(filter(len, map(inner_dict_key_filter, obj)))
Now we are in the functional world: map applies inner_dict_key_filter to every element of the list and then the subtrees are filtered to exclude empty subtrees (len(subtree) is true iff subtree is not empty). Now, the code looks like:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
if isinstance(obj, dict):
retdict = {}
...
return retdict
elif isinstance(obj, list):
return list(filter(len, map(inner_dict_key_filter, obj)))
else:
return []
If you are familiar with functional programming, the list case is readable (not quite as readable as it would be in Haskell, but still readable).
The dict case
I do not forget the dictionary-comprehension tag in your question. The first idea is to create a function to return either a whole copy of the branch or the result of the rest of the DFS.
def build_subtree(key, value):
if key in obj_filter:
return copy.deepcopy(value) # keep the branch
elif isinstance(value, (dict, list)):
return inner_dict_key_filter(value) # continue to search
return [] # just an orphan value here
As in the list case, we do not refuse empty subtrees for now:
retdict = {}
for key, value in obj.items():
retdict[key] = build_subtree(key, value)
We have now a perfect case for dict comprehension:
retdict = {key: build_subtree(key, value) for key, value in obj.items() if build_subtree(key, value)}
Again, we use the little trick to avoid to compute a value twice:
retdict = {key:subtree for key, subtree in ((key, build_subtree(key, value)) for key, value in obj.items()) if subtree}
But we have a little problem here: the code above is not exactly equivalent to the original code. What if the value is 0? In the original version, we have retdict[key] = copy.deepcopy(0) but in the new version we have nothing. The 0 value is evaluated as false and filtered. And then the dict may become empty and we cut the branch wrongfully. We need another test to be sure we want to remove a value: if it's an empty list or dict, then remove it, else keep it:
def to_keep(subtree):
    """Return False only for an empty dict or list; keep every other value."""
    # "and", not "or": a value is dropped only when it is a container AND
    # empty.  With "or", len() would also be called on scalars such as 0.
    return not (isinstance(subtree, (dict, list)) and len(subtree) == 0)
That is:
def to_keep(subtree): return not isinstance(subtree, (dict, list)) or subtree
If you remember a bit of logic (https://en.wikipedia.org/wiki/Truth_table#Logical_implication) you can interpret this as: if subtree is a dict or a list, then it must not be empty.
Let's put the pieces together:
def dict_key_filter(obj, obj_filter):
    """Keep only the branches of *obj* that lead to a key in *obj_filter*.

    *obj* is a dict or a list of (nested) dicts; *obj_filter* is a collection
    of key names.  A matching key keeps a deep copy of its whole value,
    whatever that value is.  Any other key is kept only if its filtered
    subtree is non-empty.  Always returns a container: the filtered dict for
    a dict, the filtered list for a list, and ``[]`` for anything else.
    """
    def inner_dict_key_filter(obj):
        return dict_key_filter(obj, obj_filter)

    def to_keep(subtree):
        # Scalars (including 0) are kept; containers only when non-empty.
        return not isinstance(subtree, (dict, list)) or bool(subtree)

    def build_subtree(key, value):
        if key in obj_filter:
            return copy.deepcopy(value)  # keep the branch
        elif isinstance(value, (dict, list)):
            return inner_dict_key_filter(value)  # continue to search
        return []  # just an orphan value here

    if isinstance(obj, dict):
        key_subtree_pairs = ((key, build_subtree(key, value)) for key, value in obj.items())
        # A matched key is kept even when its value is an empty list or dict;
        # only unmatched keys with nothing below them are cut.
        return {key: subtree for key, subtree in key_subtree_pairs
                if key in obj_filter or to_keep(subtree)}
    elif isinstance(obj, list):
        return list(filter(to_keep, map(inner_dict_key_filter, obj)))
    return []
I don't know if this is more pythonic, but it seems clearer to me.
dict1 = {
'test1': { 'test2':[1,2] },
'test3': [
{'test6': 2},
{
'test8': { 'test9': 23 }
}
],
'test4':{'test5': 0}
}
obj_filter = ['test5' , 'test9']
print (dict_key_filter(dict1, obj_filter))
# {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 0}}

Parsing JSON to find value for key

I am trying to parse json to find the value of a desired key. I am doing so recursively. If there is another, fast or more efficient way to do so, I am open
example json:
{
"data_version":"5",
"application":{
"platform":"iPhone",
"os":"iPhone OS",
"locale":"en_US",
"app_version":"unknown",
"mobile":{
"device":"iPhone",
"carrier":"Verizon",
}
},
"event_header":{
"accept_language":"en-us",
"topic_name":"mobile-clickstream",
"server_timestamp":1416958459572,
"version":"1.0"
},
"session":{
"properties":{
}
},
"event":{
"timestamp":1416958459185,
"properties":{
"event_sequence_number":97
}
}
}
here is what I have so far
def json_scan(json_obj, key):
result = None
for element in json_obj:
if str(element) == key:
result = json_obj[element]
else:
if type(json_obj[element]) == DictType:
json_scan(json_obj[element], key)
elif type(json_obj[element]) == ListType:
json_scan(element, key)
return result
expected output:
>>> json_scan(json_obj, "timestamp")
1416958459185
As I go through the debugger, I am able to find the desired value, but the line result = None resets result to None, and at the end of the method the value I get is None. I'm not sure how to fix this. I tried removing the line, but then I get an error because result is not preset to a value.
Using json library in order to parse the json file (some commas should be deleted) and using native dict types :
def json_scan(json_obj, key):
    """Parse the JSON text *json_obj* and return the first value under *key*.

    The top-level object is checked first, then nested objects depth-first.
    Lists are not searched by this version.  Returns ``None`` when the key is
    absent or the document is not a JSON object.
    """
    d = json.loads(json_obj)
    if not isinstance(d, dict):
        return None
    # Sentinel so that a stored falsy value (0, "", false) still counts as
    # found instead of being mistaken for "not found".
    missing = object()

    def _(dictobj, lookup):
        if lookup in dictobj:
            return dictobj[lookup]
        for child in dictobj.values():
            if isinstance(child, dict):
                result = _(child, lookup)
                if result is not missing:
                    return result
        return missing

    result = _(d, key)
    return None if result is missing else result
A more complete version :
def json_scan(json_obj, key):
    """Parse the JSON text *json_obj* and return the first value under *key*.

    Each object is checked for *key* itself first; then its nested objects
    are searched, and after those its nested lists.  Lists may contain
    objects or further lists to any depth.  Returns ``None`` when the key is
    not found.
    """
    d = json.loads(json_obj)
    # Sentinel so that a stored falsy value (0, "", false) still counts as
    # found instead of being mistaken for "not found".
    missing = object()

    def _(node, lookup):
        if isinstance(node, dict):
            if lookup in node:
                return node[lookup]
            children = list(node.values())
        elif isinstance(node, list):
            children = node
        else:
            return missing
        # Objects are visited before lists.
        nested = [child for child in children if isinstance(child, dict)]
        nested += [child for child in children if isinstance(child, list)]
        for child in nested:
            result = _(child, lookup)
            if result is not missing:
                return result
        return missing

    result = _(d, key)
    return None if result is missing else result
EDIT (2015/04/25):
After looking at PyCon 2015 videos, I came across dict_digger :
http://jtushman.github.io/blog/2013/11/06/dict-digger/
https://github.com/jtushman/dict_digger
It comes with tests...
You should return result from inside your if statement. So, your code would be:
def json_scan(json_obj, key):
    """Return the first value stored under *key* in a parsed JSON value.

    *json_obj* is a dict or list as produced by ``json.loads``.  Entries are
    visited in order, descending into each nested dict or list before moving
    on to the next sibling.  Returns ``None`` when the key is not found.
    """
    if isinstance(json_obj, dict):
        for element, value in json_obj.items():
            if str(element) == key:
                return value
            if isinstance(value, (dict, list)):
                # Propagate a hit from the nested search.
                result = json_scan(value, key)
                if result is not None:
                    return result
    elif isinstance(json_obj, list):
        for value in json_obj:
            if isinstance(value, (dict, list)):
                result = json_scan(value, key)
                if result is not None:
                    return result
    return None
That way if you find the result, it'll return it immediately instead of resetting it to None. If it doesn't find it, it'll still return None at the end.
The problem is that you don't assign the recursive calls to result:
def json_scan(json_obj, key):
    """Return the first value stored under *key* in a parsed JSON value.

    *json_obj* is a dict or list as produced by ``json.loads``.  The search
    is depth-first in iteration order and stops at the first hit.  Returns
    ``None`` when the key is not found.
    """
    result = None
    if isinstance(json_obj, dict):
        for element in json_obj:
            if str(element) == key:
                result = json_obj[element]
            elif isinstance(json_obj[element], (dict, list)):
                result = json_scan(json_obj[element], key)
            # Stop at the first hit so a later sibling cannot overwrite it.
            if result is not None:
                break
    elif isinstance(json_obj, list):
        for item in json_obj:
            result = json_scan(item, key)
            if result is not None:
                break
    return result
Another problem is that your scan doesn't work for lists - json_obj[element] is only going to work for dicts - but since your data doesn't have lists, it's working for now. You should remove list processing completely (unless you really have lists, then the algorithm needs to change).

Categories