How to find all keys with certain name in json? - python

I have many similar JSON objects with different sizes, I need to run through all keys with certain names and get values of it.
for example I have 3 JSONs:
[
{
"name": "temp",
"url": "http://temp.com",
"content": {
"content": "temp",
"url": "https://temp1.com"
}
},
{
"name": "temp",
"url": "http://temp.com",
"content": [
{
"content": "temp"
},
{
"url": "https://temp1.com"
}
]
},
{
"name": "temp",
"content": {
"content": "temp"
}
}
]
So I need to extract the value of every "url" key (the key is optional and may be missing from some objects).
the target output:
http://temp.com
https://temp1.com
http://temp.com
https://temp1.com

Load it with the json module and you get an ordinary list of dictionaries; you can then check each dictionary for the key 'url'.
You also need recursion to reach keys inside nested lists/dictionaries.
Minimal working code with your example data
text = '''[
{
"name": "temp",
"url": "http://temp.com",
"content": {
"content": "temp",
"url": "https://temp1.com"
}
},
{
"name": "temp",
"url": "http://temp.com",
"content": [
{
"content": "temp"
},
{
"url": "https://temp1.com"
}
]
},
{
"name": "temp",
"content": {
"content": "temp"
}
}
]'''
import json
def get_url(data, key):
    """Yield every value stored under ``key`` anywhere inside ``data``.

    ``data`` is a parsed JSON document: any nesting of lists and
    dictionaries. Scalars (strings, numbers, ``None``) yield nothing.
    Matches are produced depth-first in document order, and a matched
    value is itself searched, so nested occurrences are reported too.
    """
    if isinstance(data, list):
        # A list has no keys of its own; search each element.
        for item in data:
            yield from get_url(item, key)
    elif isinstance(data, dict):
        # Report this dictionary's own match first ...
        if key in data:
            yield data[key]
        # ... then search every value, including the matched one.
        for item in data.values():
            yield from get_url(item, key)
# --- main ---
# To read the document from a file instead of the inline `text` string:
#with open('data.json') as fh:
#    data = json.load(fh)
data = json.loads(text)
# get_url is a generator; list() collects every match it yields.
results = list(get_url(data, 'url'))
print(results)
Result:
['http://temp.com', 'https://temp1.com', 'http://temp.com', 'https://temp1.com']
If you want to get also path to element
def get_url(data, key, path=''):
    """Yield ``(value, path)`` for every occurrence of ``key`` in ``data``.

    ``path`` is the subscript expression that reaches the value from the
    root, e.g. ``[1]["content"][1]["url"]``. Callers normally leave it at
    its default; the recursion extends it one step per nesting level.
    """
    if isinstance(data, list):
        # List elements are addressed by position.
        for number, item in enumerate(data):
            yield from get_url(item, key, f'{path}[{number}]')
    elif isinstance(data, dict):
        if key in data:
            yield (data[key], f'{path}["{key}"]')
        # Dictionary members are addressed by name.
        for name, item in data.items():
            yield from get_url(item, key, f'{path}["{name}"]')
Result:
[
('http://temp.com', '[0]["url"]'),
('https://temp1.com', '[0]["content"]["url"]'),
('http://temp.com', '[1]["url"]'),
('https://temp1.com', '[1]["content"][1]["url"]')
]

I think a recursive generator is probably the way to go about this:
def iter_values(key, list_or_dict):
    """Yield the value of every ``key`` found in a nested list/dict tree.

    Lists are searched element by element. For a dictionary, its own
    value for ``key`` is yielded first and then every *other* member
    that is a list or dict is searched. The value stored under ``key``
    itself is not descended into. Anything that is neither a list nor a
    dict yields nothing.
    """
    if isinstance(list_or_dict, list):
        # we're inside a list now, so yield from each container element
        for obj in list_or_dict:
            if isinstance(obj, (list, dict)):
                yield from iter_values(key, obj)
        # a list has no keys, so the dict handling below must not run
        return
    if not isinstance(list_or_dict, dict):
        return
    # we're dealing with a dict, so check if it has the key and if so,
    # yield it
    if key in list_or_dict:
        yield list_or_dict[key]
    # it's possible there is a nested object in a different key
    # (e.g. "content" which has a "url"), so iterate over those as well
    for k, v in list_or_dict.items():
        if k != key and isinstance(v, (list, dict)):
            # the key must be passed along on every recursive call
            yield from iter_values(key, v)
If you don't know how generators work, you can turn it into a list by doing
list(iter_values("url", data))

Related

Create an object from a json value

I have a json file (estate.json) that looks like this:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"id": "724543e2-bd9d-4bef-b9d6-a3ae73d330b7",
"properties": {
"objektidentitet": "724543e2-bd9d-4bef-b9d6-a3ae73d330b7",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Infart"
}
}
},
{
"type": "Feature",
"id": "1209dd85-d454-46be-bf9c-f2472095fcdc",
"properties": {
"objektidentitet": "1209dd85-d454-46be-bf9c-f2472095fcdc",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Byggnad"
}
}
},
{
"type": "Feature",
"id": "e12ee844-c138-4f21-95cc-9254e78721d0",
"properties": {
"objektidentitet": "e12ee844-c138-4f21-95cc-9254e78721d0",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Infart"
}
}
}
]
}
With the following lines:
# Parse the file inside a context manager so the handle is always closed.
with open('estate.json') as f:
    d = json.load(f)
# Each entry of d['features'] is a dictionary; print its "id" value.
for feature in d['features']:
    print(feature['id'])
I can print the values of the 'id' keys (which are the same as the 'objektidentitet' values) as:
724543e2-bd9d-4bef-b9d6-a3ae73d330b7
1209dd85-d454-46be-bf9c-f2472095fcdc
e12ee844-c138-4f21-95cc-9254e78721d0
I need to use these values in the upcoming step.
I believe that I need to create objects or variables for them?
I have tried creating classes (but I cant seem to understand them):
class obj(object):
    """Expose the keys of a dictionary as instance attributes.

    Nested dictionaries become nested ``obj`` instances. For a list or
    tuple value, each dictionary element is converted and the result is
    stored as a list; other elements are kept unchanged.
    """

    def __init__(self, d):
        for k, v in d.items():
            # The container check must look at the value, not the key.
            if isinstance(v, (list, tuple)):
                setattr(self, k, [obj(x) if isinstance(x, dict) else x for x in v])
            else:
                setattr(self, k, obj(v) if isinstance(v, dict) else v)
I have tried using recordclass and
def dict_to_class(class_name: Any, dictionary: dict) -> Any:
    """Instantiate ``class_name`` and copy ``dictionary`` onto it.

    ``class_name`` is called with no arguments; every key/value pair of
    ``dictionary`` is then set as an attribute on the new instance.
    Nested dictionaries are stored as-is, not converted.
    """
    instance = class_name()
    for key, value in dictionary.items():
        setattr(instance, key, value)
    return instance
I need to use these values in the upcoming step. I believe that I
need to create objects or variables for them? I have no idea where to
start, I have been googling this for three months and nothing seem to
match my dilemma?
Consider just tossing those values in a list, if you want to keep them separate from the json object:
d = json.loads('''{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"id": "724543e2-bd9d-4bef-b9d6-a3ae73d330b7",
"properties": {
"objektidentitet": "724543e2-bd9d-4bef-b9d6-a3ae73d330b7",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Infart"
}
}
},
{
"type": "Feature",
"id": "1209dd85-d454-46be-bf9c-f2472095fcdc",
"properties": {
"objektidentitet": "1209dd85-d454-46be-bf9c-f2472095fcdc",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Byggnad"
}
}
},
{
"type": "Feature",
"id": "e12ee844-c138-4f21-95cc-9254e78721d0",
"properties": {
"objektidentitet": "e12ee844-c138-4f21-95cc-9254e78721d0",
"adressplatsattribut": {
"adressplatstyp": "Gatuadressplats",
"insamlingslage": "Infart"
}
}
}
]
}''')
# Collect the "id" of every feature, in document order.
list_of_ids = [feature['id'] for feature in d['features']]
print(list_of_ids)
['724543e2-bd9d-4bef-b9d6-a3ae73d330b7', '1209dd85-d454-46be-bf9c-f2472095fcdc', 'e12ee844-c138-4f21-95cc-9254e78721d0']
Really though, do you need to put them in this separate list variable, when they are already easily accessible inside your json object d?
If you wanted to use classes, it would look like this, but that is quite unnecessary just for parsing JSON. A dictionary returned by json.load is already a Python object.
class FeatureCollection:
    """Container holding the features read from one JSON document."""

    def __init__(self, features):
        # Stored as given; no copy is made.
        self.features = features
class Feature:
    """One feature entry: its identifier plus its "properties" value."""

    def __init__(self, _id, properties):
        # Named _id so the builtin id() is not shadowed.
        self._id = _id
        self.properties = properties
# Parse the file, then wrap each raw feature dictionary in a Feature.
with open('estate.json') as f:
    d = json.load(f)
features = [Feature(x['id'], x['properties']) for x in d['features']]
fc = FeatureCollection(features)
You don't need to convert the JSON to an object in this case, but doing so is a pleasant way to work with JSON (and dictionaries) through attribute access, as in JavaScript.
in your case the answer is quite simple:
d = json.loads("...") # or json.load(open("path to json file"))
ids = [i['id'] for i in d['features']]
now ids is:
['724543e2-bd9d-4bef-b9d6-a3ae73d330b7', '1209dd85-d454-46be-bf9c-f2472095fcdc', 'e12ee844-c138-4f21-95cc-9254e78721d0']
converting json to object-like instance
If we have a nested dictionary like below or that one in the question
{
"a": "a simple string",
"b": {
"a": "nested string",
"b": {
"c": [
[
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 0]
],
{"a": "test1", "b": "test2", "c": {"d": 1}},
]
},
},
}
in python you should write this code:
d = json.load(...)
d["b"]["b"]["c"][1]["c"]["d"] # -> 1
but it is more fun to just write:
d = jsonobj(json.load(...))
d.b.b.c[1].c.d # -> 1
so the code that I wrote for converting a json to object, is here:
class jsonobj:
    """Attribute-style view of parsed JSON.

    Every key of the dictionary ``d`` becomes an attribute. Nested
    dictionaries become nested ``jsonobj`` instances and lists are
    converted element by element, so ``d["b"]["c"][1]["d"]`` can be
    written as ``obj.b.c[1].d``.
    """

    def __init__(self, d):
        for k, v in d.items():
            setattr(self, k, self.nested_convert(v))

    @classmethod
    def nested_convert(cls, v):
        """Convert ``v`` recursively; non-container values pass through."""
        if isinstance(v, dict):
            return cls(v)
        if isinstance(v, list):
            return [cls.nested_convert(i) for i in v]
        return v
This code is aimed at converting JSON into an object; an arbitrary Python dictionary can be more complex than what it handles.

Insert/Add a json object in a dictionary at nested level dynamically

I am working on a script to generate some test data based on a json spec. The intention of this script is to construct a json object/python dict record
To simplify things, I am using a list items here that represents my source items, which also represent the path where the value should be inserted.
Here's my intended output -
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "Test Data"
},
"item1": 1,
"item2": 0
}
I am able to build the nested objects but they are all getting inserted at first level of the dictionary instead.
How can I use dest_path to store the result in the intended location?
Source:
import json
import random
def get_nested_obj(items: list):
    """
    Construct a nested json object

    ``['a', 'b']`` becomes ``{'a': {'b': 'Test Data'}}``; an empty list
    returns the bare ``'Test Data'`` leaf.
    """
    res = 'Test Data'
    # Wrap from the innermost key outwards.
    for item in reversed(items):
        res = {item: res}
    return res
def get_dest_path(source_fields):
    """
    Construct dest path where result from `get_nested_obj` should go

    Returns the text ``record`` followed by one ``['name']`` subscript
    per entry of ``source_fields``.
    """
    return 'record' + ''.join(f"['{x}']" for x in source_fields)
record = {}
items = ['access.device.java.version', 'access.device.python.version', 'access.type', 'item1', 'item2']
for item in items:
    if '.' in item:
        source_fields = item.split('.')
        # temp walks down the part of the path that already exists.
        temp = record
        for i, source_field in enumerate(source_fields):
            if source_field in temp:
                temp = temp[source_field]
                continue
            # First missing component: build everything below it.
            res = get_nested_obj(source_fields[i+1:])
            dest_path = get_dest_path(source_fields[:i])
            print(dest_path)
            # NOTE: this writes to the top-level dict, not to the nested
            # dict reached in temp -- that is the behaviour being asked
            # about; temp[source_field] = res would insert at dest_path.
            record[source_field] = res # Here's the problem. How to use dest_path here?
            break
    else:
        record[item] = random.randint(0, 1)
print(json.dumps(record))
My output:
{
"access": {
"device": {
"java": {
"version": "Test Data"
}
}
},
"python": {
"version": "Test Data"
},
"type": "Test Data",
"item1": 1,
"item2": 0
}
To construct the record dictionary from the items list you can use next example:
import random

record = {}
items = [
    "access.device.java.version",
    "access.device.python.version",
    "access.type",
    "item1",
    "item2",
]
for item in items:
    parts = item.split(".")
    if len(parts) == 1:
        # Undotted names are top-level entries with a random 0/1 value.
        record[parts[0]] = random.randint(0, 1)
    else:
        # Walk (creating as needed) one dictionary per leading component,
        # then store the leaf under the last component.
        node = record
        for name in parts[:-1]:
            node = node.setdefault(name, {})
        node[parts[-1]] = "Test Data"
print(record)
Prints:
{
"access": {
"device": {
"java": {"version": "Test Data"},
"python": {"version": "Test Data"},
},
"type": "Test Data",
},
"item1": 1,
"item2": 1,
}
Not very different from other answer, but recursive.
import json
items = ['access.device.java.version', 'access.device.python.version', 'access.type', 'item1', 'item2']
def populate(di, item):
    """Insert the dotted path ``item`` into the nested dict ``di``.

    The first component is consumed per call and the remainder is
    handled recursively. A leaf whose name starts with ``"item"`` gets
    the value ``1``; every other leaf gets ``"Test Data"``. ``di`` is
    modified in place and also returned.
    """
    key, dot, rest = item.partition(".")
    if not dot:
        # No dot left: ``key`` is the leaf.
        di[key] = 1 if key.startswith("item") else "Test Data"
    else:
        # Descend into (or create) the child dict for this component.
        populate(di.setdefault(key, {}), rest)
    return di
di = {}
for item in items:
populate(di,item)
print(json.dumps(di, indent=4))
output:
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "Test Data"
},
"item1": 1,
"item2": 1
}
And here's a version that directly specifies the data, which would probably be more useful (and that return di is also unnecessary in both cases):
import json
items = [('access.device.java.version',"Test Data"), ('access.device.python.version', "Test Data"), ('access.type', "type data"), ('item1',1), ('item2',2)]
def populate(di, item):
    """Store ``item[1]`` in the nested dict ``di`` at dotted path ``item[0]``.

    ``item`` is a ``(path, value)`` pair. Intermediate dictionaries are
    created as needed. ``di`` is modified in place; nothing is returned.
    """
    path, v = item[0], item[1]
    key, dot, rest = path.partition(".")
    if not dot:
        # No dot left: ``key`` is the leaf.
        di[key] = v
    else:
        # Descend into (or create) the child dict for this component.
        populate(di.setdefault(key, {}), (rest, v))
record = {}
for item in items:
populate(record,item)
print(json.dumps(record, indent=4))
{
"access": {
"device": {
"java": {
"version": "Test Data"
},
"python": {
"version": "Test Data"
}
},
"type": "type data"
},
"item1": 1,
"item2": 2
}
FWIW, I tried collections.defaultdict for fun, but it does not nest recursively on its own.

Get the Parent key and the nested value in nested json

I have a nested json for a JSON schema like this:
{
"config": {
"x-permission": true
},
"deposit_schema": {
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"control_number": {
"type": "string",
"x-cap-permission": {
"users": [
"test#test.com"
]
}
},
"initial": {
"properties": {
"status": {
"x-permission": {
"users": [
"test3#test.com"
]
},
"title": "Status",
"type": "object",
"properties": {
"main_status": {
"type": "string",
"title": "Stage"
}
}
},
"gitlab_repo": {
"description": "Add your repository",
"items": {
"properties": {
"directory": {
"title": "Subdirectory",
"type": "string",
"x-permission": {
"users": [
"test1#test.com",
"test2#test.com"
]
}
},
"gitlab": {
"title": "Gitlab",
"type": "string"
}
},
"type": "object"
},
"title": "Gitlab Repository",
"type": "array"
},
"title": "Initial Input",
"type": "object"
}
},
"title": "Test Analysis"
}
}
The JSON is nested and I want to have the dict of x-permission fields with their parent_key like this:
{
"control_number": {"users": ["test#test.com"]},
"initial.properties.status": {"users": ["test3#test.com"]},
"initial.properties.gitlab_repo.items.properties.directory": {"users": [
"test1#test.com",
"test2#test.com"
]}
}
I am trying to do implement recursive logic for every key in JSON like this:
def extract(obj, parent_key):
    """Return the first ``(dotted_key, x-permission)`` pair found in ``obj``.

    Each dict-valued member is checked for a truthy ``'x-permission'``
    entry; otherwise its ``'properties'`` entry, if truthy, is searched
    recursively. The function returns as soon as either branch is
    taken, so at most one pair is reported. ``(None, None)`` means
    nothing was found.
    """
    for k, v in obj.items():
        key = parent_key + '.' + k
        if isinstance(v, dict):
            if v.get('x-permission'):
                return key, v.get('x-permission')
            elif v.get('properties'):
                return extract(v.get('properties'), key)
    return None, None
def collect_permission_info(object_):
    """Map dotted parent keys to the permission info found by ``extract``.

    ``extract`` is run once per top-level key of ``object_`` and its
    result is recorded only when both the key and the info are truthy.
    """
    # _schema = _schema.deposit_schema.get('properties')
    _schema = object_  # above json
    x_cap_fields = {}
    for k in _schema:
        parent_key, permission_info = extract(_schema.get(k), k)
        if parent_key and permission_info:
            x_cap_fields.update({parent_key: permission_info})
    return x_cap_fields
I am getting empty dict now, what I am missing here?
You could use this generator of key/value tuples:
def collect_permission_info(schema):
    """Yield ``(dotted_key, permission)`` pairs from a properties mapping.

    For each dict-valued member, its ``"x-permission"`` entry (if
    present) is yielded under the member's name, and its
    ``"properties"`` entry (if present) is searched recursively with the
    member's name prefixed to every key found below it. Non-dict
    members are ignored.
    """
    for key, child in schema.items():
        if not isinstance(child, dict):
            continue
        if "x-permission" in child:
            yield key, child["x-permission"]
        if "properties" in child:
            for rest, value in collect_permission_info(child["properties"]):
                yield key + "." + rest, value
Then call it like this:
result = dict(collect_permission_info(schema))
A few issues I can spot:
You use the parent_key directly in the recursive function. In a case when multiple properties exist in an object ("_experiment" has 2 properties), the path will be incorrect (e.g. _experiment.type.x-permission is constructed in second loop call). Use a new variable so that each subsequent for loop call uses the initial parent_key value
The elif branch is never executed as the first branch has priority. It is a duplicate.
The return value from the recursive extract(...) call is ignored. Anything you might find on deeper levels is therefore ignored
Judging by your example json schema and the desired result, a recursive call on the "initial": {...} object should return multiple results. You would have to modify the extract(...) function to allow for multiple results instead of a single one
You only check if an object contains a x-permission or a properties attribute. This ignores the desired result in the provided "initial" schema branch which contains x-permission nested inside a status and main_status branch. The easiest solution is to invoke a recursive call every time isinstance(v, dict) == true
After reading through the comments and the answers. I got this solution working for my use case.
def parse_schema_permission_info(schema):
    """Collect every ``'x-permission'`` entry of ``schema`` by dotted path.

    Returns a dictionary mapping the dotted path of each object that
    carries an ``'x-permission'`` entry to that entry's value. Every
    dict-valued member is descended into, whatever its name. Top-level
    members that are not dictionaries are skipped.
    """
    x_fields = {}

    def extract_permission_field(node, parent_field):
        # Distinct loop names: the original reused ``field`` for both the
        # parameter and the loop variable.
        for name, value in node.items():
            if name == 'x-permission':
                x_fields[parent_field] = value
            if isinstance(value, dict):
                key = parent_field + '.' + name
                if value.get('x-permission'):
                    x_fields[key] = value.get('x-permission')
                extract_permission_field(value, key)

    for field, value in schema.items():
        # Scalars such as "title": "..." have no members to search.
        if isinstance(value, dict):
            extract_permission_field(value, field)
    return x_fields

Pythonic way to transform/flatten JSON containing nested table-as-list-of-dicts structures

Suppose I have a table represented in JSON as a list of dicts, where the keys of each item are the same:
J = [
{
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
{
"symbol": "LTC",
"name": "LiteCoin"
:
},
And suppose I require efficient lookup, e.g. symbols['ETHBTC']['name']
I can transform with symbols = { item['symbol']: item for item in J }, producing:
{
"ETHBTC": {
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
"LTCBTC": {
"symbol": "LTCBTC",
"name": "LiteCoin",
:
},
(Ideally I would also remove the now redundant symbol field).
However, what if each item itself contains a "table-as-list-of-dicts"?
Here's a fuller minimal example (I've removed lines not pertinent to the problem):
J = {
"symbols": [
{
"symbol":"ETHBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
},
{
"symbol":"LTCBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
}
]
}
So the challenge is to transform this structure into:
J = {
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100",
:
}
I can write a flatten function:
def flatten(L: list, key) -> dict:
    """Turn a list of row dicts into a dict keyed by each row's ``key`` value.

    Each resulting value is a copy of the row without the ``key`` entry.
    The rows in ``L`` are left untouched, so the same list can be
    flattened again. A row without ``key`` raises ``KeyError``.
    """
    return {
        D[key]: {name: value for name, value in D.items() if name != key}
        for D in L
    }
Then I can flatten the outer list and loop through each key/val in the resulting dict, flattening val['filters']:
# Key the outer table by symbol, then key each symbol's filter table
# by filter type.
J['symbols'] = flatten(J['symbols'], key="symbol")
for symbol, D in J['symbols'].items():
    D['filters'] = flatten(D['filters'], key="filterType")
Is it possible to improve upon this using glom (or otherwise)?
Initial transform has no performance constraint, but I require efficient lookup.
I don't know if you'd call it pythonic but you could make your function more generic using recursion and dropping key as argument. Since you already suppose that your lists contain dictionaries you could benefit from python dynamic typing by taking any kind of input:
from pprint import pprint
def flatten_rec(I) -> dict:
    """Recursively turn every list-of-dicts table into a dict of dicts.

    For a list of non-empty dictionaries, the value of each row's first
    item becomes the row's key and the remaining items (converted
    recursively) become its value. Dictionaries are converted member by
    member. Any other list is returned as a list with its elements
    converted, and scalars are returned unchanged, so the result is not
    always a dict despite the annotation.
    """
    if isinstance(I, dict):
        return {k: flatten_rec(v) for k, v in I.items()}
    if isinstance(I, list):
        # Only a list made entirely of non-empty dicts is a "table".
        if not all(isinstance(D, dict) and D for D in I):
            return [flatten_rec(D) for D in I]
        table = {}
        for D in I:
            fields = iter(D.items())
            # The first item supplies the row key; the rest form the row.
            _, label = next(fields)
            table[label] = {k: flatten_rec(v) for k, v in fields}
        return table
    return I
pprint(flatten_rec(J))
Output:
{'symbols': {'ETHBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}},
'LTCBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}}}}
Since you have different transformation rules for different keys, you can keep a list of the key names that require "grouping" on:
# Names of the fields whose value becomes the grouping key of a row.
t = ['symbol', 'filterType']


def transform(d):
    """Regroup the dictionary ``d`` by the grouping fields listed in ``t``.

    If ``d`` contains any field named in ``t``, the result is a
    one-entry dict whose key is the value of the first such field and
    whose value is the transform of the remaining fields. Otherwise
    every list-valued member is replaced by the merged transforms of its
    elements, and all other members are kept as they are.
    """
    grouping = {a: b for a, b in d.items() if a in t}
    if grouping:
        label = next(iter(grouping.values()))
        rest = {a: b for a, b in d.items() if a not in grouping}
        return {label: transform(rest)}
    result = {}
    for a, b in d.items():
        if isinstance(b, list):
            # Each element transforms to a small dict; merge them all.
            merged = {}
            for j in b:
                merged.update(transform(j))
            result[a] = merged
        else:
            result[a] = b
    return result
import json
print(json.dumps(transform(J), indent=4))
{
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
},
"LTCBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
}
}
}

Extracting data from JSON depending on other parameters

What are the options for extracting value from JSON depending on other parameters (using python)? For example, JSON:
"list": [
{
"name": "value",
"id": "123456789"
},
{
"name": "needed-value",
"id": "987654321"
}
]
When using json_name["list"][0]["id"] it obviously returns 123456789. Is there a way to indicate "name" value "needed-value" so i could get 987654321 in return?
For example:
import json as j

s = '''
{
"list": [
{
"name": "value",
"id": "123456789"
},
{
"name": "needed-value",
"id": "987654321"
}
]
}
'''
js = j.loads(s)
# Keep the "id" of every entry whose "name" matches the wanted value.
# print is a function in Python 3, so the argument needs parentheses.
print([x["id"] for x in js["list"] if x["name"] == "needed-value"])
The best way to handle this is to refactor the json as a single dictionary. Since "name" and "id" are redundant you can make the dictionary with the value from "name" as the key and the value from "id" as the value.
import json
j = '''{
"list":[
{
"name": "value",
"id": "123456789"
},{
"name": "needed-value",
"id": "987654321"
}
]
}'''
jlist = json.loads(j)['list']
d = {jd['name']: jd['id'] for jd in jlist}
print(d) ##{'value': '123456789', 'needed-value': '987654321'}
Now you can iterate the items like you normally would from a dictionary.
# Walk the name -> id mapping one pair at a time.
for k, v in d.items():
    print(k, v)
# value 123456789
# needed-value 987654321
And since the names are now hashed, you can check membership more efficiently than continually querying the list.
assert 'needed-value' in d
jsn = {
"list": [
{
"name": "value",
"id": "123456789"
},
{
"name": "needed-value",
"id": "987654321"
}
]
}
def get_id(list, name):
    """Yield the ``'id'`` of every entry of ``list`` whose ``'name'`` is ``name``.

    ``list`` is an iterable of dictionaries that each have ``'name'``
    and ``'id'`` keys. The parameter name shadows the builtin inside
    this function only; it is kept so keyword callers keep working.
    """
    for el in list:
        if el['name'] == name:
            yield el['id']
print(list(get_id(jsn['list'], 'needed-value')))
Once parsed, this JSON is a list of dictionaries. With this in mind, you can index into the list directly if you know where the entry you need sits in it (and which key you want from that dictionary).
In your case, I would use list[1]["id"]
If, however, you don't know the position of your needed value within the list, then you can run an old-fashioned for loop this way:
def find_id(users, name="needed-value"):
    """Return the ``"id"`` of the first entry whose ``"name"`` is ``name``.

    ``users`` is the parsed list of dictionaries. ``None`` is returned
    when no entry matches. The loop lives in a function because
    ``return`` is only valid inside one.
    """
    for user in users:
        if user["name"] == name:
            return user["id"]
    return None
This is assuming you only have one unique needed_value in your list.

Categories