I have the following list (notice "keyE" has a dictionary as a string):
[
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"
}
]
And I want to convert it to this (it can have any number of nested dictionaries and lists):
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
So far, I have the following but I don't know what to do after the json.loads. I know I have to recursively call the function but not sure how.
import json
def _decode_embedded_json(value):
    """Return *value* with every JSON document embedded in a string decoded.

    Strings that contain at least one of ``{ [ ] }`` are handed to
    ``json.loads``; whatever comes back is processed again so that JSON
    nested inside JSON is unwrapped at any depth.  Strings that are not valid
    JSON, and all non-container values, are returned unchanged.  Dicts and
    lists are rebuilt rather than modified in place.
    """
    if isinstance(value, str) and any(char in "{[]}" for char in value):
        try:
            decoded = json.loads(value)
        except ValueError:
            # json.JSONDecodeError is a ValueError: not JSON, keep the text.
            return value
        # A decoded JSON string literal is strictly shorter than its source,
        # so re-processing the result always terminates.
        return _decode_embedded_json(decoded)
    if isinstance(value, dict):
        return {key: _decode_embedded_json(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_decode_embedded_json(item) for item in value]
    return value


def convert(data_list: list) -> list:
    """Decode JSON strings nested anywhere inside the items of *data_list*.

    Args:
        data_list: list of dictionaries (or other JSON-like values) whose
            string values may themselves hold JSON-encoded dicts or lists.

    Returns:
        A new list with the same items in the same order, where every string
        that parses as JSON has been replaced by the decoded structure.  The
        input list and its dictionaries are not modified.
    """
    return [_decode_embedded_json(obj) for obj in data_list]
No idea if this'll work for your more complicated cases, but I was able to use ast.literal_eval() and some really janky chained str.replace calls:
import ast
def replace(s):
    """Evaluate *s* as a Python literal after unquoting embedded dicts.

    Every ``"{`` is rewritten to ``{`` and every ``}"`` to ``}`` so that a
    dictionary stored as a quoted string becomes a plain nested dictionary;
    the resulting text is then parsed with ``ast.literal_eval``.
    """
    without_opening_quotes = s.replace(r'"{', "{")
    without_closing_quotes = without_opening_quotes.replace(r'}"', "}")
    return ast.literal_eval(without_closing_quotes)
x = [{"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"},
{"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"}]
# Replace, in place, every value of every dict in `x` that contains a "{"
# with the structure produced by replace().
for d in x:
    for key in d:
        value = d[key]
        if "{" not in value:
            continue
        d[key] = replace(value)
Output:
In [4]: x
Out[4]:
[{'keyA': 'Example',
'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}},
{'keyA': 'Example2',
'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
In [5]: x[0]["keyB"]["keyD"]["keyE"]["name"]
Out[5]: 'foo'
Your nested key looks like a JSON string, which can be loaded into a dictionary with the json.loads method. However, JSON strings nested inside it are not decoded automatically, which is why I've added a recursive function that also decodes the nested dictionaries present in the JSON.
import json
from json import JSONDecodeError
def recurse(d):
    """Decode JSON text nested at any depth inside *d*.

    Args:
        d: a dict, a list, a JSON-encoded string/bytes value, or any other
            value.

    Returns:
        * For a string (or bytes) that decodes to a dict or list: the decoded
          structure, itself processed recursively.
        * For a dict or list: the same object, updated in place so that each
          contained value has been processed recursively.
        * Anything else, including strings that are not JSON or that decode
          to a scalar such as ``"123"``: *d* unchanged.
    """
    if isinstance(d, (str, bytes, bytearray)):
        try:
            decoded = json.loads(d)
        except (JSONDecodeError, UnicodeDecodeError):
            return d
        if not isinstance(decoded, (dict, list)):
            # Leave scalar-looking text alone instead of calling .items() on
            # an int/bool/None, which used to raise AttributeError.
            return d
        d = decoded
    if isinstance(d, dict):
        # Only existing keys are reassigned, so the dict's size never changes
        # while it is being iterated.
        for k, v in d.items():
            d[k] = recurse(v)
    elif isinstance(d, list):
        for index, item in enumerate(d):
            d[index] = recurse(item)
    return d
# Run recurse() over every value of every dict in `data_list`, storing the
# result back under the same key.
for d in data_list:
    for key in d:
        val = d[key]
        d[key] = recurse(val)
Output:
[
{
"keyA": "Example",
"keyB": {"keyC": 2, "keyD": {"keyE": {"name": "foo"}}, "keyF": 0},
},
{
"keyA": "Example2",
"keyB": {"keyC": 6, "keyD": {"keyE": {"name": "bar"}}, "keyF": 5},
},
]
Okay, here is recursive solution:
import json
from json import JSONDecodeError
data = [
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"foo\\\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"bar\\\"}\"},\"keyF\":5}"
}
]
def rec_convert(data):
    """Decode, in place, JSON text stored anywhere inside *data*.

    Args:
        data: a dict or list.  Each string value that parses as a JSON object
            or array is replaced by the decoded structure, which is then
            processed the same way.  Nested dicts and lists are traversed.

    Returns:
        None.  *data* is modified in place.

    Strings that are not JSON, or that decode to a scalar (for example
    ``"123"``), are left untouched; previously such a value was overwritten
    and the following ``.items()`` call raised ``AttributeError``.
    """
    entries = data.items() if isinstance(data, dict) else enumerate(data)
    # Only existing keys/indices are reassigned, so iterating while assigning
    # is safe.
    for k, v in entries:
        if isinstance(v, (str, bytes, bytearray)):
            try:
                decoded = json.loads(v)
            except (JSONDecodeError, UnicodeDecodeError):
                continue
            if not isinstance(decoded, (dict, list)):
                continue
            data[k] = v = decoded
        if isinstance(v, (dict, list)):
            rec_convert(v)
# Convert every element of `data` in place, then show the result twice:
# once as the plain Python repr and once as indented JSON.
for el in data:
    rec_convert(el)
print("raw print:", data, sep="\n")
print("pretty print")
pretty = json.dumps(data, indent=2)
print(pretty)
and output:
raw print:
[{'keyA': 'Example', 'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}}, {'keyA': 'Example2', 'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
pretty print
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
I've updated Vishal Singh's answer to accommodate for lists inside a dictionary.
def decode_json_recursively(obj):
    """Decode JSON text nested at any depth inside *obj*.

    Args:
        obj: a JSON-encoded string/bytes value, a dict, a list, or any other
            value.

    Returns:
        * str/bytes input: the result of ``json.loads`` (then processed as
          below), or *obj* itself when it is not valid JSON.
        * list (given or decoded): a new list whose elements have been
          decoded recursively.
        * dict (given or decoded): the same dict, with every value replaced
          in place by its recursively decoded form.
        * anything else: returned unchanged.
    """
    if isinstance(obj, (str, bytes, bytearray)):
        try:
            obj = json.loads(obj)
        except (JSONDecodeError, UnicodeDecodeError):
            return obj
    if isinstance(obj, list):
        # Also reached for strings that decoded to a list, whose elements
        # previously were returned without being decoded.
        return [decode_json_recursively(el) for el in obj]
    if isinstance(obj, dict):
        for k, v in obj.items():
            obj[k] = decode_json_recursively(v)
    return obj
Related
1. I need to remove, at every level, any key whose value is null.
2. I need to remove the chained key named key_2.key_c.
Should result in the outcome block below.
Original json
{
"key_1": {
"key_a": 111111},
"key_2": {
"key_a": "value",
"key_b": null,
"key_c": {
"key_c_a": {
"key_c_b": "value"}
},
"key_d": [{"key_c": "value"}],
}
Outcome
{
"key_1": {
"key_a": 111111},
"key_2": {
"key_a": "value",
"key_d": [{"key_c": "value"}],
}
You can achieve this by recursively traversing the input JSON object and filtering out the unwanted values:
import json
def filter_json(obj):
    """Return a filtered copy of a decoded JSON value.

    Two rules are applied at every nesting level:

    * dictionary entries whose value is ``None`` are dropped;
    * inside a dictionary stored under the key ``"key_2"``, the entry
      ``"key_c"`` is dropped.

    Args:
        obj: any decoded JSON value (dict, list or scalar).

    Returns:
        New dicts/lists mirroring *obj* with the rules applied; scalars are
        returned as they are.  The input is not modified.
    """
    if isinstance(obj, dict):
        new_obj = {}
        for k, v in obj.items():
            if v is None:
                continue
            # Guard on the value type: "key_2" may map to a list or scalar,
            # in which case there is no "key_c" entry to strip.
            if k == "key_2" and isinstance(v, dict):
                v = {k2: v2 for k2, v2 in v.items() if k2 != "key_c"}
            new_obj[k] = filter_json(v)
        return new_obj
    if isinstance(obj, list):
        return [filter_json(elem) for elem in obj]
    return obj
Usage
json_str = '''
{
"key_1": {
"key_a": 111111
},
"key_2": {
"key_a": "value",
"key_b": null,
"key_c": {
"key_c_a": {
"key_c_b": "value"
}
},
"key_d": [
{
"key_c": "value"
}
]
}
}
'''
# Parse the JSON text into Python objects, then build the filtered result.
json_obj = json.loads(json_str)
filtered_obj = filter_json(json_obj)
I would like to understand how I can rename keys in a list of dicts recursively without mutation of method parameters.
I have the following list of dicts:
filters = [
{
'or': [
{
'and': [
{
"column": {
"type": "string",
"name": "field_name"
},
"operator": "==",
"value": "field_value"
},
{
"column": {
"type": "string",
"name": "field_2_name"
},
"operator": "!=",
"value": "field_2_value"
}
]
},
{
'not': [
{
"column": {
"type": "number",
"name": "field_3_name"
},
"operator": "==",
"value": "field_3_value"
}
]
}
]
}]
This is what I expect to achieve:
filters = [
{
'or': [
{
'and': [
{'field': 'field_name', 'op': '==', 'value': 'field_value'},
{'field': 'field_2_name', 'op': '!=', 'value': 'field_2_value'},
]
},
{
'not': [
{'field': 'field_3_name', 'op': '==', 'value': 'field_3_value'}
]
},
],
}
]
Any way I can get around with this?
Thanks!
A recursive function should work, changing the sub-dicts if they contain column and otherwise recursing deeper for other operations.
def change(collection):
    """Return a rewritten copy of a nested filter structure.

    Args:
        collection: a list, a dict, or a leaf value.

    Returns:
        * list: a new list with every element rewritten.
        * dict containing ``"column"``: a new dict
          ``{"field": <column name>, "value": <value>, "op": <operator>}``.
        * any other dict: a new dict with the same keys and rewritten values.
        * anything else: the value itself (previously ``None`` was returned,
          which silently discarded scalar values).

    Raises:
        KeyError: if a dict containing ``"column"`` lacks ``"value"`` or
            ``"operator"``, or its column has no ``"name"``.
    """
    if isinstance(collection, list):
        return [change(d) for d in collection]
    if isinstance(collection, dict):
        if "column" in collection:
            return {
                "field": collection["column"]["name"],
                "value": collection["value"],
                "op": collection["operator"],
            }
        return {op: change(val) for op, val in collection.items()}
    return collection
# Build the rewritten structure from `filters`.
res = change(filters)
Result:
[{'or': [{'and': [{'field': 'field_name', 'op': '==', 'value': 'field_value'},
{'field': 'field_2_name', 'op': '!=', 'value': 'field_2_value'}]},
{'not': [{'field': 'field_3_name', 'op': '==', 'value': 'field_3_value'}]}]}]
Simple, you write a recursive function that fixes up your data structure. I assume that you don't want to change the original.
def fixup(d):
    """Return a rebuilt copy of *d* with selected dicts replaced.

    Lists are rebuilt element by element.  A dict for which
    ``needs_mangling`` is truthy is replaced by ``mangler(d)``; any other
    dict is rebuilt with each value processed recursively.  Values that are
    neither lists nor dicts are returned as they are.
    """
    if isinstance(d, list):
        return [fixup(x) for x in d]
    if isinstance(d, dict):
        if needs_mangling(d):
            return mangler(d)
        return {k: fixup(v) for k, v in d.items()}
    return d
Add mangling functions as required.
Does this satisfy your scenario?
def change(obj):
    """Return a rewritten copy of a nested filter structure.

    The argument is never modified: lists and dicts are rebuilt.

    Args:
        obj: a list, a dict, or a leaf value.

    Returns:
        * list: a new list with every element rewritten.
        * dict containing ``"operator"``: a new dict
          ``{"field": <column name>, "op": <operator>, "value": <value>}``.
        * any other dict: a new dict with the same keys and rewritten values.
        * anything else: the value itself (previously ``None``).

    Raises:
        KeyError: if a dict containing ``"operator"`` lacks ``"column"`` or
            ``"value"``, or its column has no ``"name"``.
    """
    # A list: rewrite every item into a fresh list.
    if isinstance(obj, list):
        return [change(item) for item in obj]
    if isinstance(obj, dict):
        # 1st case: a condition object is converted to the flat form.
        if "operator" in obj:
            return {
                "field": obj["column"]["name"],
                "op": obj["operator"],
                "value": obj["value"],
            }
        # 2nd case: go deeper into the values of a grouping object.
        return {key: change(value) for key, value in obj.items()}
    # Leaf values pass through untouched.
    return obj
# Show the structure returned by change() for `filters`.
print(change(filters))
Suppose I have a table represented in JSON as a list of dicts, where the keys of each item are the same:
J = [
{
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
{
"symbol": "LTC",
"name": "LiteCoin"
:
},
And suppose I require efficient lookup, e.g. symbols['ETHBTC']['name']
I can transform with symbols = { item['symbol']: item for item in J }, producing:
{
"ETHBTC": {
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
"LTCBTC": {
"symbol": "LTCBTC",
"name": "LiteCoin",
:
},
(Ideally I would also remove the now redundant symbol field).
However, what if each item itself contains a "table-as-list-of-dicts"?
Here's a fuller minimal example (I've removed lines not pertinent to the problem):
J = {
"symbols": [
{
"symbol":"ETHBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
},
{
"symbol":"LTCBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
}
]
}
So the challenge is to transform this structure into:
J = {
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100",
:
}
I can write a flatten function:
def flatten(L: list, key) -> dict:
    """Index a list of dicts by the value each dict holds under *key*.

    Args:
        L: list of dictionaries, each containing *key*.
        key: name of the entry whose value becomes the lookup key.

    Returns:
        A dict mapping ``D[key]`` to a copy of ``D`` without the *key* entry,
        for every ``D`` in *L*.  If several items share the same value, the
        last one wins.

    Raises:
        KeyError: if an item does not contain *key*.

    The items of *L* are not modified.  Building a copy also removes the
    dependence on whether a dict comprehension evaluates its key or its value
    expression first, which matters when the value expression deletes the key.
    """
    return {
        D[key]: {name: value for name, value in D.items() if name != key}
        for D in L
    }
Then I can flatten the outer list and loop through each key/val in the resulting dict, flattening val['filters']:
# Index the outer table by "symbol", then index each symbol's "filters"
# table by "filterType".
J['symbols'] = flatten(J['symbols'], key="symbol")
for D in J['symbols'].values():
    D['filters'] = flatten(D['filters'], key="filterType")
Is it possible to improve upon this using glom (or otherwise)?
Initial transform has no performance constraint, but I require efficient lookup.
I don't know if you'd call it pythonic but you could make your function more generic using recursion and dropping key as argument. Since you already suppose that your lists contain dictionaries you could benefit from python dynamic typing by taking any kind of input:
from pprint import pprint
def flatten_rec(I):
    """Recursively turn every list of dicts into a dict keyed by first value.

    Args:
        I: a dict, a list, or a leaf value.

    Returns:
        * dict: a new dict with every value processed recursively.
        * list whose items are all non-empty dicts (including the empty
          list): a dict mapping each item's first value to a dict of the
          item's remaining entries, processed recursively.
        * any other list: a new list with every element processed
          recursively (previously this raised ``AttributeError`` or
          ``IndexError``).
        * anything else: the value itself.

    The "first value" relies on dict insertion order, so each item's
    identifying entry must come first.
    """
    if isinstance(I, dict):
        return {k: flatten_rec(v) for k, v in I.items()}
    if isinstance(I, list):
        if not all(isinstance(D, dict) and D for D in I):
            return [flatten_rec(item) for item in I]
        table = {}
        for D in I:
            fields = iter(D.items())
            # The first entry supplies the lookup key; the rest is the payload.
            _, ident = next(fields)
            table[ident] = {k: flatten_rec(v) for k, v in fields}
        return table
    return I
# Pretty-print the structure returned by flatten_rec() for `J`.
pprint(flatten_rec(J))
Output:
{'symbols': {'ETHBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}},
'LTCBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}}}}
Since you have different transformation rules for different keys, you can keep a list of the key names that require "grouping" on:
t = ['symbol', 'filterType']


def transform(d):
    """Regroup the dict *d* under the value of its grouping key, if any.

    If *d* has entries whose key is listed in ``t``, the result is a
    single-entry dict mapping the first such value to the transformed
    remainder of *d*.  Otherwise every list value is replaced by the merged
    transforms of its items, and all other values are kept as they are.
    """
    grouping = {name: val for name, val in d.items() if name in t}
    if grouping:
        label = next(iter(grouping.values()))
        remainder = {name: val for name, val in d.items() if name not in grouping}
        return {label: transform(remainder)}

    out = {}
    for name, val in d.items():
        if not isinstance(val, list):
            out[name] = val
            continue
        merged = {}
        for entry in val:
            # Later entries overwrite earlier ones that share a key.
            merged.update(transform(entry))
        out[name] = merged
    return out
import json
# Print the result of transform() for `J` as JSON indented by four spaces.
print(json.dumps(transform(J), indent=4))
{
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
},
"LTCBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
}
}
}
I want to get the value of all ObjectId from this Dictionary which is a combination of list and dictionary.
The required answer to me is
-> 2, 2, [[ "Value is 1"], ""], 3, 2
Here is my attempt at getting the ObjectId values. It only gives me the value of the inner ObjectId; I want the values of all ObjectId keys, whether the value is an int, a list, etc.
def _print_object_ids(node):
    """Print the value of every "ObjectId" key found anywhere inside *node*.

    Dicts and lists are walked depth-first in their stored order; other
    values are ignored.  The value of an "ObjectId" entry is printed first
    and then searched as well.
    """
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "ObjectId":
                print(value)
            _print_object_ids(value)
    elif isinstance(node, list):
        for item in node:
            _print_object_ids(item)


# Only the "RESULT" entry is searched, and only when it holds a list.  The
# earlier loop followed just the last entry of each nested dict and reused
# the name `list`, hiding the builtin.
result_entries = dictObj.get("RESULT")
if isinstance(result_entries, list):
    for entry in result_entries:
        _print_object_ids(entry)
Dictionary Object is
dictObj = {
"Id": 1,
"RESULT": [
{
"Check": {
"checkinstance": {
"ObjectId": 2,
"Class": "Base"
}
},
"Class": "Base"
},
{
"ObjectId": 2,
"Class": "Base",
"Start": {}
},
{
"Display": {
"part": {
"Class": "Base",
"ObjectId": [
[
"Value is 1"
],
""
]
},
"load": {
"ObjectId": 3,
"Class": "Base"
}
},
"Class": "Base"
},
{
"ObjectId": 2,
"Class": "Base",
"Stop": {}
}
]
}
This code makes no assumption of the structure of the input object, obj (it can be a dictionary or list):
obj = {
"Id": 1,
"RESULT": [
{
"Check": {
"checkinstance": {
"ObjectId": 2,
"Class": "Base"
}
},
"Class": "Base"
},
{
"ObjectId": 2,
"Class": "Base",
"Start": {}
},
{
"Display": {
"part": {
"Class": "Base",
"ObjectId": [
[
"Value is 1"
],
""
]
},
"load": {
"ObjectId": 3,
"Class": "Base"
}
},
"Class": "Base"
},
{
"ObjectId": 2,
"Class": "Base",
"Stop": {}
}
]
}
def filter(obj):
    """Print the value of every "ObjectId" key found inside *obj*.

    *obj* may be a list, a dict, or anything else (which is ignored).  A
    dict's own "ObjectId" is printed before its list/dict values are
    searched; list items are searched in order.
    """
    if isinstance(obj, list):
        children = obj
    elif isinstance(obj, dict):
        if "ObjectId" in obj:
            print(obj["ObjectId"])
        # Only containers can hold further ObjectIds.
        children = (v for v in obj.values() if isinstance(v, (list, dict)))
    else:
        return
    for child in children:
        filter(child)
# Print every ObjectId value found in `obj`.
filter(obj)
Prints:
2
2
[['Value is 1'], '']
3
2
Python Demo
If you don't want to print the values but instead accumulate them into a list then:
def filter2(obj):
    """Yield the value of every "ObjectId" key found inside *obj*.

    *obj* may be a list, a dict, or anything else (which yields nothing).
    A dict's own "ObjectId" is yielded before its list/dict values are
    searched; list items are searched in order.
    """
    if isinstance(obj, list):
        children = obj
    elif isinstance(obj, dict):
        if "ObjectId" in obj:
            yield obj["ObjectId"]
        # Only containers can hold further ObjectIds.
        children = (v for v in obj.values() if isinstance(v, (list, dict)))
    else:
        return
    for child in children:
        yield from filter2(child)
# Collect everything filter2() yields for `obj` into a list and print it.
print(list(filter2(obj)))
Prints:
[2, 2, [['Value is 1'], ''], 3, 2]
Python Demo
You can have a recursive function which searches all keys in a dict and then all the keys of the values which are also dicts:
# dictObj as in post
def findKey(key, d, result):
    """Append to *result* every value stored under *key* anywhere inside *d*.

    Args:
        key: the dictionary key to look for.
        d: the structure to search; dicts and lists are walked recursively
            (lists used to be skipped), other values are ignored.
        result: list that receives the matching values, in traversal order.

    Returns:
        None.  *result* is extended in place.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            if k == key:
                result.append(v)
            findKey(key, v, result)
    elif isinstance(d, list):
        for item in d:
            findKey(key, item, result)
# Collect every 'ObjectId' value found under dictObj["RESULT"].  The loop
# used to read from `jsonObj`, a name that is never defined here, instead of
# `dictObj`.
result = []
for key in dictObj:
    if key == "RESULT":
        if isinstance(dictObj[key], list):
            for d in dictObj[key]:
                findKey('ObjectId', d, result)
print(result)
Output:
[2, 2, [[ "Value is 1"], ""], 3, 2]
I have a list of dicts as follows:
[{"server":"8.8.8.8",
"domains":[{"google.com":[{"time":15, "serial":14}, {"time":78, "serial":14}]},
{"intuit.com":[{"time":20, "serial":23}, {"time":91, "serial":18}]}
]
},
{"server":"8.8.4.4",
"domains":[{"google.com":[{"time":19, "serial":45}, {"time":92, "serial":76}]},
{"intuit.com":[{"time":45, "serial":89}, {"time":93, "serial":74}]}
]
},
{"server":"206.67.222.222",
"domains":[{"google.com":[{"time":98, "serial":76}, {"time":64, "serial":54}]},
{"intuit.com":[{"time":43, "serial":21}, {"time":65, "serial":59}]}
]
}]
How would I go about creating a structure where I select only the dict for each domain with the max serial number and when I have the same serial number, select the max time so that I am left with the following:
[{"server":"8.8.8.8",
"domains":[{"google.com":{"time":78, "serial":14}},
{"intuit.com":{"time":20, "serial":23}}
]
},
{"server":"8.8.4.4",
"domains":[{"google.com":{"time":92, "serial":76}},
{"intuit.com":{"time":45, "serial":89}}
]
},
{"server":"206.67.222.222",
"domains":[{"google.com":{"time":98, "serial":76}},
{"intuit.com":{"time":65, "serial":59}}
]
}]
The solution using built-in max() function:
import json
# l is your initial list of dicts
# For every domain keep only the record with the highest `serial`, breaking
# ties on the highest `time`.  Comparing (serial, time) tuples handles both
# rules in one pass; the earlier "are all serials equal?" test picked the
# wrong record when the top serial was shared but other serials differed.
for item in l:
    for d in item['domains']:
        for k, v in d.items():
            d[k] = max(v, key=lambda o: (o['serial'], o['time']))

# `json.dumps` used for pretty printing of nested dicts
print(json.dumps(l, indent=4))
The output:
[
{
"server": "8.8.8.8",
"domains": [
{
"google.com": {
"serial": 14,
"time": 78
}
},
{
"intuit.com": {
"serial": 23,
"time": 20
}
}
]
},
{
"server": "8.8.4.4",
"domains": [
{
"google.com": {
"serial": 76,
"time": 92
}
},
{
"intuit.com": {
"serial": 89,
"time": 45
}
}
]
},
{
"server": "206.67.222.222",
"domains": [
{
"google.com": {
"serial": 76,
"time": 98
}
},
{
"intuit.com": {
"serial": 59,
"time": 65
}
}
]
}
]
Try this (d is your dict):
# For every domain keep a {"time", "serial"} dict built from the record with
# the highest serial, ties broken by the highest time.
for item in d:
    for i in item["domains"]:
        for k, v in i.items():
            best = max(v, key=lambda j: (j["serial"], j["time"]))
            i[k] = {"time": best["time"], "serial": best["serial"]}
# print() with parentheses works as a function call; the bare `print d`
# statement is a syntax error on Python 3.
print(d)
You can sort your time-serial list for each domain by your requirement and get the first one, let variable data be your input list:
def domain_sorter(d):
    """Return the records of *d* ordered from best to worst.

    Args:
        d: iterable of dicts that each have numeric ``'serial'`` and
            ``'time'`` entries.

    Returns:
        A new list sorted by descending ``serial`` and, for equal serials,
        by descending ``time``.  Records equal on both keep their original
        relative order.

    ``sorted(..., cmp=...)`` no longer exists in Python 3, so the ordering
    is expressed as a key function instead.
    """
    return sorted(d, key=lambda rec: (rec['serial'], rec['time']), reverse=True)
def filter_domain(domain):
    """Return ``{name: best_record}`` for the first entry of *domain*.

    The best record is the first element of ``domain_sorter`` applied to
    that entry's list.  Only the first entry is considered; an empty
    *domain* gives ``None``.
    """
    for name in domain:
        ranked = domain_sorter(domain[name])
        return {name: ranked[0]}
# Print, for every server in `data`, its name plus each of its domains
# reduced by filter_domain().  Written as a print() call because the
# `print [...]` statement form is a syntax error on Python 3.
print([{
    "server": e['server'],
    "domains": [filter_domain(domain) for domain in e['domains']]
} for e in data])