Python remove nested keys and a certain value's keys - python

1. Need to remove every key, at any nesting level, whose value is null.
2. Need to remove the nested key key_c that sits directly under key_2 (i.e. the path key_2.key_c).
Should result in the outcome block below.
Original json
{
"key_1": {
"key_a": 111111},
"key_2": {
"key_a": "value",
"key_b": null,
"key_c": {
"key_c_a": {
"key_c_b": "value"}
},
"key_d": [{"key_c": "value"}],
}
Outcome
{
"key_1": {
"key_a": 111111},
"key_2": {
"key_a": "value",
"key_d": [{"key_c": "value"}],
}

You can achieve this by recursively traversing the input JSON object and filtering out the unwanted values:
import json
def filter_json(obj):
    """Return a filtered copy of a decoded JSON value.

    Two rules are applied while walking dicts and lists recursively:

    * any dict entry whose value is ``None`` (JSON ``null``) is dropped;
    * inside any dict stored under the key ``"key_2"``, the direct child
      key ``"key_c"`` is dropped (the path ``key_2.key_c``).

    The input is not modified; new dicts and lists are built. ``None``
    items that are direct elements of a list are kept, since only dict
    entries are filtered. Non-container values are returned unchanged.
    """
    if isinstance(obj, dict):
        new_obj = {}
        for k, v in obj.items():
            if v is None:
                continue
            # Only strip "key_c" when the "key_2" value really is a dict;
            # a scalar or list stored under "key_2" has no .items().
            if k == "key_2" and isinstance(v, dict):
                v = {k2: v2 for k2, v2 in v.items() if k2 != "key_c"}
            new_obj[k] = filter_json(v)
        return new_obj
    if isinstance(obj, list):
        return [filter_json(elem) for elem in obj]
    return obj
Usage
json_str = '''
{
"key_1": {
"key_a": 111111
},
"key_2": {
"key_a": "value",
"key_b": null,
"key_c": {
"key_c_a": {
"key_c_b": "value"
}
},
"key_d": [
{
"key_c": "value"
}
]
}
}
'''
json_obj = json.loads(json_str)
filtered_obj = filter_json(json_obj)

Related

Python nested dictionary value convert to float or decimal from string

I have following dictionary
data={"Volkswagen":{
"Caddy Kombi":{
"2022":"285000000.00",
"2021":"212500000.00"
},
"Caddy Cargo":{
"2022":"193100000.00",
"2021":"190100000.00",
"2019":"1289456545.00"
}
},
"Tesla":{
"Model 3":{
"2022":"707160000.00",
"2021":"630000000.00",
"2020":"630000000.00"
},
"Model S":{
"2021":"630000000.00",
"2020":"630000000.00"
},
"Model X":{
"2021":"1102500000.00",
},
"Model Y":{
"2021":"735000000.00",
"2020":"735000000.00"
}
}}
I want to convert all the string prices to float values i.e "2021":285000000.00
I tried up to here, but the result is not what I expected. In the end I want the same dictionary, with its values converted to float:
for i, y in data.items():
for k, x in y.items():
for z,q in (y[k].items()):
print(float(q))
dictionary = {
i: {
k: x
}
}
print(dictionary)
You can try recursion:
def to_float(o):
    """Convert every string found in a nested dict/list structure to float, in place.

    Dicts and lists are walked recursively. Each ``str`` that is a dict
    value or a list element is replaced by ``float(value)``; every other
    leaf is left untouched.

    Returns ``None``: the structure passed in is mutated.

    Raises:
        ValueError: if a string cannot be parsed by ``float``.
    """
    if isinstance(o, dict):
        entries = o.items()
    elif isinstance(o, list):
        # Enumerate so string elements can be replaced by index; recursing
        # into a bare string could never write the result back.
        entries = enumerate(o)
    else:
        return
    for k, v in entries:
        if isinstance(v, str):
            o[k] = float(v)
        else:
            to_float(v)
to_float(data)
print(data)
Prints:
{
"Volkswagen": {
"Caddy Kombi": {"2022": 285000000.0, "2021": 212500000.0},
"Caddy Cargo": {
"2022": 193100000.0,
"2021": 190100000.0,
"2019": 1289456545.0,
},
},
"Tesla": {
"Model 3": {
"2022": 707160000.0,
"2021": 630000000.0,
"2020": 630000000.0,
},
"Model S": {"2021": 630000000.0, "2020": 630000000.0},
"Model X": {"2021": 1102500000.0},
"Model Y": {"2021": 735000000.0, "2020": 735000000.0},
},
}
It helps to give your variables names that make sense. When you get to the last level, simply overwrite the value of the corresponding year. Since dicts are mutable, changing the value in the inner dictionary is reflected in the original dictionary.
for make, make_d in data.items():
for model, model_d in make_d.items():
for year, price in model_d.items():
model_d[year] = float(price)
print(data)
which gives:
{
"Volkswagen": {
"Caddy Kombi": {
"2022": 285000000.0,
"2021": 212500000.0
},
"Caddy Cargo": {
"2022": 193100000.0,
"2021": 190100000.0,
"2019": 1289456545.0
}
},
"Tesla": {
"Model 3": {
"2022": 707160000.0,
"2021": 630000000.0,
"2020": 630000000.0
},
"Model S": {
"2021": 630000000.0,
"2020": 630000000.0
},
"Model X": {
"2021": 1102500000.0
},
"Model Y": {
"2021": 735000000.0,
"2020": 735000000.0
}
}
}

Python convert string holding nested json to dict

I have the following list (notice "keyE" has a dictionary as a string):
[
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"
}
]
And I want to convert it to this (it can have any number of nested dictionaries and lists):
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
So far, I have the following but I don't know what to do after the json.loads. I know I have to recursively call the function but not sure how.
import json
def convert(data_list: list) -> list:
for i in range(len(data_list)):
obj = data_list[i]
for key, value in obj.items():
if isinstance(value, str) and any(char in "{[]}" for char in value):
try:
data = json.loads(value)
# What do I do here?
except:
continue
No idea if this'll work for your more complicated cases, but I was able to use ast.literal_eval() and some really janky chained str.replace calls:
import ast
def replace(s):
    """Parse a string holding JSON whose nested objects were embedded as quoted text.

    Every ``"{`` is rewritten to ``{`` and every ``}"`` to ``}`` so that
    the embedded objects become part of the outer document, then the
    result is parsed.

    Parsing is attempted with ``json.loads`` first so that the JSON
    literals ``true``, ``false`` and ``null`` are understood; if that
    fails, ``ast.literal_eval`` is used, so Python-literal input such as
    single-quoted strings is still accepted.

    NOTE: the textual substitution is blind. A legitimate string value
    that starts with ``{`` or ends with ``}`` is unquoted as well.
    """
    import json  # local import: only `ast` is imported next to this helper

    unquoted = s.replace('"{', "{").replace('}"', "}")
    try:
        return json.loads(unquoted)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return ast.literal_eval(unquoted)
x = [{"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"},
{"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"}]
for d in x:
for key, value in d.items():
if "{" in value:
d[key] = replace(value)
Output:
In [4]: x
Out[4]:
[{'keyA': 'Example',
'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}},
{'keyA': 'Example2',
'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
In [5]: x[0]["keyB"]["keyD"]["keyE"]["name"]
Out[5]: 'foo'
Your nested key looks like a JSON string, which can be loaded into a dictionary with the json.loads method. However, JSON strings nested inside it are not converted by that call, so I've added a recursive function to handle the nested dictionaries as well.
import json
from json import JSONDecodeError
def recurse(d):
    """Decode JSON-object strings nested at any depth and return the result.

    * A dict is walked in place: each value is replaced by ``recurse(value)``
      and the same dict object is returned.
    * A string that decodes to a JSON object is decoded, walked the same
      way, and the resulting dict is returned.
    * Anything else is returned unchanged: non-strings, strings that are
      not valid JSON, and strings that decode to something other than an
      object (``"5"``, ``"[1, 2]"``, ``"true"``).
    """
    if isinstance(d, dict):
        loaded_d = d
    else:
        try:
            loaded_d = json.loads(d)
        except (JSONDecodeError, TypeError):
            return d
        # A scalar or array has no .items(); keep the original text rather
        # than fail or silently change its type.
        if not isinstance(loaded_d, dict):
            return d
    for k, v in loaded_d.items():
        loaded_d[k] = recurse(v)
    return loaded_d
for d in data_list:
for key, val in d.items():
d[key] = recurse(val)
Output:
[
{
"keyA": "Example",
"keyB": {"keyC": 2, "keyD": {"keyE": {"name": "foo"}}, "keyF": 0},
},
{
"keyA": "Example2",
"keyB": {"keyC": 6, "keyD": {"keyE": {"name": "bar"}}, "keyF": 5},
},
]
Okay, here is a recursive solution:
import json
from json import JSONDecodeError
data = [
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"foo\\\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"bar\\\"}\"},\"keyF\":5}"
}
]
def rec_convert(data):
    """Decode JSON-object strings stored as values of ``data``, in place.

    For every entry of the dict ``data``:

    * a dict value is walked recursively;
    * a string value that decodes to a JSON object is replaced by the
      decoded dict, which is then walked recursively;
    * every other value is left as it is. This covers non-strings, strings
      that are not valid JSON, and strings that decode to a scalar or an
      array.

    Returns ``None``; ``data`` is mutated.
    """
    for k, v in data.items():
        if isinstance(v, dict):
            rec_convert(v)
            continue
        try:
            decoded = json.loads(v)
        except (JSONDecodeError, TypeError):
            continue
        # Only objects are substituted. A decoded scalar or array must not
        # overwrite the entry and cannot be walked with .items().
        if isinstance(decoded, dict):
            rec_convert(decoded)
            data[k] = decoded
for el in data:
rec_convert(el)
print("raw print:")
print(data)
print("pretty print")
print(json.dumps(data, indent=2))
and output:
raw print:
[{'keyA': 'Example', 'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}}, {'keyA': 'Example2', 'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
pretty print
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
I've updated Vishal Singh's answer to also handle lists inside a dictionary.
def decode_json_recursively(obj):
    """Decode JSON text nested anywhere inside dicts, lists and strings.

    * list: a new list is returned with every element processed.
    * dict: every value is processed and written back in place; the same
      dict object is returned.
    * anything else is passed to ``json.loads``. If that fails, ``obj`` is
      returned unchanged. If it yields a dict or list, that container is
      processed recursively as well. If it yields a scalar, the decoded
      scalar is returned, so the string ``"5"`` becomes the integer ``5``.
    """
    if isinstance(obj, list):
        return [decode_json_recursively(el) for el in obj]
    if isinstance(obj, dict):
        for k, v in obj.items():
            obj[k] = decode_json_recursively(v)
        return obj
    try:
        data = json.loads(obj)
    except (JSONDecodeError, TypeError):
        return obj
    # Descend into whatever container the text decoded to, not only dicts,
    # so JSON strings inside a decoded array are handled too.
    if isinstance(data, (dict, list)):
        return decode_json_recursively(data)
    return data

Pythonic way to transform/flatten JSON containing nested table-as-list-of-dicts structures

Suppose I have a table represented in JSON as a list of dicts, where the keys of each item are the same:
J = [
{
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
{
"symbol": "LTC",
"name": "LiteCoin"
:
},
And suppose I require efficient lookup, e.g. symbols['ETHBTC']['name']
I can transform with symbols = { item['symbol']: item for item in J }, producing:
{
"ETHBTC": {
"symbol": "ETHBTC",
"name": "Ethereum",
:
},
"LTCBTC": {
"symbol": "LTCBTC",
"name": "LiteCoin",
:
},
(Ideally I would also remove the now redundant symbol field).
However, what if each item itself contains a "table-as-list-of-dicts"?
Here's a fuller minimal example (I've removed lines not pertinent to the problem):
J = {
"symbols": [
{
"symbol":"ETHBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
},
{
"symbol":"LTCBTC",
"filters":[
{
"filterType":"PRICE_FILTER",
"minPrice":"0.00000100",
},
{
"filterType":"PERCENT_PRICE",
"multiplierUp":"5",
},
],
}
]
}
So the challenge is to transform this structure into:
J = {
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100",
:
}
I can write a flatten function:
def flatten(L: list, key) -> dict:
    """Turn a list of row dicts into a dict indexed by each row's ``key`` value.

    Each row ``D`` in ``L`` contributes one entry: ``D[key]`` maps to a
    copy of ``D`` without the ``key`` field. Rows sharing the same
    ``D[key]`` overwrite one another; the last one wins.

    The rows in ``L`` are not modified, so the function can safely be
    called again on the same list.

    Raises:
        KeyError: if a row has no ``key`` field.
    """
    # Build a fresh dict per row instead of deleting from the row itself.
    # This leaves the caller's data intact and does not depend on the key
    # expression being evaluated before the value expression.
    return {
        D[key]: {k: v for k, v in D.items() if k != key}
        for D in L
    }
Then I can flatten the outer list and loop through each key/val in the resulting dict, flattening val['filters']:
J['symbols'] = flatten(J['symbols'], key="symbol")
for symbol, D in J['symbols'].items():
D['filters'] = flatten(D['filters'], key="filterType")
Is it possible to improve upon this using glom (or otherwise)?
Initial transform has no performance constraint, but I require efficient lookup.
I don't know if you'd call it pythonic but you could make your function more generic using recursion and dropping key as argument. Since you already suppose that your lists contain dictionaries you could benefit from python dynamic typing by taking any kind of input:
from pprint import pprint
def flatten_rec(I) -> dict:
    """Recursively turn every "table" (list of dicts) into a dict of rows.

    * dict: returned as a new dict with every value processed.
    * list in which every element is a non-empty dict: converted to a
      dict. For each row, the value of its FIRST entry becomes the key and
      the remaining entries, processed recursively, become the value. An
      empty list becomes ``{}``.
    * any other list (scalars, mixed content, empty rows): kept as a list
      with every element processed.
    * anything else: returned unchanged.

    The result relies on dict insertion order, because the first field of
    each row is taken as its identifier.
    """
    if isinstance(I, dict):
        return {k: flatten_rec(v) for k, v in I.items()}
    if isinstance(I, list):
        # Only a list made solely of non-empty dicts can be keyed by its
        # rows' first field; anything else stays a list.
        if all(isinstance(D, dict) and D for D in I):
            table = {}
            for D in I:
                (_, row_id), *rest = D.items()
                table[row_id] = {k: flatten_rec(v) for k, v in rest}
            return table
        return [flatten_rec(v) for v in I]
    return I
pprint(flatten_rec(J))
Output:
{'symbols': {'ETHBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}},
'LTCBTC': {'filters': {'PERCENT_PRICE': {'multiplierUp': '5'},
'PRICE_FILTER': {'minPrice': '0.00000100'}}}}}
Since you have different transformation rules for different keys, you can keep a list of the key names that require "grouping" on:
# Names of the fields whose value identifies a row; a dict holding one of
# them is re-keyed by that value.
t = ['symbol', 'filterType']


def transform(d):
    """Re-key list-of-dict tables inside ``d`` by their grouping field.

    * If ``d`` contains at least one key listed in ``t``, the result is a
      single-entry dict. Its key is the value of the first such field in
      ``d``'s own order, and its value is ``transform`` applied to ``d``
      with all grouping fields removed.
    * Otherwise each entry of ``d`` is copied. A value that is a list made
      only of dicts is replaced by the merge of ``transform(row)`` over its
      rows, where later rows overwrite earlier ones on a key clash. Every
      other value is kept unchanged, including lists that hold non-dict
      items and plain nested dicts.

    ``d`` itself is not modified.
    """
    grouping = {a: b for a, b in d.items() if a in t}
    if grouping:
        row_id = next(iter(grouping.values()))
        remainder = {a: b for a, b in d.items() if a not in grouping}
        return {row_id: transform(remainder)}

    result = {}
    for a, b in d.items():
        # Only a list whose items are all dicts can be merged into a table;
        # a list of scalars has nothing to re-key and is passed through.
        if isinstance(b, list) and all(isinstance(j, dict) for j in b):
            merged = {}
            for j in b:
                merged.update(transform(j))
            result[a] = merged
        else:
            result[a] = b
    return result
import json
print(json.dumps(transform(J), indent=4))
{
"symbols": {
"ETHBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
},
"LTCBTC": {
"filters": {
"PRICE_FILTER": {
"minPrice": "0.00000100"
},
"PERCENT_PRICE": {
"multiplierUp": "5"
}
}
}
}
}

Remove keys but keep values from dictionary

Let's say I have this dictionary:
{
"id": "132-sd-sa-23-a-1",
"data": {
"lastUpdated": { "S": "2020-07-22T21:39:20Z" },
"profile": {
"M": {
"address": { "L": [] },
"fakeField": { "S": "someValue" },
"someKey": { "M": { "firstName": { "S": "Test" } } }
}
},
"groups": {
"L": [{ "S": "hello world!" }]
}
}
}
How can I remove the "M", "S", "L", etc. keys from the dictionary but keep the values. So it would turn into this:
{
"id": "132-sd-sa-23-a-1",
"data": {
"lastUpdated": "2020-07-22T21:39:20Z",
"profile": {
"address": [],
"fakeField": "someValue",
"someKey": { "firstName": "Test" }
},
"groups": ["hello world!"]
}
}
I could turn the dictionary into a string, loop through it, and remove what's necessary but that doesn't seem efficient or fast. I can't save the original list as the output that I'm hoping for initially so I need to convert it myself.
Sounds like a job for recursion:
def unsml(obj):
    """Strip single-key ``"S"`` / ``"M"`` / ``"L"`` wrapper dicts from a nested structure.

    * A dict with exactly one entry whose key is ``"S"``, ``"M"`` or
      ``"L"`` is replaced by its processed value.
    * Any other dict is rebuilt with every value processed.
    * A list is rebuilt with every element processed.
    * Everything else is returned unchanged.

    The input is not modified. NOTE: a genuine single-entry dict whose only
    key happens to be ``"S"``, ``"M"`` or ``"L"`` cannot be told apart from
    a wrapper and is unwrapped too.
    """
    if isinstance(obj, dict):
        if len(obj) == 1:
            ((key, value),) = obj.items()  # the only key and value
            # Exact membership, not `key in "SML"`: the substring test also
            # matched "", "SM", "ML" and "SML".
            if key in ("S", "M", "L"):
                return unsml(value)
        # Process every value. unsml already passes scalars through, and
        # lists stored directly under a key must be descended into as well.
        return {key: unsml(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [unsml(value) for value in obj]
    return obj
Then just do new_dict = unsml(old_dict).

Remove all occurences of a value from a nested dictionary

I have a nested dictionary as the following.
myDict= {
"id": 10,
"state": "MY LIST",
"Stars":
{
"BookA": {
"id": 10,
"state": "new book",
"Mystery": {
"AuthorA":
{
"id": "100",
"state": "thriller"
},
"AuthorB":
{
"id": "112",
"state": "horror"
}
},
"Thriller": {
"Store1":
{
"id": "300",
"state": "Old"
}
}
}
}
}
I want to return a dictionary which has all of the "state": "text" removed. So that means, I want to remove all the "state" fields and have an output as below.
I want it to be generic method as the dictionary could be nested on many levels.
myDict=
{
id: 10,
"Stars":
{
"BookA": {
"id": 10
"Mystery": {
"AuthorA":
{
"id": "100"
},
"AuthorB":
{
"id": "112"
}
},
"Thriller": {
"Store1":
{
"id": "300"
}
}
}
}
I tried the following, but it doesn't seem to work: it only removes the top-level "state": "MY LIST". Could someone help me resolve the issue?
def get(self):
removelist= ["state"]
new_dict = {}
for key, item in myDict.items():
if key not in removelist:
new_dict.update({key: item})
return new_dict
It doesn't remove all the "state" values.
You can use a DFS:
def remove_keys(d, keys):
    """Return a copy of ``d`` with every entry whose key is in ``keys`` removed, at any depth.

    Dicts are rebuilt without the unwanted keys and their remaining values
    are processed recursively. Lists are rebuilt with every element
    processed, so dicts nested inside lists are cleaned too. Any other
    value is returned as it is. The input is not modified.

    ``keys`` may be any container supporting ``in`` (list, tuple, set, ...).
    """
    if isinstance(d, dict):
        return {k: remove_keys(v, keys) for k, v in d.items() if k not in keys}
    if isinstance(d, list):
        return [remove_keys(item, keys) for item in d]
    return d
The idea is to remove recursively the keys from subtrees: for every subtree that is a nested dict, return a dict without the keys to remove, using a dict comprehension; for every leaf (that is a single value), just return the value.
Test:
from pprint import pprint
pprint(remove_keys(myDict, ['state']))
Output:
{'Stars': {'BookA': {'Mystery': {'AuthorA': {'id': '100'},
'AuthorB': {'id': '112'}},
'Thriller': {'Store1': {'id': '300'}},
'id': 10}},
'id': 10}
The problem is you aren't handling the nested dictionaries.
def get(self):
removelist= ["state"]
new_dict = {}
for key, item in myDict.items():
if key not in removelist:
new_dict.update({key: item})
if isinstance(item, dict):
# You'll need to handle this use case.
return new_dict
To elaborate, let's look back at your dictionary:
myDict= {
"id": 10, # int
"state": "MY LIST", # string
"Stars": { # dictionary
"BookA": {
"id": 10, # int
"state": "new book", # string
"Mystery": { # dictionary
"AuthorA": {
"id": "100",
"state": "thriller"
},
"AuthorB": {
"id": "112",
"state": "horror"
}
},
"Thriller": {
"Store1": {
"id": "300",
"state": "Old"
}
}
}
}
}
I commented in the types for clarity. Your code is currently parsing myDict and ignoring the key "state". Once you hit the value "Stars", you need to parse that dictionary to also ignore the key "state".

Categories