Related
My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get values for certain fields in the response along with their keys. So from this response, the fields for which I expect values are a, c,e,g,i,j along with the respective keys.
Eg: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Which resulted in the error. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
if isinstance(d,dict):
get_key_value(d, res_dct, lst)
else:
lst.append(f'{k}:{v}')
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store key & value if only value not list or dict.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
get_key_value(d, res_dct, lst)
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']
I want to update Dict dictionary's value by inp dictionary's values using recursion or loop.
also the format should not change mean use recursion or loop on same format
please suggest a solution that is applicable to all level nesting not for this particular case
dict={
"name": "john",
"quality":
{
"type1":"honest",
"type2":"clever"
},
"marks":
[
{
"english":34
},
{
"math":90
}
]
}
inp = {
"name" : "jack",
"type1" : "dumb",
"type2" : "liar",
"english" : 28,
"math" : 89
}
Another solution, changing the dict in-place:
dct = {
"name": "john",
"quality": {"type1": "honest", "type2": "clever"},
"marks": [{"english": 34}, {"math": 90}],
}
inp = {
"name": "jack",
"type1": "dumb",
"type2": "liar",
"english": 28,
"math": 89,
}
def change(d, inp):
if isinstance(d, list):
for i in d:
change(i, inp)
elif isinstance(d, dict):
for k, v in d.items():
if not isinstance(v, (list, dict)):
d[k] = inp.get(k, v)
else:
change(v, inp)
change(dct, inp)
print(dct)
Prints:
{
"name": "jack",
"quality": {"type1": "dumb", "type2": "liar"},
"marks": [{"english": 28}, {"math": 89}],
}
First, make sure you change the name of the first Dictionary, say to myDict, since dict is reserved in Python as a Class Type.
The below function will do what you are looking for, in a recursive manner.
def recursive_swipe(input_var, updates):
if isinstance(input_var, list):
output_var = []
for entry in input_var:
output_var.append(recursive_swipe(entry, updates))
elif isinstance(input_var, dict):
output_var = {}
for label in input_var:
if isinstance(input_var[label], list) or isinstance(input_var[label], dict):
output_var[label] = recursive_swipe(input_var[label], updates)
else:
if label in updates:
output_var[label] = updates[label]
else:
output_var = input_var
return output_var
myDict = recursive_swipe(myDict, inp)
You may look for more optimal solutions if there are some limits to the formatting of the two dictionaries that were not stated in your question.
I have been working on a project that involves parsing a CSV file in order to turn all the data into a very specifically formatted JSON following a complex schema. I have to custom make this program as the required complexity of the JSON makes existing converters fail. I am mostly there, I have run into one final roadblock though:
I have nested dictionaries, and occasionally there must be a list within those, this list will contain further dictionaries. This is fine, I have been able to complete that, BUT now I need to find a way to add more nested dictionaries within those. Below is a simplified breakdown of the concept.
the CSV will look something like this, where the # before a tag indicates it's a list
x.a, x.b.z, x.b.y, x.#c.z.nest1, x.#c.z.nest2, x.#c.yy, x.d, x.e.z, x.e.y
ab, cd, ef, gh, ij, kl, mn, op, qr
this should result in the following JSON
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"z": {
"nest1": "gh",
"nest2": "ij"
}
},
{
"yy": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
}
This is one issue that I haven't been able to solve, my current code can only do one dictionary after the list item, not further. I also need to be able to somehow do the following within a list of dictionaries:
"c": [
{
"z": {
"nest1": "gh"
},
"zz": {
"nest2": "ij"
}
},
{
"yy": "kl"
}
i.e. somehow add multiple nested dictionaries within the dictionary in the list. The problem with this occurs within the fact that these aren't reference-able by name, so I do not know how I could potentially indicate to do that within the CSV format.
Here is the code I have that works up to the first dictionary nested within a list:
import json
import pandas as pd
from os.path import exists
# df1 = pd.read_csv("excelTestFacilities.csv", header = 1, sep=",", keep_default_na=False, engine="python")
# df2 = pd.read_csv("excelTestFacilityContacts.csv", header = 1, sep=",", keep_default_na=False, engine="python")
# df = pd.merge(df1, df2, how = 'inner')
df = pd.read_csv("csvTestFile.csv", header = 1, sep=", ", keep_default_na=False, engine="python")
#print(df) # uncomment to see the transformation
json_data = df.to_dict(orient="records")
#print(json_data)
def unflatten_dic(dic):
"""
Unflattens a CSV list into a set of nested dictionaries
"""
ini = {}
for k,v in list(dic.items()):
node = ini
list_bool = False
*parents, key = k.split('.')
for parent in parents:
if parent[0] == '#':
list_bool = True
if list_bool:
for parent in parents:
if parent[0] == '#':
node[parent[1:]] = node = node.get(parent[1:], [])
else:
node[parent] = node = node.get(parent, {})
node.append({key : v})
else:
for parent in parents:
node[parent] = node = node.get(parent, {})
node[key] = v
return ini
def merge_lists(dic):
"""
Removes duplicates within sets
"""
for k,v in list(dic.items()):
if isinstance(v, dict):
keys = list(v.keys())
vals = list(v.values())
if all(isinstance(l, list) and len(l)==len(vals[0]) for l in vals):
dic[k] = []
val_tuple = set(zip(*vals)) # removing duplicates with set()
for t in val_tuple:
dic[k].append({subkey: t[i] for i, subkey in enumerate(keys)})
else:
merge_lists(v)
elif isinstance(v, list):
dic[k] = list(set(v)) # removing list duplicates
def clean_blanks(value):
"""
Recursively remove all None values from dictionaries and lists, and returns
the result as a new dictionary or list.
"""
if isinstance(value, list):
return [clean_blanks(x) for x in value if x != ""]
elif isinstance(value, dict):
return {
key: clean_blanks(val)
for key, val in value.items()
if val != "" and val != {}
}
else:
return value
def add_to_dict(section_added_to, section_to_add, value, reportNum):
"""
Adds a value to a given spot within a dictionary set.
section_added_to is optional for adding the set to a deeper section such as facility
section_to_add is the name that the new dictionary entry will have
value is the item to be added
reportNum is the number indicating which report to add to, starting at 0
"""
if section_added_to != '':
end_list[reportNum][section_added_to][section_to_add] = value
else:
end_list[reportNum][section_to_add] = value
def read_add_vals(filename_prefix, added_to, section):
for i in range(len(end_list)):
temp_list = []
filename = filename_prefix + str(i+1) + ".csv"
if not exists(filename):
continue;
temp_df = pd.read_csv(filename, header = 1, sep=",", keep_default_na=False, engine="python")
temp_json = temp_df.to_dict(orient="records")
for y in temp_json:
return_ini = unflatten_dic(y)
temp_list.append(return_ini)
add_to_dict(added_to, section, temp_list, i)
global end_list
end_list = []
for x in json_data:
return_ini = unflatten_dic(x)
end_list.append(return_ini)
#read_add_vals('excelTestPermitsFac', 'facility', 'permits');
json_data = clean_blanks(end_list)
final_json = {"year":2021, "version":"2022-02-14", "reports":json_data}
print(json.dumps(final_json, indent=4))
There is some parts of this code that are involved in other components of the overall end JSON, but I am mainly concerned with how to change unflatten_dic()
Here is my current working code for changing unflatten_dic(), even though it doesn't work...
def list_get(list, list_item):
i = 0
for dict in list:
if list_item in dict:
return dict.get(list_item, {})
i += 1
return {}
def check_in_list(list, list_item):
i = 0
for dict in list:
if list_item in dict:
return i
i += 1
return -1
def unflatten_dic(dic):
"""
Unflattens a CSV list into a set of nested dictionaries
"""
ini = {}
for k,v in list(dic.items()):
node = ini
list_bool = False
*parents, key = k.split('.')
for parent in parents:
if parent[0] == '#':
list_bool = True
previous_node_list = False
if list_bool:
for parent in parents:
print(parent)
if parent[0] == '#':
node[parent[1:]] = node = node.get(parent[1:], [])
ends_with_dict = False
previous_node_list = True
else:
print("else")
if previous_node_list:
print("prev list")
i = check_in_list(node, parent)
if i >= 0:
node[i] = node = list_get(node, parent)
else:
node.append({parent : {}})
previous_node_list = False
ends_with_dict = True
else:
print("not prev list")
node[parent] = node = node.get(parent, {})
previous_node_list = False
if ends_with_dict:
node[key] = v
else:
node.append({key : v})
else:
for parent in parents:
node[parent] = node = node.get(parent, {})
node[key] = v
#print(node)
return ini
Any, even small, amount of help would be greatly appreciated.
It is easiest to use recursion and collections.defaultdict to group child entries on their parents (each entry is separated by the . in the csv data):
from collections import defaultdict
def to_dict(vals, is_list = 0):
def form_child(a, b):
return b[0][0] if len(b[0]) == 1 else to_dict(b, a[0] == '#')
d = defaultdict(list)
for a, *b in vals:
d[a].append(b)
if not is_list:
return {a[a[0] == '#':]:form_child(a, b) for a, b in d.items()}
return [{a[a[0] == '#':]:form_child(a, b)} for a, b in d.items()]
import csv, json
with open('filename.csv') as f:
data = list(csv.reader(f))
r = [a.split('.')+[b] for i in range(0, len(data), 2) for a, b in zip(data[i], data[i+1])]
print(json.dumps(to_dict(r), indent=4))
Output:
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"z": {
"nest1": "gh",
"nest2": "ij"
}
},
{
"yy": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
}
I managed to get it working in what seems to be all scenarios. Here is the code that I made for the unflatten_dic() function.
def unflatten_dic(dic):
"""
Unflattens a CSV list into a set of nested dictionaries
"""
ini = {}
for k,v in list(dic.items()):
node = ini
list_bool = False
*parents, key = k.split('.')
# print("parents")
# print(parents)
for parent in parents:
if parent[0] == '#':
list_bool = True
if list_bool:
for parent in parents:
if parent[0] == '#':
node[parent[1:]] = node = node.get(parent[1:], [])
elif parent.isnumeric():
# print("numeric parent")
# print("length of node")
# print(len(node))
if len(node) > int(parent):
# print("node length good")
node = node[int(parent)]
else:
node.append({})
node = node[int(parent)]
else:
node[parent] = node = node.get(parent, {})
try:
node.append({key : v})
except AttributeError:
node[key] = v
else:
for parent in parents:
node[parent] = node = node.get(parent, {})
node[key] = v
return ini
I haven't run into an issue thus far, this is based on the following rules for the CSV:
# before any name results in that item being a list
if the section immediately after a list in the CSV is a number, that will create multiple dictionaries within the list. Here is an example
x.a, x.b.z, x.b.y, x.#c.0.zz, x.#c.1.zz, x.#c.2.zz, x.d, x.e.z, x.e.y, x.#c.1.yy.l, x.#c.1.yy.#m.q, x.#c.1.yy.#m.r
ab, cd, ef, gh, , kl, mn, op, qr, st, uv, wx
12, 34, 56, 78, 90, 09, , 65, 43, 21, , 92
This will result in the following JSON after formatting
"reports": [
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"zz": "gh"
},
{
"yy": {
"l": "st",
"m": [
{
"q": "uv"
},
{
"r": "wx"
}
]
}
},
{
"zz": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
},
{
"x": {
"a": "12",
"b": {
"z": "34",
"y": "56"
},
"c": [
{
"zz": "78"
},
{
"zz": "90",
"yy": {
"l": "21",
"m": [
{
"r": "92"
}
]
}
},
{
"zz": "09"
}
],
"e": {
"z": "65",
"y": "43"
}
}
}
]
This question already has answers here:
How to find a particular JSON value by key?
(9 answers)
Closed 2 years ago.
for the most part of my day i've been trying to find a way to solve this. I'm trying to find a way to find all keys in my json code that have the key "price" and if they do, populate a dictionary or something with the price and the name of each item that has a price. This is the simplified json, please note that the "price" keys can also be further nested. I'm trying to search the whole json code for the key:
{
"status": "success",
"data": {
"top_products": {
"products": [
{
"price": 3,
"name": "Apple"
},
{
"price": 2,
"name": "Banana"
}
]
},
"products": {
"fruits": {
"list": [
{
"price": 4,
"name": "Pear"
},
{
"name": "Kiwi"
},
{
"price": 4,
"name": "Pineapple"
},
{
"name": "Cherry"
}
]
},
"veggies": {
"list": [
{
"price": 3,
"name": "cucumber"
},
{
"name": "tomato"
},
{
"price": 2,
"name": "onion"
},
{
"name": "green pepper"
}
]
}
}
}
}
Here is what i've managed to get working so far (didnt come up with this, found it in some other response):
def findkeys(node, kv):
if isinstance(node, list):
for i in node:
for x in findkeys(i, kv):
yield x
elif isinstance(node, dict):
if kv in node:
yield node[kv]
for j in node.values():
for x in findkeys(j, kv):
yield x
print(list(findkeys(jsonResponse, 'price')))
The first part works, it returns all the keys that have a price. I'm trying to figure out a way to also write the "name" key for all the prices, preferably into a dictionary. Whats the best approach to do this?
Thanks,
Rob
Use the following code, if there are only unique items:
def create_db(data, find, other):
db = {}
def recurse(data):
if isinstance(data, list):
for elem in data:
recurse(elem)
elif isinstance(data, dict):
if find in data:
db[data[other]] = data[find]
for k, v in data.items():
recurse(v)
recurse(data)
return db
>>> create_db(data, 'price', 'name')
{'Apple': 3, 'Banana': 2, 'Pear': 4, 'Pineapple': 4, 'cucumber': 3, 'onion': 2}
Else:
def create_db(data, find, other):
db = {}
ctr = {}
def recurse(data):
if isinstance(data, list):
for elem in data:
recurse(elem)
elif isinstance(data, dict):
if find in data:
if data[other] in ctr:
ctr[data[other]] = str(int(ctr[data[other]] or '1') + 1)
else:
ctr[data[other]] = ''
key = data[other] + ctr[data[other]]
db[key] = data[find]
for k, v in data.items():
recurse(v)
recurse(data)
return db
For example, if data had two Apples:
data = {'status': 'success',
'data': {'top_products': {'products': [{'price': 3, 'name': 'Apple'},
{'price': 4, 'name': 'Apple'},
{'price': 2, 'name': 'Banana'}]}}}
# Second approach will add a serial number to each duplicate item
>>> create_db(data, 'price', 'name')
{'Apple': 3, 'Apple2': 4, 'Banana': 2}
For easier access in case of duplicates, you can create a nested dict:
def create_db(data, find, other):
db = {}
def recurse(data):
if isinstance(data, list):
for elem in data:
recurse(elem)
elif isinstance(data, dict):
if find in data:
if data[other] in db:
if isinstance(db[data[other]], dict):
db[data[other]][len(db[data[other]]) + 1] = data[other]
else:
db[data[other]] = {0: db.pop(data[other]), 1: data[find]}
else:
db[data[other]] = data[find]
for k, v in data.items():
recurse(v)
recurse(data)
return db
# For the data in above approach:
>>> create_db(data, 'price', 'name')
{'Apple': {0: 3, 1: 4}, 'Banana': 2}
Let's say I have a dict like
{
"key_a": "value",
"key_b": {
"key_b_a": "value",
"key_b_b": {
"key_b_b_a": "value"
}
}
}
What I want is to create a method to delete the given key or change its value.
def del_key(key):
my_dict = <dictionary described above>
keys = key.split(':')
if len(keys) == 1:
del my_dict[keys[0]]
elif len(keys) == 2:
del my_dict[keys[0]][keys[1]]
elif len(keys) == 3:
del my_dict[keys[0]][keys[1]][keys[2]]
. . .
del_key('key_b:key_b_b:key_b_b_a')
del_key('key_b:key_b_b')
del_key('key_a')
How can I do this gracefully?
It assumes your input is valid key,otherwise you have to check.
data = {
"key_a": "value",
"key_b": {
"key_b_a": "value",
"key_b_b": {
"key_b_b_a": "value"
}
}
}
def del_key(key):
key = key.split(':')
temp = data
for i in key[:-1]:
temp = temp[i]
del temp[key[-1]]
return data
print del_key('key_b:key_b_b:key_b_b_a')
print del_key('key_b:key_b_b')
print del_key('key_a')
output:
{'key_a': 'value', 'key_b': {'key_b_a': 'value', 'key_b_b': {}}}
{'key_a': 'value', 'key_b': {'key_b_a': 'value'}}
{'key_b': {'key_b_a': 'value'}}