I've searched and found "Append a dictionary to a dictionary", but that clobbers keys from b if they exist in a.
I'd like to essentially recursively append 1 dictionary to another, where:
keys are unique (obviously, it's a dictionary), but each dictionary is fully represented in the result such that a.keys() and b.keys() are both subsets of c.keys()
if the same key is in both dictionaries, the resulting key contains a list of values from both, such that a[key] and b[key] are in c[key]
the values could be another dictionary, (but nothing deeper than 1 level), in which case the same logic should apply (append values) such that a[key1][key2] and b[key1][key2] are in c[key][key2]
The basic example is where two dictionaries have keys that don't overlap, and I can accomplish that in multiple ways, e.g. c = {**a, **b}, so I haven't covered that below.
A trickier case:
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
c = combine(a, b)
c >> {
"key1": ["value_a1", "value_b1"],
"key2": "value_a2",
"key3": "value_b3"
}
An even trickier case
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
c >> {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2", "sub_value_b1"], #sub_value_a1 is not duplicated
"sub_key_2": ["sub_value_a3", "sub_value_b3"]
},
"key2": "value_a2",
"key3": "value_b3" # ["value_b3"] this would be okay, following from the code comment above
}
Caveats:
Python 3.6
The examples show lists being created as_needed, but I'm okay with every non-dict value being a list, as mentioned in the code comments
The values within the lists will always be strings
I tried to explain as best I could but can elaborate more if needed. Been working on this for a few days and keep getting stuck on the sub key part
There is no simple built-in way of doing this, but you can recreate the logic in python.
def combine_lists(a: list, b: list) -> list:
    """Return a new list: all of ``a`` followed by the items of ``b`` not in ``a``.

    Neither argument is modified.  Membership is tested against ``a`` only,
    so an item repeated inside ``b`` (and absent from ``a``) is carried over
    each time it occurs.
    """
    extras = []
    for item in b:
        if item not in a:
            extras.append(item)
    return a + extras
from typing import List, Union


def combine_strs(a: str, b: str) -> Union[str, List[str]]:
    """Combine two strings, collapsing them when they are equal.

    Args:
        a: The first string.
        b: The second string.

    Returns:
        ``a`` itself when both strings are equal, otherwise the two-element
        list ``[a, b]``.  The return annotation reflects both shapes (the
        previous ``-> str`` annotation did not cover the list result).
    """
    if a == b:
        return a
    return [a, b]
class EMPTY:
    """Sentinel marking a key that is missing from a dictionary.

    The class object itself serves as the marker: it is passed as the
    default to ``dict.get`` and checked with ``is``, so a stored ``None``
    is never mistaken for a missing key.
    """
def combine_dicts(a: dict, b: dict) -> dict:
    """Merge ``a`` and ``b`` into a new dictionary without losing values.

    Keys appear in the order of ``a`` followed by the keys found only in
    ``b``.  A key present in just one input keeps its value unchanged.  When
    a key is present in both, the two values must share a type:

    * two lists are merged with ``combine_lists``;
    * two strings are merged with ``combine_strs``;
    * two dicts are merged recursively.

    Raises:
        RuntimeError: If a key holds values in both inputs that are not both
            lists, both strings or both dicts.
    """
    # Checked in this order; the first type matching BOTH values wins.
    mergers = (
        (list, combine_lists),
        (str, combine_strs),
        (dict, combine_dicts),
    )
    merged = {}
    for key in [*a, *(k for k in b if k not in a)]:
        left = a.get(key, EMPTY)
        right = b.get(key, EMPTY)
        for kind, merge in mergers:
            if isinstance(left, kind) and isinstance(right, kind):
                merged[key] = merge(left, right)
                break
        else:
            # No shared mergeable type: either one side is missing or the
            # two values cannot be combined.
            if right is EMPTY:
                merged[key] = left
            elif left is EMPTY:
                merged[key] = right
            else:
                raise RuntimeError(
                    f"Cannot combine types: {type(left)} and {type(right)}"
                )
    return merged
Sounds like you want a specialised version of dict. So, you could subclass it to give you the behaviour you want. Being a bit of a Python noob, I started with the answer here : Subclassing Python dictionary to override __setitem__
Then I added the behaviour in your couple of examples.
I also added a MultiValue class which is a subclass of list. This makes it easy to tell if a value in the dict already has multiple values. Also it removes duplicates, as it looks like you don't want them.
class MultiValue(list):
    """List holding the distinct values collected for one dictionary key."""

    def append(self, value):
        """Add ``value`` unless an equal item is already stored.

        A ``MultiValue`` argument is merged item by item; any other object
        (including a plain ``list``) is treated as a single item.
        """
        incoming = value if isinstance(value, MultiValue) else [value]
        for item in incoming:
            if item not in self:
                super().append(item)


class MultiValueDict(dict):
    """dict that accumulates values instead of overwriting them.

    Assigning to a key that already exists:

    * merges the new mapping into the stored one when the stored value is a
      ``MultiValueDict`` and the new value is a ``dict``;
    * otherwise collects the old and new values in a ``MultiValue``.  Equal
      values are stored once, so re-assigning the same value leaves the
      entry unchanged.

    NOTE: stored ``MultiValueDict`` / ``MultiValue`` objects are updated in
    place, so objects passed in by the caller are modified by later merges.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Route the initial content through update() so that the merging
        # logic in __setitem__ applies from the start.
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, merging with any existing value."""
        if key in self:
            existing_value = self[key]
            if isinstance(existing_value, MultiValueDict) and isinstance(value, dict):
                existing_value.update(value)
                return
            if isinstance(existing_value, MultiValue):
                # Already a collection: append() handles de-duplication.
                existing_value.append(value)
                return
            # Promote the single stored value to a collection.  Going
            # through append() flattens a MultiValue argument and drops
            # duplicates.
            merged = MultiValue([existing_value])
            merged.append(value)
            if len(merged) == 1:
                # Same value again: nothing new to record.
                return
            value = merged
        super().__setitem__(key, value)

    def update(self, *args, **kwargs):
        """Merge a mapping (or iterable of pairs) and keyword items.

        Raises:
            TypeError: If more than one positional argument is given.
        """
        if args:
            if len(args) > 1:
                raise TypeError("update expected at most 1 arguments, "
                                "got %d" % len(args))
            other = dict(args[0])
            for key in other:
                self[key] = other[key]
        for key in kwargs:
            self[key] = kwargs[key]

    def setdefault(self, key, value=None):
        """Return ``self[key]``, first storing ``value`` if the key is missing."""
        if key not in self:
            self[key] = value
        return self[key]
Example 1:
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
# combine by creating a MultiValueDict then using update to add b to it.
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': ['value_a1', 'value_b1'], 'key2': 'value_a2', 'key3': 'value_b3'}
Example 2: The value for key1 is created as a MultiValueDict and the value for sub_key_1 is a MultiValue, so this may not fit what you're trying to do. It depends on how you're building your data set.
a = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_a2"]),
"sub_key_2": "sub_value_a3"
}),
"key2": "value_a2"
}
b = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_b1"]),
"sub_key_2": "sub_value_b3"
}),
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': {'sub_key_1': ['sub_value_a1', 'sub_value_a2', 'sub_value_b1'], 'sub_key_2': ['sub_value_a3', 'sub_value_b3']}, 'key2': 'value_a2', 'key3': 'value_b3'}
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
def appendValues(ax, cx):
    """Return the items of ``cx`` followed by those of ``ax``, without duplicates.

    Args:
        ax: A single value or a list of values to add.
        cx: The list of values collected so far.

    Returns:
        A new list in first-seen order.  ``cx`` is not modified, so lists
        that belong to the caller's input dictionaries stay intact.  Items
        must be hashable.
    """
    incoming = ax if isinstance(ax, list) else [ax]
    merged = []
    seen = set()
    for item in cx + incoming:
        if item not in seen:
            seen.add(item)
            merged.append(item)
    return merged


def combine(a, b):
    """Merge dictionaries ``a`` and ``b`` into a new dictionary.

    Keys found in only one input keep their value.  For keys found in both:

    * two dicts are combined recursively;
    * any other pair of values becomes a de-duplicated list holding the
      value(s) of ``a`` first, then those of ``b``.

    Neither input is modified.

    Raises:
        TypeError: If a key maps to a dict in one input and to a non-dict
            in the other.
    """
    c = {}
    for key, a_val in a.items():
        if key not in b:
            c[key] = a_val
            continue
        b_val = b[key]
        if isinstance(a_val, dict) and isinstance(b_val, dict):
            c[key] = combine(a_val, b_val)
        elif isinstance(a_val, dict) or isinstance(b_val, dict):
            raise TypeError(
                "cannot combine dict and non-dict values for key %r" % (key,)
            )
        else:
            # Copy so the list stored in `a` is never shared with the result.
            base = list(a_val) if isinstance(a_val, list) else [a_val]
            c[key] = appendValues(b_val, base)
    for key, b_val in b.items():
        if key not in a:
            c[key] = b_val
    return c
c = combine(a, b)
print(c)
print("==========================")
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
print(c)
Related
I have this simplified dict:
{
{
"birthPlace" : "london"
},
"hello": "hello",
"birthPlace" : "rome"
}
And I want to make the value of birthPlace uppercase: how? I tried
smallalphabetDict={}
for key, value in myjson.items():
smallalphabetDict[key.upper()] = value
It doesn't work
This changes all the values of a dict to uppercase, if the value is a string:
d = {......}
# Upper-case every top-level string value in place; non-string values
# (including nested dicts) are left untouched.  Only existing keys are
# reassigned, so the dict does not change size during iteration.
for k, v in d.items():
    if isinstance(v, str):
        d[k] = v.upper()
I have a huge nested json file and I want to get the values of "text" but only on a certain level as there are many "text" keys deeper in the json file. The level I mean would be the "text:"Hi" after "event":"user".
The file looks like this:
`
{
"_id":{
"$oid":"123"
},
"events":[
{
"event":"action",
"metadata":{
"model_id":"12"
},
"action_text":null,
"hide_rule_turn":false
},
{
"event":"user",
"text":"Hi",
"parse_data":{
"intent":{
"name":"greet",
"confidence":{
"$numberDouble":"0.9601748585700989"
}
},
"entities":[
],
"text":"Hi",
"metadata":{
},
"text_tokens":[
[
{
"$numberInt":"0"
},
{
"$numberInt":"2"
}
]
],
"selector":{
"ideas":{
"response":{
"responses":[
{
"text":"yeah"
},
{
"text":"No"
},
{
"text":"Goo"
}
]
},
`
First I used this function to get the text data, but of course it gave me all of them:
def json_extract(obj, key):
    """Collect every scalar value stored under ``key`` in a nested JSON tree.

    Dicts and lists are walked depth-first in iteration order.  A value that
    is itself a dict or list is descended into rather than collected, even
    when its key equals ``key``.

    Returns:
        The list of matching values, in the order they were encountered.
    """
    found = []

    def walk(node):
        """Visit one node, appending matches to ``found``."""
        if isinstance(node, dict):
            for name, child in node.items():
                if isinstance(child, (dict, list)):
                    walk(child)
                elif name == key:
                    found.append(child)
        elif isinstance(node, list):
            for child in node:
                walk(child)

    walk(obj)
    return found
I also tried to access only the second level through this text but it gave me a KeyNotFound Error:
for i in data["events"][0]:
print(i["text"])
Maybe because that key is not in every nested list? ... I really don't know what else I could do
Since events is a list, you can write a list comprehension (if there are multiple items you need), or you can use the next function to get an element that you need from the iterator:
# Give next() an empty-dict default so that data without any "user" event
# prints '' instead of raising StopIteration.
event = next(
    (e for e in data.get('events', []) if e.get('event') == 'user'),
    {},
)
print(event.get('text', ''))
Using get method gives you the safety that it won't throw an exception if the key doesn't exist in the dictionary
Edit:
If you need this for all events:
all_events = [e for e in data.get('events', list()) if e.get('event')=='user']
for event in all_events:
print(event.get('text', ''))
Convert your JSON to a Python dictionary (e.g., json.load or json.loads depending on how you're accessing the JSON). Then just pass a reference to the dictionary to this:
def json_extract(jdata):
    """Collect the truthy ``'text'`` values of dicts that also have an ``'event'`` key.

    The tree is walked depth-first.  The values of the ``'event'`` and
    ``'text'`` keys themselves are never descended into.  Inside a list only
    dict elements are visited; lists nested directly in lists are skipped.

    Returns:
        The collected text values, in the order they were encountered.
    """
    assert isinstance(jdata, dict)
    texts = []

    def _visit(node):
        """Record the text of ``node`` if it qualifies, then walk its children."""
        if 'event' in node:
            text = node.get('text')
            if text:
                texts.append(text)
        for name, child in node.items():
            if name in {'event', 'text'}:
                continue
            if isinstance(child, list):
                for element in child:
                    if isinstance(element, dict):
                        _visit(element)
            elif isinstance(child, dict):
                _visit(child)

    _visit(jdata)
    return texts
This will return a list of all values associated with the key 'text' providing that key is found in a dictionary that also has an 'event' key
How can I call a dictionary recursively to find the last value, assuming dictionaries may have different depths?
a = {
'b': {
'c':'d'
}
}
m = {
'b': {
'c':{
'd':'e'
}
}
}
It's just two examples, I'm trying to get the last value, no matter how deep it's located.
The function doesn't work. How should I pass the final value when it get to the string type?
def get_value(x):
    # Non-working attempt from the question, kept as posted.
    if isinstance(x, dict):
        # Recurses with the very same `x`, so a dict argument never gets
        # closer to a leaf and the recursion cannot terminate normally.
        return get_value(x)
    else:
        # Reached only for non-dict values, yet it calls the dict methods
        # .get() and .keys() on them.
        return x.get(list(x.keys())[0])
Expected outputs are:
get_value(a) == 'd'
get_value(m) == 'e'
This seems to work:
a = {
'b': {
'c':'d'
}
}
m = {
'b': {
'c':{
'd':'e'
}
}
}
def get_value(x):
    """Follow the first key of each nested dict and return the first non-dict value.

    A non-dict argument is returned unchanged.  An empty dict anywhere along
    the path raises ``IndexError`` because it has no first key.
    """
    current = x
    while isinstance(current, dict):
        first_key = list(current.keys())[0]
        current = current[first_key]
    return current
print (get_value(a))
print (get_value(m))
Output
d
e
def get_value(x):
    """Return the value reached by following the first key of each nested dict.

    Only the first key of ``x`` is ever examined: the loop body returns on
    its first pass.  An empty dict yields ``None``.
    """
    for first_key in x.keys():
        first_value = x[first_key]
        if not isinstance(first_value, dict):
            return first_value
        return get_value(first_value)
    return None
The key had to be passed when recursive function was called again.
I have a dict, lets say mydict
I also know about this json, let's say myjson:
{
"actor":{
"name":"",
"type":"",
"mbox":""
},
"result":{
"completion":"",
"score":{ "scaled":"" },
"success":"",
"timestamp":""
},
"verb":{
"display":{
"en-US":""
},
"id":""
},
"context":{
"location":"",
"learner_id": "",
"session_id": ""
},
"object":{
"definition":{
"name":{
"en-US":""
}
},
"id":"",
"activity_type":""
}
}
I want to know if ALL of myjson keys (with the same hierarchy) are in mydict. I don't care if mydict has more data in it (it can have more data). How do I do this in python?
Make a dictionary of myjson
import json
# Parse the reference JSON file straight from the open file handle.
with open('myjson.json') as j:
    new_dict = json.load(j)
Then go through each key of that dictionary, and confirm that the value of that key is the same in both dictionaries
def compare_dicts(new_dict, mydict):
    """Return True when every key of ``new_dict`` is in ``mydict`` with an equal value.

    Values are compared with ``==``, so nested dictionaries must match
    exactly.  Extra top-level keys in ``mydict`` are ignored.
    """
    return all(
        key in mydict and mydict[key] == new_dict[key]
        for key in new_dict
    )
EDIT:
A little more complex, but something like this should suit your needs
def compare(n, m):
    """Return True when ``m`` contains everything in ``n`` at every nesting level.

    For each key of ``n`` the key must exist in ``m`` and the two values
    must either be equal or both be dicts that satisfy the same rule
    recursively.  Extra keys in ``m`` are allowed at any depth.

    Args:
        n: The reference dictionary whose content is required.
        m: The dictionary being checked.

    Returns:
        False as soon as a key is missing from ``m`` or a value pair does
        not match; True otherwise (including when ``n`` is empty).
    """
    for key, expected in n.items():
        if key not in m:
            # A required key is absent: the hierarchy does not match.
            return False
        actual = m[key]
        if actual == expected:
            continue
        if (
            isinstance(expected, dict)
            and isinstance(actual, dict)
            and compare(expected, actual)
        ):
            continue
        return False
    return True
If you just care about the values and not the keys you can do this:
>>> all(v in mydict.items() for v in myjson.items())
True
This will be true if all key/value pairs of myjson are in mydict, even if mydict has other keys.
Edit: If you only care about the keys, use this:
>>> all(v in mydict.keys() for v in myjson.keys())
True
This returns true if every key of myjson is in mydict, even if they point to different values.
I have two large nested dictionaries in the form
dictOne = "key1": {
"key2": {}
"key3": {
"key4" : {data...}
}
}
dictTwo = "key1": {
"key2": {}
}
Except they are thousands of lines long, and some of the dicts are nested 10-15 levels deep.
I want to find a way to combine them together similar to an EXCEPT in SQL. I want any keys that show up in dictTwo to be deleted from dictOne, but only if the dict under the key doesn't have children.
So in this case the resulting dict would be
dictRes = "key1": {
"key3": {
"key4" : {data...}
}
}
I am assuming there is no easy way to do this, but I was hoping someone could point me in the right direction towards making a method that could accomplish this
Sounds like you need a recursive option.
def dict_parser(target, pruning):
    """Return a copy of ``target`` without the empty entries named in ``pruning``.

    An entry is dropped when its value is falsy (for example an empty dict)
    and its key is present in ``pruning`` at the same nesting level.  Nested
    dicts are rebuilt recursively against the matching sub-dict of
    ``pruning``; every other value is carried over by reference.

    Args:
        target: The dictionary to prune.
        pruning: Dictionary whose keys mark the entries that may be removed.

    Returns:
        A new dictionary; ``target`` is not modified.
    """
    d = {}
    for k, v in target.items():
        if (not v) and (k in pruning):
            continue
        if isinstance(v, dict):
            sub_pruning = pruning.get(k, {})
            if not isinstance(sub_pruning, dict):
                # `pruning` holds a non-dict where `target` has a dict:
                # nothing beneath this key can be marked for removal.
                sub_pruning = {}
            d[k] = dict_parser(v, sub_pruning)
        else:
            d[k] = v
    return d
DEMO:
dictOne = {"key1": {
"key2": {},
"key3": {
"key4" : {"some stuff"}
}
}}
dictTwo = {"key1": {
"key2": {}
}}
dict_parser(dictOne, dictTwo)
# gives:
# # {'key1': {
# # 'key3': {
# # 'key4': {'some stuff'}}}}