Recursively remove nested items in dictionary - python

Currently, I'm creating two lists and comparing them for duplicates.
Instead of that, I want to recursively remove nested items from the dictionary
The question I have is, how do I select a deeply nested item AND change the dictionary while performing this recursion?
Current function:
def _finditem(obj,key):
if key == 'haha' or key == 'haha1':
global a_list
global b_list
if isinstance(obj,dict):
_finditem(obj['children'],key)
else:
for x in obj:
if x['title'] == 'Bookmarks Menu':
_finditem(x['children'],'haha')
elif x['title'] == 'surf':
_finditem(x['children'],'haha1')
else:
try:
_finditem(x['children'],key)
except:
if key == 'haha':
a_list.append(x['title'])
elif key == 'haha1':
b_list.append(x['title'])
pass

Modifying a list while iterating over it:
I used a list slice and excluded items from the original based on a test function.
def discard_func(list_of_dicts):
list_of_dicts[:] = [x for x in list_of_dicts if test(x)]
return list_of_dicts
list[:] is list slice syntax for entire list
Explain Python's slice notation
Remove items from a list while iterating
Scope:
The list slice also solves that, since it modifies the original object.
However, I added returns to each function and assignments to each recursive function call anyway. So that the variable being assigned to, gets the returned value from the function, however deep it may go.
def _finditem(op_type, obj):
if isinstance(obj, dict):
obj['children'] = _finditem(op_type, obj['children'])
else:
for x in obj:
if x['title'] in subjects[op_type]:
x['children'] = operations[op_type](x['children'])
else:
try:
x['children'] = _finditem(op_type, x['children'])
except:
continue
return obj
entire file:
assign_var = {'compare' : None , 'discard' : [] , 'keep' : [] , 'pairs' : None}
subjects = {'keep' : ['Bookmarks Menu'] , 'discard' : ['surf'] , 'compare' : None , 'pairs' : [ {'keep' : ['Bookmarks Menu'] , 'discard' : ['surf']} , {'keep':'bla','discard':'etc'} ] }
def test(y):
if 'children' in y:
if y['title'] not in subjects['keep']:
discard_func(y['children'])
else:
pass
elif y['title'] in assign_var['keep']:
print 'Dupicate found'
return False
return True
def discard_func(loo):
loo[:] = [x for x in loo if test(x)]
return loo
def keep_func(x):
global assign_var
for y in x:
if 'children' in y:
if y['title'] not in subjects['discard']:
keep_func(y['children'])
else:
continue
else:
assign_var['keep'].append(y['title'])
return x
def _finditem(op_type, obj):
if isinstance(obj, dict):
obj['children'] = _finditem(op_type, obj['children'])
else:
for x in obj:
if x['title'] in subjects[op_type]:
x['children'] = operations[op_type](x['children'])
else:
try:
x['children'] = _finditem(op_type, x['children'])
except:
continue
return obj
operations = { 'keep' : keep_func , 'discard' : discard_func , 'pairs' : None , 'compare' : None }
def parent_function():
op_type = 'keep'
_finditem(op_type, book)
op_type = 'discard'
book_new = _finditem(op_type, book)
# for op_type in assign_var:
# try:
# _finditem(op_type = op_type, obj = book)
# except:
# pass
return book_new
if __name__ == '__main__':
print 'In __main__'
import json
loc = 'C:\\workspace\\temp\\'
with open(loc + 'bookmarks-2014-05-24.json', 'r') as nctn:
book = json.load(nctn)
book_new = parent_function()
with open(loc + 'bookmarks-2014-05-24_new.json', 'w') as nctn:
nctn.write(json.dumps(book_new))

Related

Multipurpose function to create and append json using specified base python

Can someone help me with my code below? The main purpose of this is to use the data_manager class as a way to store data into a json file.
After being created, a json file named with the specified name, containing a json base, also named with the specified name is created
The major function that handles the majority of the logic is in write_to_json function inside the data_manager class
def write_to_json(self, new_data, base = ""):
There are four major use cases that I am trying to handle:
where match means the item key is found in the file data and base is where we are trying to add the data to (an existing tier / base) (if blank, add it to base)
Case 1 - No match, no base: add to main list
Case 2 - No match, base: add to base
Case 3 - Match, no base: check if value is different and if so,
replace value in main list
Case 4 - Match, base, check if value is different and if so, replace
value in base list
Right now I have case 1 and 3 working, but I am having issues with 2 and 4.
I have been trying many different ways to code this and keep running into problems.
import os
import sys
import json
class file_manager:
def set_file_contents(self, file_name, contents):
file = open(file_name, "w")
file.write(contents)
file.close()
def set_file_contents_append(self, file_name, contents):
if not os.path.exists(file_name):
open(file_name, 'a').close()
file = open(file_name, "a")
file.write(contents)
file.close()
def get_file_size(self,file_name):
return os.path.getsize(file_name)
def get_file_content_json(self,file_name):
return json.load(open(file_name)) if self.get_file_size(file_name) != 0 else None
class data_manager(file_manager):
data_file = None
json_data = []
data_name = None
def __init__(self,data_name):
self.data_base = data_name
self.data_file = data_name + '.json'
self.create_data_file()
def create_base(self,base):
data = {base:[]}
self.set_file_contents(self.data_file,json.dumps(data,indent=4))
def create_data_file(self):
self.create_base(self.data_base)
def check_file_size(self):
print(self.get_file_size(self.data_file))
def check_if_exist(self, data_name):
file_data = self.get_file_content_json(self.data_file)
data_value = False
for item in file_data[self.data_base]:
if data_name in item.keys():
print(f'{data_name}: {item[data_name]}')
data_value = True
return data_value
def get_data_value(self,data_name):
d = self.get_file_content_json(self.data_file)
d = d[self.data_base]
print(data_name)
items = []
for item in self.item_generator(d,data_name):
print(f'returning value = {item}')
items.append(item)
return dict(items[0]) if items else None
def item_generator(self,json_input, lookup_key):
if isinstance(json_input, dict):
for key, value in json_input.items():
if key == lookup_key:
yield {key:value}
else:
yield from self.item_generator(value, lookup_key)
elif isinstance(json_input, list):
for item in json_input:
yield from self.item_generator(item, lookup_key)
def replace_data_value_json(self, file_data, data_name, data_value):
for item in file_data:
if data_name in item.keys():
item[data_name] = data_value
return file_data
def set_data_value(self, data_name, data_value):
file_data = self.get_file_content_json(self.data_file)
for item in file_data[self.data_base]:
if data_name in item.keys():
item[data_name] = data_value
self.set_file_contents(self.data_file,json.dumps(file_data,indent=4))
def view_all_data(self):
file_data = self.get_file_content_json(self.data_file)
print((file_data))
def remove_data_item(self, data_name):
file_data = self.get_file_content_json(self.data_file)
print(file_data)
for element in file_data[self.data_base]:
if data_name in element:
del element[data_name]
self.set_file_contents(self.data_file,json.dumps(file_data,indent=4).replace('{}','').replace('\{\},','') )
def prettyjson(self,data):
return json.dumps(data,indent=4)
def compare_equal(self, value1, value2):
print(f'{value1} really vs {value2}')
return True if str(value1) == str(value2) else False
def write_to_json(self, new_data, base = ""):
file_data = self.get_file_content_json(self.data_file)
#print(f'Data before:\n{json.dumps(file_data,indent=4)}')
base = self.data_base if base == "" else base
#print(list(file_data) - file_data[base])
print(f'Complete file:\n{self.prettyjson(file_data)}')
#file_starting_from_base = list(file_data[data_name])
print('Starting from base')
print(self.prettyjson(file_data[self.data_base]))
file_data_before = ([item for item in file_data if item not in file_data[self.data_base]])
#print(file_data_before)
for data_item in new_data:
match_found = False
index = None
value_from_name = self.get_data_value(data_item)
value_from_name = list(value_from_name) if value_from_name else None
# print(f'before: {value_from_name} {type(value_from_name)} {list(value_from_name)} ')
value_from_name = value_from_name[0] if value_from_name and value_from_name[0] else None
value_from_name_found = True if value_from_name else False
if value_from_name_found:
print(f'found: {value_from_name}')
match_found = True
#OLD METHOD USED TO FIND INDEX
# #if base != self.data_base:
# index = 0
# for item in file_data[self.data_base]:
# print(f'{list(item.keys())[0]} vs {base}')
# if str(base) in list(item.keys())[0]:
# print(f'MATCH FOUND = {base} = {list(item.keys())[0]}')#: {item.values()} {index}')
# match_found = True
# break
# index += 1
# print(index)
# #return
data_single_item = {data_item:new_data[data_item]}
if not match_found:
#Case 1 - No match, no base: add to main list
if base == self.data_base:
file_data[self.data_base].append(data_single_item)
else:
#Case 2 - No match, base: add to base
print(f'ADD {data_single_item} TO {file_data[self.data_base]} starting from {base}')
#possible idea: create base and try again adding values again
#self.create_base(base)
#self.write_to_json(data_single_item,base)
#file_data[self.data_base].append(data_single_item) #broken
#old working method, broken without index
#file_data[self.data_base][index][base].append(data_single_item)
#MATCH FOUND
else:
#Case 3 - Match, no base: check if value is different and if so, replace value in main list
if base == self.data_base:
file_data[base] = self.replace_data_value_json(file_data[base],str(data_item),new_data[data_item])
pass
else:
#Case 4 - Match, base, check if value is different and if so, replace value in base list
print(f'data = {self.get_data_value(data_item)}' )
# print(f'check {new_data[data_item]} vs {list(self.get_data_value(data_item))[0]}')
value_from_name = self.get_data_value(data_item)
value_from_name = list(value_from_name) if value_from_name else None
print(f'before: {value_from_name} {type(value_from_name)} {list(value_from_name)} ')
value_from_name = value_from_name[0] if value_from_name and value_from_name[0] else None
value_from_name_found = True if value_from_name else False
if value_from_name_found and (not self.compare_equal( new_data[data_item], value_from_name ) ):
print(f'{new_data[data_item]} NOT EQUAL TO {value_from_name}')
# change value to new value
#file_data[self.data_base][base].append(data_single_item)
#print(f'add {data_single_item}')
final_output = {self.data_base:file_data[self.data_base]}
self.set_file_contents(self.data_file,self.prettyjson(final_output))
def add_data_single(self, data_name, data_value, base):
new_data_item = {data_name: data_value}
self.write_to_json(new_data_item, base)
def add_data_multiple(self,data,base=""):
self.write_to_json(data,base)
# CREATE 'people.json' AND create json base matching name in file ( { "people": [] } )
test = data_manager('people')
# CREATE 3 ITEMS STARTING IN MAIN BASE
test.write_to_json({'John':[], 'Alex':[], 'Samantha':[]}) # CASE 1
# SHOULD ATTEMPT TO ADD VALUES TO BASE 'john',
# if dictionary key matches, check if key matches
# if key and value match, do nothing and do not overwrite file)
# if key matches and value does not, change the value of the item matching the key starting from base 'John'
# if dictionary key does not match, add full dictionary item to base
test.write_to_json({"Favorite-Food":"tacos" , "Age":45}, "John") # CASE 2
# CREATE
#test.write_to_json({'Example2-Sub1':44},'Example2')
I think/hope you might have some unwanted lists in your json and that when you indicate you are hoping for:
{
"people": [
{"John": [{"favorite-food": "tacos", "Age": 45}]},
{"Alex": []},
{"Samantha": []}
]
}
what you really want is:
{
"people": {
"John": {"favorite-food": "tacos", "Age": 45}
},
{"Alex": {}},
{"Samantha": {}}
}
If that is what you want in the end, then this code based on merging dictionaries via the {**a, **b} method is the way forward:
import json
import os
class data_manager():
BASE_COLLECTIONS_FOLDER = "./data"
def __init__(self, collection_name):
self.collection_name = collection_name
self.collection_file_path = f"{self.BASE_COLLECTIONS_FOLDER}/{self.collection_name}.json"
self.collection = {}
self.ensure_collection()
self.load_collection()
def ensure_collection(self):
if os.path.isfile(self.collection_file_path):
return
os.makedirs(self.BASE_COLLECTIONS_FOLDER, exist_ok=True)
self.save_collection()
def load_collection(self):
with open(self.collection_file_path, "r", encoding="utf-8") as collection_file:
self.collection = json.load(collection_file)[self.collection_name]
def save_collection(self):
with open(self.collection_file_path, "w", encoding="utf-8") as collection_file:
json.dump({self.collection_name: self.collection}, collection_file, indent=4)
def write_to_json(self, data, key=None):
if not key:
self.collection = {**self.collection, **data}
else:
self.collection[key] = {**self.collection.get(key, {}), **data}
self.save_collection()
people = data_manager("people")
people.write_to_json({"John": {}, "Alex": {}, "Samantha": {}})
people.write_to_json({"Favorite-Food": "tacos", "Age":45}, "John")
people.write_to_json({"Parents": {"Mother": "Britney", "Dad": "Adam"}}, "John")
people.write_to_json({"Parents": {"Mother": "Britney", "Dad": "John"}}, "John")
people.write_to_json({"Bob": {"name": "not bob"}})
people.write_to_json({"Bob": {"name": "bob"}})
people.write_to_json({"Example2-Sub1": 44}, "Example2")
Running this will result in a file who's contents are:
{
"people": {
"John": {
"Favorite-Food": "tacos",
"Age": 45,
"Parents": {
"Mother": "Britney",
"Dad": "John"
}
},
"Alex": {},
"Samantha": {},
"Example2": {
"Example2-Sub1": 44
},
"Bob": {
"name": "bob"
}
}
}

Why is my helper method not activating recursively?

I have a Binary Search Tree and I am trying to trace recursively in order through the tree and append each key,value to a list. It is only appending the first key,value to the list and not going through the list in order. I pasted my code below, along with the test code I used at the bottom. Any help on how to get past this issue is super appreciated!
class TreeMap:
class Node:
def __init__(self, key, value):
self.key = key
self.value = value
self.left = None
self.right = None
def __init__(self):
self.root = None
self.numsearches = 0
self.numcomparisons = 0
def add(self, newkey, newvalue):
newkey = newkey.lower()
if self.root == None:
self.root = TreeMap.Node(newkey, newvalue)
else:
TreeMap.add_helper(self.root, newkey, newvalue)
def add_helper(thisnode, newkey, newvalue):
if newkey <= thisnode.key:
if thisnode.left == None:
thisnode.left = TreeMap.Node(newkey, newvalue)
else:
TreeMap.add_helper(thisnode.left, newkey, newvalue)
else:
if thisnode.right == None:
thisnode.right = TreeMap.Node(newkey, newvalue)
else:
TreeMap.add_helper(thisnode.right, newkey, newvalue)
def print(self):
TreeMap.print_helper(self.root, 0)
def print_helper(somenode, indentlevel):
if somenode == None:
print(" "*(indentlevel),"---")
return
if not TreeMap.isleaf(somenode):
TreeMap.print_helper(somenode.right, indentlevel + 5)
print(" "*indentlevel + str(somenode.key) + ": " +str(somenode.value))
if not TreeMap.isleaf(somenode):
TreeMap.print_helper(somenode.left, indentlevel + 5)
def isleaf(anode):
return anode.left == None and anode.right == None
def listify(self, whichorder="in"):
'''
Returns a list consisting of all the payloads of the tree. (This returns a plain old Python List.)
The order of the payloads is determined by whichorder, which defaults to inorder.
The other possibilities are "pre" and "post".
If the tree is empty, return the empty list.
'''
assert type(whichorder) is str,"Whichorder is a string, and can only be pre, in or post"
assert whichorder in ["pre","in","post"],"Whichorder is a string, and can only be pre, in or post"
return TreeMap.listify_helper(self.root, whichorder)
def listify_helper(somenode, whichorder):
order_list = []
if somenode == None:
return order_list
elif somenode != None and whichorder == 'in':
TreeMap.listify_helper(somenode.left, 'in')
order_list.append(somenode.key+ '='+somenode.value)
TreeMap.listify_helper(somenode.right, 'in')
return order_list
TEST CODE:
import treemap
translator = treemap.TreeMap()
translator.add("cat", "Katze")
translator.add("bird", "Vogel")
translator.add("dog", "Hund")
translator.add("snake", "IDK")
translator.add("bear", "IDK")
translator.add("octopus", "Tintenfisch")
translator.add("horse", "Pferd")
translator.add("zebra", "IDK")
translator.print()
print("---------------------------------------------------")
print (translator.listify())
The problem is here:
def listify_helper(somenode, whichorder):
order_list = []
This function initialises its own local order_list every time it is invoked. Pass order_list as a parameter instead so that the same list is appended to by each recursive invocation.
Alternatively, append each element of the result of the recursive calls of listify_helper to order_list, although this approach could result in unneeded copying.

How can I get the specified key value in a nested dictionary in a most effective way?

There is a nested dictionery like :
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxxx.jpg",
...
"child_attachments":{
"picture":"xxxxx.jpg",
...
}
}
...
}
The problem is at every level of the dictionary, the key picture may exist, how can I get the picture's value in a most effective way?
Here's my trial, but failed:
def get_picture_url(data):
for key, value in data.items():
if key == "picture":
return data[key]
else:
if isinstance(value, dict):
return get_picture_url(value)
get_picture_url(data_dict)
This should work for the general case of an arbitrarily nested dictionary with JSON-like structure:
def get_picture(data):
# you can remove this case if the
# input doesn't contain lists
if isinstance(data, list):
ans = []
for e in data:
ans += get_picture(e)
return ans
elif not isinstance(data, dict):
return []
else:
ans = []
for k, v in data.items():
if k == 'picture':
ans.append(v)
else:
ans += get_picture(v)
return ans
It'll traverse all levels of the data structure, looking for keys named 'picture' and accumulating all of their values in a single output list. If you're sure that there are no lists in the input, we can simplify the solution a bit:
def get_picture(data):
ans = []
if isinstance(data, dict):
for k, v in data.items():
if k == 'picture':
ans.append(v)
else:
ans += get_picture(v)
return ans
Either way, it works as expected for your sample input:
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxx.jpg",
"child_attachments":{
"picture":"xxxxx.jpg"
}
}
}
get_picture(data_dict)
=> ['xxx.jpg', 'xxxx.jpg', 'xxxxx.jpg']
You are not checking the returned value of the recursive call to get_picture_url.
This should give you the top most picture in your dict:
def get_picture_url(data, picture_key="picture"):
if not isinstance(data, dict):
return None
picture_url = data.get(picture_key)
if picture_url is not None:
return picture_url
for value in data.values():
picture_url = get_picture_url(value)
if picture_url is not None:
return picture_url
return None

How can I read a dictionary with a list?

How can I read a list inside a dictionary and try to change string numbers to digits? For example:
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
I use this to change dictionary number strings to integers:
{k:int(v) if v.isdigit() else v for k,v in obj.items()}
But it doesn't work, so I was trying something like this:
for objs in obj:
if objs.isdigit():
k:int(v)
else:
for k,v in objs.items():
print k
But this fails as well.
this seems like a good problem for recursion
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
def fix_ints(obj):
if isinstance(obj,basestring):
try:
return int(obj)
except ValueError:
print "I cant Make %r an int"%obj
return obj
elif isinstance(obj,(list,tuple)):
return [fix_ints(item) for item in obj]
elif isinstance(obj,dict):
return dict((key,fix_ints(value)) for key,value in obj.items())
else:
print "I have no idea what to do with %r"%obj
new_obj = fix_ints(obj)
print new_obj
note that python does not support tail recursion so if this data structure goes very deep (greater than 1k levels of nesting) then recursion may not be appropriate ...
of coarse you can also do silly string tricks with it
import json,re
new_obj = json.loads(re.sub("\"(\d+)\"","\\1",json.dumps(obj)))
(although really you should do it like i do in my first exzample ... this second method is really just for fun)
String to number:
def int_it(obj):
if obj.isdigit():
obj = int(obj)
return obj
Dict to number (regardless of the number of nested dicts or lists):
class Convert(object):
def __init__(self, obj):
self.obj = obj
if isinstance(obj, dict):
self.handle_dict(obj)
def handle_dict(self, obj):
for key, value in obj.items():
if isinstance(value, str) and value.isdigit():
self.obj[key] = int_it(value)
elif isinstance(obj[key], list):
ins = HandleList(obj[key])
self.obj[key] = ins.obj
elif isinstance(obj[key], dict):
ins = Convert(obj.items())
self.obj[key] = ins.obj
return obj
List to numbers, regardless of the number of nested lists or dicts.
class HandleList(object):
def __init__(self, obj):
self.obj = obj
self.handle_list(obj)
def handle_list(self, obj):
for index, item in enumerate(obj):
if isinstance(item, list):
obj.index(index, [HandleList(val).obj for val in item])
elif isinstance(item, str):
obj.index(index, int_it(item))
elif isinstance(item, dict):
Convert(item)
return obj
output = Convert(values)
print(output.obj)
Returns:
{
'amarillo': 'xxx',
'naranja': [{'naranja_1': 1, 'naranja_2': 2}],
'rojo': [{'rojo_b': 2, 'rojo_a': 1}],
'azul': 4
}
Given the input:
values = {
'azul':'4',
'rojo': [
{'rojo_a':'1',
'rojo_b':'2'
}
],
'amarillo':'xxx',
'naranja': [
{'naranja_1':'1',
'naranja_2':'2'
}
]
}

Python: Implementing a Map with Chaining

My assignment is to implement a map with chaining by creating a hash table with two lists, one called "slots" and one called "data. My code seems to work until the 'G' character. I can't quite pinpoint what is going on here and I have tried debugging.
class HashTable:
def __init__(self):
self.size = 11
self.slots = [None] * self.size
self.data = [None] * self.size
def put(self,key,data):
hashvalue = self.hashfunction(key,len(self.slots))
if self.slots[hashvalue] == None:
self.slots[hashvalue] = list()
self.slots[hashvalue].append(key)
self.data[hashvalue] = list()
self.data[hashvalue].append(data)
else:
if self.slots[hashvalue] != None:
self.data[hashvalue].append(data) #replace
def hashfunction(self,key,size):
return key%size
def get(self,key):
startslot = self.hashfunction(key,len(self.slots))
data = None
stop = False
found = False
position = startslot
while self.slots[position] != None and not found and not stop:
for index in range (len(self.slots[position])):
if self.slots[position][index]== key:
found = True
data = self.data[position][index]
break
position+1
if position == startslot:
stop = True
return data
def __getitem__(self,key):
return self.get(key)
def __setitem__(self,key,data):
self.put(key,data)
## TEST FOR HashTable
h = HashTable() # create new hash table
nums = [1, 3, 5, 50, 1000] # some keys
nums = nums + [ len(h.slots)*i for i in range(20)] # some keys that have same hash
vals = [ chr(x) for x in range(ord('A'),ord('Z')) ] # list of single letters from A to Z
# add key/values
for i in range(len(nums)):
# print("adding (%d, %s)"%(nums[i],vals[i]),end=" ")
h[nums[i]] = vals[i]
for i in range(len(nums)):
key = nums[i]
value = vals[i]
gotValue = h[key]
assert gotValue == value,"expected key: %d to lookup value: %s but got value %s instead " % (key, value, gotValue)
print("\nAll TESTS PASSED")
I found my issue:
I forgot to add a line under:
if self.slots[hashvalue] != None:
that adds the key to the [hashvalue] of slots.
So now I have:
if self.slots[hashvalue] != None:
self.slots[hashvalue].append(key)
self.data[hashvalue].append(data)
It was adding the data value to the corresponding list "data", but not the matching key value to the list "slots"

Categories