Finding missing value in JSON using python - python

I am facing the following problem: I want to separate the records in my dataset that are complete from those that are not.
So, I want to add a flag such as 'completed' to each JSON record, as shown in the Output example below.
This is the data that i have
data=[{'id': 'abc001',
'demo':{'gender':'1',
'job':'6',
'area':'3',
'study':'3'},
'ex_data':{'fam':'small',
'scholar':'2'}},
{'id': 'abc002',
'demo':{'gender':'1',
'edu':'6',
'qual':'3',
'living':'3'},
'ex_data':{'fam':'',
'scholar':''}},
{'id': 'abc003',
'demo':{'gender':'1',
'edu':'6',
'area':'3',
'sal':'3'}
'ex_data':{'fam':'big',
'scholar':NaN}}]
Output
How can I put the flag and also detect NaN and NULL in JSON?
Output=[{'id': 'abc001',
'completed':'yes',
'demo':{'gender':'1',
'job':'6',
'area':'3',
'study':'3'},
'ex_data':{'fam':'small',
'scholar':'2'}},
{'id': 'abc002',
'completed':'no',
'demo':{'gender':'1',
'edu':'6',
'qual':'3',
'living':'3'},
'ex_data':{'fam':'',
'scholar':''}},
{'id': 'abc003',
'completed':'no',
'demo':{'gender':'1',
'edu':'6',
'area':'3',
'sal':'3'}
'ex_data':{'fam':'big',
'scholar':NaN}}]

Something like this should work for you:
data = [
    {
        'id': 'abc001',
        'demo': {
            'gender': '1',
            'job': '6',
            'area': '3',
            'study': '3',
        },
        'ex_data': {
            'fam': 'small',
            'scholar': '2',
        },
    },
    {
        'id': 'abc002',
        'demo': {
            'gender': '1',
            'edu': '6',
            'qual': '3',
            'living': '3',
        },
        'ex_data': {
            'fam': '',
            'scholar': '',
        },
    },
    {
        'id': 'abc003',
        'demo': {
            'gender': '1',
            'edu': '6',
            'area': '3',
            'sal': '3',
        },
        'ex_data': {
            'fam': 'big',
            'scholar': None,
        },
    },
]


def browse_dict(dico):
    """Flag *dico* in place with a ``"completed"`` key.

    A value counts as missing when it is ``None``, the empty string, or a
    float NaN. Nested dictionaries are searched to any depth. Other falsy
    values such as ``0`` or ``False`` are treated as real answers.

    Args:
        dico: The record to inspect. It is mutated: ``dico["completed"]`` is
            set to ``"yes"`` when no value is missing, otherwise ``"no"``.

    Returns:
        None. The result is stored on *dico* itself.
    """
    import math  # local import keeps the snippet self-contained

    def _is_missing(value):
        # A nested dict is missing data if any of its own values is.
        if isinstance(value, dict):
            return any(_is_missing(inner) for inner in value.values())
        # NaN never compares equal to anything, so it needs its own test.
        if isinstance(value, float):
            return math.isnan(value)
        return value is None or value == ""

    has_gap = any(_is_missing(value) for value in dico.values())
    dico["completed"] = "no" if has_gap else "yes"


for d in data:
    browse_dict(d)
    print(d)
Output :
{'id': 'abc001', 'demo': {'gender': '1', 'job': '6', 'area': '3', 'study': '3'}, 'ex_data': {'fam': 'small', 'scholar': '2'}, 'completed': 'yes'}
{'id': 'abc002', 'demo': {'gender': '1', 'edu': '6', 'qual': '3', 'living': '3'}, 'ex_data': {'fam': '', 'scholar': ''}, 'completed': 'no'}
{'id': 'abc003', 'demo': {'gender': '1', 'edu': '6', 'area': '3', 'sal': '3'}, 'ex_data': {'fam': 'big', 'scholar': None}, 'completed': 'no'}
Note that I changed NaN to None, because what you are showing is most likely a Python dictionary rather than a JSON file, since you are using data =
A bare NaN is not a defined name in Python; when JSON text is parsed with the json module, null becomes None and NaN becomes float('nan').
If you have to convert your JSON to a dictionary, refer to the JSON module documentation.
Also please check your dictionary syntax. You missed several commas to separate data.

You should try
The Input is
data = [{'demo': {'gender': '1', 'job': '6', 'study': '3', 'area': '3'}, 'id': 'abc001', 'ex_data': {'scholar': '2', 'fam': 'small'}}, {'demo': {'living': '3', 'gender': '1', 'qual': '3', 'edu': '6'}, 'id': 'abc002', 'ex_data': {'scholar': '', 'fam': ''}}, {'demo': {'gender': '1', 'area': '3', 'sal': '3', 'edu': '6'}, 'id': 'abc003', 'ex_data': {'scholar': None, 'fam': 'big'}}]
Also, a bare NaN (or Nan) is not defined in Python, so None is used in its place.
def _is_blank(value):
    """Return True when *value* is None, an empty string, or a float NaN."""
    # NaN is the only float that is not equal to itself.
    return (
        value is None
        or value == ""
        or (isinstance(value, float) and value != value)
    )


for item in data:
    # Collect the top-level values plus the values of one level of nested
    # dicts; a nested dict itself is never treated as a value.
    values = []
    for value in item.values():
        if isinstance(value, dict):
            values.extend(value.values())
        else:
            values.append(value)
    # Use an explicit blank test rather than truthiness, so legitimate
    # answers such as 0 or False are not reported as missing.
    item["completed"] = "no" if any(_is_blank(v) for v in values) else "yes"
The Output will be
data = [{'demo': {'gender': '1', 'job': '6', 'study': '3', 'area': '3'}, 'completed': 'yes', 'id': 'abc001', 'ex_data': {'scholar': '2', 'fam': 'small'}}, {'demo': {'living': '3', 'edu': '6', 'qual': '3', 'gender': '1'}, 'completed': 'no', 'id': 'abc002', 'ex_data': {'scholar': '', 'fam': ''}}, {'demo': {'edu': '6', 'gender': '1', 'sal': '3', 'area': '3'}, 'completed': 'no', 'id': 'abc003', 'ex_data': {'scholar': None, 'fam': 'big'}}]

Related

remove repeated values in dictionary

I want to remove the repeated value in a dictionary after I extracted the needed data which is 'rate' and 'genre'
a=[{'movie': 'abc', 'rate': '9', 'origin': 'AU', 'genre': 'horror'},
{'movie': 'xyz', 'rate': '7', 'origin': 'NY', 'genre': 'romance'},
{'movie': 'jkl', 'rate': '9', 'origin': 'HK', 'genre': 'horror'},
{'movie': 'qwe', 'rate': '6', 'origin': 'HK', 'genre': 'comedy'},
{'movie': 'vbn', 'rate': '9', 'origin': 'BKK', 'genre': 'romance'}]
needed_data=[]
for test in a:
x={}
word=['rate','genre']
for key,value in test.items():
for words in word:
if key == words:
x[key] = value
needed_data.append(x)
results = {}
filters=[]
for yy in needed_data:
for key,value in yy.items():
if value not in results.values():
results[key] = value
filters.append(results)
print(filters)
the output from above code is
[{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'}]
my desired output would be
[{'rate': '9', 'genre': 'horror'},
{'rate': '7', 'genre': 'romance'},
{'rate': '6', 'genre': 'comedy'},
{'rate': '9', 'genre': 'romance'}]
I would recommend to use pandas for data processing
import pandas as pd
# Load the movie records into a frame so pandas can do the de-duplication.
df = pd.DataFrame(a)
# Project onto the two fields of interest, then drop every row that repeats
# an earlier (genre, rate) combination.
wanted_columns = ["genre", "rate"]
df_dd = df.loc[:, wanted_columns].drop_duplicates()
# Convert back to a plain list of dicts, one per remaining row.
new_a = df_dd.to_dict(orient="records")
print(new_a)
Output
[{'genre': 'horror', 'rate': '9.'},
{'genre': 'romance', 'rate': '7'},
{'genre': 'horror', 'rate': '9'},
{'genre': 'comedy', 'rate': '6'},
{'genre': 'romance', 'rate': '9'}]
Your data has strings '9.' and '9' Do you want it that way?
# Index each record by its rate (normalised to two decimals, so '9.' and '9'
# coincide) joined with its genre. A later record with the same key replaces
# the earlier one while the key keeps its original position.
z = {}
for x in needed_data:
    rate_text = f"{float(x['rate']):.2f}"
    z[f"{rate_text}-{x['genre']}"] = x
list(z.values())
Output
[{'rate': '9', 'genre': 'horror'},
{'rate': '7', 'genre': 'romance'},
{'rate': '6', 'genre': 'comedy'},
{'rate': '9', 'genre': 'romance'}]
This is the easy way to do your task:
a = [
    {'movie': 'abc', 'rate': '9.', 'origin': 'AU', 'genre': 'horror'},
    {'movie': 'xyz', 'rate': '7', 'origin': 'NY', 'genre': 'romance'},
    {'movie': 'jkl', 'rate': '9', 'origin': 'HK', 'genre': 'horror'},
    {'movie': 'qwe', 'rate': '6', 'origin': 'HK', 'genre': 'comedy'},
    {'movie': 'vbn', 'rate': '9', 'origin': 'BKK', 'genre': 'romance'},
]

# Keep only 'rate' and 'genre' from each movie, skipping any (rate, genre)
# pair that was already emitted so a repeated combination appears once.
# The comparison is on the raw strings, so '9.' and '9' stay distinct.
c = []
seen = set()
for b in a:
    pair = (b['rate'], b['genre'])
    if pair in seen:
        continue
    seen.add(pair)
    c.append({'rate': b['rate'], 'genre': b['genre']})
print(c)
So the Output will be:
[{'rate': '9.', 'genre': 'horror'}, {'rate': '7', 'genre': 'romance'}, {'rate': '9', 'genre': 'horror'}, {'rate': '6', 'genre': 'comedy'}, {'rate': '9', 'genre': 'romance'}]

How to remove duplicate elements of, list of dictionaries in python

I have a list of campuses:
campus = [{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'},{'id': '3', 'dlin': '1'},{'id': '4', 'dlin': '2'},{'id': '5', 'dlin': '2'},{'id': '6', 'dlin': '1'}, ]
each campus belongs to a school with a unique dlin. I want to have a list in which I have some other lists, each having a few dictionaries.
I run the below code:
schools = []
for i in campus:
ls = []
for j in campus:
if i['dlin'] == j['dlin']:
ls.append(j)
# campus_copy.remove(j)
schools.append(ls)
[print(item) for item in schools]
the result is:
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
[{'id': '4', 'dlin': '2'}, {'id': '5', 'dlin': '2'}]
[{'id': '4', 'dlin': '2'}, {'id': '5', 'dlin': '2'}]
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
I have to either remove the duplicate members from schools or modify the code such that I do not get duplicates.
When I try to remove duplicates from schools, I see that dic item is not hashable so I can not do it.
Two solutions are available that are somewhat similar to my problem:
Remove duplicates from list of dictionaries within list of dictionaries
Remove duplicate dict in list in Python
However, I cannot figure out what to do?
does anybody know how to solve the problem?
what I expect to get is:
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
[{'id': '4', 'dlin': '2'}, {'id': '5', 'dlin': '2'}]
One possible solution is storing the dlin as key in dictionary (and dictionaries cannot have multiple equal keys) rather than removing duplicates explicitly afterwards:
campus = [
    {'id': '1', 'dlin': '1'},
    {'id': '2', 'dlin': '1'},
    {'id': '3', 'dlin': '1'},
    {'id': '4', 'dlin': '2'},
    {'id': '5', 'dlin': '2'},
    {'id': '6', 'dlin': '1'},
]

# Group campuses by school. Each distinct 'dlin' becomes one dict key, so a
# school can only appear once and no de-duplication pass is needed.
schools = {}
for c in campus:
    # setdefault creates the school's list on first sight, then reuses it.
    schools.setdefault(c['dlin'], []).append(c)

for s in schools.values():
    print(s)
Prints:
[{'id': '1', 'dlin': '1'}, {'id': '2', 'dlin': '1'}, {'id': '3', 'dlin': '1'}, {'id': '6', 'dlin': '1'}]
[{'id': '4', 'dlin': '2'}, {'id': '5', 'dlin': '2'}]
Based on the answer of Andrej, I solved another part of the question I had and I wanted just to share it here:
My question:
I am now involved in another issue related to the previous one:
I have this list of dictionaries, each holding the information of one campus. Multiple campuses might belong to one school. I have to distinguish and cluster them based on the similarity of their names.
campus = [
{'id': '1', 'name': 'seneca - york'},
{'id': '2', 'name': 'seneca college - north gate campus'},
{'id': '3', 'name': 'humber college - toronto campus'},
{'id': '4', 'name': 'humber college'},
{'id': '5', 'name': 'humber collge - waterloo campus'},
{'id': '6', 'name': 'university of waterloo toronto campus'},
]
my expected result can be reached by this small and neat code:
# Number of leading characters of a campus name used as the school key.
# This is a rough similarity heuristic: names that share their first
# PREFIX_LEN characters land in the same group, whatever follows.
PREFIX_LEN = 4

schools = {}
for c in campus:
    schools.setdefault(c['name'][:PREFIX_LEN], []).append(c)
print(schools)

Formatting a python dictionary received via xmlrpc for nice output

Is there an easy way to format a dictionary in python for nice output?
I am learning how to interact with an API/XMLRPC in python at the moment. After making a request, I get a dictionary back formatted like the following:
{'category_id': '9', 'parent_id': '3', 'name': 'Headboard', 'is_active': '1', 'position': '6', 'level': '3', 'children': []}, {'category_id': '10', 'parent_id': '3', 'name': 'Mattress', 'is_active': '1', 'position': '7', 'level': '3', 'children': []},
This is a wall of text, easily a few pages. Is there an easy way to display this data nicely, or perhaps just to output the name of each category on one line?
edit:
Here is an attempt to print it via pprint, which ended up omitting a lot of the data:
import xmlrpc.client
import pprint
svc = xmlrpc.client.ServerProxy('https://example.com/api/xmlrpc/')
session = svc.login('apiuser', 'apikey')
temp = svc.call(session, 'catalog_category.tree')
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(temp)
You can use pprint.pprint:
>>> pprint([{'category_id': '9', 'parent_id': '3', 'name': 'Headboard', 'is_active': '1', 'position': '6', 'level': '3', 'children': []}, {'category_id': '10', 'parent_id': '3', 'name': 'Mattress', 'is_active': '1', 'position': '7', 'level': '3', 'children': []}])
[{'category_id': '9',
'children': [],
'is_active': '1',
'level': '3',
'name': 'Headboard',
'parent_id': '3',
'position': '6'},
{'category_id': '10',
'children': [],
'is_active': '1',
'level': '3',
'name': 'Mattress',
'parent_id': '3',
'position': '7'}]
To display just the category names you can do:
>>> [x['name'] for x in ...]
Alternatively you can use json.dump(s) + the JSON viewer of your choice (plenty of online choices available, or just your local browser).
Edit
Processing the whole tree (iteratively, using an explicit stack rather than recursion):
import copy
from pprint import pprint

# Work on a deep copy so the API response held in `temp` is left untouched.
# NOTE(review): assumes `temp` is a single root dict whose 'children' entry
# is a list of dicts of the same shape - confirm against the API response.
t2 = copy.deepcopy(temp)

# Keys removed from every category before printing; extend as needed.
UNWANTED_KEYS = ('category_id', 'is_active', 'level', 'position')

# Walk the tree with an explicit stack instead of recursion.
items = [t2]
while items:
    item = items.pop()
    for key in UNWANTED_KEYS:
        # pop() with a default tolerates a category that lacks the key,
        # where `del item[key]` would raise KeyError.
        item.pop(key, None)
    items.extend(item.get('children', []))
pprint(t2)
This will give you a list of category names:
list_of_dicts = [
    {'category_id': '9', 'parent_id': '3', 'name': 'Headboard', 'is_active': '1', 'position': '6', 'level': '3', 'children': []},
    {'category_id': '10', 'parent_id': '3', 'name': 'Mattress', 'is_active': '1', 'position': '7', 'level': '3', 'children': []},
]
# The loop variable is deliberately not called `dict`, which would shadow
# the builtin type inside the comprehension.
category_names = [category['name'] for category in list_of_dicts]
print(category_names)
OUTPUT:
['Headboard', 'Mattress']
If the data is actually a dictionary of dictionaries, such that it is in the form: { "key_1": {}, "key_2": {} ... "key_n": {} }
then the following code will create a list of the names of categories:
dict_of_dicts = {
    "key_a": {'category_id': '9', 'parent_id': '3', 'name': 'Headboard', 'is_active': '1', 'position': '6', 'level': '3', 'children': []},
    "key_b": {'category_id': '10', 'parent_id': '3', 'name': 'Mattress', 'is_active': '1', 'position': '7', 'level': '3', 'children': []},
}
# Only the inner dicts matter here, so iterate the values. The loop variable
# is deliberately not called `dict`, which would shadow the builtin type.
category_names = [category["name"] for category in dict_of_dicts.values()]
print(category_names)
OUTPUT:
['Headboard', 'Mattress']

Convert list-of-dicts into tree

For two days I have been trying to turn a list of dicts into a tree.
`list_of_dicts = [
{'name':Category1, 'id': '7', 'parent_id': '7', 'level': '1'}
{'name':Category3, 'id': '33', 'parent_id': '7', 'level': '2'}
{'name':Category5, 'id': '334', 'parent_id': '33', 'level': '3'}
{'name':Category10, 'id': '23', 'parent_id': '7', 'level': '2'}
{'name':Category2, 'id': '8', 'parent_id': '8', 'level': '1'}
{'name':Category6, 'id': '24', 'parent_id': '8', 'level': '2'}
]`
For information: a top-level category (level 1) has its own id as its parent_id, while a child carries the id of its parent as parent_id, along with its level.
In a first step the list need to turn in something like a tree:
`traversed_list = [
{'name':Category1, 'id': '7', 'parent_id': '7', 'level': '1', 'children':
[
{'name':Category3, 'id': '33', 'parent_id': '7', 'level': '2', 'children': [
{'name':Category5, 'id': '334', 'parent_id': '33', 'level': '3', 'children':[]}]}
{'name':Category10, 'id': '23', 'parent_id': '7', 'level': '2', 'children':[]}
]}
{'name':Category2, 'id': '8', 'parent_id': '8', 'level': '1', 'children':
[{'name':Category6, 'id': '24', 'parent_id': '8', 'level': '2', 'children':[]}]
}]`
The following code:
import copy
# Attempt to turn a flat list of category dicts into a list of root dicts,
# each carrying a 'children' list. Only one level of children is attached.
def treeify(lst):
# Roots are deep copies of the entries whose 'level' is the string '1'.
tree = [copy.deepcopy(cat) for cat in lst if cat['level'] == '1']
# Give every root a fresh, empty 'children' list.
for el in tree:
el["children"] = []
# NOTE: xrange exists only in Python 2; Python 3 spells it range.
for i in xrange(len(lst)):
for j in xrange(len(tree)):
# The match is on parent_id alone, so a root (whose parent_id equals its own
# id) is also appended as a child of itself. An entry whose parent is not a
# root is never attached anywhere, because only roots are scanned here.
if lst[i]["parent_id"] == tree[j]["id"]:
# The appended copy gets no 'children' key of its own.
tree[j]["children"].append(copy.deepcopy(lst[i]))
return tree
list_of_dicts = [
{'name':"Category1", 'id': '7', 'parent_id': '7', 'level': '1'},
{'name':"Category3", 'id': '33', 'parent_id': '7', 'level': '2'},
{'name':"Category5", 'id': '334', 'parent_id': '33', 'level': '3'},
{'name':"Category10", 'id': '23', 'parent_id': '7', 'level': '2'},
{'name':"Category2", 'id': '8', 'parent_id': '8', 'level': '1'},
{'name':"Category6", 'id': '24', 'parent_id': '8', 'level': '2'}
]
tree = treeify(list_of_dicts)
for d in tree:
print d
prints
{'id': '7', 'parent_id': '7', 'children': [{'id': '7', 'parent_id': '7', 'name': 'Category1', 'level': '1'}, {'id': '33', 'parent_id': '7', 'name': 'Category3', 'level': '2'}, {'id': '23', 'parent_id': '7', 'name': 'Category10', 'level': '2'}], 'name': 'Category1', 'level': '1'}
{'id': '8', 'parent_id': '8', 'children': [{'id': '8', 'parent_id': '8', 'name': 'Category2', 'level': '1'}, {'id': '24', 'parent_id': '8', 'name': 'Category6', 'level': '2'}], 'name': 'Category2', 'level': '1'}

Merging nested dictionaries by keys preserving different values

I have two list of nested dictionaries with the same keys, but different values:
d1 = {
'distilled ': [{'water': '45'}, {'vodka': '9'}, {'vinegar': '7'}, {'beer': '6'}, {'alcohol': '5'}, {'whiskey': '5'}],
'planted': [{'tree': '30'}, {'seed': '28'}, {'flower': '20'}, {'plant': '7'}, {'bomb': '4'}, {'garden': '2'}]
}
and
d2 = {
'distilled ': [{'water': '14'}, {'vinegar': '9'}, {'wine': '8'}, {'alcohol': '8'}, {'liquid': '7'}, {'whiskey': '6'}, {'beer': '5'}],
'planted ': [{'flower': '28'}, {'tree': '18'}, {'seed': '9'}, {'vegetable': '4'}, {'bush': '3'}, {'grass': '3'}, {'garden': '3'}]
}
I want to merge them in a way that preserves the values and merges only the keys in the nested dictionaries. So that the outcome would look like:
{
'distilled ': [('water', '45', '14'), ('vodka', '9'), ('vinegar', '7', '9'), ('beer', '6', '5'), ('alcohol', '5'), ('whiskey', '5'), ('wine', '8')],
'planted': [('tree', '30', '18'), ('seed', '28', '9'), ('flower', '20', '7'), ('plant', '7'), ('bomb', '4'), ('garden', '2', '3')]
}
I tried merging the two using:
d_merged = { k: [ d1[k], d2_to_compare[k] ] for k in d1 }
but in the outcome only the values of the first dictionary are presented, obviously. Do you have any ideas on how to fix this? Thank you very much in advance.
I am not sure which way to take from here. Would really appreciate any suggestions! Thanks a lot.
Using dicts that hold only one key-value pair each is not a good idea, but anyway, we can work it out like this:
d1 = {
    'distilled': [{'water': '45'}, {'vodka': '9'}, {'vinegar': '7'}, {'beer': '6'}, {'alcohol': '5'}, {'whiskey': '5'}],
    'planted': [{'tree': '30'}, {'seed': '28'}, {'flower': '20'}, {'plant': '7'}, {'bomb': '4'}, {'garden': '2'}],
}
d2 = {
    'distilled': [{'water': '14'}, {'vinegar': '9'}, {'wine': '8'}, {'alcohol': '8'}, {'liquid': '7'}, {'whiskey': '6'}, {'beer': '5'}],
    'planted': [{'flower': '28'}, {'tree': '18'}, {'seed': '9'}, {'vegetable': '4'}, {'bush': '3'}, {'grass': '3'}, {'garden': '3'}],
}


def _flatten(pairs):
    """Collapse a list of single-entry dicts into one dict, keeping list order."""
    flat = {}
    for pair in pairs:
        flat.update(pair)
    return flat


# For every top-level key of d1, merge the nested entries of d1 and d2 into
# tuples of the form (name, value_from_d1, value_from_d2). A side that lacks
# the name contributes nothing, so such tuples have only two items.
d3 = {}
for k in d1:
    k1 = _flatten(d1[k])
    # .get tolerates a top-level key that is absent from d2.
    k2 = _flatten(d2.get(k, []))
    ret = []
    # Names appear in d1's order first, then d2-only names in d2's order.
    # Iterating a set union here would make the order arbitrary.
    for name in list(k1) + [n for n in k2 if n not in k1]:
        values = tuple(src[name] for src in (k1, k2) if name in src)
        ret.append((name,) + values)
    d3[k] = ret
print(d3)

Categories