Remove nested element occurs twice but should be only once - python

I have a problem. I want to remove all nested elements inside a dict, but unfortunately my code does not work: every nested element occurs twice in the result, when it should occur only once.
What is causing this?
Method
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append(my_new_dict)
return my_list
Running example
import uuid
my_Dict = {
'_key': '1',
'group': 'test',
'data': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
my_Dict2 = {
'_key': '2',
'group': 'test',
'data2': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail2': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
dictionaries = [my_Dict, my_Dict2]
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append(my_new_dict)
return my_list
result = nested_dict(dictionaries)
result
[OUT]
[{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}}]
What I want
[{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}}]

import uuid
import json
my_Dict = {
'_key': '1',
'group': 'test',
'data': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
my_Dict2 = {
'_key': '2',
'group': 'test',
'data2': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail2': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
dictionaries = [my_Dict, my_Dict2]
def nested_dict(dictionaries):
    """Pull the nested dicts out of each dictionary, tagging each with an id.

    For every dict-valued entry, a fresh UUID string is stored inside the
    nested dict under ``"__id"``, the nested dict is collected, and the
    entry in the source dictionary is replaced by that id string.

    Args:
        dictionaries: Iterable of dicts. Each one is modified in place.

    Returns:
        A list with exactly one dict per input dictionary, mapping each
        extracted key to its (now tagged) nested dict.
    """
    my_list = []
    for my_Dict in dictionaries:
        my_new_dict = {}
        # Only values of existing keys are reassigned below, so the dict's
        # size never changes while it is being iterated.
        for key in my_Dict:
            value = my_Dict[key]
            if isinstance(value, dict):
                idx = str(uuid.uuid4())
                value["__id"] = idx
                my_new_dict[key] = value
                my_Dict[key] = idx
        # Append once per input dictionary (outer-loop level). Appending
        # inside the inner loop adds the same dict once per nested key,
        # which is what produced the duplicated entries.
        my_list.append(my_new_dict)
    return my_list
output:
[
{
"data": {
"__id": "5c6769cf-01e5-4f5d-acfa-622472163aba"
},
"detail": {
"selector": {
"number": "12312",
"isTrue": true,
"requirements": [
{
"type": "customer",
"requirement": "1"
}
]
},
"__id": "d167277f-4d02-4d53-934b-131187f6f214"
}
},
{
"data2": {
"__id": "e9182913-c2fc-4d60-adb8-b0b8274faf50"
},
"detail2": {
"selector": {
"number": "12312",
"isTrue": true,
"requirements": [
{
"type": "customer",
"requirement": "1"
}
]
},
"__id": "46e6be7b-8903-4d2a-a768-f6b24fcc5d31"
}
}
]
Only a minor change is needed: you are appending to the list inside the inner for loop, but you should do it at the outer for-loop level (once per dictionary). I have pasted the code above, together with the output I got.

I think it is because my_new_dict is holding an object that is changed by the time it appends to the list.
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append({key: my_new_dict[key]})
print(my_list)
return my_list

Related

python find sub tree minimum and maximum value

I have a tree of nodes; each node has id, name, nodes, roles, parent_id and type.
I need to write a function that checks each node: if the node has roles, it finds the minimum and maximum duration among those roles.
After that I need to compare the result with the node's parent (if it is not None): if the parent has a bigger maximum duration, the child gets the maximum duration of the parent/grandparent.
I'll add some examples and what I expected to get
# example 1
roles_1 = [{id: 'r1', 'duration': 4}]
roles_2 = [{id: 'r2', 'duration': 5}, {id: 'r3', 'duration': 8}]
input_tree1 = {
'nodes': [
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'nodes': [
{
'id': '2',
'name': 'o_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_1,
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
}
],
}
],
}
]
}
I have lists of roles; each list contains role objects, and each role has an id and a duration.
In this example, I have the parent org1 with one child o_folder_1 and one grandchild
o1_f1_project_1. What I need to do is check whether the grandchild has roles; if it does, I calculate the min and max duration of its roles (here the max is 8). Then I go one level up and calculate the same for that node's roles, if it has any (here 4). 4 is less than 8, so I need to add new fields for the minimum and maximum duration, as you can see in the expected tree:
expected_tree1 = {'nodes': [
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'nodes': [
{
'id': '2',
'name': 'o_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_1,
'min_duration': 4,
'max_duration': 4,
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
'min_duration': 4,
'max_duration': 8
}
],
}
],
}
]
}
as you can see here, I added new field of min_duration and maximum_duration for each node with the right value.
example 2:
# example 2
roles_1 = [{id: 'r1', 'duration': 12}]
roles_2 = [{id: 'r2', 'duration': 4}, {id: 'r3', 'duration': 2}]
roles_3 = [{id: 'r4', 'duration': 5}]
roles_4 = [{id: 'r5', 'duration': 9}]
input_tree2 = {'nodes':
[
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'nodes':
[
{
'id': '2',
'name': 'o1_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_1,
'min_duration': 12,
'max_duration': 12
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
},
{
'id': '4',
'name': 'o1_f1_project_2',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_3,
},
],
},
{
'id': '4',
'name': 'o1_folder_2',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_4,
'nodes': [
{
'id': '3',
'name': 'o1_f2_project1',
'nodes': [],
'org_id': '1',
'parent_id': '4',
'type': 'project',
'roles': roles_3,
}
],
},
],
}]}
expected tree 2
expected_tree2 = {'nodes':
[
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'nodes':
[
{
'id': '2',
'name': 'o1_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_1,
'min_duration': 12,
'max_duration': 12
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
'min_duration': 2,
'max_duration': 12
},
{
'id': '4',
'name': 'o1_f1_project_2',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
'min_duration': 5,
'max_duration': 12
},
],
},
{
'id': '4',
'name': 'o1_folder_2',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_4,
'min_duration': 9,
'max_duration': 9
'nodes': [
{
'id': '3',
'name': 'o1_f2_project1',
'nodes': [],
'org_id': '1',
'parent_id': '4',
'type': 'project',
'roles': roles_3,
'min_duration': 5,
'max_duration': 9
}
],
},
],
}]}
example 3
roles_1 = [{id: 'r1', 'duration': 18}]
roles_2 = [{id: 'r2', 'duration': 4}, {id: 'r3', 'duration': 2}]
roles_3 = [{id: 'r4', 'duration': 5}]
roles_4 = [{id: 'r5', 'duration': 9}, {id: 'r5', 'duration': 12}]
roles_5 = [{id: 'r6', 'duration': 20}]
input_tree3 = {'nodes':
[
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'roles': roles_1,
'nodes':
[
{
'id': '2',
'name': 'o1_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_3,
'min_duration': 12,
'max_duration': 12,
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
},
{
'id': '4',
'name': 'o1_f1_project_2',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
},
],
},
{
'id': '4',
'name': 'o1_folder_2',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_5,
'nodes': [
{
'id': '3',
'name': 'o1_f2_project1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_4,
}
],
},
],
}]}
expected tree 3
expected_tree3 = {'nodes':
[
{
'id': '1',
'name': 'org1',
'parent_id': None,
'type': 'organization',
'roles': roles_1,
'min_duration': 18,
'max_duration': 18,
'nodes':
[
{
'id': '2',
'name': 'o1_folder_1',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_3,
'min_duration': 5,
'max_duration': 18,
'nodes': [
{
'id': '3',
'name': 'o1_f1_project_1',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
'min_duration': 2,
'max_duration': 18
},
{
'id': '4',
'name': 'o1_f1_project_2',
'nodes': [],
'org_id': '1',
'parent_id': '2',
'type': 'project',
'roles': roles_2,
'min_duration': 2,
'max_duration': 18
},
],
},
{
'id': '4',
'name': 'o1_folder_2',
'org_id': '1',
'parent_id': '1',
'type': 'folder',
'roles': roles_5,
'min_duration': 18,
'max_duration': 20,
'nodes': [
{
'id': '3',
'name': 'o1_f2_project1',
'nodes': [],
'org_id': '1',
'parent_id': '4',
'type': 'project',
'roles': roles_4,
'min_duration': 9,
'max_duration': 20
}
],
},
],
}]}
As you can see in the examples, each time I check whether the last child has roles; if it does, I calculate its maximum duration and then compare it with its parent's, if the parent has one. If the parent has a larger maximum duration, the child gets it. If not, only the child keeps its own maximum duration: the parent is not affected by it and does not get the child's duration.
Assumptions:
A node whose parent_id is None is the root.
The tree can have many levels (root->child->grandchild->....).
If the root doesn't have roles, its children still need to be checked.
A node can have any number of children (it is not always a binary tree).
Here is what I have tried so far:
def add_min_max_duration(tree:Dict):
for node in tree["nodes"]:
if "roles" in node.keys():
node["min_duration"] = 25
node["max_duration"] = 0
for role in node["roles"]:
node["min_duration"] = min(node["min_duration"], role.duration)
node["max_duration"] = max(node["max_duration"], role.duration)
add_min_max_duration(node)
But this is not good enough, because it does not compare each node with its parent/grandparent.
data abstraction
Start with a min_max data type with a + operation for combining two min_max instances -
mm1 = min_max(2,5)
mm2 = min_max(3,9)
mm3 = mm1 + mm2 # (2, 9)
We could write it something like this -
from math import inf
class min_max:
    """Running (minimum, maximum) pair that can be merged with ``+``.

    The defaults (``+inf``, ``-inf``) act as the identity element: adding a
    default instance to any other leaves that other's bounds unchanged.
    """

    def __init__(self, min = +inf, max = -inf):
        # The parameter names shadow the builtins only inside this method.
        self.min = min
        self.max = max

    def __add__(self, other):
        """Return a new min_max covering the ranges of both operands."""
        return min_max(min(self.min, other.min), max(self.max, other.max))

    def __repr__(self):
        return f"min_max({self.min!r}, {self.max!r})"
role_min_max
We can compute the min_max of a role by writing role_min_max -
def role_min_max(role):
    """Return the min_max of the ``"duration"`` values in a list of roles.

    ``role`` is iterated, and each element is indexed with ``"duration"``.
    """
    return iter_min_max(x["duration"] for x in role)


def iter_min_max(iterable):
    """Fold an iterable of values into a single min_max.

    An empty iterable yields the default ``min_max()``.
    """
    r = min_max()
    for value in iterable:
        # A single value is the range (value, value); ``+`` widens r to it.
        r = r + min_max(value, value)
    return r
tree_min_max
Finally, tree_min_max starts with a default min_max of (+Infinity, -Infinity), that is, min = +Infinity and max = -Infinity, and we + it to the role_min_max of the tree["roles"] value, if present. For each node among the children tree["nodes"], we call tree_min_max(node, mm) with the updated min_max, mm -
def tree_min_max(tree, mm = min_max()):
    """Return a copy of ``tree`` annotated with accumulated duration bounds.

    ``mm`` carries the bounds gathered from the ancestors. If the node has
    ``"roles"``, their durations are merged in before the node is annotated
    and before ``mm`` is passed down to the children in ``tree["nodes"]``.

    Every node gets ``"duration_min"`` and ``"duration_max"``. A node with
    no roles anywhere above it gets the untouched default bounds.
    The input tree is not modified; new dicts are built at every level.
    """
    # The shared default instance is safe here: ``+`` builds a new min_max
    # instead of mutating either operand.
    if "roles" in tree:
        mm = mm + role_min_max(tree["roles"])
    return {
        "duration_min": mm.min,
        "duration_max": mm.max,
        **tree,
        # Listed after **tree so the annotated children replace the originals.
        "nodes": [tree_min_max(node, mm) for node in tree["nodes"]],
    }
demo
We will use json to pretty print the newly created tree -
import json
print(json.dumps(tree_min_max(input_tree3), indent=2))
{
"duration_min": Infinity,
"duration_max": -Infinity,
"nodes": [
{
"duration_min": 18,
"duration_max": 18,
"id": "1",
"name": "org1",
"parent_id": null,
"type": "organization",
"roles": [
{
"id": "r1",
"duration": 18
}
],
"nodes": [
{
"duration_min": 5,
"duration_max": 18,
"id": "2",
"name": "o1_folder_1",
"org_id": "1",
"parent_id": "1",
"type": "folder",
"roles": [
{
"id": "r4",
"duration": 5
}
],
"nodes": [
{
"duration_min": 2,
"duration_max": 18,
"id": "3",
"name": "o1_f1_project_1",
"nodes": [],
"org_id": "1",
"parent_id": "2",
"type": "project",
"roles": [
{
"id": "r2",
"duration": 4
},
{
"id": "r3",
"duration": 2
}
]
},
{
"duration_min": 2,
"duration_max": 18,
"id": "4",
"name": "o1_f1_project_2",
"nodes": [],
"org_id": "1",
"parent_id": "2",
"type": "project",
"roles": [
{
"id": "r2",
"duration": 4
},
{
"id": "r3",
"duration": 2
}
]
}
]
},
{
"duration_min": 18,
"duration_max": 20,
"id": "4",
"name": "o1_folder_2",
"org_id": "1",
"parent_id": "1",
"type": "folder",
"roles": [
{
"id": "r6",
"duration": 20
}
],
"nodes": [
{
"duration_min": 9,
"duration_max": 20,
"id": "3",
"name": "o1_f2_project1",
"nodes": [],
"org_id": "1",
"parent_id": "2",
"type": "project",
"roles": [
{
"id": "r5",
"duration": 9
},
{
"id": "r5",
"duration": 12
}
]
}
]
}
]
}
]
}
Note, your role_* data should put id in quotes, otherwise the key for the dictionary is actually the id function, not the string "id" like you are probably intending.
Also note there was a hard-code "min_duration" and "max_duration" in the input provided which I removed here in this post.
To remove the duration_min and duration_max from the root node, we can add a special case for when "roles" is not present in the node -
def tree_min_max(tree, mm = min_max()):
    """Return a copy of ``tree`` annotated with accumulated duration bounds.

    Only nodes that have a ``"roles"`` key receive ``"duration_min"`` and
    ``"duration_max"``. Nodes without roles are copied without those keys
    and pass the inherited ``mm`` on to their children unchanged.
    """
    if "roles" in tree:
        mm = mm + role_min_max(tree["roles"])
        bounds = {"duration_min": mm.min, "duration_max": mm.max}
    else:
        bounds = {}
    return {
        **bounds,
        **tree,
        # Listed after **tree so the annotated children replace the originals.
        "nodes": [tree_min_max(node, mm) for node in tree["nodes"]],
    }
without the min_max class
We could write min_max as a simple function which takes two tuples and outputs a tuple of the combined minimums and maximums -
def min_max(a, b):
    """Merge two ``(min, max)`` tuples into one tuple covering both ranges."""
    (amin, amax) = a
    (bmin, bmax) = b
    return min(amin, bmin), max(amax, bmax)
Then we update iter_min_max -
def iter_min_max(iterable):
    """Fold an iterable of values into a single ``(min, max)`` tuple.

    An empty iterable yields the identity pair ``(+inf, -inf)``.
    """
    r = (+inf, -inf)
    for value in iterable:
        # A single value is the range (value, value).
        r = min_max(r, (value, value))
    return r
In tree_min_max we need to initialize mm to the default min-max and if "roles" is present, update mm with the computed role_min_max. Notice because mm is no longer a class, we cannot access the subfields, mm.min and mm.max. Instead mm is a tuple and the fields are mm[0] and mm[1] -
def tree_min_max(tree, mm = (+inf, -inf)):
    """Return a copy of ``tree`` annotated with accumulated duration bounds.

    Tuple-based variant: ``mm`` is a ``(min, max)`` pair, so the bounds are
    read as ``mm[0]`` and ``mm[1]``.
    """
    # NOTE(review): assumes role_min_max returns a (min, max) tuple here,
    # i.e. that it is built on the tuple-based iter_min_max - confirm.
    if "roles" in tree:
        mm = min_max(mm, role_min_max(tree["roles"]))
    return {
        "duration_min": mm[0],
        "duration_max": mm[1],
        **tree,
        # Listed after **tree so the annotated children replace the originals.
        "nodes": [tree_min_max(node, mm) for node in tree["nodes"]],
    }
This is more cognitive load on the programmer and they need to be careful to initialize a min-max with (+inf, -inf) manually. Using the class avoids this burden and makes it less likely for a mistake to occur.

How do I get data from a nested dict?

Hello, I'm trying to get specific data out of an API call to a website.
This is the data I'm receiving:
{'type': 'NonStockItem', 'attributes': [], 'id': '1', 'description': 'Ikke lagerførte varer høy sats'}
{'type': 'NonStockItem', 'attributes': [], 'id': '2', 'description': 'Ikke lagerførte varer middels sats'}
{'type': 'NonStockItem', 'attributes': [], 'id': '3', 'description': 'Ikke lagerførte varer lav sats'}
{'type': 'NonStockItem', 'attributes': [], 'id': '4', 'description': 'Ikke lagerførte varer avgiftsfri'}
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}], 'id': '5', 'description': 'Lagerførte varer høy sats'}
{'type': 'FinishedGoodItem', 'attributes': [], 'id': '6', 'description': 'Lagerførte varer middels sats'}
{'type': 'FinishedGoodItem', 'attributes': [], 'id': '7', 'description': 'Lagerførte varer avgiftsfri'}
{'type': 'LaborItem', 'attributes': [], 'id': '8', 'description': 'Tjenester (prosjekt)'}
{'type': 'ExpenseItem', 'attributes': [], 'id': '9', 'description': 'Utgifter (Reise)'}
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': True, 'attributeType': 'Text', 'details': []}], 'id': 'ONLINE', 'description': 'Online'}
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}, {'attributeId': 'WEB2', 'description': 'tilgjengelighet i nettbutikk', 'required': True, 'attributeType': 'Combo', 'details': [{'id': 'Ikke Inne', 'description': 'Produktet er utsolgt.'}, {'id': 'Inne', 'description': 'tilgjengelig i nettbutikk'}]}], 'id': 'WEB', 'description': 'Tilgjengelig på nettbutikk.'}
This is the object fields
[
{
"type": "NonStockItem",
"attributes": [
{
"attributeId": "string",
"description": "string",
"sortOrder": 0,
"required": true,
"attributeType": "Text"
}
]
this is my code
if response.status_code == 200:
itemClass = json.loads(response.text)
for item in itemClass:
print(item["type"])
print(item["description"])
print(item["attributes"])
What I'm trying to do is to get only the attributes with an existing attributeId. I'm a bit stuck because the data inside the attributes array is a dict, how can I get the key values?
Current output:
NonStockItem
Ikke lagerførte varer høy sats
[]
NonStockItem
Ikke lagerførte varer middels sats
[]
NonStockItem
Ikke lagerførte varer lav sats
[]
NonStockItem
Ikke lagerførte varer avgiftsfri
[]
FinishedGoodItem
Lagerførte varer høy sats
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}]
FinishedGoodItem
Lagerførte varer middels sats
[]
FinishedGoodItem
Lagerførte varer avgiftsfri
[]
LaborItem
Tjenester (prosjekt)
[]
ExpenseItem
Utgifter (Reise)
[]
FinishedGoodItem
Online
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': True, 'attributeType': 'Text', 'details': []}]
FinishedGoodItem
Tilgjengelig på nettbutikk.
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}, {'attributeId': 'WEB2', 'description': 'tilgjengelighet i nettbutikk', 'required': True, 'attributeType': 'Combo', 'details': [{'id': 'Ikke Inne', 'description': 'Produktet er utsolgt.'}, {'id': 'Inne', 'description': 'tilgjengelig i nettbutikk'}]}]
I only want the types which contain an attributeId
I am assuming the list you are working on is accessible using lst[0]['attributes'].
Try the following, which uses list comprehension:
# Sample payload: one item whose "attributes" list mixes an entry that has
# an "attributeId" key with one that does not.
lst = [
    {
        "type": "NonStockItem",
        "attributes": [
            {
                "attributeId": "string",
                "description": "string",
                "sortOrder": 0,
                "required": True,
                "attributeType": "Text",
            },
            {
                # This entry deliberately has no attributeId
                "description": "string",
                "sortOrder": 0,
                "required": True,
                "attributeType": "Text",
            },
        ],
    },
]
attrs = lst[0]["attributes"]
# Keep only the attribute dicts that carry an "attributeId" key.
output = list(filter(lambda entry: "attributeId" in entry, attrs))
print(output)
Output:
[{'attributeId': 'string', 'description': 'string', 'sortOrder': 0, 'required': True, 'attributeType': 'Text'}]
Note that the output has only one element; in the input example I gave, the second dict does not have attributeId.
Pandas json_normalize could be used for this as well:
import json
import pandas as pd
response = '''[
{
"type": "NonStockItem",
"attributes": [
{
"attributeId": "string1",
"description": "string",
"sortOrder": 0,
"required": true,
"attributeType": "Text"
},
{
"attributeId": "string2",
"description": "string",
"sortOrder": 0,
"required": true,
"attributeType": "Text"
}]
},
{
"type": "NonStockItem",
"attributes":[]
},
{
"type": "NonStockItem",
"attributes": [
{
"attributeId": "string3",
"description": "string",
"sortOrder": 0,
"required": true,
"attributeType": "Text"
},
{
"attributeId": "string4",
"description": "string",
"sortOrder": 0,
"required": true,
"attributeType": "Text"
}]
}
]
'''
itemClass = json.loads(response)
print(pd.concat([pd.json_normalize(x["attributes"]) for x in itemClass],
ignore_index=True))
attributeId description sortOrder required attributeType
0 string1 string 0 True Text
1 string2 string 0 True Text
2 string3 string 0 True Text
3 string4 string 0 True Text
The best solution that I could think considering your data sample and output is to verify if item["attributes"] has values inside or no:
Code:
itemclass = [{'type': 'NonStockItem', 'attributes': [], 'id': '1', 'description': 'Ikke lagerførte varer høy sats'},
{'type': 'NonStockItem', 'attributes': [], 'id': '2', 'description': 'Ikke lagerførte varer middels sats'},
{'type': 'NonStockItem', 'attributes': [], 'id': '3', 'description': 'Ikke lagerførte varer lav sats'},
{'type': 'NonStockItem', 'attributes': [], 'id': '4', 'description': 'Ikke lagerførte varer avgiftsfri'},
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}], 'id': '5', 'description': 'Lagerførte varer høy sats'},
{'type': 'FinishedGoodItem', 'attributes': [], 'id': '6', 'description': 'Lagerførte varer middels sats'},
{'type': 'FinishedGoodItem', 'attributes': [], 'id': '7', 'description': 'Lagerførte varer avgiftsfri'},
{'type': 'LaborItem', 'attributes': [], 'id': '8', 'description': 'Tjenester (prosjekt)'},
{'type': 'ExpenseItem', 'attributes': [], 'id': '9', 'description': 'Utgifter (Reise)'},
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': True, 'attributeType': 'Text', 'details': []}], 'id': 'ONLINE', 'description': 'Online'},
{'type': 'FinishedGoodItem', 'attributes': [{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}, {'attributeId': 'WEB2', 'description': 'tilgjengelighet i nettbutikk', 'required': True, 'attributeType': 'Combo', 'details': [{'id': 'Ikke Inne', 'description': 'Produktet er utsolgt.'}, {'id': 'Inne', 'description': 'tilgjengelig i nettbutikk'}]}], 'id': 'WEB', 'description': 'Tilgjengelig på nettbutikk.'}]
for item in itemclass:
    # An empty "attributes" list is falsy, so items without any attributes
    # are skipped and only the ones with attribute entries are printed.
    if item["attributes"]:
        print(item["type"])
        print(item["description"])
        print(item["attributes"])
Output:
FinishedGoodItem
Lagerførte varer høy sats
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}]
FinishedGoodItem
Online
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': True, 'attributeType': 'Text', 'details': []}]
FinishedGoodItem
Tilgjengelig på nettbutikk.
[{'attributeId': 'NETTBUTIKK', 'description': 'WEB', 'required': False, 'attributeType': 'Text', 'details': []}, {'attributeId': 'WEB2', 'description': 'tilgjengelighet i nettbutikk', 'required': True, 'attributeType': 'Combo', 'details': [{'id': 'Ikke Inne', 'description': 'Produktet er utsolgt.'}, {'id': 'Inne', 'description': 'tilgjengelig i nettbutikk'}]}]

How to get the count for a particular key in the dictionary

My content inside a dictionary is below
I need to know, for BusinessArea, how often each different name occurs; I need to know the same for Designation.
test=
[ { 'masterid': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Accounting', 'parentname': 'Finance'}, { 'id': '3', 'name': 'Research', 'parentname': 'R & D' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }] },
{ 'masterid': '2', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research', 'parentname': '' }, { 'id': '3', 'name': 'Accounting', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]},
{ 'masterid': '3', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Engineering' }, { 'id': '3', 'name': 'Engineering', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer', 'parentname': '' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]}]
For every name under BusinessArea and Designation, I want the number of masterid records in which that name appears.
Expected out is below
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "l2",
"count": "3"
}
]
}
]
Try this:
# For each field, count in how many records of `test` every name appears.
# A name is counted at most once per record, so repeats inside one record
# (e.g. the two 'Engineering' areas under masterid 3) do not inflate it.
res = []
for field in ('BusinessArea', 'Designation'):
    counts = {}
    for record in test:
        # .get() tolerates records that lack the field entirely.
        names = (entry['name'] for entry in record.get(field, []))
        # dict.fromkeys drops repeats while keeping first-seen order.
        for name in dict.fromkeys(names):
            counts[name] = counts.get(name, 0) + 1
    res.append({
        'name': field,
        'values': [{'name': name, 'count': count} for name, count in counts.items()],
    })
print(res)
# [{'name': 'BusinessArea', 'values': [{'name': 'Accounting', 'count': 2}, {'name': 'Research', 'count': 2}, {'name': 'Engineering', 'count': 1}]}, {'name': 'Designation', 'values': [{'name': 'L1', 'count': 3}, {'name': 'L2', 'count': 3}]}]
You could count unique names using a nested collections.defaultdict:
from collections import defaultdict
from json import dumps
keys = ["BusinessArea", "Designation"]
# Outer key: field name; inner key: item name; value: number of groups
# in which that name appears.
group_counts = defaultdict(lambda: defaultdict(int))
for group in test:
    for key in keys:
        names = [item["name"] for item in group[key]]
        # dict.fromkeys drops repeats within one group while keeping
        # first-seen order, so each name counts once per group.
        for name in dict.fromkeys(names):
            group_counts[key][name] += 1
print(dumps(group_counts, indent=2))
Which will give you these counts:
{
"BusinessArea": {
"Accounting": 2,
"Research": 2,
"Engineering": 1
},
"Designation": {
"L1": 3,
"L2": 3
}
}
Then you could modify the result to get the list of dicts you expect:
result = [
{
"name": name,
"values": [{"name": value, "count": count} for value, count in counts.items()],
}
for name, counts in group_counts.items()
]
print(dumps(result, indent=2))
Which gives you this:
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": 2
},
{
"name": "Research",
"count": 2
},
{
"name": "Engineering",
"count": 1
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": 3
},
{
"name": "L2",
"count": 3
}
]
}
]

How to extract Json data scraped from website

I used Beautiful Soup to extract data from a website. The content is in JSON, and I need to extract all the display_name values. I have no clue how to navigate the structure and print the values I need to save in my CSV.
I tried using some array examples like this one
for productoslvl in soup2.findAll('script',{'id' :'searchResult'}):
element = jsons[0]['display_name']
print (element)
but I keep getting KeyError
This is the JSON data:
{
'page_size': -1,
'refinements': [{
'display_name': 'Brand',
'values': [{
'display_name': 'Acqua Di Parma',
'status': 4,
'value': 900096
}],
'type': 'checkboxes'
}, {
'display_name': 'Bristle Type',
'values': [{
'display_name': 'Addictive',
'status': 1,
'value': 14578019
}, {
'display_name': 'Casual',
'status': 1,
'value': 14578020
}, {
'display_name': 'Chic',
'status': 1,
'value': 14301148
}, {
'display_name': 'Polished',
'status': 1,
'value': 14578022
}],
'type': 'checkboxes'
}, {
'display_name': 'Coverage',
'values': [{
'display_name': 'Balanced',
'status': 1,
'value': 14301025
}, {
'display_name': 'Light',
'status': 1,
'value': 14577894
}, {
'display_name': 'Rich',
'status': 1,
'value': 14577895
}],
'type': 'checkboxes'
}, {
'display_name': 'Formulation',
'values': [{
'display_name': 'Cream',
'status': 1,
'value': 100069
}, {
'display_name': 'Spray',
'status': 1,
'value': 100072
}],
'type': 'checkboxes'
}

How to convert/update the key-values information in defaultdict?

How do I convert the following defaultdict()?
defaultdict(<class 'dict'>, {
'key1_A': {
'id': 'key1',
'length': '663',
'type': 'A'},
'key1_B': {
'id': 'key1',
'length': '389',
'type': 'B'},
'key2_A': {
'id': 'key2',
'length': '865',
'type': 'A'},
'key2_B': {
'id': 'key2',
'length': '553',
'type': 'B' ........}})
The value of id (e.g. key1) should become the key, and the key called length should be renamed to length_A or length_B according to the entry's type, keeping the corresponding value:
defaultdict(<class 'dict'>, {
'key1': {
'length_A': '663',
'length_B': '389'},
'key2': {
'length_A': '865',
'length_B': '553'}})
Thanks,
I think this does what you want:
from collections import defaultdict
import pprint

d = {
    'key1_A': {
        'id': 'key1',
        'length': '663',
        'type': 'A',
    },
    'key1_B': {
        'id': 'key1',
        'length': '389',
        'type': 'B',
    },
    'key2_A': {
        'id': 'key2',
        'length': '865',
        'type': 'A',
    },
    'key2_B': {
        'id': 'key2',
        'length': '553',
        'type': 'B',
    },
}

# Regroup by the inner "id": each entry adds one "length_<type>" field to
# the dict for its id. The original outer keys (e.g. 'key1_A') are not used.
transformed = defaultdict(dict)
for v in d.values():
    transformed[v["id"]][f"length_{v['type']}"] = v["length"]

pprint.pprint(transformed)
# Output:
# defaultdict(<class 'dict'>,
#             {'key1': {'length_A': '663', 'length_B': '389'},
#              'key2': {'length_A': '865', 'length_B': '553'}})

Categories