How to convert/update the key-values information in defaultdict? - python

How do I convert the following defaultdict()?
defaultdict(<class 'dict'>, {
'key1_A': {
'id': 'key1',
'length': '663',
'type': 'A'},
'key1_B': {
'id': 'key1',
'length': '389',
'type': 'B'},
'key2_A': {
'id': 'key2',
'length': '865',
'type': 'A'},
'key2_B': {
'id': 'key2',
'length': '553',
'type': 'B' ........}})
the value of the id i.e key1 becomes the key, and the key called length is changed to length_A or B with corresponding values belonging in the earlier type.
defaultdict(<class 'dict'>, {
'key1': {
'length_A': '663',
'length_B': '389'},
'key2': {
'length_A': '865',
'length_B': '553'}})
Thanks,

I think this does what you want:
from collections import defaultdict
import pprint
d = {
'key1_A': {
'id': 'key1',
'length': '663',
'type': 'A',
},
'key1_B': {
'id': 'key1',
'length': '389',
'type': 'B',
},
'key2_A': {
'id': 'key2',
'length': '865',
'type': 'A',
},
'key2_B': {
'id': 'key2',
'length': '553',
'type': 'B',
},
}
transformed = defaultdict(dict)
for v in d.values():
transformed[v["id"]]["length_{}".format(v["type"])] = v["length"]
pprint.pprint(transformed)
# Output:
# defaultdict(<class 'dict'>,
# {'key1': {'length_A': '663', 'length_B': '389'},
# 'key2': {'length_A': '865', 'length_B': '553'}})

Related

Remove nested element occurs twice but should be only once

I have a problem. I want to remove all nested elements inside a dict. But unfortunately my code does not work. Every nested element occurs twice, but it should be occurs only once.
What is the problem for that?
Method
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append(my_new_dict)
return my_list
Running example
import uuid
my_Dict = {
'_key': '1',
'group': 'test',
'data': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
my_Dict2 = {
'_key': '2',
'group': 'test',
'data2': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail2': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
dictionaries = [my_Dict, my_Dict2]
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append(my_new_dict)
return my_list
result = nested_dict(dictionaries)
result
[OUT]
[{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}}]
What I want
[{'data': {'__id': '46f4eb3d-977c-4da4-a99c-c9bfa831b96e'},
'detail': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': 'fad4053e-75e5-4a03-93b6-67e0df814d23'}},
{'data2': {'__id': '6afcf48e-508c-476b-98f3-9bf1e8370fb4'},
'detail2': {'selector': {'number': '12312',
'isTrue': True,
'requirements': [{'type': 'customer', 'requirement': '1'}]},
'__id': '2d4745ea-decd-45dc-aa0b-7bea5c449c34'}}]
import uuid
import json
my_Dict = {
'_key': '1',
'group': 'test',
'data': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
my_Dict2 = {
'_key': '2',
'group': 'test',
'data2': {},
'type': '',
'code': '007',
'conType': '1',
'flag': None,
'createdAt': '2021',
'currency': 'EUR',
'detail2': {
'selector': {
'number': '12312',
'isTrue': True,
'requirements': [{
'type': 'customer',
'requirement': '1'}]
}
}
}
dictionaries = [my_Dict, my_Dict2]
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append(my_new_dict)
return my_list
output:
[
{
"data": {
"__id": "5c6769cf-01e5-4f5d-acfa-622472163aba"
},
"detail": {
"selector": {
"number": "12312",
"isTrue": true,
"requirements": [
{
"type": "customer",
"requirement": "1"
}
]
},
"__id": "d167277f-4d02-4d53-934b-131187f6f214"
}
},
{
"data2": {
"__id": "e9182913-c2fc-4d60-adb8-b0b8274faf50"
},
"detail2": {
"selector": {
"number": "12312",
"isTrue": true,
"requirements": [
{
"type": "customer",
"requirement": "1"
}
]
},
"__id": "46e6be7b-8903-4d2a-a768-f6b24fcc5d31"
}
}
]
only minor changes needed that is you are appending the list within inner for loop but you should do it at outer for loop level. I have pasted the code with output which I got
I think it is because my_new_dict is holding an object that is changed by the time it appends to the list.
def nested_dict(dictionaries):
my_list = []
for my_Dict in dictionaries:
my_new_dict = {}
for key in my_Dict.keys():
if isinstance(my_Dict[key], dict):
idx = str(uuid.uuid4())
my_Dict[key]["__id"] = idx
my_new_dict[key] = my_Dict[key]
my_Dict[key] = idx
my_list.append({key: my_new_dict[key]})
print(my_list)
return my_list

loop over a nested dictionary to create a new one

I've got a nested dictionary like that:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
Expected result:
[
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a1
},
"time": "timexxxxx",
"fields": {
x: 1
}
},
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a2
},
"time": "timexxxxx",
"fields": {
x: 1
}
}
]
that is what I'm trying, however it's being overwritten
for k,v in d.items():
metrics['measurements'] = "XXXXX"
if isinstance(v,dict):
for j,h in v.items():
metrics['tags'] = {'MPC':j,'host':k}
metrics['time'] = "timexxxxx"
for value in h:
metrics['fields'] = {j:h}
and I'm getting:
{'fields': {'b1': ['x1', 2]},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}
Could you give me some pointers on how to deal with this?
Thanks
see below
import pprint
d = {'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
data = []
for k, v in d.items():
entry = {"measurements": "XXXXX"}
entry['tags'] = {'MPC': list(v.keys())[0],"host": k}
entry["time"] = "timexxxxx"
values= list(v.values())
entry["fields"] = {values[0][0]:values[0][1]}
data.append(entry)
pprint.pprint(data)
output
[{'fields': {'x': 1},
'measurements': 'XXXXX',
'tags': {'MPC': 'b', 'host': 'a1'},
'time': 'timexxxxx'},
{'fields': {'x1': 2},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}]
This code can help you:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
def convert(dictionary):
return [
{
"measurements": "XXXXX",
"tags": {
"MPC": list(value.keys())[0],
"host": key
},
"time": "timexxxxx",
"fields": dict(value.values())
} for key, value in dictionary.items()
]
print(convert(d))
Results in [{'measurements': 'XXXXX', 'tags': {'MPC': 'b', 'host': 'a1'}, 'time': 'timexxxxx', 'fields': {'x': 1}}, {'measurements': 'XXXXX', 'tags': {'MPC': 'b1', 'host': 'a2'}, 'time': 'timexxxxx', 'fields': {'x1': 2}}]
You can do it like this
#Empty List
li=[]
#Add Items in list
for i in range(2):
d = {}
d["measurment"] = "XXXXX"
d["tags"] = {1: "x"}
d["time"] = "timexxx"
d["field"] = {2: "y"}
li.append(d)
#Print list elements
for i in li:
for key, value in i.items():
print(key, ":", value)
print()

How do i append a dictionary to a JSON file in python?

I have a JSON looks like this:
{'data': [], 'directed': False, 'multigraph': False, 'elements': {'nodes': [{'data': {'id': 'B2', 'value': 'B2', 'name': 'B2'}}, {'data': {'id': 'SCHROEDER PLZ', 'value': 'SCHROEDER PLZ', 'name': 'SCHROEDER PLZ'}}, {'data': {'id': 'D4', 'value': 'D4', 'name': 'D4'}}, {'data': {'id': 'BLAB PLZ', 'value': 'BLAB PLZ', 'name': 'BLAB PLZ'}}], 'edges': [{'data': {'source': 'B2', 'target': 'SCHROEDER PLZ'}}, {'data': {'source': 'D4', 'target': 'BLAB PLZ'}}]}}
The JSON is a result of the "loads" in my code:
import pandas as pd
import networkx as nx
import json
df= pd.read_csv('.../graph.csv')
g = nx.from_pandas_edgelist(df, source='DISTRICT', target='STREET')
x = nx.cytoscape_data(g)
dump = json.dumps(x)
loads = json.loads(dump)
And this is my csv file structure: The first record is the field name.
OFFENSE_DESCRIPTION,DISTRICT,DAY_OF_WEEK,STREET,INCIDENT_NUMBER,size
INVESTIGATE PERSON,B2,Thursday,SCHROEDER PLZ,854652314,10
INVESTIGATE PERSON,D4,Friday,BLAB PLZ,457856954,3
I want to append "size" values located in my csv file.
In fact, the result must be like the below JSON. in the 'nodes' tags, in the 'data' i want to add 'size' field value.
{'data': [], 'directed': False, 'multigraph': False, 'elements': {'nodes': [{'data': {'id': 'B2', 'value': 'B2', 'name': 'B2','size':10}}, {'data': {'id': 'SCHROEDER PLZ', 'value': 'SCHROEDER PLZ', 'name': 'SCHROEDER PLZ','size':10}}, {'data': {'id': 'D4', 'value': 'D4', 'name': 'D4','size':3}}, {'data': {'id': 'BLAB PLZ', 'value': 'BLAB PLZ', 'name': 'BLAB PLZ','size':3}}], 'edges': [{'data': {'source': 'B2', 'target': 'SCHROEDER PLZ'}}, {'data': {'source': 'D4', 'target': 'BLAB PLZ'}}]}}
An elegant solution is to update node attributes in networkx rather than the output dict. Use nx.set_node_attributes:
df = pd.read_csv('.../graph.csv')
size = dict(df[['DISTRICT', 'size']].values.tolist()
+ df[['STREET', 'size']].values.tolist())
g = nx.from_pandas_edgelist(df, source='DISTRICT', target='STREET')
nx.set_node_attributes(g, size, 'size')
x = nx.cytoscape_data(g)
>>> print(json.dumps(x['elements']['nodes'], indent=4))
[
{
"data": {
"size": 10,
"id": "B2",
"value": "B2",
"name": "B2"
}
},
{
"data": {
"size": 10,
"id": "SCHROEDER PLZ",
"value": "SCHROEDER PLZ",
"name": "SCHROEDER PLZ"
}
},
{
"data": {
"size": 3,
"id": "D4",
"value": "D4",
"name": "D4"
}
},
{
"data": {
"size": 3,
"id": "BLAB PLZ",
"value": "BLAB PLZ",
"name": "BLAB PLZ"
}
}
]

How can I create aggregate expressions of this list of dicts?

I have a list of dictionaries that expresses periods+days for a class in a student information system. Here's the data I'd like to aggregate:
[
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'A',
'sort_order': 1
}
},
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'B',
'sort_order': 2
}
},
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'C',
'sort_order': 1
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'A',
'sort_order': 1
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'B',
'sort_order': 2
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'C',
'sort_order': 2
}
},
{
'period': {
'name': '4',
'sort_order': 4
},
'day': {
'name': 'D',
'sort_order': 3
}
}
]
The aggregated string I'd like the above to reduce to is 1,3(A-C) 4(D). Notice that objects that aren't "adjacent" (determined by the object's sort_order) to each other are delimited by , and "adjacent" records are delimited by a -.
EDIT
Let me try to elaborate on the aggregation process. Each "class meeting" object contains a period and day. There are usually ~5 periods per day, and the days alternate cyclically between A,B,C,D, etc. So if I have a class that occurs 1st period on an A day, we might express that as 1(A). If a class occurs on 1st and 2nd period on an A day, the raw form of that might be 1(A),2(A), but it can be shortened to 1-2(A).
Some classes might not be in "adjacent" periods or days. A class might occur on 1st period and 3rd period on an A day, so its short form would be 1,3(A). However, if that class were on 1st, 2nd, and 3rd period on an A day, it could be written as 1-3(A). This also applies to days, so if a class occurs on 1st,2nd, and 3rd period, on A,B, and C day, then we could write it 1-3(A-C).
Finally, if a class occurs on 1st,2nd, and 3rd period and on A,B, and C day, but also on 4th period on D day, its short form would be 1-3(A-C) 4(D).
What I've tried
The first step that occurs to me to perform is to "group" the meeting objects into related sub-lists with the following function:
def _to_related_lists(list):
"""Given a list of section meeting dicts, return a list of lists, where each sub-list is list of
related section meetings, either related by period or day"""
related_list = []
sub_list = []
related_values = set()
for index, section_meeting_object in enumerate(list):
# starting with empty values list
if not related_values:
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
elif section_meeting_object['period']['name'] in related_values or section_meeting_object['day']['name'] in related_values:
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
else:
# no related values found in current section_meeting_object
related_list.append(sub_list)
sub_list = []
related_values = set()
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
related_list.append(sub_list)
return related_list
Which returns:
[
[{
'period': {
'sort_order': 1,
'name': '1'
},
'day': {
'sort_order': 1,
'name': 'A'
}
}, {
'period': {
'sort_order': 1,
'name': '1'
},
'day': {
'sort_order': 2,
'name': 'B'
}
}, {
'period': {
'sort_order': 2,
'name': '2'
},
'day': {
'sort_order': 1,
'name': 'A'
}
}, {
'period': {
'sort_order': 2,
'name': '2'
},
'day': {
'sort_order': 2,
'name': 'B'
}
}],
[{
'period': {
'sort_order': 4,
'name': '4'
},
'day': {
'sort_order': 3,
'name': 'C'
}
}]
]
If the entire string 1-3(A-C) 4(D) is the aggregate expression I'd like in the end, let's call 1-3(A-C) and 4(D) "sub-expressions". Each related sub-list would be a "sub-expression", so I was thinking I'd somehow iterate through every sublist and create the sub-expression, but I"m not exactly sure how to do that.
First, let us define your list as d_list.
d_list = [
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'C'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'C'}},
{'period': {'sort_order': 4, 'name': '4'}, 'day': {'sort_order': 3, 'name': 'D'}},
]
Note that I use the python native module string to define that B is between A and C. Thus what you may want to do is
import string
agg0 = {}
for d in d_list:
name = d['period']['name']
if name not in agg0:
agg0[name] = []
day = d['day']
agg0[name].append(day['name'])
agg1 = {}
for k,v in agg0.items():
pos_in_alph = [string.ascii_lowercase.index(el.lower()) for el in v]
allowed_indexes = [max(pos_in_alph),min(pos_in_alph)]
agg1[k] = [el for el in v if string.ascii_lowercase.index(el.lower()) in allowed_indexes]
agg = {}
for k,v in agg1.items():
w = tuple(v)
if w not in agg:
agg[w] = {'ks':[],'gr':len(agg0[k])>2}
agg[w]['ks'].append(k)
print agg[w]
str_ = ''
for k,v in sorted(agg.items(), key=lambda item:item[0], reverse=False):
str_ += ' {pnames}({dnames})'.format(pnames=('-' if v['gr'] else ',').join(sorted(v['ks'])),
dnames='-'.join(k))
print(str_.strip())
which outputs 1-3(A-C) 4(D)
Following #NathanJones's comment, note that if d_list were defined as
d_list = [
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'A'}},
##{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'C'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'C'}},
{'period': {'sort_order': 4, 'name': '4'}, 'day': {'sort_order': 3, 'name': 'D'}},
]
The code above would print 1,3(A-C) 4(D)

How to use list comprehensions to make a dict having list of list as values

I Have a list as following and I want to convert that as output shown below using List comprehensions. Any help is appreciated.
a = [{'type': 'abc', 'values': 1},
{'type': 'abc', 'values': 2},
{'type': 'abc', 'values': 3},
{'type': 'xyz', 'values': 4},
{'type': 'xyz', 'values': 5},
{'type': 'pqr', 'values': 6},
{'type': 'pqr', 'values': 8},
{'type': 'abc', 'values': 9},
{'type': 'mno', 'values': 10},
{'type': 'def', 'values': 11}]
This is the output I am expecting.
output = {'abc': [1,2,3,9], 'xyz': [4,5], 'pqr': [6,8], 'mno': [10], 'def': [11]}
from operator import itemgetter
from itertools import groupby
a = [{'type': 'abc', 'values': 1},
{'type': 'abc', 'values': 2},
{'type': 'abc', 'values': 3},
{'type': 'xyz', 'values': 4},
{'type': 'xyz', 'values': 5},
{'type': 'pqr', 'values': 6},
{'type': 'pqr', 'values': 8},
{'type': 'abc', 'values': 9},
{'type': 'mno', 'values': 10},
{'type': 'def', 'values': 11}]
typegetter = itemgetter('type')
valuesgetter = itemgetter('values')
groups = groupby(sorted(a, key=typegetter), key=typegetter)
print {k:list(map(valuesgetter, v)) for k, v in groups}
a = [{'type': 'abc', 'values': 1},
{'type': 'abc', 'values': 2},
{'type': 'abc', 'values': 3},
{'type': 'xyz', 'values': 4},
{'type': 'xyz', 'values': 5},
{'type': 'pqr', 'values': 6},
{'type': 'pqr', 'values': 8},
{'type': 'abc', 'values': 9},
{'type': 'mno', 'values': 10},
{'type': 'def', 'values': 11}]
output = {}
for item in a:
output[item['type']] = [item['values']] if output.get(item['type'], None) is None else output[item['type']] + [item['values']]
print output

Categories