converting pandas dataframe to a custom JSON - python

This is my dataframe:
df = pd.DataFrame(
{
'a': ['x', 'x', 'y', 'y'],
'b': ['xs', 'sx', 'rrx', 'ywer'],
'c': ['aaa', 'bbb', 'rrsdrx', 'yz'],
}
)
And this is the JSON output that I want:
{
'x':{
'links':[
{
'b': 'xs',
'c': 'aaa'
},
{
'b': 'sx',
'c': 'bbb'
}
]
},
'y':{
'links':[
{
'b': 'rrx',
'c': 'rrsdrx'
},
{
'b': 'ywer',
'c': 'yz'
}
]
},
}
I have tried the accepted answer of this post. And the following code was my other try:
x = df.groupby('a')['b'].apply(list).reset_index()
y = x.to_json(orient='records')
parsed = json.loads(y)
z = json.dumps(parsed, indent=4)
but the output was not what I needed.

Group the dataframe by column a. For each group, drop column a and convert the remaining columns to a list of row dictionaries, then store that list under 'links' in a dictionary keyed by the group's value.
# Each group key from column 'a' becomes a top-level key; the group's rows,
# with column 'a' dropped, become the list of row dicts stored under 'links'.
{k:{'links': d.drop(columns=['a']).to_dict('records')} for k,d in df.groupby('a')}
OUTPUT
{
"x": {
"links": [
{
"b": "xs",
"c": "aaa"
},
{
"b": "sx",
"c": "bbb"
}
]
},
"y": {
"links": [
{
"b": "rrx",
"c": "rrsdrx"
},
{
"b": "ywer",
"c": "yz"
}
]
}
}

Related

Append list of dictionaries from another with the same value python

Assumed I have this :
List of Dict 1
[
{
"name": A
"born": "London"
},
{
"name": B
"born": "Sydney"
...
]
List of Dict 2
[
{
"name": A,
"class": 1,
...
},
{
"name: B,
"class": 2,
...
}
]
I want to append list of dict 2 with particular attributes in list of dict 1, but with the lowest possible time complexity.
Like this.
List of Dict 3
[
{
"name": A,
"class": 1,
"born": "London"
...
},
{
"name: B,
"class": 2,
"born": "Sydney
...
}
]
Thank you guys
You can do it like this.
Time Complexity: O(Len of D1 * Len of D2)
d1 = [
{
"name": "A",
"born": "London"
},
{
"name": "B",
"born": "Sydney"
}
]
d2 = [
{
"name": "A",
"class": 1,
},
{
"name": "B",
"class": 2,
}
]
# Build d3 by pairing each record of d1 with the record(s) of d2 that carry
# the same 'name': 'name' and 'born' come from d1, 'class' comes from d2.
d3 = []
for i in d1:
# Scratch dict that receives the merged fields for the current d1 record.
temp = {}
# d2 is scanned in full for every d1 record, so the work grows with
# len(d1) * len(d2).
# NOTE(review): an index of d2 keyed by 'name', built once, would remove the
# inner scan -- confirm that names are unique before switching.
for j in d2:
if i['name'] == j['name']:
temp['name'] = i['name']
temp['class'] = j['class']
temp['born'] = i['born']
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# this append runs once per match or once per d1 record -- confirm against the
# original answer before relying on the behaviour for unmatched names.
d3.append(temp)
print(d3)
[{'name': 'A', 'class': 1, 'born': 'London'}, {'name': 'B', 'class': 2, 'born': 'Sydney'}]
To do it without 2 for loops, you can use pandas which is very efficient. It should work very fast for long lists as well.
import pandas as pd
df1 = pd.DataFrame(d1)
df2 = pd.DataFrame(d2)
df3 = df1.merge(df2, left_on='name', right_on='name', how='left')
#To convert dataframe to a list of dicts
df3 = df3.to_dict('records')
You can choose which attributes to add as follows:
df3 = df1.merge(df2[['name', 'class']], left_on='name', right_on='name', how='left')

How to merge and create dict of dicts from a dictionary [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 1 year ago.
Improve this question
I have a dictionary like the one below in which the value of one of the list elements will be a key somewhere in the same dictionary.
{"a": ["b", "c"], "b": ["D"], "c": ["A", "B", "C"], "A": ["abc", "aab", "aba"], "B": ["bcd", "bdc"], "C": ["dab", "dbc", "def", "dgr"], "abc": ["eee", "ehj"], "eee": ["ghi"], "aab": ["tuv", "xuv"], "ehj": ["giu"], "aba": ["suv", "ruv"]}
I want to merge all of them as below.
{"a": [{"b": ["D"]}, {"c": [{"A": [{"abc": [{"eee": ["ghi"], "ehj": ["giu"]}, {"aab": ["tuv", "xuv"]}, {"aba": ["suv", "ruv"]}]}, {"B": ["bcd", "bdc"]}, {"C": ["dab", "dbc", "def", "dgr"]}]}]}]}
JSON Format:
{
"a": [{
"b": ["D"]
}, {
"c": [{
"A": [{
"abc": [{
"eee": ["ghi"],
"ehj": ["giu"]
}, {
"aab": ["tuv", "xuv"]
}, {
"aba": ["suv", "ruv"]
}]
}, {
"B": ["bcd", "bdc"]
}, {
"C": ["dab", "dbc", "def", "dgr"]
}]
}]
}]
}
Also, the number of values (list of key elements) of a key is not equal.
Thanks for your help!
You can use recursion:
import json
d = {"a": ["b", "c"], "b": ["D"], "c": ["A", "B", "C"], "A": ["abc", "aab", "aba"], "B": ["bcd", "bdc"], "C": ["dab", "dbc", "def", "dgr"], "abc": ["eee", "ehj"], "eee": ["ghi"], "aab": ["tuv", "xuv"], "ehj": ["giu"], "aba": ["suv", "ruv"]}
def merge(n):
    """Expand key *n* of the module-level mapping ``d`` into a nested list.

    Every child listed under ``d[n]`` that is itself a key of ``d`` is
    expanded recursively; any other child is kept as the plain string.

    Returns:
        * the children unchanged when all of them are plain strings;
        * one single-key dict per child when at least one expanded child
          contains a dict of its own;
        * otherwise a one-element list holding a single dict that maps every
          child to its expansion.
    """
    children = []
    for child in d[n]:
        if child in d:
            children.append((child, merge(child)))
        else:
            children.append(child)

    # Leaf level: nothing was expanded, so hand the names back as they are.
    if all(isinstance(entry, str) for entry in children):
        return children

    # Does any expanded child already hold a dict one level further down?
    has_nested_dict = any(
        any(isinstance(item, dict) for item in subtree)
        for _, subtree in children
    )
    if has_nested_dict:
        return [{key: subtree} for key, subtree in children]
    return [{key: subtree for key, subtree in children}]
result = {a:merge(a) for a in d if all(a not in b for b in d.values())}
print(json.dumps(result, indent=4))
Output:
{
"a": [
{
"b": [
"D"
]
},
{
"c": [
{
"A": [
{
"abc": [
{
"eee": [
"ghi"
],
"ehj": [
"giu"
]
}
]
},
{
"aab": [
"tuv",
"xuv"
]
},
{
"aba": [
"suv",
"ruv"
]
}
]
},
{
"B": [
"bcd",
"bdc"
]
},
{
"C": [
"dab",
"dbc",
"def",
"dgr"
]
}
]
}
]
}

Python recursive aggregation

I am working with a nested data structure which needs to be flattened. The values need to be aggregated so totals are produced across each level of the nested data. I'm trying to do this recursively but it's not clear how best to achieve this?
The following is an example of the data I'm working with.
def get_result():
    """Return the sample nested mapping used to illustrate the aggregation.

    A fresh structure is built on every call: ``a1`` holds two branches
    (``b1`` and ``b2``) whose leaves are all ``1``, and ``a2`` is empty.
    """
    branch_b1 = {
        "c1": {"d1": 1, "d2": 1},
        "c2": {"d3": 1},
    }
    branch_b2 = {
        "c3": {"d4": 1},
    }
    return {
        "a1": {"b1": branch_b1, "b2": branch_b2},
        "a2": {},
    }
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
}
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]
You can use recursion
from collections import defaultdict
def agg(data):
    """Total the leaf values of a nested mapping at every level.

    Args:
        data: mapping whose values are either numbers or further mappings.

    Returns:
        A ``(totals, grand_total)`` pair.  ``totals`` is a
        ``defaultdict(int)`` mapping each key to the sum of the leaves below
        it (a leaf key maps to its own value); ``grand_total`` is the sum of
        every leaf in ``data``.  Nested mappings whose total is falsy
        (empty, or summing to zero) are left out of ``totals``.
    """
    totals = defaultdict(int)
    running = 0
    for key, value in data.items():
        if not isinstance(value, dict):
            # Plain leaf: counts towards its own key and the running total.
            totals[key] += value
            running += value
            continue

        nested_totals, nested_sum = agg(value)
        if not nested_sum:
            # Nothing accumulated below this key, so it is not recorded.
            continue
        # Children are merged in first, then the parent's own total is added.
        totals.update(nested_totals)
        totals[key] += nested_sum
        running += nested_sum
    return totals, running
You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
    """Return the sum of all leaf values under *d*.

    A non-dict argument is treated as a leaf and returned unchanged; a dict
    contributes the recursive sum of its values (``0`` when it is empty).
    """
    if isinstance(d, dict):
        return sum(get_aggr(child) for child in d.values())
    return d
def aggr_keys(d):
    """Yield ``{'key': k, 'total': t}`` for every key of the nested mapping *d*.

    Rows come out parent-first: each key is followed immediately by the rows
    of its own children.  ``t`` is the sum of all leaf values below ``k`` (the
    value itself for a leaf, ``0`` for an empty mapping).

    Each subtree is totalled exactly once, in the same pass that collects its
    rows, instead of being re-summed for every ancestor above it.  The whole
    tree is therefore walked when the first row is requested.
    """
    def _walk(node):
        # Return (rows in parent-first order, sum of all leaves) for one mapping.
        rows = []
        subtotal = 0
        for key, value in node.items():
            if isinstance(value, dict):
                child_rows, child_total = _walk(value)
            else:
                child_rows, child_total = [], value
            rows.append({'key': key, 'total': child_total})
            rows.extend(child_rows)
            subtotal += child_total
        return rows, subtotal

    yield from _walk(d)[0]
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]

How to convert this DataFrame into Json

I have this DataFrame with 2 columns
print(df)
a b
10 {'A': 'foo', ...}
20 {'B': 'faa', ...}
30 {'C': 'fee', ...}
40 {'D': 'fii', ...}
50 {'E': 'foo', ...}
when I try to convert it into json it goes wrong:
df.to_json("test.json")
# Output:
{
"a":{10, 20, 30, 40, 50},
"b":{
"1":{
"A":"foo",
...
},
"2":{
"B":"faa",
...
},
"3":{
"B":"faa",
...
},
...
"5":{
"E":"foo",
...
}
}
I don't even know where the numbers come from.
My desired json:
[{
'a': 10,
'b': {
'A': 'foo',
...
},
...
'a': 50,
'b': {
'E': 'foo',
...
}
}
]
You could try the following:
# Build one {'a': ..., 'b': ...} record per row of df.
# Iterating a DataFrame directly yields its column labels rather than its
# rows, and `df(...)` is a call rather than a lookup, so the two columns are
# walked in lockstep instead.  Series.tolist() hands back plain Python
# scalars, which keeps every record serialisable by json.dump.
data = [
    {'a': a, 'b': b}
    for a, b in zip(df['a'].tolist(), df['b'].tolist())
]
This should give you your desired output.
If you want to convert this into a JSON file then you can do the following:
# Write `data` to myjson.json as JSON, pretty-printed with a 4-space indent.
# The `with` block closes the file once the dump has finished.
with open("myjson.json", "w") as f:
json.dump(data, f, indent=4)

How can I merge and sum two dictionary key and items?

I have two dictionaries
dict1 = {
"list": {
"alpha": {
"a": {
"score": 1,
"visit": 2
},
"b": {
"score": 3,
"visit": 4
}
},
"beta" : {
"a": {
"score": 1,
"visit": 2
},
"b": {
"score": 3,
"visit": 4
}
}
}
}
dict2 = {
"list": {
"alpha": {
"a": {
"score": 1,
"visit": 2
},
"c": {
"score": 5,
"visit": 6
}
},
"beta" : {
"a": {
"score": 1,
"visit": 2
},
"c": {
"score": 5,
"visit": 6
}
}
}
}
I want to merge dictionary like this
dict1 = {
"list": {
"alpha": {
"a" : {
"score": 2,
"visit": 4
},
"b": {
"score": 3,
"visit": 4
},
"c": {
"score": 5,
"visit": 6
}
},
"beta": {
"a": {
"score": 2,
"visit": 4
},
"b": {
"score": 3,
"visit": 4
},
"c": {
"score": 5,
"visit": 6
}
}
}
}
Condition 1. value is always new dictionary or int (not str)
Condition 2. If the same key exists at the same depth in both dictionaries, the value of that key must be the sum of the two values.
I think maybe I can solve this problem using for loops.
But Python seems to have a simpler and faster way.
this is my best.
code:
def logic(d1, d2, inconflict=lambda v1, v2: v1 + v2):
    """Merge *d2* into *d1* in place and return *d1*.

    For every key of *d2*:

    * missing from *d1*: a deep copy of the value is stored in *d1*;
    * dicts on both sides: the two dicts are merged recursively;
    * otherwise, if the value in *d1* is an int: it is replaced by
      ``inconflict(d1_value, d2_value)`` (their sum by default);
    * any other clash leaves the value in *d1* untouched.

    Args:
        d1: dict that is updated and returned.
        d2: dict whose entries are folded into *d1*; it is never modified.
        inconflict: two-argument callable that resolves clashing int values.
    """
    # Function-scope import so the snippet stays self-contained.
    import copy

    for key in d2:
        if key not in d1:
            # Copy rather than share: a nested dict stored by reference would
            # live in both d1 and d2, so a later merge into d1 would silently
            # rewrite d2 as well.
            d1[key] = copy.deepcopy(d2[key])
        elif isinstance(d1[key], dict) and isinstance(d2[key], dict):
            logic(d1[key], d2[key], inconflict)
        elif isinstance(d1[key], int):
            d1[key] = inconflict(d1[key], d2[key])
    return d1
print logic(dict1, dict2)
It's a recursive data structure; let's use recursion.
Edit: missed the python-2.6 tag, no dict comprehensions there. Edit2: Copy values in case they exist in only one of the two, otherwise you'll run into surprises with references to the same dictionary being inside two separate dictionaries.
import copy
def recursively_sum(var1, var2):
    """Return the key-wise sum of two nested dicts of numbers.

    Args:
        var1, var2: each is a number, a dict (possibly nested) of numbers,
            or ``None`` to mean "absent on this side".

    Returns:
        * a deep copy of the other argument when one of them is ``None``;
        * ``var1 + var2`` when *var1* is a number;
        * otherwise a new dict holding, for every key found in either
          argument, the recursive sum of the two values.  Keys of *var1*
          come first, followed by the keys found only in *var2*.

    Neither argument is modified, and the result shares no nested dict with
    them.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import numbers

    if var1 is None:
        return copy.deepcopy(var2)
    if var2 is None:
        return copy.deepcopy(var1)
    # Treat every numeric type as a leaf.  Testing for ``int`` alone sends
    # floats (and Python 2 longs) into the dict branch, where they fail.
    if isinstance(var1, numbers.Number):
        return var1 + var2

    # Walk var1's keys, then the keys only var2 has, so the key order of the
    # result is reproducible instead of depending on set iteration order.
    result = {}
    for key in var1:
        result[key] = recursively_sum(var1.get(key), var2.get(key))
    for key in var2:
        if key not in var1:
            result[key] = recursively_sum(None, var2.get(key))
    return result

Categories