I need to convert this DataFrame to a JSON file.
Code:
def new_json(df):
    """Fold the rows of *df* into a nested dict.

    Each row is read as a path: every column except the last two is a
    nesting level, the second-to-last column is the leaf key and the last
    column is the leaf value.  When the same full path occurs more than
    once, the first value seen is kept.

    Args:
        df: Object exposing a 2-D ``values`` array (e.g. a pandas DataFrame).

    Returns:
        dict: The nested mapping; empty when *df* has no rows or fewer than
        two columns.
    """
    nested = {}
    for row in df.values:
        path = row[:-1]
        if len(path) == 0:
            # A single-column row has no key to store the value under.
            continue
        node = nested
        # Walk (creating as needed) one dict level per leading column.
        for key in path[:-1]:
            node = node.setdefault(key, {})
        # setdefault keeps an already-stored leaf, i.e. the first row wins.
        node.setdefault(path[-1], row[-1])
    return nested
# Build the nested dict and show it. print() emits the dict's Python repr,
# which is why the result below appears on a single line; json.dumps(a, indent=4)
# would render the same mapping as indented JSON text instead.
a=new_json(df)
print(a)
result:
{'a': {'a2': {'a21': 'new', 'a22': 'old'}, 'a3': {'a31': 'content'}, 'a4': {'a41': 'old'}}, 'b': {'b1': {'b11': 'content', 'b12': 'new', 'b13': 'new'}}, 'c': {'c1': {'c11': 'content'}, 'c2': {'c21': 'content'}, 'c3': {'c31': 'old'}}}
Is it possible to output the result in this indented JSON format?
{
'a': {
'a2': {
'a21': 'new',
'a22': 'old'
},
'a3': {
'a31': 'content'
},
'a4': {
'a41': 'old'
}
},
'b': {
'b1': {
'b11': 'content',
'b12': 'new',
'b13': 'new'
}
},
'c': {
'c1': {
'c11': 'content'
},
'c2': {
'c21': 'content'
},
'c3': {
'c31': 'old'
}
}
}
Related
This is my dataframe:
df = pd.DataFrame(
{
'a': ['x', 'x', 'y', 'y'],
'b': ['xs', 'sx', 'rrx', 'ywer'],
'c': ['aaa', 'bbb', 'rrsdrx', 'yz'],
}
)
And this is the JSON output that I want:
{
'x':{
'links':[
{
'b': 'xs',
'c': 'aaa'
},
{
'b': 'sx',
'c': 'bbb'
}
]
},
'y':{
'links':[
{
'b': 'rrx',
'c': 'rrsdrx'
},
{
'b': 'ywer',
'c': 'yz'
}
]
},
}
I have tried the accepted answer of this post. And the following code was my other try:
# Collect column 'b' into one list per distinct value of 'a'. Column 'c' is
# not selected here, so it cannot appear anywhere in the output.
x = df.groupby('a')['b'].apply(list).reset_index()
# Serialise the grouped frame as a JSON array of row records ...
y = x.to_json(orient='records')
# ... then round-trip through json.loads/json.dumps to re-indent the text.
parsed = json.loads(y)
z = json.dumps(parsed, indent=4)
but the output was not what I needed.
Group the dataframe by column a, turn each group (without column a) into a list of record dictionaries, and assemble the required dictionary keyed by the group value:
# One entry per distinct value of 'a': the group's rows, with column 'a'
# dropped, converted to a list of row dicts and stored under 'links'.
{k:{'links': d.drop(columns=['a']).to_dict('records')} for k,d in df.groupby('a')}
OUTPUT
{
"x": {
"links": [
{
"b": "xs",
"c": "aaa"
},
{
"b": "sx",
"c": "bbb"
}
]
},
"y": {
"links": [
{
"b": "rrx",
"c": "rrsdrx"
},
{
"b": "ywer",
"c": "yz"
}
]
}
}
I have the following structure:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'c': 'third',
'price': 3
},
{
'b': 'second',
'price': 2
},
{
'a': 'first',
'price': 1
}
]
},
2: {
'list': [
{
'f': 'sixth',
'price': 6
},
{
'e': 'fifth',
'price': 5
},
{
'd': 'fourth',
'price': 4
}
]
}
}
}
}
}
I need to order each list by price, ascending. The result should be:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'a': 'first',
'price': 1
},
{
'b': 'second',
'price': 2
},
{
'c': 'third',
'price': 3
},
]
},
2: {
'list': [
{
'd': 'fourth',
'price': 4
},
{
'e': 'fifth',
'price': 5
},
{
'f': 'sixth',
'price': 6
}
]
}
}
}
}
}
None of the questions I've found fits my needs because of this particular structure.
Is there a way to sort it without having to access each of the preceding keys? In my project there are cases with more nested keys before the list, so I need a dynamic solution for sorting it.
I mean, I don't know the exact path to the list, only the list's key.
Make a function to recursively traverse your dict looking for lists, and sort each one based on your criteria:
def find_and_sort_lists(d):
    """Sort, in place, every list of dicts found inside *d* by 'price'.

    The structure is walked recursively through nested dicts and through
    dicts stored inside lists, so the path to a list does not need to be
    known.  Lists containing anything other than dicts are left untouched.

    Args:
        d: The (possibly deeply nested) dict to process; mutated in place.

    Returns:
        None.

    Raises:
        KeyError: If a dict inside a sorted list has no 'price' entry.
    """
    for value in d.values():
        if isinstance(value, dict):
            find_and_sort_lists(value)
        elif isinstance(value, list) and all(
            isinstance(item, dict) for item in value
        ):
            value.sort(key=lambda item: item['price'])
            # Records may themselves contain deeper lists that need sorting.
            for item in value:
                find_and_sort_lists(item)
If it's a requirement to sort only lists whose key is actually 'list', you can use the following:
def find_and_sort_lists(d):
    """Sort, in place, every list stored under the key 'list' by 'price'.

    The structure is walked recursively through nested dicts and through
    dicts stored inside lists, so the path to a 'list' entry does not need
    to be known.  Lists stored under any other key are not reordered.

    Args:
        d: The (possibly deeply nested) dict to process; mutated in place.

    Returns:
        None.

    Raises:
        KeyError: If an item of a 'list' entry has no 'price' entry.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            find_and_sort_lists(value)
        elif isinstance(value, list):
            if key == 'list':
                value.sort(key=lambda item: item['price'])
            # Dicts held inside a list may contain deeper 'list' entries.
            for item in value:
                if isinstance(item, dict):
                    find_and_sort_lists(item)
I've got a nested dictionary like that:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
Expected result:
[
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a1
},
"time": "timexxxxx",
"fields": {
x: 1
}
},
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a2
},
"time": "timexxxxx",
"fields": {
x: 1
}
}
]
This is what I'm trying; however, the result keeps being overwritten:
# NOTE(review): `metrics` is created outside this snippet and nothing here
# replaces it with a new dict, so every pass writes into the same object and
# overwrites the keys set by the previous pass. Only the last host/MPC pair
# survives, and nothing is ever collected into a list.
for k,v in d.items():
metrics['measurements'] = "XXXXX"
if isinstance(v,dict):
for j,h in v.items():
metrics['tags'] = {'MPC':j,'host':k}
metrics['time'] = "timexxxxx"
# NOTE(review): the loop variable `value` is unused; each pass stores the
# whole list h under key j rather than a {name: reading} pair.
for value in h:
metrics['fields'] = {j:h}
and I'm getting:
{'fields': {'b1': ['x1', 2]},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}
Could you give me some pointers on how to deal with this?
Thanks
see below
import pprint

d = {'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}

# Build a fresh dict for every (host, MPC) pair and append it, so earlier
# entries are never overwritten. Iterating over the inner items (instead of
# taking only the first key) also copes with hosts that carry several MPC
# entries or none at all.
data = []
for host, mpcs in d.items():
    for mpc, sample in mpcs.items():
        data.append({
            "measurements": "XXXXX",
            "tags": {'MPC': mpc, "host": host},
            "time": "timexxxxx",
            # sample is [field_name, reading]
            "fields": {sample[0]: sample[1]},
        })
pprint.pprint(data)
output
[{'fields': {'x': 1},
'measurements': 'XXXXX',
'tags': {'MPC': 'b', 'host': 'a1'},
'time': 'timexxxxx'},
{'fields': {'x1': 2},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}]
This code can help you:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
def convert(dictionary):
    """Flatten ``{host: {mpc: [field, reading]}}`` into a list of metric dicts.

    One entry is produced for every (host, MPC) pair, so hosts with several
    MPC keys yield several entries and hosts with an empty mapping yield none.

    Args:
        dictionary: Mapping of host name to a mapping of MPC name to a
            two-item ``[field_name, reading]`` sequence.

    Returns:
        list[dict]: Entries with the keys 'measurements', 'tags', 'time' and
        'fields', in iteration order of the input.

    Raises:
        ValueError: If an inner value does not have exactly two items.
    """
    return [
        {
            "measurements": "XXXXX",
            "tags": {
                "MPC": mpc,
                "host": host,
            },
            "time": "timexxxxx",
            "fields": {field: reading},
        }
        for host, mpcs in dictionary.items()
        for mpc, (field, reading) in mpcs.items()
    ]
print(convert(d))
Results in [{'measurements': 'XXXXX', 'tags': {'MPC': 'b', 'host': 'a1'}, 'time': 'timexxxxx', 'fields': {'x': 1}}, {'measurements': 'XXXXX', 'tags': {'MPC': 'b1', 'host': 'a2'}, 'time': 'timexxxxx', 'fields': {'x1': 2}}]
You can do it like this
# Start with an empty list of records.
li = []

# Create a brand-new dict on every pass; appending the same dict object
# repeatedly would make all list entries alias one another.
for _ in range(2):
    record = {
        "measurements": "XXXXX",
        "tags": {1: "x"},
        "time": "timexxx",
        "fields": {2: "y"},
    }
    li.append(record)

# Print each record as "key : value" lines, with a blank line between records.
for record in li:
    for key, value in record.items():
        print(key, ":", value)
    print()
I am working with a nested data structure which needs to be flattened. The values need to be aggregated so that totals are produced for each level of the nested data. I'm trying to do this recursively, but it's not clear how best to achieve it.
The following is an example of the data I'm working with.
def get_result():
    """Return a freshly built sample of the nested data to be aggregated.

    The mapping has two top-level keys: 'a1', which nests three further
    levels ending in integer leaves, and 'a2', which is empty.
    """
    branch_b1 = {
        "c1": {"d1": 1, "d2": 1},
        "c2": {"d3": 1},
    }
    branch_b2 = {
        "c3": {"d4": 1},
    }
    return {
        "a1": {"b1": branch_b1, "b2": branch_b2},
        "a2": {},
    }
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
},
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]
You can use recursion
from collections import defaultdict
def agg(data):
    """Aggregate a nested dict of numbers into per-key totals.

    Every key found at any depth is given the sum of all numeric leaves
    beneath it (a leaf key is given its own value).  Totals are accumulated,
    so a key name that occurs in several branches ends up with the sum over
    all of its occurrences.  Sub-dicts whose total is zero (for example empty
    dicts) are left out of the result.

    Args:
        data: Mapping whose values are either numbers or further mappings of
            the same shape.

    Returns:
        tuple: ``(result, agg_sum)`` where *result* is a
        ``defaultdict(int)`` of key -> total and *agg_sum* is the sum of all
        leaves in *data*.
    """
    result = defaultdict(int)
    agg_sum = 0
    for k, v in data.items():
        if not isinstance(v, dict):
            result[k] += v
            agg_sum += v
            continue
        nested_totals, sub = agg(v)
        if not sub:
            # Nothing (or only zeros) below this key: skip the whole subtree.
            continue
        # Add rather than dict.update(): update() would overwrite the total of
        # a key name already seen in another branch.
        for name, total in nested_totals.items():
            result[name] += total
        result[k] += sub
        agg_sum += sub
    return result, agg_sum
You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
    """Return the total of *d*.

    A non-dict value is its own total; a dict's total is the sum of the
    totals of its values (0 for an empty dict).
    """
    if isinstance(d, dict):
        return sum(get_aggr(child) for child in d.values())
    return d


def aggr_keys(d):
    """Yield ``{'key': name, 'total': total}`` for every key in *d*.

    Keys are visited depth-first: each key's entry is produced first,
    immediately followed by the entries of everything nested beneath it.
    """
    for name, subtree in d.items():
        yield {'key': name, 'total': get_aggr(subtree)}
        if isinstance(subtree, dict):
            yield from aggr_keys(subtree)
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]
I have a dataframe like below. Each topic has several sub-topics.
pd.DataFrame({'topic': ['A', 'A', 'A', 'B', 'B'],
'sub-topic': ['A1', 'A2', 'A3', 'B1', 'B3' ],
'value': [2,12,44,21,1]})
topic sub-topic value
0 A A1 2
1 A A2 12
2 A A3 44
3 B B1 21
4 B B3 1
I need to convert it to Json format like below. Within first layer, for example topic A, the value is the sum of all its sub-topics.
{'A': {
'value': 58,
'children': {
'A1': {'value': 2},
'A2': {'value': 12},
'A3': {'value': 44}
},
},
'B': {
'value': 22,
'children': {
'B1': {'value': 21},
'B3': {'value': 1}
}
}
}
Does anyone know how I can convert the data to this specific json? I have no clue how I should approach that. Thanks a lot in advance.
Use a custom function in GroupBy.apply, then finish with Series.to_dict or Series.to_json:
def f(x):
    """Summarise one topic group as ``{'value': total, 'children': {...}}``.

    Args:
        x: DataFrame holding the rows of a single topic; it must provide the
            columns 'sub-topic' and 'value'.

    Returns:
        dict: 'value' is the sum of the group's 'value' column as a plain
        Python number; 'children' maps each sub-topic to
        ``{'value': <its value>}``.
    """
    total = x['value'].sum()
    # For numeric columns the sum is typically a NumPy scalar; unwrap it so
    # the dict prints as a plain number and json.dumps accepts it.
    if hasattr(total, 'item'):
        total = total.item()
    children = x.set_index('sub-topic')[['value']].to_dict('index')
    return {'value': total, 'children': children}
# For a dictionary: apply f to each topic group and convert the resulting
# Series (indexed by topic) to a dict.
out = df.groupby('topic').apply(f).to_dict()
# For a JSON string, use to_json() instead:
#out = df.groupby('topic').apply(f).to_json()
print (out)
{
'A': {
'value': 58,
'children': {
'A1': {
'value': 2
},
'A2': {
'value': 12
},
'A3': {
'value': 44
}
}
},
'B': {
'value': 22,
'children': {
'B1': {
'value': 21
},
'B3': {
'value': 1
}
}
}
}