DataFrame to nested JSON with Python? - python

I am trying to extract data from SQL and convert it into a JSON file.
I have also tried other techniques mentioned on various websites, but without any success.
So basically I'm stuck after the statement below:
j = (df.groupby(['SectionCode'])
.apply(lambda x: x[['Barcode', 'BrandCode', 'PurchaseRate', 'SalesRate', 'unit','Item']].to_dict('r'))
.reset_index()
.rename(columns={0: 'Products'})
.to_json(r'D:\DataToFirbaseWithPython\Export_DataFrame.json'))
print(j)
I need this JSON format:
"SectionsWithItem": { #Root_Node_In_Firebase
"0001": { #SectionCode
"Products": {
"018123": { #Barcode
"Barcode": "018123",
"BrandCode": "1004",
"PurchaseRate": 105.0,
"SalesRate": 125.0,
"Units": "Piece",
"name": "Shahi Delux Mouth Freshener"
},
"0039217": { #Barcode
"Barcode": "0039217",
"BrandCode": "0814",
"PurchaseRate": 140.0,
"SalesRate": 160.0,
"Units": "Piece",
"name": "Maizban Gota Pan Masala Medium Jar"
}
}
},
"0002": { #SectionCode
"Products": {
"03905": { #Barcode
"Barcode": "03905",
"BrandCode": "0189",
"PurchaseRate": 15.4,
"SalesRate": 17.0,
"Units": "Piece",
"name": "Peek Freans Rio Chocolate Half Roll"
},
"0003910": { #Barcode
"Barcode": "0003910",
"BrandCode": "0189",
"PurchaseRate": 110.32,
"SalesRate": 120.0,
"Units": "Piece",
"name": "Peek Freans Gluco Ticky Pack Box"
}
}
}
}
My DataFrame
Barcode,Item,SalesRate,PurchaseRate,unit,BrandCode,SectionCode
0005575,Broom Soft A Quality,100.0,80.0,,2037,0045
0005850,Safa Tomato Paste 800g,340.0,275.0,800g,1004,0009
0005921,Dettol Liquid 1Ltr,800.0,719.99,1Ltr,0475,0045

Grouping by the barcode as well should help with indexing like the desired output.
import pandas as pd
import json
# Read every column as text (dtype=str) so codes such as Barcode "0005575"
# and SectionCode "0045" keep their leading zeros.
df = pd.read_csv('stac1 - Sheet1.csv', dtype=str)  # made dataframe with provided data

# One row per (SectionCode, Barcode) pair; the 'Products' column holds the
# list of record dicts that belong to that pair.
products = (
    df.groupby(['SectionCode', 'Barcode'])
    # Spell out 'records': the 'r' shorthand is deprecated and is rejected
    # by pandas 2.x.
    .apply(lambda x: x[['Barcode', 'BrandCode', 'PurchaseRate', 'SalesRate', 'unit', 'Item']].to_dict('records'))
    .reset_index()
    .rename(columns={0: 'Products'})
)

# to_json() writes the file and returns None when it is given a path, so `j`
# is None here; the name is kept only so later references do not break.
j = products.to_json(r'Export_DataFrame.json')

# Read the exported file back to inspect what was actually written.
with open('Export_DataFrame.json') as f:
    data = json.load(f)
print(data)
Hopefully this helps get you in the right direction!

Related

Converting python dataframe to a particular JSON structure

Hi, I want to convert my dataframe to a specific JSON structure. My dataframe looks something like this:
df = pd.DataFrame([["file1", "1.2.3.4.5.6.7.8.9", 91, "RMLO"], ["file2", "1.2.3.4.5.6.7.8.9", 92, "LMLO"], ["file3", "1.2.3.4.5.6.7.8.9", 93, "LCC"], ["file4", "1.2.3.4.5.6.7.8.9", 94, "RCC"]], columns=["Filename", "StudyID", "probablity", "finding_name"])
And the JSON structure into which I want to convert my dataframe is below:
{
"findings": [
{
"name": "RMLO",
"probability": "91"
},
{
"name": "LMLO",
"probability": "92"
},
{
"name": "LCC",
"probability": "93"
},
{
"name": "RCC",
"probability" : "94"
}
],
"status": "Processed",
"study_id": "1.2.3.4.5.6.7.8.9.0"
}
I tried implementing this with the code below, using different orient values, but I didn't get what I wanted.
j = df[["probablity","findings"]].to_json(orient='records')
I would appreciate it if anyone could help me achieve this.
Thanks.
Is this similar to what you are trying to achieve:
import json
# Serialize only the two finding columns as a JSON array of row objects.
finding_columns = ["finding_name", "probablity"]
j = df.loc[:, finding_columns].to_json(orient='records')

# The study id is read from the row labelled 0 (assumes all rows share one
# study -- confirm against the real data).
study_id = df["StudyID"][0]

# Wrap the parsed findings together with the fixed status and the study id.
findings = json.loads(j)
j_dict = dict(findings=findings, status="Processed", study_id=study_id)
j_dict
This results in:
{'findings': [{'finding_name': 'RMLO', 'probablity': 91},
{'finding_name': 'LMLO', 'probablity': 92},
{'finding_name': 'LCC', 'probablity': 93},
{'finding_name': 'RCC', 'probablity': 94}],
'status': 'Processed',
'study_id': '1.2.3.4.5.6.7.8.9'}

Converting Dictionary in list in list to dataframe in python

I am really a newbie. Thanks much.
Dictionary in list from JSON looks like this:
data1= [ [{Code:A, date:XXX}], [{Code:B, date:YYY}]]
How can i convert this into dataframe?
Output I want is:
enter image description here
I tried the following code but it's not working.
fda_df=pd.read_json(json.dumps(data1))
The real data is
[
[
{
"code": "AA.US",
"date": "2022-12-31",
"earningsEstimateAvg": "4.5400",
"earningsEstimateGrowth": "0.0630",
"earningsEstimateHigh": "8.5000",
"earningsEstimateLow": "2.2000",
"earningsEstimateNumberOfAnalysts": "12.0000",
"earningsEstimateYearAgoEps": "4.2700",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "6.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "3.8700",
"epsTrend60daysAgo": "3.8200",
"epsTrend7daysAgo": "4.5200",
"epsTrend90daysAgo": "2.5900",
"epsTrendCurrent": "4.5400",
"growth": "0.0630",
"period": "+1y",
"revenueEstimateAvg": "11018700000.00",
"revenueEstimateGrowth": "0.0180",
"revenueEstimateHigh": "12927000000.00",
"revenueEstimateLow": "10029900000.00",
"revenueEstimateNumberOfAnalysts": "9.00",
"revenueEstimateYearAgoEps": null
} ],
[
{
"code": "AAIC.US",
"date": "2022-12-31",
"earningsEstimateAvg": "0.2600",
"earningsEstimateGrowth": "0.4440",
"earningsEstimateHigh": "0.3900",
"earningsEstimateLow": "0.1700",
"earningsEstimateNumberOfAnalysts": "3.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "1.0000",
"epsRevisionsUpLast7days": "0.0000",
"epsTrend30daysAgo": "0.2600",
"epsTrend60daysAgo": "0.2100",
"epsTrend7daysAgo": "0.2600",
"epsTrend90daysAgo": "0.2300",
"epsTrendCurrent": "0.2600",
"growth": "0.4440",
"period": "+1y",
"revenueEstimateAvg": "17280000.00",
"revenueEstimateGrowth": "0.1680",
"revenueEstimateHigh": "22110000.00",
"revenueEstimateLow": "12450000.00",
"revenueEstimateNumberOfAnalysts": "2.00",
"revenueEstimateYearAgoEps": null
},
{
"code": "AAIC.US",
"date": "2020-09-30",
"earningsEstimateAvg": "0.0200",
"earningsEstimateGrowth": "-0.8890",
"earningsEstimateHigh": "0.0300",
"earningsEstimateLow": "0.0200",
"earningsEstimateNumberOfAnalysts": "4.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "1.0000",
"epsRevisionsUpLast30days": "2.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "0.0300",
"epsTrend60daysAgo": "0.0300",
"epsTrend7daysAgo": "0.0300",
"epsTrend90daysAgo": "0.0600",
"epsTrendCurrent": "0.0200",
"growth": "-0.8890",
"period": "0q",
"revenueEstimateAvg": "3890000.00",
"revenueEstimateGrowth": "-0.1710",
"revenueEstimateHigh": "4110000.00",
"revenueEstimateLow": "3780000.00",
"revenueEstimateNumberOfAnalysts": "3.00",
"revenueEstimateYearAgoEps": null
}
] ]
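I think pd.DataFrame.from_records might be what you are looking for. Because data1 is a list of lists of dicts, flatten it first, e.g. pd.DataFrame.from_records([d for group in data1 for d in group]).
Have a look at the documentation.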
I have done for a sample data. This is what you need
import pandas as pd
data = [
    [{'Code': 'A', 'date': 'XXX', 'name': 'anil', 'age': 15}],
    [{'Code': 'B', 'date': 'YYY', 'name': 'kapoor', 'age': 18}],
]

# Flatten the list of lists so that every dict becomes one row. Taking only
# the first dict of each inner list would silently drop the additional
# records when an inner list holds more than one.
records = [record for group in data for record in group]

# Passing the dicts directly lets pandas match values to columns by key,
# instead of relying on every dict listing its keys in the same order.
df = pd.DataFrame(records)
print(df)

How to convert a list of OrderedDict to nested json with grouped keys in python

I'm working on a project where I need to convert a set of data rows from a database into a list of OrderedDict (needed for another purpose) and then convert this list of OrderedDict into a nested JSON format in Python. I'm just starting to learn Python. I was able to convert the query response from the database, which is a list of lists, into a list of OrderedDict.
I have the list of OrderedDict as below:
{
'OUTBOUND': [
OrderedDict([('Leg', 1), ('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '2'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 145.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 1),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '4'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 111.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 1),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'BDM'),('SeatGroup', 'null'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'A'),('Price', 111.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '1'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 180.0),('Num_Pax', 1),('Channel', 'Web'))]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'ATO'),('SeatGroup', '4'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 97.0),('Num_Pax', 1),('Channel', 'Web')]),
OrderedDict([('Leg', 2),('SessionID', 'W12231fwfegwcaa2'),('FeeCode', 'BDM'),('SeatGroup', 'null'),
('Currency', 'MXN'),('Modality', 'VB'),('BookingClass', 'U'),('Price', 97.0),('Num_Pax', 1),('Channel', 'Web')])
]
}
And I needed the nested format like below:
{
"OUTBOUND": [
{
"Leg": 1,
"SessionID": "W12231fwfegwcaa2",
"Modality": "VB",
"BookingClass": "A",
"FeeCodes":[
{
"FeeCode": "ATO",
"Prices":
[
{
"SeatGroup": "2",
"Price": 145.0,
"Currency": "MXN"
},
{
"SeatGroup": "4",
"Price": 111.0,
"Currency": "MXN"
}
]
},
{
"FeeCode": "VBABDM",
"Prices":
[
{
"SeatGroup": "null",
"Price": 111.0,
"Currency": "MXN"
}
]
}
],
"Num_Pax": 1,
"Channel": "Web"
},
{
"Leg": 2,
"SessionID": "W12231fwfegwcaa2",
"Modality": "VB",
"BookingClass": "U",
"FeeCodes":[
{
"FeeCode": "ATO",
"Prices":
[
{
"SeatGroup": "1",
"Price": 180.0,
"Currency": "MXN"
},
{
"SeatGroup": "4",
"price": 97.0,
"Currency": "MXN"
}
]
},
{
"FeeCode": "VBABDM",
"Prices":
[
{
"SeatGroup": "null",
"price": 97.0,
"Currency": "MXN"
}
]
}
],
"Num_Pax": 1,
"Channel": "Web"
}
]
}
If I'm not wrong, I need to group by Leg, SessionID, Modality, BookingClass, Num_Pax and Channel, and nest FeeCode, SeatGroup, Price and Currency as shown above, but I am unable to work out how to loop and group for the nesting.
It would be great if I could get some help. Thanks
I was able to write Python code to get the format I needed using simple looping, with a couple of changes in the output: the fields SessionID, Num_Pax and Channel are moved outside, and then the OUTBOUND field and the fields within it are generated.
Instead of OrderedDict, I used a list of lists as input which I convert into Pandas DataFrame and work with the DataFrame to get the nested format.
Below is the code I used:
outbound_df = pd.DataFrame(response_outbound, columns=All_columns)
Common_columns = ['Leg', 'Modality', 'BookingClass']

# SessionID, Num_Pax and Channel are written once at the top level, using the
# first distinct value found in each column.
response_data['SessionID'] = outbound_df['SessionID'].unique()[0]
response_data['Num_Pax'] = int(outbound_df['Num_Pax'].unique()[0])
response_data['Channel'] = outbound_df['Channel'].unique()[0]

temp_data = []
for leg in outbound_df['Leg'].unique():
    # All rows that belong to the current leg.
    leg_rows = outbound_df[outbound_df['Leg'] == leg]

    # Leg-level fields (Leg, Modality, BookingClass); Leg is cast to a plain int.
    leg_data = collections.OrderedDict()
    for column in Common_columns:
        first_value = leg_rows[column].unique()[0]
        leg_data[column] = int(first_value) if column == 'Leg' else first_value

    # One entry per distinct FeeCode in this leg, each carrying the list of
    # SeatGroup / Price / Currency dicts built from its rows.
    leg_data['FeeCodes'] = [
        {
            'FeeCode': fee_code,
            "Prices": [
                {
                    'SeatGroup': row['SeatGroup'],
                    'Price': float(row['Price']),
                    'Currency': row['Currency'],
                }
                for _, row in leg_rows[leg_rows['FeeCode'] == fee_code].iterrows()
            ],
        }
        for fee_code in leg_rows['FeeCode'].unique()
    ]
    temp_data.append(leg_data)

response_data["OUTBOUND"] = temp_data
I can just do json.dumps on response_data to get json format which will be sent to the next steps.
Below is the output format I get:
{
"SessionID":"W12231fwfegwcaa2",
"Num_Pax":1,
"Channel":"Web",
"OUTBOUND":[
{
"Leg":1,
"Modality":"VB",
"BookingClass":"A",
"FeeCodes":[
{
"FeeCode":"ATO",
"Prices":[
{
"SeatGroup":"2",
"Price":145.0,
"Currency":"MXN"
},
{
"SeatGroup":"4",
"Price":111.0,
"Currency":"MXN"
}
]
},
{
"FeeCode":"VBABDM",
"Prices":[
{
"SeatGroup":"null",
"Price":111.0,
"Currency":"MXN"
}
]
}
]
},
{
"Leg":2,
"Modality":"VB",
"BookingClass":"U",
"FeeCodes":[
{
"FeeCode":"ATO",
"Prices":[
{
"SeatGroup":"1",
"Price":180.0,
"Currency":"MXN"
},
{
"SeatGroup":"4",
"Price":97.0,
"Currency":"MXN"
}
]
},
{
"FeeCode":"VBABDM",
"Prices":[
{
"SeatGroup":"null",
"Price":97.0,
"Currency":"MXN"
}
]
}
]
}
]
}
Please let me know if we can shorten the code in terms of lengthy iterations or any other changes. Thanks.
PS: Sorry for my editing mistakes
Assuming that you stored the dictionary to some variable foo, you can do:
import json
json.dumps(foo)
Also, be careful: there is an extra closing parenthesis in the 4th element of the OUTBOUND list.

Convert Pandas Dataframe into multi level nested JSON

I have a dataframe that I need to convert into a nested JSON format. I can get one level of grouping done, but I don't know how to do a second grouping as well as a nesting beneath that.
I have looked at a lot of different examples, but nothing really gets me to the example I posted below.
import pandas as pd
data= {'Name': ['TEST01','TEST02'],
'Type': ['Tent','Tent'],
'Address':['123 Happy','456 Happy'],
'City':['Happytown','Happytown'],
'State': ['WA','NY'],
'PostalCode': ['89985','85542'],
'Spot' : ['A','A'],
'SpotAssigment' : ['123','456'],
'Cost': [900,500]
}
df = pd.DataFrame(data)
j = (df.groupby(['Name','Type'])
.apply(lambda x: x[['Address','City', 'State', 'PostalCode']].to_dict('r'))
.reset_index(name='addresses')
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
I want it to look like the below.
[
{
"Name": "TEST01",
"Type": "Tent",
"addresses": [
{
"Address": "123 Happy",
"City": "Happytown",
"PostalCode": "89985",
"State": "WA"
}
],
"spots":[
{"Spot":'A',
"SpotAssignments":[
"SpotAssignment":"123",
"Cost":900
]
}
]
},
{
"Name": "TEST02",
"Type": "Tent",
"addresses": [
{
"Address": "456 Happy",
"City": "Happytown",
"PostalCode": "85542",
"State": "NY"
}
],
"spots":[
{"Spot":'A',
"SpotAssignments":[
"SpotAssignment":"456",
"Cost":500
]
}
]
}
]
try this:
from pprint import pprint as pp

# NOTE: orient is spelled 'records' throughout; the 'r' shorthand is
# deprecated and is rejected by pandas 2.x.

# Level 1a: one list of address records per (Name, Type).
j = (df.groupby(['Name', 'Type'])
     .apply(lambda x: x[['Address', 'City', 'State', 'PostalCode']].to_dict('records'))
     .reset_index(name='addresses'))

# Level 2: one list of assignment records per (Name, Type, Spot).
k = (df.groupby(['Name', 'Type', 'Spot'])
     .apply(lambda x: x[['SpotAssigment', 'Cost']].to_dict('records'))
     .reset_index(name='SpotAssignments'))

# Level 1b: fold the per-spot rows back up to (Name, Type), so each spot
# record carries its own nested 'SpotAssignments' list.
h = (k.groupby(['Name', 'Type'])
     .apply(lambda x: x[['Spot', 'SpotAssignments']].to_dict('records'))
     .reset_index(name='spots'))

# Join the two nested columns on the shared grouping keys and turn every row
# into a plain dict.
m = j.merge(h, how='inner', on=['Name', 'Type'])
result = m.to_dict(orient='records')
pp(result)
This result is a Python list of dicts in the format that you want; you should be able to dump it as JSON directly.

Splitting a string in json using python

I have a simple Json file
input.json
[
{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "200/600"
},
{
"title": "Person1",
"type": "object2",
"required": "firstName1",
"min_max": "230/630"
},
{
"title": "Person2",
"type": "object2",
"required": "firstName2",
"min_max": "201/601"
},
{
"title": "Person3",
"type": "object3",
"required": "firstName3",
"min_max": "2000/6000"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "null"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "1024 / 256"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "0"
}
]
I am trying to create a new JSON file with new data. I would like to split "min_max" into two different fields, i.e. min and max. Below is the code, written in Python.
import json
input=open('input.json', 'r')
output=open('test.json', 'w')
json_decode=json.load(input)
result = []
for item in json_decode:
my_dict={}
my_dict['title']=item.get('title')
my_dict['min']=item.get('min_max')
my_dict['max']=item.get('min_max')
result.append(my_dict)
data=json.dumps(result, output)
output.write(data)
output.close()
How do I split the string into two different values? Also, is there any possibility of printing the JSON output in order?
Your JSON file seems to be written wrong (the example one). It is not a list. It is just a single associative array (or dictionary, in Python). Additionally, you don't seem to be using json.dumps properly. It only takes one positional argument (the object to serialize) and returns a string; it does not write to a file object. I also figured it would be easier to just create the dictionary inline. And you don't seem to be splitting the min_max properly.
Here's the correct input:
[{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "20/60"
}]
Here's your new code:
import json
def _split_min_max(value):
    """Split a ``"min/max"`` string into a ``(min, max)`` pair of strings.

    Surrounding whitespace is removed from both parts. ``(None, None)`` is
    returned when *value* is not a string or does not consist of exactly two
    ``/``-separated parts (for example ``"null"`` or ``"0"``).
    """
    if not isinstance(value, str):
        return None, None
    parts = value.split("/")
    if len(parts) != 2:
        return None, None
    return parts[0].strip(), parts[1].strip()


with open('input.json', 'r') as inp, open('test.json', 'w') as outp:
    json_decode = json.load(inp)
    result = []
    for temp in json_decode:
        # Values without exactly one "/" would make a plain split()[1] raise
        # IndexError, so the helper returns None for both fields instead.
        min_value, max_value = _split_min_max(temp["min_max"])
        result.append({
            "title": temp["title"],
            "min": min_value,
            "max": max_value,
        })
    data = json.dumps(result)
    outp.write(data)
Table + Python == Pandas
import pandas as pd
def _split_pair(text):
    """Return [min, max] stripped of spaces, or [None, None] unless exactly two parts."""
    parts = text.split("/")
    if len(parts) != 2:
        return [None, None]
    return [part.strip() for part in parts]


# Load the existing JSON file into a dataframe
df = pd.read_json("input.json")

# Derive the two new columns from min_max: one [min, max] pair per row,
# transposed into a column of mins and a column of maxes
pairs = df['min_max'].apply(_split_pair)
df['min'], df['max'] = zip(*pairs)

# The combined column is no longer needed
df.drop(columns='min_max', inplace=True)

# Write the records back out as JSON
df.to_json("test.json", orient='records')
Result:
[{'max': '600',
'min': '200',
'required': 'firstName',
'title': 'Person',
'type': 'object'},
{'max': '630',
'min': '230',
'required': 'firstName1',
'title': 'Person1',
'type': 'object2'},
{'max': '601',
'min': '201',
'required': 'firstName2',
'title': 'Person2',
'type': 'object2'},
{'max': '6000',
'min': '2000',
'required': 'firstName3',
'title': 'Person3',
'type': 'object3'},
{'max': None,
'min': None,
...
You can do something like this:
import json
# Collects every input dict, rewritten in place where min_max can be split.
nl = []
for di in json.loads(js):
    # partition() always yields three parts; sep is '' when no '/' is present.
    min_, sep, max_ = (part.strip() for part in di['min_max'].partition('/'))
    if sep == '/':
        # Replace the combined field with the two separate ones.
        del di['min_max']
        di['min'] = min_
        di['max'] = max_
    # Entries without a '/' are appended unchanged, still carrying min_max.
    nl.append(di)
# print() as a function: the Python 2 print statement is a SyntaxError on Python 3.
print(json.dumps(nl))
This keeps the "min_max" values that cannot be separated into two values unchanged.

Categories