Convert Pandas Dataframe into multi level nested JSON - python

I have a dataframe that I need to convert into a nested JSON format. I can get one level of grouping done, but I don't know how to add a second grouping with a further level of nesting beneath it.
I have looked at a lot of different examples, but none of them produces the output I posted below.
import json

import pandas as pd

# Sample data: one row per (Name, Type) with its address and spot details.
data = {
    'Name': ['TEST01', 'TEST02'],
    'Type': ['Tent', 'Tent'],
    'Address': ['123 Happy', '456 Happy'],
    'City': ['Happytown', 'Happytown'],
    'State': ['WA', 'NY'],
    'PostalCode': ['89985', '85542'],
    'Spot': ['A', 'A'],
    'SpotAssigment': ['123', '456'],
    'Cost': [900, 500],
}
df = pd.DataFrame(data)

# Group by (Name, Type) and collect each group's address columns into a list
# of dicts stored under "addresses". The orient is spelled out in full
# because the short alias 'r' is not accepted by pandas 2.0 and later.
j = (
    df.groupby(['Name', 'Type'])
    .apply(lambda x: x[['Address', 'City', 'State', 'PostalCode']].to_dict('records'))
    .reset_index(name='addresses')
    .to_json(orient='records')
)
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
I want it to look like the below.
[
{
"Name": "TEST01",
"Type": "Tent",
"addresses": [
{
"Address": "123 Happy",
"City": "Happytown",
"PostalCode": "89985",
"State": "WA"
}
],
"spots":[
{"Spot":"A",
"SpotAssignments":[
{"SpotAssignment":"123",
"Cost":900}
]
}
]
},
{
"Name": "TEST02",
"Type": "Tent",
"addresses": [
{
"Address": "456 Happy",
"City": "Happytown",
"PostalCode": "85542",
"State": "NY"
}
],
"spots":[
{"Spot":"A",
"SpotAssignments":[
{"SpotAssignment":"456",
"Cost":500}
]
}
]
}
]

try this:
from pprint import pprint as pp

# NOTE: the full orient name 'records' is used throughout; the short alias
# 'r' is not accepted by pandas 2.0 and later.

# Level 1: one row per (Name, Type) carrying its list of address dicts.
j = (
    df.groupby(['Name', 'Type'])
    .apply(lambda x: x[['Address', 'City', 'State', 'PostalCode']].to_dict('records'))
    .reset_index(name='addresses')
)

# Innermost level: one row per (Name, Type, Spot) carrying its assignments.
k = (
    df.groupby(['Name', 'Type', 'Spot'])
    .apply(lambda x: x[['SpotAssigment', 'Cost']].to_dict('records'))
    .reset_index(name='SpotAssignments')
)

# Fold the per-spot rows back up to (Name, Type) as a list of spot dicts.
h = (
    k.groupby(['Name', 'Type'])
    .apply(lambda x: x[['Spot', 'SpotAssignments']].to_dict('records'))
    .reset_index(name='spots')
)

# Join the two nested columns on the shared keys and emit plain Python
# records (a list of dicts).
m = j.merge(h, how='inner', on=['Name', 'Type'])
result = m.to_dict(orient='records')
pp(result)
this result is a python list of dicts in the same format that you want, you should be able to dump it as JSON directly.

Related

create dataframe in pandas using multilevel dict dynamic

I am fetching data from an API and trying to write the response to CSV. The catch is that the response is a multi-level dict (JSON), so when I convert it to CSV most of the columns end up containing dicts or lists of dicts.
I am trying using this
# Attempt to flatten one nested record: wrap it in a Series and, for every
# value that is a list or dict, append that value's own entries to the Series
# and drop the original key.
def expand(data):
d = pd.Series(data)
t = d.index
for i in t:
if type(d[i]) in (list,dict):
expend_s = pd.Series(d[i])
# NOTE(review): the value returned by t.append(...) is discarded, so `t` is not extended here.
t.append(expend_s.index)
# NOTE(review): Series.append was removed in pandas 2.0 -- confirm the pandas version in use.
d = d.append(expend_s)
d = d.drop([i])
return d
df['person'].apply(expand)
but this solution is not working. if we see person col there is multiple dict or list of dict like
"birthDate": "0000-00-00",
"genderCode": {
"codeValue": "M",
"shortName": "Male",
"longName": "Male"
},
"maritalStatusCode": {
"codeValue": "M",
"shortName": "Married"
},
"disabledIndicator": False,
"preferredName": {},
"ethnicityCode": {
"codeValue": "4",
"shortName": "4",
"longName": "Not Hispanic or Latino"
},
"raceCode": {
"identificationMethodCode": {},
"codeValue": "1",
"shortName": "White",
"longName": "White"
},
"militaryClassificationCodes": [],
"governmentIDs": [
{
"itemID": "9200037107708_4385",
"idValue": "XXX-XX-XXXX",
"nameCode": {
"codeValue": "SSN",
"longName": "Social Security Number"
},
"countryCode": "US"
}
],
"legalName": {
"givenName": "Jack",
"middleName": "C",
"familyName1": "Abele",
"formattedName": "Abele, Jack C"
},
"legalAddress": {
"nameCode": {
"codeValue": "Personal Address 1",
"shortName": "Personal Address 1",
"longName": "Personal Address 1"
},
"lineOne": "1932 Keswick Lane",
"cityName": "Concord",
"countrySubdivisionLevel1": {
"subdivisionType": "StateTerritory",
"codeValue": "CA",
"shortName": "California"
},
"countryCode": "US",
"postalCode": "94518"
},
"communication": {
"mobiles": [
{
"itemID": "9200037107708_4389",
"nameCode": {
"codeValue": "Personal Cell",
"shortName": "Personal Cell"
},
"countryDialing": "1",
"areaDialing": "925",
"dialNumber": "6860589",
"access": "1",
"formattedNumber": "(925) 686-0589"
}
]
}
}
your suggestion and advice would be so helpful
I think we can handle the nested dicts by reading the data with pd.json_normalize, and the lists of dicts with the function below. First we find the columns that contain lists.
def df_list_and_dict_col(explode_df: pd.DataFrame, primary_key: str,
                         col_name: str, folder: str) -> pd.DataFrame:
    """Expand a column of lists (typically lists of dicts) and save it as CSV.

    Each cell of ``col_name`` is turned into a list, the frame is exploded to
    one row per list item, and dict items are flattened into columns with
    ``pd.json_normalize``. Any resulting column that still contains lists is
    expanded recursively into its own ``{folder}/<column>.csv`` (keyed by
    ``primary_key``) and removed from this level. The flattened frame is
    written to ``{folder}/{col_name}.csv`` and returned.

    Parameters
    ----------
    explode_df : pd.DataFrame
        Frame holding ``primary_key`` and ``col_name``. It is not modified.
    primary_key : str
        Column carried into every recursively generated CSV.
    col_name : str
        Column whose cells are lists, dicts, string literals of those, or
        missing values.
    folder : str
        Existing directory that receives the CSV files.

    Returns
    -------
    pd.DataFrame
        The flattened frame that was written to ``{folder}/{col_name}.csv``.
    """
    import ast  # local import keeps the function self-contained

    def _is_missing(value):
        """Return True for None, pd.NA and float NaN."""
        if value is None or value is pd.NA:
            return True
        return isinstance(value, float) and value != value

    def _as_list(value):
        """Coerce one cell into the list of items to explode."""
        if isinstance(value, str):
            text = value.strip()
            value = ast.literal_eval(text) if text else []
        if isinstance(value, dict):
            # A bare dict is treated as a one-item list so it is normalized
            # instead of being exploded into its keys.
            return [value]
        if _is_missing(value):
            return []
        return value

    def _as_record(item):
        """Map one exploded item to the dict handed to json_normalize."""
        if isinstance(item, dict):
            return item
        if _is_missing(item):
            # Produced by empty lists: keep the row, with no extra columns.
            return {}
        # Non-dict items (e.g. strings) are kept under the original name.
        return {col_name: item}

    # Work on a copy so the caller's frame (or a slice of it) is untouched.
    exploded = explode_df.copy()
    # Only strings are parsed; real lists/dicts are used as they are, so
    # values whose repr is not a Python literal (e.g. NaN) no longer break.
    exploded[col_name] = exploded[col_name].map(_as_list)
    exploded = exploded.explode(col_name).reset_index(drop=True)

    records = [_as_record(item) for item in exploded[col_name]]
    normalized_df = pd.json_normalize(records)

    # The source column is dropped before the join so a nested key with the
    # same name cannot collide with it.
    result = exploded.drop(columns=col_name).join(
        other=normalized_df,
        lsuffix="_left",
        rsuffix="_right"
    )

    # Columns that still hold lists get their own CSV through recursion.
    nested_cols = [
        col for col in result.columns
        if col not in (primary_key, col_name)
        and result[col].map(lambda value: isinstance(value, list)).any()
    ]
    for col in nested_cols:
        df_list_and_dict_col(result[[primary_key, col]], primary_key,
                             col, folder)
        result = result.drop(columns=col)
        print(f'{col}.csv is done')

    result.to_csv(f'{folder}/{col_name}.csv')
    return result
First we collect the columns that contain lists and pass them to the function one by one. The function then checks whether any list remains inside the expanded column, repeats the process if so, and saves each result to CSV.
# Columns in which at least one cell holds a Python list. The scan goes
# column by column with Series.map because DataFrame.applymap is deprecated
# in recent pandas releases.
col_list = [
    col for col in df.columns
    if df[col].map(lambda value: isinstance(value, list)).any()
]
for col in col_list:
    # Each list column is expanded into its own CSV (keyed by the primary
    # key) and then removed from the main frame.
    df_list_and_dict_col(df[['primary_key', col]].copy(), 'primary_key', col, folder='worker')
    df.drop(columns=col, inplace=True)
now you have multiple csv in normalise format

Build dictionary from JSON and export it to Pandas

I am trying to build a script that pulls offline endpoints from the dictionary below:
[
{
"name": "My AP",
"serial": "Q234-ABCD-5678",
"mac": "00:11:22:33:44:55",
"status": "online",
"lanIp": "1.2.3.4",
"publicIp": "123.123.123.1",
"networkId": "N_24329156"
}
]
and then populate a dictionary and export output to xlsx with pandas
# Output column header -> key of the same value in each API record.
column_sources = {
    'Name': 'name',
    'Serial': 'serial',
    'MAC': 'mac',
    'Public IP': 'publicIp',
    'Network ID': 'networkId',
    'Status': 'status',
    'Last Reported': 'lastReportedAt',
    'Cellular': 'usingCellularFailover',
    'WAN 1': 'wan1Ip',
    'WAN 2': 'wan2Ip',
    'LAN': 'lanIp',
}

# Build dictionary to organize endpoints: one (initially empty) list per
# output column, keyed by the same headers that are appended to below.
endpoint = {header: [] for header in column_sources}

# Iterate over the endpoints to fill dictionary, keeping offline ones only.
for device in response_data:
    if device.get('status') == 'offline':
        for header, key in column_sources.items():
            # .get() yields None for fields a record does not carry, which
            # keeps all column lists the same length.
            endpoint[header].append(device.get(key))

df = pd.DataFrame.from_dict(endpoint)
df.to_excel("output.xlsx", index=False)
I am pretty sure there is a more efficient way to accomplish this, perhaps by loading the output into pandas and filtering the data there, but I am still a beginner.
You could convert a list of dictionaries into a Pandas dataframe directly.
If your list of dictionaries is called "response_data" then you can convert that list to a DataFrame directly like so:
# A list of dicts maps straight onto rows; the explicit index spells out the
# default 0..n-1 row labels.
row_labels = range(len(response_data))
df = pd.DataFrame(data=response_data, index=row_labels)
df.to_excel("output.xlsx", index=False)
You can use directly DataFrame and later rename columns and filter data.
import pandas as pd

# Two sample records that differ only in "status".
response_data = [
    {
        "name": "My AP",
        "serial": "Q234-ABCD-5678",
        "mac": "00:11:22:33:44:55",
        "status": "online",
        "lanIp": "1.2.3.4",
        "publicIp": "123.123.123.1",
        "networkId": "N_24329156"
    },
    {
        "name": "My AP",
        "serial": "Q234-ABCD-5678",
        "mac": "00:11:22:33:44:55",
        "status": "offline",
        "lanIp": "1.2.3.4",
        "publicIp": "123.123.123.1",
        "networkId": "N_24329156"
    }
]

df = pd.DataFrame(response_data)

# rename() ignores keys that are not present, so fields missing from the
# sample records are harmless here.
df = df.rename(columns={
    'name': 'Name',
    'serial': 'Serial',
    'mac': 'MAC',
    'status': 'Status',
    'publicIp': 'Public IP',
    'networkId': 'Network ID',
    'lastReportedAt': 'Last Reported',
    'usingCellularFailover': 'Cellular',
    'wan1Ip': 'WAN 1',
    'wan2Ip': 'WAN 2',
    'lanIp': 'LAN',
})

# The task is to report the offline endpoints, so keep exactly those rows.
df = df[df['Status'] == 'offline']
print(df)

# Only write the spreadsheet when run as a script, not when imported.
if __name__ == "__main__":
    df.to_excel("output.xlsx", index=False)

Converting Dictionary in list in list to dataframe in python

I am really a newbie. Thanks much.
Dictionary in list from JSON looks like this:
data1= [ [{Code:A, date:XXX}], [{Code:B, date:YYY}]]
How can i convert this into dataframe?
Output I want is:
enter image description here
I tried the following code but it's not working.
fda_df=pd.read_json(json.dumps(data1))
The real data is
[
[
{
"code": "AA.US",
"date": "2022-12-31",
"earningsEstimateAvg": "4.5400",
"earningsEstimateGrowth": "0.0630",
"earningsEstimateHigh": "8.5000",
"earningsEstimateLow": "2.2000",
"earningsEstimateNumberOfAnalysts": "12.0000",
"earningsEstimateYearAgoEps": "4.2700",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "6.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "3.8700",
"epsTrend60daysAgo": "3.8200",
"epsTrend7daysAgo": "4.5200",
"epsTrend90daysAgo": "2.5900",
"epsTrendCurrent": "4.5400",
"growth": "0.0630",
"period": "+1y",
"revenueEstimateAvg": "11018700000.00",
"revenueEstimateGrowth": "0.0180",
"revenueEstimateHigh": "12927000000.00",
"revenueEstimateLow": "10029900000.00",
"revenueEstimateNumberOfAnalysts": "9.00",
"revenueEstimateYearAgoEps": null
} ],
[
{
"code": "AAIC.US",
"date": "2022-12-31",
"earningsEstimateAvg": "0.2600",
"earningsEstimateGrowth": "0.4440",
"earningsEstimateHigh": "0.3900",
"earningsEstimateLow": "0.1700",
"earningsEstimateNumberOfAnalysts": "3.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "1.0000",
"epsRevisionsUpLast7days": "0.0000",
"epsTrend30daysAgo": "0.2600",
"epsTrend60daysAgo": "0.2100",
"epsTrend7daysAgo": "0.2600",
"epsTrend90daysAgo": "0.2300",
"epsTrendCurrent": "0.2600",
"growth": "0.4440",
"period": "+1y",
"revenueEstimateAvg": "17280000.00",
"revenueEstimateGrowth": "0.1680",
"revenueEstimateHigh": "22110000.00",
"revenueEstimateLow": "12450000.00",
"revenueEstimateNumberOfAnalysts": "2.00",
"revenueEstimateYearAgoEps": null
},
{
"code": "AAIC.US",
"date": "2020-09-30",
"earningsEstimateAvg": "0.0200",
"earningsEstimateGrowth": "-0.8890",
"earningsEstimateHigh": "0.0300",
"earningsEstimateLow": "0.0200",
"earningsEstimateNumberOfAnalysts": "4.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "1.0000",
"epsRevisionsUpLast30days": "2.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "0.0300",
"epsTrend60daysAgo": "0.0300",
"epsTrend7daysAgo": "0.0300",
"epsTrend90daysAgo": "0.0600",
"epsTrendCurrent": "0.0200",
"growth": "-0.8890",
"period": "0q",
"revenueEstimateAvg": "3890000.00",
"revenueEstimateGrowth": "-0.1710",
"revenueEstimateHigh": "4110000.00",
"revenueEstimateLow": "3780000.00",
"revenueEstimateNumberOfAnalysts": "3.00",
"revenueEstimateYearAgoEps": null
}
] ]
I think pd.DataFrame.from_records(data1) might be what you are looking for
have a look at the documentation
I have done for a sample data. This is what you need
from itertools import chain

import pandas as pd

data = [[{'Code': 'A', 'date': 'XXX', 'name': 'anil', 'age': 15}],
        [{'Code': 'B', 'date': 'YYY', 'name': 'kapoor', 'age': 18}]]


def flatten_records(nested):
    """Return every dict of a list of lists of dicts as one flat list.

    All dicts of each inner list are kept, not just the first, so inner
    lists holding several records contribute one row per record.
    """
    return list(chain.from_iterable(nested))


# A flat list of dicts becomes one row per dict, with the dict keys as
# column names.
df = pd.DataFrame(flatten_records(data))
print(df)

DataFrame to nested JSON with Python?

I am trying to extract data from SQL and convert it into the JSON file.
I also tried other "techniques" mentioned on the various websites but without any success.
So basically I'm "stuck" after below statement
# Collect each section's product rows into a list of dicts under "Products"
# and write the result to disk. The orient is spelled out as 'records'
# because the short alias 'r' is not accepted by pandas 2.0 and later.
j = (
    df.groupby(['SectionCode'])
    .apply(lambda x: x[['Barcode', 'BrandCode', 'PurchaseRate', 'SalesRate', 'unit', 'Item']].to_dict('records'))
    .reset_index()
    .rename(columns={0: 'Products'})
    .to_json(r'D:\DataToFirbaseWithPython\Export_DataFrame.json')
)
# to_json returns None when it is given a path, so this prints None.
print(j)
need this json format.
"SectionsWithItem": { #Root_Nose_In_Firebase
"0001": { #SectionCode
"Products": {
"018123": { #Barcode
"Barcode": "018123",
"BrandCode": "1004",
"PurchaseRate": 105.0,
"SalesRate": 125.0,
"Units": "Piece",
"name": "Shahi Delux Mouth Freshener"
},
"0039217": { #Barcode
"Barcode": "0039217",
"BrandCode": "0814",
"PurchaseRate": 140.0,
"SalesRate": 160.0,
"Units": "Piece",
"name": "Maizban Gota Pan Masala Medium Jar"
}
}
},
"0002": { #SectionCode
"Products": {
"03905": { #Barcode
"Barcode": "03905",
"BrandCode": "0189",
"PurchaseRate": 15.4,
"SalesRate": 17.0,
"Units": "Piece",
"name": "Peek Freans Rio Chocolate Half Roll"
},
"0003910": { #Barcode
"Barcode": "0003910",
"BrandCode": "0189",
"PurchaseRate": 110.32,
"SalesRate": 120.0,
"Units": "Piece",
"name": "Peek Freans Gluco Ticky Pack Box"
}
}
}
}
My DataFrame
Barcode,Item,SalesRate,PurchaseRate,unit,BrandCode,SectionCode
0005575,Broom Soft A Quality,100.0,80.0,,2037,0045
0005850,Safa Tomato Paste 800g,340.0,275.0,800g,1004,0009
0005921,Dettol Liquid 1Ltr,800.0,719.99,1Ltr,0475,0045
Grouping by the barcode as well should help with indexing like the desired output.
import json

import pandas as pd

# Read every column as text so codes keep their leading zeros.
df = pd.read_csv('stac1 - Sheet1.csv', dtype=str)  # made dataframe with provided data

# One row per (SectionCode, Barcode) with that product's fields collected
# under "Products". The orient is spelled out as 'records' because the short
# alias 'r' is not accepted by pandas 2.0 and later.
products = (
    df.groupby(['SectionCode', 'Barcode'])
    .apply(lambda x: x[['Barcode', 'BrandCode', 'PurchaseRate', 'SalesRate', 'unit', 'Item']].to_dict('records'))
    .reset_index()
    .rename(columns={0: 'Products'})
)
# to_json returns None when given a path, so its result is not kept.
products.to_json(r'Export_DataFrame.json')

# Read the file back to inspect what was written.
with open('Export_DataFrame.json') as f:
    data = json.load(f)
print(data)
Hopefully this helps get you in the right direction!

Splitting a string in json using python

I have a simple Json file
input.json
[
{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "200/600"
},
{
"title": "Person1",
"type": "object2",
"required": "firstName1",
"min_max": "230/630"
},
{
"title": "Person2",
"type": "object2",
"required": "firstName2",
"min_max": "201/601"
},
{
"title": "Person3",
"type": "object3",
"required": "firstName3",
"min_max": "2000/6000"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "null"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "1024 / 256"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "0"
}
]
I am trying to create a new json file with new data. I would like to split "min_max" into two different fields ie., min and max. Below is the code written in python.
# Copy each record's title into a new list of dicts and write it to
# test.json; "min" and "max" are meant to come from the "min_max" field.
import json
# NOTE(review): `input` shadows the built-in of the same name, and this file handle is never closed below.
input=open('input.json', 'r')
output=open('test.json', 'w')
json_decode=json.load(input)
result = []
for item in json_decode:
my_dict={}
my_dict['title']=item.get('title')
# Both fields receive the whole, unsplit "min_max" string.
my_dict['min']=item.get('min_max')
my_dict['max']=item.get('min_max')
result.append(my_dict)
# NOTE(review): json.dumps takes only the object positionally; passing `output` as a second positional argument raises TypeError.
data=json.dumps(result, output)
output.write(data)
output.close()
How do I split the string into two different values. Also, is there any possibility of printing the json output in order.
Your JSON file seems to be written wrong (the example one). It is not a list. It is just a single associative array (or dictionary, in Python). Additionally, you don't seem to be using json.dumps properly. It only takes 1 argument. I also figured it would be easier to just create the dictionary inline. And you don't seem to be splitting the min_max properly.
Here's the correct input:
[{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "20/60"
}]
Here's your new code:
import json


def split_min_max(value):
    """Split a "min/max" string into a ``(min, max)`` pair of strings.

    Whitespace around each part is removed, so "1024 / 256" gives
    ("1024", "256"). Anything that is not a string containing exactly one
    "/" (for example "null", "0" or None) gives (None, None).
    """
    if not isinstance(value, str):
        return None, None
    parts = [part.strip() for part in value.split("/")]
    if len(parts) != 2:
        return None, None
    return parts[0], parts[1]


def convert(source='input.json', target='test.json'):
    """Read the records in *source* and write title/min/max to *target*."""
    with open(source, 'r') as inp:
        json_decode = json.load(inp)

    result = []
    for temp in json_decode:
        minimum, maximum = split_min_max(temp.get("min_max"))
        result.append({
            "title": temp["title"],
            "min": minimum,
            "max": maximum
        })

    # The output is opened only after the input was read successfully.
    with open(target, 'w') as outp:
        outp.write(json.dumps(result))


if __name__ == "__main__":
    convert()
Table + Python == Pandas
import pandas as pd


def _split_pair(text):
    """Return [min, max] without surrounding blanks, or [None, None].

    The fallback is used whenever splitting on "/" does not give exactly
    two pieces.
    """
    pieces = text.split("/")
    if len(pieces) != 2:
        return [None, None]
    return [piece.strip() for piece in pieces]


# Load the existing JSON file as a dataframe.
df = pd.read_json("input.json")

# Derive the two new columns from min_max: one [min, max] pair per row,
# transposed into a column of minima and a column of maxima.
pairs = df['min_max'].apply(_split_pair)
df['min'], df['max'] = zip(*pairs)

# The combined column is no longer needed.
df = df.drop('min_max', axis=1)

# Write the records back out as JSON.
df.to_json("test.json", orient='records')
Result:
[{'max': '600',
'min': '200',
'required': 'firstName',
'title': 'Person',
'type': 'object'},
{'max': '630',
'min': '230',
'required': 'firstName1',
'title': 'Person1',
'type': 'object2'},
{'max': '601',
'min': '201',
'required': 'firstName2',
'title': 'Person2',
'type': 'object2'},
{'max': '6000',
'min': '2000',
'required': 'firstName3',
'title': 'Person3',
'type': 'object3'},
{'max': None,
'min': None,
...
You can do something like this:
import json

nl = []
# NOTE(review): `js` is not defined in this snippet; presumably it holds the
# JSON document as a string -- confirm against the caller.
for di in json.loads(js):
    # partition() always returns three parts; sep is '' when no '/' exists.
    min_, sep, max_ = (part.strip() for part in di['min_max'].partition('/'))
    if sep == '/':
        # Replace the combined field with the two separate values.
        del di['min_max']
        di['min'] = min_
        di['max'] = max_
    # Records without a '/' are appended unchanged.
    nl.append(di)
# print() as a function call, which Python 3 requires.
print(json.dumps(nl))
This keeps the "min_max" values that cannot be separated into two values unchanged.

Categories