Build dictionary from JSON and export it to Pandas - python

I am trying to build a script that pulls the offline endpoints from a list of dictionaries like the one below:
[
{
"name": "My AP",
"serial": "Q234-ABCD-5678",
"mac": "00:11:22:33:44:55",
"status": "online",
"lanIp": "1.2.3.4",
"publicIp": "123.123.123.1",
"networkId": "N_24329156"
}
]
and then populate a dictionary and export output to xlsx with pandas
# Spreadsheet column label -> key of that value in each endpoint record.
FIELDS = {
    'Name': 'name',
    'Serial': 'serial',
    'MAC': 'mac',
    'Public IP': 'publicIp',
    'Network ID': 'networkId',
    'Status': 'status',
    'Last Reported': 'lastReportedAt',
    'Cellular': 'usingCellularFailover',
    'WAN 1': 'wan1Ip',
    'WAN 2': 'wan2Ip',
    'LAN': 'lanIp',
}
# Build dictionary to organize endpoints: one (initially empty) list per column
endpoint = {label: [] for label in FIELDS}
# Iterate over the endpoints and keep only the offline ones
for device in response_data:
    if device['status'] == 'offline':
        for label, key in FIELDS.items():
            # .get() because a record may lack some fields (the sample
            # record has no 'wan1Ip', for example); those cells become None.
            endpoint[label].append(device.get(key))
df = pd.DataFrame.from_dict(endpoint)
df.to_excel("output.xlsx", index=False)
I am pretty sure there's a more efficient way to accomplish this task, maybe by loading the output into pandas and sorting the data there, but I am still a noob.

You could convert a list of dictionaries into a Pandas dataframe directly.

If your list of dictionaries is called "response_data" then you can convert that list to a DataFrame directly like so:
# One row per dictionary, labelled 0..n-1, then written out without the index.
row_labels = range(len(response_data))
df = pd.DataFrame(data=response_data, index=row_labels)
df.to_excel(excel_writer="output.xlsx", index=False)

You can build the DataFrame directly from the list, then rename the columns and filter the rows.
import pandas as pd

response_data = [
    {
        "name": "My AP",
        "serial": "Q234-ABCD-5678",
        "mac": "00:11:22:33:44:55",
        "status": "online",
        "lanIp": "1.2.3.4",
        "publicIp": "123.123.123.1",
        "networkId": "N_24329156"
    },
    {
        "name": "My AP",
        "serial": "Q234-ABCD-5678",
        "mac": "00:11:22:33:44:55",
        "status": "offline",
        "lanIp": "1.2.3.4",
        "publicIp": "123.123.123.1",
        "networkId": "N_24329156"
    }
]

# A list of dictionaries converts to a DataFrame directly: one row per dict.
df = pd.DataFrame(response_data)
# Rename the API field names to the headings wanted in the spreadsheet.
# Names that are not present in the data (e.g. 'wan1Ip' in this sample)
# are simply ignored by rename().
df = df.rename(columns={
    'name': 'Name',
    'serial': 'Serial',
    'mac': 'MAC',
    'status': 'Status',
    'publicIp': 'Public IP',
    'networkId': 'Network ID',
    'lastReportedAt': 'Last Reported',
    'usingCellularFailover': 'Cellular',
    'wan1Ip': 'WAN 1',
    'wan2Ip': 'WAN 2',
    'lanIp': 'LAN',
})
# Keep only the endpoints that are offline.
df = df[df['Status'] == 'offline']

if __name__ == "__main__":
    print(df)
    df.to_excel("output.xlsx", index=False)

Related

python parse data from nested json

I have the following (excerpt) json data structure:
{
"apiToken": {
"createdAt": "2022-03-04T12:18:29.000956Z",
"expiresAt": "2022-09-04T12:18:29.000956Z"
},
"canGenerateApiToken": true,
"dateJoined": "2021-01-29T10:07:04.395172Z",
"email": "john#doe.com",
"emailReadOnly": true,
"emailVerified": true,
"firstLogin": "2021-01-29T13:01:33.294216Z",
"fullName": "John Doe",
"fullNameReadOnly": true,
"groupsReadOnly": false,
"id": "32168415841",
"isSystem": false,
"lastLogin": "2022-09-12T08:51:00.159750Z",
"lowestRole": "Admin",
"primaryTwoFaMethod": "application",
"scope": "account",
"scopeRoles": [
{
"id": "68418945648943589",
"name": "AT || ACME Inc.",
"roleId": "9848949354653168",
"roleName": "Admin",
"roles": [
"Admin"
]
}
],
"siteRoles": [],
"source": "sso_saml",
"tenantRoles": [],
"twoFaEnabled": true
}
I'm trying to write certain data into an excel file with:
df = pd.json_normalize(result)
df.head()
df[['scope', 'fullName', 'email', 'lowestRole', 'scope',
'scopeRoles.name']].to_excel(completename)
But I struggle with 'scopeRoles.name' as it's nested.
with the code above I get
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Index(['scope', 'fullName', 'email', 'lowestRole', 'scope', 'scopeRoles.name'], dtype='object')] are in the [columns]"
I also tried different versions, but always failed.
I basically need to understand how I can specify the fields to write into excel when the field itself is nested. If I just use "non-nested" entries it works perfectly fine
thanks
You need to flatten your JSON data file.
You could use the flatten_json package.
pip install flatten_json
from flatten_json import flatten
unflat_json = {'user':
{'Rachel':
{'UserID': 1717171717,
'Email': 'rachel1999#gmail.com',
'friends': ['John', 'Jeremy', 'Emily']
}
}
}
flat_json = flatten(unflat_json)
print(flat_json)
Output:
{'user_Rachel_UserID': 1717171717, 'user_Rachel_Email': 'rachel1999#gmail.com', 'user_Rachel_friends_0': 'John', 'user_Rachel_friends_1': 'Jeremy', 'user_Rachel_friends_2': 'Emily'}
To deal with a list of dictionaries, you can use df.from_records(). But, you need to process it separately to combine each dataframe together. I assumed the data used is exactly the same, considering the df['scopeRoles'] only consisted of one element. Please try something like this:
import pandas as pd
result = {
"apiToken": {
"createdAt": "2022-03-04T12:18:29.000956Z",
"expiresAt": "2022-09-04T12:18:29.000956Z"
},
"canGenerateApiToken": True,
"dateJoined": "2021-01-29T10:07:04.395172Z",
"email": "john#doe.com",
"emailReadOnly": True,
"emailVerified": True,
"firstLogin": "2021-01-29T13:01:33.294216Z",
"fullName": "John Doe",
"fullNameReadOnly": True,
"groupsReadOnly": False,
"id": "32168415841",
"isSystem": False,
"lastLogin": "2022-09-12T08:51:00.159750Z",
"lowestRole": "Admin",
"primaryTwoFaMethod": "application",
"scope": "account",
"scopeRoles": [
{
"id": "68418945648943589",
"name": "AT || ACME Inc.",
"roleId": "9848949354653168",
"roleName": "Admin",
"roles": [
"Admin"
]
}
],
"siteRoles": [],
"source": "sso_saml",
"tenantRoles": [],
"twoFaEnabled": True
}
df = pd.json_normalize(result)
# Top-level fields to export. Each column is listed once, so the output does
# not end up with a duplicated 'scope' column.
df2 = df[['scope', 'fullName', 'email', 'lowestRole']]
# 'scopeRoles' holds a list of dicts; from_records() turns the list found in
# the first row into its own dataframe (one row per role).
df3 = pd.DataFrame.from_records(df.loc[0, 'scopeRoles'])
# join df2 and df3 on their index
res = df2.join(df3)
print(res)
I hope this code helps!
EDIT
To get the name column only, you just have to subscript like so:
df3 = df.from_records(df["scopeRoles"][0])['name']

Converting Dictionary in list in list to dataframe in python

I am really a newbie. Thanks much.
Dictionary in list from JSON looks like this:
data1= [ [{Code:A, date:XXX}], [{Code:B, date:YYY}]]
How can i convert this into dataframe?
Output I want is:
enter image description here
I tried the following code but it's not working.
fda_df=pd.read_json(json.dumps(data1))
The real data is
[
[
{
"code": "AA.US",
"date": "2022-12-31",
"earningsEstimateAvg": "4.5400",
"earningsEstimateGrowth": "0.0630",
"earningsEstimateHigh": "8.5000",
"earningsEstimateLow": "2.2000",
"earningsEstimateNumberOfAnalysts": "12.0000",
"earningsEstimateYearAgoEps": "4.2700",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "6.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "3.8700",
"epsTrend60daysAgo": "3.8200",
"epsTrend7daysAgo": "4.5200",
"epsTrend90daysAgo": "2.5900",
"epsTrendCurrent": "4.5400",
"growth": "0.0630",
"period": "+1y",
"revenueEstimateAvg": "11018700000.00",
"revenueEstimateGrowth": "0.0180",
"revenueEstimateHigh": "12927000000.00",
"revenueEstimateLow": "10029900000.00",
"revenueEstimateNumberOfAnalysts": "9.00",
"revenueEstimateYearAgoEps": null
} ],
[
{
"code": "AAIC.US",
"date": "2022-12-31",
"earningsEstimateAvg": "0.2600",
"earningsEstimateGrowth": "0.4440",
"earningsEstimateHigh": "0.3900",
"earningsEstimateLow": "0.1700",
"earningsEstimateNumberOfAnalysts": "3.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "1.0000",
"epsRevisionsUpLast7days": "0.0000",
"epsTrend30daysAgo": "0.2600",
"epsTrend60daysAgo": "0.2100",
"epsTrend7daysAgo": "0.2600",
"epsTrend90daysAgo": "0.2300",
"epsTrendCurrent": "0.2600",
"growth": "0.4440",
"period": "+1y",
"revenueEstimateAvg": "17280000.00",
"revenueEstimateGrowth": "0.1680",
"revenueEstimateHigh": "22110000.00",
"revenueEstimateLow": "12450000.00",
"revenueEstimateNumberOfAnalysts": "2.00",
"revenueEstimateYearAgoEps": null
},
{
"code": "AAIC.US",
"date": "2020-09-30",
"earningsEstimateAvg": "0.0200",
"earningsEstimateGrowth": "-0.8890",
"earningsEstimateHigh": "0.0300",
"earningsEstimateLow": "0.0200",
"earningsEstimateNumberOfAnalysts": "4.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "1.0000",
"epsRevisionsUpLast30days": "2.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "0.0300",
"epsTrend60daysAgo": "0.0300",
"epsTrend7daysAgo": "0.0300",
"epsTrend90daysAgo": "0.0600",
"epsTrendCurrent": "0.0200",
"growth": "-0.8890",
"period": "0q",
"revenueEstimateAvg": "3890000.00",
"revenueEstimateGrowth": "-0.1710",
"revenueEstimateHigh": "4110000.00",
"revenueEstimateLow": "3780000.00",
"revenueEstimateNumberOfAnalysts": "3.00",
"revenueEstimateYearAgoEps": null
}
] ]
I think pd.DataFrame.from_records(data1) might be what you are looking for
have a look at the documentation
I have done this for some sample data. This is what you need:
from itertools import chain

import pandas as pd

data = [[{'Code': 'A', 'date': 'XXX', 'name': 'anil', 'age': 15}],
        [{'Code': 'B', 'date': 'YYY', 'name': 'kapoor', 'age': 18}]]


def records_to_frame(nested):
    """Return a DataFrame with one row per dict found in a list of lists.

    Every dict of every inner list becomes a row, so an inner list holding
    several records contributes several rows. Columns are matched by key,
    not by position, so the dicts need not share the same key order.
    """
    return pd.DataFrame(list(chain.from_iterable(nested)))


df = records_to_frame(data)
print(df)

Convert Pandas Dataframe into multi level nested JSON

I have a dataframe that I need to convert into a nested json format. I can get one level of grouping done, but I don't know how to do a second grouping as well as a nesting beneath that.
I have looked at a lot of different examples, but none of them really gets me to the result I posted below.
import json

import pandas as pd

data = {'Name': ['TEST01', 'TEST02'],
        'Type': ['Tent', 'Tent'],
        'Address': ['123 Happy', '456 Happy'],
        'City': ['Happytown', 'Happytown'],
        'State': ['WA', 'NY'],
        'PostalCode': ['89985', '85542'],
        'Spot': ['A', 'A'],
        'SpotAssigment': ['123', '456'],
        'Cost': [900, 500]
        }
df = pd.DataFrame(data)
# One level of nesting: for each (Name, Type) pair, collect the address
# columns into a list of dicts stored under 'addresses'.
# 'records' is spelled out in full; the 'r' shorthand is rejected by
# pandas 2.x.
j = (df.groupby(['Name', 'Type'])[['Address', 'City', 'State', 'PostalCode']]
       .apply(lambda x: x.to_dict('records'))
       .reset_index(name='addresses')
       .to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
I want it to look like the below.
[
{
"Name": "TEST01",
"Type": "Tent",
"addresses": [
{
"Address": "123 Happy",
"City": "Happytown",
"PostalCode": "89985",
"State": "WA"
}
],
"spots":[
{"Spot":"A",
"SpotAssignments":[
{"SpotAssignment":"123",
"Cost":900}
]
}
]
},
{
"Name": "TEST02",
"Type": "Tent",
"addresses": [
{
"Address": "456 Happy",
"City": "Happytown",
"PostalCode": "85542",
"State": "NY"
}
],
"spots":[
{"Spot":"A",
"SpotAssignments":[
{"SpotAssignment":"456",
"Cost":500}
]
}
]
}
]
try this:
from pprint import pprint as pp

# Level 1: one row per (Name, Type) holding the list of address dicts.
# 'records' is spelled out in full; the 'r' shorthand is rejected by
# pandas 2.x.
j = (df.groupby(['Name', 'Type'])[['Address', 'City', 'State', 'PostalCode']]
       .apply(lambda x: x.to_dict('records'))
       .reset_index(name='addresses'))
# Level 3 (innermost): assignments collected per (Name, Type, Spot).
k = (df.groupby(['Name', 'Type', 'Spot'])[['SpotAssigment', 'Cost']]
       .apply(lambda x: x.to_dict('records'))
       .reset_index(name='SpotAssignments'))
# Level 2: wrap each spot and its assignments into a list per (Name, Type).
h = (k.groupby(['Name', 'Type'])[['Spot', 'SpotAssignments']]
       .apply(lambda x: x.to_dict('records'))
       .reset_index(name='spots'))
# Bring the addresses and the spots together on the shared grouping keys.
m = j.merge(h, how='inner', on=['Name', 'Type'])
result = m.to_dict(orient='records')
pp(result)
This result is a Python list of dicts in the format you want, so you should be able to dump it as JSON directly.

csv to complex nested json

So, I have a huge CSV file that looks like:
PN,PCA Code,MPN Code,DATE_CODE,Supplier Code,CM Code,Fiscal YEAR,Fiscal MONTH,Usage,Defects
13-1668-01,73-2590,MPN148,1639,S125,CM1,2017,5,65388,0
20-0127-02,73-2171,MPN170,1707,S125,CM1,2017,9,11895,0
19-2472-01,73-2302,MPN24,1711,S119,CM1,2017,10,4479,0
20-0127-02,73-2169,MPN170,1706,S125,CM1,2017,9,7322,0
20-0127-02,73-2296,MPN170,1822,S125,CM1,2018,12,180193,0
15-14399-01,73-2590,MPN195,1739,S133,CM6,2018,11,1290,0
What I want to do is group up all the data by PCA Code. So, a PCA Code will have certain number for parts, those parts would be manufactured by certain MPN Code and the final nested JSON structure that I want looks like:
[
{
PCA: {
"code": "73-2590",
"CM": ["CM1", "CM6"],
"parts": [
{
"number": "13-1668-01",
"manufacturer": [
{
"id": "MPN148"
"info": [
{
"date_code": 1639,
"supplier": {
"id": "S125",
"FYFM": "2020-9",
"usage": 65388,
"defects": 0,
}
}
]
},
]
}
]
}
}
]
So, I want this structure for multiple part numbers (PNs) having different MPNs with different Date Codes and so on.
I am currently using Pandas to do this but I'm stuck on how to proceed with the nesting.
My code so far:
import json
import pandas as pd

dataframe = pd.read_csv('files/dppm_wc.csv')
data = {'PCAs': []}
for key, group in dataframe.groupby('PCA Code'):
    # iterrows() (single "t") yields (index, row) pairs for the group.
    for index, row in group.iterrows():
        # NOTE: temp_dict is built but not yet added to data, and 'parts'
        # is still empty; the nesting is the open question.
        temp_dict = {'PCA Code': key, 'CM Code': row['CM Code'], 'parts': []}
with open('output.txt', 'w') as file:
    file.write(json.dumps(data, indent=4))
How do I proceed to achieve the nested JSON format that I want? Is there a better way to do this than what I am doing?
I don't really understand what you wish to do with that structure, but I guess it could be achieved with something like this
data = {'PCAs': []}
for key, group in df.groupby('PCA Code'):
    # One entry per PCA code; its rows are folded into 'CM Code' and 'parts'.
    temp_dict = {'PCA Code': key, 'CM Code': [], 'parts': []}
    for index, row in group.iterrows():
        # List every CM code once per PCA, in first-seen order, instead of
        # repeating it for each row of the group.
        if row['CM Code'] not in temp_dict['CM Code']:
            temp_dict['CM Code'].append(row['CM Code'])
        temp_dict['parts'].append(
            {
                'number': row['PN'],
                'manufacturer': [
                    {
                        'id': row['MPN Code'],
                        'info': [
                            {
                                'date_code': row['DATE_CODE'],
                                'supplier': {
                                    'id': row['Supplier Code'],
                                    'FYFM': f"{row['Fiscal YEAR']}-{row['Fiscal MONTH']}",
                                    'usage': row['Usage'],
                                    'defects': row['Defects'],
                                },
                            }
                        ],
                    }
                ],
            }
        )
    data['PCAs'].append(temp_dict)

Splitting a string in json using python

I have a simple Json file
input.json
[
{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "200/600"
},
{
"title": "Person1",
"type": "object2",
"required": "firstName1",
"min_max": "230/630"
},
{
"title": "Person2",
"type": "object2",
"required": "firstName2",
"min_max": "201/601"
},
{
"title": "Person3",
"type": "object3",
"required": "firstName3",
"min_max": "2000/6000"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "null"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "1024 / 256"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "0"
}
]
I am trying to create a new json file with new data. I would like to split "min_max" into two different fields ie., min and max. Below is the code written in python.
import json
input=open('input.json', 'r')
output=open('test.json', 'w')
json_decode=json.load(input)
result = []
for item in json_decode:
my_dict={}
my_dict['title']=item.get('title')
my_dict['min']=item.get('min_max')
my_dict['max']=item.get('min_max')
result.append(my_dict)
data=json.dumps(result, output)
output.write(data)
output.close()
How do I split the string into two different values? Also, is there any way to print the JSON output in order?
Your JSON file seems to be written wrong (the example one). It is not a list. It is just a single associated array (or dictionary, in Python). Additionally, you don't seem to be using json.dumps properly. It only takes 1 argument. I also figured it would be easier to just create the dictionary inline. And you don't seem to be splitting the min_max properly.
Here's the correct input:
[{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "20/60"
}]
Here's your new code:
import json


def split_min_max(value):
    """Split a "min/max" string into a (min, max) pair of strings.

    Surrounding whitespace is removed from both parts, so "1024 / 256"
    gives ("1024", "256"). A value that does not consist of exactly two
    "/"-separated parts (such as "null" or "0") gives (None, None) instead
    of raising IndexError.
    """
    parts = [part.strip() for part in value.split("/")]
    if len(parts) != 2:
        return None, None
    return parts[0], parts[1]


def convert(in_path='input.json', out_path='test.json'):
    """Read the records in in_path and write their title/min/max to out_path."""
    with open(in_path, 'r') as inp:
        json_decode = json.load(inp)
    result = []
    for temp in json_decode:
        low, high = split_min_max(temp["min_max"])
        result.append({
            "title": temp["title"],
            "min": low,
            "max": high
        })
    # The output file is opened only after the input has been parsed, so a
    # broken input file does not leave an empty output file behind.
    with open(out_path, 'w') as outp:
        outp.write(json.dumps(result))


if __name__ == "__main__":
    convert()
Table + Python == Pandas
import pandas as pd
# Load the records from input.json into a dataframe
df = pd.read_json("input.json")
# Create two new columns, 'min' and 'max', from the min_max column:
# - each value is split on "/" and every piece is stripped of surrounding
#   whitespace with strip()
# - a value that does not split into exactly two pieces gives [None,None]
# - zip(*...) regroups the per-row pairs into one sequence of mins and one
#   sequence of maxes, which are then assigned to the two columns
df['min'], df['max'] = (zip(*df['min_max'].apply
(lambda x: [i.strip() for i in x.split("/")]
if len(x.split("/"))== 2 else [None,None])))
# Drop the original min_max column in place (axis=1 selects columns)
df.drop('min_max', axis=1, inplace=True)
# Write the dataframe back out as a JSON list of records
df.to_json("test.json",orient='records')
Result:
[{'max': '600',
'min': '200',
'required': 'firstName',
'title': 'Person',
'type': 'object'},
{'max': '630',
'min': '230',
'required': 'firstName1',
'title': 'Person1',
'type': 'object2'},
{'max': '601',
'min': '201',
'required': 'firstName2',
'title': 'Person2',
'type': 'object2'},
{'max': '6000',
'min': '2000',
'required': 'firstName3',
'title': 'Person3',
'type': 'object3'},
{'max': None,
'min': None,
...
You can do something like this:
import json

nl = []
for di in json.loads(js):
    # partition() always returns three parts; sep is '' when the value
    # contains no '/'.
    min_, sep, max_ = (s.strip() for s in di['min_max'].partition('/'))
    if sep == '/':
        # Replace the combined field with the two separate ones.
        del di['min_max']
        di['min'] = min_
        di['max'] = max_
    # Records without a '/' are appended unchanged.
    nl.append(di)
print(json.dumps(nl))
This keeps the "min_max" values that cannot be separated into two values unchanged.

Categories