python parse data from nested json - python

I have the following (excerpt) json data structure:
{
"apiToken": {
"createdAt": "2022-03-04T12:18:29.000956Z",
"expiresAt": "2022-09-04T12:18:29.000956Z"
},
"canGenerateApiToken": true,
"dateJoined": "2021-01-29T10:07:04.395172Z",
"email": "john#doe.com",
"emailReadOnly": true,
"emailVerified": true,
"firstLogin": "2021-01-29T13:01:33.294216Z",
"fullName": "John Doe",
"fullNameReadOnly": true,
"groupsReadOnly": false,
"id": "32168415841",
"isSystem": false,
"lastLogin": "2022-09-12T08:51:00.159750Z",
"lowestRole": "Admin",
"primaryTwoFaMethod": "application",
"scope": "account",
"scopeRoles": [
{
"id": "68418945648943589",
"name": "AT || ACME Inc.",
"roleId": "9848949354653168",
"roleName": "Admin",
"roles": [
"Admin"
]
}
],
"siteRoles": [],
"source": "sso_saml",
"tenantRoles": [],
"twoFaEnabled": true
}
I'm trying to write certain data into an excel file with:
df = pd.json_normalize(result)
df.head()
df[['scope', 'fullName', 'email', 'lowestRole', 'scope',
'scopeRoles.name']].to_excel(completename)
But I struggle with 'scopeRoles.name' because it is nested.
With the code above I get:
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Index(['scope', 'fullName', 'email', 'lowestRole', 'scope', 'scopeRoles.name'], dtype='object')] are in the [columns]"
I also tried different variations, but they always failed.
I basically need to understand how to specify the fields to write to Excel when the field itself is nested. If I use only non-nested entries, it works perfectly fine.
Thanks.

You need to flatten your JSON data file.
You could use the flatten_json package.
pip install flatten_json
from flatten_json import flatten
unflat_json = {'user':
{'Rachel':
{'UserID': 1717171717,
'Email': 'rachel1999#gmail.com',
'friends': ['John', 'Jeremy', 'Emily']
}
}
}
flat_json = flatten(unflat_json)
print(flat_json)
Output:
{'user_Rachel_UserID': 1717171717, 'user_Rachel_Email': 'rachel1999#gmail.com', 'user_Rachel_friends_0': 'John', 'user_Rachel_friends_1': 'Jeremy', 'user_Rachel_friends_2': 'Emily'}

To deal with a list of dictionaries, you can use pd.DataFrame.from_records() (called below through the instance, as df.from_records()). However, you need to process that column separately and then join the resulting dataframe with the rest of the data. I assumed the data is exactly as shown, considering that df['scopeRoles'] consists of only one element. Please try something like this:
import pandas as pd
result = {
"apiToken": {
"createdAt": "2022-03-04T12:18:29.000956Z",
"expiresAt": "2022-09-04T12:18:29.000956Z"
},
"canGenerateApiToken": True,
"dateJoined": "2021-01-29T10:07:04.395172Z",
"email": "john#doe.com",
"emailReadOnly": True,
"emailVerified": True,
"firstLogin": "2021-01-29T13:01:33.294216Z",
"fullName": "John Doe",
"fullNameReadOnly": True,
"groupsReadOnly": False,
"id": "32168415841",
"isSystem": False,
"lastLogin": "2022-09-12T08:51:00.159750Z",
"lowestRole": "Admin",
"primaryTwoFaMethod": "application",
"scope": "account",
"scopeRoles": [
{
"id": "68418945648943589",
"name": "AT || ACME Inc.",
"roleId": "9848949354653168",
"roleName": "Admin",
"roles": [
"Admin"
]
}
],
"siteRoles": [],
"source": "sso_saml",
"tenantRoles": [],
"twoFaEnabled": True
}
df = pd.json_normalize(result)
df2 = df[['scope', 'fullName', 'email', 'lowestRole', 'scope']]
# from_records() returns a dataframe from a list of dict df['scopeRoles'].
df3 = df.from_records(df["scopeRoles"][0])
# join df2 and df3
res = df2.join(df3)
print(res)
I hope this code helps!
EDIT
To get the name column only, you just have to subscript like so:
df3 = df.from_records(df["scopeRoles"][0])['name']

Related

Converting Dictionary in list in list to dataframe in python

I am really a newbie. Thanks very much.
The dictionaries in a list (loaded from JSON) look like this:
data1= [ [{Code:A, date:XXX}], [{Code:B, date:YYY}]]
How can I convert this into a dataframe?
The output I want is:
enter image description here
I tried the following code but it's not working.
fda_df=pd.read_json(json.dumps(data1))
The real data is
[
[
{
"code": "AA.US",
"date": "2022-12-31",
"earningsEstimateAvg": "4.5400",
"earningsEstimateGrowth": "0.0630",
"earningsEstimateHigh": "8.5000",
"earningsEstimateLow": "2.2000",
"earningsEstimateNumberOfAnalysts": "12.0000",
"earningsEstimateYearAgoEps": "4.2700",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "6.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "3.8700",
"epsTrend60daysAgo": "3.8200",
"epsTrend7daysAgo": "4.5200",
"epsTrend90daysAgo": "2.5900",
"epsTrendCurrent": "4.5400",
"growth": "0.0630",
"period": "+1y",
"revenueEstimateAvg": "11018700000.00",
"revenueEstimateGrowth": "0.0180",
"revenueEstimateHigh": "12927000000.00",
"revenueEstimateLow": "10029900000.00",
"revenueEstimateNumberOfAnalysts": "9.00",
"revenueEstimateYearAgoEps": null
} ],
[
{
"code": "AAIC.US",
"date": "2022-12-31",
"earningsEstimateAvg": "0.2600",
"earningsEstimateGrowth": "0.4440",
"earningsEstimateHigh": "0.3900",
"earningsEstimateLow": "0.1700",
"earningsEstimateNumberOfAnalysts": "3.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "0.0000",
"epsRevisionsUpLast30days": "1.0000",
"epsRevisionsUpLast7days": "0.0000",
"epsTrend30daysAgo": "0.2600",
"epsTrend60daysAgo": "0.2100",
"epsTrend7daysAgo": "0.2600",
"epsTrend90daysAgo": "0.2300",
"epsTrendCurrent": "0.2600",
"growth": "0.4440",
"period": "+1y",
"revenueEstimateAvg": "17280000.00",
"revenueEstimateGrowth": "0.1680",
"revenueEstimateHigh": "22110000.00",
"revenueEstimateLow": "12450000.00",
"revenueEstimateNumberOfAnalysts": "2.00",
"revenueEstimateYearAgoEps": null
},
{
"code": "AAIC.US",
"date": "2020-09-30",
"earningsEstimateAvg": "0.0200",
"earningsEstimateGrowth": "-0.8890",
"earningsEstimateHigh": "0.0300",
"earningsEstimateLow": "0.0200",
"earningsEstimateNumberOfAnalysts": "4.0000",
"earningsEstimateYearAgoEps": "0.1800",
"epsRevisionsDownLast30days": "1.0000",
"epsRevisionsUpLast30days": "2.0000",
"epsRevisionsUpLast7days": "1.0000",
"epsTrend30daysAgo": "0.0300",
"epsTrend60daysAgo": "0.0300",
"epsTrend7daysAgo": "0.0300",
"epsTrend90daysAgo": "0.0600",
"epsTrendCurrent": "0.0200",
"growth": "-0.8890",
"period": "0q",
"revenueEstimateAvg": "3890000.00",
"revenueEstimateGrowth": "-0.1710",
"revenueEstimateHigh": "4110000.00",
"revenueEstimateLow": "3780000.00",
"revenueEstimateNumberOfAnalysts": "3.00",
"revenueEstimateYearAgoEps": null
}
] ]
I think pd.DataFrame.from_records(data1) might be what you are looking for.
Have a look at the documentation.
I have done this for some sample data. This is what you need:
import pandas as pd
data= [[{'Code': 'A', 'date':'XXX', 'name' : 'anil', 'age': 15}], [{'Code':'B', 'date':'YYY', 'name': 'kapoor', 'age': 18}]]
col_name = list(data[0][0].keys())
row_data = []
for i in range(len(data)):
row_data.append(list(data[i][0].values()))
df = pd.DataFrame(row_data, columns =col_name)
print(df)

Convert Pandas Dataframe into multi level nested JSON

I have a dataframe that I need to convert into a nested JSON format. I can get one level of grouping done, but I don't know how to do a second grouping, or how to add a nesting beneath that.
I have looked at a lot of different examples, but none of them really gets me the output I posted below.
import pandas as pd
data= {'Name': ['TEST01','TEST02'],
'Type': ['Tent','Tent'],
'Address':['123 Happy','456 Happy'],
'City':['Happytown','Happytown'],
'State': ['WA','NY'],
'PostalCode': ['89985','85542'],
'Spot' : ['A','A'],
'SpotAssigment' : ['123','456'],
'Cost': [900,500]
}
df = pd.DataFrame(data)
j = (df.groupby(['Name','Type'])
.apply(lambda x: x[['Address','City', 'State', 'PostalCode']].to_dict('r'))
.reset_index(name='addresses')
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
I want it to look like the below.
[
{
"Name": "TEST01",
"Type": "Tent",
"addresses": [
{
"Address": "123 Happy",
"City": "Happytown",
"PostalCode": "89985",
"State": "WA"
}
],
"spots":[
{"Spot":'A',
"SpotAssignments":[
"SpotAssignment":"123",
"Cost":900
]
}
]
},
{
"Name": "TEST02",
"Type": "Tent",
"addresses": [
{
"Address": "456 Happy",
"City": "Happytown",
"PostalCode": "85542",
"State": "NY"
}
],
"spots":[
{"Spot":'A',
"SpotAssignments":[
"SpotAssignment":"456",
"Cost":500
]
}
]
}
]
Try this:
j = (df.groupby(['Name','Type'])
.apply(lambda x: x[['Address','City', 'State', 'PostalCode']].to_dict('r'))
.reset_index(name='addresses'))
k = (df.groupby(['Name','Type', 'Spot'])
.apply(lambda x: x[['SpotAssigment', 'Cost']].to_dict('r'))
.reset_index(name='SpotAssignments'))
h = (k.groupby(['Name','Type'])
.apply(lambda x: x[['Spot','SpotAssignments']].to_dict('r'))
.reset_index(name='spots'))
m = j.merge(h, how='inner', on=['Name', 'Type'])
result = m.to_dict(orient='records')
from pprint import pprint as pp
pp(result)
This result is a Python list of dicts in the format you want; you should be able to dump it as JSON directly.

Convert multiple string stored in a variable into a single list in python

I hope everyone is doing well.
I need a little help: I need to get all the strings from a variable and store them in a single list in Python.
For example -
I have a JSON file from which I am getting IDs, and all the IDs are stored in a variable called id, as shown below when I run print(id):
17298626-991c-e490-bae6-47079c6e2202
17298496-19bd-2f89-7b5f-881921abc632
17298698-3e17-7a9b-b337-aacfd9483b1b
172986ac-d91d-c4ea-2e50-d53700480dd0
172986d0-18aa-6f51-9c62-6cb087ad31e5
172986f4-80f0-5c21-3aee-12f22a5f4322
17298712-a4ac-7b36-08e9-8512fa8322dd
17298747-8cc6-d9d0-8d05-50adf228c029
1729875c-050f-9a99-4850-bb0e6ad35fb0
1729875f-0d50-dc94-5515-b4891c40d81c
17298761-c26b-3ce5-e77e-db412c38a5b4
172987c8-2b5d-0d94-c365-e8407b0a8860
1729881a-e583-2b54-3a52-d092020d9c1d
1729881c-64a2-67cf-d561-6e5e38ed14cb
172987ec-7a20-7eb6-3ebe-a9fb621bb566
17298813-7ac4-258b-d6f9-aaf43f9147b1
17298813-f1ef-d28a-0817-5f3b86c3cf23
17298828-b62b-9ee6-248b-521b0663226e
17298825-7449-2fcb-378e-13671cb4688a
I want all these values to be stored in a single list.
Can someone please help me out with this?
Below is the code I am using:
import json
with open('requests.json') as f:
data = json.load(f)
print(type(data))
for i in data:
if 'traceId' in i:
id = i['traceId']
newid = id.split()
#print(type(newid))
print(newid)
And below is what my JSON file looks like:
[
{
"id": "376287298-hjd8-jfjb-khkf-6479280283e9",
"submittedTime": 1591692502558,
"traceId": "17298626-991c-e490-bae6-47079c6e2202",
"userName": "ABC",
"onlyChanged": true,
"description": "Not Required",
"startTime": 1591694487929,
"result": "NONE",
"state": "EXECUTING",
"paused": false,
"application": {
"id": "16b22a09-a840-f4d9-f42a-64fd73fece57",
"name": "XYZ"
},
"applicationProcess": {
"id": "dihihdosfj9279278yrie8ue",
"name": "Deploy",
"version": 12
},
"environment": {
"id": "fkjdshkjdshglkjdshgldshldsh03r937837",
"name": "DEV"
},
"snapshot": {
"id": "djnglkfdglki98478yhgjh48yr844h",
"name": "DEV_snapshot"
},
},
{
"id": "17298495-f060-3e9d-7097-1f86d5160789",
"submittedTime": 1591692844597,
"traceId": "17298496-19bd-2f89-7b5f-881921abc632",
"userName": "UYT,
"onlyChanged": true,
"startTime": 1591692845543,
"result": "NONE",
"state": "EXECUTING",
"paused": false,
"application": {
"id": "osfodsho883793hgjbv98r3098w",
"name": "QA"
},
"applicationProcess": {
"id": "owjfoew028r2uoieroiehojehfoef",
"name": "EDC",
"version": 5
},
"environment": {
"id": "16cf69c5-4194-e557-707d-0663afdbceba",
"name": "DTESTU"
},
}
]
This is where I am trying to get the traceId from.
You could use the simple split method, like the following:
ids = '''17298626-991c-e490-bae6-47079c6e2202 17298496-19bd-2f89-7b5f-881921abc632 17298698-3e17-7a9b-b337-aacfd9483b1b 172986ac-d91d-c4ea-2e50-d53700480dd0 172986d0-18aa-6f51-9c62-6cb087ad31e5 172986f4-80f0-5c21-3aee-12f22a5f4322 17298712-a4ac-7b36-08e9-8512fa8322dd 17298747-8cc6-d9d0-8d05-50adf228c029 1729875c-050f-9a99-4850-bb0e6ad35fb0 1729875f-0d50-dc94-5515-b4891c40d81c 17298761-c26b-3ce5-e77e-db412c38a5b4 172987c8-2b5d-0d94-c365-e8407b0a8860 1729881a-e583-2b54-3a52-d092020d9c1d 1729881c-64a2-67cf-d561-6e5e38ed14cb 172987ec-7a20-7eb6-3ebe-a9fb621bb566 17298813-7ac4-258b-d6f9-aaf43f9147b1 17298813-f1ef-d28a-0817-5f3b86c3cf23 17298828-b62b-9ee6-248b-521b0663226e 17298825-7449-2fcb-378e-13671cb4688a'''
l = ids.split(" ")
print(l)
This will give the following result. I assumed that the separator is a single space; you can adjust it as needed:
['17298626-991c-e490-bae6-47079c6e2202', '17298496-19bd-2f89-7b5f-881921abc632', '17298698-3e17-7a9b-b337-aacfd9483b1b', '172986ac-d91d-c4ea-2e50-d53700480dd0', '172986d0-18aa-6f51-9c62-6cb087ad31e5', '172986f4-80f0-5c21-3aee-12f22a5f4322', '17298712-a4ac-7b36-08e9-8512fa8322dd', '17298747-8cc6-d9d0-8d05-50adf228c029', '1729875c-050f-9a99-4850-bb0e6ad35fb0', '1729875f-0d50-dc94-5515-b4891c40d81c', '17298761-c26b-3ce5-e77e-db412c38a5b4', '172987c8-2b5d-0d94-c365-e8407b0a8860', '1729881a-e583-2b54-3a52-d092020d9c1d', '1729881c-64a2-67cf-d561-6e5e38ed14cb', '172987ec-7a20-7eb6-3ebe-a9fb621bb566', '17298813-7ac4-258b-d6f9-aaf43f9147b1', '17298813-f1ef-d28a-0817-5f3b86c3cf23', '17298828-b62b-9ee6-248b-521b0663226e', '17298825-7449-2fcb-378e-13671cb4688a']
Edit
You get a list of lists because in each iteration you read only one id. What you need to do is initialize an empty list and append each id to it, in the following way:
l = []
for i in data
if 'traceId' in i:
id = i['traceId']
l.append(id)
You can append the ids variable to the list, like so:
#list declaration
l1=[]
#this must be in your loop
l1.append(ids)
I'm assuming you get the id as a str type value. Using id.split() will return a list of all ids in one single Python list, as each id is separated by space here in your example.
id = """17298626-991c-e490-bae6-47079c6e2202 17298496-19bd-2f89-7b5f-881921abc632
17298698-3e17-7a9b-b337-aacfd9483b1b 172986ac-d91d-c4ea-2e50-d53700480dd0
172986d0-18aa-6f51-9c62-6cb087ad31e5 172986f4-80f0-5c21-3aee-12f22a5f4322
17298712-a4ac-7b36-08e9-8512fa8322dd 17298747-8cc6-d9d0-8d05-50adf228c029
1729875c-050f-9a99-4850-bb0e6ad35fb0 1729875f-0d50-dc94-5515-b4891c40d81c
17298761-c26b-3ce5-e77e-db412c38a5b4 172987c8-2b5d-0d94-c365-e8407b0a8860
1729881a-e583-2b54-3a52-d092020d9c1d 1729881c-64a2-67cf-d561-6e5e38ed14cb
172987ec-7a20-7eb6-3ebe-a9fb621bb566 17298813-7ac4-258b-d6f9-aaf43f9147b1
17298813-f1ef-d28a-0817-5f3b86c3cf23 17298828-b62b-9ee6-248b-521b0663226e
17298825-7449-2fcb-378e-13671cb4688a"""
id_list = id.split()
print(id_list)
Output:
['17298626-991c-e490-bae6-47079c6e2202', '17298496-19bd-2f89-7b5f-881921abc632',
'17298698-3e17-7a9b-b337-aacfd9483b1b', '172986ac-d91d-c4ea-2e50-d53700480dd0',
'172986d0-18aa-6f51-9c62-6cb087ad31e5', '172986f4-80f0-5c21-3aee-12f22a5f4322',
'17298712-a4ac-7b36-08e9-8512fa8322dd', '17298747-8cc6-d9d0-8d05-50adf228c029',
'1729875c-050f-9a99-4850-bb0e6ad35fb0', '1729875f-0d50-dc94-5515-b4891c40d81c',
'17298761-c26b-3ce5-e77e-db412c38a5b4', '172987c8-2b5d-0d94-c365-e8407b0a8860',
'1729881a-e583-2b54-3a52-d092020d9c1d', '1729881c-64a2-67cf-d561-6e5e38ed14cb',
'172987ec-7a20-7eb6-3ebe-a9fb621bb566', '17298813-7ac4-258b-d6f9-aaf43f9147b1',
'17298813-f1ef-d28a-0817-5f3b86c3cf23', '17298828-b62b-9ee6-248b-521b0663226e',
'17298825-7449-2fcb-378e-13671cb4688a']
By default, split() splits on any run of whitespace (spaces, tabs, newlines), which is why the multi-line string above is split correctly. You can use the sep argument to specify a different separator if needed.

Issue parsing JSON file-Python

I have this section in one large JSON file:
"UserDetailList": [
{
"UserName": "citrix-xendesktop-ec2-provisioning",
"GroupList": [],
"CreateDate": "2017-11-07T14:20:14Z",
"UserId": "AIDAI2YJINPRUEM3XHKXO",
"Path": "/",
"AttachedManagedPolicies": [
{
"PolicyName": "AmazonEC2FullAccess",
"PolicyArn": "arn:aws:iam::aws:policy/AmazonEC2FullAccess"
},
{
"PolicyName": "AmazonS3FullAccess",
"PolicyArn": "arn:aws:iam::aws:policy/AmazonS3FullAccess"
}
],
"Arn": "arn:aws:iam::279052847476:user/citrix-xendesktop-ec2-provisioning"
},
I need to extract the PolicyName of every entry in AttachedManagedPolicies for each user.
Desired output:
"citrix-xendesktop-ec2-provisioning","AmazonEC2FullAccess"
"citrix-xendesktop-ec2-provisioning","AmazonS3FullAccess"
Some users don't have any policies at all, so I need some checking mechanism to avoid errors.
with open('1.json') as file:
data = json.load(file)
for element in data['UserDetailList']:
s = element['UserName'], element['AttachedManagedPolicies']
print s
And I am getting:
(u'citrix-xendesktop-ec2-provisioning', [{u'PolicyName': u'AmazonEC2FullAccess', u'PolicyArn': u'arn:aws:iam::aws:policy/AmazonEC2FullAccess'}, {u'PolicyName': u'AmazonS3FullAccess', u'PolicyArn': u'arn:aws:iam::aws:policy/AmazonS3FullAccess'}])
When I added element['AttachedManagedPolicies']['PolicyName'], I got:
TypeError: list indices must be integers, not str
You are getting the error because element['AttachedManagedPolicies'] is a list, not a dictionary. You need to iterate over element['AttachedManagedPolicies'] and then access the key on each item, as below:
[i['PolicyName'] for i in element['AttachedManagedPolicies']]
This will construct a list of the values for the key PolicyName.
As you said, you have a very large JSON structure, so you might have empty or missing values. For that you can proceed as below:
d = {
"UserDetailList": [
{
"UserName": "citrix-xendesktop-ec2-provisioning",
"GroupList": [],
"CreateDate": "2017-11-07T14:20:14Z",
"UserId": "AIDAI2YJINPRUEM3XHKXO",
"Path": "/",
"AttachedManagedPolicies": [
{
"PolicyName": "AmazonEC2FullAccess",
"PolicyArn": "arn:aws:iam::aws:policy/AmazonEC2FullAccess"
},
{
"PolicyName": "AmazonS3FullAccess",
"PolicyArn": "arn:aws:iam::aws:policy/AmazonS3FullAccess"
}
],
"Arn": "arn:aws:iam::279052847476:user/citrix-xendesktop-ec2-provisioning"
}
]
}
user_list = d.get("UserDetailList", None) # if unable to fetch key then it will return None
if user_list:
for user_detail in user_list:
username = user_detail.get("UserName", None)
policies = [i.get('PolicyName') for i in user_detail.get('AttachedManagedPolicies', []) if i.get('PolicyName', None)] # empty list constructed if no policy exist
print(username, policies)

Splitting a string in json using python

I have a simple Json file
input.json
[
{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "200/600"
},
{
"title": "Person1",
"type": "object2",
"required": "firstName1",
"min_max": "230/630"
},
{
"title": "Person2",
"type": "object2",
"required": "firstName2",
"min_max": "201/601"
},
{
"title": "Person3",
"type": "object3",
"required": "firstName3",
"min_max": "2000/6000"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "null"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "1024 / 256"
},
{
"title": "Person4",
"type": "object4",
"required": "firstName4",
"min_max": "0"
}
]
I am trying to create a new JSON file with new data. I would like to split "min_max" into two different fields, i.e. min and max. Below is the code, written in Python.
import json
input=open('input.json', 'r')
output=open('test.json', 'w')
json_decode=json.load(input)
result = []
for item in json_decode:
my_dict={}
my_dict['title']=item.get('title')
my_dict['min']=item.get('min_max')
my_dict['max']=item.get('min_max')
result.append(my_dict)
data=json.dumps(result, output)
output.write(data)
output.close()
How do I split the string into two different values? Also, is there any possibility of printing the JSON output in order?
Your JSON file seems to be written wrong (the example one). It is not a list. It is just a single associative array (or dictionary, in Python). Additionally, you don't seem to be using json.dumps properly: it takes only the object to serialize as a positional argument and returns a string (to write straight to a file object, use json.dump(obj, fp) instead). I also figured it would be easier to just create the dictionary inline. And you don't seem to be splitting the min_max value at all.
Here's the correct input:
[{
"title": "Person",
"type": "object",
"required": "firstName",
"min_max": "20/60"
}]
Here's your new code:
import json
with open('input.json', 'r') as inp, open('test.json', 'w') as outp:
json_decode=json.load(inp)
result = []
for temp in json_decode:
minMax = temp["min_max"].split("/")
result.append({
"title":temp["title"],
"min":minMax[0],
"max":minMax[1]
})
data=json.dumps(result)
outp.write(data)
Table + Python == Pandas
import pandas as pd
# Read old json to a dataframe
df = pd.read_json("input.json")
# Create two new columns based on min_max
# Removes empty spaces with strip()
# Returns [None,None] if length of split is not equal to 2
df['min'], df['max'] = (zip(*df['min_max'].apply
(lambda x: [i.strip() for i in x.split("/")]
if len(x.split("/"))== 2 else [None,None])))
# 'delete' (drop) min_max column
df.drop('min_max', axis=1, inplace=True)
# output to json again
df.to_json("test.json",orient='records')
Result:
[{'max': '600',
'min': '200',
'required': 'firstName',
'title': 'Person',
'type': 'object'},
{'max': '630',
'min': '230',
'required': 'firstName1',
'title': 'Person1',
'type': 'object2'},
{'max': '601',
'min': '201',
'required': 'firstName2',
'title': 'Person2',
'type': 'object2'},
{'max': '6000',
'min': '2000',
'required': 'firstName3',
'title': 'Person3',
'type': 'object3'},
{'max': None,
'min': None,
...
You can do something like this:
import json
nl=[]
for di in json.loads(js):
min_,sep,max_=map(lambda s: s.strip(), di['min_max'].partition('/'))
if sep=='/':
del di['min_max']
di['min']=min_
di['max']=max_
nl.append(di)
print json.dumps(nl)
This keeps the "min_max" values that cannot be separated into two values unchanged.

Categories