Flatten the Json file data using pandas normalizer - python

I would like to flatten the complex nested json file .Please find the below sample json data
{
"applications": [
{
"id": 87334412,
"name": "cdata1",
"language": "known",
"health_status": "unknown",
"reporting": true,
"last_reported_at": "2017-10-06T06:30:55+00:00",
"application_summary": {
"response_time": 1.2,
"throughput": 216,
"error_rate": 0,
"target": 0.5,
"ascore": 1,
"host_count": 3,
"instance_count": 3
},
"settings": {
"column": 0.5,
"columns": 7,
"columns1": true,
"columns2": false
},
"links": {
"application_data": [
93818199,
93819351,
93819359
],
"servers": [],
"application_content": [
32006189,
87342924,
47565225
]
}
},
code using :
import json
from pandas.io.json import json_normalize
json_file=open('ptr1.json')
json_data=json.load(json_file)
#print json_data["applications"]
for line in json_data:
data=json_normalize(line,['name','id'])
print data
can any one help to get the following data name,id,last_reported_at,instance_count. note json file contains many id details

IIUC:
In [34]: d = json.loads(json_str)
In [35]: cols = ['id','name','last_reported_at','application_summary.instance_count']
In [36]: pd.io.json.json_normalize(d['applications'])[cols]
Out[36]:
id name last_reported_at application_summary.instance_count
0 87334412 cdata1 2017-10-06T06:30:55+00:00 3
1 87334444 cdata2 2017-10-05T06:30:55+00:00 3

Related

Parse json with python and access to inner nodes

Hello I want to parse this json file in python like sql database column as below:
row #1 order id: 317246190-A can_refund: false cancelations_amount:15.13 offer_sku:3803372 order_line_id: 317246190-A-1
row #2 order id: 317246190-A can_refund: false cancelations_amount:15.13 offer_sku:3803372 order_line_id: 317246190-A-2
here is the json file :
{
"orders": [
{
"order_id": "317246190-A",
"order_lines": [
{
"can_refund": false,
"cancelations": [
{
"amount": 15.13,
"commission_amount": 3.32
}
],
"offer_sku": "3803372",
"order_line_id": "317246190-A-1",
"price": 0.0
},
{
"can_refund": false,
"cancelations": [
{
"amount": 15.13,
"commission_amount": 3.32
}
],
"offer_sku": "3803372",
"order_line_id": "317246190-A-2",
"price": 0.0
}
]
}
],
"total_count": 1
}
I want to parse this json file in python like sql database column as below
row #1 order id: 317246190-A can_refund: false cancelations_amount:15.13 offer_sku:3803372 order_line_id: 317246190-A-1
row #2 order id: 317246190-A can_refund: false cancelations_amount:15.13 offer_sku:3803372 order_line_id: 317246190-A-2

Converting python dataframe to a particular JSON structute

Hi i want to convert my dataframe to a specific json structure. my dataframe look something like this :
df = pd.DataFrame([["file1", "1.2.3.4.5.6.7.8.9", 91, "RMLO"], ["file2", "1.2.3.4.5.6.7.8.9", 92, "LMLO"], ["file3", "1.2.3.4.5.6.7.8.9", 93, "LCC"], ["file4", "1.2.3.4.5.6.7.8.9", 94, "RCC"]], columns=["Filename", "StudyID", "probablity", "finding_name"])
And the json structure in which i want to convert my datafram is below :
{
"findings": [
{
"name": "RMLO",
"probability": "91"
},
{
"name": "LMLO",
"probability": "92"
},
{
"name": "LCC",
"probability": "93"
}
{
"name": "LCC93",
"probability" : "94"
}
],
"status": "Processed",
"study_id": "1.2.3.4.5.6.7.8.9.0"
}
i tried implementing this with below code with different orient variables but i didn't get what i wanted.
j = df[["probablity","findings"]].to_json(orient='records')
so if any can help in achiveing this..
Thanks.
Is this similar to what you are trying to achieve:
import json
j = df[["finding_name","probablity"]].to_json(orient='records')
study_id = df["StudyID"][0]
j_dict = {"findings": json.loads(j), "status": "Processed", "study_id": study_id}
j_dict
This results in:
{'findings': [{'finding_name': 'RMLO', 'probablity': 91},
{'finding_name': 'LMLO', 'probablity': 92},
{'finding_name': 'LCC', 'probablity': 93},
{'finding_name': 'RCC', 'probablity': 94}],
'status': 'Processed',
'study_id': '1.2.3.4.5.6.7.8.9'}

get data from a json

I want to get the data from a json. I have the idea of a loop to access all levels.
I have only been able to pull data from a single block.
print(output['body']['data'][0]['list'][0]['outUcastPkts'])
How do I get the other data?
import json,urllib.request
data = urllib.request.urlopen("http://172.0.0.0/statistic").read()
output = json.loads(data)
for elt in output['body']['data']:
print(output['body']['data'][0]['inUcastPktsAll'])
for elt in output['list']:
print(output['body']['data'][0]['list'][0]['outUcastPkts'])
{
"body": {
"data": [
{
"inUcastPktsAll": 3100617019,
"inMcastPktsAll": 7567,
"inBcastPktsAll": 8872,
"outPktsAll": 8585575441,
"outUcastPktsAll": 8220240108,
"outMcastPktsAll": 286184143,
"outBcastPktsAll": 79151190,
"list": [
{
"outUcastPkts": 117427359,
"outMcastPkts": 1990586,
"outBcastPkts": 246120
},
{
"outUcastPkts": 0,
"outMcastPkts": 0,
"outBcastPkts": 0
}
]
},
{
"inUcastPktsAll": 8269483865,
"inMcastPktsAll": 2405765,
"inBcastPktsAll": 124466,
"outPktsAll": 3101194852,
"outUcastPktsAll": 3101012296,
"outMcastPktsAll": 173409,
"outBcastPktsAll": 9147,
"list": [
{
"outUcastPkts": 3101012296,
"outMcastPkts": 90488,
"outBcastPkts": 9147
},
{
"outUcastPkts": 0,
"outMcastPkts": 0,
"outBcastPkts": 0
}
]
}
],
"msgs": [ "successful" ]
},
"header": {
"opCode": "1",
"token": "",
"state": "",
"version": 1
}
}
output = json.loads(data) #Type of output is a dictionary.
#Try to use ".get()" method.
print(output.get('body')) #Get values of key 'body'
print(output.get('body').get('data')) #Get a list of key 'data'
If a key doesn't exist, the '.get()' method will return None.
https://docs.python.org/3/library/stdtypes.html#dict.get
In python you can easily iterate over the objects of a list like so:
>>> l = [1, 2, 3, 7]
>>> for elem in l:
... print(elem)
...
1
2
3
7
This works regarding what can of object do you have in the list (integers, tuples, dictionaries). Having that in mind, your solution was not far off, you only to do the following changes:
for entry in output['body']['data']:
print(entry['inUcastPktsAll'])
for list_element in entry['list']:
print(list_element['outUcastPkts'])
This will give you the following for the json object you have provided:
3100617019
117427359
0
8269483865
3101012296
0

Passing dictionary as parameter to a function

So, I am working on a project in which the user gives inputs in the json file and the parser reads data from the json file and then creates a data structure to which gets updated to the inputs mentioned in the data file.
My json file(input_file.json5) looks like this:
{
"clock_frequency": 25000,
"Triggering_Mode": "positive_edge_triggered",
"Mode": "Offline",
"overshoot": 0.05,
"duty_cycle": 0.5,
"amplitude/high_level": 1,
"offset/low_level": 0
}
The data structure(data_struc.py) looks like this:
Parameters={
"Global_parameters": {
"frequency": 3000,
"Triggering_Mode": "positive_edge_triggered"
},
"Executor_param": {
"Mode": "Offline"
},
"Waveform_Settings": {
"overshoot": 0.05,
"duty_cycle": 0.5,
"amplitude/high_level": 1,
"offset/low_level": 0,
}
}
The code for the parser is:
import json5
from data_struc import Parameters
class Parser(object):
def read_input_file(self, path_name, file_name):
input_file = open(path_name + file_name + '.json5')
data = json5.load(input_file)
print(Parameters['Global_parameters'])
parameters = self.parse_parameters(data)
input_file.close()
return parameters
def parser_parameters(self, data):
parameter = {
"Global_parameters": {
"frequency": data[clock_frequency]
"Triggering_Mode": data[Triggering_Mode]
}
}
return parameter
I want to pass data as a parameter to the function and I want to update the contents of the data structure using the value of the data(passed as dictionary) to the function. How do I implement the function parser_parameters?
Here is a one-liner to map the data to a schema if you can change the schema, you could also just go and grab the keys instead of creating a list of items to match. This formats the data to the schema based on matching keys:
EDIT: added 'Data' tag to the schema and output for nested list data
schema = {
'Global_parameters': [
'clock_frequency', # I noticed you had this as just 'clock' in your desired outuput
'Triggering_Mode'
],
'Executor_param': [
'Mode'
],
'Waveform_Settings': [
'overshoot',
'duty_cycle',
'amplitude/high_level',
'offset/low_level'
],
'Data': {
'Packet'
}
}
data = {
"clock_frequency": 25000,
"Triggering_Mode": "positive_edge_triggered",
"Mode": "Offline",
"overshoot": 0.05,
"duty_cycle": 0.5,
"amplitude/high_level": 1,
"offset/low_level": 0,
"Packet": [
{"time_index":0.1, "data":0x110},
{"time_index":1.21, "data":123},
{"time_index":2.0, "data": 0x45}
]
}
# "one line" nested dict comprehension
data_structured = {k0: {k1: v1 for k1, v1 in data.items() if k1 in v0} # in v0.keys() if you are using the structure you have above
for k0, v0 in schema.items()}
import json
print(json.dumps(data_structured, indent=4)) # pretty print in json format
Output:
{
"Global_parameters": {
"clock_frequency": 25000,
"Triggering_Mode": "positive_edge_triggered"
},
"Executor_param": {
"Mode": "Offline"
},
"Waveform_Settings": {
"overshoot": 0.05,
"duty_cycle": 0.5,
"amplitude/high_level": 1,
"offset/low_level": 0
},
"Data": {
"Packet": [
{
"time_index": 0.1,
"data": 272
},
{
"time_index": 1.21,
"data": 123
},
{
"time_index": 2.0,
"data": 69
}
]
}
}

How to add title row to using writer and unicodecsv

I have the following JSON file - test.json (names, keys and addresses changed for security reasons)
[
{
"accountMode":"Live",
"acquirer":"TEST",
"acquirerConstraints":{
"cardTypes":[
"MASTERCARD",
"MAESTRO",
"VISA"
],
"cvcRegexp":"^[0-9]{3}$",
"cvcRequired":true,
"maxAmount":500000,
"minAmount":50
},
"acquirerDetails":{
"TEST":"Studio",
"ERROR_LIST":[
],
"MERCHANT_CODE":"218331",
"VALID":true,
"_mId":"T712484",
"_status":"INPROCESS",
"email":"test7#gmail.com",
"name":"Studio",
"valid":true
},
"acquirerValidations":null,
"allowedCurrencies":[
"EUR",
"USD",
"GBP"
],
"apiKeyPairs":[
{
"accountMode":"Live",
"label":"Virtual Terminal",
"publishableKey":"niunibiubniunijknkjknj",
"source":"VIRTUAL_TERMINAL"
},
{
"accountMode":"Live",
"label":"Default",
"publishableKey":"iiuhiuhiu",
"source":"ECOMMERCE"
}
],
"appLogoUrl":null,
"applicationId":"541d75e0-7db8b343a31f",
"authorizationCode":"",
"closedDate":null,
"closureReason":null,
"declineAvsAddressFailure":false,
"declineAvsZipFailure":false,
"declineCvcFailure":false,
"defaultCurrency":"EUR",
"descriptor":null,
"email":"test1#gmail.com",
"id":"ddddeff",
"invitationCode":null,
"locale":"en_IE",
"merchantApplication":{
"accountNumber":null,
"acquirer":"TEST",
"annualAmount":null,
"annualVolume":null,
"applicationType":"APPROVAL",
"bankName":"UNKNOWN",
"brand":null,
"businessAddress":"54 My St, 1",
"businessAddress2":null,
"businessCity":"Abbey",
"businessCountry":"IRL",
"businessPhone":null,
"businessState":"DUBLIN",
"businessZip":null,
"data":null,
"email":"test#gmail.com",
"escalationPhone":null,
"fax":null,
"legalName":"UAB \"Studio\"",
"maxTransactionAmount":null,
"mccCode":"5712",
"merchantPromotionCode":null,
"mobile":null,
"monthlyAmount":null,
"monthlyVolume":null,
"ownerFirstName":"tlana",
"ownerLastName":"nava",
"phone":"37647",
"GuideAccepted":null,
"privacyAccepted":true,
"privacyVersion":"1a",
"referenceId":"9104d65i08d071",
"routingNumber":null,
"singleTransactionAmount":null,
"statementName":"UAB \"Studio\"",
"taxId":null,
"termsAccepted":true,
"termsVersion":"1a",
"url":"http://www.design.lt"
},
"merchantId":"12484",
"merchantPromotionCode":null,
"mposEnabled":true,
"name":"Studio",
"netonfiguration":null,
"onboardedDate":1505513232485,
"onboardingMethod":null,
"onboardingStatus":"INPROCESS",
"partner":null,
"saqCompliant":false,
"saqExpires":null,
"settings":[
{
"key":"MERCHANT_DETAILS",
"value":"{\"zip\":\"Wicklow\",\"phone\":\"342647\",\"email\":\"suppoor#outlook.com\",\"address\":\"Bck 6\",\"state\":\"Ireland\",\"addressLine2\":\"Unit 8, Bl Par\",\"city\":\"Wicklow\"}"
},
{
"key":"VAT_NUMBER",
"value":"/evzaqen/"
}
],
"timezone":"Europe/Dublin",
"tinStatus":null
},
{
"accountMode":"Live",
"acquirer":"TEST",
"acquirerConstraints":{
"cardTypes":[
"MASTERCARD",
"MAESTRO",
"VISA"
],
"cvcRegexp":"^[0-9]{3}$",
"cvcRequired":true,
"maxAmount":500000,
"minAmount":50
},
"acquirerDetails":{
"TEST":"test",
"ERROR_LIST":[
],
"MERCHANT_CODE":"594920",
"MID_ASSIGNED":true,
"VALID":true,
"_mId":"103558",
"_status":"APPROVED",
"acquiringMid":"1036598",
"descriptor":"test 8885551212",
"email":"test#gmail.com",
"gatewayMid":"SIMP337",
"id":"SIMP337",
"level4Mid":"76576576",
"name":"test",
"status":"APPROVED",
"transactionCurrency":"USD;EUR;GBP",
"valid":true,
"paymentGatewayKey":"ytfytfytfyt"
},
"acquirerValidations":null,
"allowedCurrencies":[
"EUR",
"USD",
"GBP"
],
"apiKeyPairs":[
],
"appLogoUrl":null,
"applicationId":"949bdde5-07-d8d58f4c3d01",
"authorizationCode":"",
"closedDate":null,
"closureReason":null,
"declineAvsAddressFailure":false,
"declineAvsZipFailure":false,
"declineCvcFailure":false,
"defaultCurrency":"EUR",
"descriptor":"test85551212",
"email":"test#gmail.com",
"id":"9f3a7d7",
"invitationCode":null,
"locale":"en_US",
"merchantApplication":{
"accountNumber":null,
"acquirer":"TEST",
"annualAmount":null,
"annualVolume":null,
"applicationType":"APPROVAL",
"bankName":"UNKNOWN",
"brand":null,
"businessAddress":"123 test",
"businessAddress2":null,
"businessCity":"Atlanta",
"businessCountry":"IRL",
"businessPhone":null,
"businessState":"CARLOW",
"businessZip":null,
"data":null,
"email":"test#gmail.com",
"escalationPhone":null,
"fax":null,
"legalName":"stest",
"maxTransactionAmount":null,
"mccCode":"521",
"merchantPromotionCode":null,
"mobile":null,
"monthlyAmount":null,
"monthlyVolume":null,
"ownerFirstName":"moto",
"ownerLastName":"test",
"phone":"3141212",
"GuideAccepted":null,
"privacyAccepted":true,
"privacyVersion":"1a",
"referenceId":"2920",
"routingNumber":null,
"singleTransactionAmount":null,
"statementName":"test",
"taxId":null,
"termsAccepted":true,
"termsVersion":"1a",
"url":null
},
"merchantId":"1036558",
"merchantPromotionCode":null,
"mposEnabled":true,
"name":"test",
"netonfiguration":null,
"onboardedDate":1456846054925,
"onboardingMethod":null,
"onboardingStatus":"CLOSED",
"partner":null,
"saqCompliant":false,
"saqExpires":null,
"settings":[
],
"timezone":"Europe/Dublin",
"tinStatus":"InCompliance"
}
]
I want to process this file and take some of the information and populate a CSV file with it. To do this I am using the following:
import unicodecsv
import json
json_data = open("test.json")
data = json.load(json_data)
f = unicodecsv.writer(open("results.csv","wb+"))
for entry in data:
if "merchantApplication" in entry:
ma = entry["merchantApplication"]
if "email" in ma:
f.writerow([ma["ownerFirstName"],ma["ownerLastName"],ma["email"],ma["legalName"],ma["businessAddress"],ma["businessAddress2"],ma["businessCity"],ma["businessCountry"],ma["businessState"],ma["businessZip"],ma["phone"],ma["mobile"]])
json_data.close()
This Works fine but does not print the headers above the columns. How do I add in the headers? I am using Python 2.7.10
How do I add in the headers?
Well quite simply by calling f.writerow((<your>,<headers>,<here>)) before your for loop.

Categories