Related
My JSON file looks like:
{
"numAccounts": xxxx,
"filtersApplied": {
"accountIds": "All",
"checkIds": "All",
"categories": [
"cost_optimizing"
],
"statuses": "All",
"regions": "All",
"organizationalUnitIds": [
"yyyyy"
]
},
"categoryStatusMap": {
"cost_optimizing": {
"statusMap": {
"RULE_ERROR": {
"name": "Blue",
"count": 11
},
"ERROR": {
"name": "Red",
"count": 11
},
"OK": {
"name": "Green",
"count": 11
},
"WARN": {
"name": "Yellow",
"count": 11
}
},
"name": "Cost Optimizing",
"monthlySavings": 1111
}
},
"accountStatusMap": {
"xxxxxxxx": {
"cost_optimizing": {
"statusMap": {
"OK": {
"name": "Green",
"count": 1111
},
"WARN": {
"name": "Yellow",
"count": 111
}
},
"name": "Cost Optimizing",
"monthlySavings": 1111
}
},
Which I load into memory using pandas:
df = pd.read_json('file.json', orient='index')
I find the index orient the most suitable because it gives me:
print(df)
0
numAccounts 125
filtersApplied {'accountIds': 'All', 'checkIds': 'All', 'cate...
categoryStatusMap {'cost_optimizing': {'statusMap': {'RULE_ERROR...
accountStatusMap {'xxxxxxx': {'cost_optimizing': {'statusM...
Now, how can I access the accountStatusMap entry?
I tried account_status_map = df['accountStatusMap'] which gives me a
KeyError: 'accountStatusMap'
Is there something specific to the index orientation in how to access specific entries in a dataframe?
Selecting nested dictionaries and turning them to a DataFrame in Python
From the nested 'biblio' data below, is there a way of sorting this into a data frame with each key as a column? For example, where 'classifications_cpc' is a column header with the codes as the subsequent values?
{
"publication_reference": {
"jurisdiction": "US",
"doc_number": "10236491",
"kind": "B2",
"date": "2019-03-19"
},
"application_reference": {
"jurisdiction": "US",
"doc_number": "201615053025",
"kind": "A",
"date": "2016-02-25"
},
"priority_claims": {
"claims": [
{
"jurisdiction": "JP",
"doc_number": "2015062114",
"kind": "A",
"date": "2015-03-25",
"sequence": 1
}
]
},
"invention_title": [
{
"text": "Lithium ion secondary battery",
"lang": "en"
}
],
"parties": {
"applicants": [
{
"residence": "JP",
"extracted_name": {
"value": "AUTOMOTIVE ENERGY SUPPLY CORP"
}
}
],
"inventors": [
{
"residence": "JP",
"sequence": 1,
"extracted_name": {
"value": "SAKAGUCHI SHINICHIRO"
}
},
{
"residence": "JP",
"sequence": 2,
"extracted_name": {
"value": "KIMURA AIKA"
}
},
{
"residence": "JP",
"sequence": 3,
"extracted_name": {
"value": "MIZUTA MASATOMO"
}
}
],
"agents": [
{
"extracted_name": {
"value": "Troutman Sanders LLP"
}
}
],
"owners_all": [
{
"recorded_date": "2016-02-25",
"execution_date": "2016-01-28",
"extracted_name": {
"value": "AUTOMOTIVE ENERGY SUPPLY CORPORATION"
},
"extracted_address": "10-1, HIRONODAI 2-CHOME, ZAMA-SHI, KANAGAWA, 252-0012",
"extracted_country": "JP"
}
]
},
"classifications_ipcr": {
"classifications": [
{
"symbol": "H01M2/02"
},
{
"symbol": "H01M2/14"
},
{
"symbol": "H01M2/18"
},
{
"symbol": "H01M10/0525"
},
{
"symbol": "H01M10/0585"
}
]
},
"classifications_cpc": {
"classifications": [
{
"symbol": "H01M10/0525"
},
{
"symbol": "H01M10/0525"
},
{
"symbol": "H01M50/463"
},
{
"symbol": "H01M10/0525"
},
{
"symbol": "H01M10/0585"
},
{
"symbol": "H01M10/0585"
},
{
"symbol": "H01M50/10"
},
{
"symbol": "H01M50/116"
},
{
"symbol": "H01M50/116"
},
{
"symbol": "H01M50/40"
},
{
"symbol": "H01M50/40"
},
{
"symbol": "H01M50/409"
},
{
"symbol": "H01M50/543"
},
{
"symbol": "H01M50/543"
},
{
"symbol": "Y02E60/10"
}
]
},
"references_cited": {
"citations": [
{
"sequence": 1,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2011151307",
"kind": "A1",
"date": "2011-06-23"
},
"lens_id": "052-557-140-975-892"
}
},
{
"sequence": 2,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2011287301",
"kind": "A1",
"date": "2011-11-24"
},
"lens_id": "050-516-769-883-801"
}
},
{
"sequence": 3,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2014205887",
"kind": "A1",
"date": "2014-07-24"
},
"lens_id": "041-534-822-806-155"
}
},
{
"sequence": 4,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2015056492",
"kind": "A1",
"date": "2015-02-26"
},
"lens_id": "101-776-463-080-028"
}
},
{
"sequence": 5,
"patcit": {
"document_id": {
"jurisdiction": "WO",
"doc_number": "2013047778",
"kind": "A1",
"date": "2013-04-04"
},
"lens_id": "135-661-134-273-324"
}
},
{
"sequence": 1,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2011143183",
"kind": "A1",
"date": "2011-06-16"
},
"lens_id": "095-161-033-897-779"
}
},
{
"sequence": 2,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2014349169",
"kind": "A1",
"date": "2014-11-27"
},
"lens_id": "075-950-005-288-26X"
}
},
{
"sequence": 3,
"patcit": {
"document_id": {
"jurisdiction": "US",
"doc_number": "2015050542",
"kind": "A1",
"date": "2015-02-19"
},
"lens_id": "003-582-946-821-435"
}
},
{
"sequence": 4,
"patcit": {
"document_id": {
"jurisdiction": "CN",
"doc_number": "102124591",
"kind": "A",
"date": "2011-07-13"
},
"lens_id": "157-805-739-981-807"
}
},
{
"sequence": 5,
"patcit": {
"document_id": {
"jurisdiction": "CN",
"doc_number": "104106155",
"kind": "A",
"date": "2014-10-15"
},
"lens_id": "003-865-201-672-551"
}
},
{
"sequence": 6,
"patcit": {
"document_id": {
"jurisdiction": "CN",
"doc_number": "104205416",
"kind": "A",
"date": "2014-12-10"
},
"lens_id": "182-508-848-265-100"
}
},
{
"sequence": 7,
"patcit": {
"document_id": {
"jurisdiction": "EP",
"doc_number": "2747167",
"kind": "A1",
"date": "2014-06-25"
},
"lens_id": "167-072-626-506-628"
}
},
{
"sequence": 8,
"patcit": {
"document_id": {
"jurisdiction": "JP",
"doc_number": "2009277397",
"kind": "A",
"date": "2009-11-26"
},
"lens_id": "061-699-339-033-165"
}
},
{
"sequence": 9,
"nplcit": {
"text": "Extended European Search Report dated Apr. 14, 2016 issued in corresponding European Patent Application No. 16157356.3."
}
}
],
"patent_count": 13,
"npl_count": 1
},
"cited_by": {}
}
Any Suggestions or Ideas?
Do you want a column for each and every key, or only for specific ones? For example, the cited_by key has no value in it.
However, assign the data you provided to a variable named your_data and try this code:
import pandas as pd

# Build a one-column DataFrame of CPC classification symbols.
# `your_data` is the parsed biblio dictionary shown in the question; it must
# be assigned before this snippet runs.
classifications = your_data["classifications_cpc"]
# List of {"symbol": "<code>"} records.
symbol_list = classifications["classifications"]
# Keep only the code string from each record, preserving order and duplicates.
list_for_df = [entry["symbol"] for entry in symbol_list]
# The single column is named after the source key.
df = pd.DataFrame(list_for_df, columns=["classifications_cpc"])
The data frame will look like this:
classifications_cpc
0 H01M10/0525
1 H01M10/0525
2 H01M50/463
3 H01M10/0525
4 H01M10/0585
5 H01M10/0585
6 H01M50/10
7 H01M50/116
8 H01M50/116
9 H01M50/40
10 H01M50/40
11 H01M50/409
12 H01M50/543
13 H01M50/543
14 Y02E60/10
Let me try to address your requirements. Because keys such as 'classifications_cpc', 'parties' and 'classifications_ipcr' each hold arrays of unequal lengths, it would not make sense to put them together into a single DataFrame: each resulting row would group unrelated fields together.
What you might be looking for is a way to extract the values stored under a specific key anywhere in the nested dictionary or 'lists of dictionaries'. For example, using a recursive function to extract the values for a given key:
data = {...nested dictionary or 'lists of dictionaries'...}
def get_vals(nested, key):
    """Collect every value stored under ``key`` anywhere inside ``nested``.

    Args:
        nested: A dict, a list, or any other object. Lists and dicts are
            searched recursively; anything else yields no matches.
        key: The dictionary key to look for.

    Returns:
        A list of the matching values. For each dict, matches found deeper
        inside its values come before the dict's own ``nested[key]``. If the
        matched value is itself a list or dict, it is both searched and
        returned as a match.
    """
    result = []
    if isinstance(nested, list):
        # Search each element; an empty list simply contributes nothing.
        for item in nested:
            result.extend(get_vals(item, key))
    elif isinstance(nested, dict):
        # Descend into container values first ...
        for val in nested.values():
            if isinstance(val, (list, dict)):
                result.extend(get_vals(val, key))
        # ... then record this dict's own match, once per dict.
        if key in nested:
            result.append(nested[key])
    return result
get_vals(data, 'value')
Output
['AUTOMOTIVE ENERGY SUPPLY CORP',
'SAKAGUCHI SHINICHIRO',
'KIMURA AIKA',
'MIZUTA MASATOMO',
'Troutman Sanders LLP',
'AUTOMOTIVE ENERGY SUPPLY CORPORATION']
Or to look for the key 'classifications', you would get 2 lists from 'classifications_ipcr' and 'classifications_cpc':
get_vals(data, 'classifications')
[[{'symbol': 'H01M2/02'},
{'symbol': 'H01M2/14'},
{'symbol': 'H01M2/18'},
{'symbol': 'H01M10/0525'},
{'symbol': 'H01M10/0585'}],
[{'symbol': 'H01M10/0525'},
{'symbol': 'H01M10/0525'},
{'symbol': 'H01M50/463'},
{'symbol': 'H01M10/0525'},
{'symbol': 'H01M10/0585'},
{'symbol': 'H01M10/0585'},
{'symbol': 'H01M50/10'},
{'symbol': 'H01M50/116'},
{'symbol': 'H01M50/116'},
{'symbol': 'H01M50/40'},
{'symbol': 'H01M50/40'},
{'symbol': 'H01M50/409'},
{'symbol': 'H01M50/543'},
{'symbol': 'H01M50/543'},
{'symbol': 'Y02E60/10'}]]
Another way is to use the pandas function pd.json_normalize(), but you have to identify the specific chain of keys that leads to the data you want.
df = pd.json_normalize(data['classifications_cpc']['classifications'])
Output df
symbol
0 H01M10/0525
1 H01M10/0525
2 H01M50/463
3 H01M10/0525
4 H01M10/0585
5 H01M10/0585
6 H01M50/10
7 H01M50/116
8 H01M50/116
9 H01M50/40
10 H01M50/40
11 H01M50/409
12 H01M50/543
13 H01M50/543
14 Y02E60/10
Wasn't sure how to title this question but I am working with the Quickbooks Online API and when querying a report like BalanceSheet or GeneralLedger the API returns data rows in multiple nested levels which is quite frustrating to parse through.
An example of the BalanceSheet response is included below. I am only interested in the data from the "Row" objects, but as you can see they can be returned at 1, 2, 3 or more different levels of nesting. I am thinking of going through each level to check for Rows and then get each Row, but that seems overly complex as I would need a separate for loop for each level.
I'm wondering if there is a better way to get each "Row" in that data without regard to which level it is on? Any ideas would be appreciated!
Here's an example of a return from their sandbox data:
{
"Header": {
"Time": "2021-04-28T14:12:17-07:00",
"ReportName": "BalanceSheet",
"DateMacro": "this calendar year-to-date",
"ReportBasis": "Accrual",
"StartPeriod": "2021-01-01",
"EndPeriod": "2021-04-28",
"SummarizeColumnsBy": "Month",
"Currency": "USD",
"Option": [
{
"Name": "AccountingStandard",
"Value": "GAAP"
},
{
"Name": "NoReportData",
"Value": "false"
}
]
},
"Columns": {
"Column": [
{
"ColTitle": "",
"ColType": "Account",
"MetaData": [
{
"Name": "ColKey",
"Value": "account"
}
]
},
{
"ColTitle": "Jan 2021",
"ColType": "Money",
"MetaData": [
{
"Name": "StartDate",
"Value": "2021-01-01"
},
{
"Name": "EndDate",
"Value": "2021-01-31"
},
{
"Name": "ColKey",
"Value": "Jan 2021"
}
]
},
{
"ColTitle": "Feb 2021",
"ColType": "Money",
"MetaData": [
{
"Name": "StartDate",
"Value": "2021-02-01"
},
{
"Name": "EndDate",
"Value": "2021-02-28"
},
{
"Name": "ColKey",
"Value": "Feb 2021"
}
]
},
{
"ColTitle": "Mar 2021",
"ColType": "Money",
"MetaData": [
{
"Name": "StartDate",
"Value": "2021-03-01"
},
{
"Name": "EndDate",
"Value": "2021-03-31"
},
{
"Name": "ColKey",
"Value": "Mar 2021"
}
]
},
{
"ColTitle": "Apr 1-28, 2021",
"ColType": "Money",
"MetaData": [
{
"Name": "StartDate",
"Value": "2021-04-01"
},
{
"Name": "EndDate",
"Value": "2021-04-28"
},
{
"Name": "ColKey",
"Value": "Apr 1-28, 2021"
}
]
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "ASSETS"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Assets"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Bank Accounts"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Checking",
"id": "35"
},
{
"value": "1201.00"
},
{
"value": "1201.00"
},
{
"value": "1201.00"
},
{
"value": "1201.00"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Savings",
"id": "36"
},
{
"value": "800.00"
},
{
"value": "800.00"
},
{
"value": "800.00"
},
{
"value": "800.00"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Bank Accounts"
},
{
"value": "2001.00"
},
{
"value": "2001.00"
},
{
"value": "2001.00"
},
{
"value": "2001.00"
}
]
},
"type": "Section",
"group": "BankAccounts"
},
{
"Header": {
"ColData": [
{
"value": "Accounts Receivable"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Accounts Receivable (A/R)",
"id": "84"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Accounts Receivable"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
},
{
"value": "5281.52"
}
]
},
"type": "Section",
"group": "AR"
},
{
"Header": {
"ColData": [
{
"value": "Other Current Assets"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Inventory Asset",
"id": "81"
},
{
"value": "596.25"
},
{
"value": "596.25"
},
{
"value": "596.25"
},
{
"value": "596.25"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Undeposited Funds",
"id": "4"
},
{
"value": "2062.52"
},
{
"value": "2062.52"
},
{
"value": "2062.52"
},
{
"value": "2062.52"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Other Current Assets"
},
{
"value": "2658.77"
},
{
"value": "2658.77"
},
{
"value": "2658.77"
},
{
"value": "2658.77"
}
]
},
"type": "Section",
"group": "OtherCurrentAssets"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Current Assets"
},
{
"value": "9941.29"
},
{
"value": "9941.29"
},
{
"value": "9941.29"
},
{
"value": "9941.29"
}
]
},
"type": "Section",
"group": "CurrentAssets"
},
{
"Header": {
"ColData": [
{
"value": "Fixed Assets"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Truck",
"id": "37"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Original Cost",
"id": "38"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Truck"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
}
]
},
"type": "Section"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Fixed Assets"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
},
{
"value": "13495.00"
}
]
},
"type": "Section",
"group": "FixedAssets"
}
]
},
"Summary": {
"ColData": [
{
"value": "TOTAL ASSETS"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
}
]
},
"type": "Section",
"group": "TotalAssets"
},
{
"Header": {
"ColData": [
{
"value": "LIABILITIES AND EQUITY"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Liabilities"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Current Liabilities"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"Header": {
"ColData": [
{
"value": "Accounts Payable"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Accounts Payable (A/P)",
"id": "33"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Accounts Payable"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
},
{
"value": "1602.67"
}
]
},
"type": "Section",
"group": "AP"
},
{
"Header": {
"ColData": [
{
"value": "Credit Cards"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Mastercard",
"id": "41"
},
{
"value": "157.72"
},
{
"value": "157.72"
},
{
"value": "157.72"
},
{
"value": "157.72"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Credit Cards"
},
{
"value": "157.72"
},
{
"value": "157.72"
},
{
"value": "157.72"
},
{
"value": "157.72"
}
]
},
"type": "Section",
"group": "CreditCards"
},
{
"Header": {
"ColData": [
{
"value": "Other Current Liabilities"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Arizona Dept. of Revenue Payable",
"id": "89"
},
{
"value": "0.00"
},
{
"value": "0.00"
},
{
"value": "0.00"
},
{
"value": "0.00"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Board of Equalization Payable",
"id": "90"
},
{
"value": "370.94"
},
{
"value": "370.94"
},
{
"value": "370.94"
},
{
"value": "370.94"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Loan Payable",
"id": "43"
},
{
"value": "4000.00"
},
{
"value": "4000.00"
},
{
"value": "4000.00"
},
{
"value": "4000.00"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Other Current Liabilities"
},
{
"value": "4370.94"
},
{
"value": "4370.94"
},
{
"value": "4370.94"
},
{
"value": "4370.94"
}
]
},
"type": "Section",
"group": "OtherCurrentLiabilities"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Current Liabilities"
},
{
"value": "6131.33"
},
{
"value": "6131.33"
},
{
"value": "6131.33"
},
{
"value": "6131.33"
}
]
},
"type": "Section",
"group": "CurrentLiabilities"
},
{
"Header": {
"ColData": [
{
"value": "Long-Term Liabilities"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Notes Payable",
"id": "44"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
}
],
"type": "Data"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Long-Term Liabilities"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
},
{
"value": "25000.00"
}
]
},
"type": "Section",
"group": "LongTermLiabilities"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Liabilities"
},
{
"value": "31131.33"
},
{
"value": "31131.33"
},
{
"value": "31131.33"
},
{
"value": "31131.33"
}
]
},
"type": "Section",
"group": "Liabilities"
},
{
"Header": {
"ColData": [
{
"value": "Equity"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
]
},
"Rows": {
"Row": [
{
"ColData": [
{
"value": "Opening Balance Equity",
"id": "34"
},
{
"value": "-9337.50"
},
{
"value": "-9337.50"
},
{
"value": "-9337.50"
},
{
"value": "-9337.50"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Retained Earnings",
"id": "2"
},
{
"value": "1642.46"
},
{
"value": "1642.46"
},
{
"value": "1642.46"
},
{
"value": "1642.46"
}
],
"type": "Data"
},
{
"ColData": [
{
"value": "Net Income"
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
},
{
"value": ""
}
],
"type": "Data",
"group": "NetIncome"
}
]
},
"Summary": {
"ColData": [
{
"value": "Total Equity"
},
{
"value": "-7695.04"
},
{
"value": "-7695.04"
},
{
"value": "-7695.04"
},
{
"value": "-7695.04"
}
]
},
"type": "Section",
"group": "Equity"
}
]
},
"Summary": {
"ColData": [
{
"value": "TOTAL LIABILITIES AND EQUITY"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
},
{
"value": "23436.29"
}
]
},
"type": "Section",
"group": "TotalLiabilitiesAndEquity"
}
]
}
}
I'm trying to merge nested Dicts in a list based on "name" like the following:
[
{
"name": "abc",
"metadata": [
{
"name": "foo",
"data": [
{
"version": "1.0"
}
]
},
{
"name": "foo",
"data": [
{
"version": "2.0"
}
]
},
{
"name": "bar",
"data": [
{
"version": "1.0"
}
]
}
]
},
{
"name": "xyz",
"metadata": [
{
"name": "bob",
"data": [
{
"version": "3.2"
}
]
},
{
"name": "alice",
"data": [
{
"version": "2.2"
}
]
}
]
},
{
"name": "xyz",
"metadata": [
{
"name": "mike",
"data": [
{
"version": "3.2"
}
]
},
{
"name": "alice",
"data": [
{
"version": "2.2"
}
]
}
]
}
]
Considering that the merged items should not have duplicates in the metadata, how can I do that in Python? Metadata entries should be unique: if an entry with the same name+data+version already exists in the metadata, it should not be added again.
My desired output should look like this:
[
{
"name": "abc",
"metadata": [
{
"name": "foo",
"data": [
{
"version": "1.0"
}
]
},
{
"name": "foo",
"data": [
{
"version": "2.0"
}
]
},
{
"name": "bar",
"data": [
{
"version": "1.0"
}
]
}
]
},
{
"name": "xyz",
"metadata": [
{
"name": "bob",
"data": [
{
"version": "3.2"
}
]
},
{
"name": "mike",
"data": [
{
"version": "3.2"
}
]
},
{
"name": "alice",
"data": [
{
"version": "2.2"
}
]
}
]
}
]
You can use itertools.groupby:
import itertools
import json


def _name_of(record):
    """Grouping key: the record's top-level 'name'."""
    return record['name']


def _dedupe(items):
    """Return ``items`` without later duplicates, keeping first-seen order.

    Uses equality rather than hashing because the entries are dicts.
    """
    kept = []
    for item in items:
        if item not in kept:
            kept.append(item)
    return kept


# Sample input: records that may share a 'name' and carry overlapping metadata.
d = [
    {'name': 'abc', 'metadata': [
        {'name': 'foo', 'data': [{'version': '1.0'}]},
        {'name': 'foo', 'data': [{'version': '2.0'}]},
        {'name': 'bar', 'data': [{'version': '1.0'}]},
    ]},
    {'name': 'xyz', 'metadata': [
        {'name': 'bob', 'data': [{'version': '3.2'}]},
        {'name': 'alice', 'data': [{'version': '2.2'}]},
    ]},
    {'name': 'xyz', 'metadata': [
        {'name': 'mike', 'data': [{'version': '3.2'}]},
        {'name': 'alice', 'data': [{'version': '2.2'}]},
    ]},
]

# groupby only merges adjacent records, so sort by name before grouping.
new_d = [
    [group_name, list(members)]
    for group_name, members in itertools.groupby(sorted(d, key=_name_of), key=_name_of)
]

# Concatenate the metadata lists of every record in a group.
result = [
    {
        'name': group_name,
        'metadata': list(itertools.chain.from_iterable(
            member['metadata'] for member in members
        )),
    }
    for group_name, members in new_d
]

# Remove repeated metadata entries within each merged record.
final_result = [
    {**entry, 'metadata': _dedupe(entry['metadata'])}
    for entry in result
]

print(json.dumps(final_result, indent=4))
Output:
[
{
"name": "abc",
"metadata": [
{
"name": "foo",
"data": [
{
"version": "1.0"
}
]
},
{
"name": "foo",
"data": [
{
"version": "2.0"
}
]
},
{
"name": "bar",
"data": [
{
"version": "1.0"
}
]
}
]
},
{
"name": "xyz",
"metadata": [
{
"name": "bob",
"data": [
{
"version": "3.2"
}
]
},
{
"name": "alice",
"data": [
{
"version": "2.2"
}
]
},
{
"name": "mike",
"data": [
{
"version": "3.2"
}
]
}
]
}
]
I want to append the longitude to the latitude; the two are stored in 2 separate JSON files.
The result should be stored in a 3rd file.
How can I do that in Python or JavaScript/Node?
Many thanks for your support,
LATITUDE
{
"tags": [{
"name": "LATITUDE_deg",
"results": [{
"groups": [{
"name": "type",
"type": "number"
}],
"values": [
[1123306773000, 46.9976859318, 3],
[1123306774000, 46.9976859319, 3]
],
"attributes": {
"customer": ["Acme"],
"host": ["server1"]
}
}],
"stats": {
"rawCount": 2
}
}]
}
LONGITUDE
{
"tags": [{
"name": "LONGITUDE_deg",
"results": [{
"groups": [{
"name": "type",
"type": "number"
}],
"values": [
[1123306773000, 36.9976859318, 3],
[1123306774000, 36.9976859317, 3]
],
"attributes": {
"customer": ["Acme"],
"host": ["server1"]
}
}],
"stats": {
"rawCount": 2
}
}]
}
Expected result: LATITUDE_AND_LONGITUDE
{
"tags": [{
"name": "LATITUDE_AND_LONGITUDE_deg",
"results": [{
"groups": [{
"name": "type",
"type": "number"
}],
"values": [
[1123306773000, 46.9976859318, 36.9976859318, 3],
[1123306774000, 46.9976859319, 36.9976859317, 3]
],
"attributes": {
"customer": ["Acme"],
"host": ["server1"]
}
}],
"stats": {
"rawCount": 2
}
}]
}
I have written the solution with a colleague; you can find the source code on GitHub: https://gist.github.com/Abdelkrim/715eb222cc318219196c8be293c233bf