Combine json based on Key value using python - python

I have two JSON strings as sample:
json_1 = [
{
"breadth": 48.04,
"vessel_id": 1,
"vessel_name": "SHIP-01",
"vessel_type": "Crude Oil Tanker",
"year_built": 2012
},
{
"breadth": 42,
"vessel_id": 2,
"vessel_name": "SHIP-02",
"vessel_type": "Crude Oil Tanker",
"year_built": 2016
}
]
json_2 = [
{
"Ballast_miles": 43575.8,
"Ballast_miles_pct": 36.1,
"org_id": 1,
"port_days": 383.5,
"sea_days": 414.9,
"total_days": 798.4,
"vessel_id": 1
},
{
"Ballast_miles": 21642.7,
"Ballast_miles_pct": 29.8,
"org_id": 1,
"port_days": 325.7,
"sea_days": 259.8,
"total_days": 585.5,
"vessel_id": 2
}
]
I want to combine these two JSON based on vessel_id.
My output format should look like:
[{ vesselId: 1,
 json1:{},
 json2:{}
},
{ vesselId: 2,
 json1:{},
 json2:{}
}]
What I've tried so far is:
data = {'First_Json': json_1, 'Second_Json': json_2}
json.dumps(data)
But this combines entirely without checking based on vessel_id.

Something like this?
from collections import defaultdict

json_1 = [
    {
        "breadth": 48.04,
        "vessel_id": 1,
        "vessel_name": "SHIP-01",
        "vessel_type": "Crude Oil Tanker",
        "year_built": 2012,
    },
    {
        "breadth": 42,
        "vessel_id": 2,
        "vessel_name": "SHIP-02",
        "vessel_type": "Crude Oil Tanker",
        "year_built": 2016,
    },
]
json_2 = [
    {
        "Ballast_miles": 43575.8,
        "Ballast_miles_pct": 36.1,
        "org_id": 1,
        "port_days": 383.5,
        "sea_days": 414.9,
        "total_days": 798.4,
        "vessel_id": 1,
    },
    {
        "Ballast_miles": 21642.7,
        "Ballast_miles_pct": 29.8,
        "org_id": 1,
        "port_days": 325.7,
        "sea_days": 259.8,
        "total_days": 585.5,
        "vessel_id": 2,
    },
]


def combine_by_key(first, second, key="vessel_id"):
    """Pair up the records of two lists that share the same value for *key*.

    Args:
        first: List of dicts; each must contain *key*.
        second: List of dicts; each must contain *key*.
        key: Name of the field the records are matched on.

    Returns:
        A list with one dict per distinct key value (in first-seen order),
        shaped ``{key: value, "json1": <record from first>,
        "json2": <record from second>}``. A side with no matching record is
        an empty dict instead of raising ``KeyError``.
    """
    merged = defaultdict(dict)
    for item in first:
        merged[item[key]]["json1"] = item
    for item in second:
        merged[item[key]]["json2"] = item
    return [
        {
            key: value,
            # .get() keeps ids that are present in only one of the inputs.
            "json1": pair.get("json1", {}),
            "json2": pair.get("json2", {}),
        }
        for value, pair in merged.items()
    ]


combined = combine_by_key(json_1, json_2)
print(combined)
Output:
[{'json1': {'breadth': 48.04,
'vessel_id': 1,
'vessel_name': 'SHIP-01',
'vessel_type': 'Crude Oil Tanker',
'year_built': 2012},
'json2': {'Ballast_miles': 43575.8,
'Ballast_miles_pct': 36.1,
'org_id': 1,
'port_days': 383.5,
'sea_days': 414.9,
'total_days': 798.4,
'vessel_id': 1},
'vessel_id': 1},
{'json1': {'breadth': 42,
'vessel_id': 2,
'vessel_name': 'SHIP-02',
'vessel_type': 'Crude Oil Tanker',
'year_built': 2016},
'json2': {'Ballast_miles': 21642.7,
'Ballast_miles_pct': 29.8,
'org_id': 1,
'port_days': 325.7,
'sea_days': 259.8,
'total_days': 585.5,
'vessel_id': 2},
'vessel_id': 2}]
If you want to remove the redundant vessel_id, loop over the dicts and use del to remove that key from each one.

Related

Parse JSON from Api to pandas dataframe with doubled names

I'm parsing a json and I don't understand how to correctly decompose it into a dataframe.
Json structure i have (api response):
{
"result": {
"data": [],
"totals": [
0
]
},
"timestamp": "2021-11-25 15:19:21"
}
response_ =
{
"result":{
"data":[
{
"dimensions":[
{
"id":"2023-01-10",
"name":""
},
{
"id":"123",
"name":"good3"
}
],
"metrics":[
10,
20,
30,
40
]
},
{
"dimensions":[
{
"id":"2023-01-10",
"name":""
},
{
"id":"234",
"name":"good2"
}
],
"metrics":[
1,
2,
3,
4
]
}
],
"totals":[
11,
22,
33,
44
]
},
"timestamp":"2023-02-07 12:58:40"
}
I don't need "timestamp" and "totals" - just "data". So i do:
...
response_ = requests.post(url, headers=head, data=body)
datas = response_.json()
datas_ = datas['result']['data']
df1 = pd.json_normalize(datas_)
I got:
dimensions
metrics
0
[{'id': '2023-01-10', 'name': ''}, {'id': '123', 'name': 'good1'}]
[10, 20, 30, 40]
1
[{'id': '2023-01-10', 'name': ''}, {'id': '234', 'name': 'good2'}]
[1, 2, 3, 4]
But i need dataframe like:
id_
name_
id
name
metric1
metric2
metric3
metric4
0
2023-01-10
123
good1
10
20
30
40
1
2023-01-10
234
good2
1
2
3
4
When i try like:
df1 = pd.json_normalize(datas_, 'dimensions')
i get all id's and name's in one column.
Explain step by step if possible. Thank you.
Try:
# Sample API payload; only response["result"]["data"] is used below.
response = {
    "result": {
        "data": [
            {
                "dimensions": [
                    {"id": "2023-01-10", "name": ""},
                    {"id": "123", "name": "good3"},
                ],
                "metrics": [10, 20, 30, 40],
            },
            {
                "dimensions": [
                    {"id": "2023-01-10", "name": ""},
                    {"id": "234", "name": "good2"},
                ],
                "metrics": [1, 2, 3, 4],
            },
        ],
        "totals": [11, 22, 33, 44],
    },
    "timestamp": "2023-02-07 12:58:40",
}

# Build one flat row per data entry.
tmp = []
for entry in response["result"]["data"]:
    first_dim = entry["dimensions"][0]
    second_dim = entry["dimensions"][1]

    row = {}
    # Keys of the first dimension get a trailing underscore so they do not
    # collide with the identically named keys of the second dimension.
    for field, value in first_dim.items():
        row[f"{field}_"] = value
    row.update(second_dim)
    # Metrics are positional; name them metric1, metric2, ...
    for position, metric in enumerate(entry["metrics"], 1):
        row[f"metric{position}"] = metric
    tmp.append(row)

df = pd.DataFrame(tmp)
print(df)
Prints:
id_ name_ id name metric1 metric2 metric3 metric4
0 2023-01-10 123 good3 10 20 30 40
1 2023-01-10 234 good2 1 2 3 4

represent parent and child relation as dictionary

I have a list of dictionaries. I want to convert this list into a nested structure based on the parent/child relation. I have tried many times, but it is difficult for me.
Thanks in advance for helping to solve the problem.
Input =
data = [
{
"_id": 1,
"label": "Property",
"index": 1
},
{
"_id": 2,
"label": "Find Property",
"index": 1,
"parent_id": 1
},
{
"_id": 3,
"label": "Add Property",
"index": 2,
"parent_id": 1
},
{
"_id": 4,
"label": "Offer",
"index": 2
},
{
"_id": 5,
"label": "My Offer",
"index": 1,
"parent_id": 4
},
{
"_id": 6,
"label": "Accept",
"index": 1,
"parent_id": 5
}
]
I have a list of dictionaries. I want to convert this list into a nested structure based on the parent/child relation. I have tried many times, but it is difficult for me.
Thanks in advance for helping to solve the problem.
Expected Output:
[
{
"_id": 1,
"label": "Property",
"index": 1,
"children" : [
{
"_id": 2,
"label": "Find Property",
"index": 1
},
{
"_id": 3,
"label": "Add Property",
"index": 2
}
]
},
{
"_id": 4,
"label": "Offer",
"index": 2,
"children" : [
{
"_id": 5,
"label": "My Offer",
"index": 1,
"children" : [
{
"_id": 6,
"label": "Accept",
"index": 1
}
]
}
]
},
]
I would do it like this. Keep in mind that this solution also affects the original data list.
# Look-up table from each entry's "_id" to the entry itself, so a parent can
# be reached directly from a child's "parent_id".
indexed = {node["_id"]: node for node in data}

# Entries without a "parent_id" are the roots of the tree.
parents = []
for item in indexed.values():
    if "parent_id" in item:
        # Attach the entry to its parent, creating the parent's "children"
        # list the first time it is needed. The entries are the same dict
        # objects as in `data`, so the original list is modified as well.
        indexed[item["parent_id"]].setdefault("children", []).append(item)
    else:
        parents.append(item)
And the result is:
import pprint
pprint.pprint(parents)
[{'_id': 1,
'children': [{'_id': 2, 'index': 1, 'label': 'Find Property', 'parent_id': 1},
{'_id': 3, 'index': 2, 'label': 'Add Property', 'parent_id': 1}],
'index': 1,
'label': 'Property'},
{'_id': 4,
'children': [{'_id': 5,
'children': [{'_id': 6,
'index': 1,
'label': 'Accept',
'parent_id': 5}],
'index': 1,
'label': 'My Offer',
'parent_id': 4}],
'index': 2,
'label': 'Offer'}]

flatten out sub level data of dictionary into one level?

"dataFrameData": [
{
"intersection": {
"Item": "Item1",
"Customer": "Customer1",
"Month": "1"
},
"measures": {
"Sales": 1212.33,
"Forecast": 400
}
},
{
"intersection": {
"Item": "Item1",
"Customer": "Customer1",
"Month": "2"
},
"measures": {
"Sales": 1200,
"Forecast": 450
}
}
]
I have a dataframe stored like this in a list and want to flatten it into one level by removing the "intersection" and "measures" levels. After flattening, it should look like this:
[
{
"Item": "Item1",
"Customer": "Customer1",
"Month": "1"
"Sales": 1212.33,
"Forecast": 400
},
{
"Item": "Item2",
"Customer": "Customer2",
"Month": "12"
"Sales": 1212.33,
"Forecast": 800
}
]
Is there any approach to do that in O(1) space complexity, instead of building a new list and copying the items using a loop?
Depends on what you mean by O(1). If you just want to avoid using an explicit loop, you can do this
# Merge each record's "intersection" and "measures" sub-dicts into one flat
# dict. The result is bound to `flattened` rather than `dict` so the builtin
# is not shadowed, and there is no trailing comma, which would wrap the list
# in a 1-tuple.
flattened = [{**x['intersection'], **x['measures']} for x in dataFrameData]
which returns
[{'Item': 'Item1',
'Customer': 'Customer1',
'Month': '1',
'Sales': 1212.33,
'Forecast': 400},
{'Item': 'Item1',
'Customer': 'Customer1',
'Month': '2',
'Sales': 1200,
'Forecast': 450}]
If you really need the space complexity, you can use
# Flatten every record in place: move the contents of the "intersection" and
# "measures" sub-dicts up one level and drop the now-empty wrappers. A plain
# loop is used so that no throwaway list of results is built.
for x in dataFrameData:
    x.update(x.pop('intersection'))
    x.update(x.pop('measures'))
While this still loops over the list, every operation in it works in place on the existing dictionaries. This gives me the output:
[{'Item': 'Item1',
'Customer': 'Customer1',
'Month': '1',
'Sales': 1212.33,
'Forecast': 400},
{'Item': 'Item1',
'Customer': 'Customer1',
'Month': '2',
'Sales': 1200,
'Forecast': 450}]
Try this to make your new list:
# Sample records: each one nests its fields under "intersection" and "measures".
original_list = [
    {
        "intersection": {"Item": "Item1", "Customer": "Customer1", "Month": "1"},
        "measures": {"Sales": 1212.33, "Forecast": 400},
    },
    {
        "intersection": {"Item": "Item1", "Customer": "Customer1", "Month": "2"},
        "measures": {"Sales": 1200, "Forecast": 450},
    },
]

# Collect only the "intersection" part of every record and show it.
intersections = [record["intersection"] for record in original_list]
print(intersections)
The output:
[{'Item': 'Item1', 'Customer': 'Customer1', 'Month': '1'}, {'Item': 'Item1', 'Customer': 'Customer1', 'Month': '2'}]

Easiest way to split JSON file using Python

I am working on an interactive visualization of the world happiness report from the years 2015 up to 2020. The data was split into 6 csv files. Using pandas, I have succesfully cleaned the data and concatenated them into one big JSON file with the following format:
[
{
"Country": "Switzerland",
"Year": 2015,
"Happiness Rank": 1,
"Happiness Score": 7.587000000000001,
},
{
"Country": "Iceland",
"Year": 2015,
"Happiness Rank": 2,
"Happiness Score": 7.561,
},
{
"Country": "Switzerland",
"Year": 2016,
"Happiness Rank": 2,
"Happiness Score": 7.5089999999999995,
},
{
"Country": "Iceland",
"Year": 2016,
"Happiness Rank": 3,
"Happiness Score": 7.501,
},
{
"Country": "Switzerland",
"Year": 2017,
"Happiness Rank": 3,
"Happiness Score": 7.49399995803833,
},
{
"Country": "Iceland",
"Year": 2017,
"Happiness Rank": 1,
"Happiness Score": 7.801,
}
]
Now, I would like to programmatically format the JSON file such that it has the following format:
{
"2015": {
"Switzerland": {
"Happiness Rank": 1,
"Happiness Score": 7.587000000000001
},
"Iceland": {
"Happiness Rank": 2,
"Happiness Score": 7.561
}
},
"2016": {
"Switzerland": {
"Happiness Rank": 2,
"Happiness Score": 7.5089999999999995
},
"Iceland": {
"Happiness Rank": 3,
"Happiness Score": 7.501
}
},
"2017": {
"Switzerland": {
"Happiness Rank": 3,
"Happiness Score": 7.49399995803833
},
"Iceland": {
"Happiness Rank": 1,
"Happiness Score": 7.801
}
}
}
It has to be done programmatically, since there are over 900 distinct (country, year) pairs. I want the JSON in this format since it make the JSON file more readable, and makes it easier to select appropriate data. If I want the rank of Iceland in 2015, I can then do data[2015]["Iceland"]["Happiness Rank"]
Does anyone know the easiest / most convenient way to do this in Python?
If data is your original list of dictionaries:
def by_year(data):
    """Yield ``(year, countries)`` pairs built from a flat list of records.

    Args:
        data: Iterable of dicts, each with the keys "Year", "Country",
            "Happiness Rank" and "Happiness Score".

    Yields:
        Tuples ``(str(year), {country: {"Happiness Rank": ...,
        "Happiness Score": ...}})``, one per distinct year, in the order the
        years first appear in *data*. Records of the same year are merged
        even when they are not adjacent in *data*.
    """
    retain_keys = ("Happiness Rank", "Happiness Score")
    # Group with a plain dict instead of itertools.groupby: groupby only
    # merges *consecutive* records, so unsorted input would produce several
    # partial groups for one year that overwrite each other in dict().
    grouped = {}
    for record in data:
        countries = grouped.setdefault(record["Year"], {})
        countries[record["Country"]] = {k: record[k] for k in retain_keys}
    for year, countries in grouped.items():
        yield str(year), countries
# by_year() is a generator of (year, countries) pairs; dict() collects them
# into a single mapping keyed by year before printing.
print(dict(by_year(data)))
Output:
{'2015': {'Switzerland': {'Happiness Rank': 1, 'Happiness Score': 7.587000000000001}, 'Iceland': {'Happiness Rank': 2, 'Happiness Score': 7.561}}, '2016': {'Switzerland': {'Happiness Rank': 2, 'Happiness Score': 7.5089999999999995}, 'Iceland': {'Happiness Rank': 3, 'Happiness Score': 7.501}}, '2017': {'Switzerland': {'Happiness Rank': 3, 'Happiness Score': 7.49399995803833}, 'Iceland': {'Happiness Rank': 1, 'Happiness Score': 7.801}}}
>>>
This assumes that the dictionaries in data will already be grouped together by year.
I assume you have the original pandas dataframe from which this JSON was created. With pandas, you can do df = df.groupby(['Year', 'Country']). You can then follow the procedure in pandas groupby to nested json to convert it to JSON.
you might find groupby from the itertools module useful. I was able to do this with
import itertools

# groupby() batches *consecutive* records that share the same "Year".
groups = itertools.groupby(data, key=lambda record: record["Year"])

# For every year, map each country name to its full record.
newdict = {}
for year, members in groups:
    newdict[str(year)] = {member["Country"]: member for member in members}
Where data is the data with the form of the example you gave
It will retain the original fields in the dict, but it can easily be deleted in this way
# "Year" and "Country" are already encoded in the nesting, so remove them
# from every record.
for per_country in newdict.values():
    for record in per_country.values():
        del record["Year"], record["Country"]

Parse List in nested dictionary Python

data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
This is my Python object. How can I identify that a key's value is a list? For example, after traversing through 'print(data["cars"]["model1"]["company_details"]["some_list"])' I get the list. Since it is a small dictionary this was easy, but how can I identify the same if I encounter a list as the value for some other key in the future?
Example:
data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": ["US", "UK", "IND"],
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
Can anyone please suggest/guide me to understand how to identify the key's value is a list.
The final goal is to remove the duplicates in the list if any exists?
Thank you very much:)
You can have a recursive function that goes to any depth and make the items of the list unique like below:
def _unique_in_order(items):
    """Return the elements of *items* without duplicates, keeping first-seen order."""
    seen = set()
    unique = []
    for item in items:
        try:
            if item in seen:
                continue
            seen.add(item)
        except TypeError:
            # Unhashable element (e.g. a dict or list): fall back to an
            # equality scan of what has been kept so far.
            if item in unique:
                continue
        unique.append(item)
    return unique


def removeDuplicatesFromList(di):
    """Remove duplicate elements from every list found in a nested dict, in place.

    Args:
        di: Dictionary whose values may be dicts (searched recursively),
            lists (de-duplicated) or anything else (left untouched).

    Returns:
        None. *di* and the dicts nested inside it are modified directly.
        Each list is replaced by a new list that keeps the first occurrence
        of every element in its original order. Dicts that are elements of a
        list are cleaned as well, before the list itself is de-duplicated.
    """
    for key, val in di.items():
        if isinstance(val, dict):
            removeDuplicatesFromList(val)
        elif isinstance(val, list):
            for element in val:
                if isinstance(element, dict):
                    removeDuplicatesFromList(element)
            # Re-assigning an existing key does not resize the dict, so it is
            # safe to do while iterating over items().
            di[key] = _unique_in_order(val)
In [9]: removeDuplicatesFromList(data)
In [10]: data
Out[10]:
{'persons': {'1': {'name': 'siddu'}, '2': {'name': 'manju'}},
'cars': {'model1': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}},
'model2': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}}}}

Categories