Convert any JSON to name/value pairs - python

I have a source JSON that can be in any potential format. For storage & processing purposes, I'd like to save this data in 2 column format.
For example, I'd like the following JSON:
"record" {
"name1": "value1",
"name2": "value2",
"parameters": {
"param": {},
"paramSet": {
"items": [{
"id": "id1"
}, {
"id": "id2"
}]
}
}
}
To be converted to the following CSV-like format:
record:name1 , "value1"
record:name2 , "value2"
record:parameters:param , ""
record:parameters:paramSet:items#0:id , "id1"
record:parameters:paramSet:items#1:id , "id2"
My questions are:
Is there a formal name for this transformation (so that I can search better).
Is there a standard or convention for representing JSON in a 2-column format like this?
Are there any libraries in Python that can do this for me?
Are there libraries on other major programming languages that will make it easier to implement this?
Thanks in advance.

First I made json correct:
{
"record": {
"name1": "value1",
"name2": "value2",
"parameters": {
"param": {},
"paramSet": {
"items": [
{
"id": "id1"
},
{
"id": "id2"
}
]
}
}
}
}
Next some code for recursive mocking json
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
class Thing(object):
data = ''
output = []
def __init__(self, file_name):
with open(file_name) as data_file:
self.data = json.load(data_file)
def mock(self):
for (i, item) in enumerate(self.data):
if type(self.data[item]) == dict:
self._recursive(self.data[item], item)
for (i, data) in enumerate(self.output):
print(data)
def _recursive(self, request_data, path):
for (i, item) in enumerate(request_data):
if type(request_data[item]) == dict:
if len(request_data[item]) > 0:
path2 = "{}:{}".format(path, item)
self._recursive(request_data[item], path2)
else:
self.output.append("{}:{}, \"\"".format(path, item))
elif type(request_data[item]) == list:
for (j, list_item) in enumerate(request_data[item]):
path2 = "{}:{}#{}".format(path, item, j)
self._recursive(request_data[item][j], path2)
else:
self.output.append("{}:{}, {}".format(path, item, request_data[item]))
thing = Thing("input.json")
thing.mock()
Following code will output:
record:name1, value1
record:name2, value2
record:parameters:paramSet:items#0:id, id1
record:parameters:paramSet:items#1:id, id2
record:parameters:param, ""

Related

JSON array sort by value - Python

I need to sort and create a new array based on the value of the JSON. I need to filter repositories under each team and store repositories into a different array.
Input array:
{
"repo_list": [
{
"repo_name": "MaticCorporation/Sample-Repo-1",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-2",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-3",
"team_name": "-",
"tlt_member": "Sample-TLT-Member-2",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-4",
"team_name": "RETIX",
"tlt_member": "-",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-5",
"team_name": "-",
"tlt_member": "-",
"matix.properties": "No"
}
]
}
Output:
{
"repo_by_team": [
{
"team": "AFIN",
"repo_count": 2,
"repo_list": [
"MaticCorporation/Sample-Repo-1",
"MaticCorporation/Sample-Repo-2"
]
},
{
"team": "RETIX",
"repo_count": 1,
"repo_list": [
"MaticCorporation/Sample-Repo-4"
]
}
]
}
I've implemented the solution to filter and store all team names into an array, but I'm having difficulty how to get the result like output array.
Here is my code for extracting team names:
def get_team_names(repo_list):
repos=valid_repos(repo_list)
team_name=[item.get('team') for item in repos]
return team_name
You can use a dict[str, list[str]] to map between a team and its repositories, and you can use the json module to transform data between Python dictionaries and a JSON representation.
import json
with open('input.json') as input_file, open('output.json', 'w') as output_file:
repo_data = json.load(input_file)['repo_list']
team_repos = {}
for repo in repo_data:
if repo['team_name'] != '-':
if repo['team_name'] not in team_repos:
team_repos[repo['team_name']] = []
team_repos[repo['team_name']].append(repo['repo_name'])
result = []
for team, repo_list in team_repos.items():
result.append({
"team": team,
"repo_count": len(repo_list),
"repo_list": repo_list
})
json.dump({'repo_by_team': result}, output_file, indent=4)
The following is functional. The function may perform slowly on large input, but it uses no more than the necessary amount of space. It does, however, accept and return a Python dictionary. To convert to and from a dictionary use the Python json module.
def sort_by_team(repo_list: dict) -> dict:
ans = {"repo_by_team": []}
for repo in repo_list:
if repo["team_name"] != "-" and repo["team_name"] not in [r["team"] for r in ans["repo_by_team"]]:
ans["repo_by_team"].append({"team": repo["team_name"], "repo_count": 1, "repo_list": [repo["repo_name"]]})
else:
for r in ans["repo_by_team"]:
if r["team"] != repo["team_name"]:
continue
r["repo_count"] += 1
r["repo_list"].append(repo["repo_name"])
break
return ans

"TypeError: list indices must be integers or slices, not str" when trying to change keys

I want to remove some problematic $oid and everything that contains $ in a json file. I wrote:
import json
with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
data = [json.loads(line) for line in handle]
for k,v in data[0].items():
#check if key has dict value
if type(v) == dict:
#find id with $
r = list(data[k].keys())[0]
#change value if $ occurs
if r[0] == '$':
data[k] = data[k][r]
print(data)
But I get TypeError: list indices must be integers or slices, not str. I know it is because the json dictionaries are made redeable for Python, but how do I fix it?
Edit: the .json file in my computer looks like this:
{
"_id": {
"$oid": "5e7511c45cb29ef48b8cfcff"
},
"description": "some text",
"startDate": {
"$date": "5e7511c45cb29ef48b8cfcff"
},
"completionDate": {
"$date": "2021-01-05T14:59:58.046Z"
}
}
I believe this is because your k is a str and you try to call data[k]?
It will be better if you show the format of the json as well.
Updating with answer.
This should work for the given json. But if you want to for a larger file. looping can be tricky, specially because you're trying to modify the keys of a dictionary.
import json
line = '{"_id": { "$oid": "5e7511c45cb29ef48b8cfcff" }, "description": "some text", "startDate": { "$date": "5e7511c45cb29ef48b8cfcff"},"completionDate": {"$date": "2021-01-05T14:59:58.046Z"}}'
data = [json.loads(line)]
for k,v in data[0].items():
if type(v) == dict:
for k2, v2 in data[0][k].items():
if k2[0] == '$':
formatted = k2[1:]
del data[0][k][k2]
data[0][k][formatted] = v2
print(data)
# import json
# with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
# data = [json.loads(line) for line in handle]
data = [
{
"_id": {
"$oid": "5e7511c45cb29ef48b8cfcff"
},
"description": "some text",
"startDate": {
"$date": "5e7511c45cb29ef48b8cfcff"
},
"completionDate": {
"$date": "2021-01-05T14:59:58.046Z"
}
}
]
for d in data:
for k, v in d.items():
# check if key has dict value
del_keys = set()
if type(v) == dict:
# find id with $
del_keys.update([i for i in v if i.startswith("$")])
[v.pop(key) for key in del_keys]
print(data)
# [{'_id': {}, 'description': 'some text', 'startDate': {}, 'completionDate': {}}]

i want to convert sample JSON data into nested JSON using specific key-value in python

I have below sample data in JSON format :
project_cost_details is my database result set after querying.
{
"1": {
"amount": 0,
"breakdown": [
{
"amount": 169857,
"id": 4,
"name": "SampleData",
"parent_id": "1"
}
],
"id": 1,
"name": "ABC PR"
}
}
Here is full json : https://jsoneditoronline.org/?id=2ce7ab19af6f420397b07b939674f49c
Expected output :https://jsoneditoronline.org/?id=56a47e6f8e424fe8ac58c5e0732168d7
I have this sample JSON which i created using loops in code. But i am stuck at how to convert this to expected JSON format. I am getting sequential changes, need to convert to tree like or nested JSON format.
Trying in Python :
project_cost = {}
for cost in project_cost_details:
if cost.get('Parent_Cost_Type_ID'):
project_id = str(cost.get('Project_ID'))
parent_cost_type_id = str(cost.get('Parent_Cost_Type_ID'))
if project_id not in project_cost:
project_cost[project_id] = {}
if "breakdown" not in project_cost[project_id]:
project_cost[project_id]["breakdown"] = []
if 'amount' not in project_cost[project_id]:
project_cost[project_id]['amount'] = 0
project_cost[project_id]['name'] = cost.get('Title')
project_cost[project_id]['id'] = cost.get('Project_ID')
if parent_cost_type_id == cost.get('Cost_Type_ID'):
project_cost[project_id]['amount'] += int(cost.get('Amount'))
#if parent_cost_type_id is None:
project_cost[project_id]["breakdown"].append(
{
'amount': int(cost.get('Amount')),
'name': cost.get('Name'),
'parent_id': parent_cost_type_id,
'id' : cost.get('Cost_Type_ID')
}
)
from this i am getting sample JSON. It will be good if get in this code only desired format.
Also tried this solution mention here : https://adiyatmubarak.wordpress.com/2015/10/05/group-list-of-dictionary-data-by-particular-key-in-python/
I got approach to convert sample JSON to expected JSON :
data = [
{ "name" : "ABC", "parent":"DEF", },
{ "name" : "DEF", "parent":"null" },
{ "name" : "new_name", "parent":"ABC" },
{ "name" : "new_name2", "parent":"ABC" },
{ "name" : "Foo", "parent":"DEF"},
{ "name" : "Bar", "parent":"null"},
{ "name" : "Chandani", "parent":"new_name", "relation": "rel", "depth": 3 },
{ "name" : "Chandani333", "parent":"new_name", "relation": "rel", "depth": 3 }
]
result = {x.get("name"):x for x in data}
#print(result)
tree = [];
for a in data:
#print(a)
if a.get("parent") in result:
parent = result[a.get("parent")]
else:
parent = ""
if parent:
if "children" not in parent:
parent["children"] = []
parent["children"].append(a)
else:
tree.append(a)
Reference help : http://jsfiddle.net/9FqKS/ this is a JavaScript solution i converted to Python
It seems that you want to get a list of values from a dictionary.
result = [value for key, value in project_cost_details.items()]

How to sum integers stored in json

How can I sum the count values? My json data is as following.
{
"note":"This file contains the sample data for testing",
"comments":[
{
"name":"Romina",
"count":97
},
{
"name":"Laurie",
"count":97
},
{
"name":"Bayli",
"count":90
}
]
}
This is how i did it eventually.
import urllib
import json
mysumcnt = 0
input = urllib.urlopen('url').read()
info = json.loads(input)
myinfo = info['comments']
for item in myinfo:
mycnt = item['count']
mysumcnt += mycnt
print mysumcnt
Using a sum, map and a lambda function
import json
data = '''
{
"note": "This file contains the sample data for testing",
"comments": [
{
"name": "Romina",
"count": 97
},
{
"name": "Laurie",
"count": 97
},
{
"name": "Bayli",
"count": 90
}
]
}
'''
count = sum(map(lambda x: int(x['count']), json.loads(data)['comments']))
print(count)
If the JSON is currently a string and not been loaded into a python object you'll need to:
import json
loaded_json = json.loads(json_string)
comments = loaded_json['comments']
sum(c['count'] for c in comments)

Grab element from json dump

I'm using the following python code to connect to a jsonrpc server and nick some song information. However, I can't work out how to get the current title in to a variable to print elsewhere. Here is the code:
TracksInfo = []
for song in playingSongs:
data = { "id":1,
"method":"slim.request",
"params":[ "",
["songinfo",0,100, "track_id:%s" % song, "tags:GPASIediqtymkovrfijnCYXRTIuwxN"]
]
}
params = json.dumps(data, sort_keys=True, indent=4)
conn.request("POST", "/jsonrpc.js", params)
httpResponse = conn.getresponse()
data = httpResponse.read()
responce = json.loads(data)
print json.dumps(responce, sort_keys=True, indent=4)
TrackInfo = responce['result']["songinfo_loop"][0]
TracksInfo.append(TrackInfo)
This brings me back the data in json format and the print json.dump brings back:
pi#raspberrypi ~/pithon $ sudo python tom3.py
{
"id": 1,
"method": "slim.request",
"params": [
"",
[
"songinfo",
"0",
100,
"track_id:-140501481178464",
"tags:GPASIediqtymkovrfijnCYXRTIuwxN"
]
],
"result": {
"songinfo_loop": [
{
"id": "-140501481178464"
},
{
"title": "Witchcraft"
},
{
"artist": "Pendulum"
},
{
"duration": "253"
},
{
"tracknum": "1"
},
{
"type": "Ogg Vorbis (Spotify)"
},
{
"bitrate": "320k VBR"
},
{
"coverart": "0"
},
{
"url": "spotify:track:2A7ZZ1tjaluKYMlT3ItSfN"
},
{
"remote": 1
}
]
}
}
What i'm trying to get is result.songinfoloop.title (but I tried that!)
The songinfo_loop structure is.. peculiar. It is a list of dictionaries each with just one key.
Loop through it until you have one with a title:
TrackInfo = next(d['title'] for d in responce['result']["songinfo_loop"] if 'title' in d)
TracksInfo.append(TrackInfo)
A better option would be to 'collapse' all those dictionaries into one:
songinfo = reduce(lambda d, p: d.update(p) or d,
responce['result']["songinfo_loop"], {})
TracksInfo.append(songinfo['title'])
songinfo_loop is a list not a dict. That means you need to call it by position, or loop through it and find the dict with a key value of "title"
positional:
responce["result"]["songinfo_loop"][1]["title"]
loop:
for info in responce["result"]["songinfo_loop"]:
if "title" in info.keys():
print info["title"]
break
else:
print "no song title found"
Really, it seems like you would want to have the songinfo_loop be a dict, not a list. But if you need to leave it as a list, this is how you would pull the title.
The result is really a standard python dict, so you can use
responce["result"]["songinfoloop"]["title"]
which should work

Categories