Convert dataframe into specific JSON - python

I would like to convert my DataFrame into a specific JSON structure. I tried to use to_dict(), but so far I haven't found the parameters that reproduce the output shown below.
Do you have any idea?
My code :
import pandas as pd
data = {
'alt' : ["BeattheBeachmark NEW", "BeattheBeachmark NEW"],
'Mod' : ["GA", "GA"],
'Pers' : ["Movment", "Movment"],
'Vie' : ["Inprogress", "Inprogress"],
'Actions' : ["Clear", "Add"]
}
df = pd.DataFrame(data)
My expected output:
result = {
"alt" : {
"BeattheBeachmark NEW" : {
"Mod" : {
"GA" : {
"Pers" : {
"Movment" : {
"Vie" : {
"Inprogress" : {
'Actions' : ["Clear", "Add"]
}
}
}
}
}
}
}
}
}

You can group your dataframe by "alt", by "Mod"... and so on and create your dictionary along the way:
import pandas as pd
import json
data = {
    'alt': ["BeattheBeachmark NEW", "BeattheBeachmark NEW"],
    'Mod': ["GA", "GA"],
    'Pers': ["Movment", "Movment"],
    'Vie': ["Inprogress", "Inprogress"],
    'Actions': ["Clear", "Add"],
}
df = pd.DataFrame(data)

# Every grouping column contributes two levels to the result:
#     {column name: {group value: {...next column...}}}
# and the innermost group stores the list of its "Actions" values.
output_dict = {"alt": {}}
for alt_value, alt_rows in df.groupby("alt"):
    by_mod = {}
    output_dict["alt"][alt_value] = {"Mod": by_mod}
    for mod_value, mod_rows in alt_rows.groupby("Mod"):
        by_pers = {}
        by_mod[mod_value] = {"Pers": by_pers}
        for pers_value, pers_rows in mod_rows.groupby("Pers"):
            by_vie = {}
            by_pers[pers_value] = {"Vie": by_vie}
            for vie_value, vie_rows in pers_rows.groupby("Vie"):
                by_vie[vie_value] = {"Actions": list(vie_rows["Actions"])}

print(json.dumps(output_dict, indent=4))
Output:
{
"alt": {
"BeattheBeachmark NEW": {
"Mod": {
"GA": {
"Pers": {
"Movment": {
"Vie": {
"Inprogress": {
"Actions": [
"Clear",
"Add"
]
}
}
}
}
}
}
}
}
}
EDIT: for the record, here is a recursive solution for this kind of problem, which makes it much more generic:
import pandas as pd
import json
data = {
    'alt': ["BeattheBeachmark NEW", "BeattheBeachmark NEW"],
    'Mod': ["GA", "GA"],
    'Pers': ["Movment", "Movment"],
    'Vie': ["Inprogress", "Inprogress"],
    'Actions': ["Clear", "Add"],
}
df_in = pd.DataFrame(data)
output_dict = dict()


def extract_columns(df, col, output_dict):
    """Nest the columns of ``df`` into ``output_dict``, one level per column.

    Starting at column position ``col``, every column except the last adds
    ``{column name: {group value: {...}}}``; the last column is stored as
    ``{column name: [values...]}`` for the rows of the current group.

    Args:
        df: DataFrame (or group of rows) to convert.
        col: Position of the column to process at this level.
        output_dict: Dictionary that is filled in place.
    """
    column = df.columns[col]
    if col == len(df.columns) - 1:
        # tolist() yields native Python scalars, so numeric columns stay
        # serialisable with json.dumps (list(series) keeps numpy scalars).
        output_dict[column] = df[column].tolist()
        return

    level = output_dict[column] = {}
    for value, rows in df.groupby(column):
        # Group keys of numeric columns can be numpy scalars, which json.dumps
        # rejects as dictionary keys; unwrap them to plain Python values.
        key = value.item() if hasattr(value, "item") else value
        level[key] = {}
        extract_columns(rows, col + 1, level[key])


extract_columns(df_in, 0, output_dict)
print(json.dumps(output_dict, indent=4))

To get the same dictionary as in your example, you can iterate through your dataframe's columns and create the dictionary as such (using literal evaluation to help since df.to_json returns a string and you want a list):
import ast
# Series.tolist() already returns each column as a plain Python list, so the
# values do not have to be serialised with to_json() and parsed back with
# ast.literal_eval() (which fails on JSON's null / true / false literals).
your_dict = {col: df[col].tolist() for col in df.columns}
print(your_dict)
Giving you:
{'alt': ['BeattheBeachmark NEW', 'BeattheBeachmark NEW'],
'Mod': ['GA', 'GA'],
'Pers': ['Movment', 'Movment'],
'Vie': ['Inprogress', 'Inprogress'],
'Actions': ['Clear', 'Add']}

Related

Use python parse string of file path to json

I have a list of path strings like the one below. How can I convert it into a complete JSON object?
foldersList = [
'1/',
'1/2/',
'1/2/2.txt',
'1/2/5/',
'1/5.txt',
'2/',
'2/test.txt',
'test.json'
]
How to convert it into complete json object like below
{
"fileMenu":{
"list":[
{
"fileType":"d",
"name":"1",
"subFolders":[
{
"fileType":"-",
"name":"5.txt",
},
{
"fileType":"d",
"name":"2",
"subFolders":[
{
"fileType":"-",
"name":"2.txt",
},
{
"date":1594983597000,
"fileType":"d",
"name":"5",
"size":0,
"subFolders":[]
}]
}]
},
{
"fileType":"d",
"name":"2",
"subFolders":[{
"fileType":"-",
"name":"test.txt"
}]
},
{
"fileType":"-",
"name":"test.json"
}],
"status":"OK"
}
}
How to do this? I tried it with the some code snippets.
foldersList = [
    '1/',
    '1/2/',
    '1/2/2.txt',
    '1/2/5/',
    '1/5.txt',
    '2/',
    '2/test.txt',
    'test.json'
]
foldersJson = {}
nodeInfoList = []
nodeInfoDic = {}
for path in foldersList:
    # Start again from the root of the tree for every path.
    nodeInfoDic = foldersJson
    for node in path.split('/'):
        # A trailing '/' (directory entry) produces an empty last component.
        if node != '':
            # dict.has_key() no longer exists in Python 3; setdefault() creates
            # the child only when it is missing and returns it either way.
            nodeInfoDic = nodeInfoDic.setdefault(node, {})
    # print(foldersJson)
    # NOTE(review): the snippet's indentation was lost; appending once per
    # path (the deepest node reached) is an assumption - confirm.
    nodeInfoList.append(nodeInfoDic)
print(nodeInfoList)
# print(foldersJson)

MongoDB Python Update/ Insert dict in dict without overwriting

I can't insert my new document value (dict) without overwriting my existing data. I've looked through all different resources and can't find an answer.
I've also thought of putting the values from first_level_dict into a list, "first_level_dict" : [dict1, dict2], but I wouldn't know how to append the dict either.
Sample Data:
# Create the document
target_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"}
}
},
"Root_key": "Root_value"
}
collection.insert_one(target_dict)
The result I'm looking for:
result_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"},
"second_level_dict2": {"Content2": "Value2"}
}
},
"Root_key": "Root_value"
}
Update: New Values example 2:
# New Values Sample
new_values = {
    "_id": 55,
    "Root_dict": {
        "first_level_dict": {
            # Key names match the expected result ("second_level_dict2/3").
            "second_level_dict2": {"Content2": "Value2"},
            "second_level_dict3": {"Content3": "Value3"}
        }
    }
}
collection.insert_one(target_dict)
Update: The result I'm looking for example 2:
result_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"},
"second_level_dict2": {"Content2": "Value2"},
"second_level_dict3": {"Content3": "Value3"},
}
},
"Root_key": "Root_value"
}
What I've tried:
# Update document "$setOnInsert"
q = {"_id": 55}
target_dict = {"$set": {"Root_dict": {"first_level_dict": {"second_level_dict2": {"Content2": "Value2"}}}}}
collection.update_one(q, target_dict)
What I've tried example 2:
# Update document
q = {"_id": 55}
target_dict = {"$set": {"Root_dict.first_level_dict": {
"second_level_dict2": {"Content2": "Value2"},
"second_level_dict3": {"Content3": "Value3"}}}}
collection.update_one(q, target_dict)
Try using the dot notation:
# In Python the update operator must be a quoted string key; the dotted path
# addresses only the new sub-document, so its siblings are left untouched.
target_dict = {"$set": {"Root_dict.first_level_dict.second_level_dict2": {"Content2": "Value2"}}}
Additionally, to update/add multiple fields (for "example 2"):
# One "$set" can carry several dotted paths; each path adds or replaces only
# the sub-document it names. The operator has to be a quoted string in Python.
target_dict = {
    "$set": {
        "Root_dict.first_level_dict.second_level_dict2": {"Content2": "Value2"},
        "Root_dict.first_level_dict.second_level_dict3": {"Content3": "Value3"},
    }
}

i want to convert sample JSON data into nested JSON using specific key-value in python

I have below sample data in JSON format :
project_cost_details is my database result set after querying.
{
"1": {
"amount": 0,
"breakdown": [
{
"amount": 169857,
"id": 4,
"name": "SampleData",
"parent_id": "1"
}
],
"id": 1,
"name": "ABC PR"
}
}
Here is full json : https://jsoneditoronline.org/?id=2ce7ab19af6f420397b07b939674f49c
Expected output :https://jsoneditoronline.org/?id=56a47e6f8e424fe8ac58c5e0732168d7
I created this sample JSON using loops in my code, but I am stuck on how to convert it to the expected JSON format. What I get is a flat, sequential list; I need to convert it to a tree-like (nested) JSON format.
Trying in Python :
# Build a summary dict keyed by the stringified Project_ID. Each entry gets an
# 'amount', a 'name', an 'id' and a 'breakdown' list of cost items.
# NOTE(review): the indentation of this snippet was lost, so the exact nesting
# of the statements below cannot be confirmed from here.
project_cost = {}
for cost in project_cost_details:
if cost.get('Parent_Cost_Type_ID'):
# Both identifiers are converted to strings before being used as keys/values.
project_id = str(cost.get('Project_ID'))
parent_cost_type_id = str(cost.get('Parent_Cost_Type_ID'))
if project_id not in project_cost:
project_cost[project_id] = {}
if "breakdown" not in project_cost[project_id]:
project_cost[project_id]["breakdown"] = []
if 'amount' not in project_cost[project_id]:
project_cost[project_id]['amount'] = 0
project_cost[project_id]['name'] = cost.get('Title')
project_cost[project_id]['id'] = cost.get('Project_ID')
# NOTE(review): parent_cost_type_id is a str here while Cost_Type_ID is used
# as returned by the query; if that value is an int (the sample shows "id": 4
# next to "parent_id": "1") this comparison can never be true - verify.
if parent_cost_type_id == cost.get('Cost_Type_ID'):
project_cost[project_id]['amount'] += int(cost.get('Amount'))
#if parent_cost_type_id is None:
# Record the cost item itself in the project's breakdown list.
project_cost[project_id]["breakdown"].append(
{
'amount': int(cost.get('Amount')),
'name': cost.get('Name'),
'parent_id': parent_cost_type_id,
'id' : cost.get('Cost_Type_ID')
}
)
From this I get the sample JSON. It would be good if this same code could produce the desired format directly.
Also tried this solution mention here : https://adiyatmubarak.wordpress.com/2015/10/05/group-list-of-dictionary-data-by-particular-key-in-python/
I got approach to convert sample JSON to expected JSON :
data = [
    {"name": "ABC", "parent": "DEF"},
    {"name": "DEF", "parent": "null"},
    {"name": "new_name", "parent": "ABC"},
    {"name": "new_name2", "parent": "ABC"},
    {"name": "Foo", "parent": "DEF"},
    {"name": "Bar", "parent": "null"},
    {"name": "Chandani", "parent": "new_name", "relation": "rel", "depth": 3},
    {"name": "Chandani333", "parent": "new_name", "relation": "rel", "depth": 3},
]

# Index every record by its name so that a parent can be looked up directly.
result = {x.get("name"): x for x in data}

# Records whose parent is not a known name (here the string "null") are roots.
tree = []
for a in data:
    parent = result.get(a.get("parent"))
    if parent:
        # The records are shared, not copied: attaching a child to its parent
        # also makes it visible wherever that parent already hangs in the tree.
        parent.setdefault("children", []).append(a)
    else:
        tree.append(a)
Reference help : http://jsfiddle.net/9FqKS/ this is a JavaScript solution i converted to Python
It seems that you want to get a list of values from a dictionary.
# Only the values are needed, so take them directly instead of unpacking items.
result = list(project_cost_details.values())

How can I use Python to generate nested JSON data from my CSV file

I have tried to use the online Jsonify It tool, which can create nested JSON data from my data, but I can't seem to get it to work. I have also tried Python code from other posts, but it does not seem to work either. If you know an easier method than using Python, that would be good.
Here is my .CSV data:
ID,Name,Date,Subject,Start,Finish
0,Ladybridge High School,01/11/2019,Maths,05:28,0
0,Ladybridge High School,02/11/2019,Maths,05:30,06:45
0,Ladybridge High School,01/11/2019,Economics,11:58,12:40
0,Ladybridge High School,02/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,01/11/2019,Maths,05:28,06:45
1,Loreto Sixth Form,02/11/2019,Maths,05:30,06:45
1,Loreto Sixth Form,01/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,02/11/2019,Economics,11:58,12:40
This is the nested JSON structure I would like:
{
"Timetable" : [ {
"Date" : {
"01-11-2019" : {
"Maths" : {
"Start" : "05:28",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
},
"02-11-2019" : {
"Maths" : {
"Start" : "05:30",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
}
},
"Name" : "Ladybridge High School"
}, {
"Date" : {
"01-11-2019" : {
"Maths" : {
"Start" : "05:28",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
},
"02-11-2019" : {
"Maths" : {
"Start" : "05:30",
"Finish" : "06:45"
},
"Economics" : {
"Start" : "11:58",
"Finish" : "12:40"
}
}
},
"Name" : "Loreto Sixth Form"
} ]
}
Something like this?
[EDIT]
I refactored it to handle arbitrary top-level keys for each entry in the timetable. I also made it first create a dict and then convert the dict to a list so that it can run in O(N) time, in case the input is very large.
import csv
def build_timetable(rows):
    """Group flat timetable rows into one nested entry per ``ID``.

    Args:
        rows: Iterable of mappings with the keys "ID", "Date", "Subject",
            "Start" and "Finish". Any other key (e.g. "Name") is copied onto
            the entry unchanged.

    Returns:
        ``{"Timetable": [entry, ...]}`` with entries in first-seen ID order.
        Each entry looks like
        ``{"ID": ..., "Date": {date: {subject: {"Start": ..., "Finish": ...}}}, ...}``.
    """
    # These keys only feed the nested "Date" sub-tree.
    nested_keys = ("Date", "Subject", "Start", "Finish")
    entries = {}
    for row in rows:
        entry = entries.setdefault(row["ID"], {"ID": row["ID"], "Date": {}})
        # setdefault keeps the subjects already stored for this date; assigning
        # a fresh {} on every row would keep only the last subject per date.
        subjects = entry["Date"].setdefault(row["Date"], {})
        subjects[row["Subject"]] = {
            "Start": row["Start"],
            "Finish": row["Finish"],
        }
        # Use everything else as a top-level attribute of the entry.
        for key, value in row.items():
            if key not in nested_keys:
                entry[key] = value
    return {"Timetable": list(entries.values())}


if __name__ == "__main__":
    # newline='' is what the csv module documentation asks for on open().
    with open('data.csv', newline='') as f:
        csv_data = list(csv.DictReader(f, skipinitialspace=True))
    timetable = build_timetable(csv_data)
An improvement to the above answer to nest the ID before the name and date:
import csv
def build_timetable_by_name(rows):
    """Group flat timetable rows into one nested entry per ``Name``.

    Args:
        rows: Iterable of mappings with the keys "Name", "Date", "Subject",
            "Start" and "Finish".

    Returns:
        ``{"Timetable": [entry, ...]}`` where each entry is
        ``{"Name": ..., "Date": {date: {subject: {"Start": ..., "Finish": ...}}}}``.
        Entries appear in the order their name is first seen, so the result
        is the same on every run (iterating a set of names is not).
    """
    entries = {}
    for row in rows:
        # Direct lookup by name instead of scanning every entry for each row.
        entry = entries.setdefault(row["Name"], {"Name": row["Name"], "Date": {}})
        # Keep the subjects already recorded for this date; resetting the date
        # to {} on every row would leave only the last subject.
        subjects = entry["Date"].setdefault(row["Date"], {})
        subjects[row["Subject"]] = {
            "Start": row["Start"],
            "Finish": row["Finish"],
        }
    return {"Timetable": list(entries.values())}


if __name__ == "__main__":
    timetable = {"Timetable": []}
    print(timetable)
    # newline='' is what the csv module documentation asks for on open().
    with open("C:/Users/kspv914/Downloads/data.csv", newline='') as f:
        csv_data = list(csv.DictReader(f, skipinitialspace=True))
    timetable = build_timetable_by_name(csv_data)
    print(timetable)

nested json to csv using pandas normalize

With given script I am able to get output as I showed in a screenshot,
but there is a column named as cve.description.description_data which is again in json format. I want to extract that data as well.
import json
import pandas as pd
from pandas.io.json import json_normalize
# Load the JSON feed from disk into a plain Python object.
with open('nvdcve-1.0-modified.json') as f:
d = json.load(f)
# Normalise the entries of the top-level 'CVE_Items' list into a DataFrame.
# Presumably nested dicts end up as dotted column names (the question mentions
# a column called cve.description.description_data) - confirm.
nycphil = json_normalize(d['CVE_Items'])
nycphil.head(3)
# NOTE(review): in the sample JSON 'cve' maps to a dict, not a list of
# records - verify that record_path='cve' does what is intended here.
works_data = json_normalize(data=d['CVE_Items'], record_path='cve')
works_data.head(3)
# Only the first frame (nycphil) is written out; works_data is not saved.
nycphil.to_csv("test4.csv")
If I change works_data = json_normalize(data=d['CVE_Items'], record_path='cve.descr') it gives this error:
"result = result[spec] KeyError: 'cve.description'"
JSON format as follows:
{
"CVE_data_type":"CVE",
"CVE_data_format":"MITRE",
"CVE_data_version":"4.0",
"CVE_data_numberOfCVEs":"1000",
"CVE_data_timestamp":"2018-04-04T00:00Z",
"CVE_Items":[
{
"cve":{
"data_type":"CVE",
"data_format":"MITRE",
"data_version":"4.0",
"CVE_data_meta":{
"ID":"CVE-2001-1594",
"ASSIGNER":"cve#mitre.org"
},
"affects":{
"vendor":{
"vendor_data":[
{
"vendor_name":"gehealthcare",
"product":{
"product_data":[
{
"product_name":"entegra_p&r",
"version":{
"version_data":[
{
"version_value":"*"
}
]
}
}
]
}
}
]
}
},
"problemtype":{
"problemtype_data":[
{
"description":[
{
"lang":"en",
"value":"CWE-255"
}
]
}
]
},
"references":{
"reference_data":[
{
"url":"http://apps.gehealthcare.com/servlet/ClientServlet/2263784.pdf?DOCCLASS=A&REQ=RAC&DIRECTION=2263784-100&FILENAME=2263784.pdf&FILEREV=5&DOCREV_ORG=5&SUBMIT=+ ACCEPT+"
},
{
"url":"http://www.forbes.com/sites/thomasbrewster/2015/07/10/vulnerable- "
},
{
"url":"https://ics-cert.us-cert.gov/advisories/ICSMA-18-037-02"
},
{
"url":"https://twitter.com/digitalbond/status/619250429751222277"
}
]
},
"description":{
"description_data":[
{
"lang":"en",
"value":"GE Healthcare eNTEGRA P&R has a password of (1) value."
}
]
}
},
"configurations":{
"CVE_data_version":"4.0",
"nodes":[
{
"operator":"OR",
"cpe":[
{
"vulnerable":true,
"cpe22Uri":"cpe:/a:gehealthcare:entegra_p%26r",
"cpe23Uri":"cpe:2.3:a:gehealthcare:entegra_p\\&r:*:*:*:*:*:*:*:*"
}
]
}
]
},
"impact":{
"baseMetricV2":{
"cvssV2":{
"version":"2.0",
"vectorString":"(AV:N/AC:L/Au:N/C:C/I:C/A:C)",
"accessVector":"NETWORK",
"accessComplexity":"LOW",
"authentication":"NONE",
"confidentialityImpact":"COMPLETE",
"integrityImpact":"COMPLETE",
"availabilityImpact":"COMPLETE",
"baseScore":10.0
},
"severity":"HIGH",
"exploitabilityScore":10.0,
"impactScore":10.0,
"obtainAllPrivilege":false,
"obtainUserPrivilege":false,
"obtainOtherPrivilege":false,
"userInteractionRequired":false
}
},
"publishedDate":"2015-08-04T14:59Z",
"lastModifiedDate":"2018-03-28T01:29Z"
}
]
}
I want to flatten all data.
Assuming the multiple URLs delineate between rows and all else meta data repeats, consider a recursive function call to extract every key-value pair in nested json object, d.
The recursive function uses `global` to update the module-level objects that are later combined into a list of dictionaries for the pd.DataFrame() call. The loop at the end copies the dictionary filled by the recursive function, inner, once per URL entry (stored in multi), so that every output row carries the shared fields plus its own URL.
import json
import pandas as pd
# load json object
with open('nvdcve-1.0-modified.json') as f:
d = json.load(f)
multi = []
inner = {}


def recursive_extract(i):
    """Collect the scalar leaves of a nested JSON value into ``inner``.

    Dictionaries are walked recursively. A list with exactly one element is
    treated as a transparent wrapper around that element. Any other list is
    not descended into; it is stored in the global ``multi`` instead (the last
    such list encountered wins).

    Scalar values are written to the global ``inner`` under their own key, so
    a later occurrence of a key overwrites an earlier one.

    Args:
        i: A value as produced by ``json.load`` (dict, list or scalar).
    """
    global multi, inner
    if isinstance(i, list):
        if len(i) != 1:
            # Several (or zero) elements: these become the per-row records.
            multi = i
            return
        # Exactly one element: unwrap it and handle it like a plain value.
        i = i[0]
    if isinstance(i, dict):
        for k, v in i.items():
            if isinstance(v, (list, dict)):
                recursive_extract(v)
            else:
                inner[k] = v
# Walk the CVE items: this fills `inner` with the scalar fields and leaves the
# multi-element list that was found in `multi`.
recursive_extract(d['CVE_Items'])

# One output row per entry of `multi`, each starting from a copy of the
# collected scalar fields and then overlaid with the entry's own keys.
data_dict = [{**inner, **i} for i in multi]
df = pd.DataFrame(data_dict)
df.to_csv('Output.csv')
Output (all columns the same except for URL, widened for emphasis)

Categories