I used this answer (Create array of json objects from for loops) and it works really well for "plain" JSON objects, but if I have a nested element it does not work correctly. This is my code:
import json

story_project = open('json_jira/stories/stories_to_jira_TESTEST.json', 'w+')
#######Projects############
json_projects = []
p_name_a, p_key_a, p_type_a = [], [], []
#######Issues##############
json_issues = []
i_summary_a, i_created_a, i_reporter_a, i_status_a, i_issue_type_a = [], [], [], [], []
#######Custom Fields########
json_custom_field_values = []
cf_field_name_a, cf_field_type_a, cf_value_a = [], [], []
#########The Values################
p_name_a.append("ClubHouseDEV")
p_key_a.append("CLUB")
p_type_a.append("software")
i_summary_a.append("This summary doesn not exist")
i_created_a.append("2017-07-17T02:35:16Z")
i_reporter_a.append("5a02285487c3eb1913c44a80")
i_status_a.append("Open")
i_issue_type_a.append("Milestones")
cf_field_name_a.append("external_id")
cf_field_type_a.append("com.atlassian.jira.plugin.system.customfieldtypes:float")
cf_value_a.append(3)
cf_field_name_a.append("Story Points")
cf_field_type_a.append("com.atlassian.jira.plugin.system.customfieldtypes:float")
cf_value_a.append(5)
###########Build The JSON##############
json_custom_field_values = [
    {"fieldName": cf_field_name, "fieldType": cf_field_type, "value": cf_value}
    for cf_field_name, cf_field_type, cf_value
    in zip(cf_field_name_a, cf_field_type_a, cf_value_a)
]
json_issues = [
    {"sumamry": i_summary, "created": i_created, "reporter": i_reporter,
     "status": i_status, "issueType": i_issue_type,
     "customFieldValues": json_custom_field_value}
    for i_summary, i_created, i_reporter, i_status, i_issue_type, json_custom_field_value
    in zip(i_summary_a, i_created_a, i_reporter_a, i_status_a, i_issue_type_a, json_custom_field_values)
]
json_projects = [
    {"name": p_name, "key": p_key, "type": p_type, "issues": json_issue}
    for p_name, p_key, p_type, json_issue
    in zip(p_name_a, p_key_a, p_type_a, json_issues)
]
json_file = [{"projects": json_project} for json_project in zip(json_projects)]
json.dump(json_file, story_project)
The output should be:
{ "projects": [
{
"name": "ClubHouseDEV",
"key": "CLUB",
"type":"software",
"issues":
[
{
"summary":"This summary doesn not exist",
"created":"2017-07-17T02:35:16Z",
"reporter":"5a02285487c3eb1913c44a80",
"status":"Open",
"issueType":"Milestones",
"customFieldValues":
[
{
"fieldName": "external_id",
"fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
"value": 3
},
{
"fieldName": "Story Points",
"fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
"value": 5
}
],
"labels" : ["ch_epics"],
"updated": "2017-07-17T02:35:16Z"
}
]
}
]
}
But it is:
[{"projects": [{"name": "ClubHouseDEV", "key": "CLUB", "type": "software", "issues": {"sumamry": "This summary doesn not exist", "created": "2017-07-17T02:35:16Z", "reporter": "5a02285487c3eb1913c44a80", "status": "Open", "issueType": "Milestones", "customFieldValues": {"fieldName": "external_id", "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float", "value": 3}}}]}]
As you can see, it only added one value to the nested "customFieldValues": the zip in the issues comprehension pairs each issue with a single element of json_custom_field_values instead of attaching the whole list. How can I add all the values?
This is how I solved it: build the deepest level first, then integrate it with the level above, and so on. The output was as expected.
###########Build The JSON##############
cf_data, issues_data, projects_data = [], [], []
json_dict = {}

for cf_field_name, cf_field_type, cf_value in zip(cf_field_name_a, cf_field_type_a, cf_value_a):
    json_custom_field_values = {}
    json_custom_field_values["fieldName"] = cf_field_name
    json_custom_field_values["fieldType"] = cf_field_type
    json_custom_field_values["value"] = cf_value
    cf_data.append(json_custom_field_values)

for i_summary, i_created, i_reporter, i_status, i_issue_type in zip(i_summary_a, i_created_a, i_reporter_a, i_status_a, i_issue_type_a):
    json_issues = {}
    json_issues["summary"] = i_summary
    json_issues["created"] = i_created
    json_issues["reporter"] = i_reporter
    json_issues["status"] = i_status
    json_issues["issueType"] = i_issue_type
    json_issues["customFieldValues"] = cf_data
    issues_data.append(json_issues)

for p_name, p_key, p_type in zip(p_name_a, p_key_a, p_type_a):
    json_projects = {}
    json_projects["name"] = p_name
    json_projects["key"] = p_key
    json_projects["type"] = p_type
    json_projects["issues"] = issues_data
    projects_data.append(json_projects)

json_dict["projects"] = projects_data
json.dump(json_dict, story_project)
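For what it's worth, once each level is built as a list, the same structure can also be written bottom-up with nested comprehensions. A minimal sketch, assuming the same *_a input lists as above; the key difference from the broken version is that the custom-field list is built whole inside each issue instead of being zipped against the issues:

json_dict = {
    "projects": [
        {
            "name": p_name, "key": p_key, "type": p_type,
            "issues": [
                {
                    "summary": i_summary, "created": i_created,
                    "reporter": i_reporter, "status": i_status,
                    "issueType": i_issue_type,
                    # every issue carries the full custom-field list
                    "customFieldValues": [
                        {"fieldName": n, "fieldType": t, "value": v}
                        for n, t, v in zip(cf_field_name_a, cf_field_type_a, cf_value_a)
                    ],
                }
                for i_summary, i_created, i_reporter, i_status, i_issue_type
                in zip(i_summary_a, i_created_a, i_reporter_a, i_status_a, i_issue_type_a)
            ],
        }
        for p_name, p_key, p_type in zip(p_name_a, p_key_a, p_type_a)
    ]
}
# json.dump(json_dict, story_project) then writes the same expected output.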
I have an organization tree stored as JSON:
{
"name": "amos",
"direct_reports": [
{
"name": "bart",
"direct_reports": [
{
"name": "colin",
"direct_reports": []
},
{
"name": "clara",
"direct_reports": []
}
]
},
{
"name": "bravo",
"direct_reports": [
{
"name": "cupid",
"direct_reports": []
},
{
"name": "clever",
"direct_reports": []
}
]
}
]
}
I need to store the full "management path" for each employee, such as:
management_chain["clever"] = ["bravo", "amos"]
management_chain["bart"] = ["amos"]
Currently I manage to reach all the edges and classify them as employees and managers with the following code:
def get_herarchy(org):
    tmp_obj = {}
    tmp_obj['managers'] = []
    for emp in org['direct_reports']:
        tmp_obj['managers'].append(org['name'])
        print("manager " + org['name'])
        if len(emp['direct_reports']) > 0:
            get_herarchy(emp)
        tmp_obj['name'] = emp['name']
        print(emp['name'])
    return tmp_obj
But the dictionary doesn't hold the right values.
Like this, maybe:
def get_chain(org, name):
    if org['name'] == name:
        return [name]
    for emp in org['direct_reports']:
        chain = get_chain(emp, name)
        if chain:
            return [org['name']] + chain
    return None

print(get_chain(org, 'bart'))    # ['amos', 'bart']
print(get_chain(org, 'clever'))  # ['amos', 'bravo', 'clever']
UPD: This is how to make a dictionary:
def nested_iter(org):
    yield org['name']
    for emp in org['direct_reports']:
        yield from nested_iter(emp)

print({name: get_chain(org, name)[0:-1] for name in nested_iter(org)})
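If the whole dictionary is what you need anyway, the tree can also be walked just once, carrying the ancestor chain down instead of re-searching per name. A minimal sketch of that alternative:

def build_chains(org, ancestors=(), chains=None):
    # Record each employee's management path while walking the tree once.
    if chains is None:
        chains = {}
    chains[org['name']] = list(ancestors)
    for emp in org['direct_reports']:
        build_chains(emp, ancestors + (org['name'],), chains)
    return chains

# build_chains(org)['clever']  ->  ['amos', 'bravo']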
I am fetching an API and trying to write the response to CSV, but the catch is that the response is a multilevel dict/JSON, so when I convert it to CSV most of the columns end up as dicts or lists of dicts.
I am trying this:
def expand(data):
    d = pd.Series(data)
    t = d.index
    for i in t:
        if type(d[i]) in (list, dict):
            expend_s = pd.Series(d[i])
            t.append(expend_s.index)
            d = d.append(expend_s)
            d = d.drop([i])
    return d

df['person'].apply(expand)
but this solution is not working. If you look at the person column, there are multiple dicts and lists of dicts, like:
"birthDate": "0000-00-00",
"genderCode": {
"codeValue": "M",
"shortName": "Male",
"longName": "Male"
},
"maritalStatusCode": {
"codeValue": "M",
"shortName": "Married"
},
"disabledIndicator": False,
"preferredName": {},
"ethnicityCode": {
"codeValue": "4",
"shortName": "4",
"longName": "Not Hispanic or Latino"
},
"raceCode": {
"identificationMethodCode": {},
"codeValue": "1",
"shortName": "White",
"longName": "White"
},
"militaryClassificationCodes": [],
"governmentIDs": [
{
"itemID": "9200037107708_4385",
"idValue": "XXX-XX-XXXX",
"nameCode": {
"codeValue": "SSN",
"longName": "Social Security Number"
},
"countryCode": "US"
}
],
"legalName": {
"givenName": "Jack",
"middleName": "C",
"familyName1": "Abele",
"formattedName": "Abele, Jack C"
},
"legalAddress": {
"nameCode": {
"codeValue": "Personal Address 1",
"shortName": "Personal Address 1",
"longName": "Personal Address 1"
},
"lineOne": "1932 Keswick Lane",
"cityName": "Concord",
"countrySubdivisionLevel1": {
"subdivisionType": "StateTerritory",
"codeValue": "CA",
"shortName": "California"
},
"countryCode": "US",
"postalCode": "94518"
},
"communication": {
"mobiles": [
{
"itemID": "9200037107708_4389",
"nameCode": {
"codeValue": "Personal Cell",
"shortName": "Personal Cell"
},
"countryDialing": "1",
"areaDialing": "925",
"dialNumber": "6860589",
"access": "1",
"formattedNumber": "(925) 686-0589"
}
]
}
}
Your suggestions and advice would be very helpful.
I think we can handle the nested dicts by reading with pd.json_normalize, and the lists of dicts with the function below. First we get the columns which contain lists:
import ast

import pandas as pd


def df_list_and_dict_col(explode_df: pd.DataFrame, primary_key: str,
                         col_name: str, folder: str) -> pd.DataFrame:
    """Convert a column of lists of dicts into a clean dataframe.

    Keyword arguments:
    -----------------
    explode_df -- dataframe in which we have to expand a column
    primary_key -- column that ties the exploded rows back to their parent
    col_name -- name of the column where the nested data lives
    folder -- output folder for the generated CSV files

    Return: pd.DataFrame
    return a clean, expanded dataframe (also written out as CSV)
    """
    explode_df[col_name] = explode_df[col_name].replace('', '[]', regex=True)
    explode_df[col_name] = explode_df[col_name].fillna('[]')
    explode_df[col_name] = explode_df[col_name].astype(
        'string')  # to make sure that the entire column is string
    explode_df[col_name] = explode_df[col_name].apply(ast.literal_eval)
    explode_df = explode_df.explode(col_name)
    explode_df = explode_df.reset_index(drop=True)
    normalized_df = pd.json_normalize(explode_df[col_name])
    explode_df = explode_df.join(
        other=normalized_df,
        lsuffix="_left",
        rsuffix="_right"
    )
    explode_df = explode_df.drop(columns=col_name)
    type_df = explode_df.applymap(type)
    col_list = []
    for col in type_df.columns:
        if (type_df[col] == type([])).any():
            col_list.append(col)
    # print(col_list, explode_df.columns)
    if len(col_list) != 0:
        for col in col_list:
            df_list_and_dict_col(explode_df[[primary_key, col]], primary_key,
                                 col, folder)
            explode_df.drop(columns=col, inplace=True)
            print(f'{col}.csv is done')
    explode_df.to_csv(f'{folder}/{col_name}.csv')
First we get the list columns and pass them to the function one by one; the function checks whether any list remains inside the expanded column, recurses if so, and saves each level to CSV:
type_df = df.applymap(type)
col_list = []
for col in type_df.columns:
    if (type_df[col] == type([])).any():
        col_list.append(col)

for col in col_list:
    # print(col, df[['associateOID', col]])
    df_list_and_dict_col(df[['primary_key', col]].copy(), 'primary_key', col, folder='worker')
    df.drop(columns=col, inplace=True)
Now you have multiple CSVs in normalized format.
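For the plain nested dicts, pd.json_normalize on its own already flattens them into dotted columns. A minimal sketch, assuming records is the list of person dicts returned by the API:

import pandas as pd

# Flatten nested dicts into columns like "genderCode.codeValue",
# "legalAddress.cityName", "legalName.formattedName", ...
df = pd.json_normalize(records, sep='.')

# List-valued columns (e.g. "governmentIDs", "communication.mobiles")
# survive as lists; those are what df_list_and_dict_col above explodes
# into their own CSV files.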
So I have one big JSON file here which looks like this:
data = {
    "Module1": {
        "Description": "",
        "Layer": "1",
        "SourceDir": "pathModule1",
        "Attributes": {
            "some",
        },
        "Vendor": "comp",
        "components": {
            "Component1": {
                "path": "pathToCom1",
                "includes": [
                    "include1",
                    "include2",
                    "include3",
                    "include4",
                    "include5"
                ],
                "generated": "txt",
                "memory": "txt",
                # etc.
            },
            "Component2": {
                "path": "pathToCom2",
                "includes": [
                    "include1",
                    "include2",
                    "include3",
                    "include4",
                    "include5"
                ],
                "generated": "txt",
                "memory": "txt",
                # etc.
            }
        }
    },
    "Module2": {
        "Description": "",
        "Layer": "2",
        "SourceDir": "pathModule2",
        "Attributes": {
            "some",
        },
        "Vendor": "comp",
        "components": {
            "Component1": {
                "path": "pathToCom1",
                "includes": [
                    "include1",
                    "include2",
                    "include3",
                    "include4",
                    "include5"
                ],
                "generated": "txt",
                "memory": "txt",
                # etc.
            },
            "Component2": {
                "path": "pathToCom2",
                "includes": [
                    "include1",
                    "include2",
                    "include3",
                    "include4",
                    "include5"
                ],
                "generated": "txt",
                "memory": "txt",
                # etc.
            }
        }
    },
    "Module3": {
        "Description": "",
        "Layer": "3",
        "SourceDir": "path",
        "Attributes": {
            "some",
        },
        "Vendor": "",
    },
    "Module4": {
        "Description": "",
        "Layer": "4",
        "SourceDir": "path",
        "Attributes": {
            "some",
        }
    }
}
Then I filter only the ones whose "Vendor" field equals "comp":
data = {k: v for k,v in data.items() if v.get("Vendor") == "comp"}
And after that I extract the components and get the final output:
Module1 pathModule1 [('Component1', 'pathToCom1', ['include1', 'include2', 'include3', 'include4', 'include5']), ('Component2', 'pathToCom2', ['include1', 'include2', 'include3', 'include4', 'include5'])]
Module2 pathModule2 [('Component1', 'pathToCom1', ['include1', 'include2', 'include3', 'include4', 'include5']), ('Component2', 'pathToCom2', ['include1', 'include2', 'include3', 'include4', 'include5'])]
The code for this:

for k, v in data.items():
    components = [(comp_name, comp_data["path"], comp_data["includes"])
                  for comp_name, comp_data in v["components"].items()]
    print(k, v["SourceDir"], components)
Now the next thing I have to do, as the final output, is create some .txt files in a folder, named after each module and containing the paths to its components, like this:
Module1.txt should contain only the paths to its components, so Module1.txt has inside:
pathToCom1
pathToCom2
Module2.txt with:
pathToCom1
pathToCom2
The includes should also be stored in corresponding .txt files, named after the component and containing its "includes" fields, so we would have:
Component1.txt with inside:
include1
include2
include3
include4
include5
Component2.txt with inside:
include1
include2
include3
include4
include5
EDIT:
So I have managed to get this; the code is:
for k, v in data.items():
    components = [(comp_name, comp_data["path"], comp_data["includes"])
                  for comp_name, comp_data in v["components"].items()]
    with open(components_path + k + '.txt', 'w') as f:
        for i, n in v['components'].items():
            path_to_write = n['path']
            f.write(path_to_write + '\n')
    for i, n in v['components'].items():
        with open(path_to_includes + i + '.txt', 'w') as f:
            includes_to_write = n['includes']
            f.write(str(includes_to_write) + '\n')
Now the only problem is that I get the includes as one line:
['include1', 'include2', 'include3', 'include4'..]
I need them to be:
include1
include2
include3
include4
include5
So I finally managed to get this working. Here is the code; it might help someone someday:
for k, v in data.items():
    components = [(comp_name, comp_data["path"], comp_data["includes"])
                  for comp_name, comp_data in v["components"].items()]
    with open(components_path + k + '.txt', 'w') as f:
        for i, n in v['components'].items():
            path_to_write = n['path']
            f.write(path_to_write + '\n')
    for i, n in v['components'].items():
        with open(path_to_includes + i + '.txt', 'w') as f:
            includes_to_write = n['includes']
            for line in includes_to_write:
                f.write(line + '\n')
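The same files can be written a little more compactly with str.join and pathlib; a sketch assuming the same data, components_path, and path_to_includes as above:

from pathlib import Path

for module_name, module in data.items():
    # ModuleN.txt: one component path per line
    Path(components_path, module_name + '.txt').write_text(
        '\n'.join(comp['path'] for comp in module['components'].values()) + '\n')
    # ComponentN.txt: one include per line
    for comp_name, comp in module['components'].items():
        Path(path_to_includes, comp_name + '.txt').write_text(
            '\n'.join(comp['includes']) + '\n')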
I'm calling an API service which returns JSON (with Czech language values) that looks like:
{
    "model": "czech-morfflex-pdt-161115",
    "acknowledgements": [
        "http://ufal.mff.cuni.cz/morphodita#morphodita_acknowledgements",
        "http://ufal.mff.cuni.cz/morphodita/users-manual#czech-morfflex-pdt_acknowledgements"
    ],
    "result": [
        [
            {
                "token": "Děti",
                "analyses": [
                    {
                        "lemma": "dítě",
                        "tag": "POS=N|SubPOS=N|Gen=F|Num=P|Cas=1|Neg=A"
                    },
                    {
                        "lemma": "dítě",
                        "tag": "POS=N|SubPOS=N|Gen=F|Num=P|Cas=4|Neg=A"
                    },
                    {
                        "lemma": "dítě",
                        "tag": "POS=N|SubPOS=N|Gen=F|Num=P|Cas=5|Neg=A"
                    }
                ],
                "space": " "
            },
            ...
I want to return the "lemma" value where the "tag" value has Cas=3.
I tried:
import json
import os

import httpx

service_url = "http://lindat.mff.cuni.cz/services/morphodita/api"
output_format = "json"
model = "czech-morfflex"
text = "Děti pojedou k babičce Martě. Už se těší."

anal_service_url = "/".join([service_url, "analyze"])
params = {"output": output_format, "model": model, "data": text}

response = httpx.request("GET", anal_service_url, params=params)
response.raise_for_status()

response_dict = response.json()
result = response_dict.get("result")
print(type(result))

for res in result:
    for a in res:
        for b in a['analyses']:
            for case in b['tag'][4]:
                for i in [i for i, x in enumerate(case) if x == '3']:
                    print(i)  # print position
But I don't know how to access "lemma" when Cas=3.
Help would be appreciated.
You can use an if statement to find the tag in the string:
case_tag = 'Cas=3'
for res_list in result:
    for res_list_elem in res_list:
        for item in res_list_elem['analyses']:
            if case_tag in item['tag']:
                print(item['lemma'])
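If you ever need an exact match on the case rather than a substring test, the tag is just pipe-delimited key=value pairs, so it can be parsed into a dict first. A small sketch of that variant:

def parse_tag(tag):
    # "POS=N|SubPOS=N|...|Cas=1|Neg=A" -> {"POS": "N", ..., "Cas": "1", ...}
    return dict(part.split("=", 1) for part in tag.split("|"))

for sentence in result:
    for token in sentence:
        for analysis in token["analyses"]:
            if parse_tag(analysis["tag"]).get("Cas") == "3":
                print(analysis["lemma"])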
I have some CSV data that needs to be converted to a specific JSON format.
I have written code that works to some nesting level, but not as required.
This is my csv data:
title context answers question id
tit1 con1 text1 que1 id1
tit1 con1 text2 que2 id2
tit2 con2 text3 que3 id3
tit2 con2 text4 que4 id4
tit2 con3 text5 que5 id5
my code:
import pandas as pd

df = pd.read_csv('processedOutput.csv')

finalList = []
finalDict = {}
grouped = df.groupby(['context'])
for key, value in grouped:
    dictionary = {}
    j = grouped.get_group(key).reset_index(drop=True)
    dictionary['context'] = j.at[0, 'context']
    dictList = []
    anotherDict = {}
    for i in j.index:
        anotherDict['answers'] = j.at[i, 'answers']
        anotherDict['question'] = j.at[i, 'question']
        anotherDict['id'] = j.at[i, 'id']
        dictList.append(anotherDict)
    dictionary['qas'] = dictList
    finalList.append(dictionary)

import json
data = json.dumps(finalList)
The output structure is fine, but it only keeps the last element of each group (anotherDict is created once per group, outside the inner loop, so every append adds a reference to the same dict):
[{"context": "con1",
"qas": [
{"answers": "text2", "question": "que2", "id": "id2"},
{"answers": "text2", "question": "que2", "id": "id2"}
]
},
{"context": "con2",
"qas": [
{"answers": "text4", "question": "que4", "id": "id4"},
{"answers": "text4", "question": "que4", "id": "id4"}
]
},
{"context": "con3",
"qas": [
{"answers": "text5", "question": "que5", "id": "id5"}
]
}
]
I want to make the data nested one more level, with all fields, as below:
[
{
"title": "tit1",
"paragraph": [
{
"context": "con1",
"qas": [
{"answers": "text1","question": "que1","id": "id1"},
{"answers": "text2","question": "que2","id": "id2"}
]}]
},
{
"title": "tit2",
"paragraph": [
{
"context": "con2",
"qas": [
{"answers": "text3","question": "que3","id": "id3"},
{"answers": "text4","question": "que4","id": "id4"}
],
"context": "con3",
"qas": [
{"answers": "text5","question":"que5", "id": "id5"}
]
}
]
}
]
I have been stuck on this for a very long time; any suggestions would be great.
Your output data needs 3 levels of grouping: title, paragraph, and q&a's. I would recommend using df.groupby(['title', 'context', 'answers']) to drive the loop. Then, within the loop, each group constitutes one q&a dictionary (assuming the id column contains unique values only). To build the higher-level structure, all it takes is some bookkeeping to detect level changes and add to the appropriate list and dictionary. We'll use more groupby levels to do this:
...
g1 = df.groupby(['title'])
for k1, v1 in g1:
    l2_para_list = []
    l4_qas_list = []
    g2 = v1.groupby(['context'])
    for k2, v2 in g2:
        g3 = v2.groupby(['answers'])
        for _, v3 in g3:
            qas_dict = {}
            qas_dict['answers'] = v3.answers.item()
            qas_dict['question'] = v3.question.item()
            qas_dict['id'] = v3.id.item()
            l4_qas_list.append(qas_dict)
        l3_para_dict = {}
        l3_para_dict['context'] = k2
        l3_para_dict['qas'] = l4_qas_list
        l4_qas_list = []
        l2_para_list.append(l3_para_dict)
        l3_para_dict = {}
    l1_title_dict = {}
    l1_title_dict['title'] = k1
    l1_title_dict['paragraph'] = l2_para_list
    finalList.append(l1_title_dict)
    l1_title_dict = {}
    l2_para_list = []

print(json.dumps(finalList))
...
Output (formatted for presentation):
[{"title": "tit1", "paragraph":
[{"context": "con1",
"qas": [{"answers": "text1", "question": "que1", "id": "id1"},
{"answers": "text2", "question": "que2", "id": "id2"}]}]},
{"title": "tit2", "paragraph":
[{"context": "con2",
"qas": [{"answers": "text3", "question": "que3", "id": "id3"},
{"answers": "text4", "question": "que4", "id": "id4"}]},
{"context": "con3",
"qas": [{"answers": "text5", "question": "que5", "id": "id5"}]}]}]