Extract course ids for instructors from json input - python

Let's say I've got a nested JSON file as below. If I want to print the courses that each instructor teaches, how do I do that?
{
"info":{
"source_objects":[
{
"type":"sub-category",
"id":277438897,
}
],
"item_type":"course",
"items":[
{
"_class":"course",
"id":156173119,
"is_paid":null,
"trainer":[
{
"id":257585701,
"url":"/user/tania_guerra/",
}
],
{
"_class":"course",
"id":12456,
"is_paid":null,
"trainer":[
{
"id":257585701,
"url":"/user/tania_guerra/",
}
],
}
*************and more data on the same format****************
}
}
I'm not sure if there's any simple trick that I'm missing. So far, I've tried the following and it prints the course id and trainer id. But then how do I add all the courses that this trainer trains?
import json

# Asker's attempt: read one JSON document and visit every (course, trainer)
# pair found under info -> items -> trainer.
# NOTE: alljson is described as a directory holding several JSON files;
# open() needs the path of a single file, so each file must be opened in turn.
with open(alljson, 'r') as stream:  # not named `json`, so the json module stays usable
    read_json = json.load(stream)

for item in read_json['info']['items']:
    cid = item['id']  # gets the course id
    for trainer in item['trainer']:
        trainer_id = trainer['id']  # gets the trainer id
        # then how do I get course id added to trainer id? for example
        # 12456---123456***123457***123454***12454
        # trainer id --- all the courses that this instructor teaches, joined with ***

Assuming each trainer has a unique id, you can create a dict of lists, where the keys are trainer ids and the values are lists of course ids:
import os, json
def collect_trainer_courses(rootdir):
    """Map every trainer found under *rootdir* to the courses they teach.

    Walks *rootdir* recursively, parses each file whose extension is
    ``.json`` and reads its ``info -> items`` list. Every item contributes
    its ``id`` (the course id) to each entry of its ``trainer`` list.

    Returns a dict keyed by ``(trainer id, trainer url)`` whose values are
    lists of course ids (as strings) in the order they were encountered.
    A missing *rootdir* yields an empty dict because ``os.walk`` then
    produces nothing.
    """
    trainers = {}
    for root, _dirs, files in os.walk(rootdir):
        for filename in files:
            if os.path.splitext(filename)[1] != '.json':
                continue
            filepath = os.path.join(root, filename)
            # JSON is exchanged as UTF-8; do not depend on the locale default.
            with open(filepath, encoding='utf-8') as stream:
                data = json.load(stream)
            for item in data['info']['items']:
                cid = str(item['id'])
                for trainer in item['trainer']:
                    key = (trainer['id'], trainer['url'])
                    # setdefault creates the list the first time a trainer is seen.
                    trainers.setdefault(key, []).append(cid)
    return trainers


def write_trainer_courses(trainers, output):
    """Write one ``id---url---course;;;course`` line per trainer to *output*.

    Lines are sorted by the ``(trainer id, trainer url)`` key.
    """
    with open(output, 'w') as stream:
        for (tid, url), cids in sorted(trainers.items()):
            stream.write('%s---%s---%s\n' % (tid, url, ';;;'.join(cids)))


if __name__ == '__main__':
    rootdir = 'tmp/test1'
    output = 'trainers.txt'
    trainers = collect_trainer_courses(rootdir)
    write_trainer_courses(trainers, output)
Result:
257585701---/user/tania_guerra/---12456;;;7992450;;;7812756;;;156173119;;;562456
918585703---/user/tania_guerra/---7867833;;;14473169;;;156173119
test.json:
{
"info": {
"source_objects": [
{
"type": "sub-category",
"id": 277438897
}
],
"item_type": "course",
"items": [
{
"_class": "course",
"id": 156173119,
"is_paid": null,
"trainer": [
{
"id": 257585701,
"url": "/user/tania_guerra/"
}
]
},
{
"_class": "course",
"id": 12456,
"is_paid": null,
"trainer": [
{
"id": 257585701,
"url": "/user/tania_guerra/"
}
]
}
]
}
}

I think it's easiest to use a dict or, better, a defaultdict(list) that maps each trainer id to the list of course ids that trainer teaches.
Something like:
import json
from collections import defaultdict

# Parse the file first: the object returned by open() is a file handle and
# cannot be subscripted, and naming it `json` would shadow the json module.
with open(alljson, "r") as stream:
    items = json.load(stream)["info"]["items"]

# trainer id -> list of ids of the courses that trainer teaches
trainer_course_mapping = defaultdict(list)
for item in items:
    for trainer in item["trainer"]:
        trainer_course_mapping[trainer["id"]].append(item["id"])

Related

python - collect full path till leaf on organization tree

I have an organization tree stored as JSON:
{
"name": "amos",
"direct_reports": [
{
"name": "bart",
"direct_reports": [
{
"name": "colin",
"direct_reports": []
},
{
"name": "clara",
"direct_reports": []
}
]
},
{
"name": "bravo",
"direct_reports": [
{
"name": "cupid",
"direct_reports": []
},
{
"name": "clever",
"direct_reports": []
}
]
}
]
}
I need to store full "management path" for each employee, such as:
management_chain["clever"]={bravo,amos}
management_chain["bart"]={amos}
Currently I manage to reach all edges and classify them as employees and managers with the following code:
# Asker's attempt: walks org['direct_reports'], printing the manager's name and
# each employee's name, and calls itself for employees that have reports.
# NOTE(review): the value returned by the nested get_herarchy(emp) call is
# never used, and tmp_obj['name'] is a single slot that is reassigned for each
# employee visited - presumably why the result "doesn't hold the right values".
# NOTE(review): indentation was lost in this paste; confirm which statements
# sit inside the for loop and the if before relying on the notes above.
def get_herarchy(org):
tmp_obj = {}
tmp_obj['managers'] = []
for emp in org['direct_reports']:
tmp_obj['managers'].append(org['name'])
print("manager "+org['name'])
# only descend when this employee manages someone
if len(emp['direct_reports'])>0:
get_herarchy(emp)
tmp_obj['name'] = emp['name']
print(emp['name'])
return tmp_obj
But the dictionary doesn't hold the right values.
Like this, maybe:
def get_chain(org, name):
    """Return the management path from the root of *org* down to *name*.

    *org* is a node shaped like ``{"name": ..., "direct_reports": [...]}``.
    The result lists the names from *org* itself down to and including
    *name*, or is ``None`` when *name* does not occur in this subtree.
    A node without a ``direct_reports`` key is treated as a leaf.
    """
    if org['name'] == name:
        return [name]
    for emp in org.get('direct_reports', ()):
        chain = get_chain(emp, name)
        # The first subtree that contains the name wins; prefix our own name.
        if chain is not None:
            return [org['name']] + chain
    return None
print(get_chain(org, 'bart')) # ['amos', 'bart']
print(get_chain(org, 'clever')) # ['amos', 'bravo', 'clever']
UPD: This is how to make a dictionary:
def nested_iter(org):
    """Yield the name of *org*, then the names of everyone below it.

    Traversal is depth-first in ``direct_reports`` order. A node without a
    ``direct_reports`` key is treated as a leaf.
    """
    yield org['name']
    for emp in org.get('direct_reports', ()):
        yield from nested_iter(emp)
print({name: get_chain(org, name)[0:-1] for name in nested_iter(org)})

How to search through multiple (thousands) of JSON files to find files with a specific value and then append those specific values to a new list

I recently generated 10,000 images with a corresponding .json file. I generated 10 before I did the bigger collection and so I am trying to filter out or search through the 10,000 json files, for a specific key value. here is one of the JSON files for example:
{
"name": "GrapeGrannys #1",
"description": "Grannys with grapes etc.",
"image": "ipfs://NewUriToReplace/1.png",
"dna": "93596679f006e3a9226700e0e7539179b532bf29",
"edition": 1,
"date": 1667406230920,
"attributes": [
{
"trait_type": "Backgrounds",
"value": "sunrise_beach"
},
{
"trait_type": "main",
"value": "GrapeGranny"
},
{
"trait_type": "eyeColor",
"value": "gray"
},
{
"trait_type": "skirtAndTieColor",
"value": "green"
},
{
"trait_type": "Headwear",
"value": "hat1"
},
{
"trait_type": "specialItems",
"value": "ThugLife"
}
],
"compiler": "HashLips Art Engine"
}
In "attributes", I want to target the first object and its value and check whether that value is equal to "GrapeCity".
Then, after all files have been read and searched through, I'd like the files with that specific value "GrapeCity" to be stored in a new list or array that I can print, so I can see which specific files contain that keyword. Here is what I have tried in Python:
import json
import glob
# from datetime import datetime
# Asker's attempt: load every file matched by the glob pattern and collect a
# value from its "attributes" entry.
# NOTE(review): src is assigned here but not used in the lines shown.
src = "./Assets/json"
# date = datetime.now()
data = []
# NOTE(review): the pattern begins with a literal '$', so it presumably matches
# no files, which would explain the empty list - confirm against the real path.
# recursive=True only changes matching when the pattern contains '**'.
files = glob.glob('$./Assets/json/*', recursive=True)
for single_file in files:
with open(single_file, 'r') as f:
try:
json_file = json.load(f)
# NOTE(review): in the sample file "attributes" is a list of
# {"trait_type", "value"} dicts, so indexing it with the string "values"
# raises TypeError, which the KeyError handler below does not catch.
data.append([
json_file["attributes"]["values"]["GrapeCity"]
])
except KeyError:
print(f'Skipping {single_file}')
data.sort()
print(data)
# csv_filename = f'{str(date)}.csv'
# with open(csv_filename, "w", newline="") as f:
# writer = csv.writer(f)
# writer.writerows(data)
# print("Updated CSV")
At one point I was getting a TypeError, but now it is just outputting an empty array. Any help is appreciated!
json_file["attributes"] is a list so you can't access it like a dictionary.
Try this:
# Collect the path of every file that has at least one attribute whose
# "value" is "GrapeCity"; files lacking the expected keys are reported and skipped.
for single_file in files:
    with open(single_file, 'r') as handle:
        try:
            document = json.load(handle)
            # "attributes" is a list of dicts, so scan it entry by entry.
            found = any(
                entry["value"] == "GrapeCity"
                for entry in document["attributes"]
            )
        except KeyError:
            print(f'Skipping {single_file}')
            continue
    if found:
        data.append(single_file)

How to merge non-fixed key json multilines into one json abstractly

If I have a large JSON file that has 30 million entries like this:
{"id":3,"price":"231","type":"Y","location":"NY"}
{"id":4,"price":"321","type":"N","city":"BR"}
{"id":5,"price":"354","type":"Y","city":"XE","location":"CP"}
--snip--
{"id":30373779,"price":"121","type":"N","city":"SR","location":"IU"}
{"id":30373780,"price":"432","type":"Y","location":"TB"}
{"id":30373780,"price":"562","type":"N","city":"CQ"}
how can I extract only the location and the city and collect them into one JSON object like this, in Python:
{
"orders":{
3:{
"location":"NY"
},
4:{
"city":"BR"
},
5:{
"city":"XE",
"location":"CP"
},
30373779:{
"city":"SR",
"location":"IU"
},
30373780:{
"location":"TB"
},
30373780:{
"city":"CQ"
}
}
}
P.S.: beautifying the syntax is not necessary.
Assuming your input file is actually in jsonlines format, then you can read each line, extract the city and location keys from the dict and then append those to a new dict:
import json
from collections import defaultdict
# order id -> {"location": ..., "city": ...}. defaultdict(dict) creates the
# per-id dict on first use, so lines sharing an id are merged into one entry.
orders = {'orders': defaultdict(dict)}
with open('orders.txt', 'r') as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines, e.g. an empty line at end of file
        o = json.loads(line)
        order_id = o['id']  # avoid shadowing the built-in id()
        for key in ('location', 'city'):
            if key in o:
                orders['orders'][order_id][key] = o[key]
# Serialize with json.dumps so the result is real JSON (double quotes, string
# keys) rather than the repr of a defaultdict.
print(json.dumps(orders, indent=4))
Output for your sample data (note it has two 30373780 id values, so the values get merged into one dict):
{
"orders": {
"3": {
"location": "NY"
},
"4": {
"city": "BR"
},
"5": {
"location": "CP",
"city": "XE"
},
"30373779": {
"location": "IU",
"city": "SR"
},
"30373780": {
"location": "TB",
"city": "CQ"
}
}
}
As you've said that your file is pretty big, you probably don't want to keep all entries in memory. Here is a way to consume the source file line by line and write the output immediately:
import json
# Stream the input line by line and write each entry straight to the output,
# so only one record is held in memory at a time.
with open(r"in.jsonp") as i_f, open(r"out.json", "w") as o_f:
    o_f.write('{"orders":{')
    separator = ""  # nothing before the first entry, "," before every later one
    for i in i_f:
        if not i.strip():
            continue  # skip blank lines instead of failing in json.loads
        i_obj = json.loads(i)
        o_obj = {}
        if location := i_obj.get("location"):
            o_obj["location"] = location
        if city := i_obj.get("city"):
            o_obj["city"] = city
        # The id is written bare (unquoted) to mirror the layout requested in
        # the question; wrap it in double quotes if strict JSON is required.
        o_f.write(f'{separator}{i_obj["id"]}:')
        json.dump(o_obj, o_f)
        separator = ","
    o_f.write('}}')
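It will generate a semi-valid JSON object in the same format you provided in your question (the ids are written as unquoted keys, as in your example, so a strict JSON parser will reject it).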

Nested dictionary from data in a text file

I am new to Python and I am trying to build a dictionary from the data in a text file and write it out as a JSON file. The text file looks like this:
557e155fc5f0 557e155fc5f0 1 557e155fc602 1
557e155fc610 557e155fc610 2
557e155fc620 557e155fc620 1 557e155fc626 1
557e155fc630 557e155fc630 1 557e155fc636 1
557e155fc640 557e155fc640 1
557e155fc670 557e155fc670 1 557e155fc698 1
557e155fc6a0 557e155fc6a0 1 557e155fc6d8 1
And the desired output for the first two lines would be
{ "functions": [
{
"address": "557e155fc5f0",
"blocks": [
"557e155fc5f0": "calls":{1}
"557e155fc602": "calls":{1}
]
},
{
"address": " 557e155fc610",
"blocks": [
" 557e155fc610": "calls":{2}
]
},
I have written a script to begin with, but I don't know how to continue.
import json
# Asker's attempt: map the first token of every non-empty line to the list of
# the remaining whitespace-separated tokens, then print the mapping as JSON.
filename = 'calls2.out' # here the name of the output file
funs = {}
# NOTE(review): bbls is created but not used in the lines shown.
bbls = {}
with open(filename) as fh: # open file
for line in fh: # walk line by line
if line.strip(): # non-empty line?
rtn,bbl = line.split(None,1) # None means 'all whitespace', the default
# NOTE(review): bbl is a string, so this loop runs once per character and
# stores the same bbl.split() result each time; j is never used.
for j in range(len(bbl)):
funs[rtn] = bbl.split()
print(json.dumps(funs, indent=2, sort_keys=True))
#json = json.dumps(fun, indent=2, sort_keys=True) # to save it into a file
#f = open("fout.json","w")
#f.write(json)
#f.close()
this script gives me this output
"557e155fc5f0": [
"557e155fc5f0",
"1",
"557e155fc602",
"1"
],
"557e155fc610": [
"557e155fc610",
"2"
],
"557e155fc620": [
"557e155fc620",
"1",
"557e155fc626",
"1"
],
funs[rtn] = bbl.split()
Here you add the list ["557e155fc5f0", "1"] as the value for the key rtn, because bbl is "557e155fc5f0 1" at this point, but you want to add it as a dictionary.
temp_dict = {bbl.split()[0]: bbl.split()[1]}
funs[rtn] = temp_dict
This will give you following json:
{
"557e155fc6a0": {
"557e155fc6a0": "1"
}
}
If you need the calls as key in the json you'd need to extend a bit:
temp_dict = {bbl.split()[0]: {"calls": bbl.split()[1]}}
funs[rtn] = temp_dict
Gives you this:
{
"557e155fc6a0": {
"557e155fc6a0": {
"calls": "1"
}
}
}
Also, your example JSON is malformed; I assume you want something like this:
{
"functions": {
"address": "557e155fc5f0",
"blocks": {
"557e155fc5f0": {
"calls": 1
},
"557e155fc602": {
"calls": 1
}
}
},
"address": " 557e155fc610",
"blocks": {
"557e155fc610": {
"calls": 2
}
}
}
I'd try an Online JSON Editor for testing/creating examples.
Hope it helps!

Grab element from json dump

I'm using the following python code to connect to a jsonrpc server and nick some song information. However, I can't work out how to get the current title in to a variable to print elsewhere. Here is the code:
TracksInfo = []
for song in playingSongs:
data = { "id":1,
"method":"slim.request",
"params":[ "",
["songinfo",0,100, "track_id:%s" % song, "tags:GPASIediqtymkovrfijnCYXRTIuwxN"]
]
}
params = json.dumps(data, sort_keys=True, indent=4)
conn.request("POST", "/jsonrpc.js", params)
httpResponse = conn.getresponse()
data = httpResponse.read()
responce = json.loads(data)
print json.dumps(responce, sort_keys=True, indent=4)
TrackInfo = responce['result']["songinfo_loop"][0]
TracksInfo.append(TrackInfo)
This brings me back the data in json format and the print json.dump brings back:
pi#raspberrypi ~/pithon $ sudo python tom3.py
{
"id": 1,
"method": "slim.request",
"params": [
"",
[
"songinfo",
"0",
100,
"track_id:-140501481178464",
"tags:GPASIediqtymkovrfijnCYXRTIuwxN"
]
],
"result": {
"songinfo_loop": [
{
"id": "-140501481178464"
},
{
"title": "Witchcraft"
},
{
"artist": "Pendulum"
},
{
"duration": "253"
},
{
"tracknum": "1"
},
{
"type": "Ogg Vorbis (Spotify)"
},
{
"bitrate": "320k VBR"
},
{
"coverart": "0"
},
{
"url": "spotify:track:2A7ZZ1tjaluKYMlT3ItSfN"
},
{
"remote": 1
}
]
}
}
What i'm trying to get is result.songinfoloop.title (but I tried that!)
The songinfo_loop structure is.. peculiar. It is a list of dictionaries each with just one key.
Loop through it until you have one with a title:
TrackInfo = next(d['title'] for d in responce['result']["songinfo_loop"] if 'title' in d)
TracksInfo.append(TrackInfo)
A better option would be to 'collapse' all those dictionaries into one:
songinfo = reduce(lambda d, p: d.update(p) or d,
responce['result']["songinfo_loop"], {})
TracksInfo.append(songinfo['title'])
songinfo_loop is a list not a dict. That means you need to call it by position, or loop through it and find the dict with a key value of "title"
positional:
responce["result"]["songinfo_loop"][1]["title"]
loop:
for info in responce["result"]["songinfo_loop"]:
if "title" in info.keys():
print info["title"]
break
else:
print "no song title found"
Really, it seems like you would want to have the songinfo_loop be a dict, not a list. But if you need to leave it as a list, this is how you would pull the title.
The result is really a standard python dict, so you can use
responce["result"]["songinfoloop"]["title"]
which should work

Categories