Iterate through nested JSON in Python - python

js = {
"status": "ok",
"meta": {
"count": 1
},
"data": {
"542250529": [
{
"all": {
"spotted": 438,
"battles_on_stunning_vehicles": 0,
"avg_damage_blocked": 39.4,
"capture_points": 40,
"explosion_hits": 0,
"piercings": 3519,
"xp": 376586,
"survived_battles": 136,
"dropped_capture_points": 382,
"damage_dealt": 783555,
"hits_percents": 74,
"draws": 2,
"battles": 290,
"damage_received": 330011,
"frags": 584,
"stun_number": 0,
"direct_hits_received": 1164,
"stun_assisted_damage": 0,
"hits": 4320,
"battle_avg_xp": 1299,
"wins": 202,
"losses": 86,
"piercings_received": 1004,
"no_damage_direct_hits_received": 103,
"shots": 5857,
"explosion_hits_received": 135,
"tanking_factor": 0.04
}
}
]
}
}
Let us name this json "js" as a variable, this variable will be in a for-loop.
To understand better what I'm doing here, I'm trying to collect data from a game.
This game has hundreds of different tanks; each tank has a tank_id that I can post to the game server, which responds with that tank's performance data as the "js" shown above.
for tank_id: json = requests.post(tank_id) etc...
and fetch all these values to my database as shown in the screenshot.
my python code for it:
def api_get():
for property in js['data']['542250529']['all']:
spotted = property['spotted']
battles_on_stunning_vehicles = property['battles_on_stunning_vehicles']
# etc
# ...
insert_to_db(spotted, battles_on_stunning_vehicles, etc....)
the exception is:
for property in js['data']['542250529']['all']:
TypeError: list indices must be integers or slices, not str
and when:
print(js['data']['542250529'])
i get the rest of the js as a string, and i can't iterate... can't be used a valid json string, also what's inside js['data']['542250529'] is a list containing only the item 'all'..., any help would be appreciated

You just missed [0] to get the first item in a list:
def api_get():
    """Read individual statistics out of the module-level ``js`` response.

    ``js['data']['542250529']`` is a list, so its first element is selected
    with ``[0]`` before the ``'all'`` dictionary can be reached.
    """
    # 'all' maps each statistic name straight to its value, so index it by
    # key; iterating over it would only yield the key strings.
    stats = js['data']['542250529'][0]['all']
    spotted = stats['spotted']
    # ...
Look carefully at the data structure in the source JSON.

There is a list containing the dictionary with a key of all. So you need to use js['data']['542250529'][0]['all'] not js['data']['542250529']['all']. Then you can use .items() to get the key-value pairs.
See below.
js = {
"status": "ok",
"meta": {
"count": 1
},
"data": {
"542250529": [
{
"all": {
"spotted": 438,
"battles_on_stunning_vehicles": 0,
"avg_damage_blocked": 39.4,
"capture_points": 40,
"explosion_hits": 0,
"piercings": 3519,
"xp": 376586,
"survived_battles": 136,
"dropped_capture_points": 382,
"damage_dealt": 783555,
"hits_percents": 74,
"draws": 2,
"battles": 290,
"damage_received": 330011,
"frags": 584,
"stun_number": 0,
"direct_hits_received": 1164,
"stun_assisted_damage": 0,
"hits": 4320,
"battle_avg_xp": 1299,
"wins": 202,
"losses": 86,
"piercings_received": 1004,
"no_damage_direct_hits_received": 103,
"shots": 5857,
"explosion_hits_received": 135,
"tanking_factor": 0.04
}
}
]
}
}
# Look the nested statistics dictionary up once and reuse it below.
stats = js['data']['542250529'][0]['all']
for name, value in stats.items():
    print("key:", name, " val:", value)
# Alternative: iterate over the keys and look each value up.
for name in stats:
    print("key:", name, " val:", stats[name])

Related

How to extract items inside JSON one by one with regex condition

I use the Google Vision API in my project. The OCR result returns a JSON file that represents all the items the API recognized, with their coordinates. I want to add a feature that runs through the whole JSON to find the item I want and then stores the coordinates and the description in an array/list.
This is the returned JSON format:
{
"textAnnotations": [
{
"description": "a",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},
{
"description": "b",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},{c...},{d...},{e...}
],
"fullTextAnnotation": {
"pages": "not important",
"text": "a\nb\nc\nd\ne\n"
}
}
My aim is to find 2 items and calculate whether they are parallel. For example, I want to find out b or c or d or e is parallel with a, and I have already stored the coordinate of a into a list with this method:
def getJson():
    """Load and return the parsed contents of ``json_file.json``.

    Returns the decoded JSON value, or ``None`` when the file cannot be read
    or parsed (the error and its traceback are printed in that case).
    """
    try:
        # The context manager closes the file even if reading/parsing fails.
        with open('json_file.json', 'r', encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        print(e)
        print(traceback.format_exc())
        return None
def get_keywords_coordinates(origin_data):
    """Return the flattened corner coordinates of the annotation described as "a".

    The result is ``[x0, y0, x1, y1, x2, y2, x3, y3]``, built from the first
    four vertices of the first entry in ``origin_data['textAnnotations']``
    whose description is ``"a"``. An empty list is returned when no entry
    matches.
    """
    # A comprehension variable is not visible outside the comprehension in
    # Python 3, so the matching node is fetched explicitly.
    node = next(
        (entry for entry in origin_data['textAnnotations'] if entry['description'] == "a"),
        None,
    )
    if node is None:
        return []
    keyword_coords = []
    for vertex in node['boundingPoly']['vertices'][:4]:
        keyword_coords.append(vertex['x'])
        keyword_coords.append(vertex['y'])
    return keyword_coords
which keyword_coords is the list that contains the coordinate, which looks like this:
keyword_coords[235, 409, 247, 408, 250, 456, 238, 457]
I will put it and another keyword coordinate into a function to do that calculation but I have no idea how to get the coordinate of b, c, d, and e one by one (abcde is just an example, the real situation will not be able to define the item name with hard code. I may let the program finds out the keywords with some regex)
How should I deal with this?
I don't know what exactly you want to do but it doesn't need regex but normal for-loop to work with items one by one.
First I would change get_keywords_coordinates to get all items and coordinates
def get_keywords_coordinates(data):
    """Collect every annotation's description together with its coordinates.

    Returns a list of ``(description, coords)`` tuples, one per entry of
    ``data['textAnnotations']`` and in the same order, where ``coords`` is the
    flat list ``[x0, y0, x1, y1, ...]`` of that entry's bounding-polygon
    vertices.
    """
    results = []
    for item in data['textAnnotations']:
        coords = []
        for point in item["boundingPoly"]['vertices']:
            # NOTE(review): Vision API JSON can reportedly omit a coordinate
            # whose value is 0 - confirm against the API reference. A missing
            # "x" or "y" is read as 0 instead of raising KeyError.
            coords.extend((point.get('x', 0), point.get('y', 0)))
        results.append((item["description"], coords))
    return results
results = get_keywords_coordinates(data)
print('--- coords ---')
print(results)
Result:
--- coords ---
[
('a', [235, 409, 247, 408, 250, 456, 238, 457]),
('b', [335, 409, 347, 408, 350, 456, 338, 457]),
('c', [435, 409, 447, 408, 450, 456, 438, 457])
]
Then I would pick one selected item (e.g. the first item, a) and create a list without this item
selected = results[0]
#rest = results[1:]
rest = results.copy() # more useful if I would selected item with different index
rest.remove(selected) # more useful if I would selected item with different index
print('--- items ---')
print('selected:', selected)
print('rest :', rest)
print('---')
Result:
--- items ---
selected: ('a', [235, 409, 247, 408, 250, 456, 238, 457])
rest : [('b', [335, 409, 347, 408, 350, 456, 338, 457]), ('c', [435, 409, 447, 408, 450, 456, 438, 457])]
And I could use for-loop to compare selected item with other items - one by one
for item in rest:
print('compare', selected[0], 'with', item[0])
print(selected[0], selected[1])
print(item[0], item[1])
Result:
compare a with b
a [235, 409, 247, 408, 250, 456, 238, 457]
b [335, 409, 347, 408, 350, 456, 338, 457]
compare a with c
a [235, 409, 247, 408, 250, 456, 238, 457]
c [435, 409, 447, 408, 450, 456, 438, 457]
Full example:
data = {
"textAnnotations": [
{
"description": "a",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},
{
"description": "b",
"boundingPoly": {
"vertices": [
{
"x": 335,
"y": 409
},
{
"x": 347,
"y": 408
},
{
"x": 350,
"y": 456
},
{
"x": 338,
"y": 457
}
]
}
},
{
"description": "c",
"boundingPoly": {
"vertices": [
{
"x": 435,
"y": 409
},
{
"x": 447,
"y": 408
},
{
"x": 450,
"y": 456
},
{
"x": 438,
"y": 457
}
]
}
},
],
"fullTextAnnotation": {
"pages": "not important",
"text": "a\nb\nc\nd\ne\n"
}
}
def get_keywords_coordinates(data):
    """Collect every annotation's description together with its coordinates.

    Returns a list of ``(description, coords)`` tuples, one per entry of
    ``data['textAnnotations']`` and in the same order, where ``coords`` is the
    flat list ``[x0, y0, x1, y1, ...]`` of that entry's bounding-polygon
    vertices.
    """
    results = []
    for item in data['textAnnotations']:
        coords = []
        for point in item["boundingPoly"]['vertices']:
            # NOTE(review): Vision API JSON can reportedly omit a coordinate
            # whose value is 0 - confirm against the API reference. A missing
            # "x" or "y" is read as 0 instead of raising KeyError.
            coords.extend((point.get('x', 0), point.get('y', 0)))
        results.append((item["description"], coords))
    return results
results = get_keywords_coordinates(data)
print('--- coords ---')
print(results)

# Use the first entry as the reference; every other entry is compared to it.
selected = results[0]
rest = list(results)
rest.remove(selected)

print('--- keywords ---')
print('selected:', selected)
print('rest :', rest)
print('---')

# Compare the reference entry with each remaining entry, one by one.
selected_key, selected_coords = selected
for other_key, other_coords in rest:
    print('compare', selected_key, 'with', other_key)
    print(selected_key, selected_coords)
    print(other_key, other_coords)

How can I fetch specific information for a dictionary thats in an api

So I am writing a code that can give me certain information. The url https://api.brawlhalla.com/player/28472387/ranked?api_key=MY_API_KEY
provides information about my profile. When print it in text I get
{
"name": "Twitter: ufrz_",
"brawlhalla_id": 28472387,
"rating": 2093,
"peak_rating": 2110,
"tier": "Diamond",
"wins": 140,
"games": 257,
"region": "US-E",
"global_rank": 0,
"region_rank": 0,
"legends": [
{
"legend_id": 3,
"legend_name_key": "bodvar",
"rating": 870,
"peak_rating": 870,
"tier": "Tin 4",
"wins": 2,
"games": 4
},
{
"legend_id": 4,
"legend_name_key": "cassidy",
"rating": 968,
"peak_rating": 968,
"tier": "Bronze 2",
"wins": 0,
"games": 0
},
{
"legend_id": 5,
"legend_name_key": "orion",
"rating": 1131,
"peak_rating": 1131,
"tier": "Silver 1",
"wins": 1,
"games": 3
},
(not the full page.)
Here is the code I used to fetch this
import requests
url = "https://api.brawlhalla.com/player/28472387/ranked?api_key= MY_API_KEY"
r = requests.get(url)
print(r.text)
Now for example how would I go about fetching my rating and not the actual word but the number "2093" I tried someway but they didn't work. I am using bs4 and request and new to both so I really don't know how I would get this.
(Just want to say sorry for poorly worded question I don't really know how word my issue so my apologies in advance)
First of all, you have to convert your result to a json object:
data = r.json()
Then, you can request using data['rating']
For your question :
how would I go about getting the ranking for the legend_name_key "bodvar"? How could I specifically get that legend's ranking?
# Print the rating of the legend whose legend_name_key is "bodvar".
for legend in data['legends']:
    if legend['legend_name_key'] == "bodvar":
        print(legend['rating'])
        break  # stop at the first match; `return` is only valid inside a function
or using a function :
def getLegendByName(data, legendName):
    """Return the first legend entry whose ``legend_name_key`` is ``legendName``.

    Searches ``data['legends']`` in order and returns ``None`` when no entry
    matches.
    """
    matching = (
        entry for entry in data['legends']
        if entry['legend_name_key'] == legendName
    )
    return next(matching, None)
# Look a single legend up by name and read its rating from the API response.
legendName = "bodvar"
data = r.json()
legend = getLegendByName(data, legendName)
if legend is not None:
    legendRating = legend['rating']
else:
    # getLegendByName returned None: no entry carries this legend_name_key.
    print("There is no legend that exists with this name")

Extracting values from nested dictionary from text file to JSON

The text file contains a dictionary of dictionaries. In that text file, for example, "2018" acts as the key; further, "8" is the month, which is a value for "2018" but a key for the next dictionary. I want to fetch the "total_queries_count", "total_dislike" and "unique_users" values.
{"2018":
{"8":{ "total_queries_count": 4,
"queries_without_teachers": 3,
"non_teacher_queries": 1,
"total_dislike": 0,
"unique_users": [", "landmark", "232843"],
"user_dislike": 0
},
"9":{ "total_queries_count": 1021,
"queries_without_teachers": 0,
"non_teacher_queries": 1021,
"total_dislike": 0,
"unique_users": [", "1465146", "14657", "dfgf", "1123", "456", "1461546", "Ra", "siva", "234", "ramesh", "3456", "23", "43567", "sfdf", "sdsd", "ra", "sddff", "1234", "rames", "RAM", "444", "123", "333", "RAM", "789", "itassistant", "rame", "12345"],
"user_dislike": 0},
"10": {"total_queries_count": 352,
"queries_without_teachers": 1,
"non_teacher_queries": 351,
"total_dislike": 0,
"unique_users": [", "1465146", "777", "43567", "1234", "456", "123456", "12345", "232843"],
"user_dislike": 0
},
"11": {"total_queries_count": 180,
"queries_without_teachers": 0,
"non_teacher_queries": 180,
"total_dislike": 12,
"unique_users": [", "75757575", "9000115", "9000157", "9000494", "9000164", "123453"],
"user_dislike": 12},
"12": {"total_queries_count": 266,
"queries_without_teachers": 0,
"non_teacher_queries": 266,
"total_dislike": 16,
"unique_users": [", "131422", "121550", "9000508", "9000560", "9000115", "9000371", "9000372", "93979", "146625", "114586", "165937", "9000494", "9000463", "38404", "129458", "62948", "125143", "9000179", "9000145", "9000001", "9000164", "81849", "102663", "9000123", "105407", "33517", "21344", "9000213", "202074", "9000103", "18187", "9000342", "9000125", "9000100", "9000187", "18341", "9000181", "168802", "9000529", "12345", "110127", "9000134", "100190", "9000352", "9000156", "9000055", "tcs_hariharas", "9000078", "204101", "9000050", "9000139"],
"user_dislike": 16}
}
}
Check https://docs.python.org/3/tutorial/datastructures.html#dictionaries
You can access needed keys like this:
# assuming your initial nested dict is called 'data'
data["2018"]["8"]["total_queries_count"]
If you want to aggregate data for all years and months in one place, you can do this:
overall_queries = 0
overall_dislikes = 0
users = set()  # a set rather than a list, so each user is kept only once
for months in data.values():  # each year maps to a dict of months
    for stats in months.values():  # each month maps to its counters
        users.update(stats["unique_users"])
        overall_queries += stats["total_queries_count"]
        overall_dislikes += stats["total_dislike"]
If you want to keep your result separated by years you can do this:
# Build one summary per year: summed counters plus the set of distinct users.
result = {}
for year, months in data.items():
    overall_queries = sum(stats["total_queries_count"] for stats in months.values())
    overall_dislikes = sum(stats["total_dislike"] for stats in months.values())
    users = set().union(*(stats["unique_users"] for stats in months.values()))
    result[year] = {
        "overall_queries": overall_queries,
        "overall_dislikes": overall_dislikes,
        "users": users,
    }
Result:
{'2018': {'overall_dislikes': 28,
'overall_queries': 1823,
'users': {'100190',
'102663',
'105407',
'110127',
...}}}

OverflowError: MongoDB can only handle up to 8-byte ints?

I have spent the last 12 hours scouring the web. I am completely lost, please help.
I am trying to pull data from an API endpoint and put it into MongoDB. The data looks like this:
{"_links": {
"self": {
"href": "https://us.api.battle.net/data/sc2/ladder/271302?namespace=prod"
}
},
"league": {
"league_key": {
"league_id": 5,
"season_id": 37,
"queue_id": 201,
"team_type": 0
},
"key": {
"href": "https://us.api.battle.net/data/sc2/league/37/201/0/5?namespace=prod"
}
},
"team": [
{
"id": 6956151645604413000,
"rating": 5321,
"wins": 131,
"losses": 64,
"ties": 0,
"points": 1601,
"longest_win_streak": 15,
"current_win_streak": 4,
"current_rank": 1,
"highest_rank": 10,
"previous_rank": 1,
"join_time_stamp": 1534903699,
"last_played_time_stamp": 1537822019,
"member": [
{
"legacy_link": {
"id": 9964871,
"realm": 1,
"name": "mTOR#378",
"path": "/profile/9964871/1/mTOR"
},
"played_race_count": [
{
"race": "Zerg",
"count": 195
}
],
"character_link": {
"id": 9964871,
"battle_tag": "Hellghost#11903",
"key": {
"href": "https://us.api.battle.net/data/sc2/character/Hellghost-11903/9964871?namespace=prod"
}
}
}
]
},
{
"id": 11611747760398664000, .....
....
Here's the code:
for ladder_number in ladder_array:
ladder_call_url = ladder_call+slash+str(ladder_number)+eng_locale+access_token
url = str(ladder_call_url)
response = requests.get(url)
print('trying ladder number '+str(ladder_number))
print('calling :'+url)
if response.status_code == 200:
print('status: '+str(response))
mmr_db.ladders.insert_one(response.json())
I get an error:
OverflowError: MongoDB can only handle up to 8-byte ints?
Is this because the data I am trying to load is too large? Are the "ID" integers too large?
Oh man, any help would be sincerely appreciated.
_______ EDIT ____________
Edited to include the Traceback:
Traceback (most recent call last):
File "C:\scripts\mmr_from_ladders.py", line 96, in <module>
mmr_db.ladders.insert_one(response.json(), bypass_document_validation=True)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\collection.py", line 693, in insert_one
session=session),
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\collection.py", line 607, in _insert
bypass_doc_val, session)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\collection.py", line 595, in _insert_one
acknowledged, _insert_command, session)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\mongo_client.py", line 1243, in _retryable_write
return self._retry_with_session(retryable, func, s, None)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\mongo_client.py", line 1196, in _retry_with_session
return func(session, sock_info, retryable)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\collection.py", line 590, in _insert_command
retryable_write=retryable_write)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\pool.py", line 584, in command
self._raise_connection_failure(error)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\pool.py", line 745, in _raise_connection_failure
raise error
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\pool.py", line 579, in command
unacknowledged=unacknowledged)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\network.py", line 114, in command
codec_options, ctx=compression_ctx)
File "C:\Users\me\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pymongo\message.py", line 679, in _op_msg
flags, command, identifier, docs, check_keys, opts)
OverflowError: MongoDB can only handle up to 8-byte ints
The BSON spec — MongoDB’s native binary extended JSON format / data type — only supports 32 bit (signed) and 64 bit (signed) integers — 8 bytes being 64 bits.
The maximum integer value that can be stored in a 64 bit int is:
9,223,372,036,854,775,807
In your example you appear to have larger ids, for example:
11,611,747,760,398,664,000
I’m guessing that the app generating this data is using uint64 types (an unsigned 64-bit integer can hold values up to 2^64 − 1, roughly twice the signed maximum).
I would start by looking at either of these potential solutions, if possible:
Changing the other side to use int64 (signed) types for the IDs.
Replacing the incoming IDs using ObjectId() as you then get a 12 byte ~ GUID for your unique IDs.

TypeError: list indices must be integers or slices, not str <encoding error>

So, my code looks like this:
import requests
import random
def load():
req = requests.get(https: // yande.re / post.json?tags = rating % 3
Asafe + -pantyshot + -panties + & ms = 1 & page = 12650 & limit = 1)
data = Posts(req.json()["id"][0], req.json()["tags"], slice(req.json()["creator_id"]), req.json()["author"],
req.json()["source"],
req.json()["score"], req.json()["md5"], req.json()["file_url"], req.json()["sample_url"],
req.json()["width"],
req.json()["height"])
all = data.tags, data.creator_id, data.author, data.source, data.score, data.md5, data.file_url, data.sample_url, data.width, data.height
return all
And, when I run the load(), I have this output:
Traceback (most recent call last): File "", line
134, in File "", line 126, in anime
TypeError: list indices must be integers or slices, not str
What could be causing it?
By the way, the data I'm fetching looks like this:
[
{
"actual_preview_height": 218,
"jpeg_url": "https://files.yande.re/image/32a001e7b5050828c9b07e62de634958/yande.re%20376617%20dress%20novelance%20see_through.jpg",
"status": "active",
"preview_url": "https://assets.yande.re/data/preview/32/a0/32a001e7b5050828c9b07e62de634958.jpg",
"has_children": false,
"source": "http://i2.pixiv.net/img-original/img/2016/12/05/00/00/10/60241721_p0.jpg",
"score": 1,
"height": 1392,
"rating": "s",
"id": 376617,
"last_commented_at": 0,
"frames": [],
"md5": "32a001e7b5050828c9b07e62de634958",
"updated_at": 1480900734,
"creator_id": 280440,
"frames_pending_string": "",
"frames_string": "",
"actual_preview_width": 300,
"is_shown_in_index": true,
"frames_pending": [],
"change": 1992459,
"last_noted_at": 0,
"approver_id": null,
"is_held": false,
"preview_width": 150,
"tags": "dress novelance see_through",
"preview_height": 109,
"created_at": 1480900721,
"file_ext": "jpg",
"sample_height": 1088,
"sample_url": "https://files.yande.re/sample/32a001e7b5050828c9b07e62de634958/yande.re%20376617%20sample%20dress%20novelance%20see_through.jpg",
"parent_id": null,
"width": 1920,
"jpeg_file_size": 0,
"sample_file_size": 478570,
"author": "LolitaJoy",
"file_size": 989513,
"file_url": "https://files.yande.re/image/32a001e7b5050828c9b07e62de634958/yande.re%20376617%20dress%20novelance%20see_through.jpg",
"is_note_locked": false,
"is_pending": false,
"sample_width": 1500,
"jpeg_width": 1920,
"jpeg_height": 1392,
"is_rating_locked": false
}
]
I found the problem actually. instead of req.json()["id"], it should have been req.json()[0]["id"]

Categories