How to print out the exact field/string of the JSON output? - python

I'm trying to filter the results I get from a GET request. The output I want is just the summary, key and self fields, but I'm getting a lot of JSON data back.
I've tried googling how to do this and I'm getting nowhere.
Here is my code; the commented lines are the attempts I have already made.
import requests
import json
import re
import sys
url ="--------"
auth='i.g--t----------', 'X4------'
r = requests.get(url, auth=(auth))
data = r.json()
#print( json.dumps(data, indent=2) )
#res1 = " ".join(re.split("summary", data))
#print ("first string result: ", str(res1))
#json_str = json.dumps(data)
#resp = json.loads(json_str)
#print (resp['id'])
#resp_dict = json.loads(resp_str)
#resp_dict.get('name')
#print('dasdasd', json_str["summary"])
Here is an example of the API output I get with print( json.dumps(data, indent=2) ) (truncated):
{
"id": "65621",
"self": "https://bboxxltd.atlassian.net/rest/api/2/issue/65621",
"key": "CMS-5901",
"fields": {
"summary": "new starter: Edoardo Bologna",
"customfield_10700": [
{
"id": "2",
"name": "BBOXX Rwanda HQ",
"_links": {
"self": "https://bboxxltd.atlassian.net/rest/servicedeskapi/organization/2"
}
}
},
"inwardIssue": {
"id": "65862",
"key": "BMT-2890",
"self": "https://bboxxltd.atlassian.net/rest/api/2/issue/65862",
"fields": {
"summary": "ERP Databases access with Read Only",
"status": {
"self": "https://bboxxltd.atlassian.net/rest/api/2/status/10000",
"description": "",
"iconUrl": "https://bboxxltd.atlassian.net/",
"name": "To Do",
"id": "10000",
"statusCategory": {
"self": "https://bboxxltd.atlassian.net/rest/api/2/statuscategory/2",
"id": 2,
"key": "new",
"colorName": "blue-gray",
"name": "To Do"
}
},
"priority": {
"self": "https://bboxxltd.atlassian.net/rest/api/2/priority/4",
"iconUrl": "https://bboxxltd.atlassian.net/images/icons/priorities/low.svg",
"name": "Low",
The errors I get are:
Traceback (most recent call last):
File "c:/Users/IanJayloG/Desktop/Python Files/Ex_Files_Learning_Python/Exercise Files/Test/Untitled-1.py", line 17, in <module>
print('dasdasd', data["summary"])
KeyError: 'summary'
PS C:\Users\IanJayloG\Desktop\Python Files\Ex_Files_Learning_Python\Exercise Files> & C:/Users/IanJayloG/AppData/Local/Programs/Python/Python37-32/python.exe "c:/Users/IanJayloG/Desktop/Python Files/Ex_Files_Learning_Python/Exercise Files/Test/Untitled-1.py"
Traceback (most recent call last):
File "c:/Users/IanJayloG/Desktop/Python Files/Ex_Files_Learning_Python/Exercise Files/Test/Untitled-1.py", line 17, in <module>
print('dasdasd', json_str["summary"])
TypeError: string indices must be integers

The problem with your error message
print('dasdasd', json_str["summary"])
TypeError: string indices must be integers
is that you are trying to access the named field summary on a string (the variable json_str), which does not work because strings don't have named fields. If you use the indexing operator [] on a string, you can only provide integers or slices to extract single characters or substrings, which is obviously not what you intend.
The keys self and key are at the top level of your JSON document, whereas summary is nested under fields. This should do it, without any extra transformation:
import requests

r = requests.get(url, auth=auth)            # url and auth as defined in your script
data = r.json()

data_summary = data['fields']['summary']    # summary is nested under "fields"
data_self = data['self']                    # top-level key
data_key = data['key']                      # top-level key
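If you are not sure that every response contains a fields/summary entry, a small defensive variant (just a sketch) uses dict.get, so a missing key gives you None instead of a KeyError:
# dict.get returns None (or a supplied default) instead of raising KeyError.
summary = data.get('fields', {}).get('summary')
print('summary:', summary)
print('key:', data.get('key'))
print('self:', data.get('self'))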

Related

Extract data from json, append using for loop and save as CSV

I have extracted id, username, and name for 100 followers for 102 politicians using Tweepy. The data is stored in a JSON file named pol_followers. Now I wish to append id and username and save it as a CSV file using the function below. However, when using the function in the last line append_followers_to_csv(pol_followers, "pol_followers.csv") I get the error seen at the bottom.
# Structure of pol_followers. The full pol_followers is much longer...
print(json.dumps(pol_followers, indent=4, sort_keys=True)) # see json data structure
[
{
"data": [
{
"id": "1464206217807601666",
"name": "terry alex",
"username": "terryal51850644"
},
{
"id": "1479032154394968064",
"name": "Charles Williams",
"username": "Charles99924770"
},
{
"id": "2526015770",
"name": "LISA P",
"username": "LISAP0910"
},
{
"id": "2957692520",
"name": "fayaz ahmad",
"username": "ahmadfayaz202"
}
],
"meta": {
"next_token": "F6HS7IU5SRGHEZZZ",
"result_count": 100
}
},
{
"data": [
{
"id": "2482703136",
"name": "HieuVu",
"username": "sachieuhaihanh"
},
{
"id": "580882148",
"name": "Maxine D. Harmon",
"username": "maxxximd"
},
{
"id": "1478867472841334787",
"name": "RBPsych1",
"username": "RBPsych1"
# Create file
csv_follower_file = open("pol_followers.csv", "a", newline="", encoding='utf-8')
csv_follower_writer = csv.writer(csv_follower_file)
# Create headers for the data I want to save. I only want to save these columns in my dataset
csv_follower_writer.writerow(
    ['id', 'username'])
csv_follower_file.close()

def append_followers_to_csv(pol_followers, csv_follower_file):
    # A counter variable
    global follower_id, username
    counter = 0
    # Open OR create the target CSV file
    csv_follower_file = open(csv_follower_file, "a", newline="", encoding='utf-8')
    csv_follower_writer = csv.writer(csv_follower_file)
    for ids in pol_followers['data']:
        # 1. follower ID
        follower_id = ids['id']
        # 2. follower username
        username = ids['username']
        # Assemble all data in a list
        ress = [follower_id, username]
        # Append the result to the CSV file
        csv_follower_writer.writerow(ress)
        counter += 1
    # When done, close the CSV file
    csvFile.close()
    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter)

append_followers_to_csv(pol_followers, "pol_followers.csv")  # Save tweet data in a csv file
File "<input>", line 1, in <module>
File "<input>", line 11, in append_followers_to_csv
TypeError: list indices must be integers or slices, not str
You are just missing an additional loop, like so:
for each_dict in pol_followers:
    for ids in each_dict['data']:
        follower_id = ids['id']
        username = ids['username']
Your JSON object is wrapped in a list, so inside append_followers_to_csv you are not reading the 'data' entry of a dictionary; you are trying to index a list with the string 'data', which you can't do in Python. Either remove the square brackets around the JSON or iterate with for ids in pol_followers[0]['data'].
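Putting that extra loop back into the original function, a minimal sketch (using with so the file is always closed, and dropping the csvFile name that was never defined) could look like this:
import csv

def append_followers_to_csv(pol_followers, csv_path):
    # Append id/username rows for every follower in every response page.
    counter = 0
    with open(csv_path, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        for page in pol_followers:            # each API response in the list
            for follower in page["data"]:     # each follower in that response
                writer.writerow([follower["id"], follower["username"]])
                counter += 1
    print("# of followers added from this file:", counter)

append_followers_to_csv(pol_followers, "pol_followers.csv")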

Python - How to retrieve element from json

Aloha,
My Python routine retrieves JSON from a site, then checks the file, downloads another JSON based on the first answer, and eventually downloads a zip.
The first JSON file gives information about the documents.
Here's an example:
[
{
"id": "d9789918772f935b2d686f523d066a7b",
"originalName": "130010259_AC2_R44_20200101",
"type": "SUP",
"status": "document.deleted",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_AC2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.4212881,
47.6171589,
8.1598899,
50.1338684
],
"documentSource": "UPLOAD",
"uploadDate": "2020-06-25T14:56:27+02:00",
"updateDate": "2021-01-19T14:33:35+01:00",
"fileIdentifier": "SUP-AC2-R44-130010259-20200101",
"legalControlStatus": 101
},
{
"id": "6a9013bdde6acfa632861aeb1a02942b",
"originalName": "130010259_AC2_R44_20210101",
"type": "SUP",
"status": "document.production",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_AC2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.4212881,
47.6171589,
8.1598899,
50.1338684
],
"documentSource": "UPLOAD",
"uploadDate": "2021-01-18T16:37:01+01:00",
"updateDate": "2021-01-19T14:33:29+01:00",
"fileIdentifier": "SUP-AC2-R44-130010259-20210101",
"legalControlStatus": 101
},
{
"id": "efd51feaf35b12248966cb82f603e403",
"originalName": "130010259_PM2_R44_20210101",
"type": "SUP",
"status": "document.production",
"legalStatus": "APPROVED",
"name": "130010259_SUP_R44_PM2",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
3.6535762,
47.665021,
7.9509455,
49.907347
],
"documentSource": "UPLOAD",
"uploadDate": "2021-01-28T09:52:31+01:00",
"updateDate": "2021-01-28T18:53:34+01:00",
"fileIdentifier": "SUP-PM2-R44-130010259-20210101",
"legalControlStatus": 101
},
{
"id": "2e1b6104fdc09c84077d54fd9e74a7a7",
"originalName": "444619258_I4_R44_20210211",
"type": "SUP",
"status": "document.pre_production",
"legalStatus": "APPROVED",
"name": "444619258_SUP_R44_I4",
"grid": {
"name": "R44",
"title": "GRAND EST"
},
"bbox": [
2.8698336,
47.3373246,
8.0881368,
50.3796449
],
"documentSource": "UPLOAD",
"uploadDate": "2021-04-19T10:20:20+02:00",
"updateDate": "2021-04-19T14:46:21+02:00",
"fileIdentifier": "SUP-I4-R44-444619258-20210211",
"legalControlStatus": 100
}
]
What I'm trying to do is retrieve the "id" values from this JSON file (e.g. "id": "2e1b6104fdc09c84077d54fd9e74a7a7").
I've tried
import json
from jsonpath_rw import jsonpath, parse
import jsonpath_rw_ext as jp

with open('C:/temp/gpu/SUP/20210419/SUPGE.json') as f:
    d = json.load(f)
    data = json.dumps(d)
    print("oriName: {}".format( jp.match1("$.id[*]", data) ))
It doesn't work. In fact, I'm not sure how jsonpath-rw is intended to work; thankfully there was this blog post, but I'm still stuck.
Does anyone have a clue?
With the id, I'll be able to download another json and in this json there'll be an archiveUrl to get the zipfile.
Thanks in advance.
import json

with open('SUPGE.json') as f:
    d = json.load(f)

for i in d:
    print(i.get('id'))
This will print the ids only:
d9789918772f935b2d686f523d066a7b
6a9013bdde6acfa632861aeb1a02942b
efd51feaf35b12248966cb82f603e403
2e1b6104fdc09c84077d54fd9e74a7a7
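If you still want jsonpath to work, the likely problem in the original attempt is that json.dumps(d) turns the parsed data back into a string, while the match helpers expect the parsed Python object; and since the document is a top-level list, the path has to select the id of each element. A sketch, assuming jsonpath_rw_ext's match helper behaves as its documentation describes:
import json
import jsonpath_rw_ext as jp

with open('C:/temp/gpu/SUP/20210419/SUPGE.json') as f:
    d = json.load(f)              # keep the parsed object; don't dumps() it back to a string

ids = jp.match("$[*].id", d)      # every id in the top-level list
print(ids)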
Ok.
Here's what I've done.
import json
import urllib.request

# Maybe not the best way to load JSON from a URL, but it works fine
# and lets me test most of the code if needed.
def getResponse(url):
    operUrl = urllib.request.urlopen(url)
    if operUrl.getcode() == 200:
        data = operUrl.read()
        jsonData = json.loads(data)
    else:
        print("Error received", operUrl.getcode())
    return jsonData

# Here I get the JSON from the URL. In the final script this will be a
# parameter, because I have a lot of territories to check.
d = getResponse('https://www.geoportail-urbanisme.gouv.fr/api/document?documentFamily=SUP&grid=R44&legalStatus=APPROVED')
for i in d:
    if i['status'] == 'document.production':
        print('id of the document in production:', i.get('id'))
        # Here we use the id to fetch the whole document.
        # Same server, same API, but a different URL.
        _URL = 'https://www.geoportail-urbanisme.gouv.fr/api/document/' + i.get('id') + '/details'
        d2 = getResponse(_URL)
        print('archive', d2['archiveUrl'])
        urllib.request.urlretrieve(d2['archiveUrl'], 'c:/temp/gpu/SUP/' + d2['metadata'] + '.zip')
        # I used wget in the past and loved its progress bar.
        # Maybe I'll switch to wget because of it.
# Works fine.
Thanks for your answer. I'm delighted to see that you can do amazing things with only the json library. Just normal stuff, but amazing.
Feel free to comment if you think I've missed something.

Python to parse nested JSON values that can be null sometimes

I'm trying to parse the following and pull out primary_ip as a variable. Sometimes primary_ip is "null". Here is an example of the JSON, code and the most recent error I am getting.
{
"count": 67,
"next": "https://master.netbox.dev/api/dcim/devices/?limit=50&offset=50",
"previous": null,
"results": [
{
"id": 28,
"url": "https://master.netbox.dev/api/dcim/devices/28/",
"name": "q2",
"display_name": "q2",
"device_type": {
"id": 20,
"url": "https://master.netbox.dev/api/dcim/device-types/20/",
"manufacturer": {
"id": 15,
"url": "https://master.netbox.dev/api/dcim/manufacturers/15/",
"name": "Zyxel",
"slug": "zyxel"
},
"model": "GS1900",
"slug": "gs1900",
"display_name": "Zyxel GS1900"
},
"device_role": {
"id": 4,
"url": "https://master.netbox.dev/api/dcim/device-roles/4/",
"name": "Access Switch",
"slug": "access-switch"
},
"primary_ip": {
"id": 301,
"url": "https://master.netbox.dev/api/ipam/ip-addresses/301/",
"family": 4,
"address": "172.31.254.241/24"
},
Example Python
import requests
import json

headers = {
    'Authorization': 'Token 63d421a5f733dd2c5070083e80df8b4d466ae525',
    'Accept': 'application/json; indent=4',
}

response = requests.get('https://master.netbox.dev/api/dcim/sites/', headers=headers)
j = response.json()
for results in j['results']:
    x = results.get('name')
    y = results.get('physical_address')

response2 = requests.get('https://master.netbox.dev/api/dcim/devices', headers=headers)
device = response2.json()
for result in device['results']:
    x = result.get('name')
    z = result.get('site')['name']
    # if result.get('primary_ip') != None
    y = result.get('primary_ip', {}).get('address')
    print(x, y, z)
I get the following error when I run it:
ubuntu@ip-172-31-39-26:~$ python3 Netbox-python
Traceback (most recent call last):
File "Netbox-python", line 22, in <module>
y=result.get('primary_ip', {}).get('address')
AttributeError: 'NoneType' object has no attribute 'get'
Which value is None? Is it primary_ip or is it address?
You could try the following:
y = result.get('primary_ip', {}).get('address', 'empty_address')
This will replace a missing address with empty_address.
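One caveat (a fact about dict.get, not about this specific API): if the response contains "primary_ip": null, the key exists with the value None, so result.get('primary_ip', {}) still returns None and the second .get raises exactly the AttributeError in your traceback. A small sketch of the usual workaround, falling back to an empty dict with or:
# `or {}` covers both a missing key and an explicit null value.
primary_ip = result.get('primary_ip') or {}
y = primary_ip.get('address')        # None when the device has no primary IP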
Update:
I have just run your code and got the following output:
LC1 123.123.123.123/24 site1
q1 172.31.254.254/24 COD
q2 172.31.254.241/24 COD
After running this:
import requests
import json

headers = {
    "Authorization": "Token 63d421a5f733dd2c5070083e80df8b4d466ae525",
    "Accept": "application/json; indent=4",
}

response = requests.get("https://master.netbox.dev/api/dcim/sites/", headers=headers)
j = response.json()
for results in j["results"]:
    x = results.get("name")
    y = results.get("physical_address")

response2 = requests.get("https://master.netbox.dev/api/dcim/devices", headers=headers)
device = response2.json()
for result in device["results"]:
    x = result.get("name")
    z = result.get("site")["name"]
    if result.get("primary_ip") != None:
        y = result.get("primary_ip").get("address")
    print(x, y, z)
I am not sure of the expected output, but the code doesn't throw any errors. From looking at the code, there were a few indentation errors where things didn't line up with where they should have been indented.
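Note that with the if guard above, y silently keeps the address from the previous iteration whenever primary_ip is null; if you would rather show a blank, reset it explicitly, for example:
if result.get("primary_ip") is not None:
    y = result["primary_ip"].get("address")
else:
    y = None   # or "", so the previous device's address is not reused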

How to extract a particular value from nested JSON values?

I am new to Python.
I have a small requirement, i.e. I want to extract only one value from the JSON.
Please do correct me if I am wrong.
The JSON input is:
{
"meta": {
"limit": 1,
"next": "/api/v1/ips/?username=sic1&api_key=689db0740ed73c2bf6402a7de0fcf2d7b57111ca&limit=1&objects=&offset=1",
"offset": 0,
"previous": null,
"total_count": 56714
},
"objects": [
{
"_id": "556f4c81dcddec0c41463529",
"bucket_list": [],
"campaign": [
{
"analyst": "prabhu",
"confidence": "medium",
"date": "2015-06-03 14:50:41.440000",
"name": "Combine"
}
],
"created": "2015-06-03 14:50:41.436000",
"ip": "85.26.162.70",
"locations": [],
"modified": "2015-06-18 09:50:51.612000",
"objects": [],
"relationships": [
{
"analyst": "prabhu",
"date": "2015-06-18 09:50:51.369000",
"rel_confidence": "unknown",
"rel_reason": "N/A",
"relationship": "Related_To",
"relationship_date": "2015-06-18 09:50:51.369000",
"type": "Indicator",
"value": "556f4c81dcddec0c4146353a"
}
],
"releasability": [],
"schema_version": 3,
"screenshots": [],
"sectors": [],
"source": [
{
"instances": [
{
"analyst": "prabhu",
"date": "2015-06-03 14:50:41.438000",
"method": "trawl",
"reference": "http://www.openbl.org/lists/base_30days.txt"
}
],
"name": "www.openbl.org"
}
],
"status": "New",
"tickets": [],
"type": "Address - ipv4-addr"
}
]
}
The code I used to get only the IPs from the objects:
import requests
from pprint import pprint
import json
url = 'http://127.0.0.1:8080/api/v1/ips/'
params = {'api_key':'xxxxxx','username': 'abcd'}
r = requests.get(url, params=params, verify=False)
parsed = json.loads(r)
print (parsed['objects']['ip'])
The error I am receiving is:
Traceback (most recent call last):
File "testapi.py", line 9, in <module>
parsed = json.loads(r)
File "/usr/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
TypeError: expected string or buffer
I just want to get the IPs from that JSON input.
Thanks.
You are passing a requests Response object instead of a str to json.loads(). You need to change
parsed = json.loads(r)
to
parsed = json.loads(r.text)
Also, parsed['objects'] is a list; you need to access its first element and then get the key ip:
>>> print(parsed['objects'][0]['ip'])
The problem is in this line: parsed = json.loads(r)
You're receiving the JSON response, but instead of feeding a JSON string to json.loads you're feeding it the Response object <Response [200]>:
>>> r = requests.get('http://www.google.com')
>>> r
<Response [200]>
>>> type(r)
<class 'requests.models.Response'>
(Look closely at the error message: expected string or buffer. That means you're providing something that is NOT a string or buffer, in this case a Response object.)
This is the reason why str(r) didn't work: it just converted <Response [200]> to the string '<Response [200]>', which obviously is not JSON.
Change this line to parsed = json.loads(r.text).
>>> type(r.text)
<type 'unicode'>
and then parsed['objects'][0]['ip'] should give you the IP address :)
You can find more about the requests module here
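As a side note, the requests library can decode the JSON body for you via Response.json(), so you can skip json.loads entirely; with the url and params from the question:
import requests

r = requests.get(url, params=params, verify=False)
parsed = r.json()                      # equivalent to json.loads(r.text)
print(parsed['objects'][0]['ip'])      # 'objects' is a list, so take its first element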

Grab element from json dump

I'm using the following Python code to connect to a JSON-RPC server and grab some song information. However, I can't work out how to get the current title into a variable to print elsewhere. Here is the code:
TracksInfo = []
for song in playingSongs:
    data = { "id": 1,
             "method": "slim.request",
             "params": [ "",
                         ["songinfo", 0, 100, "track_id:%s" % song, "tags:GPASIediqtymkovrfijnCYXRTIuwxN"]
                       ]
           }
    params = json.dumps(data, sort_keys=True, indent=4)
    conn.request("POST", "/jsonrpc.js", params)
    httpResponse = conn.getresponse()
    data = httpResponse.read()
    responce = json.loads(data)
    print json.dumps(responce, sort_keys=True, indent=4)
    TrackInfo = responce['result']["songinfo_loop"][0]
    TracksInfo.append(TrackInfo)
This brings me back the data in JSON format, and the print json.dumps shows:
pi@raspberrypi ~/pithon $ sudo python tom3.py
{
"id": 1,
"method": "slim.request",
"params": [
"",
[
"songinfo",
"0",
100,
"track_id:-140501481178464",
"tags:GPASIediqtymkovrfijnCYXRTIuwxN"
]
],
"result": {
"songinfo_loop": [
{
"id": "-140501481178464"
},
{
"title": "Witchcraft"
},
{
"artist": "Pendulum"
},
{
"duration": "253"
},
{
"tracknum": "1"
},
{
"type": "Ogg Vorbis (Spotify)"
},
{
"bitrate": "320k VBR"
},
{
"coverart": "0"
},
{
"url": "spotify:track:2A7ZZ1tjaluKYMlT3ItSfN"
},
{
"remote": 1
}
]
}
}
What I'm trying to get is result.songinfo_loop.title (but I tried that!)
The songinfo_loop structure is... peculiar. It is a list of dictionaries, each with just one key.
Loop through it until you find the one with a title:
TrackInfo = next(d['title'] for d in responce['result']["songinfo_loop"] if 'title' in d)
TracksInfo.append(TrackInfo)
A better option would be to 'collapse' all those dictionaries into one:
songinfo = reduce(lambda d, p: d.update(p) or d,
                  responce['result']["songinfo_loop"], {})
TracksInfo.append(songinfo['title'])
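The same collapse can be written without reduce (which on Python 3 would have to come from functools) as a plain dict comprehension that merges the single-key dictionaries; a sketch:
# Merge the list of single-key dicts into one dict, then read the title.
songinfo = {k: v for item in responce['result']['songinfo_loop'] for k, v in item.items()}
TracksInfo.append(songinfo.get('title'))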
songinfo_loop is a list, not a dict. That means you need to index it by position, or loop through it and find the dict with a key of "title".
positional:
responce["result"]["songinfo_loop"][1]["title"]
loop:
for info in responce["result"]["songinfo_loop"]:
    if "title" in info.keys():
        print info["title"]
        break
else:
    print "no song title found"
Really, it seems like you would want to have the songinfo_loop be a dict, not a list. But if you need to leave it as a list, this is how you would pull the title.
The result is really a standard Python dict, so once songinfo_loop has been collapsed into a single dict as shown above you can use
responce["result"]["songinfo_loop"]["title"]
which should work.
