Flask python json parsing - python

Hello, I am completely new to Flask and Python. I am using an API to geocode
and I get a JSON response which is:
"info": {
"copyright": {
"imageAltText": "\u00a9 2015 MapQuest, Inc.",
"imageUrl": "http://api.mqcdn.com/res/mqlogo.gif",
"text": "\u00a9 2015 MapQuest, Inc."
},
"messages": [],
"statuscode": 0
},
"options": {
"ignoreLatLngInput": false,
"maxResults": -1,
"thumbMaps": true
},
"results": [
{
"locations": [
{
"adminArea1": "US",
"adminArea1Type": "Country",
"adminArea3": "",
"adminArea3Type": "",
"adminArea4": "",
"adminArea4Type": "County",
"adminArea5": "",
"adminArea5Type": "City",
"adminArea6": "",
"adminArea6Type": "Neighborhood",
"displayLatLng": {
"lat": 33.663512,
"lng": -111.958849
},
"dragPoint": false,
"geocodeQuality": "ADDRESS",
"geocodeQualityCode": "L1AAA",
"latLng": {
"lat": 33.663512,
"lng": -111.958849
},
"linkId": "25438895i35930428r65831359",
"mapUrl": "http://www.mapquestapi.com/staticmap/v4/getmap?key=&rand=1009123942",
"postalCode": "",
"sideOfStreet": "R",
"street": "",
"type": "s",
"unknownInput": ""
}
],
"providedLocation": {
"city": " ",
"postalCode": "",
"state": "",
"street": "E Blvd"
}
}
]
}
Right now I am doing this:
data=json.loads(r)
return jsonify(data)
and this prints all the data as shown above. I need to get the latLng value from locations, which is inside results. I have tried
data.get("results").get("locations") and hundreds of combinations like that, but I still can't get it to work. I basically need to store the lat and lng in a session variable. Any help is appreciated.

Assuming you just have one location as in your example:
from __future__ import print_function
import json
r = ...
data = json.loads(r)
latlng = data['results'][0]['locations'][0]['latLng']
latitude = latlng['lat']
longitude = latlng['lng']
print(latitude, longitude) # 33.663512 -111.958849

data.get("results") returns a list. Because a list object has no get method, you cannot call data.get("results").get("locations").
According to the json you provided, you can do like this:
data.get('results')[0].get('locations') # also a list
This will give you the array. Now you can get the lat and lng like this:
data.get('results')[0].get('locations')[0].get('latLng').get('lat') # lat
data.get('results')[0].get('locations')[0].get('latLng').get('lng') # lng

I summarize my comments as follows:
You can treat data as a nested structure of dicts and lists.
A quick ref to dict and list:
A dictionary’s keys are almost arbitrary values.
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
official docs about stdtypes

Related

Python : How to loop through data to access similar keys present inside nested dict

I have an API, after calling which I'm getting a very big json in response.
I want to access similar keys which are present inside the nested dict.
I'm using following lines to make a get request and storing the json data : -
p25_st_devices = r'https://url_from_where_im_getting_data.com'
header_events = {
'Authorization': 'Basic random_keys'}
r2 = requests.get(p25_st_devices, headers= header_events)
r2_json = json.loads(r2.content)
The sample of the json is as follows : -
{
"next": "value",
"self": "value",
"managedObjects": [
{
"creationTime": "2021-08-02T10:48:15.120Z",
"type": " c8y_MQTTdevice",
"lastUpdated": "2022-03-24T17:09:01.240+03:00",
"childAdditions": {
"self": "value",
"references": []
},
"name": "PS_MQTT1",
"assetParents": {
"self": "value",
"references": []
},
"self": "value",
"id": "338",
"Building": "value"
},
{
"creationTime": "2021-08-02T13:06:09.834Z",
"type": " c8y_MQTTdevice",
"lastUpdated": "2021-12-27T12:08:20.186+03:00",
"childAdditions": {
"self": "value",
"references": []
},
"name": "FS_MQTT2",
"assetParents": {
"self": "value",
"references": []
},
"self": "value",
"id": "339",
"c8y_IsDevice": {}
},
{
"creationTime": "2021-08-02T13:06:39.602Z",
"type": " c8y_MQTTdevice",
"lastUpdated": "2021-12-27T12:08:20.433+03:00",
"childAdditions": {
"self": "value",
"references": []
},
"name": "PS_MQTT3",
"assetParents": {
"self": "value",
"references": []
},
"self": "value",
"id": "340",
"c8y_IsDevice": {}
}
],
"statistics": {
"totalPages": 423,
"currentPage": 1,
"pageSize": 3
}
}
As per my understanding I can access name key using r2_json['managedObjects'][0]['name']
But how do I iterate over this json and store all values of name inside an array?
EDIT 1 :
Another thing which I'm trying to achieve is get all id from the JSON data and store in an array where the nested dict managedObjects contains name starting with PS_ only.
Therefore, the expected output would be device_id = ['338','340']
You should not just call the [0] index of the list, but loop over it:
# Collect the "name" of every entry in the managedObjects list.
# The loop variable is called `device` rather than `object` so the
# builtin `object` is not shadowed.
all_names = [device['name'] for device in r2_json['managedObjects']]
print(all_names)
edit: Updated answer after OP updated theirs.
For your second question you can use startswith(). The code is almost the same.
# Collect the "id" of every managed object whose name starts with "PS_".
# The loop variable is called `device` rather than `object` so the
# builtin `object` is not shadowed.
PS_names = [
    device['id']
    for device in r2_json['managedObjects']
    if device['name'].startswith("PS_")  # keep the entry only when this is True
]
print(PS_names)

Converting from json to dataframe to sql

I'm trying to save all the json data to the sql database and I'm using python so I decided to use pandas.
Part of the JSON:
{
"stores": [
{
"ID": "123456",
"name": "Store 1",
"status": "Active",
"date": "2019-03-28T15:20:00Z",
"tagIDs": null,
"location": {
"cityID": 2,
"countryID": 4,
"geoLocation": {
"latitude": 1.13121,
"longitude": 103.4324231
},
"postcode": "123456",
"address": ""
},
"new": false
},
{
"ID": "223456",
"name": "Store 2",
"status": "Active",
"date": "2020-03-28T15:20:00Z",
"tagIDs": [
12,
35
],
"location": {
"cityID": 21,
"countryID": 5,
"geoLocation": {
"latitude": 1.12512,
"longitude": 103.23342
},
"postcode": "223456",
"address": ""
},
"new": true
}
]
}
My Code:
response = requests.get(.....)
result = response.text
data = json.loads(result)
df = pd.json_normalize(data["store"])
.....
db_connection = sqlalchemy.create_engine(.....)
df.to_sql(con=db_connection, name="store", if_exists="append" )
Error: _mysql_connector.MySQLInterfaceError: Python type list cannot be converted
How I want the dataframe to actually look like:
ID tagIDs date
0 123456 [] 2020-04-23T09:32:26Z
1 223456 [12,35] 2019-05-24T03:21:39Z
2 323456 [709,1493] 2019-03-28T15:38:39Z
I tried using different dataframes & json objects so far and they all work.
So I discovered the issue is with the json object.
Without the "tagIDs", everything else works fine.
I was thinking maybe if I converted the object to a string it can be parsed to sql but it didn't work either. How do I change the tagIDs such that I can parse everything to sql? Or is there another more efficient way to do this?
I think the tagIDs field is a list and your database does not seem to be happy with it.
Not sure this is the best way but you can try to convert it from list to string
df['tagIDs'] = df['tagIDs'].apply(lambda x: str(x))

Converting a JSON file into Python Objects

I have a JSON file which I want to take and put into python objects. It has two parts, staff and assets and I want to load them into two separate ones. Here is a sample of the JSON file:
{
"staff": [
{
"id": "DA7153",
"name": [
"Fran\u00c3\u00a7ois",
"Ullman"
],
"department": {
"name": "Admin"
},
"server_admin": "true"
},
{
"id": "DA7356",
"name": [
"Bob",
"Johnson"
],
"department": {
"name": "Admin"
},
"server_admin": "false"
},
],
"assets": [
{
"asset_name": "ENGAGED SLOTH",
"asset_type": "File",
"owner": "DA8333",
"details": {
"security": {
"cia": [
"HIGH",
"INTERMEDIATE",
"LOW"
],
"data_categories": {
"Personal": "true",
"Personal Sensitive": "true",
"Customer Sensitive": "true"
}
},
"retention": 2
},
"file_type": "Document",
"server": {
"server_name": "ISOLATED UGUISU",
"ip": [
10,
234,
148,
52
]
}
},
{
"asset_name": "ISOLATED VIPER",
"asset_type": "File",
"owner": "DA8262",
"details": {
"security": {
"cia": [
"LOW",
"HIGH",
"LOW"
],
"data_categories": {
"Personal": "false",
"Personal Sensitive": "false",
"Customer Sensitive": "true"
}
},
"retention": 2
},
},
]
I have tried to create a class for staff but whenever I do I get the error "TypeError: dict expected at most 1 argument, got 3"
The code I am using looks like this:
import json
with open('Admin_sample.json') as f:
admin_json = json.load(f)
class staffmem(admin_json):
def __init__(self, id, name, department, server_admin):
self.id = id
self.name = name
self.deparment = department[name]
self.server_admin = server_admin
def staffid(self):
return self.id
print(staffmem.staffid)
I just can't work it out. Any help would be appreciated.
Thanks.
The following should be a good starting point but you have to fix few things. Note that I am using get() everywhere to provide a "safe" default if the keys do not exist:
import json
class StaffMember:
    """One entry of the ``staff`` array, built from its decoded JSON dict.

    Attributes set by the constructor:
        name: the items of the "name" list joined with "," ("" when the
            key is missing or null).
        id: the "id" value, or None when absent.
        dept: the "name" inside "department", or None when absent.
        server_admin: True only when "server_admin" reads "true"
            (case-insensitive); False otherwise.
    """

    def __init__(self, json_entry):
        # Fall back to an empty list: ",".join(None) would raise TypeError
        # when "name" is missing or is an explicit JSON null.
        self.name = ",".join(json_entry.get("name") or [])
        self.id = json_entry.get("id")
        # "department" may be missing or null; either way use an empty dict
        # so the chained .get() cannot fail.
        self.dept = (json_entry.get("department") or {}).get("name")
        # The sample data stores booleans as the strings "true"/"false".
        # str() lets a real JSON boolean through without an AttributeError.
        self.server_admin = (
            str(json_entry.get("server_admin", "false")).lower() == "true"
        )
# Get the data
with open("/tmp/test.data") as f:
data = json.load(f)
# For every entry in the data["staff"] create object and index them by ID
all_staff = {}
for json_entry in data.get("staff", []):
tmp = StaffMember(json_entry)
all_staff[tmp.id] = tmp
print(all_staff)
print(all_staff['DA7153'].name)
Output:
$ python3 /tmp/test.py
{'DA7153': <__main__.StaffMember object at 0x1097b2d50>, 'DA7356': <__main__.StaffMember object at 0x1097b2d90>}
François,Ullman
Potential Improvements:
Unicode handling
Add getters/setters
Instead of passing json dict in ctor, consider adding a from_json() static method to create your object
Error handling on missing values
Consider using a dataclass in py3 if this object is used to only/mainly store data
Consider the namedtuple approach from the comments if you do not intend to modify the object (read-only)
Notes:
The json you provided is not correct - you will need to fix it
Your syntax is wrong in your example, and the naming convention is not very Pythonic (see PEP 8 for the recommended naming conventions).

Working with multiple JSONs from API calls in Python

I'm trying to make multiple API calls to retrieve JSON files. The JSONs all follow the same schema. I want to merge all the JSON files together as one file so I can do two things:
1) Extract all the IP addresses from the JSON to work with later
2) Convert the JSON into a Pandas Dataframe
When I first wrote the code, I made a single request and it returned a JSON that I could work with. Now I have used a for loop to collect multiple JSONs and append them to a list called results_list so that the next JSON does not overwrite the previous one I requested.
Here's the code
headers = {
'Accept': 'application/json',
'key': 'MY_API_KEY'
}
query_type = 'QUERY_TYPE'
locations_list = ['London', 'Amsterdam', 'Berlin']
results_list = []
for location in locations_list:
url = ('https://API_URL' )
r = requests.get(url, params={'query':str(query_type)+str(location)}, headers = headers)
results_list.append(r)
with open('my_search_results.json' ,'w') as outfile:
json.dump(results_list, outfile)
The JSON file my_search_results.json has a separate row for each API query e.g. 0 is London, 1 is Amsterdam, 2 is Berlin etc. Like this:
[
{
"complete": true,
"count": 51,
"data": [
{
"actor": "unknown",
"classification": "malicious",
"cve": [],
"first_seen": "2020-03-11",
"ip": "1.2.3.4",
"last_seen": "2020-03-28",
"metadata": {
"asn": "xxxxx",
"category": "isp",
"city": "London",
"country": "United Kingdom",
"country_code": "GB",
"organization": "British Telecommunications PLC",
"os": "Linux 2.2-3.x",
"rdns": "xxxx",
"tor": false
},
"raw_data": {
"ja3": [],
"scan": [
{
"port": 23,
"protocol": "TCP"
},
{
"port": 81,
"protocol": "TCP"
}
],
"web": {}
},
"seen": true,
"spoofable": false,
"tags": [
"some tag",
]
}
(I've redacted any sensitive data. There is a separate row in the JSON for each API request, representing each city, but it's too big to show here)
Now I want to go through the JSON and pick out all the IP addresses:
for d in results_list['data']:
ips = (d['ip'])
print(ips)
However this gives the error:
TypeError: list indices must be integers or slices, not str
When I was working with a single JSON from a single API request this worked fine, but now it seems like either the JSON is not formatted properly or Python is seeing my big JSON as a list and not a dictionary, even though I used json.dump() on results_list earlier in the script. I'm sure it has to do with the way I had to take all the API calls and append them to a list but I can't work out where I'm going wrong.
I'm struggling to figure out how to pick out the IP addresses or if there is just a better way to collect and merge multiple JSONs. Any advice appreciated.
To get the IP try:
for d in results_list['data']: #this works only if you accessed data rightly..
ips = (d[0]['ip'])
print(ips)
Why you received the error:
The key value of data is a list which contains a dictionary of the ip you need. So when you try to access ip by ips = (d['ip']), you are indexing the outer list, which raises the error:
TypeError: list indices must be integers or slices, not str
So if:
results_list= [
{
"complete": True,
"count": 51,
"data": [
{
"actor": "unknown",
"classification": "malicious",
"cve": [],
"first_seen": "2020-03-11",
"ip": "1.2.3.4",
"last_seen": "2020-03-28",
"metadata": {
"asn": "xxxxx",
"category": "isp",
"city": "London",
"country": "United Kingdom",
"country_code": "GB",
"organization": "British Telecommunications PLC",
"os": "Linux 2.2-3.x",
"rdns": "xxxx",
"tor": False
},
"raw_data": {
"ja3": [],
"scan": [
{
"port": 23,
"protocol": "TCP"
},
{
"port": 81,
"protocol": "TCP"
}
],
"web": {}
},
"seen": True,
"spoofable": False,
"tags": [
"some tag",
]
}...(here is your rest data)
]}]
to get all IP addresses, run:
ip_address=[]
# this works only if each result is a seperate dictionary in the results_list
for d in results_list:
ips = d['data'][0]['ip']
ip_address.append(ips)
print(ips)
#if all results are within data
for d in results_list[0]['data']:
ips = d['ip']
ip_address.append(ips)
print(ips)
results_list is a list, not a dictionary, so results_list['data'] raises an error. Instead, you should get each dictionary from that list, then access the 'data' attribute. Noting also that the value for the key 'data' is of type list, you also need to access the element of that list:
for result in results_list:
for d in result["data"]:
ips = d["ip"]
print(ips)
If you know that your JSON list only has one element, you may simplify this to:
for d in results_list[0]["data"]:
ips = d["ip"]
print(ips)

Accessing nested json objects using python

I am trying to interact with an API and running into issues accessing nested objects. Below is sample json output that I am working with.
{
"results": [
{
"task_id": "22774853-2b2c-49f4-b044-2d053141b635",
"params": {
"type": "host",
"target": "54.243.80.16",
"source": "malware_analysis"
},
"v": "2.0.2",
"status": "success",
"time": 227,
"data": {
"details": {
"as_owner": "Amazon.com, Inc.",
"asn": "14618",
"country": "US",
"detected_urls": [],
"resolutions": [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
],
"response_code": 1,
"verbose_msg": "IP address in dataset"
},
"match": true
}
}
]
}
The deepest I am able to access is the data portion, which returns too much. Ideally I am just trying to access as_owner, asn, country, detected_urls and resolutions.
When I try to access details, response_code, etc., I get a KeyError. My nested JSON goes deeper than the ones in other questions I found, and I have tried the logic from those.
Below is my current code snippet and any help is appreciated!
import requests
import json
headers = {
'Content-Type': 'application/json',
}
params = (
('wait', 'true'),
)
data = '{"target":{"one":{"type": "ip","target": "54.243.80.16", "sources": ["xxx","xxxxx"]}}}'
r=requests.post('https://fakewebsite:8000/api/services/intel/lookup/jobs', headers=headers, params=params, data=data, auth=('apikey', ''))
parsed_json = json.loads(r.text)
#results = parsed_json["results"]
for item in parsed_json["results"]:
print(item['data'])
You just need to index correctly into the converted JSON. Then you can easily loop over a list of the keys you want to fetch, since they are all in the "details" dictionary.
import json
raw = '''\
{
"results": [
{
"task_id": "22774853-2b2c-49f4-b044-2d053141b635",
"params": {
"type": "host",
"target": "54.243.80.16",
"source": "malware_analysis"
},
"v": "2.0.2",
"status": "success",
"time": 227,
"data": {
"details": {
"as_owner": "Amazon.com, Inc.",
"asn": "14618",
"country": "US",
"detected_urls": [],
"resolutions": [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
],
"response_code": 1,
"verbose_msg": "IP address in dataset"
},
"match": true
}
}
]
}
'''
parsed_json = json.loads(raw)
wanted = ['as_owner', 'asn', 'country', 'detected_urls', 'resolutions']
for item in parsed_json["results"]:
details = item['data']['details']
for key in wanted:
print(key, ':', json.dumps(details[key], indent=4))
# Put a blank line at the end of the details for each item
print()
output
as_owner : "Amazon.com, Inc."
asn : "14618"
country : "US"
detected_urls : []
resolutions : [
{
"hostname": "bumbleride.com",
"last_resolved": "2016-09-15 00:00:00"
},
{
"hostname": "chilitechnology.com",
"last_resolved": "2016-09-16 00:00:00"
}
]
BTW, when you fetch JSON data using requests there's no need to use json.loads: you can access the converted JSON using the .json method of the returned request object instead of using its .text attribute.
Here's a more robust version of the main loop of the above code. It simply ignores any missing keys. I didn't post this code earlier because the extra if tests make it slightly less efficient, and I didn't know that keys could be missing.
for item in parsed_json["results"]:
    # Skip results that carry no "data" section.
    if 'data' not in item:
        continue
    data = item['data']
    # Skip results whose data has no "details" dictionary.
    if 'details' not in data:
        continue
    details = data['details']
    for key in wanted:
        # Ignore any wanted key that this item does not provide.
        if key in details:
            print(key, ':', json.dumps(details[key], indent=4))
    # Put a blank line at the end of the details for each item
    print()

Categories