Getting the specified data from the json format using python?

Getting the specified data from the json format using python? - python

I am having a json data like this
and I want the specific data from this json format based on the condition given below
{
"ok": true,
"members": [
{
"id": "W012A3CDE",
"team_id": "T012AB3C4",
"name": "spengler",
"deleted": false,
"color": "9f69e7",
"real_name": "spengler",
"tz": "America/Los_Angeles",
"tz_label": "Pacific Daylight Time",
"tz_offset": -25200,
"profile": {
"avatar_hash": "ge3b51ca72de",
"status_text": "Print is dead",
"status_emoji": ":books:",
}
},
{
"id": "W07QCRPA4",
"team_id": "T0G9PQBBK",
"name": "glinda",
"deleted": false,
"color": "9f69e7",
"real_name": "Glinda Southgood",
"tz": "America/Los_Angeles",
"tz_label": "Pacific Daylight Time",
"tz_offset": -25200,
"profile": {
"phone": "",
"skype": "",
"real_name": "Glinda Southgood",
"real_name_normalized": "Glinda Southgood",
"display_name": "Glinda the Fairly Good",
"display_name_normalized": "Glinda the Fairly Good",
"email": "glenda#south.oz.coven"
},
}
],
"cache_ts": 1498777272,
"response_metadata": {
"next_cursor": "dXNlcjpVMEc5V0ZYTlo="
}
}
now I want to get only name and id from the members where deleted:false from this json format using python Can anyone help me

Just a simple list comprehension should be enough assuming you've managed to load the json into a dict.
response = json.loads(<your_json_string_here>)
undeleted_members = [dict(id=member['id'], name=member['name']) for member in response['members'] if not member['deleted']]
print(undeleted_members)
Returns a list of dicts with just the name & ID like:
{'id': 'W07QCRPA4', 'name': 'glinda'}
Or if you want separate lists for IDs & names:
all_ids = [member['id'] for member in response['members'] if not member['deleted']]
all_names = [member['name'] for member in response['members'] if not member['deleted']]

Related

Ignore specific JSON keys when extracting data in Python

I'm extracting certain keys in several JSON files and then converting it to a CSV in Python. I'm able to define a key list when I run my code and get the information I need.
However, there are certain sub-keys that I want to ignore from the JSON file. For example, if we look at the following snippet:
JSON Sample
[
{
"callId": "abc123",
"errorCode": 0,
"apiVersion": 2,
"statusCode": 200,
"statusReason": "OK",
"time": "2020-12-14T12:00:32.744Z",
"registeredTimestamp": 1417731582000,
"UID": "_guid_abc123==",
"created": "2014-12-04T22:19:42.894Z",
"createdTimestamp": 1417731582000,
"data": {},
"preferences": {},
"emails": {
"verified": [],
"unverified": []
},
"identities": [
{
"provider": "facebook",
"providerUID": "123",
"allowsLogin": true,
"isLoginIdentity": true,
"isExpiredSession": true,
"lastUpdated": "2014-12-04T22:26:37.002Z",
"lastUpdatedTimestamp": 1417731997002,
"oldestDataUpdated": "2014-12-04T22:26:37.002Z",
"oldestDataUpdatedTimestamp": 1417731997002,
"firstName": "John",
"lastName": "Doe",
"nickname": "John Doe",
"profileURL": "https://www.facebook.com/John.Doe",
"age": 50,
"birthDay": 31,
"birthMonth": 12,
"birthYear": 1969,
"city": "City, State",
"education": [
{
"school": "High School Name",
"schoolType": "High School",
"degree": null,
"startYear": 0,
"fieldOfStudy": null,
"endYear": 0
}
],
"educationLevel": "High School",
"favorites": {
"music": [
{
"name": "Music 1",
"id": "123",
"category": "Musician/band"
},
{
"name": "Music 2",
"id": "123",
"category": "Musician/band"
}
],
"movies": [
{
"name": "Movie 1",
"id": "123",
"category": "Movie"
},
{
"name": "Movie 2",
"id": "123",
"category": "Movie"
}
],
"television": [
{
"name": "TV 1",
"id": "123",
"category": "Tv show"
}
]
},
"followersCount": 0,
"gender": "m",
"hometown": "City, State",
"languages": "English",
"likes": [
{
"name": "Like 1",
"id": "123",
"time": "2014-10-31T23:52:53.0000000Z",
"category": "TV",
"timestamp": "1414799573"
},
{
"name": "Like 2",
"id": "123",
"time": "2014-09-16T08:11:35.0000000Z",
"category": "Music",
"timestamp": "1410855095"
}
],
"locale": "en_US",
"name": "John Doe",
"photoURL": "https://graph.facebook.com/123/picture?type=large",
"timezone": "-8",
"thumbnailURL": "https://graph.facebook.com/123/picture?type=square",
"username": "john.doe",
"verified": "true",
"work": [
{
"companyID": null,
"isCurrent": null,
"endDate": null,
"company": "Company Name",
"industry": null,
"title": "Company Title",
"companySize": null,
"startDate": "2010-12-31T00:00:00"
}
]
}
],
"isActive": true,
"isLockedOut": false,
"isRegistered": true,
"isVerified": false,
"lastLogin": "2014-12-04T22:26:33.002Z",
"lastLoginTimestamp": 1417731993000,
"lastUpdated": "2014-12-04T22:19:42.769Z",
"lastUpdatedTimestamp": 1417731582769,
"loginProvider": "facebook",
"loginIDs": {
"emails": [],
"unverifiedEmails": []
},
"rbaPolicy": {
"riskPolicyLocked": false
},
"oldestDataUpdated": "2014-12-04T22:19:42.894Z",
"oldestDataUpdatedTimestamp": 1417731582894,
"registered": "2014-12-04T22:19:42.956Z",
"regSource": "",
"socialProviders": "facebook"
}
]
I want to extract data from created and identities but ignore identities.favorites and identities.likes as well as their data underneath it.
This is what I have so far, below. I defined the JSON keys that I want to extract in the key_list variable:
Current Code
import json, pandas
from flatten_json import flatten
# Enter the path to the JSON and the filename without appending '.json'
file_path = r'C:\Path\To\file_name'
# Open and load the JSON file
json_list = json.load(open(file_path + '.json', 'r', encoding='utf-8', errors='ignore'))
# Extract data from the defined key names
key_list = ['created', 'identities']
json_list = [{k:d[k] for k in key_list} for d in json_list]
# Flatten and convert to a data frame
json_list_flattened = (flatten(d, '.') for d in json_list)
df = pandas.DataFrame(json_list_flattened)
# Export to CSV in the same directory with the original file name
export_csv = df.to_csv (file_path + r'.csv', sep=',', encoding='utf-8', index=None, header=True)
Similar to the key_list, I suspect that I would make an ignore list and factor that in the json_list for loop that I have? Something like:
key_ignore = ['identities.favorites', 'identities.likes']`
Then utilize the dict.pop() which looks like it will remove the unwanted sub-keys if it matches? Just not sure how to implement that correctly.
Expected Output
As a result, the code should extract data from the defined keys in key_list and ignore the sub keys defined in key_ignore, which is identities.favorites and identities.likes. Then the rest of the code will continue to convert it into a CSV:
created
identities.0.provider
identities.0.providerUID
identities...
2014-12-04T19:23:05.191Z
site
cb8168b0cf734b70ad541f0132763761
...

If the keys are always there, you can use
del d[0]['identities'][0]['likes']
del d[0]['identities'][0]['favorites']
or if you want to remove the columns from the dataframe after reading all the json data in you can use
df.drop(df.filter(regex='identities.0.favorites|identities.0.likes').columns, axis=1, inplace=True)

How to print a specific item from within a JSON file into python

I want to print a user from a JSON list into Python that I select however I can only print all the users. How do you print a specific user? At the moment I have this which prints all the users out in a ugly format
import json
with open('Admin_sample.json') as f:
admin_json = json.load(f)
print(admin_json['staff'])
The JSON file looks like this
{
"staff": [
{
"id": "DA7153",
"name": [
"Fran\u00c3\u00a7ois",
"Ullman"
],
"department": {
"name": "Admin"
},
"server_admin": "true"
},
{
"id": "DA7356",
"name": [
"Bob",
"Johnson"
],
"department": {
"name": "Admin"
},
"server_admin": "false"
},
],
"assets": [
{
"asset_name": "ENGAGED SLOTH",
"asset_type": "File",
"owner": "DA8333",
"details": {
"security": {
"cia": [
"HIGH",
"INTERMEDIATE",
"LOW"
],
"data_categories": {
"Personal": "true",
"Personal Sensitive": "true",
"Customer Sensitive": "true"
}
},
"retention": 2
},
"file_type": "Document",
"server": {
"server_name": "ISOLATED UGUISU",
"ip": [
10,
234,
148,
52
]
}
},
{
"asset_name": "ISOLATED VIPER",
"asset_type": "File",
"owner": "DA8262",
"details": {
"security": {
"cia": [
"LOW",
"HIGH",
"LOW"
],
"data_categories": {
"Personal": "false",
"Personal Sensitive": "false",
"Customer Sensitive": "true"
}
},
"retention": 2
},
},
]
I just can't work it out. Any help would be appreciated.
Thanks.

You need to index into the staff list, e.g.:
print(admin_json['staff'][0])
I suggest reading up a bit on dictionaries in Python. Dictionary values can be set to any object: in this case, the value of the staff key is set to a list of dicts. Here's an example that will loop through all the staff members and print their names:
staff_list = admin_json['staff']
for person in staff_list:
name_parts = person['name']
full_name = ' '.join(name_parts) # combine name parts into a string
print(full_name)

Try something like this:
import json
def findStaffWithId(allStaff, id):
for staff in allStaff:
if staff["id"] == id:
return staff
return {} # no staff found
with open('Admin_sample.json') as f:
admin_json = json.load(f)
print(findStaffWithId(admin_json['staff'], "DA7356"))

You can list all the users name with
users = [user["name"] for user in admin_json['staff']]

You have two lists in this JSON file. When you try to parse it, you'll be reach a list. For example getting the first staff id:
print(admin_json['staff'][0]['id'])
This will print:
DA7153
When you use "json.loads" this will simply converts JSON file to the Python dictionary. For further info:
https://docs.python.org/3/tutorial/datastructures.html#dictionaries

Parse specific data from JSON

I have a JSON file with lots of data, and I want to keep only specific data.
I thought reading the file, get all the data I want and save as a new JSON.
The JSON is like this:
{
"event": [
{
"date": "2019-01-01",
"location": "world",
"url": "www.com",
"comments": "null",
"country": "china",
"genre": "blues"
},
{
"date": "2000-01-01",
"location": "street x",
"url": "www.cn",
"comments": "null",
"country":"turkey",
"genre": "reds"
},
{...
and I want it to be like this (with just date and url from each event.
{
"event": [
{
"date": "2019-01-01",
"url": "www.com"
},
{
"date": "2000-01-01",
"url": "www.cn"
},
{...
I can open the JSON and read from it using
with open('xx.json') as f:
data = json.load(f)
data2=data["events"]["date"]
But I still need to understand how to save the data I want in a new JSON keeping it's structure

You can use loop comprehension to loop over the events in and return a dictionary containing only the keys that you want.
data = { "event": [
{
"date": "2019-01-01",
"location": "world",
"url": "www.com",
"comments": None,
"country": "china",
"genre": "blues",
},
{
"date": "2000-01-01",
"location": "street x",
"url": "www.cn",
"comments": None,
"country" :"turkey",
"genre":"reds",
}
]}
# List comprehension
data["event"] = [{"date": x["date"], "url": x["url"]} for x in data["event"]]
Alternatively, you can map a function over the events list
keys_to_keep = ["date", "url"]
def subset_dict(d):
return {x: d[x] for x in keys_to_keep}
data["event"] = list(map(subset_dict, data["event"]))

Need help formatting/parsing this list

I'm not a developer so sorry if this is a dumb question or if my terminology is incorrect. I'm writing a script to make calls to our CMDB's API but i'm not sure how to handle the data that is being sent back from it. It appears to be a list type but I can't reference anything by key names. Is there a way to convert it to something that i can easily manipulate and pull data out of?
Here is my code:
import requests
import json
r=requests.post('API.URL', data={'grant_type': 'password', 'client_id':'#######', 'username': 'user', 'password': 'password'})
json_data = json.loads(r.content)
token = json_data['access_token']
data ={
"filters": [
{
"fieldId": "937905400191ae67dd03ab4b79968fcbaa264b1a75",
"operator": "eq",
"value": "hostname"
}
],
"fields":[
'9426b6ddf3cb971488517145e39efc5aa7f16fec46',
'9343f8800b3917f26533954918a6388ae8c863507f',
'9379053db492ece14816704ef5a9e3e567e217511b',
'9343f93fc4c8422bcf24e74a9a86035bb7d0248b00',
'941ba290776d6f51ce35664246927b958330a753b2'
],
"association": "Configuration Item",
"busObId": "93dada9f640056ce1dc67b4d4bb801f69104894dc8",
"includeAllFields": 'false',
"pageNumber": 0,
"pageSize": 300,
"scope": "Global",
"scopeOwner": "(None)",
"searchName": "APItest"
}
payload = json.dumps(data)
headers = {'Content-Type': 'application/json', 'Accept': 'application/json', 'Authorization':'bearer '+token}
search=requests.post('http://API.URL', headers=headers, data=payload)
search_json = json.loads(search.content)
bo = search_json['businessObjects']
print(bo)
Here's the response:
[
{
"busObRecId": "9423ad7d617390fdc956ee4302a69d0ccf1a37a4c1",
"hasError": false,
"links": [
{
"url": "http://URL",
"name": "Delete Record"
}
],
"fields": [
{
"displayName": "Business Sponsor",
"name": "Business Sponsor",
"value": "",
"html": null,
"dirty": false,
"fieldId": "9426b6ddf3cb971488517145e39efc5aa7f16fec46"
},
{
"displayName": "Owned By",
"name": "Owned By",
"value": "John Doe",
"html": null,
"dirty": false,
"fieldId": "9343f8800b3917f26533954918a6388ae8c863507f"
},
{
"displayName": "Asset Status",
"name": "Asset Status",
"value": "Active",
"html": null,
"dirty": false,
"fieldId": "9379053db492ece14816704ef5a9e3e567e217511b"
},
{
"displayName": "Description",
"name": "Description",
"value": "Automation Server",
"html": null,
"dirty": false,
"fieldId": "9343f93fc4c8422bcf24e74a9a86035bb7d0248b00"
},
{
"displayName": "Data Center Location",
"name": "Data Center Location",
"value": "",
"html": null,
"dirty": false,
"fieldId": "941ba290776d6f51ce35664246927b958330a753b2"
}
],
"errorMessage": null,
"busObPublicId": "9423ad7d617390fdc956ee4302a69d0ccf1a37a4c1",
"busObId": "93dada9f640056ce1dc67b4d4bb801f69104894dc8",
"errorCode": null
}
]
type() shows the object bo as a list and len() says it only has one element so I'm not sure how to pull data out of it without hacking away at it stripping out characters.

The reason why you cannot reference anything by key names is the fact that your output is a list. A list which is only containing one single dictionary element. If you print out
bo[0]
you get the whole data, without the [ and ] symbols. As for the dictionary now we can access different elements by keys, e.g.:
print(bo["busObId"])
will return the following value:
93dada9f640056ce1dc67b4d4bb801f69104894dc
Let's say you would want to print out the fieldId of the first element of "fields". You can do it the following way:
print(bo[0]["fields"][0]["fieldId"])
Hope this helped.

Finding specific values in nested json using Python

I have a file containing a large number of nested json objects. I pasted a snippet of it below. I am trying to use python to query all of the objects in the file to pull out those objects that have at least one custom feeds - url value that begins with "http://commshare" Some objects will not have any custom feeds, and the others will have one or more custom feed each of which might or might not begin with that string I am searching for. Any help would be appreciated! I am very new to Python.
Example JSON:
[{
"empid": "12345",
"values": {
"custom_feeds": {
"custom_feeds": [
{
"name": "Bulletins",
"url": "http://infoXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
}
]
},
"gadgetTitle": "InfoSec Updates",
"newWindow": false,
"article_limit_value": 10,
"show_source": true
}
},
{
"empid": "23456",
"values": {
"custom_feeds": {
"custom_feeds": [
{
"name": "1 News",
"url": "http://blogs.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
},
{
"name": "2 News",
"url": "http://info.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
},
{
"name": "3 News",
"url": "http://blogs.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
},
{
"name": "4 News",
"url": "http://commshare.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
}
]
},
"gadgetTitle": "Org News",
"newWindow": false,
"article_limit_value": 10,
"show_source": true
}
}, {
"empid": "34567",
"values": {
"custom_feeds": {
"custom_feeds": []
},
"gadgetTitle": "Org News",
"newWindow": false,
"article_limit_value": 10,
"show_source": true
}
}]

Assuming your file is named input.json and you want the object for each feed, you could parse the JSON and create a new list where the feeds meet your criteria using list comprehension:
import json
with open('input.json') as input_file:
items = json.loads(input_file.read())
feeds = [{'name': feed['name'], 'url': feed['url'], 'empid': item['empid']}
for item in items
for feed in item['values']['custom_feeds']['custom_feeds']
if feed['url'].startswith('http://commshare')]
assert feeds == [{'name': '4 News', 'url': 'http://commshare.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'empid': '23456'}]

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Getting the specified data from the json format using python? - python

Related

Ignore specific JSON keys when extracting data in Python

How to print a specific item from within a JSON file into python

Parse specific data from JSON

Need help formatting/parsing this list

Finding specific values in nested json using Python

Categories

Resources