glom assign based on data - python

In the following code, I am trying to mask personal information based on the data. I have two scenarios. In scenario 1, when type = 'firstName', I want to set the valueString value to "Masked". In scenario 2, when type matches the pattern "first****Name", I want to set the valueString value to "Masked". Does anyone have suggestions for writing glom assign statements that handle both cases?
Example Json String
{
"id": "985babac-9999-8888-8887",
"entity": [
{
"what": {
"reference": "4lincoln-123-11eb-bc1a-732f"
},
"detail": [
{
"type": "uuid",
"valueString": "4obama-f199-77eb-bc1a-555555704d2f"
},
{
"type": "firstName",
"valueString": "John"
},
{
"type": "userName",
"valueString": "Johns"
},
{
"type": "middleInitial",
"valueString": "S"
},
{
"type": "lastName",
"valueString": "Trump"
},
{
"type": "first-4fa999-f1999-Name",
"valueString": "John"
},
{
"type": "birth-4fa999-f1999-Date",
"valueString": "2010-01-01"
}
]
}
]
}
Updated output should look like the following
{
"id": "985babac-9999-8888-8887",
"entity": [
{
"what": {
"reference": "4lincoln-123-11eb-bc1a-732f"
},
"detail": [
{
"type": "uuid",
"valueString": "4obama-f199-77eb-bc1a-555555704d2f"
},
{
"type": "firstName",
"valueString": "Masked"
},
{
"type": "userName",
"valueString": "Johns"
},
{
"type": "middleInitial",
"valueString": "S"
},
{
"type": "lastName",
"valueString": "Trump"
},
{
"type": "first-4fa999-f1999-Name",
"valueString": "Masked"
},
{
"type": "birth-4fa999-f1999-Date",
"valueString": "2010-01-01"
}
]
}
]
}

I came up with the following solution. I was wondering if this can be done in one glom call instead of calling multiple times?
import json
import logging
import sys
import time
import re
from glom import glom, assign, Coalesce, SKIP, Spec, Path, Call, T, Iter, Inspect
LOGGING_FORMAT = '%(asctime)s - [%(filename)s:%(name)s:%(lineno)d] - %(levelname)s - %(message)s'
LOGLEVEL = logging.INFO
logging.basicConfig(level=LOGLEVEL,format=LOGGING_FORMAT)
logger = logging.getLogger(__name__)
start_time = time.time()
target = {
"id": "985babac-9999-8888-8887",
"entity": [
{
"what": {
"reference": "4lincoln-123-11eb-bc1a-732f"
},
"detail": [
{
"type": "uuid",
"valueString": "4obama-f199-77eb-bc1a-555555704d2f"
},
{
"type": "firstName",
"valueString": "John"
},
{
"type": "userName",
"valueString": "Johns"
},
{
"type": "middleInitial",
"valueString": "S"
},
{
"type": "lastName",
"valueString": "Trump"
},
{
"type": "first-4fa999-f1999-Name",
"valueString": "John"
},
{
"type": "birth-4fa999-f1999-Date",
"valueString": "2010-01-01"
}
]
}
]
}
# def myupdate(x):
# for count, item in enumerate(x):
# myspec = 'entity.0.detail.{}.valueString'.format(count)
# if item == 'firstName':
# _ = assign(target,myspec,'Masked')
piiRegex = re.compile(r'^first.*Name$|^last.*Name$|^middle.*Initial$|^birth.*Date$')
def myupdate(x):
    """Mask the valueString of every detail entry whose type matches piiRegex.

    x is the sequence of 'type' strings handed over by the glom spec; the
    position of each string is used as the index under 'entity.0.detail'.
    The module-level target is modified in place and nothing is returned.
    """
    for position, type_name in enumerate(x):
        if not piiRegex.search(type_name):
            continue
        value_path = 'entity.0.detail.{}.valueString'.format(position)
        assign(target, value_path, 'Masked')
# Spec: walk to entity[0].detail, collect each entry's 'type', then pass that list to myupdate.
spec = {'result': ('entity.0.detail', ['type'], myupdate)}
# myupdate edits `target` in place and has no return statement, so xyz is {'result': None}.
xyz = glom(target, spec)
print(xyz)
# The masked values are visible on target itself, not on the glom result.
print(target)
logger.info("Program completed in --- %s seconds ---" % (time.time() - start_time))
===============
Result:
{'result': None}
{'id': '985babac-9999-8888-8887', 'entity': [{'what': {'reference': '4lincoln-123-11eb-bc1a-732f'}, 'detail': [{'type': 'uuid', 'valueString': '4obama-f199-77eb-bc1a-555555704d2f'}, {'type': 'firstName', 'valueString': 'Masked'}, {'type': 'userName', 'valueString': 'Johns'}, {'type': 'middleInitial', 'valueString': 'Masked'}, {'type': 'lastName', 'valueString': 'Masked'}, {'type': 'first-4fa999-f1999-Name', 'valueString': 'Masked'}, {'type': 'birth-4fa999-f1999-Date', 'valueString': 'Masked'}]}]}

Related

How to extract data from complex JSON object?

I am trying to extract data from the json file I got from a get request.
{
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
I have an empty dictionary that stores projectid as Key.
# Keys are strings so they compare equal to the "projectid" values in the JSON;
# a bare 001 is also a syntax error in Python 3 (leading zeros in an int literal).
projectids = {
    "001": "",
    "002": "",
    "003": "",
    "004": "",
}
I was looking for a way to find "projectid" inside "attributes" and the corresponding value for "id" and populate the dictionary projectids with the key(['attributes']['projectid']) and values(id):
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": ""
}
You can try this, assuming data is your variable for the response from the GET request
# Build the mapping from scratch: every record in the response adds one entry,
# keyed by its "projectid" attribute and holding the record's "id".
projectids = {}
for record in data['data']:
    project_key = record['attributes']['projectid']
    projectids[project_key] = record['id']
Output:
{
'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'
}
if you're trying to match with already existing projectids in a dict then try
# Only the project ids listed up front are filled in; ids absent from the
# response keep their empty-string placeholder.
projectids = {
    "001": "",
    "002": "",
    "003": "",
    "004": "",
}
for wanted_id in projectids:
    # first record in data['data'] carrying this project id, or None when there is none
    record = next(
        (entry for entry in data['data']
         if entry["attributes"]["projectid"] == wanted_id),
        None,
    )
    if record is not None:
        projectids[wanted_id] = record['id']
If data is your input data from the question, then:
# Blank placeholders "001".."004" first, then overlay whatever ids the data provides.
projectids = dict.fromkeys((f"{i:>03}" for i in range(1, 5)), "")
out = dict(projectids)
out.update((d["attributes"]["projectid"], d["id"]) for d in data["data"])
print(out)
Prints:
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": "",
}
Simply try this:
json_data = {
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
Just assume the JSON data above and try the following code:
# Pair each record's "projectid" attribute with its top-level "id".
project_ids = dict(
    (item['attributes']['projectid'], item['id'])
    for item in json_data['data']
)
expected output:
{'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'}

How to convert Json to Python object?

How do I convert a complex JSON document to a Python object? I am having difficulty converting the attached JSON to a Python object, and I have to validate this data against the DB later.
Json:
{
"namespace":"Data.Datapoint",
"type":"record",
"name":"Blood Donar",
"fields":[
{
"name":"id",
"type":"int"
},
{
"name":"donor_number",
"type":"string"
},
{
"name":"birth_date",
"type":{
"type":"int",
"logicalType":"date"
},
"doc":"Birth Date"
},
{
"name":"height",
"type":[
"int",
"null"
],
"doc":"Height"
},
{
"name":"applicant_ts",
"type":[
{
"type":"long",
"logicalType":"timestamp-millis"
},
"null"
],
"doc":"Creation Timestamp"
},
{
"name":"arm_preference_ind",
"type":[
"string",
"null"
],
"doc":"Arm Preference; Selection from list"
},
{
"name":"abo_ind",
"type":[
"string",
"null"
],
"doc":"Blood Type/ABO"
},
{
"name":"vein_grading_ind",
"type":[
"string",
"null"
],
"doc":"Vein Grade"
}
]
}
import json
data = '''
{ "namespace": "Data.Datapoint", "type": "record", "name": "Blood Donar", "fields": [ { "name": "id", "type": "int" }, { "name": "donor_number", "type": "string" }, { "name": "birth_date", "type": { "type": "int", "logicalType": "date" }, "doc": "Birth Date" }, { "name": "height", "type": [ "int", "null" ], "doc": "Height" }, { "name": "applicant_ts", "type": [ { "type": "long", "logicalType": "timestamp-millis" }, "null" ], "doc": "Creation Timestamp" }, { "name": "arm_preference_ind", "type": [ "string", "null" ], "doc": "Arm Preference; Selection from list" }, { "name": "abo_ind", "type": [ "string", "null" ], "doc": "Blood Type/ABO" }, { "name": "vein_grading_ind", "type": [ "string", "null" ], "doc": "Vein Grade" } ] }
'''
# json.loads parses the JSON text held in `data` into plain Python objects
# (the top-level JSON object becomes a dict).
json_data = json.loads(data)
json_data is now a Python dict.
If you want to load JSON data from the web, you can try this:
import json
import requests
# A timeout keeps the call from hanging indefinitely on an unresponsive server.
response = requests.get("https://jsonplaceholder.typicode.com/todos", timeout=10)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
# Response.json() decodes the body directly; no separate json.loads(response.text) is needed.
todos = response.json()

How to get this json specific word from this array

I have this json and I would like to get only the Name from every array. How do I write it in python,
Currently, I have this li = [item.get(data_new[0]'id') for item in data_new]
where data_new is my json data.
[
{
"id": "1687fbfa-8936-4b77-a7bc-123f9f276c49",
"attributes": [
{
"name": "status",
"value": "rejected",
"scope": "identity"
},
{
"name": "created_ts",
"value": "2020-06-25T16:22:07.578Z",
"scope": "system"
},
{
"name": "updated_ts",
"value": "2020-07-08T12:43:09.361Z",
"scope": "system"
},
{
"name": "artifact_name",
"value": "release-v10",
"scope": "inventory"
},
{
"name": "device_type",
"value": "proddemo-device",
"scope": "inventory"
}
],
"updated_ts": "2020-07-08T12:43:09.361Z"
},
{
"id": "0bf2a1fe-6004-473f-88b7-aab061972115",
"attributes": [
{
"name": "status",
"value": "rejected",
"scope": "identity"
},
{
"name": "created_ts",
"value": "2020-07-01T16:23:00.631Z",
"scope": "system"
},
{
"name": "updated_ts",
"value": "2020-07-08T17:41:16.45Z",
"scope": "system"
},
{
"name": "artifact_name",
"value": "Module_logs_v7",
"scope": "inventory"
},
{
"name": "cpu_model",
"value": "ARMv8 Processor",
"scope": "inventory"
},
{
"name": "device_type",
"value": "device",
"scope": "inventory"
},
{
"name": "hostname",
"value": "device004",
"scope": "inventory"
},
{
"name": "ipv4_br-d6eae8b3a339",
"value": "172.0.0.1/18",
"scope": "inventory"
}
],
"updated_ts": "2020-07-08T12:43:09.361Z"
}
]
This is the output snippet from my API and from this output I want to retrieve the value of the device whose name is hostname, as you can see that is the second last entry from this code where "name": "hostname"
So, I want to retrieve the value for that particular json only where the name will be "hostname", how can I do that.
Please guide me through.
# Sample payload: a list of devices, each with a list of attribute dicts.
a = [{'id': '291ae0e5956c69c2267489213df4459d19ed48a806603def19d417d004a4b67e',
      'attributes': [{'name': 'ip_addr',
                      'value': '1.2.3.4',
                      'descriptionName': 'IP address'},
                     {'name': 'ports', 'value': ['8080', '8081'], 'description': 'Open ports'}],
      'updated_ts': '2016-10-03T16:58:51.639Z'},
     {'id': '76f40e5956c699e327489213df4459d1923e1a806603def19d417d004a4a3ef',
      'attributes': [{'name': 'mac',
                      'value': '00:01:02:03:04:05',
                      'descriptionName': 'MAC address'}],
      'updated_ts': '2016-10-04T18:24:21.432Z'}]

# Collect every "descriptionName" value, device by device, in document order.
descriptionName = []
for i in a:
    for j in i["attributes"]:
        # membership test on the dict replaces scanning each key by hand
        if "descriptionName" in j:
            descriptionName.append(j["descriptionName"])
One liner:
# for-clauses run left to right like nested loops: devices (i) first, then each
# device's attributes (j). Written the other way round, `i` is used before the
# comprehension binds it.
[j["descriptionName"] for i in a for j in i["attributes"] if "descriptionName" in j]
Output:
['IP address', 'MAC address']
Update 1:
To get all names
One liner code -
# Clause order is outer-then-inner (for i ... for j ...), so each device's names
# are listed once. The doubled listing shown under "Output" came from the reversed
# order, which depended on an `i` bound outside the comprehension and repeated
# one device's names once per device.
[j["name"] for i in a for j in i["attributes"] if "name" in j]
Output:
['status',
'status',
'created_ts',
'created_ts',
'updated_ts',
'updated_ts',
'artifact_name',
'artifact_name',
'cpu_model',
'cpu_model',
'device_type',
'device_type',
'hostname',
'hostname',
'ipv4_br-d6eae8b3a339',
'ipv4_br-d6eae8b3a339']
To get value for which name is "hostname"
# Devices first, then their attributes; .get() covers attributes without a "name" key.
# With the clauses reversed the single hostname match was returned once per device,
# which is why the posted output shows it twice.
[j["value"] for i in a for j in i["attributes"] if j.get("name") == "hostname"]
Output:
['device004', 'device004']

Python how to pick 3rd occurence in nested json array

I am working on one of my requirements.
My requirement: I need to pick and print only 3rd "id" from "syrap" list from the nested json file. I am not getting desired output. Any help will be appreciated.
Test file:
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
Expected output in a csv:
0001,donut,abc,0303,1003
My code:
import requests
import json
import csv
f = open('testdata.json')
data = json.load(f)
f.close()
f = csv.writer(open('testout.csv', 'wb+'))
for item in data:
f.writerow([item['id'], item[type], item['batters'][0]['process'],
item['batters'][0]['mix'],
item['batters'][0]['syrap'][0]['id'],
item['batters'][0]['syrap'][1]['id'],
item['batters'][0]['syrap'][2]['id'])
Here is some sample code showing how you can iterate through json content parsed as a dictionary:
import json
json_str = '''{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
'''
# Parse the JSON text, then step down to the "syrap" list nested under "batters".
jsondict = json.loads(json_str)
batters = jsondict['batters']
syrap_node = batters['syrap']
# One output line per syrap entry, showing its id and type.
for entry in syrap_node:
    entry_id, entry_type = entry["id"], entry["type"]
    print(f'id:{entry_id} type: {entry_type}')
Simply, data["batters"]["syrap"][2]["id"]
Much better way to achieve this would be
# Text mode with newline='' is what the csv module expects on Python 3 (a file
# opened with 'wb+' rejects the str rows csv writes). The with-statement owns
# the open() call so the file is closed even if writing a row fails.
with open('testout.csv', 'w', newline='') as f:
    fnames = ['id','type','process','mix','syrap']
    writer = csv.DictWriter(f, fieldnames=fnames)
    writer.writeheader()
    # NOTE: this loop assumes `data` is a list of records, not a single dict.
    for item in data:
        print(item)
        writer.writerow({'id' : item['id'], 'type': item['type'],
                         'process' : item['batters']['process'],
                         'mix': item['batters']['mix'],
                         # index 2 selects the third entry of the syrap list
                         'syrap': item['batters']['syrap'][2]['id']})
You need to make sure that data is actually a list. If it is not a list, don't use a for loop.
Simply write the single row:
# Single-record case: read the fields straight from `data` and write one row.
# ['syrap'][2] is the third entry of the syrap list (indices start at 0).
writer.writerow({'id' : data['id'], 'type': data['type'],
'process' : data['batters']['process'],
'mix': data['batters']['mix'],
'syrap': data['batters']['syrap'][2]['id']})

Create dynamic json object in python

I have a dictionary that contains multiple keys, and each value is itself a dictionary of key/value pairs. I can't work out how to build JSON dynamically from this dictionary in Python. Here's the dictionary:
image_dict = {"IMAGE_1":{"img0":"IMAGE_2","img1":"IMAGE_3","img2":"IMAGE_4"},"IMAGE_2":{"img0":"IMAGE_1", "img1" : "IMAGE_3"},"IMAGE_3":{"img0":"IMAGE_1", "img1":"IMAGE_2"},"IMAGE_4":{"img0":"IMAGE_1"}}
My expected result like this :
{
"data": [
{
"image": {
"imageId": {
"id": "IMAGE_1"
},
"link": {
"target": {
"id": "IMAGE_2"
},
"target": {
"id": "IMAGE_3"
},
"target": {
"id": "IMAGE_4"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_2"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_3"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_3"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_2"
}
}
},
"updateData": "link"
} ,
{
"image": {
"imageId": {
"id": "IMAGE_4"
},
"link": {
"target": {
"id": "IMAGE_1"
}
}
},
"updateData": "link"
}
]
}
I tried to solve it but I didn't get expected result.
result = {"data":[]}
for k,v in sorted(image_dict.items()):
for a in sorted(v.values()):
result["data"].append({"image":{"imageId":{"id": k},
"link":{"target":{"id": a}}},"updateData": "link"})
print(json.dumps(result, indent=4))
In Python dictionaries you can't have 2 values with the same key. So you can't have multiple targets all called "target". So you can index them. Also I don't know what this question has to do with dynamic objects but here's the code I got working:
import re

# A dict cannot hold the same key twice, so every image and target entry gets a
# numbered key first; the numbers are stripped from the rendered text afterwards.
dict_res = {}
for ind, (image, links) in enumerate(image_dict.items()):
    link = {
        'target' + str(lin_ind): {'id': sub}
        for lin_ind, sub in enumerate(links.values())
    }
    # 'imageId' maps to {'id': <name>}; a bare {image} would build a set instead
    dict_res['image' + str(ind)] = {'imageId': {'id': image}, 'link': link}

# Raw strings with \d+ so indices of 10 and above are removed completely.
dict_res = re.sub(r'target\d+', 'target', re.sub(r'image\d+', 'image', str(dict_res)))
print(dict_res)

Categories