Remove keys but keep values from dictionary - python

Let's say I have this dictionary:
{
"id": "132-sd-sa-23-a-1",
"data": {
"lastUpdated": { "S": "2020-07-22T21:39:20Z" },
"profile": {
"M": {
"address": { "L": [] },
"fakeField": { "S": "someValue" },
"someKey": { "M": { "firstName": { "S": "Test" } } }
}
},
"groups": {
"L": [{ "S": "hello world!" }]
}
}
}
How can I remove the "M", "S", "L", etc. keys from the dictionary but keep the values. So it would turn into this:
{
"id": "132-sd-sa-23-a-1",
"data": {
"lastUpdated": "2020-07-22T21:39:20Z",
"profile": {
"address": [],
"fakeField": "someValue",
"someKey": { "firstName": "Test" }
},
"groups": ["hello world!"]
}
}
I could turn the dictionary into a string, loop through it, and remove what's necessary but that doesn't seem efficient or fast. I can't save the original list as the output that I'm hoping for initially so I need to convert it myself.

Sounds like a job for recursion:
def unsml(obj):
if isinstance(obj, dict):
if len(obj) == 1:
(key, value), *_ = obj.items() # get the only key and value
if key in "SML":
return unsml(value)
return {
key: unsml(value) if isinstance(value, dict) else value
for key, value
in obj.items()
}
elif isinstance(obj, list):
return [unsml(value) for value in obj]
return obj
Then just do new_dict = unsml(old_dict).

Related

Python and JSON - if Key matches Move value from a key to root key

I have an application which generates clickstream data and pushes the data into a JSON. Almost 1000 clicks data will be saved in that file.
File contents: (steam-00001.json)
{
"date": {
"s": "2019-04-20"
},
"name": {
"s": "www.mywebsite.com"
},
"products": {
"m": {
"13128": {
"m": {
"quantity": {
"s": "1"
},
"price": {
"n": "799"
}
}
}
}
}
}
{
"date": {
"s": "2019-04-20"
},
"name": {
"s": "www.mywebsite.com"
},
"products": {
"m": {
"13128": {
"m": {
"quantity": {
"s": "1"
},
"price": {
"n": "799"
}
}
}
}
}
}
If you see the above JSON file, all of my values are pointed with s,m,n. The developers used this for some reference.
S - Single value
M - Multiple value
N - Number
Now I need to remove these references. I tried multiple json load and dumps but not able to achieve what im expecting.
Could someone help me on this?
Expected Output:
{
"date": "2019-04-20",
"name": "www.mywebsite.com",
"products": {
"13128": {
"quantity": "1",
"price": "799"
}
}
}
{
"date": "2019-04-20",
"name": "www.mywebsite.com",
"products": {
"13128": {
"quantity": "1",
"price": "799"
}
}
}
Steps I tried:
import json
def load_json_multiple(segments):
chunk = ""
for segment in segments:
chunk += segment
try:
yield json.loads(chunk)
chunk = ""
except ValueError:
pass
def filterJson(payload):
for key, value in payload.items():
if isinstance(value, dict):
for key1 in value.keys():
if(key1 == "M"):
payload[key] = value["M"]
tmp = filterJson(value)
if(tmp != None):
payload[key] = tmp
#print ans
else:
if(key == "S" or key == "N"):
return payload.pop(key)
with open('/home/ec2-user/olddata', 'r') as f:
for parsed_json in load_json_multiple(f):
print parsed_json
filterJson(parsed_json)
print parsed_json

Store the route to the "#myID" based on the key value

I have a json in the following format.
json_tree ={
"Garden": {
"Seaside": {
"#loc": "127.0.0.1",
"#myID": "1.3.1",
"Shoreside": {
"#myID": "3",
"InfoList": {
"Notes": {
"#code": "0",
"#myID": "1"
},
"Count": {
"#myID": "2",
"#val": "0"
}
},
"state": "0",
"Tid": "3",
"Lakesshore": {
"#myID": "4",
"InfoList": {
"Notes": {
"#code": "0",
"#oid": "1"
},
"Count": {
"#myID": "2",
"#val": "0"
}
},
"state": "0",
"Tid": "4"
}
},
"state": "0",
"Tid": "2"
},
"Tid": "1",
"state": "0"
}
}
My implementation:
def getmyID(json_tree, itemList):
for k1, v1 in json_tree .items():
for k,v in v1.items():
if k == itemList:
return '{}{}.{}'.format(json_tree['#myID'] + '.' if '#myID' in json_tree else '',
v['#myID'], v['InfoList']['status']['#myID'])
The issue I have is that this method does not work when as it returns "None" when I want to find the route to "BackYard". Note the 'BackYard" is nested inside the "Seaside" node.
I am appending the "#myID" of the key node to the "#myID" in the respective "status" node.
getmyID(json_tree, "Seaside")
"Output" = "1.2.3.26" --> Currently I get this only
getmyID(json_tree, "BackYard")
"Output" = "1.2.3.75.32" --> expected output but getting "None"
The expected output of "Backyard" is created by concatenating the "#myID" of the root node which it is nested in, that is "Seaside" in this case, and concatenating it with the "#myID" of "backYard" and the "#myID" of its "status" node.
"For a nested node, output string is formed by":
["root #myID" + "nested node "#myID" + "status node #myID"] or ["1.2.3" + "75" + "23"] for "Backyard".
"For a level node, output string is formed by":
["root #myID" + "status node #myID"] or ["1.2.3" + "26"] for "Seaside".
Any help would be appreciated.
Thanks.
You can use recursion with a generator:
def get_vals(d, target, path = []):
for a, b in d.items():
if a == target:
yield '.'.join(filter(None, path+[b['#myID'], b["InfoList"]['status']['#myID']]))
if isinstance(b, dict):
yield from get_vals(b, target, path + [b.get('#myID', '')])
print(list(get_vals(json_tree, "Seaside")))
print(list(get_vals(json_tree, "BackYard")))
Output:
['1.2.3.26']
['1.2.3.75.32']
Edit: newest data:
def get_vals(d, target, paths = []):
for a, b in d.items():
if a == target:
yield '.'.join(filter(None, paths+[b["#myID"], b.get("InfoList", {}).get('Notes', {}).get('#myID')]))
if isinstance(b, dict):
yield from get_vals(b, target, paths+[b.get('#myID')])
print(list(get_vals(json_tree, "Seaside")))
print(list(get_vals(json_tree, 'Shoreside')))
Output:
['1.3.1']
['1.3.1.3.1']
EDIT - I have no problem with people down-voting my answers or even editing them, as long as they tell me why. Would love to hear back from Yvette Colomb and ChrisF.
Here is the solution I came up with using a stack-based approach. The idea is that you try to go as deep as you can in your nested structure before finding the desired key (what you called itemList). I'm sure there must be a cuter way of doing this:
json_tree = {
"Gardens": {
"Seaside": {
"#loc": "porch",
"#myID": "1.2.3",
"Tid": "1",
"InfoList": {
"status": {
"#default": "0",
"#myID": "26"
},
"count": {
"#default": "0",
"#myID": "1"
}
},
"BackYard": {
"#myID": "75",
"Tid": "2",
"InfoList": {
"status": {
"#default": "6",
"#myID": "32"
},
"count": {
"#default": "0",
"#myID": "2"
}
}
}
}
}
}
def is_valid_kv_pair(key, value):
try:
assert isinstance(key, str)
assert isinstance(value, dict)
assert isinstance(value["#myID"], str)
assert isinstance(value["Tid"], str)
assert isinstance(value["InfoList"], dict)
assert isinstance(value["InfoList"]["status"], dict)
assert isinstance(value["InfoList"]["status"]["#default"], str)
assert isinstance(value["InfoList"]["status"]["#myID"], str)
except (AssertionError, KeyError):
return False
else:
return True
def get_id(dictionary, desired_key):
if not isinstance(dictionary, dict):
return None
dictionary_stack = [dictionary]
root_stack = []
while dictionary_stack:
current_dictionary = dictionary_stack.pop()
appended_root = False
for key, value in current_dictionary.items():
if appended_root:
root_stack.pop()
if is_valid_kv_pair(key, value):
if key == desired_key:
rootIDs = [d["#myID"] for d in root_stack]
myID = value["#myID"]
statusID = value["InfoList"]["status"]["#myID"]
return ".".join(rootIDs + [myID] + [statusID])
root_stack.append(value)
appended_root = True
if isinstance(value, dict):
dictionary_stack.append(value)
return None
ID = get_id(json_tree, "BackYard")
print(ID)

comparing elements of json in python

I do have a sample json as below:
{
"parent": [
{
"code": "333",
"priority": {
"p": "BB"
}
},
{
"code": "999",
"priority": {
"p": "BX"
}
},
{
"code": "2222",
"priority": {
"p": "BA"
}
}
]
}
This is just a sample json. I want to compare the value of p(priority) and based on that return corresponding code which has highest priority. Like in above sample json, 999 is the value that I am expecting since the value of p is BX which has highest priority. Is there a short cut and efficient way in python since I am new to python?
Update/clarification on Question:
Since I got answer based on the assumption that p is having alphabetic value which we can prioritize. But, that is not the case. P could have 3 set of values(which are fixed for example: BX is highest priority,BB is second and BA is least). If this json has priority BX, I need to return 999, if BX is not there but BB and BA are there, I need to return BB.
This was my basic solution which works but I think it has to many conditions:
code = None
for item in json['parent']:
if item['priority'] is not None and item['priority']['p'] is not None and item['priority']['p'] == 'BX' \
and item['code'] is not None:
code = item['code']
if item['priority'] is not None and item['priority']['p'] is not None and item['priority']['p'] == 'BB' \
and item['code'] is not None and code is not 'BX':
code = item['code']
if item['priority'] is not None and item['priority']['p'] is not None and item['priority']['p'] == 'BA' \
and item['code'] is not None and code is not 'BX' and code is not 'BB':
code = item['code']
You can utilise the argmin function from numpy:
import numpy as np
json = {
"parent": [
{
"code": "333",
"priority": {
"p": "B"
}
},
{
"code": "999",
"priority": {
"p": "A"
}
},
{
"code": "2222",
"priority": {
"p": "C"
}
}
]
}
highest_prio_ind = np.argmin([x['priority']['p'] for x in json['parent']])
print(json['parent'][highest_prio_ind]['code'])
which returns the wanted 999.
You can use Python's builtin function min() (doc) with custom key=:
json_data = {
"parent": [
{
"code": "333",
"priority": {
"p": "B"
}
},
{
"code": "999",
"priority": {
"p": "A"
}
},
{
"code": "2222",
"priority": {
"p": "C"
}
}
]
}
item = min(json_data['parent'], key=lambda k: k['priority']['p'])
print(item['code'])
Prints:
999
Let's just take your JSON Object as a variable and sort it.
parent = [
{
"code": "333",
"priority": {
"p": "B"
}
},
{
"code": "999",
"priority": {
"p": "A"
}
},
{
"code": "2222",
"priority": {
"p": "C"
}
}
]
To parse and sort the same there is a simple solution of lambda function.
parent = sorted(parent, key=lambda k: k['priority'].get('p', 0), reverse=False)
This will give you output something like this..
{'code': '999', 'priority': {'p': 'A'}}
{'code': '333', 'priority': {'p': 'B'}}
{'code': '2222', 'priority': {'p': 'C'}}
You can even turn into reverse by just doing reverse=True.
To get the value directly just find the code value of first element.
print(parent[0]['code'])
This will give you 999

Remove all occurences of a value from a nested dictionary

I have a nested dictionary as the following.
myDict= {
"id": 10,
"state": "MY LIST",
"Stars":
{
"BookA": {
"id": 10,
"state": "new book",
"Mystery": {
"AuthorA":
{
"id": "100",
"state": "thriller"
},
"AuthorB":
{
"id": "112",
"state": "horror"
}
},
"Thriller": {
"Store1":
{
"id": "300",
"state": "Old"
}
}
}
}
}
I want to return a dictionary which has all of the "state": "text" removed. So that means, I want to remove all the "state" fields and have an output as below.
I want it to be generic method as the dictionary could be nested on many levels.
myDict=
{
id: 10,
"Stars":
{
"BookA": {
"id": 10
"Mystery": {
"AuthorA":
{
"id": "100"
},
"AuthorB":
{
"id": "112"
}
},
"Thriller": {
"Store1":
{
"id": "300"
}
}
}
}
I tried the following but it doesnt seem to work. It only removes the "state": "MY LIST". May someone help me to resolve the issue?
def get(self):
removelist= ["state"]
new_dict = {}
for key, item in myDict.items():
if key not in removelist:
new_dict.update({key: item})
return new_dict
It doesnt remove all the "state" values.
You can use a DFS:
def remove_keys(d, keys):
if isinstance(d, dict):
return {k: remove_keys(v, keys) for k, v in d.items() if k not in keys}
else:
return d
The idea is to remove recursively the keys from subtrees: for every subtree that is a nested dict, return a dict without the keys to remove, using a dict comprehension; for every leaf (that is a single value), just return the value.
Test:
from pprint import pprint
pprint(remove_keys(myDict, ['state']))
Output:
{'Stars': {'BookA': {'Mystery': {'AuthorA': {'id': '100'},
'AuthorB': {'id': '112'}},
'Thriller': {'Store1': {'id': '300'}},
'id': 10}},
'id': 10}
The problem is you aren't handling the nested dictionaries.
def get(self):
removelist= ["state"]
new_dict = {}
for key, item in myDict.items():
if key not in removelist:
new_dict.update({key: item})
if isinstance(item, dict):
# You'll need to handle this use case.
return new_dict
To elaborate, lets look back at your dictionary:
myDict= {
"id": 10, # int
"state": "MY LIST", # string
"Stars": { # dictionary
"BookA": {
"id": 10, # int
"state": "new book", # string
"Mystery": { # dictionary
"AuthorA": {
"id": "100",
"state": "thriller"
},
"AuthorB": {
"id": "112",
"state": "horror"
}
},
"Thriller": {
"Store1": {
"id": "300",
"state": "Old"
}
}
}
}
}
I commented in the types for clarity. Your code is currently parsing myDict and ignoring the key "state". Once you hit the value "Stars", you need to parse that dictionary to also ignore the key "state".

Python - create dynamic nested dictionary from list of dictionary

Below is sample list data, I want to convert it into a dynamic dictionary.
result = [
{
"standard": "119",
"score": "0",
"type": "assignment",
"student": "4"
},
{
"standard": "119",
"score": "0",
"type": "assignment",
"student": "5"
},
{
"standard": "118",
"score": "0",
"type": "assessment",
"student": "4"
}
]
I want to create one function conv_to_nested_dict(*args,data), which convertes all list of key to dictonary dynamically.
For example : conv_to_nested_dict(['standard','student'],result) should give op :
{
"118": {
"4": [{
"score": "0",
"type": "assessment"
}]
},
"119": {
"4": [{
"score": "0",
"type": "assignment"
}],
"5": [{
"score": "0",
"type": "assignment"
}]
}
}
conv_to_nested_dict(['standard','type'],result)
{
"118": {
"assessment": [{
"score": 0,
"student": "4"
}]
},
"119": {
"assignment": [{
"score": 0,
"student": "4"
},{
"score": 0,
"student": "5"
}]
}
}
This is a general idea.
def conf_to_nested_dict(keys, result):
R = {}
for record in result:
node = R
for key in keys[:-1]:
kv = record[key]
next_node = node.get(kv, {})
node[kv] = next_node
node = next_node
last_node = node.get(record[keys[-1]], [])
last_node.append(record)
node[record[keys[-1]]] = last_node
return R
#R is your structure
result is your source array, keys are the keys by which you want to group results. Iterate over results, for each record - create a tree structure based on key values ( record[key] ). For the last key - create a list and append the record to it.

Categories