I am trying to get the most recent date in a nested dictionary. The dates are strings and can be found in a variable number of dictionaries under the key forth. This is my approach:
# Sample payload: each "third_*" entry wraps a date string (or None)
# under the key "forth".
data = {
    "first": {
        "second": {
            "third_1": {"forth": "2022-01-01"},
            "third_2": {"forth": None},
            "third_3": {"forth": "2021-01-01"},
        },
    },
}
def get_max(data, key):
    """Return the most recent date string found under *key* in a dict of dicts.

    Parameters
    ----------
    data : dict
        Mapping whose values are dicts that may hold a "YYYY-MM-DD" date
        string (or None/"") under *key*.
    key : str
        The key to look up in each nested dict (e.g. "forth").

    Returns
    -------
    str
        The chronologically largest date string.

    Raises
    ------
    ValueError
        If a non-empty date string is malformed, or if no non-empty dates
        are present at all (from max() on an empty sequence).
    """
    from datetime import datetime  # local import: the snippet has no imports

    candidates = [item.get(key) for item in data.values()]

    # The original parsed each date and then re-formatted it back to the
    # same "%Y-%m-%d" string — a redundant round trip. Zero-padded ISO
    # dates order chronologically when compared as plain strings, so we
    # keep parsing only as validation and return the original strings.
    valid = []
    for date in candidates:
        if date:
            datetime.strptime(date, "%Y-%m-%d")  # raises ValueError if malformed
            valid.append(date)
    return max(valid)
out = data["first"]["second"]
out = get_max(data=out, key="forth")
out
Is there anything I can improve?
I think comparing the dates as plain strings, without converting them into date objects, will also work (ISO "YYYY-MM-DD" strings sort chronologically).
You can use below approach as well
data = {
    "first": {
        "second": {
            "third_1": {"forth": "2022-01-01"},
            "third_2": {"forth": None},
            "third_3": {"forth": "2021-01-01"},
        }
    }
}

# The entry whose "forth" date is the largest; falsy dates (None/"") are
# filtered out first. ISO "YYYY-MM-DD" strings compare chronologically.
max(filter(lambda x: x["forth"], data["first"]["second"].values()), key=lambda x: x["forth"])

# Fixed: the original `try:` had no except/finally clause, which is a
# SyntaxError. Flatten all "forth" values, skip None, and take the max.
try:
    Max = max(d for a, b in data["first"]["second"].items() for c, d in b.items() if d is not None)
except ValueError:
    # Raised by max() when every nested date is None.
    Max = None
Related
I'm reading data from an Update Cloud Firestore Trigger. The event is a dictionary that contains the data within the key ['value']['fields']. However, each of the keys contains a nested dictionary with a key like 'integerValue', 'booleanValue' or 'stringValue', where the value of integerValue is actually a string. Is there a method to remove the 'type pointers'?
How can I convert this:
{
'fields': {
'count': {
'integerValue': '0'
},
'verified': {
'booleanValue': False
},
'user': {
'stringValue': 'Matt'
}
}
}
To this:
{
'count': 0,
'verified': False,
'user': 'Matt',
}
Recently I encountered a similar problem.
We could recursively traverse the map to extract and simplify the event trigger data.
Here's python implementation, extended from previous answers.
class FirestoreTriggerConverter(object):
    """Convert Cloud Firestore trigger payloads into plain Python values.

    A trigger payload wraps every value as {'<type>Value': raw}; this class
    strips those type wrappers recursively, handling nested maps, arrays,
    timestamps and document references.
    """

    def __init__(self, client=None) -> None:
        # Fall back to a default Firestore client only when none is supplied.
        self.client = client if client else firestore.client()
        # Dispatch table: Firestore type tag -> converter for the raw value.
        self._action_dict = {
            'geoPointValue': (lambda x: dict(x)),
            'stringValue': (lambda x: str(x)),
            'arrayValue': (lambda x: [self._parse_value(value_dict) for value_dict in x.get("values", [])]),
            'booleanValue': (lambda x: bool(x)),
            'nullValue': (lambda x: None),
            'timestampValue': (lambda x: self._parse_timestamp(x)),
            'referenceValue': (lambda x: self._parse_doc_ref(x)),
            'mapValue': (lambda x: {key: self._parse_value(value) for key, value in x["fields"].items()}),
            'integerValue': (lambda x: int(x)),
            'doubleValue': (lambda x: float(x)),
        }

    def convert(self, data_dict: dict) -> dict:
        """Convert a {'field': {'<type>Value': raw}} mapping to {'field': value}."""
        return {key: self._parse_value(value_dict) for key, value_dict in data_dict.items()}

    def _parse_value(self, value_dict: dict):
        """Unwrap one {'<type>Value': raw} pair and convert its raw value."""
        # BUGFIX: the original used value_dict.popitem(), which destructively
        # mutates the caller's event payload. Read the single item instead.
        data_type, value = next(iter(value_dict.items()))
        return self._action_dict[data_type](value)

    def _parse_timestamp(self, timestamp: str):
        """Parse an RFC3339 timestamp, with or without fractional seconds."""
        try:
            return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')

    def _parse_doc_ref(self, doc_ref: str):
        """Turn a full Firestore resource name into a DocumentReference.

        The resource name looks like ".../documents/<collection>/<doc path>".
        """
        path_parts = doc_ref.split('/documents/')[1].split('/')
        collection_path = path_parts[0]
        document_path = '/'.join(path_parts[1:])
        return self.client.collection(collection_path).document(document_path)
Use this as follows
converter = FirestoreTriggerConverter(client)
simplified_data_dict = converter.convert(event_data_dict["event"]["value"]["fields"])
You can create a mapping of the known types and convert the values that way:
# Map each Firestore type tag onto the constructor that unwraps its raw value.
types = dict(
    integerValue=int,
    booleanValue=bool,
    stringValue=str,
)
You can replace a nested dictionary like the one you have through the magic of dict.popitem:
# Unwrap each field's single {'<type>Value': raw} pair and convert the raw
# value with the matching constructor from `types`.
replacement = {}
for field, wrapper in event['value']['fields'].items():
    tag, raw = wrapper.popitem()
    replacement[field] = types[tag](raw)
event['value'] = replacement
You can reduce it to a one liner with a dictionary comprehension:
# Fixed: the original one-liner was not valid Python (misplaced generator
# and unbalanced parentheses). popitem() unwraps each field's single
# {'<type>Value': raw} pair; the one-element tuple keeps it a comprehension.
event['value'] = {k: types[t](v) for k, d in event['value']['fields'].items() for t, v in (d.popitem(),)}
Use keys() in dictionary
origin_dict = {
    'fields': {
        'count': {'integerValue': '0'},
        'verified': {'booleanValue': False},
        'user': {'stringValue': 'Matt'}
    }
}

# Strip the outer 'fields' wrapper, then replace each one-entry type
# wrapper ({'integerValue': '0'}, ...) with its bare value. Note that the
# values keep their raw form ('0' stays a string).
b = origin_dict['fields']
new_dict = dict()
for outer_key in b.keys():
    # the inner loop visits the single type-tag key of each wrapper
    for inner_key in b[outer_key].keys():
        new_dict[outer_key] = b[outer_key][inner_key]
print(new_dict)
There is no explicit method to do so. One thing you can do is iterate through the existing dictionary, picking up the items you need in the new dictionary:
d = {
    'fields': {
        'count': {'integerValue': '0'},
        'verified': {'booleanValue': False},
        'user': {'stringValue': 'Matt'}
    }
}

required = ['count', 'verified', 'user']
d1 = {}
# Walk each wrapper dict; integers are stored as strings and need int(),
# every other value is taken as-is.
for fields in d.values():
    for name in required:
        wrapper = fields[name]
        value = list(wrapper.values())[0]
        d1[name] = int(value) if 'integerValue' in wrapper else value
print(d1)
# {'count': 0, 'verified': False, 'user': 'Matt'}
I have JSON file as mentioned below,
**test.json**
{
"header1" :
{
"header1_body1":
{
"some_key":"some_value",
.......................
},
"header1_body2":
{
"some_key":"some_value",
.......................
}
},
"header2":
{
"header2_body1":
{
"some_key":"some_value",
.......................
},
"header2_body2":
{
"some_key":"some_value",
.......................
}
}
}
Would like to group the JSON content into lists as below:
header1 = ['header1_body1','header1_body2']
header2 = ['header2_body1','header2_body2']
header1, header2 can continue up to header n. So the lists have to be created dynamically, each containing its values as shown above.
How can I achieve this?
What's the most optimal way to approach it?
SOLUTION:
import json

with open('test.json') as json_data:
    d = json.load(json_data)

# Python 3 fix: dict.iteritems() was removed -- use items(). keys() now
# returns a view, so wrap it in list() to get a real list.
# NOTE(review): injecting names via globals() works but is fragile; a plain
# {header_name: [body keys]} dict would be cleaner.
for k, v in d.items():
    if k == "header1" or k == "header2":
        globals()[k] = list(v.keys())
now, header1 and header2 can be accessed as lists.
# Python 3 fix: print is a function, not a statement.
for i in header1:
    print(i)
Assuming you read the JSON into a variable d (maybe using json.loads), you could iterate over the keys (sorted?) and build the lists with the keys of current value:
# For each top-level key (in sorted order), print "<key> = [sorted body keys]".
for key in sorted(d.keys()):
    body_keys = sorted(d[key].keys())
    print(key + ' = ' + str(body_keys))
Fixing your json structure:
{
"header1" :
{
"header1_body1":
{
"some_key":"some_value"
},
"header1_body2":
{
"some_key":"some_value"
}
},
"header2":
{
"header2_body1":
{
"some_key":"some_value"
},
"header2_body2":
{
"some_key":"some_value"
}
}
}
And then loading and creating lists:
# Collect the body-key list of every header, then print them numbered from 1.
header = []
for value in dictdump.values():
    header.append(list(value.keys()))
for position, keys in enumerate(header, start=1):
    print("header{} : {}".format(position, keys))
Gives:
header1 : ['header1_body1', 'header1_body2']
header2 : ['header2_body1', 'header2_body2']
Once you load your json, you can get the list you want for any key by doing something like the following (headers variable below is a placeholder for your loaded json). You don't need to convert it to a list to work with it as an iterable but wrapped it in list(...) to match the output in your question.
list(headers['header1'].keys())
If you need to actually store the list of keys for each of your "header" dicts in some sort of accessible format, then you could create another dictionary that contains the lists you want. For example:
import json

# Raw JSON: two headers, each holding two body objects.
data = """{
"header1" : {
"header1_body1": {
"some_key":"some_value"
},
"header1_body2": {
"some_key":"some_value"
}
},
"header2": {
"header2_body1": {
"some_key":"some_value"
},
"header2_body2": {
"some_key":"some_value"
}
}
}"""

headers = json.loads(data)

# The key list for one specific header (iterating a dict yields its keys).
header = list(headers['header1'])
print(header)
# ['header1_body1', 'header1_body2']

# Or build a {header_name: [body keys]} dict for all headers at once.
results = {name: list(body) for name, body in headers.items()}
print(results)
# OUTPUT
# {'header1': ['header1_body1', 'header1_body2'], 'header2': ['header2_body1', 'header2_body2']}
You can use recursion:
d = {'header1': {'header1_body1': {'some_key': 'some_value'}, 'header1_body2': {'some_key': 'some_value'}}, 'header2': {'header2_body1': {'some_key': 'some_value'}, 'header2_body2': {'some_key': 'some_value'}}}

def flatten(mapping):
    """Yield every key in *mapping*, recursing depth-first into dict values."""
    for key, value in mapping.items():
        yield key
        if isinstance(value, dict):
            yield from flatten(value)

# Keep only the descendant keys that share each top-level key's prefix.
new_results = {
    top: [name for name in flatten(children) if name.startswith(top)]
    for top, children in d.items()
}
Output:
{'header1': ['header1_body1', 'header1_body2'], 'header2': ['header2_body1', 'header2_body2']}
import json

with open('test.json') as json_data:
    d = json.load(json_data)

# Python 3 fix: dict.iteritems() was removed -- use items(); wrap keys() in
# list() since it now returns a view rather than a list.
for k, v in d.items():
    if k == "header1" or k == "header2":
        globals()[k] = list(v.keys())
now, `header1` and `header2` can be accessed as lists.
# Python 3 fix: print is a function, not a statement.
for i in header1:
    print(i)
This question already has answers here:
How to remove all empty fields in a nested dict?
(5 answers)
Closed 4 years ago.
In a Python script I run json_decoded = json.load(file) that results in the following JSON data:
# Input: "keyB" is an empty list and "subkeyA3" an empty string -- both are
# the "empty" entries the question wants removed.
json_decoded = {
    "data": {
        "keyA": [
            {"subkeyA1": "valueA1", "subkeyA2": "valueA2"},
            {"subkeyA3": ""},
        ],
        "keyB": [],
    }
}
I would like to remove all the [] and "" ("empty") key-value pairs so to have:
json_decoded = {
"data": {
"keyA": [
{
"subkeyA1": "valueA1",
"subkeyA2": "valueA2"
}
]
}
}
How can I have that?
Note: I am pretty new to Python (v2.7.3).
You can use recursion to traverse the structure:
json_decoded = {'data': {'keyA': [{'subkeyA1': 'valueA1', 'subkeyA2': 'valueA2'}, {'subkeyA3': ''}], 'keyB': []}}

def remove_empty(d):
    """Return a copy of dict *d* with all falsy values ('', [], {}, None, 0)
    removed, recursing into nested dicts and into dicts inside lists.

    Non-dict list elements are kept as-is when truthy; the original crashed
    with AttributeError on lists containing non-dict values.
    """
    final_dict = {}
    for key, value in d.items():
        if not value:
            continue  # drop empty/falsy entries entirely
        if isinstance(value, dict):
            final_dict[key] = remove_empty(value)
        elif isinstance(value, list):
            # Clean dict elements, keep others, then drop falsy results
            # (mirrors the original filter(None, ...)).
            cleaned = [remove_empty(item) if isinstance(item, dict) else item
                       for item in value]
            final_dict[key] = [item for item in cleaned if item]
        else:
            final_dict[key] = value
    return final_dict

print(remove_empty(json_decoded))
Output:
{'data':
{'keyA':
[{'subkeyA1': 'valueA1',
'subkeyA2': 'valueA2'}
]
}
}
With the given script I am able to get the output shown in the screenshot,
but there is a column named as cve.description.description_data which is again in json format. I want to extract that data as well.
# Flatten the NVD CVE JSON feed into tabular form with pandas.
import json
import pandas as pd
from pandas.io.json import json_normalize
# NOTE(review): pandas.io.json.json_normalize is deprecated since pandas 1.0;
# pd.json_normalize is the modern spelling -- confirm the installed version.

# load json object
with open('nvdcve-1.0-modified.json') as f:
    d = json.load(f)

# tells us parent node is 'programs'
# One row per CVE item; nested dicts become dotted column names.
nycphil = json_normalize(d['CVE_Items'])
nycphil.head(3)
# NOTE(review): record_path='cve' treats each item's 'cve' dict as the record
# list; this does not descend into cve.description.description_data, which is
# what the question actually asks for -- verify the intended record_path.
works_data = json_normalize(data=d['CVE_Items'], record_path='cve')
works_data.head(3)
nycphil.to_csv("test4.csv")
If I change works_data = json_normalize(data=d['CVE_Items'], record_path='cve.descr') it gives this error:
"result = result[spec] KeyError: 'cve.description'"
JSON format as follows:
{
"CVE_data_type":"CVE",
"CVE_data_format":"MITRE",
"CVE_data_version":"4.0",
"CVE_data_numberOfCVEs":"1000",
"CVE_data_timestamp":"2018-04-04T00:00Z",
"CVE_Items":[
{
"cve":{
"data_type":"CVE",
"data_format":"MITRE",
"data_version":"4.0",
"CVE_data_meta":{
"ID":"CVE-2001-1594",
"ASSIGNER":"cve#mitre.org"
},
"affects":{
"vendor":{
"vendor_data":[
{
"vendor_name":"gehealthcare",
"product":{
"product_data":[
{
"product_name":"entegra_p&r",
"version":{
"version_data":[
{
"version_value":"*"
}
]
}
}
]
}
}
]
}
},
"problemtype":{
"problemtype_data":[
{
"description":[
{
"lang":"en",
"value":"CWE-255"
}
]
}
]
},
"references":{
"reference_data":[
{
"url":"http://apps.gehealthcare.com/servlet/ClientServlet/2263784.pdf?DOCCLASS=A&REQ=RAC&DIRECTION=2263784-100&FILENAME=2263784.pdf&FILEREV=5&DOCREV_ORG=5&SUBMIT=+ ACCEPT+"
},
{
"url":"http://www.forbes.com/sites/thomasbrewster/2015/07/10/vulnerable- "
},
{
"url":"https://ics-cert.us-cert.gov/advisories/ICSMA-18-037-02"
},
{
"url":"https://twitter.com/digitalbond/status/619250429751222277"
}
]
},
"description":{
"description_data":[
{
"lang":"en",
"value":"GE Healthcare eNTEGRA P&R has a password of (1) value."
}
]
}
},
"configurations":{
"CVE_data_version":"4.0",
"nodes":[
{
"operator":"OR",
"cpe":[
{
"vulnerable":true,
"cpe22Uri":"cpe:/a:gehealthcare:entegra_p%26r",
"cpe23Uri":"cpe:2.3:a:gehealthcare:entegra_p\\&r:*:*:*:*:*:*:*:*"
}
]
}
]
},
"impact":{
"baseMetricV2":{
"cvssV2":{
"version":"2.0",
"vectorString":"(AV:N/AC:L/Au:N/C:C/I:C/A:C)",
"accessVector":"NETWORK",
"accessComplexity":"LOW",
"authentication":"NONE",
"confidentialityImpact":"COMPLETE",
"integrityImpact":"COMPLETE",
"availabilityImpact":"COMPLETE",
"baseScore":10.0
},
"severity":"HIGH",
"exploitabilityScore":10.0,
"impactScore":10.0,
"obtainAllPrivilege":false,
"obtainUserPrivilege":false,
"obtainOtherPrivilege":false,
"userInteractionRequired":false
}
},
"publishedDate":"2015-08-04T14:59Z",
"lastModifiedDate":"2018-03-28T01:29Z"
}
]
}
I want to flatten all data.
Assuming the multiple URLs delineate between rows and all else meta data repeats, consider a recursive function call to extract every key-value pair in nested json object, d.
The recursive function uses global to update the needed global objects, which are then bound into a list of dictionaries for the pd.DataFrame() call. The last loop at the end updates a copy of the recursive function's dictionary, inner, to integrate the different urls (stored in multi).
import json
import pandas as pd

# load json object
with open('nvdcve-1.0-modified.json') as f:
    d = json.load(f)

# Globals filled in by recursive_extract:
#   inner -- flat dict of every scalar found inside single-element lists/dicts
#   multi -- the one list with more than one element; here that is assumed to
#            be the reference-URL list, which delineates the output rows
#            (TODO confirm: any other multi-element list would clobber it)
multi = []; inner = {}

def recursive_extract(i):
    """Depth-first walk over the nested dict/list structure.

    Scalars are flattened into the global `inner`; a list with more than one
    element is captured in the global `multi` instead of being descended.
    """
    global multi, inner
    if type(i) is list:
        if len(i) == 1:
            # Single-element list: unwrap and flatten its only element.
            for k,v in i[0].items():
                if type(v) in [list, dict]:
                    recursive_extract(v)
                else:
                    inner[k] = v
        else:
            # Multi-element list: treat it as the row-defining list.
            multi = i
    if type(i) is dict:
        for k,v in i.items():
            if type(v) in [list, dict]:
                recursive_extract(v)
            else:
                inner[k] = v

recursive_extract(d['CVE_Items'])

# One output row per element of `multi`: each row carries a copy of the
# shared flattened metadata plus that element's own fields (e.g. its url).
data_dict = []
for i in multi:
    tmp = inner.copy()
    tmp.update(i)
    data_dict.append(tmp)

df = pd.DataFrame(data_dict)
df.to_csv('Output.csv')
Output (all columns the same except for URL, widened for emphasis)
I have the following object in python:
{
name: John,
age: {
years:18
},
computer_skills: {
years:4
},
mile_runner: {
years:2
}
}
I have an array with 100 people with the same structure.
What is the best way to go through all 100 people and make it such that there is no more "years"? In other words, each object in the 100 would look something like:
{
name: John,
age:18,
computer_skills:4,
mile_runner:2
}
I know I can do something in pseudocode:
for(item in list):
if('years' in (specific key)):
specifickey = item[(specific key)][(years)]
But is there a smarter/more efficent way?
Your pseudo-code is already pretty good I think:
# Replace every {'years': n} wrapper on a person with the bare number,
# mutating each person dict in place.
for person in persons:
    for field, wrapped in person.items():
        if isinstance(wrapped, dict) and 'years' in wrapped:
            person[field] = wrapped['years']
This overwrites every property which is a dictionary that has a years property with that property’s value.
Unlike other solutions (like dict comprehensions), this will modify the object in-place, so no new memory to keep everything is required.
def flatten(d):
    """Return a copy of dict *d* where any value of the exact form
    {'years': n} is replaced by n; every other value is kept unchanged."""
    ret = {}
    # Python 3 fix: dict.iteritems() was removed -- use items().
    for key, value in d.items():
        if isinstance(value, dict) and len(value) == 1 and "years" in value:
            ret[key] = value["years"]
        else:
            ret[key] = value
    return ret

d = {
    "name": "John",
    "age": {
        "years": 18
    },
    "computer_skills": {
        "years": 4
    },
    "mile_runner": {
        "years": 2
    }
}

# Python 3 fix: print is a function, not a statement.
print(flatten(d))
Result:
{'age': 18, 'mile_runner': 2, 'name': 'John', 'computer_skills': 4}
Dictionary comprehension:
import json

with open("input.json") as f:
    cont = json.load(f)

# Python 3 fix: print is a function (the original used the Python 2
# statement form, which is a SyntaxError in Python 3).
print({el: cont[el]["years"] if "years" in cont[el] else cont[el] for el in cont})
prints
{u'age': 18, u'mile_runner': 2, u'name': u'John', u'computer_skills': 4}
where input.json contains
{
"name": "John",
"age": {
"years":18
},
"computer_skills": {
"years":4
},
"mile_runner": {
"years":2
}
}
Linear with regards to number of elements, you can't really hope for any lower.
As people said in the comments, it isn't exactly clear what your "object" is, but assuming that you actually have a list of dicts like this:
# NOTE(review): the name `list` shadows the built-in list type; it is kept
# only because the snippet below iterates it under this name -- prefer a
# name like `people` in real code.
list = [{
    'name': 'John',
    'age': {
        'years': 18
    },
    'computer_skills': {
        'years':4
    },
    'mile_runner': {
        'years':2
    }
}]
Then you can do something like this:
# EAFP: attempt the two-level lookup and ignore entries where it cannot
# work -- TypeError for non-dict values (e.g. the name string), KeyError
# for dicts without a 'years' key.
for record in list:
    for field in record:
        try:
            record[field] = record[field]['years']
        except (TypeError, KeyError):
            pass
Result:
list = [{'age': 18, 'mile_runner': 2, 'name': 'John', 'computer_skills': 4}]