I'm reading data from an Update Cloud Firestore Trigger. The event is a dictionary that contains the data within the key ['value']['fields']. However, each of the keys contains a nested dictionary containing a key like 'integerValue', 'booleanValue' or 'stringValue', where the value of integerValue is actually a string. Is there a method to remove the 'type pointers'?
How can I convert this:
{
'fields': {
'count': {
'integerValue': '0'
},
'verified': {
'booleanValue': False
},
'user': {
'stringValue': 'Matt'
}
}
}
To this:
{
'count': 0,
'verified': False,
'user': 'Matt',
}
Recently I encountered a similar problem.
We could recursively traverse the map to extract and simplify the event trigger data.
Here's a Python implementation, extended from previous answers.
class FirestoreTriggerConverter(object):
    """Convert a Firestore trigger payload (a 'fields' map whose values are
    wrapped in type tags such as 'integerValue') into plain Python values."""

    def __init__(self, client=None) -> None:
        # Fall back to the default Firestore client only when none is given.
        self.client = client if client else firestore.client()
        # Dispatch table: Firestore type tag -> converter callable.
        self._action_dict = {
            'geoPointValue': (lambda x: dict(x)),
            'stringValue': (lambda x: str(x)),
            'arrayValue': (lambda x: [self._parse_value(value_dict) for value_dict in x.get("values", [])]),
            'booleanValue': (lambda x: bool(x)),
            'nullValue': (lambda x: None),
            'timestampValue': (lambda x: self._parse_timestamp(x)),
            'referenceValue': (lambda x: self._parse_doc_ref(x)),
            'mapValue': (lambda x: {key: self._parse_value(value) for key, value in x["fields"].items()}),
            'integerValue': (lambda x: int(x)),
            'doubleValue': (lambda x: float(x)),
        }

    def convert(self, data_dict: dict) -> dict:
        """Convert an entire 'fields' mapping into a plain dict."""
        result_dict = {}
        for key, value_dict in data_dict.items():
            result_dict[key] = self._parse_value(value_dict)
        return result_dict

    def _parse_value(self, value_dict: dict) -> Any:
        """Unwrap one {'<typeTag>': raw} dict into its Python value.

        Fix: read the single entry with next(iter(...)) instead of
        popitem(), which destructively emptied the caller's event payload.
        """
        data_type, value = next(iter(value_dict.items()))
        return self._action_dict[data_type](value)

    def _parse_timestamp(self, timestamp: str):
        """Parse an RFC3339 timestamp with or without fractional seconds."""
        try:
            return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')

    def _parse_doc_ref(self, doc_ref: str) -> DocumentReference:
        """Turn a full resource path (.../documents/<coll>/<doc...>) into a DocumentReference."""
        path_parts = doc_ref.split('/documents/')[1].split('/')
        collection_path = path_parts[0]
        document_path = '/'.join(path_parts[1:])
        doc_ref = self.client.collection(collection_path).document(document_path)
        return doc_ref
Use this as follows
# Example usage: convert the nested 'fields' payload of an update event.
converter = FirestoreTriggerConverter(client)
simplified_data_dict = converter.convert(event_data_dict["event"]["value"]["fields"])
You can create a mapping of the known types and convert the values that way:
# Map each Firestore type tag to the constructor that strips it.
types = dict(
    integerValue=int,
    booleanValue=bool,
    stringValue=str,
)
You can replace a nested dictionary like the one you have through the magic of dict.popitem:
# Build a flattened {field: converted_value} copy of the payload.
replacement = {}
for key, meta in event['value']['fields'].items():
    # NOTE(review): popitem() removes the single type-tag entry, so the
    # nested dicts inside `event` are emptied as a side effect.
    typ, value = meta.popitem()
    replacement[key] = types[typ](value)
event['value'] = replacement
You can reduce it to a one liner with a dictionary comprehension:
# Fixed one-liner: the original was a SyntaxError ("for k t, v" and an unbalanced
# parenthesis). A generator yields (key, type_tag, raw) triples to unpack.
event['value'] = {k: types[t](v) for k, t, v in ((k, *d.popitem()) for k, d in event['value']['fields'].items())}
Use keys() in dictionary
# Source data: every field is wrapped in a one-entry type-tag dict.
origin_dict = {
    'fields': {
        'count': {'integerValue': '0'},
        'verified': {'booleanValue': False},
        'user': {'stringValue': 'Matt'}
    }
}
# remove first layer
b = origin_dict['fields']
# For each field keep the single wrapped value and drop the type tag.
new_dict = {field: wrapped for field, inner in b.items() for wrapped in inner.values()}
print(new_dict)
There is no explicit method to do so. One thing you can do is iterate through the existing dictionary, picking up the items you need in the new dictionary:
# The wrapped source structure.
d = {
    'fields': {
        'count': {'integerValue': '0'},
        'verified': {'booleanValue': False},
        'user': {'stringValue': 'Matt'}
    }
}
required = ['count', 'verified', 'user']
d1 = {}
for section in d.values():
    for name in required:
        # Each field dict has exactly one (type_tag, raw_value) entry.
        tag, raw = next(iter(section[name].items()))
        # Only integerValue payloads arrive as strings that need converting.
        d1[name] = int(raw) if tag == 'integerValue' else raw
print(d1)
# {'count': 0, 'verified': False, 'user': 'Matt'}
Related
I am trying to get the most recent date in a nested dictionary. The dates are strings and can be found in a variable number of dictionaries under the key forth. This is my approach:
# Sample input: the target dates live under 'forth' at the third level;
# some entries hold None instead of a date string.
data = {
    "first": {
        "second": {
            "third_1": {"forth": "2022-01-01"},
            "third_2": {"forth": None},
            "third_3": {"forth": "2021-01-01"},
        }
    }
}
def get_max(data, key):
    """Return the latest YYYY-MM-DD date string stored under *key* in the
    second-level dicts of *data*; falsy entries (e.g. None) are skipped."""
    raw_dates = [entry.get(key) for entry in data.values()]
    # The strptime/strftime round trip only validates the date format;
    # the comparison itself works on the ISO strings.
    validated = [
        datetime.strptime(d, "%Y-%m-%d").date().strftime("%Y-%m-%d")
        for d in raw_dates
        if d
    ]
    return max(validated)
# Drill down to the level holding the 'third_*' entries, then take the max.
out = data["first"]["second"]
out = get_max(data=out, key="forth")
out  # bare expression: displays the value in a REPL/notebook
Is there anything I can improve?
I think comparing the dates as strings, without converting them into date objects, will also work
You can use below approach as well
data = {
    "first": {
        "second": {
            "third_1": {"forth": "2022-01-01"},
            "third_2": {"forth": None},
            "third_3": {"forth": "2021-01-01"},
        }
    }
}
# Drop entries whose 'forth' is falsy, then pick the max by the date string.
max((entry for entry in data["first"]["second"].values() if entry["forth"]), key=lambda entry: entry["forth"])
try:
# Fix: compare against None with 'is not' (identity), the idiomatic form of '!= None'.
Max = max(d for a, b in data["first"]["second"].items() for c, d in b.items() if d is not None)
I have the following dict:
# Example mapping: the top-level keys ('foo', 'baz') are unknown at runtime;
# the lookup targets the nested 'type' field.
{
    'foo': {
        'name': 'bar',
        'options': None,
        'type': 'qux'
    },
    'baz': {
        'name': 'grault',
        'options': None,
        'type': 'plugh'
    },
}
The names of the top level keys are unknown at runtime. I am unable to figure out how to get the name of the top level key where the value of type is plugh. I have tried all kinds of iterators, loops, comprehensions etc., but I'm not great with Python. Any pointers would be appreciated.
Try this:
# Scan every top-level entry; report each key whose nested 'type' matches.
for top_key, nested in dict_.items():
    if nested['type'] == 'plugh':
        print(top_key)
Or if you a one liner to get the first key matching the condition:
# Take the first match only; next() raises StopIteration if nothing matches.
matches = (k for k, nested in dict_.items() if nested['type'] == 'plugh')
key = next(matches)
print(key)
output:
baz
Try iterating over the dict keys and check for the element
for key in d:
    entry = d[key]
    # No parentheses needed around the condition in Python.
    if entry['type'] == 'plugh':
        print(key)
baz
You need to iterate over your data like this:
def top_level_key(search_key, data):
    """Return the first top-level key whose entry's 'type' equals
    *search_key*, or None when no entry matches (same as the implicit
    fall-through return)."""
    return next((k for k, v in data.items() if v['type'] == search_key), None)
# Look up (and print) the key whose entry has type 'plugh'.
print(top_level_key('plugh', data_dict))
Besides running loop to filter the target, you have another option to use jsonpath, which is quite like xPath
# pip install jsonpath-ng==1.5.2
# python 3.6
from jsonpath_ng.ext import parse
dct = {
    'foo': {
        'name': 'bar',
        'options': None,
        'type': 'qux'
    },
    'baz': {
        'name': 'grault',
        'options': None,
        'type': 'plugh'
    },
}
# Fix: the original '$[?#.type="plugh"]' is invalid — the '#' is a mangled '@'
# (a common Stack Overflow artifact). jsonpath-ng's extended filter syntax is
# $[?(@.field = value)].
parse_str = '$[?(@.type = "plugh")]'
jsonpath_expr = parse(parse_str)
jsonpath_results = jsonpath_expr.find(dct)
if len(jsonpath_results) > 0:
    result = jsonpath_results[0].value
    print(result)
    # {'name': 'grault', 'options': None, 'type': 'plugh'}
else:
    result = None
Ref: https://pypi.org/project/jsonpath-ng/ to find out more about jsonpath syntax
# Example input: arbitrarily deep nesting under 'user'.
a = {
    'user': {
        'username': 'mic_jack',
        'name': {
            'first': 'Micheal',
            'last': 'Jackson'
        },
        # NOTE(review): '#' is likely a mangled '@' from the original post.
        'email': 'micheal#domain.com',
        #...
        #... Infinite level of another nested dict
    }
}
# Update specs have the form '<dotted.path>=<new value>'.
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'
#...
#str_key_n = 'user.level2.level3...leveln=XXX'
Let's consider this 'str_key' string, goes with infinite number of dots/levels.
Expected Output:
a = {
'user': {
'username': 'john', # username, should be replace
'name': {
'first': 'Micheal',
'last': 'henry' # lastname, should be replace
},
'email': 'micheal#domain.com',
...
... # Infinite level of another nested dict
}
}
I'm expecting the answers for applying 'n' Level of nested key string, rather than simply replacing by a['user']['username'] = 'John' statically. Answers must be work for any number of 'dotted' string values.
Thanks in advance!
There are three steps:
Separate the key-value pair string into a fully-qualified key and
value.
Split the key into path components.
Traverse the dictionary to find the relevant value to update.
Here's an example of what the code might look like:
# Split by the delimiter, making sure to split once only
# to prevent splitting when the delimiter appears in the value
# Split the spec into path and value; split once so '=' may appear in the value.
key, value = str_key_n.split("=", 1)
# The dotted path becomes a list of dictionary keys.
key_parts = key.split(".")
# The final component names the entry we will assign to.
last_part = key_parts.pop()
# Walk the nested dicts down to the parent of the target entry.
current = a
for part in key_parts:
    current = current[part]
# Update the value
current[last_part] = value
I'd go with a recursive function to accomplish this, assuming your key value strings are all valid:
def assign_value(sample_dict, str_keys, value):
    """Set *value* at the nested path *str_keys* inside *sample_dict*.

    Returns the (mutated) dict so the result can be reassigned/chained.
    """
    head, *rest = str_keys
    if not rest:  # reached the leaf key
        sample_dict[head] = value
    else:
        sample_dict[head] = assign_value(sample_dict[head], rest, value)
    return sample_dict
The idea is to traverse your dict until you hit the lowest key and then we assign our new value to that last key;
if __name__ == "__main__":
    sample_dict = {
        'user': {
            'username': 'mic_jack',
            'name': {'first': 'Micheal', 'last': 'Jackson'},
            'email': 'micheal#domain.com'
        }
    }
    # First update: replace the username.
    path_1, new_val_1 = 'user.username=john'.split('=')
    sample_dict = assign_value(sample_dict, path_1.split('.'), new_val_1)
    print("result: {} ".format(sample_dict))
    # Second update: replace the last name.
    path_2, new_val_2 = 'user.name.last=henry'.split('=')
    sample_dict = assign_value(sample_dict, path_2.split('.'), new_val_2)
    print("result: {}".format(sample_dict))
To use the assign_value you would need to split your original key to the keys and value as seen above;
If you're okay with using exec() and modify your str_key(s), you could do something like:
def get_keys_value(string):
    """Split a "dotted.keys=value" spec into its (keys, value) halves.

    Note: raises ValueError if the string contains more than one '='.
    """
    keys, value = string.split("=")
    return keys, value
def get_exec_string(dict_name, keys):
    """Build the assignment source string "name[k1][k2]...=value" for exec().

    The literal token 'value' refers to a variable expected to be in scope
    at exec() time.
    """
    subscripts = "".join("[" + key + "]" for key in keys.split("."))
    return dict_name + subscripts + "=value"
# NOTE(review): exec() runs arbitrary code — acceptable here only because the
# key strings are hard-coded literals; never feed it untrusted input.
str_key_1 = "'user'.'username'=john"
str_key_2 = "'user'.'name'.'last'=henry"
str_key_list = [str_key_1, str_key_2]
for str_key in str_key_list:
    keys, value = get_keys_value(str_key) # split into key-string and value
    exec_string = get_exec_string("a", keys) # extract keys from key-string
    # The generated string references the loop-local name 'value'.
    exec(exec_string)
print(a)
# prints {'user': {'email': 'micheal#domain.com', 'name': {'last': 'henry', 'first': 'Micheal'}, 'username': 'john'}}
# Update specs and the dict they will be applied to.
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'
a = {
    'user': {
        'username': 'mic_jack',
        'name': {
            'first': 'Micheal',
            'last': 'Jackson'
        },
        # NOTE(review): '#' is likely a mangled '@' from the original post.
        'email': 'micheal#domain.com',
        #...
        #... Infinite level of another nested dict
    }
}
def MutateDict(key):
    """Apply one 'dotted.path=value' spec to the module-level dict `a`,
    printing progress; bails out on the first invalid path component."""
    strkey, strval = key.split('=')[0], key.split('=')[1]
    strkeys = strkey.split('.')
    print("strkeys = " ,strkeys)
    node = a
    parent = None
    part = ""
    for part in strkeys:
        print(node.keys())
        if part not in node.keys():
            print ("Invalid key specified")
            return
        parent = node
        node = node[part]
    # `parent` holds the dict one level above the leaf; overwrite the leaf.
    parent[part] = strval
# Apply both update specs in place, printing `a` after each mutation.
MutateDict(str_key_1)
print(a)
MutateDict(str_key_2)
print(a)
I have a text file which I read in. This is a log file so it follows a particular pattern. I need to create a JSON ultimately, but from researching this problem, once it is in a dict it will be a matter of using json.loads() or json.dumps().
A sample of the text file is below.
INFO:20180606_141527:submit:is_test=False
INFO:20180606_141527:submit:username=Mary
INFO:20180606_141527:env:sys.platform=linux2
INFO:20180606_141527:env:os.name=ubuntu
The dict structure which I am ultimately looking for is
{
"INFO": {
"submit": {
"is_test": false,
"username": "Mary"
},
"env": {
"sys.platform": "linux2",
"os.name": "ubuntu"
}
}
}
I am ignoring the timestamp information in each list for now.
This is a snippet of the code I am using,
# Original (buggy) approach: rebuilding tree_dict from scratch for every key
# wraps the previous tree inside the new one instead of merging into it,
# which produces the deeply nested "undesired output" shown below.
import csv
tree_dict = {}
with open('file.log') as file:
    for row in file:
        for key in reversed(row.split(":")):
            tree_dict = {key: tree_dict}
Which results in an undesired output,
{'INFO': {'20180606_141527': {'submit': {'os.name=posix\n': {'INFO': {'20180606_141527': {'submit': {'sys.platform=linux2\n': {'INFO': {'20180606_141527': {'submit': {'username=a227874\n': {'INFO': {'20180606_141527': {'submit': {'is_test=False\n': {}}}}}}}}}}}}}}}}}
I need to dynamically populate the dict because I don't know the actual field/key names.
with open('demo.txt') as f:
    lines = f.readlines()
dct = {}
for line in lines:
    # level == INFO, section == submit/env, assignment == 'key=value';
    # the timestamp field is discarded.
    level, _, section, assignment = line.strip().split(':')
    k, v = assignment.split('=')
    # Chained setdefault creates each nested dict the first time its key
    # is seen and reuses it afterwards.
    dct.setdefault(level, {}).setdefault(section, {})[k] = v
print(dct)
Output
{
'INFO': {
'submit': {
'is_test': 'False', 'username': 'Mary'
},
'env': {
'sys.platform': 'linux2', 'os.name': 'ubuntu'
}
}
}
This is one of the rare cases where recursion in Python seems to be appropriate and helpful. The following function adds a value to the hierarchical dictionary d specified by the list of keys:
def add_to_dict(d, keys, value):
    """Insert *value* into the nested dict *d* at the path *keys*,
    creating intermediate dicts as needed. Works for any depth."""
    first, *rest = keys
    if not rest:  # The last key
        d[first] = value
        return
    # setdefault creates the subdict on first visit, reuses it afterwards.
    add_to_dict(d.setdefault(first, {}), rest, value)
The function works with the dictionaries of arbitrary depth. The rest is just the matter of calling the function:
# Build the tree line by line: 'INFO:ts:env:os.name=ubuntu' becomes
# keys ['INFO', 'ts', 'env', 'os.name'] and value 'ubuntu'.
d = {}
for line in file:
    keys, value = line.split("=")
    keys = keys.split(":")
    add_to_dict(d, keys, value.strip())
Result:
{'INFO': {'20180606_141527': {
'submit': {'is_test': 'False',
'username': 'Mary'},
'env': {'sys.platform': 'linux2',
'os.name': 'ubuntu'}}}}
You can modify the code to exclude certain levels (like the timestamp).
You could use a nested collections.defaultdict() here:
from collections import defaultdict
from pprint import pprint
# Two nested defaultdicts: d[level][section] springs into existence on first use.
d = defaultdict(lambda: defaultdict(dict))
with open('sample.txt') as in_file:
    for raw_line in in_file:
        # The timestamp component is discarded via '_'.
        level, _, section, assignment = raw_line.strip().split(':')
        prop, value = assignment.split('=')
        d[level][section][prop] = value
pprint(d)
Which gives the following:
defaultdict(<function <lambda> at 0x7ff8a341aea0>,
{'INFO': defaultdict(<class 'dict'>,
{'env': {'os.name': 'ubuntu',
'sys.platform': 'linux2'},
'submit': {'is_test': 'False',
'username': 'Mary'}})})
Note: defaultdict() is a subclass of the builtin dict, so their is not reason to convert it to dict in the end result. Additionally, defaultdict() can also be serialized to JSON with json.dumps().
You can use itertools.groupby:
import itertools, re
# Fixes: '\=' is an invalid escape sequence ('=' needs no escaping) and the
# open() call leaked its file handle — use a context manager.
with open('filename.txt') as fh:
    content = [re.split(r'=|:', line.strip('\n')) for line in fh]
# Drop the timestamp column (index 1) from every row.
new_content = [[a, *c] for a, _, *c in content]
def group_vals(d):
    """Recursively fold rows of [k1, k2, ..., value] into nested dicts.

    Rows are grouped by their first element; a group that reduces to a
    single one-element tail becomes a leaf value.
    """
    ordered = sorted(d, key=lambda row: row[0])
    new_d = []
    for head, rows in itertools.groupby(ordered, key=lambda row: row[0]):
        new_d.append([head, [tail for _, *tail in rows]])
    return {head: tails[0][0] if len(tails) == 1 else group_vals(tails)
            for head, tails in new_d}
import json
# Pretty-print the nested result as JSON.
print(json.dumps(group_vals(new_content), indent=4))
Output:
{
"INFO": {
"env": {
"os.name": "ubuntu",
"sys.platform": "linux2"
},
"submit": {
"is_test": "False",
"username": "Mary"
}
}
}
Check for the presence of keys:
import csv
import json
tree_dict = {}
with open('file.log') as file:
    tree_dict = {}
    for row in file:
        parts = row.split(":")
        # parts[0] is the level ('INFO'), parts[-2] the section ('submit'/'env'),
        # parts[-1] the trailing 'key=value' pair.
        branch = tree_dict.setdefault(parts[0], {}).setdefault(parts[-2], {})
        key, value = parts[-1].split("=")
        # NOTE(review): row is not stripped, so values keep a trailing
        # newline and the boolean translation below will not match them.
        if value == "False":
            value = False
        if value == "True":
            value = True
        branch[key] = value
dumped = json.dumps(tree_dict)
import re
from functools import reduce
# Read the log; NOTE(review): re.findall below needs a single string, so
# these lines must be joined before use.
with open('file.txt') as f:
    lines = f.readlines()
def rec_merge(d1, d2):
    """Recursively merge two nested dicts and return the combined dict.

    Keys present in both are merged recursively (their values are assumed
    to be dicts); note that d2 is updated in place along the way.
    """
    for k, v in d1.items():
        if k in d2:
            d2[k] = rec_merge(v, d2[k])
    return {**d1, **d2}
from pprint import pprint  # fix: pprint was used below but never imported

# Fix: re.findall requires a string, but `lines` is a list from readlines();
# join it into one text blob so the MULTILINE anchors work per line.
lst_of_tup = re.findall(r'^([^:]*):[\d_]+:([^:]*):([^=]*)=(.*)$', ''.join(lines), re.MULTILINE)
# Turn each (level, section, key, value) tuple into a single nested dict.
lst_of_dct = [reduce(lambda x, y: {y: x}, reversed(t)) for t in lst_of_tup]
# Merge all the single-path dicts into one tree.
dct = reduce(rec_merge, lst_of_dct)
pprint(dct)
Source :
import os  # NOTE(review): unused in this snippet; kept to avoid changing module side effects

with open('file.log') as file:
    tree_dict = {}
    is_test = False
    username = ""
    sysplatform = ""
    osname = ""
    for row in file:
        row = row.rstrip('\n')
        for key in reversed(row.split(":")):
            # Fix: replace the obscure 'not key.find(x)' idiom (true only when
            # find() returns 0, i.e. the prefix matches) with startswith().
            if key.startswith('is_test'):
                is_test = key.split('=')[1]
            elif key.startswith('username'):
                username = key.split('=')[1]
            elif key.startswith('sys.platform'):
                sysplatform = key.split('=')[1]
            elif key.startswith('os.name'):
                osname = key.split('=')[1]
    # Assemble the fixed-shape result once all lines are consumed.
    tree_dict = {
        "INFO": {
            "submit": {
                "is_test": is_test,
                "username": username
            },
            "env": {
                "sys.platform": sysplatform,
                "os.name": osname
            }
        }
    }
print(tree_dict)
Result :
{'INFO': {'submit': {'is_test': 'False', 'username': 'Mary'}, 'env': {'sys.platform': 'linux2', 'os.name': 'ubuntu'}}}
For example I have two dicts:
# Base JSON-schema fragment to be extended.
schema = {
    'type': 'object',
    'properties': {
        'reseller_name': {
            'type': 'string',
        },
        'timestamp': {
            'type': 'integer',
        },
    },
    'required': ['reseller_name', 'timestamp'],
}
and
# Additions: nested dicts should merge by key, lists should concatenate.
schema_add = {
    'properties': {
        'user_login': {
            'type': 'string',
        },
    },
    'required': ['user_login'],
}
How I can get next merged with appending result dict:
# Expected merge result of schema and schema_add.
schema_result = {
    'type': 'object',
    'properties': {
        'reseller_name': {
            'type': 'string',
        },
        'timestamp': {
            'type': 'integer',
        },
        'user_login': {
            'type': 'string',
        },
    },
    'required': ['reseller_name', 'timestamp', 'user_login'],
}
Rules:
Same path is properties and required for scheme and scheme_add in example.
If both dict have dicts with same path, they merged with same rules.
If both dict have lists with same path, then add first list with second.
If both dict have simple values (or dict and non dict or list and non list) with same path, then first value overriding with second.
If only one dict have key with some path, than setting this key and value.
Not sure where the problem lies, but the way you're writing it down is almost like a computer program, and the example is like a test case. Why don't you start from this?
def add_dict(d1, d2):
    """Skeleton for the merge: walk both dicts and combine per the rules.

    Fixes: Python 2 iteritems() -> items(), and the comment-only branches
    were indentation errors — they now contain a placeholder / the obvious
    "simply add" assignment.
    """
    newdict = {}
    for (key, value) in d1.items():
        if key in d2:
            # apply rules, add to newdict
            pass
        else:
            # simply add
            newdict[key] = value
    for (key, value) in d2.items():
        if not key in d1:
            # simply add
            newdict[key] = value
    return newdict
This can probably be written more tightly, but might be easier like that to edit.
Edit.. after writing the last comment, couldn't help but write a nicer implementation
def merge_values(a, b):
    """Merge two values: if either is None, the other wins; two dicts merge
    via add_dict; two lists concatenate (the original left this branch as
    '...'); anything else falls through and returns None, as before."""
    if a is None or b is None:  # '== None' replaced with the identity check
        return a or b
    # now handle cases where both have values
    if isinstance(a, dict):  # isinstance instead of 'type(a) == dict'
        return add_dict(a, b)
    if isinstance(a, list):
        return a + b
def add_dict(d1, d2):
    """Merge two dicts key-by-key: every key from either input is passed
    through merge_values (missing keys contribute None)."""
    union_keys = set(d1.keys()).union(d2.keys())
    return {key: merge_values(d1.get(key, None), d2.get(key, None))
            for key in union_keys}
My own solution with #Nicolas78 help:
def merge(obj_1, obj_2):
    """Recursively merge obj_2 into obj_1: dicts merge by key, lists
    concatenate, and for anything else obj_2 wins.

    Fix: Python 2's iteritems() replaced with items(); isinstance instead
    of type() comparison (also accepts subclasses).
    """
    if isinstance(obj_1, dict) and isinstance(obj_2, dict):
        result = {}
        for key, value in obj_1.items():
            if key not in obj_2:
                result[key] = value
            else:
                result[key] = merge(value, obj_2[key])
        for key, value in obj_2.items():
            if key not in obj_1:
                result[key] = value
        return result
    if isinstance(obj_1, list) and isinstance(obj_2, list):
        return obj_1 + obj_2
    return obj_2
I am adding a simple solution to this problem, assuming that the sample data will not change.
def merge_nested_dicts(schema, schema_add):
    """Merge schema_add into a copy of schema: nested dicts are updated
    key-by-key and lists are concatenated (one level deep, matching the
    sample data's shape).

    Fix: the original did `new_schema = schema`, which aliased — and
    therefore mutated — the caller's input; deepcopy keeps it untouched.
    """
    from copy import deepcopy
    new_schema = deepcopy(schema)
    for k in new_schema:
        if k in schema_add:
            if isinstance(schema_add[k], dict):
                new_schema[k].update(schema_add[k])
            if isinstance(schema_add[k], list):
                new_schema[k] = new_schema[k] + schema_add[k]
    return new_schema
Try this if you know the keys exactly.
# Merge the known keys directly: update the nested dict, extend the list.
schema['properties'].update(schema_add['properties'])
# Fixes: the key is 'required' (neither dict has a 'result' key), and
# append() would nest the whole list as a single element — extend() concatenates.
schema['required'].extend(schema_add['required'])
result is merged in schema.
If you do not know the keys exactly then one loop is required to find inner list and dictionaries.
# Fixes: 'value is dict' compared the KEY to the dict *type* (always False for
# strings) — the value's type must be checked with isinstance; dict.has_key()
# was removed in Python 3 ('in' replaces it); and appending a whole list would
# nest it, so lists are concatenated instead.
for key in schema:
    if isinstance(schema[key], dict):
        if key in schema_add and isinstance(schema_add[key], dict):
            schema[key].update(schema_add[key])
    elif isinstance(schema[key], list):
        if key in schema_add and isinstance(schema_add[key], list):
            schema[key] = schema[key] + schema_add[key]
result can be merged into different dict as well.