How to convert all values of a nested dictionary into strings? - python

I am writing a Python application where I have a dictionary that can be nested to an arbitrary depth.
The keys in any level can be either int or string. But I want to convert all keys and values at all levels into strings. How nested the dictionary will be is variable which makes it a bit complicated.
{
"col1": {
"0": 0,
"1": 8,
"2": {
0: 2,
},
"3": 4,
"4": 5
},
"col2": {
"0": "na",
"1": 1,
"2": "na",
"3": "na",
"4": "na"
},
"col3": {
"0": 1,
"1": 3,
"2": 3,
"3": 6,
"4": 3
},
"col4": {
"0": 5,
"1": "na",
"2": "9",
"3": 9,
"4": "na"
}
}
I am looking for the shortest and quickest function to achieve that. There are other questions like Converting dictionary values in python from str to int in nested dictionary that suggest ways of doing it but none of them deals with the "variable nesting" nature of the dictionary.
Any ideas will be appreciated.

This is the most straightforward way I can think of doing it:
import json
# Sample input: string keys everywhere except the int key nested under col1['2'].
data = {
    'col4': {'1': 'na', '0': 5, '3': 9, '2': '9', '4': 'na'},
    'col2': {'1': 1, '0': 'na', '3': 'na', '2': 'na', '4': 'na'},
    'col3': {'1': 3, '0': 1, '3': 6, '2': 3, '4': 3},
    'col1': {'1': 8, '0': 0, '3': 4, '2': {0: 2}, '4': 5},
}
# Serialising writes every key as a JSON string; decoding with ``str`` as the
# int/float hooks then returns each number literal as its text.
stringified_dict = json.JSONDecoder(parse_int=str, parse_float=str).decode(
    json.dumps(data)
)
Here are some links to the documentation for json loads and parse_int: Python3, Python2

You could check the dictionary recursively:
def iterdict(d):
    """Stringify the keys and numeric values of a nested dict in place.

    Walks ``d`` and every dict nested inside it. Each ``int`` or ``float``
    value is replaced by ``str(value)`` and each non-string key is replaced
    by ``str(key)``. Values of any other type (``bool``, ``None``, lists,
    existing strings, ...) are left as they are.

    Args:
        d: The dictionary to convert. It is modified in place.

    Returns:
        The same dictionary object ``d``.

    Note:
        A re-keyed entry moves to the end of its dict, and if ``str(key)``
        equals another key of the same dict, the entry processed later wins.
    """
    # Iterate over a snapshot: re-keying deletes and inserts entries, which is
    # not allowed while iterating over the live items view.
    for key, value in list(d.items()):
        if isinstance(value, dict):
            iterdict(value)
        elif isinstance(value, (int, float)) and not isinstance(value, bool):
            # bool is a subclass of int; keep True/False untouched.
            value = str(value)
        if not isinstance(key, str):
            del d[key]
            key = str(key)
        d[key] = value
    return d

In case it's helpful to anyone, I modified Jose's answer to handle lists as well. The output converts all values to strings. I haven't thoroughly tested it, but I just ran it against around 40k records, and it passed schema validation:
def iterdict(d):
    """Recursively convert every non-string value in ``d`` to a string, in place.

    Nested dicts and lists are walked to any depth, including lists that
    contain other lists. Keys are left untouched.

    Args:
        d: The dictionary to convert. It is modified in place.

    Returns:
        The same dictionary object ``d``.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            iterdict(value)
        elif isinstance(value, list):
            _stringify_list(value)
        elif not isinstance(value, str):
            # Re-assigning an existing key is safe while iterating.
            d[key] = str(value)
    return d


def _stringify_list(items):
    """Convert the non-string leaves of ``items`` (and nested containers) in place."""
    for index, item in enumerate(items):
        if isinstance(item, dict):
            iterdict(item)
        elif isinstance(item, list):
            # A list has no .items(), so it must not be passed to iterdict.
            _stringify_list(item)
        elif not isinstance(item, str):
            items[index] = str(item)

Related

How to convert string to valid json or yaml

I have a large script that parses js with a dataframe entry, but to shorten the question, I put what I need in a separate variable.
My variable contains the following value
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
I apply the following script and get data like this
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
def parse_json(value):
    """Serialise a comma-separated run of ``{...}`` objects as one indexed JSON object.

    ``value`` is cut at every ``"},"``. Each piece is parsed by
    ``add_quotation_marks`` and stored under its position, used as a string key.

    Returns:
        The JSON text that ``json.dumps`` produces for that mapping.
    """
    *leading, last = value.split("},")
    # The split consumed the closing brace of every piece except the last one.
    pieces = [piece + "}" for piece in leading]
    pieces.append(last)
    indexed = {}
    for position, piece in enumerate(pieces):
        indexed[str(position)] = add_quotation_marks(piece)
    return json.dumps(indexed)
def add_quotation_marks(value):
    """Quote the bare keys of a JavaScript-style object literal and parse it.

    Every run of word characters that is immediately followed by ``:`` is
    wrapped in double quotes, and the result is decoded with ``json.loads``.
    The substitution is anchored to each ``key:`` occurrence, so a key that
    appears more than once, or that is a prefix of another key
    (``to`` / ``total``), is quoted exactly once.

    Args:
        value: Text such as ``"{from:[3,4],to:[7,4],color:2}"``.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the quoted text is still not valid JSON.

    Note:
        A ``word:`` sequence inside a string value is quoted as well; the
        pattern does not distinguish keys from string contents.
    """
    quoted = re.sub(r'\b(\w+):', r'"\1":', value)
    return json.loads(quoted)
print(parse_json(value))
{"0": {"from": [3, 4], "to": [7, 4], "color": 2}, "1": {"from": [3, 6], "to": [10, 6], "color": 3}}
The script executes correctly, but I need to get a slightly different result.
This is what the result I want to get looks like:
{
"0": {
"from": {
"0": "3",
"1": "4"
},
"to": {
"0": "7",
"1": "4"
},
"color": "2"
},
"1": {
"from": {
"0": "3",
"1": "6"
},
"to": {
"0": "10",
"1": "6"
},
"color": "3"
}
}
This is valid json and valid yaml. Please tell me how can I do this
I'd suggest a regex approach in this case:
# Collect one dict per "{from:...,to:...,color:...}" group found in `value`.
res = []
for group in re.findall(r'\{([^}]+)}', value):
    # Each "...:..." pair: a bracketed value becomes a list of strings,
    # a bare number stays a string.
    res.append({
        name: raw.strip('[]').split(',') if raw.startswith('[') else raw
        for name, raw in re.findall(r'(\w+):(\[[^]]+]|\d+)', group)
    })
This produces this output in res:
[{'from': ['3', '4'], 'to': ['7', '4'], 'color': '2'}, {'from': ['3', '6'], 'to': ['10', '6'], 'color': '3'}]
Since your values can contain commas, trying to split on commas or other markers is fairly tricky, and using these regexes to match your desired values instead is more stable.
Here's the code that converts the value to your desired output.
import json5 # pip install json5
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
def convert(str_value):
    """Parse comma-separated JS-style objects into a dict keyed by position.

    The text is wrapped in ``[...]`` and decoded with ``json5.loads``. Each
    decoded object is stored under its index. Within an object, a list value
    is replaced by an ``{index: element}`` dict; any other value is kept as is.

    Returns:
        A dict mapping each object's integer index to its converted dict.
    """
    # The brackets turn the comma-separated objects into a single array.
    records = json5.loads(f"[{str_value}]")
    output = {}
    for position, record in enumerate(records):
        output[position] = {
            field: dict(enumerate(entry)) if isinstance(entry, list) else entry
            for field, entry in record.items()
        }
    return output
print(json5.dumps(convert(value)))
Output
{
"0": {
"from": {
"0": 3,
"1": 4
},
"to": {
"0": 7,
"1": 4
},
"color": 2
},
"1": {
"from": {
"0": 3,
"1": 6
},
"to": {
"0": 10,
"1": 6
},
"color": 3
}
}
The json5 package allows you to convert a JavaScript object to a Python dictionary, so you don't have to do split("},{").
Then added [ and ] to make the string a valid json.
Then load the string using json5.loads()
Now you can loop through the dictionary and convert it to desired output format.

Python - iterate and update a nested dictionary & lists

Having the following dict, where some of the values can be list of dictionaries:
{
"A": [
{
"B": {
"C": "D",
"X": "CHANGE ME"
}
},
{
"E": "F"
}
],
"G": {
"Y": "CHANGE ME"
}
}
I would like to recursively iterate over the items and change the pairs of key values where the value is "CHANGE ME", so the result would be:
{
"A": [
{
"B": {
"C": "D",
"X.CHANGED": "CHANGED"
}
},
{
"E": "F"
}
],
"G": {
"Y.CHANGED": "CHANGED"
}
}
Solutions I've found were not handling a case where the value is a list, for example:
import collections
def nested_dict_iter(nested):
    """Yield ``(key, value)`` for every non-mapping value in a nested mapping.

    Mappings are descended into recursively, and only the innermost key is
    reported for each value. Values of any other type, lists included, are
    yielded as they are without being inspected.

    Args:
        nested: A mapping whose values may themselves be mappings.

    Yields:
        ``(key, value)`` tuples in iteration order.
    """
    # Imported locally so the snippet stands alone. The ABC lives in
    # collections.abc; the ``collections.Mapping`` alias was removed in 3.10.
    from collections.abc import Mapping

    # ``items()`` replaces the Python 2-only ``iteritems()``.
    for key, value in nested.items():
        if isinstance(value, Mapping):
            yield from nested_dict_iter(value)
        else:
            yield key, value
How can I achieve my goal?
Using recursion
Ex:
def update(data):
    """Rename and overwrite every dict entry whose value is ``'CHANGE ME'``.

    Dicts are modified in place: an entry ``k: 'CHANGE ME'`` is removed and
    ``f"{k}.CHANGED": 'CHANGED'`` is inserted (so it moves to the end of its
    dict). Lists are rebuilt with each element processed recursively. Any
    other value is returned unchanged, so lists may freely mix dicts, lists
    and scalars. A bare ``'CHANGE ME'`` list element has no key to rename and
    is left as it is.

    Args:
        data: A dict, a list, or a leaf value.

    Returns:
        ``data`` itself for dicts and leaves; a new list for lists.
    """
    if isinstance(data, list):
        return [update(item) for item in data]
    if not isinstance(data, dict):
        # Leaf inside a list: nothing to rename.
        return data
    # Iterate over a snapshot, because entries are deleted and added below.
    for key, value in list(data.items()):
        if isinstance(value, (dict, list)):
            data[key] = update(value)
        elif value == 'CHANGE ME':
            del data[key]
            data[f"{key}.CHANGED"] = 'CHANGED'
    return data
print(update(data))
Output:
{
'A':[{'B': {'C': 'D', 'X.CHANGED': 'CHANGED'}}, {'E': 'F'}],
'G':{'Y.CHANGED': 'CHANGED'}
}
Note: I have not tested all corner cases

How to flatten deep dictionary; surrounding any child dictionary keys with square brackets

Maybe this is called something specifically - I just don't know.
If you have some data in a dictionary like so:
data1 = {
"first": {"a":24,
"b": {"green": {"look": 3,
"out": "Nope"},
"apple": True}},
"third": {"x": {"word": 8}, "y": -1, "z": 26},
"fifth": {"ae": [0, None, 2.0, 3.0],
"e": None}
}
Is there some function from some module that does this (or some other tool)?
data2 = {
"first[a]": 24,
"first[b][green][look]": 3,
"first[b][green][out]": "Nope",
"first[b][apple]": True,
"third[x][word]": 8,
"third[y]": -1,
"third[z]": 26,
"fifth[ae][0]": 0,
"fifth[ae][1]": None,
"fifth[ae][2]": 2.0,
"fifth[ae][3]": 3.0,
"fifth[e]": None
}
I made a function to get the flattened keys/values like:
def get_deep_dict_keys_gen(data, sep="."):
    """Yield the ``sep``-joined path to every leaf inside nested dicts and lists.

    Dict keys and list indices are both converted with ``str`` before being
    joined, so every yielded path is a string even when a dict uses
    non-string keys. An empty nested dict or list contributes no path.

    Args:
        data: A dict or list to walk; any other value yields nothing.
        sep: Separator placed between path components.

    Yields:
        One path string per leaf value, in iteration order.
    """
    if isinstance(data, list):
        children = enumerate(data)
    elif isinstance(data, dict):
        children = data.items()
    else:
        return
    for key, child in children:
        key = str(key)
        if isinstance(child, (dict, list)):
            for sub_path in get_deep_dict_keys_gen(child, sep=sep):
                yield key + sep + sub_path
        else:
            yield key
# I can make the flatten dictionary again using a function _g(data, path), that gets the deep path value.
{x: _g(data1, x) for x in util.get_deep_dict_keys_gen(data1)}
# Which makes:
{
'fifth.ae.0': 0,
'fifth.ae.1': None,
'fifth.ae.2': 2.0,
'fifth.ae.3': 3.0,
'fifth.e': None,
'first.a': 24,
'first.b.apple': True,
'first.b.green.look': 3,
'first.b.green.out': 'Nope',
'third.x.word': 8,
'third.y': -1,
'third.z': 26
}
I just don't know how to recursively get the keys w/ the data values. Maybe there is some tool that does this; I see it done in the browser with website interactions. Taking some json on a page - and making a post request in the layout of data2.
Edit: Thanks for the help, Arun Augustine.
Try this,
from itertools import chain, starmap
def flatten_dict(dictionary):
    """Flatten nested dicts and lists into one dict with bracketed path keys.

    Every nesting level appends ``[key]`` (dict) or ``[index]`` (list) to the
    top-level key, e.g. ``{"a": {"b": [1, 2]}}`` becomes
    ``{"a[b][0]": 1, "a[b][1]": 2}``. Lists are expanded wherever they occur:
    as a top-level value, inside a dict, or inside another list. An empty
    dict or list is kept as a leaf value so that its key does not disappear.

    Args:
        dictionary: The (possibly nested) dict to flatten. It is not modified.

    Returns:
        A new flat dict mapping path keys to leaf values.
    """
    def walk(prefix, node):
        """Yield ``(path, leaf)`` pairs for everything stored under ``node``."""
        if isinstance(node, dict) and node:
            children = node.items()
        elif isinstance(node, list) and node:
            children = enumerate(node)
        else:
            # Scalar or empty container: this is a leaf.
            yield prefix, node
            return
        for key, child in children:
            yield from walk(f"{prefix}[{key}]", child)

    flat = {}
    for key, value in dictionary.items():
        flat.update(walk(key, value))
    return flat
# Input Dict
input_dict = {
"first": {"a": 24,
"b": {"green": {"look": 3,
"out": "Nope"},
"apple": True}},
"third": {"x": {"word": 8}, "y": -1, "z": 26},
"fifth": {"ae": [0, None, 2.0, 3.0],
"e": None}
}
print(flatten_dict(input_dict))
OutPut:
{
'first[a]': 24,
'first[b][green][look]': 3,
'first[b][green][out]': 'Nope',
'first[b][apple]': True,
'third[x][word]': 8,
'third[y]': -1,
'third[z]': 26,
'fifth[ae][0]': 0,
'fifth[ae][1]': None,
'fifth[ae][2]': 2.0,
'fifth[ae][3]': 3.0,
'fifth[e]': None
}
My assumption for the data below is that list elements contain neither dicts nor lists.
data1 = {
"first": {"a":24,
"b": {"green": {"look": 3,
"out": "Nope"},
"apple": True}},
"third": {"x": {"word": 8}, "y":-1, "z": 26},
"fifth": {"ae": [0, None, 2.0, 3.0],
"e": None}
}
The add_dict method is a generator responsible for looping over each key and its value. When a value is a dictionary, the method calls itself.
def add_dict(_dict):
    """Yield ``(path, leaf)`` pairs for a nested dict, each key wrapped in ``[...]``.

    Dict values are walked recursively; list values are delegated to
    ``add_list``; anything else is yielded as a leaf under ``[key]``.
    """
    for name, item in _dict.items():
        prefix = '[{0}]'.format(name)
        if isinstance(item, dict):
            nested = add_dict(item)
        elif isinstance(item, list):
            nested = add_list(item)
        else:
            yield prefix, item
            continue
        # Prepend this level's segment to every path from the nested container.
        for suffix, leaf in nested:
            yield prefix + suffix, leaf
The add_list method is a generator. Its elements are assumed to contain neither dicts nor lists, so it stays simple.
def add_list(_list):
    """Yield ``('[index]', element)`` for each element of ``_list``, in order."""
    yield from (
        ('[{0}]'.format(position), elem)
        for position, elem in enumerate(_list)
    )
flatten is the main method and is also a generator. Its top-level values are assumed to be dicts only, so it stays simple.
def flatten(data):
    """Yield ``(path, leaf)`` pairs for ``data``, a dict whose values are dicts.

    Each path is the top-level key followed by the bracketed segments that
    ``add_dict`` produces for the corresponding value.
    """
    for top_key, subtree in data.items():
        head = '{0}'.format(top_key)
        for tail, leaf in add_dict(subtree):
            yield head + tail, leaf
print(dict(flatten(data1)))
Output of above execution
{'fifth[ae][0]': 0,
'fifth[ae][1]': None,
'fifth[ae][2]': 2.0,
'fifth[ae][3]': 3.0,
'fifth[e]': None,
'first[a]': 24,
'first[b][apple]': True,
'first[b][green][look]': 3,
'first[b][green][out]': 'Nope',
'third[x][word]': 8,
'third[y]': -1,
'third[z]': 26}

Python - replace all keys in nested dict according to list

I have a JSON file where all the keys are numbers.
These numbers are the index of the correct key in a list of keys I have.
Example JSON file (excuse any odd formatting, this is a simplified version of the actual file):
{
"0": {
"1": [{
"2": 0,
"3": {
"4": "string"
},
"4": {
"5": 2,
"6": 1
}
}]
}
}
Example list:
[a, b, c, d, e, f, g]
Is there a way to then replace the keys with their counterparts from the list? (as in, the key '0' would become 'a', '1' would become 'b' and so on.)
I can get it working for just the parent keys, but not any of the nested keys.
Thanks for any help.
You can write a recursive function that transforms its argument, and then its nested values.
Use isinstance to handle dict and list arguments appropriately. Other values are considered "leaf nodes" and are returned unmodified.
import pprint
obj = {
"0": {
"1": [{
"2": 0,
"3": {
"4": "string"
},
"4": {
"5": 2,
"6": 1
}
}]
}
}
keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
def transform(obj, names=None):
    """Return a copy of ``obj`` with every dict key replaced via an index lookup.

    Each dict key is converted with ``int`` and used as an index into
    ``names``. Lists and dict values are transformed recursively; any other
    value is a leaf and is returned unmodified.

    Args:
        obj: A dict, a list, or a leaf value.
        names: Sequence of replacement keys. When omitted, the module-level
            ``keys`` list is used.

    Returns:
        A new structure of the same shape with the keys replaced.

    Raises:
        ValueError: If a dict key cannot be converted to ``int``.
        IndexError: If a key's index is outside ``names``.
    """
    if names is None:
        # Fall back to the module-level lookup list.
        names = keys
    if isinstance(obj, list):
        return [transform(element, names) for element in obj]
    if isinstance(obj, dict):
        return {names[int(key)]: transform(value, names) for key, value in obj.items()}
    return obj
print('Before:')
pprint.pprint(obj)
other = transform(obj)
print('After:')
pprint.pprint(other)

Python: Using Map/Lambda instead of For-Loop

I am working to convert CSV file to structured Json file.
CSV.File
address,type,floor,door
"1","is","an","example"
"2","is","an","example"
"3","is","an","example"
"4","is","an","example"
"5","is","an","example"
"6","is","an","example"
"7","is","an","example"
First, I read the csv file and list all the column's items to lists.
import pandas as pd
# pandas opens and parses the file itself, header row included, so neither a
# manual open() nor a csv.reader loop is needed.
df = pd.read_csv('data.csv')
# Materialise each column as a plain Python list.
listAddress = df["address"].tolist()
listType = df["type"].tolist()
listFloor = df["floor"].tolist()
listDoor = df["door"].tolist()
In order to get lists like this one :
listAddress=["1","2","3","4","5","6","7"]
Now, I want to automate the process and make a list of dictionaries
Output= []
tempJson = {"1": 1,"2": "is"}
for i in range(len(listAddress)):
tempJson["Address"]= listAddress[i]
tempJson["Type"]= listType[j]
Output.(tempJson)
json.dumps(output)
Here is the problem, I'd like to make a list of dictionaries, I can do this in JS using map. But I am not proficient in Python.
Can I simplify this last piece of code using map?
Output:
[
{
"1": 1,
"2": "is"
},
{
"1": 2,
"2":"is"
},
{
"1": 3,
"2": "is"
},{
"1": 4,
"2": "is"
},
{
"1": 5,
"2":"is"
},
{
"1": 6,
"2": "is"
},
{
"1": 7,
"2": "is"
}
]
You can use a list comprehension here:
>>> from pprint import pprint
>>> import csv
>>> with open('data.csv') as f:
next(f) #skip the header
reader = csv.reader(f, delimiter=',', quotechar='"')
d = [{'1':int(row[0]), '2': row[1]} for row in reader]
...
>>> pprint(d)
[{'1': 1, '2': 'is'},
{'1': 2, '2': 'is'},
{'1': 3, '2': 'is'},
{'1': 4, '2': 'is'},
{'1': 5, '2': 'is'},
{'1': 6, '2': 'is'},
{'1': 7, '2': 'is'}]
A simple change would be
Output = []
tempJson = {"1": 1, "2": "is"}
for i in range(len(listAddress)):
    # Overwrite the per-row fields on the shared template ...
    tempJson["Address"] = listAddress[i]
    tempJson["Type"] = listType[i]
    # ... and store a snapshot, so later rows do not alter this record.
    Output.append(dict(tempJson))
something probably considered more pythonic would be:
# One record per (address, type) pair; zip stops at the shorter input.
output = [
    {"1": 1, "2": "is", "Address": address, "Type": kind}
    for address, kind in zip(listAddress, listType)
]

Categories