I am stuck with the following data.
There is a list.
[{name: '/', children: [{name: 'bin'}, {name: 'sbin'}, {name: 'home'}]},
{name: 'home', children: [{name: 'user1'}, {name: 'user2'}]},
{name: 'user2', children: [{name: 'desktop'}]}]
I want to convert the above list to the following dictionary.
{name: '/', children: [{name: '/bin'}, {name: '/sbin'}, {name: '/home', children: [{name: 'user1'}, {name: 'user2', children: [{name: 'desktop'}]}]}]}
I wrote some code to convert the data into the above structure.
def recT(data, child, parent, collector):
    """Record ``parent`` in ``collector`` and return its children as a flat list.

    Args:
        data: List of record dicts, each with a ``'name'`` key and optionally
            a ``'children'`` list of ``{'name': ...}`` dicts.
        child: The ``'children'`` list of the record being processed.
        parent: Name of the record being processed.
        collector: Dict updated in place. It receives ``name=parent`` and,
            when an entry of ``child`` has a record with children in
            ``data``, ``children=<flat child list of that entry>``.

    Returns:
        A new list holding one ``{'name': ...}`` dict per entry of ``child``.

    Note:
        ``collector`` is a single dict, so every call (the recursive ones
        included) overwrites the same ``'name'`` / ``'children'`` keys; only
        the values written last survive a sequence of calls.
    """
    dparent = dict(name=parent)
    dchildren = dict()
    lst = []
    for c in child:
        lst.append(dict(name=c['name']))
        for d in data:
            if c['name'] == d['name']:
                # A record with more than one key also carries 'children'.
                if len(d) > 1:
                    dchildren.update(dict(children=recT(data, d['children'], d['name'], collector)))
                    dparent.update(dchildren)
    collector.update(dparent)
    return lst
Then,
myd = dict()
for d in data2:
    # Only records with more than one key (name plus children) are expanded.
    if len(d) > 1:
        recT(data2, d['children'], d['name'], myd)
NOTE: data2 is the list of dictionaries I want to convert.
But, the output dictionary is the last record in list:
{'children': [{'name': 'desktop'}], 'name': 'user2'}
Please help.
As lazyr said, you can't duplicate keys in your dict like that. You could convert it to a format like the following to be valid python dict syntax:
{
'/': {
'bin': {},
'sbin': {},
'home': {
'user1': {},
'user2': {
'desktop': {}
}
}
}
}
The reason you're only getting the last record in the list is because your dict uses unique keys
# Assigning to an existing key replaces its value; it never adds a second entry.
mydict = {}
mydict['name'] = 1
mydict['name']  # is 1
mydict['name'] = 2
# .items() and print(...) with one argument behave the same on Python 2 and 3.
for x, y in mydict.items():
    print('{0}: {1}'.format(x, y))
>> name: 2 # Note only one entry
Now, I got it from @lazyr's answer to "How to convert a strictly sorted list of strings into dict?".
Then, I converted the result into a string and changed it into the wanted format using myReplacer().
here:
def myReplacer(strdata):
    """Rewrite the text of a nested dict into ``name`` / ``children`` notation.

    The substitutions run strictly in the order listed; later patterns match
    text produced by earlier ones, so the order must not change.

    Args:
        strdata: Text to rewrite, e.g. ``"{'a': {'b': None}}"``.

    Returns:
        The rewritten text with its final character removed.
    """
    replacements = (
        ("{", '{ name:'),
        (': {', ', children : [{'),
        ('}', '}]'),
        (': None,', '},{ name:'),
        (': None', ''),
        (", '", "}, { name: '"),
    )
    for old, new in replacements:
        strdata = strdata.replace(old, new)
    # Every '}' became '}]'; for input ending in '}' this drops the surplus ']'.
    return strdata[:-1]
Thanks @lazyr and everybody who helped me. It still needs some polish.
Related
a = {
'user': {
'username': 'mic_jack',
'name': {
'first': 'Micheal',
'last': 'Jackson'
},
'email': 'micheal#domain.com',
#...
#... Infinite level of another nested dict
}
}
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'
#...
#str_key_n = 'user.level2.level3...leveln=XXX'
Let's consider this 'str_key' string, goes with infinite number of dots/levels.
Expected Output:
a = {
'user': {
'username': 'john', # username, should be replace
'name': {
'first': 'Micheal',
'last': 'henry' # lastname, should be replace
},
'email': 'micheal#domain.com',
...
... # Infinite level of another nested dict
}
}
I'm looking for answers that apply a nested key string of any depth 'n', rather than simply assigning a['user']['username'] = 'John' statically. Answers must work for any number of dot-separated levels.
Thanks in advance!
There are three steps:
Separate the key-value pair string into a fully-qualified key and
value.
Split the key into path components.
Traverse the dictionary to find the relevant value to update.
Here's an example of what the code might look like:
# Split by the delimiter, making sure to split once only
# to prevent splitting when the delimiter appears in the value
key, value = str_key_n.split("=", 1)
# Break the dot-joined key into parts that form a path
key_parts = key.split(".")
# The last part is required to update the dictionary
last_part = key_parts.pop()
# Traverse the dictionary using the remaining parts, outermost first
# (plain iteration avoids the O(n) cost of popping from the front of a list)
current = a
for part in key_parts:
    current = current[part]
# Update the value
current[last_part] = value
I'd go with a recursive function to accomplish this, assuming your key value strings are all valid:
def assign_value(sample_dict, str_keys, value):
    """Set ``value`` at the nested key path ``str_keys`` inside ``sample_dict``.

    Args:
        sample_dict: Nested dict, modified in place.
        str_keys: Non-empty sequence of keys, outermost first. Every key
            except the last must already exist and map to a dict.
        value: Value stored under the last key.

    Returns:
        ``sample_dict`` itself, so the call can be used in an assignment.

    Raises:
        IndexError: If ``str_keys`` is empty.
        KeyError: If an intermediate key is missing.
    """
    node = sample_dict
    # Walk down to the dict that owns the final key.
    for key in str_keys[:-1]:
        node = node[key]
    node[str_keys[-1]] = value
    return sample_dict
The idea is to traverse your dict until you hit the lowest key and then we assign our new value to that last key;
if __name__ == "__main__":
    sample_dict = {
        'user': {
            'username': 'mic_jack',
            'name': {
                'first': 'Micheal',
                'last': 'Jackson'
            },
            'email': 'micheal#domain.com'
        }
    }
    str_key_1 = 'user.username=john'
    # Split on the first '=' only, so a value may itself contain '='.
    str_keys_1, value_1 = str_key_1.split('=', 1)
    sample_dict = assign_value(sample_dict, str_keys_1.split('.'), value_1)
    print("result: {} ".format(sample_dict))
    str_key_2 = 'user.name.last=henry'
    str_keys_2, value_2 = str_key_2.split('=', 1)
    sample_dict = assign_value(sample_dict, str_keys_2.split('.'), value_2)
    print("result: {}".format(sample_dict))
To use the assign_value you would need to split your original key to the keys and value as seen above;
If you're okay with using exec() and modify your str_key(s), you could do something like:
def get_keys_value(string):
    """Split a ``'keys=value'`` string into its key part and its value part.

    Only the first ``=`` separates the two, so the value may contain ``=``.

    Raises:
        ValueError: If ``string`` contains no ``=``.
    """
    keys, value = string.split("=", 1)
    return keys, value
def get_exec_string(dict_name, keys):
    """Build the source text of an assignment such as ``a['x']['y']=value``.

    Args:
        dict_name: Name of the dict variable the statement subscripts.
        keys: Dot-separated key expressions, inserted verbatim between
            square brackets (so string keys must carry their own quotes).

    Returns:
        The statement text; its right-hand side is the literal name ``value``.
    """
    subscripts = "".join("[" + key + "]" for key in keys.split("."))
    return dict_name + subscripts + "=" + "value"
str_key_1 = "'user'.'username'=john"
str_key_2 = "'user'.'name'.'last'=henry"
str_key_list = [str_key_1, str_key_2]
for str_key in str_key_list:
    keys, value = get_keys_value(str_key)  # split into key-string and value
    exec_string = get_exec_string("a", keys)  # e.g. a['user']['username']=value
    # SECURITY: exec() runs whatever code the key string contains; only use
    # this with key strings you fully control, never with external input.
    exec(exec_string)
print(a)
# prints {'user': {'email': 'micheal#domain.com', 'name': {'last': 'henry', 'first': 'Micheal'}, 'username': 'john'}}
str_key_1 = 'user.username=john'
str_key_2 = 'user.name.last=henry'
a = {
'user': {
'username': 'mic_jack',
'name': {
'first': 'Micheal',
'last': 'Jackson'
},
'email': 'micheal#domain.com',
#...
#... Infinite level of another nested dict
}
}
def MutateDict(key):
    """Update the module-level dict ``a`` from a ``'k1.k2...=value'`` string.

    The dotted part is followed through existing nested dicts of ``a`` and
    the final key is overwritten with the text after the first ``=``. When a
    key along the path does not exist, or the path walks into a value that
    is not a dict, "Invalid key specified" is printed and ``a`` is left
    unchanged.

    Args:
        key: String of the form ``'k1.k2=value'``.

    Raises:
        IndexError: If ``key`` contains no ``=``.
    """
    # Split on the first '=' only, so the value may itself contain '='.
    parts = key.split('=', 1)
    strkey, strval = parts[0], parts[1]
    strkeys = strkey.split('.')
    print("strkeys = " ,strkeys)
    target = a
    k = ""
    for k in strkeys:
        # A path longer than the nesting ends up on a leaf value (e.g. a str).
        if not isinstance(target, dict):
            print ("Invalid key specified")
            return
        print(target.keys())
        if k in target:
            prevTarget = target
            target = target[k]
        else:
            print ("Invalid key specified")
            return
    prevTarget[k] = strval
MutateDict(str_key_1)
print(a)
MutateDict(str_key_2)
print(a)
I have a dictionary of dictionaries that looks like this:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
I need to flatten this dict into a DataFrame. I tried to use json_normalize() to flatten the dictionary and the output I got looked like this:
My desired output is something like the one given below.
This can be done in R by using as.data.frame(unlist(data)), but I want to do the same flattening in Python. I am a novice in Python, so I don't have much idea how to do this.
I have made an attempt to normalize your json object by writing a recursive function as follows:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
# Kept for backward compatibility with code that reads these module globals;
# `subDicts` always holds the result of the most recent findAllSubDicts() call.
sequence = ""
subDicts = []


def findAllSubDicts(data):
    """Flatten a nested dict into ``[dotted_path, value]`` pairs.

    Handling per value type:
      * ``str``: emitted as a leaf.
      * ``dict``: descended into; its keys are appended to the path with ``.``.
      * non-empty ``list`` whose first element is a dict: only that first
        dict is descended into.
      * ``list`` with exactly one element: that element is emitted as a leaf.
      * anything else (other scalars, empty or longer lists) is skipped.

    Args:
        data: Dict with string keys.

    Returns:
        A fresh list of ``[path, value]`` pairs in traversal order. The same
        list is also stored in the module-level ``subDicts``.
    """
    global subDicts
    # Start from an empty list so repeated calls do not accumulate results.
    subDicts = []
    _collect_leaves(data, "", subDicts)
    return subDicts


def _collect_leaves(node, prefix, out):
    """Append the leaves found under ``node`` to ``out``, paths prefixed by ``prefix``."""
    for key, value in node.items():
        path = prefix + key
        if isinstance(value, str):
            out.append([path, value])
        elif isinstance(value, dict):
            _collect_leaves(value, path + ".", out)
        elif isinstance(value, list):
            if value and isinstance(value[0], dict):
                # The prefix is passed down explicitly, so keys that follow
                # this one still get the parent's prefix.
                _collect_leaves(value[0], path + ".", out)
            elif len(value) == 1:
                out.append([path, value[0]])
outDict = findAllSubDicts(data)
for path, value in outDict:
    # Pad the path to 40 columns so the values line up in one column.
    print(path.ljust(40, " "), end=" ")
    print(value)
Printing the results will give you:
data input
test.and.range.month.start Jan
test.and.range.month.end July
test.and.Student.Name ABC
test.and.Student.Class 10
Notify me if you need any clarification or any modification in my code.
I have a text file which I read in. This is a log file so it follows a particular pattern. I need to create a JSON ultimately, but from researching this problem, once it is in a dict it will be a matter of using json.loads() or json.dumps().
A sample of the text file is below.
INFO:20180606_141527:submit:is_test=False
INFO:20180606_141527:submit:username=Mary
INFO:20180606_141527:env:sys.platform=linux2
INFO:20180606_141527:env:os.name=ubuntu
The dict structure which I am ultimately looking for is
{
"INFO": {
"submit": {
"is_test": false,
"username": "Mary"
},
"env": {
"sys.platform": "linux2",
"os.name": "ubuntu"
}
}
}
I am ignoring the timestamp information in each list for now.
This is a snippet of the code I am using,
import csv
tree_dict = {}
with open('file.log') as file:
    for row in file:
        # NOTE: every field wraps the whole tree built so far, so each row is
        # nested around the previous rows instead of being merged with them.
        for key in reversed(row.split(":")):
            tree_dict = {key: tree_dict}
Which results in an undesired output,
{'INFO': {'20180606_141527': {'submit': {'os.name=posix\n': {'INFO': {'20180606_141527': {'submit': {'sys.platform=linux2\n': {'INFO': {'20180606_141527': {'submit': {'username=a227874\n': {'INFO': {'20180606_141527': {'submit': {'is_test=False\n': {}}}}}}}}}}}}}}}}}
I need to dynamically populate the dict because I don't know the actual field/key names.
with open('demo.txt') as f:
    lines = f.readlines()
dct = {}
for line in lines:
    line = line.strip()
    if not line:
        continue  # tolerate blank lines
    # param1 == INFO
    # param2 == submit or env
    # params3 == is_test=False etc.
    # maxsplit=3 keeps any ':' inside the key=value part intact
    param1, _, param2, params3 = line.split(':', 3)
    # create dct[param1] = {} if it is not created
    dct.setdefault(param1, {})
    # create dct[param1][param2] = {} if it is not created
    dct[param1].setdefault(param2, {})
    # for example params3 == is_test=False
    # split it at the first '=' and unpack it
    # k == is_test
    # v == False
    k, v = params3.split('=', 1)
    # and update our `dict` with the new values
    dct[param1][param2].update({k: v})
print(dct)
Output
{
'INFO': {
'submit': {
'is_test': 'False', 'username': 'Mary'
},
'env': {
'sys.platform': 'linux2', 'os.name': 'ubuntu'
}
}
}
This is one of the rare cases where recursion in Python seems to be appropriate and helpful. The following function adds a value to the hierarchical dictionary d specified by the list of keys:
def add_to_dict(d, keys, value):
    """Store ``value`` in the nested dict ``d`` under the key path ``keys``.

    Missing intermediate dicts are created on the way down.

    Args:
        d: Dict to update in place.
        keys: Non-empty sequence of keys, outermost first.
        value: Value stored under the last key.

    Raises:
        IndexError: If ``keys`` is empty.
    """
    node = d
    for key in keys[:-1]:
        # setdefault returns the existing sub-dict or inserts a new empty one.
        node = node.setdefault(key, {})
    node[keys[-1]] = value
The function works with the dictionaries of arbitrary depth. The rest is just the matter of calling the function:
d = {}
for line in file:
    # Split on the first '=' only, so the value may itself contain '='.
    keys, value = line.split("=", 1)
    keys = keys.split(":")
    add_to_dict(d, keys, value.strip())
Result:
{'INFO': {'20180606_141527': {
'submit': {'is_test': 'False',
'username': 'Mary'},
'env': {'sys.platform': 'linux2',
'os.name': 'ubuntu'}}}}
You can modify the code to exclude certain levels (like the timestamp).
You could use a nested collections.defaultdict() here:
from collections import defaultdict
from pprint import pprint
# Two automatic levels (info -> category); the third level is a plain dict.
d = defaultdict(lambda: defaultdict(dict))
with open('sample.txt') as in_file:
    for line in in_file:
        # Bounded splits keep ':' or '=' inside the value from breaking the unpacking.
        info, _, category, pair = line.strip().split(':', 3)
        props, value = pair.split('=', 1)
        d[info][category][props] = value
pprint(d)
Which gives the following:
defaultdict(<function <lambda> at 0x7ff8a341aea0>,
{'INFO': defaultdict(<class 'dict'>,
{'env': {'os.name': 'ubuntu',
'sys.platform': 'linux2'},
'submit': {'is_test': 'False',
'username': 'Mary'}})})
Note: defaultdict() is a subclass of the builtin dict, so there is no reason to convert it to a dict in the end result. Additionally, a defaultdict() can also be serialized to JSON with json.dumps().
You can use itertools.groupby:
import itertools, re
content = [re.split('\=|:', i.strip('\n')) for i in open('filename.txt')]
new_content = [[a, *c] for a, _, *c in content]
def group_vals(d):
    """Turn rows of ``[key1, key2, ..., value]`` into a nested dict.

    Rows are sorted and grouped by their first element. When every row of a
    group has exactly one element left, that element is the value (the last
    row wins); otherwise the remainders are grouped recursively.

    Args:
        d: List of rows; each row is a list with at least two elements.

    Returns:
        Nested dict keyed by the successive row elements.
    """
    grouped = {}
    ordered = sorted(d, key=lambda row: row[0])
    for head, rows in itertools.groupby(ordered, key=lambda row: row[0]):
        tails = [row[1:] for row in rows]
        # Decide by the shape of the remainders, not by how many rows the
        # group has: a lone row with several keys left must still nest.
        if all(len(tail) == 1 for tail in tails):
            grouped[head] = tails[-1][0]
        else:
            grouped[head] = group_vals(tails)
    return grouped
import json
print(json.dumps(group_vals(new_content), indent=4))
Output:
{
"INFO": {
"env": {
"os.name": "ubuntu",
"sys.platform": "linux2"
},
"submit": {
"is_test": "False",
"username": "Mary"
}
}
}
Check for the presence of keys:
import csv
import json
tree_dict = {}
with open('file.log') as file:
    tree_dict = {}
    for row in file:
        # Strip the line ending first; otherwise the last field reads
        # "False\n" and the boolean conversions below never match.
        keys = row.strip().split(":")
        if len(keys) < 2:
            continue  # blank or malformed line
        if keys[0] not in tree_dict:
            tree_dict[keys[0]] = {}
        if keys[-2] not in tree_dict[keys[0]]:
            tree_dict[keys[0]][keys[-2]] = {}
        # Split on the first '=' only, so the value may itself contain '='.
        key, value = keys[-1].split("=", 1)
        if value == "False":
            value = False
        if value == "True":
            value = True
        tree_dict[keys[0]][keys[-2]][key] = value
dumped = json.dumps(tree_dict)
import re
from functools import reduce
with open('file.txt') as f:
    # re.findall() needs one string; readlines() would hand it a list and
    # raise TypeError. re.MULTILINE then anchors ^/$ at every line.
    lines = f.read()
def rec_merge(d1, d2):
    """Return a new dict that recursively merges ``d2`` into ``d1``.

    Keys present in both are merged when both values are dicts; otherwise
    the value from ``d2`` wins. Neither input is modified.

    Args:
        d1: Base dict.
        d2: Dict whose entries take precedence.

    Returns:
        The merged dict (a shallow copy of ``d1`` with ``d2`` merged in).
    """
    merged = d1.copy()
    for k, v in d2.items():
        if k in merged and isinstance(merged[k], dict) and isinstance(v, dict):
            merged[k] = rec_merge(merged[k], v)
        else:
            merged[k] = v
    return merged
lst_of_tup = re.findall(r'^([^:]*):[\d_]+:([^:]*):([^=]*)=(.*)$', lines, re.MULTILINE)
lst_of_dct = [reduce(lambda x,y: {y:x}, reversed(t)) for t in lst_of_tup]
dct = reduce(rec_merge, lst_of_dct)
pprint(dct)
# {'INFO': {'env': {'os.name': 'ubuntu', 'sys.platform': 'linux2'},
# 'submit': {'is_test': 'False', 'username': 'Mary'}}}
Source :
import os
with open('file.log') as file:
    tree_dict = {}
    is_test = False
    username = ""
    sysplatform = ""
    osname = ""
    for row in file:
        row = row.rstrip('\n')
        for key in reversed(row.split(":")):
            # startswith() states the intent directly: the field name opens
            # the "name=value" part. Split once so the value may contain '='.
            if key.startswith('is_test'):
                is_test = key.split('=', 1)[1]
            elif key.startswith('username'):
                username = key.split('=', 1)[1]
            elif key.startswith('sys.platform'):
                sysplatform = key.split('=', 1)[1]
            elif key.startswith('os.name'):
                osname = key.split('=', 1)[1]
    # Assemble the fixed-shape result from the values seen last in the file.
    tree_dict = {
        "INFO": {
            "submit": {
                "is_test": is_test,
                "username": username
            },
            "env": {
                "sys.platform": sysplatform,
                "os.name": osname
            }
        }
    }
print(tree_dict)
Result :
{'INFO': {'submit': {'is_test': 'False', 'username': 'Mary'}, 'env': {'sys.platform': 'linux2', 'os.name': 'ubuntu'}}}
I am executing a python script through subprocess and passing it a named argument. In the subprocess script I then have to form a list of tags (list of key and value pair) whose format is shown below:
Tags=[
{
'Key': 'string',
'Value': 'string'
},
]
Here is my parent script:
def call_script(tags):
    """Run ``second_script.py`` with ``--tags tags`` and wait for it to exit.

    Args:
        tags: String passed verbatim as the value of the ``--tags`` option.
    """
    # Arguments are passed as a list (no shell), so `tags` needs no quoting.
    result = subprocess.Popen(["python","second_script.py","--tags",tags])
    result.wait()
if __name__ == '__main__':
    import json

    # json.dumps escapes quotes and backslashes inside the values, which the
    # hand-built '{"Name":"' + ... string did not.
    # NOTE(review): `type` shadows the builtin; presumably it is assigned
    # elsewhere in the script together with username and Designation.
    tags = json.dumps({
        "Name": username,
        "Designation": Designation,
        "Type": type,
    })
    call_script(tags)
Here is my second_script.py:
if __name__ == '__main__':
# Initialize tags
tags = {}
lst = []
# Basic command line argument parsing to launch using boto
parser = argparse.ArgumentParser()
parser.add_argument('--tags', type=json.loads)
args = parser.parse_args()
print args.tags
# Build a list of tags
for k, v in args.tags.iteritems():
print k
print v
tags['Key'] = k
tags['Value'] = v
print tags
lst.append(tags)
# print 'Tags: ',tags
print "List of Tags: ",lst
When I run this I see only last key-value pair in both tags as well as lst. How can I form a list of key value pair as I have shown in desired format above?
A sample input could be:
Name: Jason
Designation: Analyst
Type: Permanent
As such the desired output format should be:
Tags = [
{
'Key': 'Name',
'Value': 'Jason'
},
{
'Key': 'Designation',
'Value': 'Analyst'
},
{
'Key': 'Type',
'Value': 'Permanent'
},
]
The output of tags that I get in my above code is - {'Value': u'Permanent', 'Key': u'Type'} while lst is: [{'Value': u'Permanent', 'Key': u'Type'}, {'Value': u'Permanent', 'Key': u'Type'}, {'Value': u'Permanent', 'Key': u'Type'}]
What mistake am I making in my code and how can I correct it?
Try resetting tags in your for loop:
for k, v in args.tags.iteritems():
tags={} #changed Line
print k
print v
tags['Key'] = k
tags['Value'] = v
print tags
lst.append(tags)
This is because dict is mutable and you are appending references to the same object to lst. Replace
tags['Key'] = k
tags['Value'] = v
print tags
lst.append(tags)
with
print tags
lst.append({'Key' : k, 'Value' : v})
This question already has answers here:
Is there a built in function for string natural sort?
(23 answers)
Closed 7 years ago.
I have a python dictionary named data and have sub dictionaries inside it, such as
data = {'ind1' : {}, 'ind10' : {}, 'ind11' : {}, 'ind12' : {}, 'ind13' : {}, 'ind14' : {}, 'ind15' : {}, 'ind16' : {}, 'ind17' : {}, 'ind18' : {}, 'ind19' : {}, 'ind2' : {}, 'ind20' : {}, 'ind3' : {}, 'ind30' : {}, 'ind31' : {}, 'ind5' : {}, 'ind6' : {}, 'ind7' : {}, 'ind8' : {}, 'ind9' : {}}
I want to sort the data inside dictionary by key as
ind1
ind2 : {}
ind3 : {}
...
ind10 : {}
ind11 : {}
I tried data = collections.OrderedDict(sorted(data.items()))from the collections library
this is giving result as
ind1 : {}
ind11 : {}
ind12 : {}
ind13 : {}
.....
ind20 : {}
ind21 : {}
....
ind3 : {}
ind4 : {}
....
Please help
Do you need to have the key prefixed with "ind"? You could use integers as the key which would sort correctly. Right now it is sorting alphabetically which is causing the issue.
If you can't, assuming your keys follow the same format, sort using this:
collections.OrderedDict(sorted(data.items(), key=lambda kv: int(kv[0][3:])))
Which uses the integer after the prefix to sort.
Do you really want to sort the data inside the dictionary, or do you simply want to sort the list provided by .keys()?
If you are sorting only a list of values, then this link should contain what you need: https://wiki.python.org/moin/HowTo/Sorting
If you wish to sort data inside the dictionary, I'm intrigued to know why. I'll follow and add suggestions as you respond.
Good luck!
You need to use a natural sort algorithm on your keys if you want "ind10" to be after "ind9" ;^)
From wizkid on ActiveState
def keynat(string):
    r'''A natural sort helper function for sort() and sorted()
    without using regular expression.

    Each run of ASCII digits is read as one integer; every other character
    is kept on its own. Pieces are tagged ``(0, number)`` or
    ``(1, character)`` so numbers order before text and the key never
    compares an int with a str, which raises TypeError on Python 3.

    >>> items = ('Z', 'a', '10', '1', '9')
    >>> sorted(items)
    ['1', '10', '9', 'Z', 'a']
    >>> sorted(items, key=keynat)
    ['1', '9', '10', 'Z', 'a']
    '''
    r = []
    for c in string:
        if '0' <= c <= '9':
            if r and r[-1][0] == 0:
                # Extend the number that is currently being read.
                r[-1] = (0, r[-1][1] * 10 + int(c))
            else:
                r.append((0, int(c)))
        else:
            r.append((1, c))
    return r
data = collections.OrderedDict(
    sorted(
        # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
        data.items(),
        key=lambda row: keynat(row[0])
    )
)
If you don't need each key to start with 'ind', then they could all just be int's, and that would work as expected.
If they do need to be string's starting with 'ind', you could do this:
from __future__ import print_function
import pprint
from collections import OrderedDict
def ind_sort_key(item, prefix='ind'):
    """Return the integer that follows ``prefix`` in a dict item's key.

    Args:
        item: A ``(key, value)`` pair as produced by ``dict.items()``.
        prefix: Leading text of the key; only its length is used to find
            where the number starts.

    Returns:
        The numeric part of the key as an ``int``.

    Raises:
        ValueError: If the text after the prefix is not an integer.
    """
    item_key, _ = item
    return int(item_key[len(prefix):])
data = {
'ind1': {}, 'ind10': {}, 'ind11': {}, 'ind12': {}, 'ind13': {},
'ind14': {}, 'ind15': {}, 'ind16': {}, 'ind17': {}, 'ind18': {},
'ind19': {}, 'ind2': {}, 'ind20': {}, 'ind3': {}, 'ind30': {},
'ind31': {}, 'ind5': {}, 'ind6': {}, 'ind7': {}, 'ind8': {}, 'ind9': {},
}
sorted_data = OrderedDict(sorted(data.items(), key=ind_sort_key))
pprint.pprint(sorted_data)
This results in:
{'ind1': {},
'ind2': {},
'ind3': {},
'ind5': {},
'ind6': {},
'ind7': {},
'ind8': {},
'ind9': {},
'ind10': {},
'ind11': {},
'ind12': {},
'ind13': {},
'ind14': {},
'ind15': {},
'ind16': {},
'ind17': {},
'ind18': {},
'ind19': {},
'ind20': {},
'ind30': {},
'ind31': {}}