I have a list like this:
data = [
{'date':'2017-01-02', 'model': 'iphone5', 'feature':'feature1'},
{'date':'2017-01-02', 'model': 'iphone7', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone6', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone6', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone7', 'feature':'feature3'},
{'date':'2017-01-10', 'model': 'iphone7', 'feature':'feature2'},
{'date':'2017-01-10', 'model': 'iphone7', 'feature':'feature1'},
]
I want to achieve this:
[
{
'2017-01-02':[{'iphone5':['feature1']}, {'iphone7':['feature2']}]
},
{
'2017-01-03': [{'iphone6':['feature2']}, {'iphone7':['feature3']}]
},
{
'2017-01-10':[{'iphone7':['feature2', 'feature1']}]
}
]
I need an efficient way, since it could be much data.
I was trying this:
data = sorted(data, key=itemgetter('date'))
date = itertools.groupby(data, key=itemgetter('date'))
But I'm getting nothing for the value of the 'date' key.
Later I will iterate over this structure for building an HTML.
You can do this pretty efficiently and cleanly using defaultdict. Unfortunately it's a pretty advanced use and it gets hard to read.
from collections import defaultdict
from pprint import pprint
# create a dictionary whose elements are automatically dictionaries of sets
result_dict = defaultdict(lambda: defaultdict(set))
# Construct a dictionary with one key for each date and another dict ('model_dict')
# as the value.
# The model_dict has one key for each model and a set of features as the value.
for d in data:
result_dict[d["date"]][d["model"]].add(d["feature"])
# more explicit version:
# for d in data:
# model_dict = result_dict[d["date"]] # created automatically if needed
# feature_set = model_dict[d["model"]] # created automatically if needed
# feature_set.add(d["feature"])
# convert the result_dict into the required form
result_list = [
{
date: [
{phone: list(feature_set)}
for phone, feature_set in sorted(model_dict.items())
]
} for date, model_dict in sorted(result_dict.items())
]
pprint(result_list)
# [{'2017-01-02': [{'iphone5': ['feature1']}, {'iphone7': ['feature2']}]},
# {'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
# {'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}]
You can try this, here is my way, td is a dict to store { iphone : index } to check if the new item exist in the list of dict:
from itertools import groupby
from operator import itemgetter
r = []
for i in groupby(sorted(data, key=itemgetter('date')), key=itemgetter('date')):
td, tl = {}, []
for j in i[1]:
if j["model"] not in td:
tl.append({j["model"]: [j["feature"]]})
td[j["model"]] = len(tl) - 1
elif j["feature"] not in tl[td[j["model"]]][j["model"]]:
tl[td[j["model"]]][j["model"]].append(j["feature"])
r.append({i[0]: tl})
Result:
[
{'2017-01-02': [{'iphone5': ['feature1']}, {'iphone7': ['feature2']}]},
{'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
{'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}
]
As matter of fact, I think the data structure can be simplified, maybe you don't need so many nesting.
total_result = list()
result = dict()
inner_value = dict()
for d in data:
if d["date"] not in result:
if result:
total_result.append(result)
result = dict()
result[d["date"]] = set()
inner_value = dict()
if d["model"] not in inner_value:
inner_value[d["model"]] = set()
inner_value[d["model"]].add(d["feature"])
tmp_v = [{key: list(inner_value[key])} for key in inner_value]
result[d["date"]] = tmp_v
total_result.append(result)
total_result
[{'2017-01-02': [{'iphone7': ['feature2']}, {'iphone5': ['feature1']}]},
{'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
{'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}]
Related
I want to run a script which grabs all the titles of the files in a folder and collects them in a dictionary. I want the output structured like this:
{
1: {"title": "one"},
2: {"title": "two"},
...
}
I have tried the following, but how to add the "title"-part and make the dictionary dynamically?
from os import walk
mypath = '/Volumes/yahiaAmin-1'
filenames = next(walk(mypath), (None, None, []))[2] # [] if no file
courseData = {}
for index, x in enumerate(filenames):
# print(index, x)
# courseData[index]["title"].append(x)
# courseData[index].["tlt"].append(x)
courseData.setdefault(index).append(x)
print(courseData)
Assign the value dict directly to the index
courseData = {}
filenames = ["one", "two"]
for index, x in enumerate(filenames, 1):
courseData[index] = {"title": x}
print(courseData)
# {1: {'title': 'one'}, 2: {'title': 'two'}}
Not that using a dict where the key is an incremental int is generally useless, as a list will do the same
I have a input data that is parsed from a json and printing the output like this from keys like tablename,columnname,columnlength
data = ('tablename', 'abc.xyz'),('tablename','abc.xyz'),('columnname', 'xxx'),('columnname', 'yyy'),('columnlen', 55)
data[0] =
abc.xyz
abc.xyz
abc.xyz
data[1] =
xxx
yyy
zzz
data[2] =
20
30
60
data[0] represents tablename
data[1] represents columnname
data[2] represents column length
I have code below that does creating the empty list manually
TableName_list = []
ColumnName_list = []
ColumnLen_list = []
for x in data:
if x[0] == 'tablename':
TableName_list.append(data[0]])
elif x[0] == 'columnname':
ColumnName_list.append(data[1])
elif x[0] == 'columnlen':
ColumnLen_list.append(data[2])
I need to create a dynamic empty list respectively for each fields(tablename,column,columnlength) and append the data to that empty list in the dictionary
and my output is needed like this in a dictionary
dict = {'TableName':TableName_list,'ColumnName':ColumnName_list,'ColumnLen':columnLength_list }
This is probably most easily done with a defaultdict:
from collections import defaultdict
dd = defaultdict(list)
data = [
('tablename', 'abc.xyz'),('tablename','abc.xyz'),
('columnname', 'xxx'),('columnname', 'yyy'),
('columnlen', 55),('columnlen', 30)
]
for d in data:
dd[d[0]].append(d[1])
Output:
defaultdict(<class 'list'>, {
'tablename': ['abc.xyz', 'abc.xyz'],
'columnname': ['xxx', 'yyy'],
'columnlen': [55, 30]
})
If the case of the names in the result is important, you could use a dictionary to translate the incoming names:
aliases = { 'tablename' : 'TableName', 'columnname' : 'ColumnName', 'columnlen' : 'ColumnLen' }
for d in data:
dd[aliases[d[0]]].append(d[1])
Output:
defaultdict(<class 'list'>, {
'TableName': ['abc.xyz', 'abc.xyz'],
'ColumnName': ['xxx', 'yyy'],
'ColumnLen': [55, 30]
})
I suggest to make a dictionary directly, something look like this:
out_dict = {}
for x in data:
key = x[0]
if key in out_dict.keys():
out_dict[key] = out_dict[key].append(x[1])
else:
out_dict[key] = [x[1]]
using pandas:
import pandas as pd
>>> pd.DataFrame(data).groupby(0)[1].apply(list).to_dict()
'''
{'columnlen': [55, 30],
'columnname': ['xxx', 'yyy'],
'tablename': ['abc.xyz', 'abc.xyz']}
I have a Django model as
class Classification(models.Model):
kingdom = models.CharField(db_column='Kingdom', max_length=50)
phylum = models.CharField(db_column='Phylum', max_length=50)
class_field = models.CharField(db_column='Class', max_length=50)
order = models.CharField(db_column='Order', max_length=50)
family = models.CharField(db_column='Family', max_length=50)
genus = models.CharField(db_column='Genus', max_length=50)
species = models.CharField(db_column='Species', max_length=50)
to represent biological taxonomy classification as shown here:
I have classification records of over 5,000 species. I need to generate JSON hierarchical structure as shown below.
{
'name': "root",
'children': [
{
'name': "Animalia",
'children': [
{
{
'name':"Chordata"
'children': [ ... ]
}
},
...
...
]
},
...
...
]
}
Can you suggest me any method(s) to do so?
You can do the following:
Transform a list of Classifications to a nested dict.
Transform nested dict to the required format
Samples here will operate on slightly reduced Classification class to improve readability:
class Classification:
def __init__(self, kingdom, phylum, klass, species):
self.kingdom = kingdom
self.phylum = phylum
self.klass = klass
self.species = species
First part:
from collections import defaultdict
# in order to work with your actual implementation add more levels of nesting
# as lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
nested_dict = defaultdict(
lambda: defaultdict(
lambda: defaultdict(list)
)
)
for c in all_classifications:
nested_dict[c.kingdom][c.phylum][c.klass].append(c.species)
defaultdict is just a nice tool to guarantee existence of the key in a dictionary, it receives any callable and use it to create a value for missing key.
Now we have nice nested dictionary in the form of
{
'Kingdom1': {
'Phylum1': {
'Class1': ["Species1", "Species2"],
'Class2': ["Species3", "Species4"],
},
'Phylum2': { ... }
},
'Kingdom2': { 'Phylum3': { ... }, 'Phylum4': {... } }
}
Part two: converting to desired output
def nested_to_tree(key, source):
result = {'name': key, 'children':[]}
for key, value in source.items():
if isinstance(value, list):
result['children'] = value
else:
child = nested_to_tree(key, value)
result['children'].append(child)
return result
tree = nested_to_tree('root', nested_dict')
I believe it's self-explanatory - we just convert passed dictionary to desired format and recurse to it's content to form children.
Complete example is here.
Two notes:
Written in python 3. Replacing source.items() with source.iteritems() should suffice to run in python 2.
You haven't specify what leafs should looks like, so I just assumed that leaf nodes should be genus with all species attached as children. If you want species to be leaf nodes - it's pretty straightforward to modify the code to do so. If you have any trouble doing so - let me know in comments.
Finally got what I wanted. Code is not beautiful, near ugly, yet somehow I got what I wanted.
def classification_flare_json(request):
#Extracting from database and sorting the taxonomy from left to right
clazz = Classification.objects.all().order_by('kingdom','phylum','class_field','genus','species')
tree = {'name': "root", 'children': []}
#To receive previous value of given taxa type
def get_previous(type):
types = ['kingdom', 'phylum', 'class_field', 'family', 'genus', 'species']
n = types.index(type)
sub_tree = tree['children']
if not sub_tree: return None
for i in range(n):
if not sub_tree: return None
sub_tree = sub_tree[len(sub_tree)-1]['children']
if not sub_tree: return None
last_item = sub_tree[len(sub_tree)-1]
return last_item['name']
#To add new nodes in the tree
def append(type, item):
types = ['kingdom', 'phylum', 'class_field', 'family', 'genus', 'species_id']
n = types.index(type)
sub_tree = tree['children']
for i in range(n+1):
if not sub_tree: return None
sub_tree = sub_tree[len(sub_tree)-1]['children']
sub_tree.append(item)
for item in clazz:
while True:
if item.kingdom == get_previous('kingdom'):
if item.phylum == get_previous('phylum'):
if item.class_field == get_previous('class_field'):
if item.family == get_previous('family'):
if item.genus == get_previous('genus'):
append('genus', {'name':item.species, 'size': 1})
break;
else:
append('family', {'name':item.genus, 'children': []})
else:
append('class_field', {'name':item.family, 'children':[]})
else:
append('phylum', {'name': item.class_field, 'children':[]})
else:
append('kingdom', {'name': item.phylum, 'children':[]})
else:
tree['children'].append({'name': item.kingdom, 'children':[]})
return HttpResponse(json.dumps(tree), content_type="application/json")
How to add different values in the same key of a dictionary? These different values are added
in a loop.
Below is what I desired entries in the dictionary data_dict
data_dict = {}
And during each iterations, output should looks like:
Iteration1 -> {'HUBER': {'100': 5.42}}
Iteration2 -> {'HUBER': {'100': 5.42, '10': 8.34}}
Iteration3 -> {'HUBER': {'100': 5.42, '10': 8.34, '20': 7.75}} etc
However, at the end of the iterations, data_dict is left with the last entry only:
{'HUBER': {'80': 5.50}}
Here's the code:
import glob
path = "./meanFilesRun2/*.txt"
all_files = glob.glob(path)
data_dict = {}
def func_(all_lines, method, points, data_dict):
if method == "HUBER":
mean_error = float(all_lines[-1]) # end of the file contains total_error
data_dict["HUBER"] = {points: mean_error}
return data_dict
elif method == "L1":
mean_error = float(all_lines[-1])
data_dict["L1"] = {points: mean_error}
return data_dict
for file_ in all_files:
lineMthds = file_.split("_")[1] # reading line methods like "HUBER/L1/L2..."
algoNum = file_.split("_")[-2] # reading diff. algos number used like "1/2.."
points = file_.split("_")[2] # diff. points used like "10/20/30..."
if algoNum == "1":
FI = open(file_, "r")
all_lines = FI.readlines()
data_dict = func_(all_lines, lineMthds, points, data_dict)
print data_dict
FI.close()
You can use dict.setdefault here. Currently the problem with your code is that in each call to func_ you're re-assigning data_dict["HUBER"] to a new dict.
Change:
data_dict["HUBER"] = {points: mean_error}
to:
data_dict.setdefault("HUBER", {})[points] = mean_error
You can use defaultdict from the collections module:
import collections
d = collections.defaultdict(dict)
d['HUBER']['100'] = 5.42
d['HUBER']['10'] = 3.45
I am trying to create a nested dictionary from a mysql query but I am getting a key error
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" also to be another dictionary you need to tell Python by an assingment
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data']['i'] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
def __missing__(self, k):
self[k] = AutoDict()
return self[k]
result = AutoDict()
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
result['data'] does exist. So you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data'][i] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email