Multiple Dictionary in list Manipulation using Python - python

Input string in the python : multiple dictionaries in the list
input = [{'a': '1', 'b':'2','c':'10'},{'a': '1', 'b':'3','c':'11'},{'a':'2','b':'19','c':'100']
output = [ {'1':{'b': ('2','3'),'c':('10','11')},'2':{'b':(19),'c':(100)}}]

def dic_to_output(dic,col):
df = pd.DataFrame(dic)
colname = list(df.columns)
#print(colname)
colname.remove(str(col))
df = df.groupby('a').agg(lambda x: list(x)).reset_index().set_index('a').T
return df.to_dict()
input = [{'a': '1', 'b':'2','c':'10'},{'a': '1', 'b':'3','c':'11'},{'a':'2','b':'19','c':'100'}]
dic_to_output(dic,'a')
output:
{'1': {'b': ['2', '3'], 'c': ['10', '11']}, '2': {'b': ['19'], 'c': ['100']}}

def func(mylist):
t = {}
for i in d:
itr = iter(i)
k = i[next(itr)]
tmp = t.get(k, {})
for m in itr:
n = i[m]
if (tmp.get(m, None) == None):
tmp[m] = tuple()
if (i[m] not in set(tmp[m])):
tmp[m] += (n,)
t[k] = tmp
print([t])
d = [{'a': '1', 'b':'2','c':'10'},{'a': '1', 'b':'3','c':'11'},{'a':'2','b':'19','c':'100'}]
func(d)
d = [{'a': '1', 'b':'2','c':'10'},{'a': '1', 'b':'3','c':'11'}]
func(d)
d = [{'a': '1', 'd': 4, 'b':'2','c':'10'},{'a': '1', 'd': 4, 'b':'3','c':'11'}]
func(d)
d = [{'a': '1', 'd': 4, 'b':'2','c':'10'},{'a': '1', 'b':'3', 'd': 4,'c':'11'}]
func(d)

Related

I want to convert file data into 3d dictionary using python

Like I want this type of dictionary by reading file:
table = {
0: {'A': '1', 'B': '2', 'C': '3'},
1: {'A': '4', 'B': '5', 'C': '6'},
2: {'A': '7', 'B': '8', 'C': '9'}
}
or this will be enough.
table = {
{'A': '1', 'B': '2', 'C': '3'},
{'A': '4', 'B': '5', 'C': '6'},
{'A': '7', 'B': '8', 'C': '9'}
}
I have a file lets name file.txt which has data like
A B C
1 2 3
4 5 6
7 8 9
I am trying but i dint get the result this following is my try:
it gives me output {'A': '7', 'B': '8', 'C': '9'}
I know its obvious it will not give me 3d dict but I don't know how to get there.
array=[]
with open("file.txt") as f:
for line in f:
array = line.split()
break #it will give me array=['A','B','C']
v=[]
dic = {}
for i in range(0,len(array)):
for line in open("file.txt"):
x=0
v = line.split()
dic[ array[i] ] = v[i]
print(dic)
You can use Pandas
# Python env: pip install pandas
# Anaconda env: conda install pandas
import pandas as pd
df = pd.read_table('file.txt', sep=' ')
table = df.to_dict('index')
print(table)
# Output
{0: {'A': 1, 'B': 2, 'C': 3},
1: {'A': 4, 'B': 5, 'C': 6},
2: {'A': 7, 'B': 8, 'C': 9}}
If you want to use just built-in modules, you can use csv.DictReader:
import csv
with open("data.csv", "r") as f_in:
reader = csv.DictReader(f_in, delimiter=" ")
# if the file countains floats use float(v) instead int(v)
# if you want values just strings you can do:
# data = list(reader)
data = [{k: int(v) for k, v in row.items()} for row in reader]
print(data)
Prints:
[{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}, {"A": 7, "B": 8, "C": 9}]
Try to use the following code:
table = {}
with open("file.txt") as f:
headers = next(f).split() # get the headers from the first line
for i, line in enumerate(f):
row = {}
for j, value in enumerate(line.split()):
row[headers[j]] = value
table[i] = row
print(table)
You should get format like this:
{
0: {'A': '1', 'B': '2', 'C': '3'},
1: {'A': '4', 'B': '5', 'C': '6'},
2: {'A': '7', 'B': '8', 'C': '9'}
}
If you only want the inner dictionaries and not the outer structure, you can use a list instead of a dictionary to store the rows:
table = []
with open("file.txt") as f:
headers = next(f).split() # get the headers from the first line
for line in f:
row = {}
for j, value in enumerate(line.split()):
row[headers[j]] = value
table.append(row)
print(table)
This will give you the following output:
[
{'A': '1', 'B': '2', 'C': '3'},
{'A': '4', 'B': '5', 'C': '6'},
{'A': '7', 'B': '8', 'C': '9'}
]
DictReader from the csv module will give you what you seem to need - i.e., a list of dictionaries.
import csv
with open('file.txt', newline='') as data:
result = list(csv.DictReader(data, delimiter=' '))
print(result)
Output:
[{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': '6'}, {'A': '7', 'B': '8', 'C': '9'}]
Optionally:
If you have an aversion to module imports you could achieve the same objective as follows:
result = []
with open('file.txt') as data:
columns = data.readline().strip().split()
for line in map(str.strip, data):
result.append(dict(zip(columns, line.split())))
print(result)
Output:
[{'A': '1', 'B': '2', 'C': '3'}, {'A': '4', 'B': '5', 'C': '6'}, {'A': '7', 'B': '8', 'C': '9'}]

Swap keys of nested dictionaries

I have a dictionary as follows:
Each key has a dictionary associated with it.
dict_sample = {'a': {'d0': '1', 'd1': '2', 'd2': '3'}, 'b': {'d0': '1'}, 'c': {'d1': '1'}}
I need the output as follows:
output_dict = {'d0': {'a': 1, 'b': 1}, 'd1': {'a': 2, 'c': 1}, 'd2': {'a': 3}}
I'd appreciate any help on the pythonic way to achieve this. Thank You !
I believe this produces the desired output
>>> from collections import defaultdict
>>> d = defaultdict(dict)
>>>
>>> dict_sample = {'a': {'d0': '1', 'd1': '2', 'd2': '3'}, 'b': {'d0': '1'}, 'c': {'d1': '1'}}
>>>
>>> for key, value in dict_sample.items():
... for k, v in value.items():
... d[k][key] = v
...
>>> d
defaultdict(<class 'dict'>, {'d0': {'a': '1', 'b': '1'}, 'd1': {'a': '2', 'c': '1'}, 'd2': {'a': '3'}})
You can use dict.setdefault on a new dict with a nested loop:
d = {}
# for each key and sub-dict in the main dict
for k1, s in dict_sample.items():
# for each key and value in the sub-dict
for k2, v in s.items():
# this is equivalent to d[k2][k1] = int(v), except that when k2 is not yet in d,
# setdefault will initialize d[k2] with {} (a new dict)
d.setdefault(k2, {})[k1] = int(v)
d would become:
{'d0': {'a': 1, 'b': 1}, 'd1': {'a': 2, 'c': 1}, 'd2': {'a': 3}}

Python - Sum the value in the list of dictionary based on the same key

I have a list of dictionaries which looks like:
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
And I want to get a nested dictionary whose keys are the value from the key('player') and whose values are dictionaries of aggregated stats.
The output should:
{'3': {'stat3': 9, 'stat2': 8, 'player': '3'},
'1': {'stat3': 13, 'stat2': 5, 'player': '1'}}
The following is my code:
from collections import defaultdict
result = {}
total_stat = defaultdict(int)
for dict in data:
total_stat[dict['player']] += int(dict['stat3'])
total_stat[dict['player']] += int(dict['stat2'])
total_stat = ([{'player': info, 'stat3': total_stat[info],
'stat2': total_stat[info]} for info in
sorted(total_stat, reverse=True)])
for item in total_stat:
result.update({item['player']: item})
print(result)
However, I got this:
{'3': {'player': '3', 'stat3': 17, 'stat2': 17},
'1': {'player': '1', 'stat3': 18, 'stat2': 18}}
How could I make it right? Or are there other approaches?
Your data is rather a DataFrame, a natural pandas solution is :
In [34]: pd.DataFrame.from_records(data).astype(int).groupby('player').sum().T.to_dict()
Out[34]: {1: {'stat2': 5, 'stat3': 13}, 3: {'stat2': 8, 'stat3': 9}}
Just use a more nested default-factory:
>>> total_stat = defaultdict(lambda : defaultdict(int))
>>> value_fields = 'stat2', 'stat3'
>>> for datum in data:
... player_data = total_stat[datum['player']]
... for k in value_fields:
... player_data[k] += int(datum[k])
...
>>> from pprint import pprint
>>> pprint(total_stat)
defaultdict(<function <lambda> at 0x1023490d0>,
{'1': defaultdict(<class 'int'>, {'stat2': 5, 'stat3': 13}),
'3': defaultdict(<class 'int'>, {'stat2': 8, 'stat3': 9})})
This solution use a nested dictionary. The out is a {player: Counter} dictionary, where as Counter itself is another dictionary {stat: score}
import collections
def split_player_stat(dict_object):
"""
Split a row of data into player, stat
>>> split_player_stat({'stat3': '5', 'stat2': '4', 'player': '1'})
'1', {'stat3': 5, 'stat2': 4}
"""
key = dict_object['player']
value = {k: int(v) for k, v in dict_object.items() if k != 'player'}
return key, value
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
out = collections.defaultdict(collections.Counter)
for player_stat in data:
player, stat = split_player_stat(player_stat)
out[player].update(stat)
print(out)
The magic of this solution is done by the collections.defaultdict and collections.Counter classes, both behaves like dictionaries.
Not the best code, nor the more pythonic, but I think you should be able to walk through it and figure out where your code went wrong.
def sum_stats_by_player(data):
result = {}
for dictionary in data:
print(f"evaluating dictionary {dictionary}")
player = dictionary["player"]
stat3 = int(dictionary["stat3"])
stat2 = int(dictionary["stat2"])
# if the player isn't in our result
if player not in result:
print(f"\tfirst time player {player}")
result[player] = {} # add the player as an empty dictionary
result[player]["player"] = player
if "stat3" not in result[player]:
print(f"\tfirst time stat3 {stat3}")
result[player]["stat3"] = stat3
else:
print(f"\tupdating stat3 { result[player]['stat3'] + stat3}")
result[player]["stat3"] += stat3
if "stat2" not in result[player]:
print(f"\tfirst time stat2 {stat2}")
result[player]["stat2"] = stat2
else:
print(f"\tupdating stat2 { result[player]['stat2'] + stat2}")
result[player]["stat2"] += stat2
return result
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
print(sum_stats_by_player(data))
Most of the solution here are making the problem too complex. Let's make it simple and more readable. Here you go:
In [26]: result = {}
In [27]: req_key = 'player'
In [29]: for dct in data:
...: player_val = dct.pop(req_key)
...: result.setdefault(player_val, {req_key: player_val})
...: for k, v in dct.items():
...: result[player_val][k] = result[player_val].get(k, 0) + int(v)
In [30]: result
Out[30]:
{'1': {'player': '1', 'stat2': 5, 'stat3': 13},
'3': {'player': '3', 'stat2': 8, 'stat3': 9}}
Here you go simple and clean. For this simple problem no need of imports. Now coming to the program:
result.setdefault(player_val, {'player': player_val})
It sets the default value as "player": 3 or "player": 1 if there is no such key in the result.
result[player_val][k] = result[player_val].get(k, 0) + int(v)
This adds up the value for keys with common values.
Another version using Counter
import itertools
from collections import Counter
def count_group(group):
c = Counter()
for g in group:
g_i = dict([(k, int(v)) for k, v in g.items() if k != 'player'])
c.update(g_i)
return dict(c)
sorted_data = sorted(data, key=lambda x:x['player'])
results = [(k, count_group(g)) for k, g in itertools.groupby(sorted_data, lambda x: x['player'])]
print(results)
To give
[('1', {'stat3': 13, 'stat2': 5}), ('3', {'stat3': 9, 'stat2': 8})]
Two loops would allow you to:
group your data by a primary key
aggregate all secondary information
These two tasks are accomplished in the aggregate_statistics function shown below.
from collections import Counter
from pprint import pprint
def main():
data = [{'player': 1, 'stat2': 4, 'stat3': 5},
{'player': 1, 'stat2': 1, 'stat3': 8},
{'player': 3, 'stat2': 1, 'stat3': 6},
{'player': 3, 'stat2': 7, 'stat3': 3}]
new_data = aggregate_statistics(data, 'player')
pprint(new_data)
def aggregate_statistics(table, key):
records_by_key = {}
for record in table:
data = record.copy()
records_by_key.setdefault(data.pop(key), []).append(Counter(data))
new_data = []
for second_key, value in records_by_key.items():
start, *remaining = value
for record in remaining:
start.update(record)
new_data.append(dict(start, **{key: second_key}))
return new_data
if __name__ == '__main__':
main()

How do I loop over dictionary and check for values passed by a variable in Python

I've got the below dictionary and list - how do I loop over the dictionary checking if b == '1' while passing '1' as variable from a list?
dic = {'info': [{'a':0, 'b':'1'},{'a':0, 'b':'3'},{'a':0, 'b':'3'},{'a':0, 'b':'1'}]}
lst = ['1']
I want to return {'a':0, 'b':'1'}, {'a':0, 'b':'1'}.
This is a general solution using filter; the built-in method, you will have to adopt it to your needs:
>>> list(filter(lambda d: d['b'] in lst, dic['info']))
[{'b': '1', 'a': 0}, {'b': '1', 'a': 0}]
Converting the filter object into a list using list constructor is necessary only in Python3, whereas in Python2, it is not required:
>>> filter(lambda d: d['b'] in lst, dic['info'])
[{'b': '1', 'a': 0}, {'b': '1', 'a': 0}]
EDIT: To make the solution more general in case multiple items in lst, then consider the following:
>>> dic
{'info': [{'b': '1', 'a': 0}, {'b': '3', 'a': 0}, {'b': '3', 'a': 0}, {'b': '1', 'a': 0}, {'b': '2', 'a': '1'}]}
>>>
>>> lst
['1', '2']
>>> def filter_dict(dic_lst, lst):
lst_out = []
for sub_d in dic_lst:
if any(x == sub_d['b'] for x in lst):
lst_out.append(sub_d)
return lst_out
>>> filter_dict(dic['info'], lst)
[{'b': '1', 'a': 0}, {'b': '1', 'a': 0}, {'b': '2', 'a': '1'}]
OR:
>>> list(map(lambda x: list(filter(lambda d: d['b'] in x, dic['info'])),lst))
[[{'b': '1', 'a': 0}, {'b': '1', 'a': 0}], [{'b': '2', 'a': '1'}]]
Just a simple list comprehension:
In [22]: dic = {'info': [{'a':0, 'b':'1'},{'a':0, 'b':'3'},{'a':0, 'b':'3'},{'a':0, 'b':'1'}]}
In [23]: lst = ['1']
In [25]: [sub_dict for sub_dict in dic['info'] if sub_dict['b'] == lst[0]]
Out[25]: [{'a': 0, 'b': '1'}, {'a': 0, 'b': '1'}]
You could use a filter approach:
filter(lambda x:x['b'] in list, dic['info'])
It will create a generator which you can materialize in a list:
result = list(filter(lambda x:x['b'] in list, dic['info']))
Mind I would however rename your list variable since you here override a reference to the list type.
from collections import defaultdict
dic = {'info': [{'a':0, 'b':'1'},{'a':0, 'b':'3'},{'a':0, 'b':'3'},{'a':0, 'b':'1'}]}
d = defaultdict(list)
for each in dic['info']:
d[each['b']].append(each)
out:
defaultdict(list,
{'1': [{'a': 0, 'b': '1'}, {'a': 0, 'b': '1'}],
'3': [{'a': 0, 'b': '3'}, {'a': 0, 'b': '3'}]})
in:
d['1']
out:
[{'a': 0, 'b': '1'}, {'a': 0, 'b': '1'}]
Build an index dict to avoid iterate again.
First go my simple loop and iteration way
Input:
>>> dic
{'info': [{'a': 0, 'b': '1'}, {'a': 0, 'b': '3'}, {'a': 0, 'b': '3'}, {'a': 0, 'b': '1'}]}
>>> l
['1']
New List variable for result.
>>> result = []
Algo
Iterate diction by iteritems method of dictionary.
Value of main dictionary is list data type. so again iterate list by for loop.
Check b key is present in sub dictionary and check its value is present in given list l.
If yes, then append to result list.
code:
>>> for k,v in dic.iteritems():
... for i in v:
... if "b" in i and i["b"] in l:
... result.append(i)
...
Output:
>>> result
[{'a': 0, 'b': '1'}, {'a': 0, 'b': '1'}]
>>>
Notes:
Do not use list as variable name because list is reversed keyword for Python
Read basic things of dictionary and list which has properties.
Try to write code first.
You can make use of a list comprehension, or just do it using filter.
list comprehension
dict = {'info': [{'a':0, 'b':'1'},{'a':0, 'b':'3'},{'a':0, 'b':'3'},{'a':0, 'b':'1'}]}
lst = ['1']
result = [i for i in dict['info'] if i['b'] == lst[0]]
print result # [{'a': 0, 'b': '1'}, {'a': 0, 'b': '1'}]
filter
dict = {'info': [{'a':0, 'b':'1'},{'a':0, 'b':'3'},{'a':0, 'b':'3'},{'a':0, 'b':'1'}]}
list(filter(lambda i: i['b'] in lst, dic['info']))
# [{'b': '1', 'a': 0}, {'b': '1', 'a': 0}]

Parse a text file in an array python

A C G T
A 2 -1 -1 -1
C -1 2 -1 -1
G -1 -1 2 -1
T -1 -1 -1 2
This file is separated by tabs as a text file and I want it to be mapped in a similar format to in python.
{'A': {'A': 91, 'C': -114, 'G': -31, 'T': -123},
'C': {'A': -114, 'C': 100, 'G': -125, 'T': -31},
'G': {'A': -31, 'C': -125, 'G': 100, 'T': -114},
'T': {'A': -123, 'C': -31, 'G': -114, 'T': 91}}
I have tried very had but I cannot figure out how to do this as I am new to python.
Please help.
My code so far:
seq = flines[0]
newseq = []
j = 0
while(l < 4):
i = 2
while(o < 4):
newseq[i][j] = seqLine[i]
i = i + 1;
o = o + 1
j = j + 1
l = l + 1
print (seq)
print(seqLine)
I think this is what you want:
import csv
data = {}
with open('myfile.csv', 'rb') as csvfile:
ntreader = csv.reader(csvfile, delimiter="\t", quotechar='"')
for rowI, rowData in enumerate(ntreader):
if rowI == 0:
headers = rowData[1:]
else:
data[rowData[0]] = {k: int(v) for k, v in zip(headers, rowData[1:])}
print data
To make life easy I use csv-module and just say tab is delimiter, then I grab the column headers on the first row and use them for all other rows to label the values.
This produces:
{'A ': {'A': '2', 'C': '-1', 'T': '-1 ', 'G': '-1'},
'C': {'A': '-1', 'C': '2', 'T': '-1', 'G': '-1'},
'T': {'A': '-1', 'C': '-1', 'T': '2', 'G': '-1'},
'G': {'A': '-1', 'C': '-1', 'T': '-1', 'G': '2'}}
Edit*
For python <2.7 it should work if you switch the dictionary comprehension line (rowData[0]] = ....) above and use a simple loop in the same place:
rowDict = dict()
for k, v in zip(headers, rowData[1:]):
rowDict[k] = int(v)
data[rowData[0]] = rowDict
Using csv.DictReader gets you most of the way there on your own:
reader = DictReader('file.csv', delimiter='\t')
#dictdata = {row['']: row for row in reader} # <-- python 2.7+ only
dictdata = dict((row[''], row) for row in reader) # <-- python 2.6 safe
Outputs:
{'A': {None: [''], '': 'A', 'A': '2', 'C': '-1', 'G': '-1', 'T': '-1'},
'C': {'': 'C', 'A': '-1', 'C': '2', 'G': '-1', 'T': '-1'},
'G': {'': 'G', 'A': '-1', 'C': '-1', 'G': '2', 'T': '-1'},
'T': {'': 'T', 'A': '-1', 'C': '-1', 'G': '-1', 'T': '2'}}
To clean up the extraneous keys got messy, and I needed to rebuild the inner dict, but replace the last line with this:
dictdata = {row['']: {key: value for key, value in row.iteritems() if key} for row in reader}
Outputs:
{'A': {'A': '2', 'C': '-1', 'G': '-1', 'T': '-1'},
'C': {'A': '-1', 'C': '2', 'G': '-1', 'T': '-1'},
'G': {'A': '-1', 'C': '-1', 'G': '2', 'T': '-1'},
'T': {'A': '-1', 'C': '-1', 'G': '-1', 'T': '2'}}
Edit: for Python <2.7
Dictionary comprehensions were added in 2.7. For 2.6 and lower, use the dict constructor:
dictdata = dict((row[''], dict((key, value) for key, value in row.iteritems() if key)) for row in reader)

Categories