I have a problem with Python and hope someone can help me. I have a list, for example this one:
list = [['a','b','c'],['a','c1','d1'],['b','c1','c2']]
I want to combine the list so that all sublists sharing the same first element (index 0) are merged together, so the result looks like:
a, b, c, c1, d1
b, c1, c2
I tried something like this, but I could not get it working:
list = [['a','b','c'],['a','c1','d1'],['b','c1','c2']]
empty_list = []
for i in list:
if i not in empty_list:
empty_list.append(i)
print empty_list
Can someone help me?
You can try this :)
old_list = [['a', 'b', 'c'], ['a', 'c1', 'd1'], ['b', 'c1', 'c2']]

# Merge consecutive sub lists that start with the same element, skipping
# elements that are already present in the merged group.
prev = None  # first element of the group currently being built
empty_list = []
for l in old_list:  # iterate through each sub list, sub list as l
    # `empty_list and ...` guards the very first iteration, so a leading key
    # that happens to equal None cannot index into an empty result.
    if empty_list and l[0] == prev:
        # append your elements to existing sub list
        for i in l:  # iterate through each element in sub list
            if i not in empty_list[-1]:
                empty_list[-1].append(i)
    else:
        # Start the new group from a *copy*: appending `l` itself would make
        # the merge above mutate the sub lists stored in old_list.
        empty_list.append(list(l))
    prev = l[0]  # update prev
print(empty_list)
# [['a', 'b', 'c', 'c1', 'd1'], ['b', 'c1', 'c2']]
Using itertools.groupby:
from itertools import groupby
from operator import itemgetter

listt = [['a', 'b', 'c'], ['a', 'c1', 'd1'], ['b', 'c1', 'c2']]

# groupby only merges *consecutive* sub lists that share the same first
# element; sort by that element first if equal keys may be scattered.
grouped = [list(g) for _, g in groupby(listt, itemgetter(0))]

# Keep the shared first element once, then append the remaining elements of
# every sub list in the group. Flattening the whole sub lists would repeat
# the key: ['a', 'b', 'c', 'a', 'c1', 'd1'].
result = [
    slist[0][:1] + [item for sslist in slist for item in sslist[1:]]
    for slist in grouped
]
# [['a', 'b', 'c', 'c1', 'd1'], ['b', 'c1', 'c2']]
An OrderedDict can do most of the work:
from collections import OrderedDict

l = [['a', 'b', 'c'], ['a', 'c1', 'd1'], ['b', 'c1', 'c2']]

# Map each first element to one merged list that starts with that element,
# keeping the order in which the first elements were first seen.
d = OrderedDict()
for el in l:
    head = el[0]
    if head not in d:
        d[head] = [head]  # seed the group with its own key
    d[head].extend(el[1:])
print(d.values())
you can also try using defaultdict(list)
from collections import defaultdict

l = [['a', 'b', 'c'], ['a', 'c1', 'd1'], ['b', 'c1', 'c2']]

# Collect everything after the first element under that first element.
d_dict = defaultdict(list)
for i in l:
    d_dict[i[0]].extend(i[1:])

# Wrap the key as [k]: list(k) would split a multi-character key such as
# 'ID1' into ['I', 'D', '1'].
result = [[k] + v for k, v in d_dict.items()]
print(result)
Output:
[['a', 'b', 'c', 'c1', 'd1'], ['b', 'c1', 'c2']]
Related
This question already has answers here:
Python: Rename duplicates in list with progressive numbers without sorting list
(8 answers)
Closed 2 years ago.
Can someone help me enumerate the elements of a list so that each element is labelled with a running count of its occurrences, i.e.,
list_in = ['a','b','c']
list_out = ['a-1','b-1','c-1']
list_in = ['a','b','a']
list_out = ['a-1','b-1','a-2']
list_in = ['a','a','a']
list_out = ['a-1','a-2','a-3']
You could count the occurrences of each item in list_in:
list_in1 = ['a', 'b', 'c']
list_in2 = ['a', 'b', 'a']
list_in3 = ['a', 'a', 'a']
def reformat(list_in):
    """Label every item with its running occurrence number.

    Each item becomes ``"<item>-<k>"`` where ``k`` is how many times that
    item has appeared so far (starting at 1). Input order is preserved.

    Args:
        list_in: Iterable of hashable items.

    Returns:
        The list of labelled strings; it is also printed, as before.
    """
    seen = {}  # item -> number of occurrences so far
    out = []
    # One pass with a running counter replaces list.count() per unique item
    # and list.pop(0) per element, both of which made the original quadratic.
    for item in list_in:
        seen[item] = seen.get(item, 0) + 1
        out.append(f"{item}-{seen[item]}")
    print(out)
    return out
for list_in in (list_in1, list_in2, list_in3):
reformat(list_in)
Out:
['a-1', 'b-1', 'c-1']
['a-1', 'b-1', 'a-2']
['a-1', 'a-2', 'a-3']
Here is one way of doing it:
list_in = ['a','b','c']
lst_new = [list_in[x]+'-'+str(list_in[0:x+1].count(list_in[x])) for x in range(len(list_in))]
print(lst_new)
You could count the number of letters preceding each one in the list and format the result in a list comprehension:
list_in = ['a','b','a']
list_out = [f"{c}-{list_in[:i].count(c)+1}" for i,c in enumerate(list_in)]
# ['a-1', 'b-1', 'a-2']
I wrote one, but its output is not in the same order as the original list.
>>> from collections import Counter
>>> def enum(listy):
>>> counter = Counter(listy)
>>> final = []
>>> for val in range(len(counter)):
>>> alp, times = counter.popitem()
>>> temp = []
>>> for p in range(1, times+1):
>>> temp.append(alp+'-'+str(p))
>>> final.extend(temp)
>>> return final
>>> list_in = ['a','b','a']
>>> print(enum(list_in))
['b-1', 'a-1', 'a-2']
I have this one long list and want to convert it to a nested list and a dictionary.
L= ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"]
output:
nested list:
[["a","abc","de","efg"], ["b","ijk","lm","op","qr"], ["c","123","45","6789"]]
dictionary:
{"a":["abc","de","efg"],
"b":["ijk","lm","op","qr"], "c":["123","45","6789"] }
Can anyone tell me how to do that in python?
And I can't import anything
I assume the groups are separated by the empty strings. For this you can use itertools.groupby:
from itertools import groupby

data = ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"]

# ''.__ne__ is true for every non-empty string, so groupby yields
# alternating runs of non-empty strings and empty-string separators.
is_non_empty = ''.__ne__

nl = []  # one list per run of non-empty strings
d = {}   # first string of each run -> remaining strings of that run
for keep, run in groupby(data, is_non_empty):
    if not keep:
        continue  # this run is made of separators only
    members = list(run)
    nl.append(members)
    d[members[0]] = members[1:]
print(nl)
print(d)
Results:
[['a', 'abc', 'de', 'efg'], ['b', 'ijk', 'lm', 'op', 'qr'], ['c', '123', '45', '6789']]
{'a': ['abc', 'de', 'efg'], 'b': ['ijk', 'lm', 'op', 'qr'], 'c': ['123', '45', '6789']}
In the groupby I'm using ''.__ne__ which is the function for "not equal" of an empty string. This way it's only capturing groups of non-empty strings.
EDIT
I just read that you cannot import. Here's a solution just using a loop:
nl = [[]]
for s in data:
if s:
nl[-1].append(s)
else:
nl.append([])
And for the dict:
# Build {first string of each group: [remaining strings]} from `data`, where
# groups are separated by empty strings.
d = {}
key = None  # None means "the next non-empty string starts a new group"
for val in data:
    if not val:
        # Separator: close the current group. Unlike calling next() on an
        # iterator here, this cannot raise StopIteration when `data` is empty
        # or ends with a separator, and repeated separators do not create a
        # '' key.
        key = None
    elif key is None:
        key = val
        d[key] = []
    else:
        d[key].append(val)
Here's how to convert L to a nested list:
L = ["a","abc","de","efg","","b","ijk","lm","op","qr","","c","123","45","6789"]

# Cut L into groups at every empty string. The last entry of nested_list_L
# is always the group currently being filled.
nested_list_L = [[]]
for entry in L:
    if entry == "":
        nested_list_L.append([])  # separator: open the next group
        continue
    nested_list_L[-1].append(entry)
And here's how to convert L to a dictionary:
L = ["a","abc","de","efg","","b","ijk","lm","op","qr","","c","123","45","6789"]

# Build {key: values} where each group is separated by an empty string and
# the first item of a group is its key.
dict_L = {}
key = None  # None means "the next non-empty item starts a new group"
for item in L:
    if item == "":
        key = None  # separator: the current group is finished
    elif key is None:
        # Decide by position, not by len(item) == 1: the length test misfiles
        # multi-character keys and single-character values.
        key = item
        dict_L[key] = []
    else:
        dict_L[key].append(item)
From my understanding, you are trying to:
Split a list by empty string, then
Convert the resulting nested list into a dictionary, using first element of each sub-list as the key and the rest as value.
You can certainly accomplish the task without any imports. To split a list, just iterate over it and build the nested list along the way:
def split(data, on):
    """Split the sequence ``data`` into sub lists at every element equal to ``on``.

    Separator elements are not included in the output. A trailing separator
    produces a trailing empty sub list; empty ``data`` produces ``[]``.
    """
    pieces = []
    current = []
    for element in data:
        if element == on:
            # Separator reached: close the piece collected so far.
            pieces.append(current)
            current = []
            continue
        current.append(element)
    # Emit the last piece when it has content, or when data ended on a
    # separator (so the trailing empty piece is kept).
    if current or data[-1:] == [on]:
        pieces.append(current)
    return pieces
Then, again, iterate over this nested list to build your desired dictionary:
def build_dict(key_valss):
    """Turn a list of lists into a dict keyed by each sub list's first element.

    Every non-empty sub list contributes ``first element -> remaining
    elements``. Empty sub lists are skipped; a later sub list with the same
    first element overwrites the earlier entry.
    """
    return {
        group[0]: group[1:]
        for group in key_valss
        if group != []
    }
Compose the two functions to get what you want:
>>> build_dict( split(data = ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"] , on = '') )
{'a': ['abc', 'de', 'efg'], 'b': ['ijk', 'lm', 'op', 'qr'], 'c': ['123', '45', '6789']}
I have a nested list that looks something like:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']...]
Is it possible to split lst into smaller lists by ID, so that each small list contains only the elements with the same ID? The result should look something like:
lst1 = [['ID1', 'A'], ['ID1', 'B']...]
lst2 = [['ID2', 'AAA'], ['ID2', 'DDD']...]
You can use groupby:
from itertools import groupby

# One list per run of consecutive entries that share the same first element.
grp_lists = [list(members) for _, members in groupby(lst, key=lambda row: row[0])]
print(grp_lists[0])
[['ID1', 'A'], ['ID1', 'B']]
print(grp_lists[1])
[['ID2', 'AAA'], ['ID2', 'DDD']]
using collections.defaultdict:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']]
from collections import defaultdict
result = defaultdict(list)
for item in lst:
result[item[0]].append(item)
print(list(result.values()))
output:
[[['ID1', 'A'], ['ID1', 'B']], [['ID2', 'AAA'], ['ID2', 'DDD']]]
Without external functions: build a set of unique indexes, then loop over the original list building a new list for each of the indexes and filling it with list items that contain that index:
lst = [['ID1', 'A'], ['ID1', 'B'], ['ID2', 'AAA'], ['ID2', 'DDD']]

# Every distinct ID that appears as the first element of an entry.
unique_set = set(elem[0] for elem in lst)

# One sub list per ID. Compare with ==, not `in`: `in` on strings is a
# substring test, so 'ID1' would also land in the group for 'ID10'.
# The order of the groups follows set iteration order and is not guaranteed.
lst2 = [[elem for elem in lst if elem[0] == every_unique] for every_unique in unique_set]
print(lst2)
Result:
[[['ID2', 'AAA'], ['ID2', 'DDD']], [['ID1', 'A'], ['ID1', 'B']]]
(It is possible to move unique_set into the final line, making it a one-liner. But that would make it less clear what happens.)
If you want to get separate variables like your example of a result:
lst1 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID1']
and
lst2 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID2']
from that, you can make a function:
def create_sub_list(id_str, original_lst):
    """Return the entries of ``original_lst`` whose first element equals ``id_str``.

    The original order is kept and the entries themselves are not copied.
    """
    matches = []
    for entry in original_lst:
        if entry[0] == id_str:
            matches.append(entry)
    return matches
And call it like this:
lst1 = create_sub_list('ID1', lst)
If you want a dictionary of the sub-lists, for easier access, you can use:
from functools import reduce
def reduce_dict(ret_dict, sub_lst):
    """Fold one ``[id, value, ...]`` entry into ``ret_dict`` and return the dict.

    Meant as the function argument of ``functools.reduce``: the values after
    the ID are stored under the ID, or added to what is already stored there.
    ``ret_dict`` is modified in place.
    """
    key, rest = sub_lst[0], sub_lst[1:]
    if key in ret_dict:
        ret_dict[key] += rest
    else:
        ret_dict[key] = rest
    return ret_dict
grouped_dict = reduce(reduce_dict, lst, dict())
(If you know that in your list there will only be 1 string after each ID slot you can change both the sub_lst[1:]'s to sub_lst[1])
And then, to access the elements of the dictionary, you use the ID strings:
print(grouped_dict['ID1'])
This will print:
['A', 'B']
I have a list of tuples as shown:
lt = [(1,'a'),(1,'b'),(2,'a'),(3,'b'),(3,'c')]
I want to make the numbers keys of a dictionary and have them point to a list. That list then holds all associations in the list of tuples. So in the list above, it would split into a dictionary as:
dict_lt:{
1:[a,b],
2:[a],
3:[b,c]
}
Currently I use the dictionary's flexibility in automatically declaring new keys, which I then force point to an empty list. Then I fill that list accordingly.
dict_lt = {}
for tup in lt:
dict_lt[tup[0]] = []
for tup in lt:
dict_lt[tup[0]].append(tup[1])
This works fine, but it's a tad slow since it needs to iterate twice over the same list, and it just seems redundant overall. Is there a better way?
You don't need to iterate the list twice. You can use setdefault() to set the initial value if the key is not in the dictionary:
lt = [(1,'a'),(1,'b'),(2,'a'),(3,'b'),(3,'c')]
d = {}
for k, v in lt:
d.setdefault(k, []).append(v)
print(d)
prints
{1: ['a', 'b'], 2: ['a'], 3: ['b', 'c']}
You can use collections.defaultdict with list factory or dict.setdefault to create a list that you can append the values to.
collections.defaultdict:
out = collections.defaultdict(list)
for k, v in lt:
out[k].append(v)
dict.setdefault:
out = {}
for k, v in lt:
out.setdefault(k, []).append(v)
Example:
In [11]: lt = [(1, 'a'),(1, 'b'),(2, 'a'),(3, 'b'),(3, 'c')]
In [12]: out = {}
In [13]: for k, v in lt:
...: out.setdefault(k, []).append(v)
...:
In [14]: out
Out[14]: {1: ['a', 'b'], 2: ['a'], 3: ['b', 'c']}
In [15]: out = collections.defaultdict(list)
In [16]: for k, v in lt:
...: out[k].append(v)
...:
...:
In [17]: out
Out[17]: defaultdict(list, {1: ['a', 'b'], 2: ['a'], 3: ['b', 'c']})
You can use defaultdict(list) in your code instead of dict, and just omit the first loop.
from collections import defaultdict
dict_lt = defaultdict(list)
for tup in lt:
dict_lt[tup[0]].append(tup[1])
How could I transform a list such as:
l=[ ['A', 'C21'], ['A','D43'],['B','D34'],['C','D45'],['C','D56'] ]
to:
[ ['A','C21 D43'], ['B','D34'],['C','D45 D56'] ]
Where the grouping is performed according to element #0 of each sub list
and elements #1 are string concatenated within each group?
Try this :
l = [['A', 'C21'], ['A', 'D43'], ['B', 'D34'], ['C', 'D45'], ['C', 'D56']]

# Group the second element of every pair under its first element.
x = {}
for a in l:
    # setdefault replaces the `a[0] not in x.keys()` check and its two branches.
    x.setdefault(a[0], []).append(a[1])
print(x)

# Join each group's values with a space. print() and dict.items() replace the
# Python 2 only print statement and dict.iteritems(). Output order follows
# dict insertion order, which is guaranteed from Python 3.7.
array_result = []
for keys, vals in x.items():
    array_result.append([keys, ' '.join(vals)])
print(array_result)
If the keys are contiguous then you can use itertools.groupby, eg:
from itertools import groupby
data =[ ['A', 'C21'], ['A','D43'],['B','D34'],['C','D45'],['C','D56'] ]
new_data = [[k, ' '.join(el[1] for el in g)] for k, g in groupby(data, lambda L: L[0])]
# [['A', 'C21 D43'], ['B', 'D34'], ['C', 'D45 D56']]
If not and order doesn't really matter, then:
from collections import defaultdict
dd = defaultdict(list)
for key, val in data:
dd[key].append(val)
new_data = [[k, ' '.join(v)] for k,v in dd.items()]
# [['B', 'D34'], ['C', 'D45 D56'], ['A', 'C21 D43']]
Alternatively - make use of dict.setdefault, eg:
d = {}
for key, val in data:
d.setdefault(key, []).append(val)
new_data = [[k, ' '.join(v)] for k,v in d.items()]
Or, if the keys aren't contiguous, but the output should maintain the order of the input, then use collections.OrderedDict, eg:
from collections import OrderedDict
d = OrderedDict()
for key, val in data:
d.setdefault(key, []).append(val)
new_data = [[k, ' '.join(v)] for k,v in d.items()]
# [['A', 'C21 D43'], ['B', 'D34'], ['C', 'D45 D56']]