combine lists in list/dict comprehension way - python

is it possible to apply list/dictionary comprehension to the following code to have ["abc", "ab", "cd"]
tk = {}
tk[1] = ["abc"]
tk[2] = ["ab", "cd"]
tk[3] = ["ef", "gh"]
t = (1, 2)
combined = []
combined.append(tk[i]) for i in t #does not work. note, not all tk values are used, this is based on t.
I could think of
ll = [tk[i] for i in t], but then the problem becomes flattening a list of lists, so:
ll = [tk[i] for i in t]
[item for sublist in ll for item in sublist]
but this is not one-liner. I wonder if there is better way.

If the order of values in the desired list matters, the generalized way to achieve this would be to sort the items in the dict based on the key, and merge the list of values. For example:
>>> from operator import itemgetter
>>> from itertools import chain
>>> list(chain.from_iterable(i for _, i in sorted(tk.items(), key=itemgetter(0))))
['abc', 'ab', 'cd']

Simply iterate through the values of your dictionary. Something like this:
>>> tk = {}
>>> tk[1] = ['abc']
>>> tk[2] = ['ab', 'cd']
>>> combined = [x for t in tk.values() for x in t]
>>> print combined
['abc', 'ab', 'cd']
You can even use an OrderedDict if you need to maintain the order of your lists, since a regular dict does not guarantee the order of its keys:
>>> import collections
>>> tk = collections.OrderedDict()
>>> tk[1] = ['abc']
>>> tk[2] = ['ab', 'cd']
>>> combined = [x for t in tk.values() for x in t]
>>> print combined
['abc', 'ab', 'cd']
Note that [tk[i] for i in (1, 2)], as you proposed, won't have the desired results. You still need to iterate through the values inside each list.
>>> [tk[i] for i in (1, 2)]
[['abc'], ['ab', 'cd']]
Also note that [tk[i] for i in tk], as you proposed later, gives exactly the same values as tk.values(). So the proposed solution [x for t in tk.values() for x in t] is equivalent to what you achieved, but in one line.

Given your constraint of choosing a sequence of keys manually:
>>> tk = {}
>>> tk[1] = ["abc"]
>>> tk[2] = ["ab", "cd"]
>>> tk[3] = ["ef", "gh"]
You want:
>>> [vals for i in (1,2) for vals in tk[i]]
['abc', 'ab', 'cd']

Related

How to create a new list of tuples with existing list of tuples

I have a list of tuples like this -
list1 = [('alpha', 'beta'),
('beta','gama')
('alpha','lamda')
('gama', 'lamda'),
('euphor', 'tuphor')]
And I want to make a new list based upon the logic that -
for every pair which doesn't exist in the original list will be included in the new list, like the following:
new_list = [('alpha','gama'),
(alpha, tuphor),
(alpha, euphor),
(beta,lamda),
()...]
likewise.
Can anyone suggest the method for doing so in python?
Thanks.
from itertools import combinations
list1 = [('alpha', 'beta'),
('beta','gama'),
('alpha','lamda'),
('gama', 'lamda'),
('euphor', 'tuphor')]
elements = list(set([e for l in list1 for e in l])) # find all unique elements
complete_list = list(combinations(elements, 2)) # generate all possible combinations
#convert to sets to negate the order
set1 = [set(l) for l in list1]
complete_set = [set(l) for l in complete_list]
# find sets in `complete_set` but not in `set1`
ans = [list(l) for l in complete_set if l not in set1]
Output :
[['euphor', 'lamda'],
['euphor', 'gama'],
['euphor', 'beta'],
['euphor', 'alpha'],
['lamda', 'beta'],
['lamda', 'tuphor'],
['gama', 'alpha'],
['gama', 'tuphor'],
['beta', 'tuphor'],
['tuphor', 'alpha']]
Here's a solution using itertools and sets:
import itertools  # needed for chain / product below

list1 = [('alpha', 'beta'),
         ('beta', 'gama'),
         ('alpha', 'lamda'),
         ('gama', 'lamda'),
         ('euphor', 'tuphor')]
# Every distinct name that appears in any of the pairs.
all_items = set(itertools.chain.from_iterable(list1))
# Cartesian product of the names with themselves: this includes both orders
# of each pair as well as self-pairs such as ('alpha', 'alpha').
all_pairs = set(itertools.product(all_items, all_items))
# Keep only the ordered pairs that are not already in the original list.
new_pairs = all_pairs.difference(list1)
The result (new_pairs) is:
{('alpha', 'alpha'),
('alpha', 'euphor'),
('alpha', 'gama'),
('alpha', 'tuphor'),
('beta', 'alpha'),
('beta', 'beta'),
('beta', 'euphor'),
('beta', 'lamda'),
('beta', 'tuphor'),
...
You just need to get the unique names in the original list and then apply the if condition. Try this and let me know if you face any issue.
new_list = []
names = set(np.array(list1).ravel())
for i in names:
for j in names:
if i!=j:
if ((i,j) not in list1) & ((j,i) not in list1) & ((i,j) not in new_list) & ((j,i) not in new_list):
new_list.append((i,j))
You may
collect the different items
compute all the permutations
get the difference between all the permutations and the existing ones
list1 = [('alpha', 'beta'), ('beta', 'gama'), ('alpha', 'lamda'), ('gama', 'lamda'), ('euphor', 'tuphor')]
from itertools import chain, permutations
# Every distinct name that appears in any of the pairs.
items = set(chain.from_iterable(list1)) # {'euphor', 'gama', 'tuphor', 'beta', 'lamda', 'alpha'}
# All ordered pairs of two different names (6 * 5 = 30). Permutations are
# required here: combinations() would yield each pair in only one, arbitrary
# order, so the set difference below could not reliably remove list1's pairs.
all_perm = set(permutations(items, r=2))
# Drop the ordered pairs that already exist in the original list.
new_perm = all_perm - set(list1)
print(len(all_perm), all_perm) # 30
print(len(new_perm), new_perm) # 25

How to split a list into smaller lists python

I have a nested list that looks something like:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']...]
Is it possible for me to split the lst into small lists by their ID so that each small list contains the elements with the same ID? The results should look something like:
lst1 = [['ID1', 'A'], ['ID1', 'B']...]
lst2 = [['ID2', 'AAA'], ['ID2', 'DDD']...]
You can use groupby:
from itertools import groupby
grp_lists = []
# groupby only merges *consecutive* items with equal keys, so sort by the
# same key first; sorted() is stable, so the order inside each ID group is
# preserved and already-sorted input gives the same result as before.
for i, grp in groupby(sorted(lst, key=lambda x: x[0]), key=lambda x: x[0]):
    grp_lists.append(list(grp))
print(grp_lists[0])
[['ID1', 'A'], ['ID1', 'B']]
print(grp_lists[1])
[['ID2', 'AAA'], ['ID2', 'DDD']]
using collections.defaultdict:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']]
from collections import defaultdict
result = defaultdict(list)
for item in lst:
result[item[0]].append(item)
print(list(result.values()))
output:
[[['ID1', 'A'], ['ID1', 'B']], [['ID2', 'AAA'], ['ID2', 'DDD']]]
Without external functions: build a set of unique indexes, then loop over the original list building a new list for each of the indexes and filling it with list items that contain that index:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']]
# The distinct IDs found in the first slot of every item.
unique_set = set(elem[0] for elem in lst)
# Compare with ==, not `in`: on strings `in` is a substring test, so an item
# with ID 'ID1' would also be placed in the group of an ID such as 'ID10'.
lst2 = [[elem for elem in lst if elem[0] == every_unique] for every_unique in unique_set]
print(lst2)
Result:
[[['ID2', 'AAA'], ['ID2', 'DDD']], [['ID1', 'A'], ['ID1', 'B']]]
(It is possible to move unique_set into the final line, making it a one-liner. But that would make it less clear what happens.)
If you want to get separate variables like your example of a result:
lst1 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID1']
and
lst2 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID2']
from that, you can make a function:
def create_sub_list(id_str, original_lst):
    """Return the items of original_lst whose first element equals id_str.

    Args:
        id_str: The ID to select, compared with == against item[0].
        original_lst: A list of sub-lists such as [['ID1', 'A'], ['ID2', 'B']].

    Returns:
        A new list holding the matching sub-lists, in their original order.
        Empty sub-lists are skipped instead of raising IndexError.
    """
    return [x for x in original_lst if x and x[0] == id_str]
And call it like that:
lst1 = create_sub_list('ID1', lst)
If you want a dictionary of the sub-lists, for easier access, you can use:
from functools import reduce
def reduce_dict(ret_dict, sub_lst):
    """Fold one [ID, value, ...] item into the accumulator dictionary.

    Intended as the function argument of functools.reduce.

    Args:
        ret_dict: The accumulator, mapping each ID to the list of values
            collected for it so far. It is updated in place.
        sub_lst: One item; sub_lst[0] is the ID and sub_lst[1:] are its values.

    Returns:
        The same ret_dict object, so that reduce can pass it to the next call.
    """
    # setdefault creates the list on the first occurrence of an ID; extend
    # then appends this item's values to it.
    ret_dict.setdefault(sub_lst[0], []).extend(sub_lst[1:])
    return ret_dict
grouped_dict = reduce(reduce_dict, lst, dict())
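(If you know that in your list there will only be 1 string after each ID slot you can change both the sub_lst[1:]'s to [sub_lst[1]] — keep the brackets, otherwise the strings would be concatenated into one string instead of being collected in a list.)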
And then to access the elements of the dictionary you use the ID strings:
print(grouped_dict['ID1'])
This will print:
['A', 'B']

Flatten lists of variable depths in Python

I have a list of n lists. Each internal list contains a combination of (a) strings, (b) the empty list, or (c) a list containing one string. I would like to transform the inside lists so they only contain the strings.
I have a list like this for example:
[[[],["a"],"a"],[["ab"],[],"abc"]]
and I would like it to be like this:
[["","a","a"],["ab","","abc"]]
I know I could probably go through with a loop but I am looking for a more elegant solution, preferably with a list comprehension.
List comprehension:
>>> original = [[[],["a"],"a"],[["ab"],[],"abc"]]
>>> result = [['' if not item else ''.join(item) for item in sublist] for sublist in original]
>>> result
[['', 'a', 'a'], ['ab', '', 'abc']]
As every element of the list that you'd like to flatten is iterable, instead of checking whether it is an instance of some class (list, string) you can make use of duck typing:
>> my_list = [[[],["a"],"a"],[["ab"],[],"abc"]]
>> [list(map(lambda x: ''.join(x), elem)) for elem in my_list]
Or more readable version:
result = []
for elem in my_list:
flatten = map(lambda x: ''.join(x), elem)
result.append(list(flatten))
Result:
[['', 'a', 'a'], ['ab', '', 'abc']]
It's quite Pythonic not to check what something is, but rather to rely on an operation that every one of the structures supports (here, ''.join works on an empty list, a list containing one string, and a plain string alike).
Via list comprehension:
lst = [[[],["a"],"a"],[["ab"],[],"abc"]]
result = [ ['' if not v else (v[0] if isinstance(v, list) else v) for v in sub_l]
for sub_l in lst ]
print(result)
The output:
[['', 'a', 'a'], ['ab', '', 'abc']]
original_list = [[[],["a"],"a"],[["ab"],[],"abc"]]
flatten = lambda x: "" if x == [] else x[0] if isinstance(x, list) else x
flattened_list = [[flatten(i) for i in j] for j in original_list]

python get list element according to alphabet

I have a list of names alphabetically, like:
list = ['ABC', 'ACE', 'BED', 'BRT', 'CCD', ..]
How can I get an element for each starting letter? Do I have to iterate over the list once, or does Python have some function to do it? I'm new to Python, so this may be a really naive problem.
Suppose I want to get the second element from names that starts from 'A', this case I get 'ACE'.
If you're going to do multiple searches, you should take the one-time hit of iterating through everything and build a dictionary (or, to make it simpler, collections.defaultdict):
from collections import defaultdict
d = defaultdict(list)
words = ['ABC', 'ACE', 'BED', 'BRT', 'CCD', ...]
for word in words:
d[word[0]].append(word)
(Note that you shouldn't name your own variable list, as it shadows the built-in.)
Now you can easily query for the second word starting with "A":
d["A"][1] == "ACE"
or the first two words for each letter:
first_two = {c: w[:2] for c, w in d.items()}
Using generator expression and itertools.islice:
>>> import itertools
>>> names = ['ABC', 'ACE', 'BED', 'BRT', 'CCD']
>>> next(itertools.islice((name for name in names if name.startswith('A')), 1, 2), 'no-such-name')
'ACE'
>>> names = ['ABC', 'BBD', 'BED', 'BRT', 'CCD']
>>> next(itertools.islice((name for name in names if name.startswith('A')), 1, 2), 'no-such-name')
'no-such-name'
Simply group all the elements by their first char
from itertools import groupby
from operator import itemgetter
example = ['ABC', 'ACE', 'BED', 'BRT', 'CCD']
d = {g:list(values) for g, values in groupby(example, itemgetter(0))}
Now to get a value starting with a:
print d.get('A', [])
This is most useful when you have a static list and will run multiple queries since, as you can see, getting the 3rd item starting with 'A' is done in O(1).
You might want to use list comprehensions
mylist = ['ABC', 'ACE', 'BED', 'BRT', 'CCD']
elements_starting_with_A = [i for i in mylist if i[0] == 'A']
>>> ['ABC', 'ACE']
second = elements_starting_with_A[1]
>>> 'ACE'
In addition to list comprehension as others have mentioned, lists also have a sort() method.
mylist = ['AA', 'BB', 'AB', 'CA', 'AC']
newlist = [i for i in mylist if i[0] == 'A']
newlist.sort()
newlist
>>> ['AA', 'AB', 'AC']
The simple solution is to iterate over the whole list in O(n) :
(name for name in names if name.startswith('A'))
However you could sort the names and search in O(log(n)) for the item which is supposed to be on the index or after (using lexicographic comparison). The module bisect will help you to find the bounds :
from bisect import bisect_left
names = ['ABC', 'ACE', 'BED', 'BRT', 'CCD']
names.sort()
lower = bisect_left(names, 'B')
upper = bisect_left(names, chr(1+ord('B')))
print [names[i] for i in range(lower, upper)]
# ['BED', 'BRT']

Split a list in sublists according to character length

I have a list of strings and I would like to split that list into different "sublists" based on the character length of the words in the list, e.g.:
List = [a, bb, aa, ccc, dddd]
Sublist1 = [a]
Sublist2= [bb, aa]
Sublist3= [ccc]
Sublist4= [dddd]
How can i achieve this in python ?
Thank you
by using itertools.groupby:
values = ['a', 'bb', 'aa', 'ccc', 'dddd', 'eee']
from itertools import groupby
output = [list(group) for key,group in groupby(sorted(values, key=len), key=len)]
The result is:
[['a'], ['bb', 'aa'], ['ccc', 'eee'], ['dddd']]
If your list is already sorted by string length and you just need to do grouping, then you can simplify the code to:
output = [list(group) for key,group in groupby(values, key=len)]
I think you should use dictionaries
>>> dict_sublist = {}
>>> for el in List:
... dict_sublist.setdefault(len(el), []).append(el)
...
>>> dict_sublist
{1: ['a'], 2: ['bb', 'aa'], 3: ['ccc'], 4: ['dddd']}
>>> from collections import defaultdict
>>> l = ["a", "bb", "aa", "ccc", "dddd"]
>>> d = defaultdict(list)
>>> for elem in l:
... d[len(elem)].append(elem)
...
>>> sublists = list(d.values())
>>> print(sublists)
[['a'], ['bb', 'aa'], ['ccc'], ['dddd']]
Assuming you're happy with a list of lists, indexed by length, how about something like
by_length = []
for word in List:
    wl = len(word)
    # Grow the outer list until index wl exists. The bound must be <= wl:
    # with < wl the list stops at length wl, whose last valid index is
    # wl - 1, and by_length[wl] below would raise IndexError.
    while len(by_length) <= wl:
        by_length.append([])
    by_length[wl].append(word)
# Parenthesized single-argument form works on both Python 2 and Python 3.
print("The words of length 3 are %s" % by_length[3])

Categories