Separate the list based on a value in python - python

I have a list like below:-
list1 = ['box','ball','new','bat','chess','old']
I want to split this list based on values. In the above list, I want to insert the values before 'new' into a new list and the values before 'old' have to be inserted into a new list. The values before 'new' and 'old' can be one or more but here it has two values. I tried using split_after but I am getting the below result:-
from more_itertools import split_after
list1 = ['box','ball','new','bat','chess','old']
list(split_after(list1, lambda x: x == "new"))
Result:-
[['box', 'ball', 'new'], ['bat', 'chess', 'old']]
But desired output:-
result1 = ['box', 'ball']
result2 = ['bat', 'chess']

You might use split_at, and in the lambda check for either new or old.
Then remove the empty lists from the result.
from more_itertools import split_at
list1 = ['box', 'ball', 'new', 'bat', 'chess', 'old']
# Split at every "new"/"old" item; filter(None, ...) then discards any empty chunks.
print(list(filter(None, split_at(list1, lambda x: x == "new" or x == "old"))))
Output
[['box', 'ball'], ['bat', 'chess']]

Another solution, using standard itertools.groupby:
from itertools import groupby

list1 = ["box", "ball", "new", "bat", "chess", "old"]
separators = {"new", "old"}

# Group consecutive items by whether they are separators, then keep only
# the runs made of ordinary (non-separator) items.
out = []
for is_separator, run in groupby(list1, key=lambda item: item in separators):
    if not is_separator:
        out.append(list(run))
print(out)
Prints:
[['box', 'ball'], ['bat', 'chess']]

If there are several values to split on (such as 'new' and 'old'), you can do this:
list1 = ['box', 'ball', 'new', 'bat', 'chess', 'old']
# Values that act as separators. You can add more values here.
split_val = ['new', 'old']

separators = set(split_val)
new_list = []
chunk = []
# Walk the list once, closing the current chunk at every separator. The
# result therefore does not depend on the order of split_val, and a
# separator at the very start of the list is handled like any other.
for item in list1:
    if item in separators:
        if chunk:
            new_list.append(chunk)
        chunk = []
    else:
        chunk.append(item)
# Keep whatever follows the last separator, if anything.
if chunk:
    new_list.append(chunk)
print(new_list)
Output:
[['box', 'ball'], ['bat', 'chess']]

Related

How to create a new list of tuples with existing list of tuples

I have a list of tuples like this -
# Existing pairs. Every tuple needs a trailing comma; without it Python
# reads the next parenthesised tuple as a call on the previous one.
list1 = [('alpha', 'beta'),
         ('beta', 'gama'),
         ('alpha', 'lamda'),
         ('gama', 'lamda'),
         ('euphor', 'tuphor')]
And I want to make a new list based upon the logic that -
for every pair which doesn't exist in the original list will be included in the new list, like the following:
new_list = [('alpha','gama'),
(alpha, tuphor),
(alpha, euphor),
(beta,lamda),
()...]
likewise.
Can anyone suggest the method for doing so in python?
Thanks.
from itertools import combinations

list1 = [
    ('alpha', 'beta'),
    ('beta', 'gama'),
    ('alpha', 'lamda'),
    ('gama', 'lamda'),
    ('euphor', 'tuphor'),
]

# Every distinct name that appears in any pair.
elements = list({name for pair in list1 for name in pair})
# Every 2-element selection of those names.
complete_list = list(combinations(elements, 2))

# Compare pairs as sets so that ('a', 'b') and ('b', 'a') count as the same pair.
set1 = list(map(set, list1))
complete_set = list(map(set, complete_list))

# Keep the candidate pairs that are not already present in list1.
ans = []
for candidate in complete_set:
    if candidate not in set1:
        ans.append(list(candidate))
Output :
[['euphor', 'lamda'],
['euphor', 'gama'],
['euphor', 'beta'],
['euphor', 'alpha'],
['lamda', 'beta'],
['lamda', 'tuphor'],
['gama', 'alpha'],
['gama', 'tuphor'],
['beta', 'tuphor'],
['tuphor', 'alpha']]
Here's a solution using itertools and sets:
import itertools

list1 = [('alpha', 'beta'),
         ('beta', 'gama'),
         ('alpha', 'lamda'),
         ('gama', 'lamda'),
         ('euphor', 'tuphor')]
# Every distinct name that appears in any pair.
all_items = set(itertools.chain(*list1))
# Cartesian product of the names with themselves: this contains both
# orderings of every pair as well as self-pairs such as ('alpha', 'alpha').
all_pairs = set(itertools.product(all_items, all_items))
# Remove only the exact tuples that are already in list1.
new_pairs = all_pairs.difference(list1)
The result (new_pairs) is:
{('alpha', 'alpha'),
('alpha', 'euphor'),
('alpha', 'gama'),
('alpha', 'tuphor'),
('beta', 'alpha'),
('beta', 'beta'),
('beta', 'euphor'),
('beta', 'lamda'),
('beta', 'tuphor'),
...
You just need to get the unique names in the original list and then apply the if condition. Try this and let me know if you face any issue.
new_list = []
# Unique names appearing anywhere in list1 (plain Python, no numpy needed).
names = {name for pair in list1 for name in pair}
for i in names:
    for j in names:
        if i == j:
            continue
        # Treat (i, j) and (j, i) as the same pair: skip it when either
        # ordering already exists in list1 or has already been collected.
        already_known = (i, j) in list1 or (j, i) in list1
        already_added = (i, j) in new_list or (j, i) in new_list
        if not (already_known or already_added):
            new_list.append((i, j))
You may
collect the different items
compute all the permutations
get the difference between all the combinations and the existing ones
list1 = [('alpha', 'beta'), ('beta', 'gama'), ('alpha', 'lamda'), ('gama', 'lamda'), ('euphor', 'tuphor')]
from itertools import chain, permutations
items = set(chain(*list1))  # {'euphor', 'gama', 'tuphor', 'beta', 'lamda', 'alpha'}
# permutations yields both (a, b) and (b, a), so the result does not depend
# on the iteration order of the `items` set and matches the counts below.
all_perm = set(permutations(items, r=2))
# Remove the ordered pairs that already exist in list1.
new_perm = all_perm - set(list1)
print(len(all_perm), all_perm)  # 30
print(len(new_perm), new_perm)  # 25

How to split a list into smaller lists python

I have a nested list that looks something like:
lst = [['ID1', 'A'],['ID1','B'],['ID2','AAA'], ['ID2','DDD']...]
Is it possible for me to split the lst into small lists by their ID so that each small list contains elements with the same ID? The results should look something like:
lst1 = [['ID1', 'A'], ['ID1', 'B']...]
lst2 = [['ID2', 'AAA'], ['ID2', 'DDD']...]
You can use groupby:
from itertools import groupby

# Collect each run of consecutive rows that share the same ID (first element).
# NOTE: groupby only merges adjacent rows, so lst must already be ordered by ID.
grp_lists = [list(grp) for _, grp in groupby(lst, key=lambda x: x[0])]
print(grp_lists[0])
[['ID1', 'A'], ['ID1', 'B']]
print(grp_lists[1])
[['ID2', 'AAA'], ['ID2', 'DDD']]
using collections.defaultdict:
from collections import defaultdict

lst = [['ID1', 'A'], ['ID1', 'B'], ['ID2', 'AAA'], ['ID2', 'DDD']]

# Map each ID (first element of a row) to the list of rows carrying it.
result = defaultdict(list)
for item in lst:
    result[item[0]].append(item)
print(list(result.values()))
output:
[[['ID1', 'A'], ['ID1', 'B']], [['ID2', 'AAA'], ['ID2', 'DDD']]]
Without external functions: build a set of unique indexes, then loop over the original list building a new list for each of the indexes and filling it with list items that contain that index:
lst = [['ID1', 'A'], ['ID1', 'B'], ['ID2', 'AAA'], ['ID2', 'DDD']]
# Distinct IDs (first element of each row).
unique_set = set(elem[0] for elem in lst)
# One sub-list per ID. Compare with == rather than `in`: on strings `in` is a
# substring test, so 'ID1' would also be collected into the 'ID10' group.
lst2 = [[elem for elem in lst if elem[0] == every_unique] for every_unique in unique_set]
print(lst2)
Result:
[[['ID2', 'AAA'], ['ID2', 'DDD']], [['ID1', 'A'], ['ID1', 'B']]]
(It is possible to move unique_set into the final line, making it a one-liner. But that would make it less clear what happens.)
If you want to get separate variables like your example of a result:
lst1 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID1']
and
lst2 = [sub_lst for sub_lst in lst if sub_lst[0] == 'ID2']
from that, you can make a function:
def create_sub_list(id_str, original_lst):
    """Return the items of original_lst whose first element equals id_str.

    The relative order of the matching items is preserved; an empty list is
    returned when nothing matches.
    """
    return [x for x in original_lst if x[0] == id_str]
And call it like that:
lst1 = create_sub_list('ID1', lst)
If you want a dictionary of the sub-lists, for easier access, you can use:
from functools import reduce
def reduce_dict(ret_dict, sub_lst):
    """Fold one row into ret_dict and return ret_dict.

    The row's first element is used as the key; the remaining elements are
    stored under that key, or appended to what is already stored there.
    Intended as the reducing function for functools.reduce.
    """
    if sub_lst[0] in ret_dict:
        # Key already seen: extend the stored values with this row's values.
        ret_dict[sub_lst[0]] += sub_lst[1:]
    else:
        # First occurrence of the key: the slice gives a fresh list to grow.
        ret_dict[sub_lst[0]] = sub_lst[1:]
    return ret_dict
grouped_dict = reduce(reduce_dict, lst, dict())
(If you know that in your list there will only be 1 string after each ID slot you can change both the sub_lst[1:]'s to sub_lst[1])
And then to access the elements of the dictionary you use the ID strings:
print(grouped_dict['ID1'])
This will print:
['A', 'B']

Flatten lists of variable depths in Python

I have a list of n lists. Each internal list contains a combination of (a) strings, (b) the empty list, or (c) a list containing one string. I would like to transform the inside lists so they only contain the strings.
I have a list like this for example:
[[[],["a"],"a"],[["ab"],[],"abc"]]
and I would like it to be like this:
[["","a","a"],["ab","","abc"]]
I know I could probably go through with a loop but I am looking for a more elegant solution, preferably with a list comprehension.
List comprehension:
>>> original = [[[],["a"],"a"],[["ab"],[],"abc"]]
>>> result = [['' if not item else ''.join(item) for item in sublist] for sublist in original]
>>> result
[['', 'a', 'a'], ['ab', '', 'abc']]
As every element of the list that you'd like to flatten is iterable, instead of checking whether it is an instance of some class (list, string) you can actually make use of duck typing:
>> my_list = [[[],["a"],"a"],[["ab"],[],"abc"]]
>> [list(map(lambda x: ''.join(x), elem)) for elem in my_list]
Or more readable version:
result = []
for elem in my_list:
    # ''.join maps [] to '', a list of strings to their concatenation, and
    # leaves a plain string unchanged, so no type check is needed.
    flatten = map(''.join, elem)
    result.append(list(flatten))
Result:
[['', 'a', 'a'], ['ab', '', 'abc']]
It's quite pythonic to not to check what something is but rather leverage transformation mechanics to adaptive abilities of each of the structure.
Via list comprehension:
lst = [[[], ["a"], "a"], [["ab"], [], "abc"]]

result = []
for sub_l in lst:
    row = []
    for v in sub_l:
        if not v:
            # Empty values become the empty string.
            row.append('')
        elif isinstance(v, list):
            # A non-empty list is replaced by its first element.
            row.append(v[0])
        else:
            # Anything else is kept as it is.
            row.append(v)
    result.append(row)
print(result)
The output:
[['', 'a', 'a'], ['ab', '', 'abc']]
original_list = [[[], ["a"], "a"], [["ab"], [], "abc"]]


def flatten(x):
    """Return "" for an empty list, the first element of a non-empty list, else x itself."""
    if x == []:
        return ""
    if isinstance(x, list):
        return x[0]
    return x


flattened_list = [[flatten(i) for i in j] for j in original_list]

How to remove items from a list if they match any items in another list in Python

Say I have two lists:
list1 = ['a', 'b']
list2 = ['cat', 'dog', 'bird']
What's the best way to get rid of any items in list2 that contain any of the substrings in list1? (In this example, only 'dog' would remain.)
You can use a list comprehension with the any() function. Go through the items of the second list: if any of the items (characters) in list1 is in the selected word, we don't take it. Otherwise, we add it.
list1 = ['a', 'b']
list2 = ['cat', 'dog', 'bird']
# Keep only the words that contain none of the substrings in list1.
# any() is fed the membership tests themselves, so the outcome does not
# depend on the truthiness of the substrings.
filtered = [x for x in list2 if not any(y in x for y in list1)]
print(filtered)
Output:
['dog']
You can use filter() as well.
# filter() returns a lazy iterator on Python 3, so materialise it before printing.
print(list(filter(lambda x: not any(y in x for y in list1), list2)))
You can use regular expressions to do the job.
import re

# Escape each substring so that regex metacharacters in list1 match literally.
pat = re.compile('|'.join(re.escape(sub) for sub in list1))
# Keep the words in which none of the substrings occurs.
res = [word for word in list2 if not pat.search(word)]
print(res)

Combine list of lists with similar values in python

I have a list of lists, like so:
items = [['118', 'white'], ['118','Jack'], ['118','guilty'], ['200','black'], ['200','mark'], ['200','not guilty']]
Is there a way to use a for loop to grab the second value in each list and collapse it into a new list?
Like so:
['white', 'jack', 'guilty']
['black','mark','not guilty']
Assuming your list always has elements with the same key grouped as in your example, you can use itertools.groupby() to do this efficiently:
>>> import itertools
>>> items = [['118', 'white'], ['118','Jack'], ['118','guilty'], ['200','black'], ['200','mark'], ['200','not guilty']]
>>> [[x[1] for x in g] for k, g in itertools.groupby(items, lambda x: x[0])]
[['white', 'Jack', 'guilty'], ['black', 'mark', 'not guilty']]
You could also use operator.itemgetter(0) as an alternative to lambda x: x[0].
Note that if items does not necessarily have elements grouped by their keys, you can use sorted(items) instead of items in the groupby() call and it will work.
Here is a version that preserves the key as well:
>>> [(k, [x[1] for x in g]) for k, g in itertools.groupby(items, lambda x: x[0])]
[('118', ['white', 'Jack', 'guilty']), ('200', ['black', 'mark', 'not guilty'])]
You could pass this list directly into the dict() built-in function to convert this to a dictionary.
from collections import defaultdict

# Group the second value of every [key, value] pair under its key.
entries = defaultdict(list)
for key, value in items:
    entries[key].append(value)
Now entries is a dict of lists of the second values. You can either get them by key ('118') or use values() for a list of lists.
>>> k = set(x[0] for x in items)
>>> [ [x[1] for x in items if x[0] == key] for key in sorted(k) ]
[['white', 'Jack', 'guilty'], ['black', 'mark', 'not guilty']]
output_dict = {}
for each_key in items:
for each_item in each_key:
try:
output_dict[each_key].append(each_item) #fails as I'm trying to use a list as a dict key
except Exception as e:
output_dict[each_key] = [] #see above
output_dict[each_key].append(each_item) #see above
for each_list in output_dict:
print(each_list)
as Peter DeGlopper pointed out below, this code is bad and I should feel bad. I've commented the code to point out my error. There are better solutions, but just to correct my mistake:
items = [['118', 'white'], ['118', 'Jack'], ['118', 'guilty'], ['200', 'black'], ['200', 'mark'], ['200', 'not guilty']]
output_dict = {}
for each_list in items:
    # Create the key's list the first time the key is seen, then append
    # the second value of the pair to it.
    output_dict.setdefault(each_list[0], []).append(each_list[1])
>>> for each_list in output_dict:
>>> print(each_list)
['white','Jack','guilty']
['black','mark','not guilty']

Categories