Splitting a python list into smaller lists at spaces - python

I have a list which consists of alphabets and spaces:
s = ['a','b',' ',' ','b','c',' ','d','e','f','g','h',' ','i','j'];
I need to split it into smaller individual lists:
s=[['a','b'],['b','c'],['d','e','f','g','h'],['i','j']]
I am new to python.
The entire code:
#To get the longest alphabetical substring from a given string
s = input("Enter any string: ")
alpha_string = []
for i in range(len(s)-1): #if length is 5: 0,1,2,3
if(s[i] <= s[i+1]):
if i == len(s)-2:
alpha_string.append(s[i])
alpha_string.append(s[i+1])
else:
alpha_string.append(s[i])
if(s[i] > s[i+1] and s[i-1] <= s[i]):
alpha_string.append(s[i])
alpha_string.append(" ")
if(s[i] > s[i+1] and s[i-1] > s[i]):
alpha_string.append(" ")
print(alpha_string)
#Getting the position of each space in the list
position = []
for j in range(len(alpha_string)):
if alpha_string[j] == " ":
position.append([j])
print(position)
#Using the position of each space to create slices into the list
start = 0
final_string = []
for k in range(len(position)):
final_string.append(alpha_string[start:position[k]])
temp = position[k]
start = temp
print(final_string)`

Try a list comprehension as follows
print([list(i) for i in ''.join(s).split(' ') if i != ''])
[['a', 'b'], ['b', 'c'], ['d', 'e', 'f', 'g', 'h'], ['i', 'j']]

Here generator will be perfect :
s = ['a','b',' ',' ','b','c',' ','d','e','f','g','h',' ','i','j'];
def generator_approach(list_):
list_s=[]
for i in list_:
if i==' ':
if list_s:
yield list_s
list_s=[]
else:
list_s.append(i)
yield list_s
closure=generator_approach(s)
print(list(closure))
output:
[['a', 'b'], ['b', 'c'], ['d', 'e', 'f', 'g', 'h'], ['i', 'j']]

Or simply in one line, result = [list(item) for item in ''.join(s).split()]

This is one functional way.
s = ['a','b',' ',' ','b','c',' ','d','e','f','g','h',' ','i','j']
res = list(map(list, ''.join(s).split()))
# [['a', 'b'], ['b', 'c'], ['d', 'e', 'f', 'g', 'h'], ['i', 'j']]

from itertools import groupby
s = ['a','b',' ',' ','b','c',' ','d','e','f','g','h',' ','i','j']
t = [list(g) for k, g in groupby(s, str.isspace) if not k]
print(t)
OUTPUT
[['a', 'b'], ['b', 'c'], ['d', 'e', 'f', 'g', 'h'], ['i', 'j']]
This doesn't require the strings to be single letter like many of the join() and split() solutions:
>>> from itertools import groupby
>>>
>>> s = ['abc','bcd',' ',' ','bcd','cde',' ','def','efg','fgh','ghi','hij',' ','ijk','jkl']
>>>
>>> [list(g) for k, g in groupby(s, str.isspace) if not k]
[['abc', 'bcd'], ['bcd', 'cde'], ['def', 'efg', 'fgh', 'ghi', 'hij'], ['ijk', 'jkl']]
>>>
I can never pass up an opportunity to (ab)use groupby()

Related

python consecutive elements to swap the list items [duplicate]

This question already has answers here:
What is the simplest way to swap each pair of adjoining chars in a string with Python?
(20 answers)
Closed 3 years ago.
here my input like:
['a','b','c','d','e','f']
output:
['b','a','d','c','f','e']
I tried to get consecutive list but i'm getting list in between empty string so please make to remove those empty list .
s = list(input().split())
def swap(c, i, j):
c[i], c[j] = c[j], c[i]
return ' '.join(c)
result = swap(s, 0, 1)
print(list(result))
current output:- ['b', ' ', 'a', ' ', 'c', ' ', 'd', ' ', 'e', ' ', 'f']
expected output:-['b', 'a', 'c', 'd', 'e','f']
You just need to return c as list, there is not need to convert to string and back again into a list:
s = ['a','b','c','d','e','f']
def swap(c, i, j):
c[i], c[j] = c[j], c[i]
return c
result = swap(s, 0, 1)
print(result)
Output:
['b', 'a', 'c', 'd', 'e', 'f']
a simple function to swap pairs that does not change the input:
def swap_pairs(list_to_swap):
s = list_to_swap[:] # create copy to not touch the original sequence
for i in range(0, len(s)-1, 2):
s[i], s[i+1] = s[i+1], s[i]
return s
s0 = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
s1 = ['a', 'b', 'c', 'd', 'e', 'f']
print(swap_pairs(s0))
print(swap_pairs(s1))
# ['b', 'a', 'd', 'c', 'f', 'e', 'g']
# ['b', 'a', 'd', 'c', 'f', 'e']
### check if s0 and s1 are untouched:
print(s0)
print(s1)
# ['a', 'b', 'c', 'd', 'e', 'f', 'g']
# ['a', 'b', 'c', 'd', 'e', 'f']
if you want to swap pairs 'in place', i.e. directly change the input, you could shorten the process to
def swap_pairs(s):
for i in range(0, len(s)-1, 2):
s[i], s[i+1] = s[i+1], s[i]
# return s
s1 = ['a', 'b', 'c', 'd', 'e', 'f']
swap_pairs(s1)
print(s1)
# ['b', 'a', 'd', 'c', 'f', 'e']
I think it's a matter of taste if a return statement should be added here. I'd consider it to be more clear not to return something since logically not needed. Anyway, be aware of variable scope.
this is the problem.. your joining on space. change it to the following.
def swap(c, i, j):
c[i], c[j] = c[j], c[i]
return ''.join(c)
for your output you could also do the following.
l = [x for x in [your output list] if x!= ' ']
or
l = [x for x in [your output list] if len(x.strip()) > 0]
Try returning only "C" and use recursion for swapping of all elements of list Then you will get expected Output. Check below code.
Output of below code: ['b','a','d','c','f','e']
s = ['a','b','c','d','e','f']
def swap(c, i, j):
if j<=len(c) and len(c)%2==0:
c[i], c[j] = c[j], c[i]
swap(c,i+2,j+2)
elif j<len(c):
c[i], c[j] = c[j], c[i]
swap(c,i+2,j+2)
return c
result = swap(s, 0, 1)
print(list(result))
and if you want Only output= ['b','a','c','d','e','f'] then no need of recursion just return c. Check below code:
s = ['a','b','c','d','e','f']
def swap(c, i, j):
c[i], c[j] = c[j], c[i]
return c
result = swap(s, 0, 1)
print(list(result))

Weird list assignment

I'm trying to write program that will find all palindromes in a word. For example word "radar" has got 2 palindromes radar and ada. We skip single letters, so r, a, d, etc. aren't palindromes.
import copy
def ILEP(word):
lista = list(word)
counter = 0
pali = []
def isPalindrome(listaWord):
backup = copy.deepcopy(listaWord)
backup.reverse()
a = ''.join(backup)
b = ''.join(listaWord)
if(a == b):
return True
else:
return False
for i in range(len(lista)):
current = [lista[i]]
for j in range(i+1, len(lista)):
current.append(lista[j])
if(isPalindrome(current)):
print(current)
pali.append(current)
counter+=1
print(pali)
return counter
print(ILEP("radar"))
The program is finding all palindromes correctly, but it assings them wrong to the list pali. Console:
['r', 'a', 'd', 'a', 'r']
['a', 'd', 'a']
[['r', 'a', 'd', 'a', 'r'], ['a', 'd', 'a', 'r']]
2
As U can see it prints palindromes ['r', 'a', 'd', 'a', 'r'] and ['a', 'd', 'a'], but the list pali has got wrong value [['r', 'a', 'd', 'a', 'r'], ['a', 'd', 'a', 'r']]
You are changing your list current after appending to pali. You have to make a copy:
def is_palindrome(word):
return word[::-1] == word
def ILEP(word):
pali = []
for i, ch in enumerate(word):
current = [ch]
for ch in word[i+1:]:
current.append(ch)
if is_palindrome(current):
print(current)
pali.append(current[:])
print(pali)
return len(pali)
print(ILEP("radar"))

Complicated array join in Python

I'm having an array like this:
pairs = [['a', 'b', 'c', 'd'], ['g', 'h', 'j', 'k', 'l', 'm']]
I'd like to join them and create a new array so that on the output I'd have something like this:
['a_b', 'b_c', 'c_d']
['g_h', 'h_j', 'j_k', 'k_l', 'l_m']
I'm struggling with an algorithm and can't come up with something. How can I do that?
[['{}_{}'.format(*x) for x in zip(p, p[1:])] for p in pairs]
the for p in pairs part iterate each of your input lists.
zip(p, p[1:]) returns pairs of each item with the next item
'{}_{}'.format(*x) gets the string you requested
joinedIt = []
for i in range(0, len(pairs)):
for j in range(0, len(pairs[i])-1):
joinedIt[i].append(pairs[i][j] + pairs[i][j+1])
>>> pairs = [['a', 'b', 'c', 'd'], ['g', 'h', 'j', 'k', 'l', 'm']]
>>> [[s[i] + '_' + s[i+1] for i in range(len(s)-1)] for s in pairs]
[['a_b', 'b_c', 'c_d'], ['g_h', 'h_j', 'j_k', 'k_l', 'l_m']]
You could do that by using for loops with ranges
def parse(list):
new_list = []
for i in range(len(list) - 1):
new_list.append(list[i] + "_" + list[i+1])
return new_list
zipped = (zip(x,x[1:]) for x in pairs)
[["{}_{}".format(ele[0], ele[1]) for ele in x ]for x in zipped]

How to create nested list from flatten list?

I wrote a function to create a nested list.
For example:
input= ['a','b','c','','d','e','f','g','','d','s','d','a','']
I want to create a sublist before ''
As a return I want a nested list like:
[['a','b','c'],['d','e','f','g'],['d','s','d','a']]
Try the following implementation
>>> def foo(inlist, delim = ''):
start = 0
try:
while True:
stop = inlist.index(delim, start)
yield inlist[start:stop]
start = stop + 1
except ValueError:
# if '' may not be the end delimiter
if start < len(inlist):
yield inlist[start:]
return
>>> list(foo(inlist))
[['a', 'b', 'c'], ['d', 'e', 'f', 'g'], ['d', 's', 'd', 'a']]
Another possible implementation could be by itertools.groupby. But then you have to filter the result to remove the ['']. But though it might look to be one-liner yet the above implementation is more pythonic as its intuitive and readable
>>> from itertools import ifilter, groupby
>>> list(ifilter(lambda e: '' not in e,
(list(v) for k,v in groupby(inlist, key = lambda e:e == ''))))
[['a', 'b', 'c'], ['d', 'e', 'f', 'g'], ['d', 's', 'd', 'a']]
I'd use itertools.groupby:
l = ['a','b','c','','d','e','f','g','','d','s','d','a','']
from itertools import groupby
[list(g) for k, g in groupby(l, bool) if k]
gives
[['a', 'b', 'c'], ['d', 'e', 'f', 'g'], ['d', 's', 'd', 'a']]
def nester(nput):
out = [[]]
for n in nput:
if n == '':
out.append([])
else:
out[-1].append(n)
if out[-1] == []:
out = out[:-1]
return out
edited to add check for empty list at end

Most Pythonic Way to Split an Array by Repeating Elements

I have a list of items that I want to split based on a delimiter. I want all delimiters to be removed and the list to be split when a delimiter occurs twice. For example, if the delimiter is 'X', then the following list:
['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']
Would turn into:
[['a', 'b'], ['c', 'd'], ['f', 'g']]
Notice that the last set is not split.
I've written some ugly code that does this, but I'm sure there is something nicer. Extra points if you can set an arbitrary length delimiter (i.e. split the list after seeing N delimiters).
I don't think there's going to be a nice, elegant solution to this (I'd love to be proven wrong of course) so I would suggest something straightforward:
def nSplit(lst, delim, count=2):
output = [[]]
delimCount = 0
for item in lst:
if item == delim:
delimCount += 1
elif delimCount >= count:
output.append([item])
delimCount = 0
else:
output[-1].append(item)
delimCount = 0
return output
>>> nSplit(['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g'], 'X', 2)
[['a', 'b'], ['c', 'd'], ['f', 'g']]
Here's a way to do it with itertools.groupby():
import itertools
class MultiDelimiterKeyCallable(object):
def __init__(self, delimiter, num_wanted=1):
self.delimiter = delimiter
self.num_wanted = num_wanted
self.num_found = 0
def __call__(self, value):
if value == self.delimiter:
self.num_found += 1
if self.num_found >= self.num_wanted:
self.num_found = 0
return True
else:
self.num_found = 0
def split_multi_delimiter(items, delimiter, num_wanted):
keyfunc = MultiDelimiterKeyCallable(delimiter, num_wanted)
return (list(item
for item in group
if item != delimiter)
for key, group in itertools.groupby(items, keyfunc)
if not key)
items = ['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']
print list(split_multi_delimiter(items, "X", 2))
I must say that cobbal's solution is much simpler for the same results.
Use a generator function to maintain state of your iterator through the list, and the count of the number of separator chars seen so far:
l = ['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']
def splitOn(ll, x, n):
cur = []
splitcount = 0
for c in ll:
if c == x:
splitcount += 1
if splitcount == n:
yield cur
cur = []
splitcount = 0
else:
cur.append(c)
splitcount = 0
yield cur
print list(splitOn(l, 'X', 2))
print list(splitOn(l, 'X', 1))
print list(splitOn(l, 'X', 3))
l += ['X','X']
print list(splitOn(l, 'X', 2))
print list(splitOn(l, 'X', 1))
print list(splitOn(l, 'X', 3))
prints:
[['a', 'b'], ['c', 'd'], ['f', 'g']]
[['a', 'b'], [], ['c', 'd'], [], ['f'], ['g']]
[['a', 'b', 'c', 'd', 'f', 'g']]
[['a', 'b'], ['c', 'd'], ['f', 'g'], []]
[['a', 'b'], [], ['c', 'd'], [], ['f'], ['g'], [], []]
[['a', 'b', 'c', 'd', 'f', 'g']]
EDIT: I'm also a big fan of groupby, here's my go at it:
from itertools import groupby
def splitOn(ll, x, n):
cur = []
for isdelim,grp in groupby(ll, key=lambda c:c==x):
if isdelim:
nn = sum(1 for c in grp)
while nn >= n:
yield cur
cur = []
nn -= n
else:
cur.extend(grp)
yield cur
Not too different from my earlier answer, just lets groupby take care of iterating over the input list, creating groups of delimiter-matching and not-delimiter-matching characters. The non-matching characters just get added onto the current element, the matching character groups do the work of breaking up new elements. For long lists, this is probably a bit more efficient, as groupby does all its work in C, and still only iterates over the list once.
a = ['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']
b = [[b for b in q if b != 'X'] for q in "".join(a).split("".join(['X' for i in range(2)]))]
this gives
[['a', 'b'], ['c', 'd'], ['f', 'g']]
where the 2 is the number of elements you want. there is most likely a better way to do this.
Very ugly, but I wanted to see if I could pull this off as a one-liner and I thought I would share. I beg you not to actually use this solution for anything of any importance though. The ('X', 3) at the end is the delimiter and the number of times it should be repeated.
(lambda delim, count: map(lambda x:filter(lambda y:y != delim, x), reduce(lambda x, y: (x[-1].append(y) if y != delim or x[-1][-count+1:] != [y]*(count-1) else x.append([])) or x, ['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g'], [[]])))('X', 2)
EDIT
Here's a breakdown. I also eliminated some redundant code that was far more obvious when written out like this. (changed above also)
# Wrap everything in a lambda form to avoid repeating values
(lambda delim, count:
# Filter all sublists after construction
map(lambda x: filter(lambda y: y != delim, x), reduce(
lambda x, y: (
# Add the value to the current sub-list
x[-1].append(y) if
# but only if we have accumulated the
# specified number of delimiters
y != delim or x[-1][-count+1:] != [y]*(count-1) else
# Start a new sublist
x.append([]) or x,
['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g'], [[]])
)
)('X', 2)
Here's a clean nice solution using zip and generators
#1 define traditional sequence split function
#if you only want it for lists, you can use indexing to make it shorter
def split(it, x):
to_yield = []
for y in it:
if x == y:
yield to_yield
to_yield = []
else:
to_yield.append(y)
if to_yield:
yield to_yield
#2 zip the sequence with its tail
#you could use itertools.chain to avoid creating unnecessary lists
zipped = zip(l, l[1:] + [''])
#3. remove ('X',not 'X')'s from the resulting sequence, and leave only the first position of each
# you can use list comprehension instead of generator expression
filtered = (x for x,y in zipped if not (x == 'X' and y != 'X'))
#4. split the result using traditional split
result = [x for x in split(filtered, 'X')]
This way split() is more reusable.
It's surprising python doesn't have one built in.
edit:
You can easily adjust it for longer split sequences, repeating steps 2-3 and zipping filtered with l[i:] for 0< i <= n.
import re
map(list, re.sub('(?<=[a-z])X(?=[a-z])', '', ''.join(lst)).split('XX'))
This does a list -> string -> list conversion and assumes that the non-delimiter characters are all lower case letters.
Here's another way of doing this:
def split_multi_delimiter(items, delimiter, num_wanted):
def remove_delimiter(objs):
return [obj for obj in objs if obj != delimiter]
ranges = [(index, index+num_wanted) for index in xrange(len(items))
if items[index:index+num_wanted] == [delimiter] * num_wanted]
last_end = 0
for range_start, range_end in ranges:
yield remove_delimiter(items[last_end:range_start])
last_end = range_end
yield remove_delimiter(items[last_end:])
items = ['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']
print list(split_multi_delimiter(items, "X", 2))
In [6]: input = ['a', 'b', 'X', 'X', 'cc', 'XX', 'd', 'X', 'ee', 'X', 'X', 'f']
In [7]: [s.strip('_').split('_') for s in '_'.join(input).split('X_X')]
Out[7]: [['a', 'b'], ['cc', 'XX', 'd', 'X', 'ee'], ['f']]
This assumes you can use a reserved character such as _ which is not found in the input.
Too clever by half, and only offered because the obvious right way to do it seems so brute-force and ugly:
class joiner(object):
def __init__(self, N, data = (), gluing = False):
self.data = data
self.N = N
self.gluing = gluing
def __add__(self, to_glue):
# Process an item from itertools.groupby, by either
# appending the data to the last item, starting a new item,
# or changing the 'gluing' state according to the number of
# consecutive delimiters that were found.
N = self.N
data = self.data
item = list(to_glue[1])
# A chunk of delimiters;
# return a copy of self with the appropriate gluing state.
if to_glue[0]: return joiner(N, data, len(item) < N)
# Otherwise, handle the gluing appropriately, and reset gluing state.
a, b = (data[:-1], data[-1] if data else []) if self.gluing else (data, [])
return joiner(N, a + (b + item,))
def split_on_multiple(data, delimiter, N):
# Split the list into alternating groups of delimiters and non-delimiters,
# then use the joiner to join non-delimiter groups when the intervening
# delimiter group is short.
return sum(itertools.groupby(data, delimiter.__eq__), joiner(N)).data
Regex, I choose you!
import re
def split_multiple(delimiter, input):
pattern = ''.join(map(lambda x: ',' if x == delimiter else ' ', input))
filtered = filter(lambda x: x != delimiter, input)
result = []
for k in map(len, re.split(';', ''.join(re.split(',',
';'.join(re.split(',{2,}', pattern)))))):
result.append([])
for n in range(k):
result[-1].append(filtered.__next__())
return result
print(split_multiple('X',
['a', 'b', 'X', 'X', 'c', 'd', 'X', 'X', 'f', 'X', 'g']))
Oh, you said Python, not Perl.

Categories