how to combine two text in one array - python

vowels=["a","e","i","o","u"]
word=["a","ir","the","book"]
for i in word:
if len(i) == 1 and i in vowels:
....
I wrote this loop to detect items such as 'a' that are in the 'vowels' array and are only one character long
(for example, 'a' is 1 character and 'ant' is 3 characters).
I want to combine 'a' with the word that follows it in the array.
How can I combine 'a' with 'ir' so that the 'word' array becomes
word=["air","the","book"]

You can use an iterator to construct the new list, just add the next() item if the current item is in vowels, e.g.:
>>> vowels = ['a', 'e', 'i', 'o', 'u']
>>> words = ["a", "ir", "the", "book"]
>>> iterable = iter(words)
>>> [i+next(iterable, '') if i in vowels else i for i in iterable]
['air', 'the', 'book']
Or if you happen to need to add multiple vowels:
def concat(i, iterable):
    """Join ``i`` with the items that follow it for as long as they are vowels.

    Starting with ``i``, items are pulled from ``iterable`` while the most
    recently taken item is in the module-level ``vowels`` collection. The
    first non-vowel item taken ends the run and is part of the result.

    Args:
        i: The item the caller has already taken from ``iterable``.
        iterable: The iterator the caller is looping over. It is advanced in
            place, so the merged items are not visited again by the caller.

    Returns:
        The concatenation of ``i`` and every item consumed from ``iterable``.
    """
    pieces = [i]
    # An exhausted iterator yields '' here; '' is not an element of a
    # list/set of vowels, so the loop ends instead of running forever.
    while pieces[-1] in vowels:
        pieces.append(next(iterable, ''))
    return ''.join(pieces)
>>> words = ['a', 'i', 'r']
>>> iterable = iter(words)
>>> [concat(i, iterable) for i in iterable]
['air']

>>> vowels = {'a', 'e', 'i', 'o', 'u'} # used set for efficient `in` operation
>>> words = ['a', 'ir', 'the', 'book']
>>> for i in range(len(words) - 2, -1, -1):
... if words[i] in vowels:
... # len(words[i]) == 1 is redundant (all vowels items' length = 1)
... words[i] += words.pop(i + 1) # merge vowel with next word
...
>>> words
['air', 'the', 'book']
NOTE: iterated backward; otherwise indexes are broken as soon as list.pop called.

Related

How to get each letter of word list python

l = ['hello', 'world', 'monday']
for i in range(n):
word = input()
l.append(word)
for j in l[0]:
print(j)
Output : h e l l o
I would like to do it for every word in l.
I want to keep my list intact because i would need to get len() of each word and i won't know the number of word that i could possibly get.
I don't know if i'm clear enough, if you need more informations let me know, thanks !
def split_into_letters(word):
    """Return the characters of ``word`` separated by single spaces.

    Example: ``'hello'`` becomes ``'h e l l o'``.
    """
    return ' '.join(word)
lst = ['hello', 'world', 'monday']
lst_2 = list(map(split_into_letters, lst))
print(lst_2)
You can map each word to a function that splits it into letters
l = ['hello', 'world', 'monday']
list(map(list, l))
#output
[['h', 'e', 'l', 'l', 'o'],
['w', 'o', 'r', 'l', 'd'],
['m', 'o', 'n', 'd', 'a', 'y']]
from itertools import chain
lst = ['hello', 'world', 'monday']
# Print all letters of all words separated by spaces
print(*chain.from_iterable(lst))
# Print all letters of all words separated by spaces,
# with each word on a new line
for word in lst:
print(*word)

How do I check if a string only has letters from a list of letters in Python

word represents the string I am checking, and letters is a list of random letters. I need to make sure that the word only contains letters from the list. However, if there are repeated letters in the word, there need to be at least that many copies of the letter in the list. If the check returns True, the letters used in the word need to be removed from the list. I am really struggling with this one.
example: w.wordcheck('feed') -> False
letters = ['n', 'e', 'f', 'g', 'e', 'a', 'z']
w.wordcheck('gag') -> false
w.wordcheck('gene') -> True
w.wordcheck('gene') -> True
print(letters) -> ['f', 'a', 'z']
letters = []
def wordcheck(self, word)
for char in word:
if char not in self.letters:
return False
else:
return True
One way using collections.Counter:
from collections import Counter

letters = ['n', 'e', 'f', 'g', 'e', 'a', 'z']
cnt = Counter(letters)


def wordcheck(word):
    """Return True if ``word`` can be spelled with the available letters.

    Every character of ``word`` must occur in ``letters`` at least as many
    times as it occurs in ``word``. The check is read-only: neither
    ``letters`` nor ``cnt`` is modified.

    Args:
        word: The string to check.

    Returns:
        bool: True when no character is needed more often than it is
        available (an empty word is always accepted).
    """
    # Counter returns 0 for a missing key, so absent letters fail the test.
    return all(cnt[k] - v >= 0 for k, v in Counter(word).items())
Output:
wordcheck("gag")
# False
wordcheck("gene")
# True
You can solve this problem by finding the cases where the word should result in False.
These cases are: a character of the word is not in letters, or a character occurs more often in the word than in letters.
If either condition is met, return False; otherwise return True.
# your code goes here
from collections import Counter

letters = ['n', 'e', 'f', 'g', 'e', 'a', 'z']
letters_count = Counter(letters)


def func(word):
    """Return True if ``word`` can be built from the entries of ``letters``.

    Args:
        word: The string to check.

    Returns:
        bool: False as soon as some character is missing from ``letters`` or
        is needed more often than ``letters`` provides it; True otherwise.
    """
    word_count = Counter(word)
    # Counter yields 0 for characters it has never seen, so one comparison
    # covers both "not in letters" and "not enough copies".
    return all(
        letters_count[char] >= needed for char, needed in word_count.items()
    )


l = ['feed', 'gag', 'gene']
for i in l:
    print(func(i))
output
False
False
True
There are already better answers, but I felt like adding a novel solution just for the heck of it:
from itertools import groupby


def chunks(s):
    """Return the set of runs of equal characters found in sorted ``s``.

    Example: ``"gag"`` -> ``{"a", "gg"}``.
    """
    # groupby only merges adjacent equal items, hence the sort first.
    return {"".join(group) for _, group in groupby(sorted(s))}


def wordcheck(word, valid_letters):
    """Return True if every chunk of ``word`` is also a chunk of ``valid_letters``.

    The comparison is on whole chunks, so a letter must be repeated exactly
    as many times in ``valid_letters`` as in ``word`` for the check to pass.
    """
    return chunks(word) <= chunks(valid_letters)
Steps:
Turn word into a set of chunks, e.g.: "gag" -> {"a", "gg"}
Turn valid_letters into a set of chunks
Check if word is a subset of valid_letters
Limitations:
This is a mostly silly implementation
This will only return True if the exact number of repeated letters is present in valid_letters, e.g.: if valid_letters = "ccc" and word = "cc" this will return False because there are too few c's in word
It's really inefficient

Find all possible substrings beginning with characters from capturing group

I have for example the string BANANA and want to find all possible substrings beginning with a vowel. The result I need looks like this:
"A", "A", "A", "AN", "AN", "ANA", "ANA", "ANAN", "ANANA"
I tried this: re.findall(r"([AIEOU]+\w*)", "BANANA")
but it only finds "ANANA" which seems to be the longest match.
How can I find all the other possible substrings?
s="BANANA"
vowels = 'AIEOU'
sorted(s[i:j] for i, x in enumerate(s) for j in range(i + 1, len(s) + 1) if x in vowels)
This is a simple way of doing it. Sure there's an easier way though.
def subs(txt, startswith):
    """Yield every substring of ``txt`` whose first character is in ``startswith``.

    The first-character test is case-insensitive: both the character and
    ``startswith`` are lowercased before comparing.

    Args:
        txt: The string to take substrings from.
        startswith: A string of the characters a substring may begin with.

    Yields:
        str: For each matching start position (left to right), the
        substrings starting there from shortest to longest.
    """
    starters = startswith.lower()  # loop-invariant, so computed once
    for i, char in enumerate(txt):
        if char.lower() in starters:
            for end in range(i + 1, len(txt) + 1):
                yield txt[i:end]


s = 'BANANA'
vowels = 'AEIOU'
print(sorted(subs(s, vowels)))
A more pythonic way:
>>> def grouper(s):
... return [s[i:i+j] for j in range(1,len(s)+1) for i in range(len(s)-j+1)]
...
>>> vowels = {'A', 'I', 'O', 'U', 'E', 'a', 'i', 'o', 'u', 'e'}
>>> [t for t in grouper(s) if t[0] in vowels]
['A', 'A', 'A', 'AN', 'AN', 'ANA', 'ANA', 'ANAN', 'ANANA']
Benchmark with accepted answer:
from timeit import timeit
s1 = """
sorted(s[i:j] for i, x in enumerate(s) for j in range(i + 1, len(s) + 1) if x in vowels)
"""
s2 = """
def grouper(s):
return [s[i:i+j] for j in range(1,len(s)+1) for i in range(len(s)-j+1)]
[t for t in grouper(s) if t[0] in vowels]
"""
print '1st: ', timeit(stmt=s1,
number=1000000,
setup="vowels = 'AIEOU'; s = 'BANANA'")
print '2nd : ', timeit(stmt=s2,
number=1000000,
setup="vowels = {'A', 'I', 'O', 'U', 'E', 'a', 'i', 'o', 'u', 'e'}; s = 'BANANA'")
result :
1st: 6.08756995201
2nd : 5.25555992126
As already mentioned in the comments, Regex would not be the right way to go about this.
Try this
def get_substr(string):
    """Return all substrings of ``string`` that begin with a vowel.

    A character counts as a vowel when its lowercase form is one of
    ``a, e, i, o, u``.

    Args:
        string: The text to scan.

    Returns:
        list[str]: For each vowel position (left to right), the substrings
        starting there from shortest to longest.
    """
    holder = []
    for ix, elem in enumerate(string):
        if elem.lower() in "aeiou":
            holder.extend(
                string[ix:end] for end in range(ix + 1, len(string) + 1)
            )
    return holder


print(get_substr("BANANA"))
## ['A', 'AN', 'ANA', 'ANAN', 'ANANA', 'A', 'AN', 'ANA', 'A']

How to search for each elements of a list in a string in python

let's say
there's a list
vowels = ['a', 'e', 'i', 'o', 'u']
x = raw_input("Enter something?")
Now how to find instances of these vowels in the x? I want to modify x so that it contains only non vowel letters.
.find won't work.
vowels = {'a', 'e', 'i', 'o', 'u'}
x =input('Enter...')
new_string = ''.join(c for c in x if c not in vowels)
Will create a new copy of x minus the vowels saved as new_string. I have changed vowels to be a set so that look up time is faster (somewhat trivial in this example but it's a good habit to use where appropriate). Strings are immutable so you can't just take the letters out of x, you have to create a new string that is a copy of x without the values you don't need. .join() puts the whole thing back together.
You can use the count function for each letter. For example x.count('a') would count how many 'a' are in the word. Then iterate over all the vowels and use sum to find the total number of vowels.
vowelCount = sum(x.count(vowel) for vowel in vowels)
from collections import Counter
vowels = {'a', 'e', 'i', 'o', 'u'}
s = "foobar"
print(sum(v for k,v in Counter(s).items() if k in vowels))
3
Or use dict.get with a default value of 0:
s = "foobar"
c = Counter(s)
print(sum(c.get(k,0) for k in vowels))
3
You can use like this,
>>> st = 'test test'
>>> len(re.findall('[aeiou]', st, re.IGNORECASE))
2
Or,
>>> vowels = ['a', 'e', 'i', 'o', 'u']
>>> sum(map(lambda x: vowels.count(x) if x in vowels else 0, st))
2
Or,
>>> len([ ch for ch in st if ch in vowels])
2

Split a string at uppercase letters

What is the pythonic way to split a string before the occurrences of a given set of characters?
For example, I want to split
'TheLongAndWindingRoad'
at any occurrence of an uppercase letter (possibly except the first), and obtain
['The', 'Long', 'And', 'Winding', 'Road'].
Edit: It should also split single occurrences, i.e.
from 'ABC' I'd like to obtain
['A', 'B', 'C'].
Unfortunately it's not possible to split on a zero-width match in Python. But you can use re.findall instead:
>>> import re
>>> re.findall('[A-Z][^A-Z]*', 'TheLongAndWindingRoad')
['The', 'Long', 'And', 'Winding', 'Road']
>>> re.findall('[A-Z][^A-Z]*', 'ABC')
['A', 'B', 'C']
Here is an alternative regex solution. The problem can be rephrased as "how do I insert a space before each uppercase letter, before doing the split":
>>> s = "TheLongAndWindingRoad ABC A123B45"
>>> re.sub( r"([A-Z])", r" \1", s).split()
['The', 'Long', 'And', 'Winding', 'Road', 'A', 'B', 'C', 'A123', 'B45']
This has the advantage of preserving all non-whitespace characters, which most other solutions do not.
Use a lookahead and a lookbehind:
In Python 3.7, you can do this:
re.split('(?<=.)(?=[A-Z])', 'TheLongAndWindingRoad')
And it yields:
['The', 'Long', 'And', 'Winding', 'Road']
You need the look-behind to avoid an empty string at the beginning.
>>> import re
>>> re.findall('[A-Z][a-z]*', 'TheLongAndWindingRoad')
['The', 'Long', 'And', 'Winding', 'Road']
>>> re.findall('[A-Z][a-z]*', 'SplitAString')
['Split', 'A', 'String']
>>> re.findall('[A-Z][a-z]*', 'ABC')
['A', 'B', 'C']
If you want "It'sATest" to split to ["It's", 'A', 'Test'] change the regex to "[A-Z][a-z']*"
A variation on @ChristopheD's solution
s = 'TheLongAndWindingRoad'
# Start offsets of the parts: every uppercase letter, plus index 0 so that a
# leading lowercase run is not dropped. len(s) is appended as the end marker
# that closes the last part.
pos = [i for i, e in enumerate(s) if i == 0 or e.isupper()] + [len(s)]
# Pair each start offset with the next one to get the slice bounds.
parts = [s[start:end] for start, end in zip(pos, pos[1:])]
print(parts)
I think that a better answer might be to split the string up into words that do not end in a capital. This would handle the case where the string doesn't start with a capital letter.
re.findall('.[^A-Z]*', 'aboutTheLongAndWindingRoad')
example:
>>> import re
>>> re.findall('.[^A-Z]*', 'aboutTheLongAndWindingRoadABC')
['about', 'The', 'Long', 'And', 'Winding', 'Road', 'A', 'B', 'C']
Pythonic way could be:
"".join([(" "+i if i.isupper() else i) for i in 'TheLongAndWindingRoad']).strip().split()
['The', 'Long', 'And', 'Winding', 'Road']
Works good for Unicode, avoiding re/re2.
"".join([(" "+i if i.isupper() else i) for i in 'СуперМаркетыПродажаКлиент']).strip().split()
['Супер', 'Маркеты', 'Продажа', 'Клиент']
import re
filter(None, re.split("([A-Z][^A-Z]*)", "TheLongAndWindingRoad"))
or
[s for s in re.split("([A-Z][^A-Z]*)", "TheLongAndWindingRoad") if s]
src = 'TheLongAndWindingRoad'
glue = ' '
result = ''.join(glue + x if x.isupper() else x for x in src).strip(glue).split(glue)
Another without regex and the ability to keep contiguous uppercase if wanted
def split_on_uppercase(s, keep_contiguous=False):
    """Split ``s`` in front of its uppercase characters.

    The first character never starts a new part, so a leading capital does
    not produce an empty first element.

    Args:
        s (str): string
        keep_contiguous (bool): flag to indicate we want to
            keep contiguous uppercase chars together

    Returns:
        list[str]: the consecutive pieces of ``s``; joining them gives back
        ``s``. A string without any split point (including ``''``) comes
        back as a one-element list.
    """
    string_length = len(s)
    start = 0
    parts = []
    for i in range(1, string_length):
        if not s[i].isupper():
            continue
        if keep_contiguous:
            # Inside a run of capitals, split only at the run's edges: where
            # the previous character is lowercase, or where the next
            # character (if there is one) is lowercase.
            prev_is_lower = s[i - 1].islower()
            next_is_lower = i + 1 < string_length and s[i + 1].islower()
            if not (prev_is_lower or next_is_lower):
                continue
        parts.append(s[start:i])
        start = i
    parts.append(s[start:])
    return parts
>>> split_on_uppercase('theLongWindingRoad')
['the', 'Long', 'Winding', 'Road']
>>> split_on_uppercase('TheLongWindingRoad')
['The', 'Long', 'Winding', 'Road']
>>> split_on_uppercase('TheLongWINDINGRoadT', True)
['The', 'Long', 'WINDING', 'Road', 'T']
>>> split_on_uppercase('ABC')
['A', 'B', 'C']
>>> split_on_uppercase('ABCD', True)
['ABCD']
>>> split_on_uppercase('')
['']
>>> split_on_uppercase('hello world')
['hello world']
Alternative solution (if you dislike explicit regexes):
s = 'TheLongAndWindingRoad'
# Start offsets of the parts: every uppercase letter, plus index 0 so that a
# leading lowercase run is kept as its own part.
pos = [i for i, e in enumerate(s) if i == 0 or e.isupper()]
# Each part ends where the next one starts; the last one runs to the end of
# the string, which removes the need to catch IndexError per item.
parts = [s[start:end] for start, end in zip(pos, pos[1:] + [len(s)])]
print(parts)
Replace every uppercase letter 'L' in the given string with a space plus that letter, " L". We can do this using a list comprehension, or we can define a function to do it, as follows.
s = 'TheLongANDWindingRoad ABC A123B45'
''.join([char if (char.islower() or not char.isalpha()) else ' '+char for char in list(s)]).strip().split()
>>> ['The', 'Long', 'A', 'N', 'D', 'Winding', 'Road', 'A', 'B', 'C', 'A123', 'B45']
If you choose to go by a function, here is how.
def splitAtUpperCase(text):
    """Split ``text`` before every uppercase character.

    A space is put in front of each uppercase character and the result is
    split on whitespace, so whitespace already present in ``text`` also
    separates parts and no empty strings are returned.

    Args:
        text: The string to split.

    Returns:
        list[str]: The parts, e.g. ``'TheLongRoad'`` -> ``['The', 'Long', 'Road']``.
    """
    spaced = "".join(" " + char if char.isupper() else char for char in text)
    return spaced.split()
In the case of the given example:
print(splitAtUpperCase('TheLongAndWindingRoad'))
>>>['The', 'Long', 'And', 'Winding', 'Road']
But most of the time that we are splitting a sentence at upper case letters, it is usually the case that we want to maintain abbreviations that are typically a continuous stream of uppercase letters. The code below would help.
def splitAtUpperCase(s):
    """Split ``s`` at uppercase letters while keeping runs of capitals together.

    A break is placed in front of an uppercase character when the character
    before it is lowercase (a new word starts) or the character after it is
    lowercase (the last capital of a run starts the next word). The result
    is then split on whitespace, so existing whitespace separates parts too.

    Args:
        s: The string to split.

    Returns:
        list[str]: The parts, e.g. ``'TheLongANDWindingRoad'`` ->
        ``['The', 'Long', 'AND', 'Winding', 'Road']``.
    """
    last = len(s) - 1
    pieces = []
    for i, char in enumerate(s):
        # i == 0 is skipped: nothing precedes the first character, and
        # s[i - 1] would wrap round to the end of the string.
        if i and char.isupper():
            after_lower = s[i - 1].islower()
            # The final character has no successor; only the look-behind
            # applies, so a trailing capital such as the 'A' in 'theEndA'
            # is still split off.
            before_lower = i < last and s[i + 1].islower()
            if after_lower or before_lower:
                pieces.append(' ')
        pieces.append(char)
    return ''.join(pieces).split()
splitAtUpperCase('TheLongANDWindingRoad')
>>> ['The', 'Long', 'AND', 'Winding', 'Road']
Thanks.
An alternative way without using regex or enumerate:
word = 'TheLongAndWindingRoad'
# One entry per character. The name avoids shadowing the builtin ``list``.
chars = [x for x in word]
# Walk by position (skipping the first character) so that repeated letters,
# or a later capital equal to the first character, are handled correctly.
for position in range(1, len(chars)):
    if chars[position].isupper():
        chars[position] = ' ' + chars[position]
fin_list = ''.join(chars).split(' ')
I think it is clearer and simpler without chaining too many methods or using a long list comprehension that can be difficult to read.
This is possible with the more_itertools.split_before tool.
import more_itertools as mit
iterable = "TheLongAndWindingRoad"
[ "".join(i) for i in mit.split_before(iterable, pred=lambda s: s.isupper())]
# ['The', 'Long', 'And', 'Winding', 'Road']
It should also split single occurrences, i.e. from 'ABC' I'd like to obtain ['A', 'B', 'C'].
iterable = "ABC"
[ "".join(i) for i in mit.split_before(iterable, pred=lambda s: s.isupper())]
# ['A', 'B', 'C']
more_itertools is a third-party package with 60+ useful tools including implementations for all of the original itertools recipes, which obviates their manual implementation.
An alternate way using enumerate and isupper()
Code:
strs = 'TheLongAndWindingRoad'
ind = 0  # start offset of the part currently being collected
new_lst = []
# Start at the second character so a leading capital does not produce an
# empty first part; enumerate(..., 1) keeps ``index`` aligned with ``strs``.
for index, val in enumerate(strs[1:], 1):
    if val.isupper():
        new_lst.append(strs[ind:index])
        ind = index
# Whatever follows the last split point is the final part.
if ind < len(strs):
    new_lst.append(strs[ind:])
print(new_lst)
Output:
['The', 'Long', 'And', 'Winding', 'Road']
Sharing what came to mind when I read the post. Different from other posts.
strs = 'TheLongAndWindingRoad'
# grab index of uppercase letters in strs
start_idx = [i for i, j in enumerate(strs) if j.isupper()]
# each part ends where the next one starts; the last part runs to the end
# of strs, so it is no longer skipped
end_idx = start_idx[1:] + [len(strs)]
# slice strs between each start/end pair
strs_list = [
    strs[start_pos:end_pos] for start_pos, end_pos in zip(start_idx, end_idx)
]
You might also wanna do it this way
def camelcase(s):
    """Return the number of words in the camel-case string ``s``.

    The first character starts the first word; every later uppercase
    character starts another one. Counting directly, rather than joining on
    a ``':'`` marker and splitting again, keeps the result right when ``s``
    begins with a capital or already contains ``':'``.

    Args:
        s: The camel-case string, e.g. ``'oneTwoThree'``.

    Returns:
        int: The word count; ``0`` for an empty string.
    """
    if not s:
        return 0
    return 1 + sum(1 for char in s[1:] if char.isupper())
This will output as follows
s = 'oneTwoThree'
print(camelcase(s))
# 3
def solution(s):
    """Return ``s`` with a space inserted in front of every uppercase letter.

    ``str.isupper`` is used for the test, so digits, spaces and punctuation
    (which are equal to their own ``upper()``) do not get a space.

    Args:
        s: The string to process.

    Returns:
        str: The spaced string, e.g. ``'camelCasing'`` -> ``'camel Casing'``.
    """
    return ''.join(' ' + c if c.isupper() else c for c in s)
I'm using list
def split_by_upper(x):
    """Split ``x`` in front of every uppercase character except the first.

    The parts are sliced straight out of ``x`` instead of inserting a ``","``
    marker and splitting on it, so commas in the input are preserved and
    empty or one-character strings are handled.

    Args:
        x: The string to split.

    Returns:
        list[str]: The consecutive parts of ``x``; ``['']`` for an empty
        string.
    """
    parts = []
    start = 0
    # Index 0 is skipped so a leading capital does not create an empty part.
    for i in range(1, len(x)):
        if x[i].isupper():
            parts.append(x[start:i])
            start = i
    parts.append(x[start:])
    return parts
OUTPUT:
data = "TheLongAndWindingRoad"
print(split_by_upper(data))
>> ['The', 'Long', 'And', 'Winding', 'Road']
My solution for splitting on capitalized letters - keeps capitalized words
text = 'theLongAndWindingRoad ABC'
result = re.sub('(?<=.)(?=[A-Z][a-z])', r" ", text).split()
print(result)
#['the', 'Long', 'And', 'Winding', 'Road', 'ABC']
Little late in the party, but:
In [1]: camel = "CamelCaseConfig"
In [2]: parts = "".join([
f"|{c}" if c.isupper() else c
for c in camel
]).lstrip("|").split("|")
In [3]: screaming_snake = "_".join([
part.upper()
for part in parts
])
In [4]: screaming_snake
Out[4]: 'CAMEL_CASE_CONFIG'
Part of my answer is based on other people's answers here.
def split_string_after_upper_case(word):
    """Split ``word`` in front of every uppercase character except the first.

    A single space is placed before each such character and the result is
    split on ``" "``, so spaces already present in ``word`` separate parts
    as well.

    Args:
        word: The string to split.

    Returns:
        list[str]: The parts, e.g. ``'TheLongRoad'`` -> ``['The', 'Long', 'Road']``;
        ``['']`` for an empty string.
    """
    # The first character is copied as-is so a leading capital does not
    # yield an empty first part.
    pieces = [word[:1]]
    for char in word[1:]:
        if char.isupper():
            pieces.append(' ')
        pieces.append(char)
    return ''.join(pieces).split(" ")
k = split_string_after_upper_case('TheLongAndWindingRoad')
print(k)

Categories