Remove words containing vowels - python

I am looking for an output string in which every word containing a vowel has been removed.
Input: My name is 123
Output: my 123
I tried below code:
# Asker's attempt, kept exactly as posted (it does not produce the desired output).
def without_vowels(sentence):
vowels = 'aeiou'
word = sentence.split()
for l in word:
for k in l:
if k in vowels:
# Rebinds only the loop variable `l`; the `word` list itself is not modified.
l = ''
# The function contains no return or print, so this call produces no output.
without_vowels('my name 123')
Can anyone give me the result using a list comprehension?

You can use a regex search for the pattern 'a|e|i|o|u', calling .lower() on each word first so that upper-case vowels are matched too, like below:
>>> import re
>>> st = 'My nAmE Is 123 MUe'
>>> [s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower())]
['My', '123']
>>> ' '.join(s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower()))
'My 123'

This is one way to do it
def without_vowels(sentence):
    """Print ``sentence`` with every word that contains a vowel removed.

    The vowel check is case-insensitive, so words such as ``'Is'`` or
    ``'nAmE'`` are dropped as well. The surviving words keep their original
    spelling and are joined with single spaces.
    """
    vowels = set('aeiou')
    words = sentence.split()
    # Lower-case each word before testing so upper-case vowels are caught too.
    cleaned_words = [w for w in words if vowels.isdisjoint(w.lower())]
    cleaned_string = ' '.join(cleaned_words)
    print(cleaned_string)
Calling without_vowels('my name 123') outputs: my 123

def rem_vowel(string):
    """Print ``string`` with every vowel character (either case) stripped out."""
    kept = []
    for ch in string:
        # Compare the lower-cased character so 'A' is treated like 'a'.
        if ch.lower() in ('a', 'e', 'i', 'o', 'u'):
            continue
        kept.append(ch)
    print(''.join(kept))


string = "My name is 123"
rem_vowel(string)

import re
def rem_vowel(string):
    """Return ``string`` with all ASCII vowels, upper or lower case, removed."""
    return re.sub("[aeiouAEIOU]", "", string)


# Driver program
string = " I am uma Bhargav "
# print() with a single argument behaves the same on Python 2 and Python 3.
print(rem_vowel(string))

Related

I am struggling with understanding the following list comprehension

Can someone please rewrite the following list comprehension using simple for loops and statements?
new_words = ' '.join([word for word in line.split() if not
any([phrase in word for phrase in char_list])])
I wrote the above list comprehension in the following code but it doesn't work.
# Asker's attempt, kept exactly as posted; it does not reproduce the comprehension.
new_list = []
for line in in_list:
for word in line.split():
for phrase in char_list:
# Tests one phrase at a time, whereas the comprehension's `not any(...)`
# looks at all phrases together before deciding whether to keep the word.
if not phrase in word:
new_list.append(word)
return new_list
Thanks
new_words = ' '.join(
[
word for word in line.split()
if not any(
[phrase in word for phrase in char_list]
)
]
)
is more or less equivalent to this:
new_list = []
for word in line.split():
    # One boolean per phrase: True when that phrase occurs inside the word.
    phrases_in_word = []
    for phrase in char_list:
        # (phrase in word) returns a boolean True or False
        phrases_in_word.append(phrase in word)
    # Keep the word only when none of the phrases matched it.
    if not any(phrases_in_word):
        new_list.append(word)
new_words = ' '.join(new_list)
new_words = ' '.join([word for word in line.split()
if not any([phrase in word for phrase in char_list])])
is the equivalent of:
lst = []
for word in line.split():
    for phrase in char_list:
        if phrase in word:
            # A phrase matched: leave the loop, which also skips the `else`.
            break
    else:  # the loop ended without `break`: no phrase occurs in this word
        lst.append(word)
new_words = ' '.join(lst)

Unexpected behavior with string.split()

Say I have a string, string = 'a'
I do string.split() and I get ['a']
I don't want this, I only want a list when I have whitespace in my string, ala string = 'a b c d'
So far, I've tried all the following with no luck:
>>> a = 'a'
>>> a.split()
['a']
>>> a = 'a b'
>>> a.split(' ')
['a', 'b']
>>> a = 'a'
>>> a.split(' ')
['a']
>>> import re
>>> re.findall(r'\S+', a)
['a']
>>> re.findall(r'\S', a)
['a']
>>> re.findall(r'\S+', a)
['a', 'b']
>>> re.split(r'\s+', a)
['a', 'b']
>>> a = 'a'
>>> re.split(r'\s+', a)
['a']
>>> a.split(" ")
['a']
>>> a = "a"
>>> a.split(" ")
['a']
>>> a.strip().split(" ")
['a']
>>> a = "a".strip()
>>> a.split(" ")
['a']
Am I crazy? I see no whitespace in the string "a".
>>> r"[^\S\n\t]+"
'[^\\S\\n\\t]+'
>>> print(re.findall(r'[^\S\n\t]+',a))
[]
What up?
EDIT
FWIW, this is how I got what I needed:
# test for linked array
if typename == 'org.apache.ctakes.typesystem.type.textsem.ProcedureMention':
for f in AnnotationType.all_features:
if 'Array' in f.rangeTypeName:
if attributes.get(f.name) and typesystem.get_type(f.elementType):
print([ int(i) for i in attributes[f.name].split() ])
and that is the end...
split() always returns a list. Try this:
def split_it(s):
    """Split ``s`` on whitespace, or return ``s`` unchanged if that gives fewer than two pieces."""
    pieces = s.split()
    return pieces if len(pieces) > 1 else s
The behavior of split makes sense, it always returns a list. Why not just check if the list length is 1?
def weird_split(a):
    """Split ``a`` on whitespace; a lone word comes back as a plain string, not a list."""
    parts = a.split()
    return parts[0] if len(parts) == 1 else parts
You could use the conditional expression to check for the presence of space, and use split only if a space is detected:
# Split only when the text actually contains a space; otherwise keep the string.
for str1 in ('abc', 'ab c'):
    if ' ' in str1:
        split_str1 = str1.split(' ')
    else:
        split_str1 = str1
    print(split_str1)
This would give the output:
abc
['ab', 'c']

Match characters of a string

If I call matcher.match('bad'), it should return a list of all permutations of 'bad' that exist in a provided list. In this example, the output should be ['abd']. This is what I have tried so far, but I can't seem to match this pattern.
# Asker's attempt, kept exactly as posted.
class Matcher():
def __init__(self, string_list):
self.string_list = string_list
def match(self, string_match):
# Reads the module-level name `string_list`, not `self.string_list`, and
# `in` is a substring test, so match('bad') would not find 'abd'.
matched_list = [string for string in string_list if string_match in string]
return matched_list
string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
matcher = Matcher(string_list)
print(matcher.match('abd'))
This solution sorts each string before comparing, so it costs O(k log k) per candidate of length k:
string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
my_str = 'bad'
# Sort the target once instead of once per candidate.
target = sorted(my_str)
# Two strings are permutations of each other exactly when their sorted
# characters are equal.
matches = [x for x in string_list if sorted(x) == target]
print(matches)  # ['abd']
You could always use collections.Counter() for this:
from collections import Counter

string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
my_str = 'bad'
# Keep the candidates whose per-letter counts equal those of my_str.
target_counts = Counter(my_str)
anagrams = [word for word in string_list if Counter(word) == target_counts]
print(anagrams)
# ['abd']

Matching String in a List of Strings

I want to create a new list 'T' containing every string from the list 'Z' in which each element of the list 'Word' appears as a separate word.
I.e., I want the output of 'T' in the following case to be T = ['Hi x']
Word = ['x']
Z = ['Hi xo xo','Hi x','yoyo','yox']
I tried the following code but it gives me all sentences with words having 'x' in it however I only want the sentences having 'x' as a separate word.
# Asker's attempt, kept exactly as posted.
for i in Z:
# Iterating over a string yields its individual characters, so `v` is a
# single character of the sentence rather than a whole word.
for v in i:
if v in Word:
print (i)
Just another pythonic way
[phrase for phrase in Z for w in Word if w in phrase.split()]
['Hi x']
You can do it with list comprehension.
>>> [i for i in Z if any (w.lower() ==j.lower() for j in i.split() for w in Word)]
['Hi x']
Edit:
Or you can do:
>>> [i for i in Z for w in Word if w.lower() in map(lambda x:x.lower(),i.split())]
['Hi x']
If you want to print all strings from Z that contain a word from Word:
Word = ['xo']
Z = ['Hi xo xo','Hi x','yoyo','yox']
res = []
for i in Z:
    # i.split() yields the whitespace-separated words of the sentence.
    for v in i.split():
        if v in Word:
            res.append(i)
            # Stop at the first match so each sentence is added only once.
            break
print(res)
Notice the break. Without the break you could get some strings from Z twice, if two words from it would match. Like the xo in the example.
The i.split() expression splits i to words on spaces.
words = ['x']
phrases = ['Hi xo xo','Hi x','yoyo','yox']
for phrase in phrases:
    # Split once per phrase; a set gives constant-time membership tests.
    tokens = set(phrase.split())
    # any() stops at the first hit, so a phrase is printed at most once even
    # when several of the words occur in it.
    if any(word in tokens for word in words):
        print(phrase)
If you store Word as a set instead of a list, you can use set operations for the check. The following splits every string on whitespace, builds a set from its words, and checks whether Word is a subset of that set.
>>> Z = ['Hi xo xo','Hi x','yoyo','yox']
>>> Word = {'x'}
>>> [s for s in Z if Word <= set(s.split())]
['Hi x']
>>> Word = {'Hi', 'x'}
>>> [s for s in Z if Word <= set(s.split())]
['Hi x']
In the code above, <= is the same as set.issubset.

Replace element in list with white space

Is it possible to check each element of a list and, if it matches a word listed in "test01.txt", replace it with a space?
test01.txt:
to
her
too
a
for
My code:
# Asker's code, kept exactly as posted (Python 2 syntax: print statement).
# Read test01.txt into a list holding one entry per line.
with open('C:/test01.txt') as words:
ws = words.read().splitlines()
with open('C:/test02.txt') as file_modify4:
for x in file_modify4:
# Split the line on tabs and strip each field.
sx = map(str.strip, x.split("\t"))
# Split the first tab-separated field on single spaces.
ssx = sx[0].split(" ")
print ssx
Results from "print ssx":
['wow']
['listens', 'to', 'her', 'music']
['too', 'good']
['a', 'film', 'for', 'stunt', 'scheduling', 'i', 'think']
['really', 'enjoyed']
How to replace the element in ssx?
Expected result:
['wow']
['listens', ' ', ' ', 'music']
[' ', 'good']
[' ', 'film', ' ', 'stunt', 'scheduling', 'i', 'think']
['really', 'enjoyed']
Any suggestion?
Use list comprehensions; storing the words in a set first for faster testing:
# Convert once so each membership test below is a hash lookup.
ws = set(ws)
# ...
ssx = [' ' if w in ws else w for w in ssx]
or, as a complete solution:
# Words to blank out, one per line; a set keeps the membership test cheap.
with open('C:/test01.txt') as words:
    ws = set(words.read().splitlines())
with open('C:/test02.txt') as file_modify4:
    for x in file_modify4:
        # Only the text before the first tab is examined.
        ssx = [w if w not in ws else ' ' for w in x.strip().split('\t')[0].split()]
        # print() with a single argument works on both Python 2 and Python 3.
        print(ssx)
The naive solution is:
new_ssx = []
for word in ssx:
    # Swap every word found in ws for a single space; keep the rest as is.
    if word in ws:
        new_ssx.append(' ')
    else:
        new_ssx.append(word)
Of course whenever you have an empty list that you just append to in a loop, you can turn it into a list comprehension:
new_ssx = [' ' if word in ws else word for word in ssx]
If ws is more than a few words, you probably want to turn it into a set to make the lookups faster.
So, putting it all together:
# Words to blank out, one per line; a set keeps the membership test cheap.
with open('C:/test01.txt') as words:
    ws = set(words.read().splitlines())
with open('C:/test02.txt') as file_modify4:
    for x in file_modify4:
        # Build a list rather than calling map(): on Python 3 map() returns an
        # iterator, which cannot be indexed with sx[0].
        sx = [field.strip() for field in x.split("\t")]
        ssx = sx[0].split(" ")
        new_ssx = [' ' if word in ws else word for word in ssx]
        # print() with a single argument works on both Python 2 and Python 3.
        print(new_ssx)

Categories