I am struggling with understanding the following list comprehension - python

Can someone please rewrite the following list comprehension using simple for loops and statements?
new_words = ' '.join([word for word in line.split() if not
any([phrase in word for phrase in char_list])])
I wrote the above list comprehension in the following code but it doesn't work.
new_list = []
for line in in_list:
for word in line.split():
for phrase in char_list:
if not phrase in word:
new_list.append(word)
return new_list
Thanks

new_words = ' '.join(
[
word for word in line.split()
if not any(
[phrase in word for phrase in char_list]
)
]
)
is more or less equivalent to this:
# Step-by-step expansion of the comprehension: keep every word of `line`
# that contains none of the phrases in `char_list`.
new_list = []
for word in line.split():
    # One boolean per phrase: True when that phrase occurs inside the word.
    phrases_in_word = []
    for phrase in char_list:
        # (phrase in word) returns a boolean True or False
        phrases_in_word.append(phrase in word)
    # any(...) is True as soon as one phrase matched, so `not any(...)`
    # keeps only the words in which no phrase was found.
    if not any(phrases_in_word):
        new_list.append(word)
new_words = ' '.join(new_list)

new_words = ' '.join([word for word in line.split()
if not any([phrase in word for phrase in char_list])])
is the equivalent of:
# Keep every word of `line` that contains none of the phrases in `char_list`.
lst = []
for word in line.split():
    for phrase in char_list:
        if phrase in word:
            # A phrase occurs in this word: reject it and skip the else branch.
            break
    else:
        # The inner loop ended without a break: no phrase occurs in the word.
        lst.append(word)
new_words = ' '.join(lst)

Related

Remove words containing vowels

I am looking for an output string in which every word containing a vowel has been removed.
Input: My name is 123
Output: my 123
I tried below code:
def without_vowels(sentence):
vowels = 'aeiou'
word = sentence.split()
for l in word:
for k in l:
if k in vowels:
l = ''
without_vowels('my name 123')
Can anyone give me the result using a list comprehension?
You can use a regex search for the pattern 'a|e|i|o|u', applying .lower() to each word so that uppercase vowels are matched too, as below:
>>> import re
>>> st = 'My nAmE Is 123 MUe'
>>> [s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower())]
['My', '123']
>>> ' '.join(s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower()))
'My 123'
This is one way to do it
def without_vowels(sentence):
    """Print and return ``sentence`` with every vowel-containing word dropped.

    The sentence is split on whitespace; a word is kept only when it contains
    none of the letters a, e, i, o, u in either upper or lower case. The
    surviving words are joined with single spaces.

    Args:
        sentence: The text to filter.

    Returns:
        The filtered sentence (also printed to standard output).
    """
    vowels = set('aeiou')
    cleaned_words = [
        w for w in sentence.split()
        # Lower-case the word so that 'Is' or 'MUe' are rejected as well.
        if not any(ch in vowels for ch in w.lower())
    ]
    cleaned_string = ' '.join(cleaned_words)
    print(cleaned_string)
    return cleaned_string
Outputs my 123
def rem_vowel(string):
    """Print and return ``string`` with every vowel removed.

    Individual letters are removed (not whole words); both upper- and
    lower-case a, e, i, o, u are dropped, all other characters are kept.

    Args:
        string: The text to strip vowels from.

    Returns:
        The text without vowels (also printed to standard output).
    """
    vowels = {'a', 'e', 'i', 'o', 'u'}
    result = ''.join(
        letter for letter in string if letter.lower() not in vowels
    )
    print(result)
    return result


string = "My name is 123"
rem_vowel(string)
import re


def rem_vowel(string):
    """Return ``string`` with every ASCII vowel (upper or lower case) removed."""
    return re.sub(r"[aeiouAEIOU]", "", string)


# Driver program
string = " I am uma Bhargav "
print(rem_vowel(string))

How to match words between 2 lists approximately

I have parent list below
parent_list = ['AWS', 'GCP', 'ALIBABA', 'AZURE']
The incoming input is sentence = "The use is asking for AWS and GCP"
I need to check the incoming input against parent_list and collect the matches in a list.
Expected output is ['AWS', 'GCP']
My code is below which is working fine
[i for i in parent_list if i in sentence ]
Now I need to do an approximate match. Let's say sentence = "The use is asking for AliBab and gcp"
You can see that AliBab is approximately ALIBABA.
Expected output is ['ALIBABA', 'GCP']
It depends on the definition of an approximate match.
If being a substring is the criterion, then you can iterate over the words of the sentence and over the parent list, and return an element of the parent list whenever a word of the sentence appears as a substring of it.
matches = [elt for elt in parent_list if any(word.lower() in elt.lower() for word in sentence.split())]
You can use re.split() to split on multiple delimiters:
parent_list = ['AWS', 'GCP', 'ALIBABA', 'AZURE']
sentence = "The use is asking for AliBab and gcp"
import re
matches = [elt for elt in parent_list if any(word.lower() in elt.lower() or elt.lower() in word.lower() for word in re.split('[, ]', sentence))]
print(matches)
sentence = "The use is asking for AWS,GCP"
matches = [elt for elt in parent_list if any(word.lower() in elt.lower() or elt.lower() in word.lower() for word in re.split('[, ]', sentence))]
print(matches)
You can do this:
parent_list = ['AWS', 'GCP', 'ALIBABA', 'AZURE']
used_words = []
string = "The use is asking for AWS and GCP"
# Lower-case the sentence once instead of on every loop iteration.
lowered_string = string.lower()
for word in parent_list:
    # Case-insensitive substring test of the list entry against the sentence.
    if word.lower() in lowered_string:
        used_words.append(word)
print(used_words)
You might try this:
types = ['AWS', 'GCP', 'ALIBABA', 'AZURE']
sentence = 'The use is asking for AW and GCP or something'
result = []
for word in sentence.split():
    lowered_word = word.lower()
    for t in types:
        lowered_type = t.lower()
        # Match in either direction: the word is part of the type name
        # ('AW' in 'AWS') or the type name is part of the word.
        if lowered_word in lowered_type or lowered_type in lowered_word:
            result.append(t)
print(result)
or with list comprehension:
result = [t for word in sentence.split()
for t in types
if word.lower() in t.lower() or t.lower() in word.lower()]
It looks cleaner, but is a bit more complicated.
For more than one delimiter, use:
import re
for word in re.split(' |,', sentence):
like:
result = [t for word in re.split(' |,', sentence)
for t in types
if word.lower() in t.lower() or t.lower() in word.lower()]
A note about adding delimiters: ',' is a different delimiter from ', '.

Matching String in a List of Strings

I basically want to create a new list 'T' containing every string in the list 'Z' in which an element of the list 'Word' appears as a separate word.
ie I want the output of 'T' in the following case to be T = ['Hi x']
Word = ['x']
Z = ['Hi xo xo','Hi x','yoyo','yox']
I tried the following code but it gives me all sentences with words having 'x' in it however I only want the sentences having 'x' as a separate word.
for i in Z:
for v in i:
if v in Word:
print (i)
Just another pythonic way
[phrase for phrase in Z for w in Word if w in phrase.split()]
['Hi x']
You can do it with list comprehension.
>>> [i for i in Z if any (w.lower() ==j.lower() for j in i.split() for w in Word)]
['Hi x']
Edit:
Or you can do:
>>> [i for i in Z for w in Word if w.lower() in map(lambda x:x.lower(),i.split())]
['Hi x']
if you want to print all strings from Z that contain a word from Word:
Word = ['xo']
Z = ['Hi xo xo','Hi x','yoyo','yox']
res = []
for i in Z:
    # i.split() breaks the string into whole words, so 'xo' does not match
    # inside a longer word.
    for v in i.split():
        if v in Word:
            res.append(i)
            # Stop at the first matching word so the string is added once.
            break
print(res)
Notice the break. Without the break you could get some strings from Z twice, if two words from it would match. Like the xo in the example.
The i.split() expression splits i to words on spaces.
words = ['x']
phrases = ['Hi xo xo','Hi x','yoyo','yox']
for phrase in phrases:
for word in words:
if word in phrase.split():
print(phrase)
If you stored Word as a set instead of a list, you could use set operations for the check. Basically, the following splits every string on whitespace, constructs a set out of the words, and checks whether Word is a subset of it.
>>> Z = ['Hi xo xo','Hi x','yoyo','yox']
>>> Word = {'x'}
>>> [s for s in Z if Word <= set(s.split())]
['Hi x']
>>> Word = {'Hi', 'x'}
>>> [s for s in Z if Word <= set(s.split())]
['Hi x']
In the above, <= is the same as set.issubset.

How can I find duplicate words in a text file?

file_str = input("Enter poem: ")
my_file = open(file_str, "r")
words = file_str.split(',' or ';')
I have a file on my computer that contains a really long poem, and I want to see if there are any words that are duplicated per line (hence it being split by punctuation).
I have that much, and I don't want to use a module or Counter, I would prefer to use loops. Any ideas?
You can use sets to track seen items and duplicates:
>>> words = 'the fox jumped over the lazy dog and over the bear'.split()
>>> seen = set()
>>> dups = set()
>>> for word in words:
if word in seen:
if word not in dups:
print(word)
dups.add(word)
else:
seen.add(word)
the
over
# Read the whole file; the path below is a placeholder to be filled in.
with open(r"specify the path of the file") as f:
    data = f.read()

# Walk the whitespace-separated words and stop at the first one seen before.
seen = set()
has_duplicates = False
for word in data.split():
    if word in seen:
        has_duplicates = True
        break
    seen.add(word)

if has_duplicates:
    print("Duplicates found")
else:
    print("None")
SOLVED !!!
I can give an explanation with a working program.
file content of sam.txt
sam.txt
Hello this is abdul hello
the data are Hello so you can move to the hello
def find_repeated_words(words):
    """Return the words that occur more than once in ``words``.

    Each repeated word is reported exactly once, in order of first
    appearance. The comparison is case-sensitive, so 'Hello' and 'hello'
    are different words.

    Args:
        words: A list of strings.

    Returns:
        A list with one entry per repeated word.
    """
    repeated = []
    for index, word in enumerate(words):
        # Already reported by an earlier occurrence.
        if word in repeated:
            continue
        # Only look at the words after this one; earlier positions were
        # handled when the loop visited them.
        if word in words[index + 1:]:
            repeated.append(word)
    return repeated


if __name__ == "__main__":
    with open(file="sam.txt", mode="r") as file_obj:
        file_content = file_obj.readlines()
    print("\n debug the file content ",file_content)

    # Flatten the lines into one list of words; split() without arguments
    # drops the trailing newline and never yields empty strings.
    resultant_list = []
    for line in file_content:
        resultant_list.extend(line.split())
    print("\n debug resultant_list",resultant_list)

    repeated_element_list = find_repeated_words(resultant_list)
    print("The repeated strings are {}\n and total counts {}".format(repeated_element_list, len(repeated_element_list)))
Output:
debug the file content ['Hello this is abdul hello\n', 'the data are Hello so you can move to the hello']
debug resultant_list ['Hello', 'this', 'is', 'abdul', 'hello', 'the', 'data', 'are', 'Hello', 'so', 'you', 'can', 'move', 'to', 'the', 'hello']
The repeated strings are ['Hello', 'hello', 'the']
and total counts 3
Thanks
def Counter(text):
    """Count how often each whitespace-separated word occurs in ``text``.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each word to its number of occurrences; empty when
        ``text`` contains no words.
    """
    d = {}
    for word in text.split():
        # get() supplies 0 the first time a word is seen.
        d[word] = d.get(word, 0) + 1
    return d
There are loops :/
To split on punctuation, just use
import re

# Cut the text at sentence-ending punctuation, then count the words of each
# piece separately.
matches = re.split(r"[!.?]", my_corpus)
for match in matches:
    print(Counter(match))
For this kind of file:
A hearth came to us from your hearth
foreign hairs with hearth are same are hairs
This will check the whole poem:
# Words seen so far, across every line of the file.
lst = []
with open("coz.txt") as f:
    for line in f:
        for word in line.split():  # split on whitespace
            if word not in lst:
                lst.append(word)
            else:
                # Seen before: printed once for every repeat occurrence.
                print(word)
Output:
>>>
hearth
hearth
are
hairs
>>>
As you can see, hearth is printed twice, because it appears 3 times in the whole poem.
To check line by line:
# NOTE(review): lst2 is created once and never reset, so words from every
# line appear to be pooled together rather than checked line by line --
# confirm against the original, indented post.
lst = []
lst2 = []
with open ("coz.txt") as f:
for line in f:
for word in line.split():
lst2.append(word)
# NOTE(review): lst2.remove(x) mutates the list this loop is iterating over,
# which normally makes the iteration skip elements -- verify that the loop
# really produces the output shown below before relying on it.
for x in lst2:
if x not in lst:
lst.append(x)
lst2.remove(x)
print (set(lst2))
>>>
{'hearth', 'are', 'hairs'}
>>>

Empty list when converting from set

I'm trying to convert a set I've defined into a list so I can use it for indexing.
seen = set()
for line in p:
for word in line.split():
if word not in seen and not word.isdigit():
seen.add(word)
been = list(seen)
The set seems to contain items just fine. However the list is always empty when I monitor its value in the variable explorer (and when I later call the index function).
What am I doing wrong?
EDIT: This is the entire code. I'm trying to find the location of words in 'p' in 'o' and chart the number of its occurrences in a single line. It's a huge list of words so manually entering anything is out of the question.
p = open("p.txt", 'r')
o = open("o.txt", 'r')
t = open("t.txt", 'w')
lines = p.readlines()
vlines = o.readlines()
seen = set()
for line in p:
for word in line.split():
if word not in seen and not word.isdigit():
seen.add(word)
been = list(seen)
for i in lines:
thisline = i.split();
thisline[:] = [word for word in thisline if not word.isdigit()]
count = len(thisline)
j = []
j.append(count)
for sword in thisline:
num = thisline.count(sword)
#index=0
#for m in vlines:
#if word is not m:
#index+=1
ix = been.index(sword)
j.append(' ' + str(ix) + ':' + str(num))
j.append('\n')
for item in j:
t.write("%s" % item)
Output should be in the format '(total number of items in line) (index):(no. of occurrences)'.
I think I'm pretty close but this part is bugging me.
Your code is working just fine.
>>> p = '''
the 123 dogs
chased 567 cats
through 89 streets'''.splitlines()
>>> seen = set()
>>> for line in p:
for word in line.split():
if word not in seen and not word.isdigit():
seen.add(word)
>>> been = list(seen)
>>>
>>> seen
set(['streets', 'chased', 'cats', 'through', 'the', 'dogs'])
>>> been
['streets', 'chased', 'cats', 'through', 'the', 'dogs']
Unless there's a reason why you want to read line by line you can simply replace this:
seen = set()
for line in p:
for word in line.split():
if word not in seen and not word.isdigit():
seen.add(word)
been = list(seen)
with:
# The with-block closes p.txt as soon as the words have been read.
with open('p.txt', 'r') as p_file:
    been = list({w for w in p_file.read().split() if not w.isdigit()})

Categories