I have a text file, and I want to filter words from it that meet two conditions:
1) same length as my word and starting with the same letter
2) at least 2 letters in the correct position
For example:
word = bubal
text
byres
brits
blurb
bulks
bible
debug
debut
and I want the output to be ['bulks', 'bible']: 'bulks' has 'b' and 'u' correctly placed, and 'bible' has two b's correctly placed compared with bubal.
My idea is to first find the words starting with the same letter, then keep those with the same length, and finally check the second condition.
But the code I wrote to find the starting words using re doesn't work properly:
import re

with open('words.txt','r') as file:
    liste = file.read()
    word = re.findall('[b]\w+', liste)
    print(word)
My code returns ['byres', 'brits', 'bulks', 'but', 'bug'].
How can I fix it and find the words that follow the conditions?
Edited based on your comment.
This may be what you're after:
#!/usr/bin/env python

def find_best_letter_matches(lines, target):
    # keep the lines with the highest number of same-position letter matches
    m = []
    m_count = 0
    for line in lines:
        count = sum(map(lambda x: x[0] == x[1], zip(line, target)))
        if count > m_count:
            m = []
            m_count = count
        if count == m_count:
            m.append(line)
    return m

def find_n_letter_matches(lines, target, n):
    # keep the lines with at least n same-position letter matches
    m = []
    for line in lines:
        count = sum(map(lambda x: x[0] == x[1], zip(line, target)))
        if count >= n:
            m.append(line)
    return m

if __name__ == '__main__':
    with open('text.txt', 'r') as f:
        lines = f.read().split('\n')
    best_matches = find_best_letter_matches(lines, 'bubal')
    n_matches = find_n_letter_matches(lines, 'bubal', 2)
    print('Best letter matches', best_matches)
    print('At least 2 letters match', n_matches)
The functions compare each line to the target, letter by letter, and count the number of matches. The first returns the list of highest-matching lines, and the second returns all lines that match in n or more positions.
The output with your example text (with bubal added) is:
Best letter matches ['bubal']
At least 2 letters match ['bulks', 'bible', 'bubal']
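If you also want to enforce the first condition (same length and same starting letter) before counting placed letters, a minimal sketch along the same lines could look like this (not part of the answer above; it assumes the same one-word-per-line text.txt):

def find_matches(lines, target, n=2):
    matches = []
    for line in lines:
        # condition 1: same length and same first letter as the target
        if len(line) != len(target) or not line.startswith(target[0]):
            continue
        # condition 2: at least n letters in the correct position
        placed = sum(a == b for a, b in zip(line, target))
        if placed >= n:
            matches.append(line)
    return matches

with open('text.txt', 'r') as f:
    words = f.read().split()

print(find_matches(words, 'bubal'))  # ['bulks', 'bible'] with the question's example text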
Try this
wordToSearch = "bubal"
singlesChar = list(wordToSearch)
finalArray = []

with open('words.txt', 'r') as file:
    liste = file.readlines()

for each in liste:
    each = each.rstrip()
    fn = list(each)
    flag = 0
    # count the letters that sit in the same position as in wordToSearch
    for i in range(min(len(fn), len(singlesChar))):
        if fn[i] == singlesChar[i]:
            flag += 1
    if flag >= 2:
        finalArray.append(each)

print(finalArray)
Related
I'm trying to get the maximum number of words in a sentence of a paragraph, but I just can't see it.
Here is what I tried:
S = input("Enter a paragraph")

def getMaxNum(S):
    if "." in S:
        new_list = S.split(".")[0]
        return len(new_list)
    elif "?" in S:
        new_list = S.split("?")[0]
        return len(new_list)
    elif "!" in S:
        new_list = S.split("!")[0]
        return len(new_list)

getMaxNum(S)
In the elif branches I could be getting the previous sentence's values, but that's not what I need. Any ideas how I can accomplish this?
I'm not 100% certain of what your requirements are, but if I borrow Buoy Rina's input, here's a solution using regular expressions (pattern search strings):
#!/usr/bin/env python3

import re

text = "I will go school tomorrow. I eat apples. Here is a six word sentence."

max_words = 0
sentences = re.split("[.!?]", text)
for sentence in sentences:
    max_words = max(len(sentence.split()), max_words)

print(f"max_words: {max_words}")
The re.split() breaks the text (or paragraph) into sentences based on "some" end-of-sentence punctuation. There are likely cases where splitting on a period '.' won't yield a complete sentence, but we'll ignore that for simplicity.
The string method split() then breaks each sentence into words on whitespace (the default for split()). The length of the resulting list gives the word count.
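To make the two steps concrete, here is a small illustrative demo of the intermediate values with the same sample text:

import re

text = "I will go school tomorrow. I eat apples. Here is a six word sentence."

# re.split() on end-of-sentence punctuation; the trailing '' comes from the final '.'
print(re.split("[.!?]", text))
# ['I will go school tomorrow', ' I eat apples', ' Here is a six word sentence', '']

# str.split() with no argument splits on runs of whitespace
print("I will go school tomorrow".split())
# ['I', 'will', 'go', 'school', 'tomorrow']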
text = "I will go school tomorrow. I eat apples. I will have a very long sentence. "

def getmaxwordcount(text):
    count_word = 0
    is_start_word = False
    counts = []
    for c in text:
        if c == ' ':
            # a space closes the current word, if one was started
            if is_start_word:
                count_word += 1
                is_start_word = False
        elif c == '!' or c == '.' or c == '?':
            # end of sentence: close the current word and record the sentence's count
            if is_start_word:
                count_word += 1
                is_start_word = False
            counts.append(count_word)
            count_word = 0
        else:
            # a letter starts a new word if we are not already inside one
            if c.isalpha():
                if is_start_word == False:
                    is_start_word = True
    return max(counts)
getmaxwordcount(text) # 7
import re

text = "I will go school tomorrow. I eat apples."

def foo(txt):
    max_count = 0
    for i in re.split('[!.?]', txt):
        if len(i.split()) > max_count:
            max_count = len(i.split())
    return max_count
print(foo(text)) # returns 5
Code:
import re

paragraph = "Two words. Three other words? Finally four another words!"

all_lengths_in_paragraph = [f"Length of {n+1}th sentence is {len(list(filter(None, x.split(' '))))}" for n, x in enumerate(list(filter(None, re.split(r'\.|!|\?', paragraph))))]
max_length = max([len(list(filter(None, x.split(' ')))) for x in list(filter(None, re.split(r'\.|!|\?', paragraph)))])

for one_length in all_lengths_in_paragraph:
    print(one_length)

print('maximum length is', max_length)
Output:
Length of 1th sentence is 2
Length of 2th sentence is 3
Length of 3th sentence is 4
maximum length is 4
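As an aside, the filter(None, ...) calls are there because splitting on terminal punctuation leaves empty strings behind; a small illustrative demo:

import re

# splitting on the sentence-ending punctuation leaves a trailing empty string,
# which filter(None, ...) removes so it is not counted as a sentence
parts = re.split(r'\.|!|\?', "Two words. Three other words?")
print(parts)                      # ['Two words', ' Three other words', '']
print(list(filter(None, parts)))  # ['Two words', ' Three other words']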
I am trying to find out how to search a string for names with no spaces between them,
e.g. robbybobby. I want it to search the string and separate the names into their own groups.
def wordcount(filename, listwords):
    try:
        file = open(filename, "r")
        read = file.readlines()
        file.close()
        for word in listwords:
            lower = word.lower()
            count = 0
            for letter in read:
                line = letter.split()
                for each in line:
                    line2 = each.lower()
                    line2 = line2.strip(".")
                    if lower == line2:
                        count += 1
            print(lower, ":", count)
    except FileNotFoundError:
        print("no")

wordcount("teststring.txt", ["robby"])
With this code it will only find "robby" if there is a space after it.
There are several ways to do this. I am posting 2 suggestions so you can understand and improve :)
Solution 1:
def count_occurrences(line, word):
    # Normalize vars
    word = word.lower()
    line = line.lower()
    # Initialize vars
    start_index = 0
    total_count = 0
    word_len = len(word)
    # Count ignoring empty spaces
    while start_index >= 0:
        # Ignore if not found
        if word not in line[start_index:]:
            break
        # Search for the word starting from <start_index> index
        start_index = line.index(word, start_index)
        # Increment if found
        if start_index >= 0:
            start_index += word_len
            total_count += 1
    # Return total occurrences
    return total_count

print(count_occurrences('stackoverflow overflow overflowABC over', 'overflow'))
Output: 3
Solution 2:
If you want to go for a regex, these links may be useful:
Count the occurrence of a word in a txt file in python
Exact match for words
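For example, a minimal regex-based sketch of the same count (the helper name is just for illustration, and it assumes non-overlapping matches are acceptable, which they are for 'overflow'):

import re

def count_occurrences_re(line, word):
    # re.findall returns the non-overlapping matches, ignoring case
    return len(re.findall(re.escape(word), line, re.IGNORECASE))

print(count_occurrences_re('stackoverflow overflow overflowABC over', 'overflow'))  # 3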
IIUC, you want to count occurrences of a word regardless of whether it occurs as part of another word or as a word on its own.
You can use a simple regex for that:
import re

def count_line(dict, line, words):
    for word in words:
        dict[word] = len(re.findall(word, line, re.IGNORECASE)) + dict.get(word, 0)
    return dict

allLines = """
bobby robbubobby yo xyz\n
robson bobbyrobin abc\n
xyz bob amy oo\n
amybobson robson
"""

print(allLines)

words = ["amy", "robby", "bobby", "jack"]
res = {}
for line in allLines.split("\n"):
    res = count_line(res, line, words)

print(res)
Output:
bobby robbubobby yo xyz
robson bobbyrobin abc
xyz bob amy oo
amybobson robson
{'amy': 2, 'robby': 0, 'bobby': 3, 'jack': 0}
I'm trying to make a program that reads in words from a .txt file, has the user input letters of their own choosing, and then prints out all the matches.
This is what I have so far:
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = set(letters)
lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = set(a_line)
    if compare >= lineword:
        lista.append(a_line)
print lista
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find "horse", but it will also find "roses" with two s's, since it only compares set elements and not how many times each letter occurs.
How can I make the program use only the specified letters?
You can use Counter:
from collections import Counter
def compare(query, word):
    query_count = Counter(query)
    word_count = Counter(word)
    return all([query_count[char] >= word_count[char] for char in word])
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False
Counters are your friend
from collections import Counter
fil = open("example.txt", "r")
words = [(a.strip(), Counter(a.strip())) for a in fil.readlines()]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
word_list = []
for word, word_count in words:
    if all([letter_count[char] >= word_count[char] for char in word]):
        word_list.append(word)
print word_list
Looking at the comments, it's possible you may only want exact matches; if so, you don't even need a Counter:
fil = open("example.txt", "r")
words = [(a.strip(), sorted(a.strip())) for a in fil.readlines()]
letters = sorted(raw_input("Type in letters: "))
word_list = [word for word, sorted_word in words if letters == sorted_word]
print word_list
You can build a mapping dictionary with each letter in the word as a key and the number of times it occurs in that word as the value.
Then just compare the two dictionaries:
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = list(letters)

# count how many times each typed letter occurs
letter_dict = {}
for letter in compare:
    try:
        letter_dict[letter] += 1
    except KeyError:
        letter_dict[letter] = 1

lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = list(a_line)
    # count how many times each letter of the word occurs
    word_dict = {}
    for letter in lineword:
        try:
            word_dict[letter] += 1
        except KeyError:
            word_dict[letter] = 1
    flag = True
    for key, value in letter_dict.items():
        if key not in word_dict or word_dict[key] < value:
            flag = False
            break
    if flag:
        lista.append(a_line)
print lista
One approach you could follow is to use set functions:
Either use issubset/issuperset:
set("horse").issubset(set("hrose")) # returns True
set("horse").issubset(set("roses")) #returns False
or
set("horse").difference(set("hrose")) #returns empty set based on set length you know close call
set("horse").difference(set("roses")) #returns set(['h'])
In the second approach, if you have to choose among multiple candidates, you could go for the result whose difference has the smallest length.
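A small sketch of that idea (picking the candidate whose set difference from the typed letters is smallest; the helper name is just for illustration). Note that, like any pure set comparison, it still ignores how many times each letter occurs:

def closest_by_difference(letters, candidates):
    # smaller difference = fewer letters of the candidate missing from the input
    return min(candidates, key=lambda w: len(set(w).difference(set(letters))))

print(closest_by_difference("hrose", ["house", "horse"]))  # 'horse'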
I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job, but instead of consonants it counts the frequency of word lengths in a text file. I know only a small change is needed, one that loops deeper into each word (I think).
import re

def freqCounter(file1, file2):
    freq_dict = {}
    dict_static = {2: 0, 3: 0, 5: 0}
    # get rid of punctuation
    punctuation = re.compile(r'[.?!,"\':;]')  # use re.compile() to turn the string into a RegexObject
    try:
        with open(file1, "r") as infile, open(file2, "r") as infile2:  # open two files at once
            text1 = infile.read()  # read the files
            text2 = infile2.read()
            joined = " ".join((text1, text2))
            for word in joined.lower().split():
                # remove punctuation marks
                word = punctuation.sub("", word)
                l = len(word)  # assign l to be the word's length
                # if the corresponding word length is not yet in the dict, start it at 0
                if l not in freq_dict:
                    freq_dict[l] = 0
                # then increase the value by 1
                freq_dict[l] += 1
    except IOError as e:  # exception catch for errors while reading the files
        print 'Operation failed: %s' % e.strerror
    return freq_dict  # return the dictionary
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
words = words.replace('a', '').replace('e', '').replace('i', '').replace('o', '').replace('u', '') # you are welcome to replace this with a smart regex
# Now words have no more vowels i.e. only consonants
word_lengths = map(len, words.split(' '))
c = Counter(word_lengths)
freq_dict = dict(Counter(c))
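Continuing the snippet above, the resulting freq_dict can then be printed in the count : frequency form the question asks for (a usage sketch; note that the replace chain does not strip punctuation, so a trailing '.' still counts towards a word's length):

# print the consonant-count frequencies in ascending order of consonant count
for consonants, n_words in sorted(freq_dict.items()):
    print("{} : {}".format(consonants, n_words))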
A simple solution
def freqCounter(_str):
    _txt = _str.split()
    freq_dict = {}
    for word in _txt:
        c = 0
        for letter in word:
            if letter not in "aeiou.,:;!?[]\"`()'":
                c += 1
        freq_dict[c] = freq_dict.get(c, 0) + 1
    return freq_dict

txt = "There is no new thing under the sun."
table = freqCounter(txt)
for k in table:
    print(k, ":", table[k])
How about this?
with open('conts.txt', 'w') as fh:
    fh.write('oh my god becky look at her butt it is soooo big')

consonants = "bcdfghjklmnpqrstvwxyz"

def count_cons(_file):
    results = {}
    with open(_file, 'r') as fh:
        for line in fh:
            for word in line.split(' '):
                conts = sum([1 if letter in consonants else 0 for letter in word])
                if conts in results:
                    results[conts] += 1
                else:
                    results[conts] = 1
    return results

print count_cons('conts.txt')
I missed the results:
{1: 5, 2: 5, 3: 1, 4: 1}
Also, I need to make the program show the words that DO have digits in them SEPARATELY.
f = open("lolpa.txt", "r")
list1 = (f)
temp = []
for item in list1:
if "1" "2" "3" "4" "5" "6" "7" "8" "9" "0" in item:
temp.append(item)
else:
print(item)
is what I have so far, but for some reason it shows all the words.
EDIT: The lolpa.txt is just a file for comparison
EDIT: In case it changes anything, I'm using Python 3.2.
Something like this will get you started; the question isn't very clear.
with open("lolpa.txt") as f:
for word in f.readline().split(): # assuming all words are on the first line
digits = [c for c in word if c.isdigit()]
if digits: # digits list is not empty
print(' '.join(digits)) # shows digits with space in between
else:
print(word) # prints word normally
The following will put whole words that have numbers in them into one file, and those without numbers into another.
f = open('lolpa.txt', 'r')
d = open('lolpa_with_digit.txt', 'w')
nd = open('lolpa_without_digit.txt', 'w')

rowlist = f
temp = []
for line in rowlist:
    words = line.split(' ')
    for word in words:
        word = word.strip()
        has_digit = False
        for char in word:
            if char.isdigit():
                has_digit = True
        if has_digit:
            d.write(word + '\n')
        else:
            nd.write(word + '\n')

# close the files so the writes are flushed to disk
f.close()
d.close()
nd.close()
I'm no regular expression expert, but this should work (of course you'd have to loop over each line):

import re

prog = re.compile(r'\b[a-zA-Z]+\b')
result = prog.findall(your_line_here)
result would then be a list of all the words that contain only letters.
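Putting the loop around it, a sketch (the file name lolpa.txt and whitespace-separated words are assumptions taken from the question):

import re

prog = re.compile(r'\b[a-zA-Z]+\b')

with open('lolpa.txt', 'r') as f:
    for line in f:
        # runs of letters only; words containing a digit are skipped entirely,
        # because there is no word boundary between a letter and a digit
        letters_only = prog.findall(line)
        # tokens that contain at least one digit
        with_digits = [w for w in line.split() if any(c.isdigit() for c in w)]
        print("without digits:", letters_only)
        print("with digits:   ", with_digits)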