I want to read from a text file and print the first three words that share the same initial three letters. I can get the first three letters of each word, but I cannot work out how to check whether they are the same.
Here is my code:
def main():
    """Print the first three letters of every word in ``words.txt``.

    The file is split on whitespace; for each token a one-element list
    holding the token's first three characters is printed.
    """
    # Read-only mode is sufficient, and the context manager closes the
    # file (the original "r+" handle was never closed).
    with open("words.txt", "r") as f:
        for word in f.read().split():
            # A token produced by split() contains no whitespace, so
            # splitting it again was redundant.
            initials = [word[:3]]
            print(initials)
words.txt
when, where, loop, stack, wheel, wheeler
output
You can use a mapping from the first 3 letters to the list of words. collections.defaultdict could save you a few keystrokes here:
from collections import defaultdict
def get_words(filename='words.txt', prefix_len=3, group_size=3):
    """Return the first ``group_size`` words that share a common prefix.

    Args:
        filename: Path of the text file to scan.
        prefix_len: Number of leading characters that must match.
        group_size: How many matching words are required.

    Returns:
        The list of the first ``group_size`` words (in file order) whose
        first ``prefix_len`` characters are identical, or ``[]`` when no
        prefix collects that many words.
    """
    d = defaultdict(list)
    with open(filename) as f:
        for line in f:
            # Treat commas as whitespace and let split() do the rest: this
            # handles "a, b", "a,b" and one-word-per-line files alike, and
            # never leaves a trailing newline attached to a word.
            for word in line.replace(',', ' ').split():
                prefix = word[:prefix_len]
                d[prefix].append(word)
                if len(d[prefix]) == group_size:
                    return d[prefix]
    return []
print(get_words()) # ['when', 'where', 'wheel']
This code snippet groups the words by their first 3 letters:
def main():
    """Print the first three words in ``words.txt`` with the same first three letters.

    Words may be separated by whitespace and/or commas. Processing stops
    as soon as one three-letter prefix has collected three words. If no
    prefix gets that far, the whole prefix -> words dictionary is printed
    instead.
    """
    # a dict where the first 3 letters are the keys and the
    # values are lists of words
    my_dict = {}
    with open("words.txt", "r") as f:
        for line in f:
            # Commas are turned into spaces so that "when, where" yields
            # "when" and "where" rather than "when," and "where".
            for word in line.replace(",", " ").split():
                s = word[:3]
                group = my_dict.setdefault(s, [])
                group.append(word)
                if len(group) == 3:
                    print(group)
                    return
    # this will only print if there are no 3 words with the same start letters
    print(my_dict)
This stops the processing (I used a return statement) if you get to 3 words with the same 3 letters.
You can use a dictionary here, with the first 3 characters of each word as the key. Example:
# Group the words of words.txt by their first three characters and print
# the first group that reaches three members.
d = {}
key_with_three_element = ''
with open("words.txt", "r") as f:  # read-only; closed automatically
    # Commas are treated as separators so they do not stick to the words.
    for word in f.read().replace(",", " ").split():
        key = word[:3]
        d.setdefault(key, []).append(word)
        if len(d[key]) == 3:
            key_with_three_element = key
            break
# .get() prints an empty list instead of raising KeyError when no prefix
# collected three words (key_with_three_element is still '').
print(d.get(key_with_three_element, []))
Output:
['when', 'where', 'wheel']
def main():
    """Print the first three words in ``words.txt`` sharing a three-letter prefix.

    Words may be separated by whitespace and/or commas. Nothing is printed
    when no prefix is shared by three words.
    """
    # prefix -> words seen so far; the original used ``record`` without
    # ever creating it, which raised NameError on the first word.
    record = {}
    with open("words.txt", "r") as f:  # read-only; closed automatically
        for word in f.read().replace(",", " ").split():
            key = word[:3]
            record.setdefault(key, []).append(word)
            if len(record[key]) == 3:
                print(record[key])
                break
Related
I need to know which English words were used in the Italian chat and to count how many times they were used.
But in the output I also have the words I didn't use in the example chat (baby-blue-eyes': 0)
# Seed the table with every dictionary word, each starting at zero.
with open("dizionarioen.txt") as f:
    english_words = {
        token: 0
        for row in f
        for token in row.strip().split()
    }

# Walk the chat and bump the counter of every token that is a known word.
with open("_chat.txt") as f:
    for row in f:
        for token in row.strip().split():
            if token not in english_words:
                continue
            english_words[token] += 1

print(english_words)
You can simply iterate over your result and remove all elements that have value 0:
# Every dictionary word starts with a count of zero.
with open("dizionarioen.txt") as f:
    english_words = dict.fromkeys(
        (token for row in f for token in row.split()), 0
    )

# Count the chat tokens that appear in the dictionary.
with open("_chat.txt") as f:
    for row in f:
        for token in row.split():
            if token in english_words:
                english_words[token] += 1

# Keep only the words that were actually used.
result = {}
for key, value in english_words.items():
    if value:
        result[key] = value
print(result)
Also here is another solution that allows you to count words with usage of Counter:
from collections import Counter
# Collect the dictionary vocabulary as a set for fast membership tests.
with open("dizionarioen.txt") as f:
    all_words = set()
    for line in f:
        all_words.update(line.split())

# Tally each chat token that belongs to the vocabulary.
with open("_chat.txt") as f:
    result = Counter()
    for line in f:
        for word in line.split():
            if word in all_words:
                result[word] += 1
print(result)
If you want to remove the words without occurrence after indexing, just delete these entries:
# Gather the unused words first so the dictionary is not modified while
# it is being iterated, then delete them in place.
unused = [w for w, count in english_words.items() if count == 0]
for w in unused:
    del english_words[w]
Then, your dictionary only contains words that occurred. Was that the question?
My program should read the content of a text file and find similar words according to the last 3 characters and print out 5 of these similar words in any order. For ex: warm, arm, charm...
I wrote some code, but I could not complete it.
def main():
    """Print five words from ``words.txt`` that end with the same three letters.

    Words are grouped by their last three characters. As soon as one group
    holds five words it is printed and the function returns. If no group
    reaches five words, the whole ending -> words dictionary is printed.
    """
    # ending (last 3 characters) -> words seen so far with that ending
    my_dict = {}
    with open("words.txt", "r") as f:
        for line in f:
            # split() with no argument ignores repeated spaces and the
            # trailing newline, so no empty "words" are produced.
            for word in line.split():
                s = word[-3:]
                my_dict.setdefault(s, []).append(word)
                if len(my_dict[s]) == 5:
                    print(my_dict[s])
                    return
    # this will only print if no ending is shared by 5 words
    print(my_dict)


if __name__ == "__main__":
    # main() prints its result itself and returns None; wrapping the call
    # in print() only added a stray "None" line to the output.
    main()
My output is
{'ger': ['finger'], 'iss': ['premiss'], 'arm': ['arm'], 'ike': ['like']}
Putting together:
def main(filename="words.txt", suffix_len=3, group_size=5):
    """Print the first ``group_size`` words that share the same ending.

    Args:
        filename: Text file to read; words are separated by whitespace.
        suffix_len: Number of trailing characters that must match.
        group_size: Number of matching words to collect before printing.

    The first group to reach ``group_size`` words is printed and the
    function returns. If no group gets that large, the complete
    ending -> words dictionary is printed instead.
    """
    my_dict = {}  # ending -> list of words with that ending, in file order
    with open(filename, "r") as f:
        for line in f:
            # Plain split() skips empty tokens caused by repeated spaces
            # or the trailing newline.
            for word in line.split():
                s = word[-suffix_len:]
                group = my_dict.setdefault(s, [])
                group.append(word)
                if len(group) == group_size:
                    print(group)
                    return
    # Reached only when no ending was shared by ``group_size`` words.
    print(my_dict)


if __name__ == "__main__":
    # main() returns None, so print(main()) would emit an extra "None".
    main()
# VARIABLES
end_letters = []  # distinct three-letter endings, in first-seen order
word_list = []    # every non-blank word read from the file

# Read one word per line. Each line is stripped first, so the ending is
# taken with [-3:] directly; the original [-4:] slice relied on a trailing
# newline being present and mis-grouped a final line without one.
with open("words.txt", "r") as file:
    for words in file:
        words = words.strip()
        if not words:
            continue  # ignore blank lines
        word_list.append(words)
        # remember each ending only once
        if words[-3:] not in end_letters:
            end_letters.append(words[-3:])

# For every ending, print the words that finish with it, so that words with
# the same last three letters come out next to each other.
for criteria in end_letters:
    for words in word_list:
        if words[-3:] == criteria:
            print(words)
my program is supposed to make a list of the count of each word in a file. I am trying to pass my file into the first function to remove all punctuation.. how can I make my file into a string to do this? Thanks.
def extract_words(string):
    """Return a list containing each word in the string, lower-cased.

    A word is a maximal run of alphabetic characters; punctuation, digits
    and whitespace only act as separators and are dropped.
    """
    # Local import so this snippet does not depend on file-level imports.
    from itertools import groupby

    return [
        ''.join(run).lower()
        for is_word, run in groupby(string, key=lambda c: c.isalpha())
        if is_word
    ]


def count_words(filename):
    """Return a dictionary with the number of occurrences of each word in the file.

    The words are obtained with ``extract_words``, so counting is
    case-insensitive and ignores punctuation and digits.
    """
    mydict = {}
    with open(filename, 'r') as myfile:
        # myfile.read() returns the whole file as a single string, which
        # is exactly what extract_words() expects.
        for word in extract_words(myfile.read()):
            mydict[word] = mydict.get(word, 0) + 1
    return mydict
if a is the string and you want the output dictionary as b:
from collections import Counter
# Make file into a string:
with open('file.txt', 'r') as f:
    a = f.read()
# split() already breaks on every kind of whitespace, newlines included.
# Deleting '\n' beforehand glued the last word of each line to the first
# word of the next one.
list_word = [i for i in a.split() if i.isalpha()]
b = Counter(list_word)
I'm trying to make a program that reads words from a .txt file, lets the user type in letters of their own choosing, and then prints out all the matching words.
This is what I got so far:
# Read the candidate words, one per line; the file is closed on leaving
# the with-block.
with open("example.txt", "r") as fil:
    words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = set(letters)
lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = set(a_line)
    # Keep the word when every distinct letter in it was typed by the user.
    # Sets ignore how often a letter occurs.
    if compare >= lineword:
        # The original appended the undefined name ``rad`` (NameError).
        lista.append(a_line)
print(lista)
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find me "horse", but it will also find me "roses" with two s, since it only compares elements and not amount
How can I make the program to only use the specified letters?
You can use Counter:
from collections import Counter
def compare(query, word):
    """Return True if ``word`` can be spelled with the letters of ``query``.

    Each letter of ``query`` may be used at most as many times as it
    occurs there; ``query`` may contain letters that ``word`` does not use.
    """
    # Counter subtraction keeps only positive counts, so whatever remains
    # is a letter that ``word`` needs more often than ``query`` supplies.
    # This avoids building a temporary list and re-checking repeated letters.
    return not (Counter(word) - Counter(query))
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False
Counters are your friend
from collections import Counter
# Pair every non-blank word of the file with its letter counts. The
# with-block closes the file once the list is built.
with open("example.txt", "r") as fil:
    words = [(a.strip(), Counter(a.strip())) for a in fil if a.strip()]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
# A word matches when it needs no letter more often than the user typed it.
word_list = [
    word
    for word, word_count in words
    if all(letter_count[char] >= needed
           for char, needed in word_count.items())
]
print(word_list)
looking at the comments, it's possible you may only want exact matches, if so, you don't even need a counter
# Pair each word with its sorted letters; the with-block closes the file.
with open("example.txt", "r") as fil:
    words = [(a.strip(), sorted(a.strip())) for a in fil]
letters = sorted(raw_input("Type in letters: "))
# Two strings use exactly the same letters when their sorted letters match.
word_list = [word for word, sorted_word in words if letters == sorted_word]
print(word_list)
You can build a dictionary for each word, with the letters of the word as keys and the number of times each letter occurs in that word as values.
Then just compare the two dictionaries.
with open("example.txt", "r") as fil:  # closed automatically
    words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = list(letters)

# letter -> number of times the user typed it (first occurrence counts as 1)
letter_dict = {}
for letter in compare:
    letter_dict[letter] = letter_dict.get(letter, 0) + 1

lista = []
for a_line in words:
    a_line = a_line.strip()
    if not a_line:
        continue  # a blank line is not a word
    lineword = list(a_line)
    # letter -> number of times it occurs in this word
    word_dict = {}
    for letter in lineword:
        word_dict[letter] = word_dict.get(letter, 0) + 1
    # The word may only use the typed letters, each at most as often as it
    # was typed. The original looped over the typed letters instead, which
    # accepted words with extra letters and rejected words that left a
    # typed letter unused.
    flag = True
    for key, value in word_dict.items():
        if letter_dict.get(key, 0) < value:
            flag = False
            break
    if flag:
        lista.append(a_line)
print(lista)
One approach you could follow is to use set functions:
either use issubset/issuperset
# Sets ignore repeated letters, so these checks only compare which letters
# occur, not how many times.
set("horse").issubset(set("hrose")) # returns True
set("horse").issubset(set("roses")) # returns False
or
# difference() gives the letters of the first word that are missing from the second.
set("horse").difference(set("hrose")) # returns an empty set; the size of the result shows how close the match is
set("horse").difference(set("roses")) # returns set(['h'])
In the second approach, if you have the choice to choose among multiple options, you could go for result with small length.
I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job but instead of consonants it counts the frequency of whole words in text file. I know there is only a bit change which loops deeper into the word (I think).
def freqCounter(file1, file2):
    """Count how many words of each length occur in two text files.

    The contents of both files are joined, lower-cased and split on
    whitespace. The punctuation characters ``. ? ! , " ' : ;`` are removed
    from each word before its length is measured.

    Args:
        file1: Path of the first text file.
        file2: Path of the second text file.

    Returns:
        A dict mapping word length -> number of words of that length.
        If either file cannot be read, an error message is printed and an
        empty dict is returned.
    """
    freq_dict = {}
    # get rid of punctuation
    punctuation = re.compile(r'[.?!,"\':;]')
    try:
        # Only the file access can raise IOError, so only it is guarded.
        with open(file1, "r") as infile, open(file2, "r") as infile2:
            text1 = infile.read()
            text2 = infile2.read()
    except IOError as e:
        print('Operation failed: %s' % e.strerror)
        return freq_dict

    joined = " ".join((text1, text2))
    for word in joined.lower().split():
        # remove punctuation marks
        word = punctuation.sub("", word)
        l = len(word)
        if l == 0:
            # The token was nothing but punctuation; it is not a word.
            continue
        freq_dict[l] = freq_dict.get(l, 0) + 1
    return freq_dict
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
# Count the consonants of each word: alphabetic characters that are not
# vowels. Testing the lower-cased character handles capital vowels, and
# isalpha() keeps punctuation such as the final "." out of the count.
word_lengths = [
    sum(1 for ch in word if ch.isalpha() and ch.lower() not in 'aeiou')
    for word in words.split()
]
# consonant count -> number of words with that many consonants
c = Counter(word_lengths)
freq_dict = dict(c)
A simple solution
def freqCounter(_str):
    """Count the words of ``_str`` by their number of consonants.

    A consonant is any alphabetic character other than a, e, i, o, u
    (case-insensitive). Digits and punctuation are never counted.

    Returns:
        A dict mapping consonant count -> number of whitespace-separated
        words with that many consonants.
    """
    freq_dict = {}
    for word in _str.split():
        # isalpha() replaces the hand-written punctuation list, and lower()
        # stops capital vowels from being counted as consonants.
        c = sum(
            1 for letter in word
            if letter.isalpha() and letter.lower() not in "aeiou"
        )
        freq_dict[c] = freq_dict.get(c, 0) + 1
    return freq_dict
txt = "There is no new thing under the sun."
table = freqCounter(txt)
# sorted() lists the rows by ascending consonant count, as in the required
# output, instead of in the order the counts were first encountered.
for k in sorted(table):
    print(k, ":", table[k])
How about this?
# Create the sample input file that is read back further down.
with open('conts.txt', 'w') as fh:
    fh.write('oh my god becky look at her butt it is soooo big')
# Letters counted as consonants (lower case).
consonants = "bcdfghjklmnpqrstvwxyz"


def count_cons(_file):
    """Count the words of a text file by their number of consonants.

    Args:
        _file: Path of the text file; words are separated by whitespace.

    Returns:
        A dict mapping consonant count -> number of words with that many
        consonants. Letter case is ignored.
    """
    results = {}
    with open(_file, 'r') as fh:
        for line in fh:
            # split() without an argument skips the empty strings that
            # split(' ') produced for repeated spaces and blank lines.
            for word in line.split():
                # Booleans sum as 0/1; lower() makes capitals count too.
                conts = sum(letter in consonants for letter in word.lower())
                results[conts] = results.get(conts, 0) + 1
    return results
# Parenthesised call: valid as a Python 3 function call and, with a single
# argument, prints the same thing under Python 2.
print(count_cons('conts.txt'))
Missed the results
{1: 5, 2: 5, 3: 1, 4: 1}
[Finished in 0.0s]