Matching input letters with a dictionary in Python - python

I'm trying to make a program that reads words from a .txt file, lets the user input letters of their own choosing, and prints out all the words that match those letters.
This is what I got so far:
# Python 2 script (uses raw_input).
# Read the candidate words, one per line; the with-block closes the file.
with open("example.txt", "r") as fil:
    words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = set(letters)
lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = set(a_line)
    # A set comparison only checks WHICH letters occur, not how often.
    if compare >= lineword:
        # Store the stripped word itself.
        lista.append(a_line)
print(lista)
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find me "horse", but it will also find me "roses" with two s, since it only compares which letters occur and not how many times each one occurs.
How can I make the program to only use the specified letters?

You can use Counter:
from collections import Counter
def compare(query, word):
    """Return True when ``word`` can be spelled with the letters of ``query``.

    Every letter of ``word`` must occur in ``query`` at least as many times
    as it occurs in ``word``.
    """
    # Counter subtraction keeps only positive counts, i.e. the letters that
    # ``word`` needs but ``query`` cannot supply.
    shortfall = Counter(word) - Counter(query)
    return not shortfall
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False

Counters are your friend
from collections import Counter
# Pair every word with its letter counts; the with-block closes the file and
# each line is stripped only once.
with open("example.txt", "r") as fil:
    words = [(a, Counter(a)) for a in (line.strip() for line in fil)]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
# Keep the words that need no letter more often than it was typed in.
word_list = [
    word
    for word, word_count in words
    if all(letter_count[char] >= word_count[char] for char in word)
]
print(word_list)
looking at the comments, it's possible you may only want exact matches, if so, you don't even need a counter
# Pair every word with its sorted letters; the with-block closes the file and
# each line is stripped only once.
with open("example.txt", "r") as fil:
    words = [(a, sorted(a)) for a in (line.strip() for line in fil)]
letters = sorted(raw_input("Type in letters: "))
# Two strings are exact anagrams when their sorted letters are equal.
word_list = [word for word, sorted_word in words if letters == sorted_word]
print(word_list)

You can build a dictionary for each word, with the letters of the word as keys and the number of times each letter occurs as values.
Now just compare the two dictionaries.
def _count_letters(text):
    """Map each character of ``text`` to the number of times it occurs."""
    counts = {}
    for letter in text:
        # The first occurrence counts as 1.
        counts[letter] = counts.get(letter, 0) + 1
    return counts


# Read the candidate words, one per line; the with-block closes the file.
with open("example.txt", "r") as fil:
    words = fil.readlines()
letters = raw_input("Type in letters: ")
letter_dict = _count_letters(letters)
lista = []
for a_line in words:
    a_line = a_line.strip()
    word_dict = _count_letters(a_line)
    # A word matches when it needs no letter more often than it was typed
    # in, so the letters of the WORD are checked against the input.
    flag = True
    for key, value in word_dict.items():
        if letter_dict.get(key, 0) < value:
            flag = False
            break
    if flag:
        lista.append(a_line)
print(lista)

One approach you could follow is to use set functions:
either use issubset/issuperset
set("horse").issubset(set("hrose"))  # returns True
set("horse").issubset(set("roses"))  # returns False
or
set("horse").difference(set("hrose"))  # returns an empty set; the size of the result tells you how close the match is
set("horse").difference(set("roses"))  # returns set(['h'])
In the second approach, if several candidate words remain to choose from, you could go for the one whose difference set is the smallest.

Related

Printing 5 words that last 3 letters are the same in Python

My program should read the content of a text file, find words that share the same last 3 characters, and print out 5 of these similar words in any order. For example: warm, arm, charm...
I wrote some code but I could not complete it.
def main():
    """Print the first five words in words.txt sharing their last three letters.

    Words are grouped by their final three characters while the file is
    read.  As soon as one group holds five words it is printed and the
    function returns; if no group gets that far, the whole grouping is
    printed instead.
    """
    # Maps a three-letter ending to the words seen so far with that ending.
    my_dict = {}
    with open("words.txt", "r") as f:
        for line in f:
            # split() without arguments also skips repeated blanks, so no
            # empty "words" end up in the grouping.
            for word in line.split():
                s = word[-3:]
                my_dict.setdefault(s, []).append(word)
                if len(my_dict[s]) == 5:
                    print(my_dict[s])
                    return
    # Only reached when no ending is shared by five words.
    print(my_dict)


if __name__ == "__main__":
    # main() prints its own result and returns None, so the call is not
    # wrapped in print().
    main()
My output is
{'ger': ['finger'], 'iss': ['premiss'], 'arm': ['arm'], 'ike': ['like']}
Putting together:
def main():
    """Print the first five words in words.txt sharing their last three letters.

    Words are grouped by their final three characters while the file is
    read.  As soon as one group holds five words it is printed and the
    function returns; if no group gets that far, the whole grouping is
    printed instead.
    """
    # Maps a three-letter ending to the words seen so far with that ending.
    my_dict = {}
    with open("words.txt", "r") as f:
        for line in f:
            # split() without arguments also skips repeated blanks, so no
            # empty "words" end up in the grouping.
            for word in line.split():
                s = word[-3:]
                my_dict.setdefault(s, []).append(word)
                if len(my_dict[s]) == 5:
                    print(my_dict[s])
                    return
    # Only reached when no ending is shared by five words.
    print(my_dict)


if __name__ == "__main__":
    # main() prints its own result and returns None, so the call is not
    # wrapped in print().
    main()
# VARIABLES
end_letters = []  # distinct three-letter endings, in order of first appearance
word_list = []    # every word read from the file
groups = {}       # ending -> words with that ending, in file order
# Read one word per line; the with-block closes the file.
with open("words.txt", "r") as word_file:
    for words in word_file:
        # Drop the line break so the slice below really is the last three
        # letters, also for a final line without a trailing newline.
        words = words.strip()
        if not words:
            continue
        word_list.append(words)
        ending = words[-3:]
        if ending not in groups:
            end_letters.append(ending)
            groups[ending] = []
        groups[ending].append(words)
# Print the words grouped by their last three letters.
for criteria in end_letters:
    for words in groups[criteria]:
        print(words)

Check if they are same

I want to read from text file and print the first three words having the same initial three letters. I can get the first 3 initials but I cannot check if they are same or not.
Here is my code:
def main():
    """Print the first three letters of every word in words.txt."""
    # The with-block closes the file; read-only mode is enough because the
    # file is never written to.
    with open("words.txt", "r") as f:
        # The loop that prints the initial letters
        for word in f.read().split():
            # ``word`` is already a single token, so the result is a
            # one-element list holding its first three characters.
            initials = [word[:3]]
            print(initials)
words.txt
when, where, loop, stack, wheel, wheeler
output
You can use a mapping from the first 3 letters to the list of words. collections.defaultdict could save you a few keystrokes here:
from collections import defaultdict
def get_words():
    """Return the first three words in words.txt sharing their first three letters.

    The file is expected to hold comma-separated words (``when, where, ...``).
    Returns an empty list when no prefix is shared by three words.
    """
    d = defaultdict(list)
    with open('words.txt') as f:
        for line in f:
            for word in line.split(','):
                # Strip blanks and the line break so the last word of a line
                # is not stored as e.g. 'wheeler\n'.
                word = word.strip()
                if not word:
                    continue
                prefix = word[:3]
                d[prefix].append(word)
                if len(d[prefix]) == 3:
                    return d[prefix]
    return []
print(get_words()) # ['when', 'where', 'wheel']
This code snippet groups the words by their first 3 letters:
def main():
    """Print the first three words in words.txt that start with the same three letters.

    Words are grouped by their first three characters while the file is
    read.  The first group to reach three words is printed and the function
    returns; otherwise the complete grouping is printed.
    """
    # prefix -> list of words starting with that prefix
    groups = {}
    with open("words.txt", "r") as f:
        for line in f:
            for word in line.strip().split():
                bucket = groups.setdefault(word[:3], [])
                bucket.append(word)
                if len(bucket) == 3:
                    print(bucket)
                    return
    # Only reached when no prefix is shared by three words.
    print(groups)
This stops the processing (I used a return statement) if you get to 3 words with the same 3 letters.
You can use dictionary here with first 3 characters as key. Example
d = {}
key_with_three_element = ''
# The with-block closes the file; read-only mode is enough here.
with open("words.txt", "r") as f:
    # Treat commas as separators so "when, where" yields 'when' and 'where'
    # rather than 'when,' and 'where'.
    for word in f.read().replace(',', ' ').split():
        d.setdefault(word[:3], []).append(word)
        if len(d[word[:3]]) == 3:
            key_with_three_element = word[:3]
            break
# .get() avoids a KeyError when no prefix reached three words.
print(d.get(key_with_three_element, []))
Output:
['when', 'where', 'wheel']
def main():
    """Print the first three words in words.txt that share a three-letter prefix.

    Nothing is printed when no prefix is shared by three words.
    """
    # prefix -> words seen so far with that prefix; it must exist before the
    # loop can look anything up in it.
    record = {}
    # The with-block closes the file; read-only mode is enough here.
    with open("words.txt", "r") as f:
        for word in f.read().split():
            # Append in place instead of rebuilding the list for every word.
            bucket = record.setdefault(word[:3], [])
            bucket.append(word)
            if len(bucket) == 3:
                print(bucket)
                break

Create a dict whose value is the set of all possible anagrams for a given word

So what im trying to do is create a dict whose:
key is the word in sorted order and
value is the set of each anagram (generated by an anagram program).
When I run my program I get, e.g., word : {('w', 'o', 'r', 'd')} instead of word : dorw, wrdo, rowd. The text file just contains a lot of words, one on each line.
Code:
def main():
    """Read the word list, then print the dictionary returned by lengthWord()."""
    wordList = readMatrix()
    sortDict = lengthWord()
    print(sortDict)
def readMatrix():
    """Return a tuple with one entry per line of words.txt.

    Each entry is a tuple of the line's characters, lower-cased and sorted,
    with trailing whitespace removed first.
    """
    # The with-block closes the file once every line has been processed.
    with open("words.txt", "r") as strFile:
        return tuple(
            tuple(sorted(line.rstrip().lower())) for line in strFile
        )
def lengthWord():
    """Group the four-character entries from readMatrix() by their sorted letters.

    Returns a dict that maps the joined, sorted characters of an entry to
    the set of entries that produced that key.
    """
    lenWord = 4
    sortDict = {}
    for word in readMatrix():
        # Only entries of exactly ``lenWord`` characters are grouped.
        if len(word) != lenWord:
            continue
        sortWord = ''.join(sorted(word))
        sortDict.setdefault(sortWord, set()).add(word)
    return sortDict
main()
You are creating tuples of each word in the file:
for line in lines:
word = sorted(line.rstrip().lower())
wordList.append(tuple(word))
This will sort all your anagrams, creating duplicate sorted character tuples.
If you wanted to track all possible words, you should not produce tuples here. Just read the words:
for line in lines:
word = line.rstrip().lower()
wordList.append(word)
and process those words with your lengthWord() function; this function does need to take the wordList value as an argument:
def lengthWord(wordList):
# ...
and you need to pass that in from main():
def main():
    """Read the word list and print the anagram dictionary built from it."""
    print(lengthWord(readMatrix()))

MapReduce to count the frequency of the number consonants in words from a text file

I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job, but instead of consonants it counts the frequency of word lengths in a text file. I think only a small change is needed: a loop that goes deeper, into each word.
def freqCounter(file1, file2):
    """Count how many words of each length occur in two text files.

    The contents of ``file1`` and ``file2`` are joined, lower-cased and split
    on whitespace; the punctuation marks . ? ! , " ' : ; are removed from
    each word before its length is taken.

    Returns a dict mapping a word length to the number of words of that
    length.  If a file cannot be read, a message is printed and an empty
    dict is returned.
    """
    freq_dict = {}
    # get rid of punctuation
    punctuation = re.compile(r'[.?!,"\':;]')
    try:
        # open two files at once; only the reading can raise IOError
        with open(file1, "r") as infile, open(file2, "r") as infile2:
            joined = " ".join((infile.read(), infile2.read()))
    except IOError as e:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('Operation failed: %s' % e.strerror)
        return freq_dict
    for word in joined.lower().split():
        # length of the word once its punctuation marks are removed
        l = len(punctuation.sub("", word))
        freq_dict[l] = freq_dict.get(l, 0) + 1
    return freq_dict
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
VOWELS = set('aeiou')
# Count only alphabetic, non-vowel characters of each word.  Lower-casing
# catches upper-case vowels, and isalpha() keeps punctuation such as the
# final '.' of "sun." from being counted as a consonant.
word_lengths = [
    sum(1 for ch in word.lower() if ch.isalpha() and ch not in VOWELS)
    for word in words.split()
]
# consonant count -> number of words with that many consonants
c = Counter(word_lengths)
freq_dict = dict(c)
A simple solution
def freqCounter(_str):
_txt=_str.split()
freq_dict={}
for word in _txt:
c=0
for letter in word:
if letter not in "aeiou.,:;!?[]\"`()'":
c+=1
freq_dict[c]=freq_dict.get(c,0)+ 1
return freq_dict
txt = "There is no new thing under the sun."
table = freqCounter(txt)
# One "count : words" line per distinct consonant count.
for k, v in table.items():
    print(k, ":", v)
How about this?
consonants = "bcdfghjklmnpqrstvwxyz"


def count_cons(_file):
    """Count how many words in a text file contain each number of consonants.

    ``_file`` is the path of the file to read.  Words are separated by
    whitespace, and letters are compared case-insensitively against
    ``consonants``.

    Returns a dict mapping a consonant count to the number of words that
    contain exactly that many consonants.
    """
    results = {}
    with open(_file, 'r') as fh:
        for line in fh:
            # split() without arguments drops the line break and repeated
            # blanks, so no empty "word" is counted.
            for word in line.split():
                conts = sum(1 for letter in word.lower() if letter in consonants)
                results[conts] = results.get(conts, 0) + 1
    return results


if __name__ == "__main__":
    # Demo: write a sample file, then count its words.
    with open('conts.txt', 'w') as fh:
        fh.write('oh my god becky look at her butt it is soooo big')
    print(count_cons('conts.txt'))
Missed the results
{1: 5, 2: 5, 3: 1, 4: 1}
[Finished in 0.0s]

Python Word Count

wordsFreq = {}
words = []
# Collect words until an empty line is entered.
while True:
    inputWord = raw_input()
    if inputWord == "":
        break
    words.append(inputWord)
# Tally how often each collected word occurs.
for word in words:
    if word in wordsFreq:
        wordsFreq[word] += 1
    else:
        wordsFreq[word] = 1
# One "word - count" line per distinct word.
for word, freq in wordsFreq.items():
    print(word + " - " + str(freq))
Apparently my words[] and the for loop is redundant but I had no further explanation than that, can anyone explain to me why it is redundant?
You can skip the step of building a list of words and instead directly create the frequency dict as the user is entering words. I've used defaultdict to avoid having to check if a word has already been added.
from collections import defaultdict
# Missing words start at 0, so they can be incremented straight away.
wordsFreq = defaultdict(int)
# Read words until an empty line is entered.
word = raw_input()
while word:
    wordsFreq[word] += 1
    word = raw_input()
If you aren't allowed to use defaultdict, it could look like this:
wordsFreq = {}
while True:
    word = raw_input()
    # An empty line ends the input.
    if not word:
        break
    # Look the current count up in wordsFreq itself; .get() supplies 0 for
    # a word that has not been seen yet.
    wordsFreq[word] = wordsFreq.get(word, 0) + 1
You can use collections.Counter to do this easily:
from collections import Counter
words = []
input_word = True
# An empty string is falsy, so the loop ends on the first empty line.
while input_word:
    input_word = raw_input()
    # Do not record the empty line that ends the input; otherwise '' would
    # be counted and printed as a word.
    if input_word:
        words.append(input_word)
counted = Counter(words)
for word, freq in counted.items():
    print(word + " - " + str(freq))
Note that an empty string evaluates to false, so rather than breaking when it equals an empty string, we can just use the string as our loop condition.
Edit: If you don't wish to use Counter as an academic exercise, then the next best option is a collections.defaultdict:
from collections import defaultdict
# Missing words start at 0, so they can be incremented straight away.
words = defaultdict(int)
# Read words until an empty line is entered; the empty line is not counted.
input_word = raw_input()
while input_word:
    words[input_word] += 1
    input_word = raw_input()
# One "word - count" line per distinct word.
for word, freq in words.items():
    print(" - ".join((word, str(freq))))
The defaultdict ensures all keys will point to a value of 0 if they haven't been used before. This makes it easy for us to count using one.
If you still want to keep your list of words as well, then you would need to do that in addition. E.g:
words = []                      # every entered word, in input order
words_count = defaultdict(int)  # word -> number of times it was entered
# Read words until an empty line is entered; the empty line is not stored.
input_word = raw_input()
while input_word:
    words.append(input_word)
    words_count[input_word] += 1
    input_word = raw_input()
I think your teacher was trying to say you can write the loop like this
wordsFreq = {}
# Count each word as it is read; an empty line ends the input.
inputWord = raw_input()
while inputWord != "":
    wordsFreq[inputWord] = wordsFreq.get(inputWord, 0) + 1
    inputWord = raw_input()
# One "word - count" line per distinct word.
for word, freq in wordsFreq.items():
    print(word + " - " + str(freq))
There is no need to store the words in a temporary list, you can count them as you read them in
You can do this:
wordsFreq = {}
while True:
    inputWord = raw_input()
    # An empty line ends the input; without this check the loop would never
    # stop and '' would be counted as a word.
    if not inputWord:
        break
    try:
        wordsFreq[inputWord] += 1
    except KeyError:
        # First time this word is seen.
        wordsFreq[inputWord] = 1

Categories