Check if they are same - python

I want to read from text file and print the first three words having the same initial three letters. I can get the first 3 initials but I cannot check if they are same or not.
Here is my code:
def main(path="words.txt"):
    """Print the first three letters of every word in a text file.

    Each whitespace-separated token is printed on its own line as a
    one-element list holding its first three characters, e.g. ``['whe']``.

    Args:
        path: File to read. Defaults to ``words.txt``.
    """
    # Read-only access is enough, and the context manager closes the file
    # even if printing fails part-way through.
    with open(path, "r") as f:
        # The loop that prints the initial letters
        for word in f.read().split():
            # A token contains no whitespace, so splitting it again could only
            # ever yield the token itself: take its prefix directly.
            initials = [word[:3]]
            print(initials)
words.txt
when, where, loop, stack, wheel, wheeler
output

You can use a mapping from the first 3 letters to the list of words. collections.defaultdict could save you a few keystrokes here:
from collections import defaultdict
def get_words(path='words.txt', prefix_len=3, group_size=3):
    """Return the first ``group_size`` words that share the same prefix.

    The file is expected to hold comma-separated words, possibly spread over
    several lines. Words are grouped by their first ``prefix_len`` characters
    and the first group to reach ``group_size`` members is returned.

    Args:
        path: File to read. Defaults to ``words.txt``.
        prefix_len: Number of leading characters that must match.
        group_size: Number of matching words to collect.

    Returns:
        The matching words in file order, or an empty list when no prefix
        occurs ``group_size`` times.
    """
    d = defaultdict(list)
    with open(path) as f:
        for line in f:
            for word in line.split(','):
                # Strip the blank after each comma and, crucially, the
                # newline that would otherwise stick to a line's last word.
                word = word.strip()
                if not word:
                    continue
                prefix = word[:prefix_len]
                d[prefix].append(word)
                if len(d[prefix]) == group_size:
                    return d[prefix]
    return []


if __name__ == '__main__':
    print(get_words())  # ['when', 'where', 'wheel']

This code snippet groups the words by their first 3 letters:
def main(path="words.txt"):
    """Print the first three words that start with the same three letters.

    Words are read token by token; as soon as one three-letter prefix has
    been seen three times, those three words are printed and processing
    stops. If no prefix reaches three words, the whole grouping is printed.

    Args:
        path: File to read. Defaults to ``words.txt``.
    """
    # a dict where the first 3 letters are the keys and the
    # values are lists of words
    my_dict = {}
    with open(path, "r") as f:
        for line in f:
            for token in line.split():
                # The sample file separates words with ", ", so drop the
                # commas that would otherwise end up in the printed words.
                word = token.strip(",")
                if not word:
                    continue
                s = word[:3]
                # setdefault adds the 3 letters as a key on first sight
                my_dict.setdefault(s, []).append(word)
                if len(my_dict[s]) == 3:
                    print(my_dict[s])
                    return
    # this will only print if there are no 3 words with the same start letters
    print(my_dict)
This stops the processing (I used a return statement) if you get to 3 words with the same 3 letters.

You can use a dictionary here, with the first 3 characters as the key. Example:
# Maps each three-character prefix to the words seen with that prefix.
d = {}
key_with_three_element = ''
# Read-only access is enough; the context manager closes the file.
with open("words.txt", "r") as f:
    for word in f.read().split():
        # Tolerate comma-separated input such as "when, where, ...".
        word = word.strip(',')
        if not word:
            continue
        key = word[:3]
        d.setdefault(key, []).append(word)
        if len(d[key]) == 3:
            key_with_three_element = key
            break
# Fall back to an empty list instead of raising KeyError when no prefix
# was seen three times (the key is still '' in that case).
print(d.get(key_with_three_element, []))
Output:
['when', 'where', 'wheel']

def main(path="words.txt"):
    """Print the first three words that share their first three letters.

    Nothing is printed when no three-letter prefix occurs three times.

    Args:
        path: File to read. Defaults to ``words.txt``.
    """
    # Prefix -> words seen so far. This mapping has to exist before the loop
    # can look anything up in it.
    record = {}
    with open(path, "r") as f:
        for token in f.read().split():
            # Tolerate comma-separated input such as "when, where, ...".
            word = token.strip(",")
            if not word:
                continue
            key = word[:3]
            record.setdefault(key, []).append(word)
            if len(record[key]) == 3:
                print(record[key])
                break

Related

How can I get two txt files by finding common occurrences?

I need to know which English words were used in the Italian chat and to count how many times they were used.
But in the output I also have the words I didn't use in the example chat (baby-blue-eyes': 0)
# Seed the tally: every word of the English dictionary starts at zero.
with open("dizionarioen.txt") as dictionary_file:
    english_words = {
        token: 0
        for row in dictionary_file
        for token in row.strip().split()
    }

# Bump the tally for each chat token that is a known dictionary word.
with open("_chat.txt") as chat_file:
    for row in chat_file:
        for token in row.strip().split():
            if token not in english_words:
                continue
            english_words[token] += 1

print(english_words)
You can simply iterate over your result and remove all elements that have value 0:
english_words = {}

# Register every dictionary word with a starting count of zero.
with open("dizionarioen.txt") as f:
    for line in f:
        english_words.update(dict.fromkeys(line.strip().split(), 0))

# Count the chat words, ignoring anything that is not in the dictionary.
with open("_chat.txt") as f:
    for line in f:
        for word in line.strip().split():
            if word in english_words:
                english_words[word] = english_words[word] + 1

# Keep only the words that were actually used (non-zero count).
result = {}
for key, value in english_words.items():
    if value:
        result[key] = value
print(result)
Also here is another solution that allows you to count words with usage of Counter:
from collections import Counter
# Collect the dictionary vocabulary into a set for fast membership tests.
all_words = set()
with open("dizionarioen.txt") as f:
    for line in f:
        all_words.update(line.split())

# Tally only those chat tokens that belong to the vocabulary.
result = Counter()
with open("_chat.txt") as f:
    for line in f:
        for word in line.split():
            if word in all_words:
                result[word] += 1
print(result)
If you want to remove the words without occurrence after indexing, just delete these entries:
# Snapshot the zero-count keys first: a dict must not change size while it
# is being iterated.
unused = [w for w, count in english_words.items() if count == 0]
for w in unused:
    del english_words[w]
Then, your dictionary only contains words that occurred. Was that the question?

Printing 5 words whose last 3 letters are the same in Python

My program should read the content of a text file and find similar words according to the last 3 characters and print out 5 of these similar words in any order. For ex: warm, arm, charm...
I wrote some code but I could not complete it.
def main():
# Groups the words of words.txt by their last three characters and prints
# the first group that reaches five words.
# An empty dict: maps a 3-character word ending to the list of words seen with it
my_dict = {}
with open("words.txt", "r") as f:
for line in f:
for word in line.strip().split(" "):
# the key is the last three characters of the word
s = word[-3:]
if s not in my_dict:
# add 3 letters as the key
my_dict[s] = []
my_dict[s].append(word)
# stop as soon as one ending has collected 5 words
if len(my_dict[s]) == 5:
print(my_dict[s])
return
# this will only print if no ending has collected 5 words
print(my_dict)
# NOTE(review): main() prints its result itself and returns nothing, so
# wrapping the call in print() additionally prints "None".
print(main())
My output is
{'ger': ['finger'], 'iss': ['premiss'], 'arm': ['arm'], 'ike': ['like']}
Putting together:
def main(path="words.txt"):
    """Print the first five words that end with the same three letters.

    Words are grouped by their last three characters. As soon as one group
    holds five words it is printed and processing stops; if no group gets
    that far, the whole grouping is printed instead.

    Args:
        path: File to read. Defaults to ``words.txt``.
    """
    # An empty dict: 3-letter ending -> list of words with that ending
    my_dict = {}
    with open(path, "r") as f:
        for line in f:
            # split() without an argument never yields empty strings, unlike
            # split(" ") when two blanks follow each other.
            for token in line.split():
                # Punctuation glued to a word would otherwise become part of
                # its ending ("warm," -> "rm,").
                word = token.strip(",.;:!?")
                if not word:
                    continue
                s = word[-3:]
                # setdefault adds the 3 letters as a key on first sight
                my_dict.setdefault(s, []).append(word)
                if len(my_dict[s]) == 5:
                    print(my_dict[s])
                    return
    # this will only print if no ending was shared by 5 words
    print(my_dict)


if __name__ == "__main__":
    # main() prints its own result and returns None, so wrapping the call in
    # print() would add a stray "None" line to the output.
    main()
# VARIABLES
end_letters = []  # distinct three-letter endings, in order of first appearance
word_list = []    # every word of the file (one per line), newline removed

# Opening the file; the context manager closes it even if reading fails
with open("words.txt", "r") as words_file:
    for line in words_file:
        # Strip the newline so the ending really is the last 3 letters. This
        # also makes a final line without a trailing newline behave like the rest.
        word = line.strip()
        if not word:
            continue
        # adding the word to word_list
        word_list.append(word)
        # remember the ending unless it already exists
        ending = word[-3:]
        if ending not in end_letters:
            end_letters.append(ending)

# using the end_letters list, print the words from word_list grouped by
# their last 3 letters
for criteria in end_letters:
    for word in word_list:
        if word[-3:] == criteria:
            print(word)

How do I pass my file into the top function to split it?

my program is supposed to make a list of the count of each word in a file. I am trying to pass my file into the first function to remove all punctuation.. how can I make my file into a string to do this? Thanks.
def extract_words(string):
    """Return a list containing each word in the string, lower-cased.

    A word is a maximal run of alphabetic characters; punctuation, digits
    and whitespace only act as separators and are dropped.

    Args:
        string: The text to split.

    Returns:
        The words in order of appearance, e.g. ``"It's 4 PM"`` gives
        ``['it', 's', 'pm']``.
    """
    l = []
    word = ''
    # The extra trailing space guarantees the final word is flushed.
    for c in string + ' ':
        if c.isalpha():
            word += c
        elif word:
            l.append(word.lower())
            word = ''
    return l


def count_words(filename):
    """Return a dictionary with the number of occurrences of each word in the file.

    The whole file is read into one string and split with ``extract_words``,
    so punctuation is ignored and counting is case-insensitive.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        A dict mapping each lower-cased word to its count.
    """
    # create a dictionary
    mydict = {}
    # open the file and read the text
    with open(filename, 'r') as myfile:
        # read() turns the file into a single string, which is exactly what
        # extract_words expects
        for word in extract_words(myfile.read()):
            # count the number of times each word occurs
            mydict[word] = mydict.get(word, 0) + 1
    # return the dictionary with the word count
    return mydict
if a is the string and you want the output dictionary as b:
from collections import Counter
# Make the file into a string:
with open('file.txt', 'r') as f:
    a = f.read()
# split() already treats newlines as separators. Deleting them first would
# glue the last word of each line onto the first word of the next line.
# Tokens that are not purely alphabetic (e.g. "word," or "42") are skipped.
list_word = [i for i in a.split() if i.isalpha()]
# b maps each remaining word to the number of times it occurs
b = Counter(list_word)

Matching input letters with a dictionary in Python

I'm trying to make a program that will read in words from a .txt file and having the user input letters of own choosing, and the program will give print out all the matches.
This is what I got so far:
# Read every line of the word list; the context manager closes the file.
with open("example.txt", "r") as fil:
    words = fil.readlines()
letters = raw_input("Type in letters: ")
# The distinct letters typed by the user (repeat counts are lost in a set).
compare = set(letters)
lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = set(a_line)
    # Keep the word when each of its distinct letters was typed by the user.
    if compare >= lineword:
        # Append the word that was just tested (the loop variable is a_line).
        lista.append(a_line)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(lista)
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find me "horse", but it will also find me "roses" with two s, since it only compares elements and not amount
How can I make the program to only use the specified letters?
You can use Counter:
from collections import Counter
def compare(query, word):
    """Return True if ``word`` can be spelled with the letters in ``query``.

    Every letter may be used at most as often as it occurs in ``query``, so
    ``compare("hrose", "roses")`` is False: only one "s" is available.

    Args:
        query: The letters that are available.
        word: The candidate word.

    Returns:
        True when no letter of ``word`` is needed more often than ``query``
        provides it, otherwise False. An empty ``word`` always matches.
    """
    # Counter subtraction drops zero and negative counts, so what remains is
    # exactly the letters that ``word`` needs beyond what ``query`` offers.
    shortfall = Counter(word) - Counter(query)
    return not shortfall
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False
Counters are your friend
from collections import Counter
# Pair every word with its letter counts; the context manager closes the
# file, and each line is stripped only once.
with open("example.txt", "r") as fil:
    words = [(a, Counter(a)) for a in (line.strip() for line in fil)]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
word_list = []
for word, word_count in words:
    # A word matches when no letter is needed more often than it was typed;
    # the generator lets all() stop at the first letter that falls short.
    if all(letter_count[char] >= word_count[char] for char in word):
        word_list.append(word)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(word_list)
looking at the comments, it's possible you may only want exact matches, if so, you don't even need a counter
# Pair every word with its sorted letters; the context manager closes the
# file once the list has been built.
with open("example.txt", "r") as fil:
    words = [(a, sorted(a)) for a in (line.strip() for line in fil)]
letters = sorted(raw_input("Type in letters: "))
# Two strings use exactly the same letters when their sorted letters are equal.
word_list = [word for word, sorted_word in words if letters == sorted_word]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(word_list)
You can build a dictionary for each word, with the letters of the word as keys and the number of times each letter occurs in that word as values.
Now just compare the two dictionaries.
# Read every line of the word list; the context manager closes the file.
with open("example.txt", "r") as fil:
    words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = list(letters)
# How many times each typed letter is available.
letter_dict = {}
for letter in compare:
    # The first occurrence must count as 1, not 0.
    letter_dict[letter] = letter_dict.get(letter, 0) + 1
lista = []
for a_line in words:
    a_line = a_line.strip()
    lineword = list(a_line)
    # How many times the word needs each of its letters.
    word_dict = {}
    for letter in lineword:
        word_dict[letter] = word_dict.get(letter, 0) + 1
    flag = True
    # The word may use only the typed letters, each at most as often as it
    # was typed. So the check walks over the letters the WORD needs and
    # looks them up among the letters that are available.
    for key, value in word_dict.items():
        if key not in letter_dict or letter_dict[key] < value:
            flag = False
            break
    if flag:
        lista.append(a_line)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(lista)
One approach you could follow is to use set functions:
either use issubset/issuperset
# Sets keep only distinct letters, so these tests ignore how often a letter occurs.
set("horse").issubset(set("hrose")) # returns True: every letter of "horse" occurs in "hrose"
set("horse").issubset(set("roses")) # returns False: "roses" contains no "h"
or
# The size of the difference tells how many distinct letters are missing.
set("horse").difference(set("hrose")) # returns an empty set: no letter of "horse" is missing from "hrose"
set("horse").difference(set("roses")) # returns set(['h']): "h" is the only letter missing from "roses"
In the second approach, if you have the choice to choose among multiple options, you could go for result with small length.

MapReduce to count the frequency of the number consonants in words from a text file

I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job but instead of consonants it counts the frequency of whole words in text file. I know there is only a bit change which loops deeper into the word (I think).
def freqCounter(file1, file2):
    """Count how many words of each length occur in two text files.

    Both files are read, joined, lower-cased and split on whitespace. The
    punctuation marks ``. ? ! , " ' : ;`` are removed from every token
    before its length is measured; tokens that consist of punctuation only
    are not counted as words.

    Args:
        file1: Path of the first text file.
        file2: Path of the second text file.

    Returns:
        A dict mapping word length to the number of words of that length.
        If either file cannot be read, a message is printed and an empty
        dict is returned.
    """
    freq_dict = {}
    # get rid of punctuation
    punctuation = re.compile(r'[.?!,"\':;]')  # compiled once, reused for every word
    try:
        # open two files at once; only the reading can raise IOError, so the
        # try block is limited to it
        with open(file1, "r") as infile, open(file2, "r") as infile2:
            text1 = infile.read()
            text2 = infile2.read()
    except IOError as e:  # exception catch for error while reading the file
        # Parenthesised form works on both Python 2 and Python 3.
        print('Operation failed: %s' % e.strerror)
        return freq_dict
    joined = " ".join((text1, text2))
    for word in joined.lower().split():
        # remove punctuation marks
        word = punctuation.sub("", word)
        if not word:
            # e.g. "..." - nothing is left, so there is no word to count
            continue
        l = len(word)
        # start at 0 for a length seen for the first time, then add one
        freq_dict[l] = freq_dict.get(l, 0) + 1
    return freq_dict  # return the dictionary
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
vowels = 'aeiou'
# Count the consonants of every word. Only alphabetic characters that are
# not vowels (in either case) qualify, so the trailing '.' of "sun." and
# capital vowels are not mistaken for consonants.
word_lengths = [
    sum(1 for ch in word if ch.isalpha() and ch.lower() not in vowels)
    for word in words.split()
]
# c maps "number of consonants" -> "number of words with that many"
c = Counter(word_lengths)
freq_dict = dict(c)
A simple solution
def freqCounter(_str):
    """Count how many words contain each number of consonants.

    A consonant is any alphabetic character other than a, e, i, o, u in
    either case; digits and punctuation are ignored.

    Args:
        _str: The text to analyse; words are separated by whitespace.

    Returns:
        A dict mapping a consonant count to the number of words that have
        exactly that many consonants.
    """
    freq_dict = {}
    for word in _str.split():
        # isalpha() rules out every symbol and digit, not just a hand-picked
        # list; lower() makes capital vowels count as vowels.
        c = sum(1 for letter in word
                if letter.isalpha() and letter.lower() not in "aeiou")
        freq_dict[c] = freq_dict.get(c, 0) + 1
    return freq_dict


txt = "There is no new thing under the sun."
table = freqCounter(txt)
# Sorted keys give the ascending "1 : 2, 2 : 3, ..." listing asked for.
for k in sorted(table):
    print(k, ":", table[k])
How about this?
# Create a small sample file to run the counter on.
with open('conts.txt', 'w') as fh:
    fh.write('oh my god becky look at her butt it is soooo big')

# Letters treated as consonants ("y" included).
consonants = "bcdfghjklmnpqrstvwxyz"


def count_cons(_file):
    """Count how many words in a file contain each number of consonants.

    Args:
        _file: Path of the text file to analyse.

    Returns:
        A dict mapping a consonant count to the number of words that have
        exactly that many consonants. Matching is case-insensitive.
    """
    results = {}
    with open(_file, 'r') as fh:
        for line in fh:
            # split() without an argument never yields empty strings, so
            # repeated blanks are not counted as zero-consonant words.
            for word in line.split():
                # lower() lets capital consonants count as well
                conts = sum(1 for letter in word.lower() if letter in consonants)
                results[conts] = results.get(conts, 0) + 1
    return results


# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(count_cons('conts.txt'))
Missed the results
{1: 5, 2: 5, 3: 1, 4: 1}
[Finished in 0.0s]

Categories