how to disregard counting a certain word in python dictionary - python

Hello, I'm wondering how to make a dictionary that does not count the word 'the', or that removes it from the dictionary entirely, so I came up with this code:
counts = dict()
print('Enter a line of text:')
line = input('')
words = line.split()
print('Words:', words)
print('counting...')
for word in words :
if words != 'the':
counts[word]= counts.get(word,0)+1
else:
counts[word] = counts.get(word,0)
print('Counts', counts)
can you help me to make it right?

While @Steve's answer is correct, the code can be prettified and simplified a bit:
from collections import Counter
line = input('Enter a line of text:')
words = line.split()
print('Words:', words)
print('counting...')
c = Counter(words)
del c['the'] # remove 'the' key from counter
print('Counts', dict(c))

Why not just this? :
counts = dict()
print('Enter a line of text:')
line = input('')
words = line.split()
print('Words:', words)
print('counting...')
for word in words :
if word != 'the':
counts[word]= counts.get(word,0)+1
print('Counts', counts)

Related

Counting syllables in a list of strings Python without using RE

I have to count the number of syllables in a text file. My problem is that I don't know how to iterate over each character of each string. My idea was to check whether a letter is a vowel and, if the following letter is not a vowel, increase the count by 1. But I can't increment "letter" to look at the next character. I've also tried to use "range", but I have problems with that as well. What can I try? Thank you.
PS: I can only use Python built-in methods.
txt = ['countingwords', 'house', 'plant', 'alpha', 'syllables']
This is my code so far.
def syllables(text_file):
count = 0
vowels = ['a','e','i','o','u','y']
with open(text_file, 'r') as f:
txt = f.readlines()
txt = [line.replace(' ','') for line in txt]
txt = [line.replace(',','') for line in txt]
txt = [y.lower() for y in txt]
for word in txt:
for letter in word:
if letter is in vowel and [letter + 1] is not in vowel:
count += 1
You might try this:
lines = ["You should count me too"]
count = 0
vowels = "aeiouy"
for line in lines:
    # split() with no argument ignores repeated spaces and stray newlines.
    for word in line.lower().split():
        # A syllable is approximated as the start of a run of vowels: a vowel
        # is counted only when the character before it is not a vowel.
        for i, char in enumerate(word):
            if char in vowels and (i == 0 or word[i - 1] not in vowels):
                count += 1
print(count)  # -> 5

If the character 'p' is in a word, add the word to a list variable

So my assignment is this: Using the file school_prompt.txt, if the character 'p' is in a word, then add the word to a list called p_words.
I'm not sure what progress I've made but I've gotten stuck.
wordsFile = open("school_prompt.txt", 'r')
words = wordsFile.read()
wordsFile.close()
wordList = words.split()
p_words = 0
for words in wordList:
if words[0] == 'p':
p_words += 1
What you want is pretty straightforward; I'm not sure why you are making p_words a count of words instead of a list of words.
p_words = [word for word in wordList if 'p' in word]
As answered by Henrik, this could be done by using the if statement. Also, p_words should be a list, not an integer count.
file=open("school_prompt.txt","r")
p_words=[]
file=file.read()
wordlist=file.split()
for i in wordlist:
if 'p' in i:
p_words.append(i)
This works, and I tried to do it in one line of code using a list comprehension but couldn't get it to work.
fileref = open('school_prompt.txt', 'r')
words = fileref.read().split()
p_words = [word for word in words if 'p' in word]
We need a for loop inside a for loop to check for the words that contain the letter "p".
Here's the code.
file = open("school_prompt.txt", "r")
content = file.readlines()
p_words = []
for lines in content:
lines = lines.split()
for words in lines:
if "p" in words:
p_words.append(words)
print(p_words)

Sorting and counting words from a text file

I'm new to programming and stuck on my current program. I have to read in a story from a file, sort the words, and count the number of occurrences of each word. It counts the words, but it won't sort them, remove the punctuation, or remove duplicate words. I'm lost as to why it's not working. Any advice would be helpful.
ifile = open("Story.txt",'r')
fileout = open("WordsKAI.txt",'w')
lines = ifile.readlines()
wordlist = []
countlist = []
for line in lines:
wordlist.append(line)
line = line.split()
# line.lower()
for word in line:
word = word.strip(". , ! ? : ")
# word = list(word)
wordlist.sort()
sorted(wordlist)
countlist.append(word)
print(word,countlist.count(word))
The main problem in your code is at this line (line 9):
wordlist.append(line)
You are appending the whole line into the wordlist, I doubt that is what you want. As you do this, the word added is not .strip()ed before it is added to wordlist.
What you have to do is to add the word only after you have strip()ed it and make sure you only do that after you checked that there are not other same words (no duplicates):
ifile = open("Story.txt",'r')
lines = ifile.readlines()
wordlist = []
countlist = []
for line in lines:
# Get all the words in the current line
words = line.split()
for word in words:
# Perform whatever manipulation to the word here
# Remove any punctuation from the word
word = word.strip(".,!?:;'\"")
# Make the word lowercase
word = word.lower()
# Add the word into wordlist only if it is not in wordlist
if word not in wordlist:
wordlist.append(word)
# Add the word to countlist so that it can be counted later
countlist.append(word)
# Sort the wordlist
wordlist.sort()
# Print the wordlist
for word in wordlist:
print(word, countlist.count(word))
Another way you could do this is using a dictionary, storing the word as the key and the number of occurrences as the value:
ifile = open("Story.txt", "r")
lines = ifile.readlines()
word_dict = {}
for line in lines:
# Get all the words in the current line
words = line.split()
for word in words:
# Perform whatever manipulation to the word here
# Remove any punctuation from the word
word = word.strip(".,!?:;'\"")
# Make the word lowercase
word = word.lower()
# Add the word to word_dict
word_dict[word] = word_dict.get(word, 0) + 1
# Create a wordlist to display the words sorted
word_list = list(word_dict.keys())
word_list.sort()
for word in word_list:
print(word, word_dict[word])
You have to provide a key function to the sorting methods.
Try this
r = sorted(wordlist, key=str.lower)
punctuation = ".,!?: "
counts = {}
with open("Story.txt",'r') as infile:
for line in infile:
for word in line.split():
for p in punctuation:
word = word.strip(p)
if word not in counts:
counts[word] = 0
counts[word] += 1
with open("WordsKAI.txt",'w') as outfile:
for word in sorted(counts): # if you want to sort by counts instead, use sorted(counts, key=counts.get)
outfile.write("{}: {}\n".format(word, counts[word]))

Matching input letters with a dictionary in Python

I'm trying to make a program that reads in words from a .txt file, lets the user input letters of their own choosing, and prints out all the matches.
This is what I got so far:
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = set(letters)
lista = []
for a_line in words:
a_line = a_line.strip()
lineword = set(a_line)
if compare >= lineword:
lista.append(rad)
print lista
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find me "horse", but it will also find me "roses" with two s's, since it only compares which elements are present and not how many of each.
How can I make the program to only use the specified letters?
You can use Counter:
from collections import Counter
def compare(query, word):
    """Return True if ``word`` can be spelled using only the letters in ``query``.

    Each letter of ``query`` may be used at most as many times as it occurs
    there, so ``compare("hrose", "roses")`` is False (only one "s" available).
    An empty ``word`` always matches.
    """
    query_count = Counter(query)
    # Iterate over the distinct letters of ``word`` once each; a Counter
    # returns 0 for letters that ``query`` does not contain at all.
    return all(
        query_count[char] >= needed
        for char, needed in Counter(word).items()
    )
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False
Counters are your friend
from collections import Counter
fil = open("example.txt", "r")
words = [(a.strip(), Counter(a.strip())) for a in fil.readlines()]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
word_list = []
for word, word_count in words:
if all([letter_count[char] >= word_count[char] for char in word]):
word_list.append(word)
print word_list
looking at the comments, it's possible you may only want exact matches, if so, you don't even need a counter
fil = open("example.txt", "r")
words = [(a.strip(), sorted(a.strip())) for a in fil.readlines()]
letters = sorted(raw_input("Type in letters: "))
word_list = [word for word, sorted_word in words if letters == sorted_word]
print word_list
You can build a mapping dictionary whose keys are the letters in the word and whose values are how many times each letter occurs in that word.
Now just compare two dictionaries.
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = list(letters)
letter_dict = {}
for letter in compare:
try:
letter_dict[letter] += 1
except KeyError:
letter_dict[letter] = 0
lista = []
for a_line in words:
a_line = a_line.strip()
lineword = list(a_line)
word_dict = {}
for letter in lineword:
try:
word_dict[letter] += 1
except KeyError:
word_dict[letter] = 0
flag = True
for key, value in letter_dict.items():
if key not in word_dict or word_dict[key] < value:
flag = False
break;
if flag:
lista.append(a_line)
print lista
One approach you could follow is to use set functions:
either use issubset/issuperset
set("horse").issubset(set("hrose")) #returs True
set("horse").issubset(set("roses")) #returns False
or
set("horse").difference(set("hrose")) #returns empty set based on set length you know close call
set("horse").difference(set("roses")) #returns set(['h'])
In the second approach, if you have the choice to choose among multiple options, you could go for result with small length.

MapReduce to count the frequency of the number consonants in words from a text file

I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job, but instead of consonants it counts the frequency of word lengths in a text file. I think only a small change is needed: looping one level deeper, into each word.
def freqCounter(file1, file2):
    """Count how many words of each length occur in two text files.

    The contents of both files are joined, lower-cased and split on
    whitespace; punctuation is removed from each word before it is measured.

    Returns a dict mapping word length -> number of words of that length.
    If either file cannot be opened or read, the error is printed and an
    empty dict is returned.
    """
    freq_dict = {}
    # Characters treated as punctuation and removed from every word.
    punctuation = re.compile(r'[.?!,"\':;]')
    try:
        # Only the file access can raise IOError, so keep the try body small.
        with open(file1, "r") as infile, open(file2, "r") as infile2:
            text1 = infile.read()
            text2 = infile2.read()
    except IOError as e:
        print('Operation failed: %s' % e.strerror)
        return freq_dict

    joined = " ".join((text1, text2))
    for word in joined.lower().split():
        word = punctuation.sub("", word)
        if not word:
            # The token was nothing but punctuation; it is not a word.
            continue
        length = len(word)
        freq_dict[length] = freq_dict.get(length, 0) + 1
    return freq_dict
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
vowels = 'aeiou'
# Count the consonants in each word. Only alphabetic characters that are not
# vowels (in either case) are counted, so the trailing '.' does not inflate
# the count for "sun." and a capitalised vowel is still treated as a vowel.
word_lengths = [
    sum(1 for ch in word if ch.isalpha() and ch.lower() not in vowels)
    for word in words.split()
]
# Map each consonant count to the number of words having that count.
c = Counter(word_lengths)
freq_dict = dict(c)
A simple solution
def freqCounter(_str):
    """Return a dict mapping consonant count -> number of words with that count.

    ``_str`` is split on whitespace. Within each word only alphabetic
    characters that are not vowels (a, e, i, o, u in either case) are
    counted, so punctuation and digits never count as consonants. A word
    with no consonants is recorded under key 0.
    """
    vowels = "aeiou"
    freq_dict = {}
    for word in _str.split():
        c = sum(
            1 for letter in word
            if letter.isalpha() and letter.lower() not in vowels
        )
        freq_dict[c] = freq_dict.get(c, 0) + 1
    return freq_dict
txt = "There is no new thing under the sun."
table=freqCounter(txt)
for k in table:
print( k, ":", table[k])
How about this?
with open('conts.txt', 'w') as fh:
fh.write('oh my god becky look at her butt it is soooo big')
consonants = "bcdfghjklmnpqrstvwxyz"


def count_cons(_file):
    """Return a dict mapping consonant count -> number of words with that count.

    The file at path ``_file`` is read line by line and each line is split
    on whitespace. A character counts as a consonant when its lower-case
    form appears in the module-level ``consonants`` string.
    """
    results = {}
    with open(_file, 'r') as fh:
        for line in fh:
            # split() with no argument drops the trailing newline and does
            # not produce empty "words" for runs of spaces.
            for word in line.split():
                conts = sum(1 for letter in word.lower() if letter in consonants)
                results[conts] = results.get(conts, 0) + 1
    return results
print count_cons('conts.txt')
Missed the results
{1: 5, 2: 5, 3: 1, 4: 1}
[Finished in 0.0s]

Categories