Python Word Count - python

wordsFreq = {}
words = []
while True:
inputWord = raw_input()
if (inputWord != ""):
words.append(inputWord)
else:
break
for word in words:
wordsFreq[word] = wordsFreq.get(word, 0) + 1
for word,freq in wordsFreq.items():
print word + " - " + str(freq)
Apparently my words[] and the for loop is redundant but I had no further explanation than that, can anyone explain to me why it is redundant?

You can skip the step of building a list of words and instead directly create the frequency dict as the user is entering words. I've used defaultdict to avoid having to check if a word has already been added.
from collections import defaultdict
wordsFreq = defaultdict(int)
while True:
word = raw_input()
if not word:
break
wordsFreq[word] += 1
If you aren't allowed to use defaultdict, it could look like this:
wordsFreq = {}
while True:
word = raw_input()
if not word:
break
wordsFreq[word] = wordFreq.get(word, 0) + 1

You can use collections.Counter to do this easily:
from collections import Counter
words = []
input_word = True
while input_word:
input_word = raw_input()
words.append(input_word)
counted = Counter(words)
for word, freq in counted.items():
print word + " - " + str(freq)
Note that an empty string evaluates to false, so rather than breaking when it equals an empty string, we can just use the string as our loop condition.
Edit: If you don't wish to use Counter as an academic exercise, then the next best option is a collections.defaultdict:
from collections import defaultdict
words = defaultdict(int)
input_word = True
while input_word:
input_word = raw_input()
if input_word:
words[input_word] += 1
for word, freq in words.items():
print word + " - " + str(freq)
The defaultdict ensures all keys will point to a value of 0 if they havn't been used before. This makes it easy for us to count using one.
If you still want to keep your list of words as well, then you would need to do that in addition. E.g:
words = []
words_count = defaultdict(int)
input_word = True
while input_word:
input_word = raw_input()
if input_word:
words.append(input_word)
words_count[input_word] += 1

I think your teacher was trying to say you can write the loop like this
wordsFreq = {}
while True:
inputWord = raw_input()
if (inputWord != ""):
wordsFreq[inputWord] = wordsFreq.get(inputWord, 0) + 1
else:
break
for word,freq in wordsFreq.items():
print word + " - " + str(freq)
There is no need to store the words in a temporary list, you can count them as you read them in

You can do this:
wordsFreq = {}
while True:
inputWord = raw_input()
try:
wordsFreq[inputWord] = wordsFreq[inputWord] + 1
except KeyError:
wordsFreq[inputWord] = 1

Related

Getting output as character and it's occurrence in the string

The input I gave is like this
Input- abccdddeffg
and the output I want is character and it's occurrence number
Output- a1b1c2d3e1f2g1
my code
uni = []
string = 'abcccdd'
for i in range(0, len(string)):
for j in range(i+1, len(string)):
if (string[i] == string[j]):
uni.append(string[i])
for oc in uni:
cou= uni.count(oc)
print(oc,cou)
Thanks in advance
You can use the Counter from collections to get the count of every character in the list. Then use a forloop to generate your result and use set to make sure no character is repeated in the result.
from collections import Counter
string = "abccdddeffg"
counts = Counter(string)
sets = set()
result = []
for s in string:
if s not in sets:
result.append(f"{s}{counts[s]}")
sets.add(s)
result = ''.join(result)
print(result)
Firstly, your output contains an error, there should be 1 next to e as it was there in first occurring characters.
After clearing this, this is what you need:
import collections
s = "abccdddeffg" # your string
a = dict((letter,s.count(letter)) for letter in set(s))
a = collections.OrderedDict(sorted(a.items()))
answer = "" # to store the result
for i,j in zip(a.keys(),a.values()):
answer+= i + str(j)
print(answer)
answer will return:
'a1b1c2d3e1f2g1'
Here is a simpler approach:
def freq_string(string):
output, buffer = "", ""
for letter in string:
if buffer != letter:
buffer = letter
output += f"{letter}{string.count(letter)}"
return output
Note: this does assume that same characters are in succession rather than spread randomly around the string.
Assuming your input string is sorted and has at least one letter, you can do a simple loop to handle it:
if len(string) == 1:
# print out the only string and 1 as its occurences
print(string + '1')
else:
# initialize first string, its counter, and our result string
prev = string[0]
counter = 1
result = ''
# loop over each letter
for letter in string[1:]:
curr = letter
# if current letter is different from previous letter,
# concat the result and refresh our counter
# else just increase the counter
if curr != prev:
result = result + prev + str(counter)
counter = 1
else:
counter = counter + 1
prev = curr
# don't forget to handle the last case
result = result + prev + str(counter)
print(result)
in the simplest way:
inp = 'abccdddeffg'
l=[]
o=""
for i in inp:
if i in l:
pass
else:
l.append(i)
o+="{}{}".format(i,inp.count(i))
print(o)
output
'a1b1c2d3e1f2g1'

My code when run prints the output before it. I want it to print only what is required of that specific count number

Why does my code print the output before in the while loop. I want to print only the count # and the encrypted text for that count #.
put list_text2 inside your loop.
while count < 26:
list_text2 = []
for letter in list_text1:
I'm assuming that you're appending to list_test2 each time for a reason, so rather than sticking it outside of the loop, the following code should just slice the latest update off of the tail end of it, and print just that part only:
import string
text_to_encrypt = ('Please enter some text to encrypt: ')
offset = list(range(1,27))
alphabet = list(string.ascii_lowercase)
list_text2 = []
encrypted_text = ''
list_text1 = list(text_to_encrypt.lower())
adjusted_list = 2*alphabet
count = 1
while count < 26:
for letter in list_text1:
if letter not in alphabet:
list_text2 += letter
else:
index = adjusted_list.index(letter)
list_text2 += adjusted_list[index + offset[count]]
list_text2_len = len(list_text2)
slice_start_index = list_text2_len - len(text_to_encrypt)
print(''.join(list_text2[slice_start_index:list_text2_len]))
encrypted_text = ''.join(list_text2)
count += 1

My anagram code doesn't account for multiple letters

So my code gets two words, and checks if one is the anagram of another one.
However doesn't work if multiple letters are exchanged, although I tried to account for that.
storedword = input("Enter your primary word \t")
global word
word = list(storedword)
word3 = input("Enter anagram word \t")
word3lowercase = word3.lower()
anaw = list(word3lowercase)
counter = int(0)
letterchecker = int(0)
listlength = len(word)
newcounter = int(0)
if len(anaw) != len(word):
print ("not anagram")
if len(anaw) == len(word):
while counter < listlength and newcounter < listlength:
tempcount = 0
if anaw[counter] == word[newcounter]:
temp = word[newcounter]
word[newcounter] = word[tempcount]
word[tempcount]=temp
letterchecker +=1
counter +=1
tempcount +=1
newcounter = int(0)
else:
newcounter +=1
if counter == len(word):
print ("anagram")
else:
print ("not anagram")
I think it's gone somewhere wrong after the if len(anaw) section, for example if the primary word is "hannah", and the secondary word is "hannnn", it thinks it's an anagram.
There is much simpler logic that can be implemented here, even without using sorted and such. Let's assume you have a function anagram:
def anagram(word1, word2):
if len(word1) != len(word2):
return False
def char_count(word):
char_count = {}
for c in word:
char_count[c] = char_count.get(c, 0) + 1
return char_count
cr1 = char_count(word1)
cr2 = char_count(word2)
return cr1 == cr2
You can test this with:
>>> print(anagram("anagram", "aanragm"))
True
>>> print(anagram("anagram", "aangtfragm"))
False
And for future readers, a super simple pythonic solution might be using Counter:
from collections import Counter
>>> Counter(word1) == Counter(word2)
Or using sorted:
>>> sorted(word1) == sorted(word2)
newcounter = int(0)
This is the line that causes the trouble (in the while loop).
Because of it you start checking the word from the beginning again.
I think you want it to be newcounter=letterchecker.
Since already used characters are put to the front of word they are ignored if you start with letterchecker
Tell me if it works
Edit:Checked with example given, seems to work.
Without using sort you could use the following approach. It removes a letter from an array of a characters of the second word. The words are only anagrams if there are no letters left (and the words are the same length to start with and have a length larger than zero):
word1="hannah"
word2="nahpan"
chars1= list(word1)
chars2= list(word2)
if len(chars1)==len(chars2) and len(chars1)>0:
for char in chars1:
if char not in chars2:
break
chars2.remove(char)
if len(chars2)==0:
print "Both words are anagrams"
else:
print "Words are not anagrams"
[EDIT THIS IS JUST FOR PALINDROMES I CANT READ]
Here is something a bit more simple:
storedword = input("Enter your primary word \t")
word3 = input("Enter anagram word \t")
if storedword == word3[::-1]:
print "Is Anagram"
else:
print "Is not anagram"

Counting consecutive repeats in a string and returning a value in python

Asked my friend to give me an assignment for me to practice. It is:
If a user enters a string "AAABNNNNNNDJSSSJENDDKEW" the program will return
"3AB6NDJ2SJEN2DKEW" and vice versa.
This what I tried so far:
from collections import Counter
list_user_input =[]
list_converted_output=[]
current_char = 0 #specifies the char it is reading
next_char = 1
cycle = 0 # counts number of loops
char_repeat = 1
prev_char=""
count = 1
user_input = input("Enter your string: ")
user_input_strip = user_input.strip()
user_input_striped_replace = user_input_strip.replace(" ", "").lower()
list_user_input.append(user_input_striped_replace[0:len(user_input_striped_replace)])
print(list_user_input)
print(user_input_striped_replace)
I have "cleaned" the code so it removes white spaces and keeps it in low cap
Here is where I am stuck - the logics. I was thinking to go the through the string one index at a time and compare the next on to the other. Is this the wright way to go about it? And I'm not even sure about the loop construction.
#counter = Counter(list_user_input)
#print(counter)
#while cycle <= len(user_input_striped_replace):
for letter in user_input_striped_replace:
cycle+=1
print("index nr {}, letter: ".format(current_char)+letter +" and cycle : " + str(cycle))
current_char+=1
if letter[0:1] == letter[1:2]:
print("match")
print("index nr {}, letter: ".format(current_char)+letter +" and cycle : " + str(cycle))
current_char+=1
Counter is a good choice for such task but about the rest you can use sorted to sort the items of Counter then use a list comprehension to create the desire list then concatenate with join :
>>> from collections import Counter
>>> c=Counter(s)
>>> sor=sorted(c.items(),key=lambda x:s.index(x[0]))
>>> ''.join([i if j==1 else '{}{}'.format(j,i) for i,j in sor])
'3AB7N3D2J3S2EKW'
I'd do it with regular expressions. Have a look at those.
Spoiler:
import re
def encode(s):
return re.sub(r'(.)\1+', lambda m: str(len(m.group(0)))+m.group(1), s)
def decode(e):
return re.sub('(\d+)(.)', lambda m: int(m.group(1))*m.group(2), e)
s = "AAABNNNNNNDJSSSJENDDKEW"
e = encode(s)
print(e, decode(e) == s)
Prints:
3AB6NDJ3SJEN2DKEW True
Your "and vice versa" sentence sounds like the program needs to detect itself whether to encode or to decode, so here's that (proof of correctness left as an exercise :-)
def switch(s):
e = re.sub(r'(\D)\1+', lambda m: str(len(m.group(0)))+m.group(1), s)
d = re.sub('(\d+)(.)', lambda m: int(m.group(1))*m.group(2), s)
return e if e != s else d

Matching input letters with a dictionary in Python

I'm trying to make a program that will read in words from a .txt file and having the user input letters of own choosing, and the program will give print out all the matches.
This is what I got so far:
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = set(letters)
lista = []
for a_line in words:
a_line = a_line.strip()
lineword = set(a_line)
if compare >= lineword:
lista.append(rad)
print lista
Now this works only to a certain degree. It does match the user input with the content of the .txt file, but I want it to be more precise. For example:
If I put in "hrose" it will find me "horse", but it will also find me "roses" with two s, since it only compares elements and not amount
How can I make the program to only use the specified letters?
You can use Counter:
from collections import Counter
def compare(query, word):
query_count = Counter(query)
word_count = Counter(word)
return all([query_count[char] >= word_count[char] for char in word])
>>> compare("hrose", "rose")
True
>>> compare("hrose", "roses")
False
Counters are your friend
from collections import Counter
fil = open("example.txt", "r")
words = [(a.strip(), Counter(a.strip())) for a in fil.readlines()]
letters = raw_input("Type in letters: ")
letter_count = Counter(letters)
word_list = []
for word, word_count in words:
if all([letter_count[char] >= word_count[char] for char in word]):
word_list.append(word)
print word_list
looking at the comments, it's possible you may only want exact matches, if so, you don't even need a counter
fil = open("example.txt", "r")
words = [(a.strip(), sorted(a.strip())) for a in fil.readlines()]
letters = sorted(raw_input("Type in letters: "))
word_list = [word for word, sorted_word in words if letters == sorted_word]
print word_list
you can map a mapping dictionary with key as the letters in the word and value being how many times it occurs in that word.
Now just compare two dictionaries.
fil = open("example.txt", "r")
words = fil.readlines()
letters = raw_input("Type in letters: ")
compare = list(letters)
letter_dict = {}
for letter in compare:
try:
letter_dict[letter] += 1
except KeyError:
letter_dict[letter] = 0
lista = []
for a_line in words:
a_line = a_line.strip()
lineword = list(a_line)
word_dict = {}
for letter in lineword:
try:
word_dict[letter] += 1
except KeyError:
word_dict[letter] = 0
flag = True
for key, value in letter_dict.items():
if key not in word_dict or word_dict[key] < value:
flag = False
break;
if flag:
lista.append(a_line)
print lista
one approach you could follow is to use set fundtions:
either use issubset/issuperset
set("horse").issubset(set("hrose")) #returs True
set("horse").issubset(set("roses")) #returns False
or
set("horse").difference(set("hrose")) #returns empty set based on set length you know close call
set("horse").difference(set("roses")) #returns set(['h'])
In the second approach, if you have the choice to choose among multiple options, you could go for result with small length.

Categories