My program creates a histogram from a dictionary. It first reads in text from a text file. Then it counts and prints out the length of each word. For example, the words "in", "the", and "Florida" would produce a count like this.
Length Count
2 1
3 1
7 1
Because there are only 1 word of length 2, 1 word of length 3 and 1 word of length 7. My text file has hundreds of words. So, there are some words that are of length 10 and greater. But after 10 on the horizontal axis it doesn't show anything. Please help. Below is my code.
import sys
import string
def rem_punc(w):
plst = list(string.punctuation)
for p in plst:
if p in w:
w = w.replace(p,'')
return w
def word_length_processor(text):
d = dict()
for w in text:
w = rem_punc(w)
if len(w) > 0:
n = len(w)
if n not in d:
d[n] = 1
else:
d[n] = d[n] + 1
for k,v in d.items():
print("{0} {1}".format(k,v))
print()
hist_maker(d)
def hist_maker(d):
freq = d
for y in range(300,9,-10):
print ("{:>6} | ".format(y), end="")
for x in range(1,10):
if freq.get(x,0) >= y:
column = "***"
else:
column = " "
print(column, end="")
print()
print(" ------------------------------------------------------")
print(" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15")
arg_string = sys.argv[1]
try:
f = open(arg_string, 'r')
except:
print('Could not open file. Please try again.')
sys.exit()
text = f.read()
f.close()
text = text.split()
print("{0} {1}".format("Length","Count"))
print(word_length_processor(text))
Related
i have list:
lst [a,b,c,d,e]
and then the input:
food
i want the output :
there is 1 d
another input example:
aachen
so the output is:
there is 2 a
there is 1 e
and its doesnt matter the upppercase or lowercase.
You can use collections.Counter() to count the occurrences of each letter, then iterate over your lst and output the number of times each number occurred.
import collections
lst = ['a','b','c','d','e']
word = 'food'
word_count = collections.Counter(word)
for letter in lst:
count = word_count.get(letter)
if count:
print(f"There is {count} {letter}")
Python native way:
l = ['a','b','c','d','e']
m = {}
s = input("Enter input string:")
for f in l:
for k in range(len(s)):
if f == s[k]:
if f in m:
m[f] = m[f] + 1
else:
m[f] = 1
for j in m.keys():
print(s,"has")
print(m[j],j)
I have been trying to write Python 3 code that calculates number of words in a file that do not contain each letter of the alphabet, but the loop seems to be working only on the first letter 'a'. Where is the bug there?
fin = open('words.txt')
def avoids (word, string):
for i in string:
for l in word:
if l == i:
return False
return True
alphabet = 'abcdefghijklmnopqrstuvwxyz'
for f in alphabet:
n = 0
for line in fin:
word = line.strip()
if (avoids (word, f)):
n += 1
print (f, n)
The 'words.txt' file can be downloaded using the following link:
http://thinkpython2.com/code/words.txt
The output appears as follows:
a 57196
b 0
c 0
d 0
e 0
f 0
g 0
h 0
i 0
j 0
k 0
l 0
m 0
n 0
o 0
p 0
q 0
r 0
s 0
t 0
u 0
v 0
w 0
x 0
y 0
z 0
Your problem is that the first time you run the loop you reach the end of the file, to restart at the beggining add this line:
fin.seek(0)
inside your alphabet loop, just like when you reset n = 0.
You need to read the file once you've opened it, and split to get the text as a list of words. Then you can loop over the words and the alphabet:
import string
with open('words.txt', 'r') as f:
text = f.read()
words = text.split()
for letter in string.ascii_lowercase:
print(letter, len([w for w in words if letter not in w]))
I have a txt file with floats separated by blank space I need to to keep only 4 elements in each line. I tried to calculate blankspace. Now i need to shift the rest of the values to the next line and restart.
fname = open("file.txt", 'r')
text = fname.read()
countBlank=0
for line in text:
for char in line:
if char.isspace():
countBlank += 1
if countBlank ==4
You can do it by converting your data and storing it in an array then you can output it to a new file like this:
import numpy as np
fname = open("file.txt", 'r')
text = fname.read()
arr = np.array(text.split())
rows = len(arr) // 4
remain = len(arr) % 4
out = np.full((rows+1, 4), None).astype(float)
out[:rows, :] = arr[:-remain]
out[rows, :remain] = arr[len(arr)-remain:]
np.savetxt('file2.txt', out)
Try this, works for me.
floatlist = fname.read().split(" ")
count = 0
finalstring = ""
for x in floatlist:
count += 1
if count == 4:
finalstring += x + "\n"
else:
finalstring += x + " "
Input:
"1 2 3 4 5 6 7 8"
Output:
"1 2 3 4
5 6 7 8"
How to write into file: (on the end of the existing code)
fname.close()
fname = open("file.txt", "w")
fname.write(finalstring)
fname.close()
classs = input("Class [1, 2 or 3] - ")
if clas
data = f.readlines()
for le:
print(line)
found = True
if found == False:
print("False")
Here is a typical printed output:
John = 10
John = 6
John = 4
I need to be able to create an average just by using the 10, 4, 6 as I need to know a way to isolate the rest and allow the numbers to proceed inorder to create the average score.
If the format of each line is the same, you can use string.split and cast to int:
classs = input("Class [1, 2 or 3] - ")
l = []
if classs =='1':
name = input("What is your name? - ")
datafile = '1.txt'
found = False
with open(datafile, 'r') as f:
data = f.readlines()
for line in data:
if name in line:
print(line)
l.append(int(line.split()[2]))
found = True
if found == False:
print("False")
Then go through the list of numbers and get the average, something like:
total = 0
num = 0
for x in l:
total = total + x
num = num + 1
print(total/num)
one way would be to extract the last 3 numbers for each player from your list (i'm assuming you only need 3, if not this code can be altered for more)
Class = input("Class: ")
dataList = []
file = open('class ' + str(Class) + '.txt', "r")
for line in file:
count = 0
record = line
studentList = record.split(': ')
score = studentList[1].strip('\n')
studentList = [studentList[0], score]
if len(dataList) == 0:
dataList.append(studentList)
else:
while count < len(dataList):
if studentList[0] == dataList[count][0]:
if len(dataList[count]) == 4:
dataList[count][3] = dataList[count][2]
dataList[count][2] = dataList[count][1]
dataList[count][1] = score
break
dataList[count].append(score)
break
elif count == len(dataList) - 1:
dataList.append(studentList)
break
count = count + 1
this will give you a 2D array. each smaller array within will conatin the persons name at index 0 and there three numbers at indecies 1,2 and 3. now that you have these, you can simply work out the average.
AverageScore = []
# this is the array where the student' record (name and highest score) is saved
# in a list
count = 0
while count < len(dataList):
entry = []
# this is whre each student name and score will be temporarily held
entry.append(dataList[count][0])
# this makes it so the array 'entry' contains the name of every student
Sum = 0
frequency = len(dataList[count])
incount = 1
while incount < frequency:
Sum = Sum + int(dataList[count][incount])
incount = incount + 1
average = Sum / (frequency-1)
entry.append(average)
AverageScore.append(entry)
# this appends the name and average score of the student to the larger array
# 'AverageScore'
count= count + 1
# the count is increased so the process is repeated for the next student
AverageSorted = sorted(AverageScore,key=lambda l:l[1], reverse=True)
# http://stackoverflow.com/questions/18563680/sorting-2d-list-python
# this is code i obtained through someone else's post which arranges the array in descending
# order of scores
count2 = 0
while count2 < len(AverageSorted):
print(AverageSorted[count2][0], ':', AverageSorted[count2][1])
count2 = count2 + 1
# this formats the array so it prints the elements in a list of each student's
# name and score
Long winded and inefficient, but its the best i can do with my small knowledge of python :)
If this is the content of 1.txt:
John = 1
Joe = 3
John = 7
Joe = 9
Bob = 3
Joe = 8
John = 2
Bob = 9
Roger = 13
Replace your "with" statement with this:
name = "John"
_class = 1
with open("%s.txt" % _class, "r") as out:
lines = out.readlines()
scores = []
for line in lines:
if name in line:
# "strip" without arguments strips out all beginning
# and trailing white-space (i.e. " ", "\n", "\r").
line = line.strip()
score = int(line.split(" = ")[1])
scores.append(score)
# If there isn't any scores in this list, then there was no user
# data.
if scores:
# Use python built-in sum to sum the list of scores and
# divide the result by the number of scores gives you the average.
average = sum(scores) / len(scores)
print "%s's average score is %.1f out of %d game(s)." % (
name, average, len(scores))
else:
print "User %s not found." % name
I need a bit of help with Python code to count the frequency of consonants in a word. Consider the following sample input:
"There is no new thing under the sun."
Then the required output would be:
1 : 2
2 : 3
3 : 2
4 : 1
as there are 2 words with 1 consonant, 3 words with 2 consonants, 2 words with 3 consonants and 1 word with 4 consonants.
The following code does a similar job but instead of consonants it counts the frequency of whole words in text file. I know there is only a bit change which loops deeper into the word (I think).
def freqCounter(file1, file2):
freq_dict = {}
dict_static = {2:0, 3:0, 5:0}
# get rid of punctuation
punctuation = re.compile(r'[.?!,"\':;]') # use re.compile() function to convert string into a RegexObject.
try:
with open(file1, "r") as infile, open(file2, "r") as infile2: # open two files at once
text1 = infile.read() # read the file
text2 = infile2.read()
joined = " ".join((text1, text2))
for word in joined.lower().split():
#remove punctuation mark
word = punctuation.sub("", word)
#print word
l = len(word) # assign l tp be the word's length
# if corresponding word's length not found in dict
if l not in freq_dict:
freq_dict[l] = 0 # assign the dict key (the length of word) to value = 0
freq_dict[l] += 1 # otherwise, increase the value by 1
except IOError as e: # exception catch for error while reading the file
print 'Operation failed: %s' % e.strerror
return freq_dict # return the dictionary
Any help will be much appreciated!
I would try a simpler approach:
from collections import Counter
words = 'There is no new thing under the sun.'
words = words.replace('a', '').replace('e', '').replace('i', '').replace('o', '').replace('u', '') # you are welcome to replace this with a smart regex
# Now words have no more vowels i.e. only consonants
word_lengths = map(len, words.split(' '))
c = Counter(word_lengths)
freq_dict = dict(Counter(c))
A simple solution
def freqCounter(_str):
_txt=_str.split()
freq_dict={}
for word in _txt:
c=0
for letter in word:
if letter not in "aeiou.,:;!?[]\"`()'":
c+=1
freq_dict[c]=freq_dict.get(c,0)+ 1
return freq_dict
txt = "There is no new thing under the sun."
table=freqCounter(txt)
for k in table:
print( k, ":", table[k])
How about this?
with open('conts.txt', 'w') as fh:
fh.write('oh my god becky look at her butt it is soooo big')
consonants = "bcdfghjklmnpqrstvwxyz"
def count_cons(_file):
results = {}
with open(_file, 'r') as fh:
for line in fh:
for word in line.split(' '):
conts = sum([1 if letter in consonants else 0 for letter in word])
if conts in results:
results[conts] += 1
else:
results[conts] = 1
return results
print count_cons('conts.txt')
Missed the results
{1: 5, 2: 5, 3: 1, 4: 1}
[Finished in 0.0s]