Finding a frequency of numbers in a list - python

from collections import Counter
f = open('input.txt')
lines = f.readlines()
counter = 0
freq = []
for line in lines:
conv_int = int(line)
counter = counter + conv_int
freq.append(counter)
for i in freq:
print(Counter(freq))
print(counter)
This code loops through a text file with various negative and positive numbers and adds them together starting from zero. However I was wondering how to find how many times each number occurs in this file?

Your file has an integer on each line, and you want the total sum and the frequency of each integer, right? Try this.
from collections import Counter
# Read one integer per line, keeping a running sum and a per-value tally.
with open("input.txt", "rt") as f:
    count = Counter()
    total = 0
    for value in map(int, f):
        count[value] += 1
        total += value
# Report how often each integer occurred, then the overall sum.
print(count)
print(total)

collections.Counter expects an iterable as its argument, not a single item:
import collections
with open('input.txt', 'r') as input_file:
numbers = [int(line) for line in input_file]
numbers_sum = sum(numbers)
numbers_frequency = collections.Counter(numbers)
But if efficiency is not an issue for you and you're just trying to sum all numbers in a file and count their frequency, you don't need to import a library just to do that:
with open('input.txt', 'r') as input_file:
numbers = [int(line) for line in input_file]
numbers_sum = sum(numbers)
numbers_frequency = {n: numbers.count(n) for n in set(numbers)}

Related

Getting average score from a text file in python?

I'm looking to have the program read a text file that is formatted like this for example.
Kristen
100
Maria
75
Frank
23
Is there any way in Python to skip the name lines and read only the numbers, accumulate them, and average them? There could be more or fewer numbers than in the example above. I'm very much stuck.
you can use re.findall to find all numbers in a string:
import re
if __name__ == "__main__":
    # Collect every run of digits found anywhere in the file.
    numbers = []
    with open("./file.txt", "r") as f:
        for line in f:
            # int() rather than eval(): eval executes arbitrary text from the
            # file and rejects zero-padded numbers such as "007" in Python 3.
            numbers.extend(int(token) for token in re.findall(r'\d+', line))
    if numbers:
        average = sum(numbers) / len(numbers)
        print(average)
    else:
        # Avoid a ZeroDivisionError when the file holds no digits at all.
        print("No numbers found")
This is the method I would use:
def get_average(filepath):
    """Return the mean of the integer lines in the file at ``filepath``.

    Each line is parsed with ``int()``; lines that are not integers (names,
    blank lines, ...) are skipped.

    Args:
        filepath: Path of the text file to read.

    Returns:
        The average of all integer lines, as a float.

    Raises:
        ZeroDivisionError: If the file contains no integer line.
        OSError: If the file cannot be opened.
    """
    total = 0.0
    numbers = 0
    with open(filepath, 'r') as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            try:
                # int() ignores surrounding whitespace, including the newline.
                number = int(line)
            except ValueError:
                # Only "not a number" is expected here; anything else
                # (e.g. KeyboardInterrupt) must propagate.
                continue
            total += number
            numbers += 1
    return total / numbers
get_average("path/to/file.txt")
Use strip to get rid of the newline and isdigit to check that the line contains only digits:
In [8]: with open('a.txt', 'r') as f:
...: s = [int(i.strip()) for i in f if i.strip().isdigit()]
...:
In [9]: sum(s)/len(s)
Out[9]: 66.0
# assuming a score always follows a players name.
with open("path_to_file.ext", "r") as inf:
print(inf.readlines()[1::2]) # Do something with the result
# only grabbing lines that can be interpreted as numbers
with open("path_to_file.ext", "r") as inf:
for _ in inf.readlines():
if _.rstrip().isnumeric():
print(_.rstrip()) # Do something with the result
If the file is named 'file.txt':
# Sum every line that parses as an integer and count how many there were.
total = 0
i = 0
with open('file.txt', 'r') as file:
    for line in file:
        try:
            value = int(line)
        except ValueError:
            # Not a number (e.g. a name line): skip it. Catching only
            # ValueError lets unrelated errors and Ctrl-C propagate.
            continue
        total += value
        i += 1
# Raises ZeroDivisionError if the file held no numeric lines.
average = total / i

Total number of lines and words in a file

I have an exercise that is asking to calculate the number of lines and words in an email file, excluding the subject line.
I can get the total number of lines and words with the following code:
file = "email.txt"
num_lines = 0
num_words = 0
with open(file, 'r') as f:
for line in f:
words = line.split()
if not line.startswith ('Subject'):
num_lines += 1
num_words += len(words)
print(num_lines)
print(num_words)
I would like to define functions to get the same information; however, the second function, for the word count, does not return the desired value.
textFile = "email.txt"
def count_lines():
with open (textFile, 'r') as file:
num_lines = 0
for line in file:
words = line.split()
if not line.startswith ('Subject'):
num_lines = num_lines + 1
return num_lines
def count_words():
with open (textFile, 'r') as file:
num_words = 0
for words in file:
words = line.split()
if not line.startswith ('Subject'):
num_words = num_words + 1
return num_words
print(count_lines())
print(count_words())
I would suggest another solution, using a list comprehension:
with open(textFile, 'r') as f:
words_per_line = [len(line.split()) for line in f.readlines() if not line.startswith('Subject')]
total_lines = len(words_per_line)
total_words = sum(words_per_line)
Where words_per_line contains number of words per line in your file so if you count it (len) you will get the number of lines and if you sum it, you will get the total number of words.

Summing and Average using python

I have written a code that extracts floating point numbers from a
text file and produces a list of the numbers.
My challenge is summing the consecutive numbers and finding the
average of the numbers.
I am not allowed to use the sum function, and I am new to Python.
This is the code I have written so far.
What can I do to add up the numbers in the list?
fh = open(fname)
for line in fh:
if line.startswith("X-DSPAM-Confidence:") : continue
# print(line)
count = 0
for line in fh:
if line.startswith("X-DSPAM-Confidence:"):
count = count + 1
# print(count)
for line in fh:
if line.startswith("X-DSPAM-Confidence:"):
# print(line)
xpos = line.find(' ')
# print(xpos)
num = line[xpos : ]
# print(float(num))
fnum = float(num)
# print(fnum)
total = 0
for i in fnum:
total += int(i)
print(total)
Error: "float object not iterable" on line 24 (line 24 is the 4th for loop).
First, an open file can be iterated only once, and your code has three loops starting with for line in fh:. After the first loop, the file pointer is at the end of the file, so the following loops finish immediately without reading anything. For that reason, a single loop inside a with block should be preferred.
Next somewhere in the loop you get a float value in fnum. Just initialize total before starting the loop, and add fnum when you get it:
total = 0
with open(fname) as fh:
for line in fh:
if line.startswith("X-DSPAM-Confidence:"):
# print(line)
xpos = line.find(' ')
# print(xpos)
num = line[xpos : ]
# print(float(num))
fnum = float(num)
# print(fnum)
total += fnum
# print(total)
with ensures that the file will be cleanly closed at the end of the loop.
fnum is a float. It's not an array, therefore it's not iterable and cannot be iterated in a for loop.
You probably don't need an array to determine the total and the average:
# Accumulate the X-DSPAM-Confidence values of a mailbox file and report
# their total, average and count.
fname = "c:\\mbox-short.txt"
prefix = "X-DSPAM-Confidence:"
count = 0
total = 0
# "with" closes the file even if a line fails to parse.
with open(fname) as fh:
    for line in fh:
        if line.startswith(prefix):
            # Take everything after the header name; float() ignores the
            # surrounding whitespace, so this also works when no space
            # follows the colon.
            fnum = float(line[len(prefix):])
            total += fnum
            count += 1
print("Total = " + str(total))
# Raises ZeroDivisionError if the file has no X-DSPAM-Confidence line.
print("Average = " + str(total / count))
print("Number of items = " + str(count))
You don't have to use startswith in this case; it is better to call split on each line of the file, which breaks the line into a list of words. Then check whether the first element (index 0) is X-DSPAM-Confidence:. If it is, take the corresponding value of interest, which in this case is at index 1. Below is the code:
total = 0
number_of_items = 0
with open("dat.txt", 'r') as f:
for line in f:
fields = line.split()
if fields != []:
if fields[0] == "X-DSPAM-Confidence:":
number_of_items += 1
total += float(fields[1])
print(total)
print(number_of_items)
avg = (total/number_of_items)
print(avg)
I saved your data in a text file named "dat.txt".
Hope it helps !!!

word counter || python

I want to print the number of words in a txt file that have 1-20 letters.
I tried this, but it prints 20 zeroes instead. Any idea why?
Edit: in the end the program should output 20 numbers, one for each word length from 1 to 20, giving the number of words in the file with that many letters.
fin = open('words.txt')
for i in range(20):
counter = 0
for line in fin:
word = line.strip()
if len(word) == i:
counter = counter + 1
print counter,
EDIT
To produce individual counts for each word length you can use a collections.Counter:
from collections import Counter
def word_lengths(f):
    """Yield the length of each whitespace-separated word in ``f``.

    ``f`` is any iterable of strings, such as an open text file. Words are
    produced in reading order, line by line.
    """
    for text in f:
        # split() with no argument splits on runs of whitespace only, so
        # punctuation attached to a word counts toward its length.
        for length in map(len, text.split()):
            yield length
with open('words.txt') as fin:
counts = Counter(length for length in word_lengths(fin) if length <= 20)
This uses a generator to read the file and produce a sequence of word lengths. The filtered word lengths are fed into a Counter. You could perform the length filtering on the Counter instead.
If you want to ignore punctuation you could look at using str.translate() to remove unwanted characters, or possibly re.split(r'\W+', line) instead of line.split().
Try it like this:
with open('words.txt') as fin:
counter = 0
for line in fin:
for word in line.split():
if len(word) <= 20:
counter = counter + 1
print counter,
This could be simplified to:
with open('words.txt') as fin:
counter = sum([1 for line in fin
for word in line.split() if len(word) <= 20])
but that's playing code golf.
You can also use a collections.Counter if it is practical to read the entire file into memory:
from collections import Counter
with open('words.txt') as fin:
c = Counter(fin.read().split())
counter = sum(c[k] for k in c if len(k) <= 20)
And no doubt there are many other ways to do it. None of the above expect or handle punctuation.
It should be like this: counter shouldn't be reset inside the for loop, and you can use len() to get the length of each word:
with open("test") as f:
counter = 0
for line in f:
for word in line.split():
if len(word)<=20:
counter+=1
print counter
Or my way:
import re
with open("file") as f:
print len(filter(lambda x:len(x)<20,re.split('\n| ', f.read())))
Hope this helps.
using regular expressions
import re
# Histogram of word lengths 1..20 for the words in words.txt.
REGEX = r"(\b\S{1,20}\b)"
finder = re.compile(REGEX)
with open("words.txt") as out:
    data = out.read()
matches = finder.findall(data)
# lst[k] holds the number of words of length k + 1. The pattern only yields
# matches of 1 to 20 characters, so indexing by len(m) directly would leave
# slot 0 unused and raise IndexError on a 20-character word.
lst = [0] * 20
for m in matches:
    lst[len(m) - 1] += 1
print(lst)

Repeating a for in line loop python

How would I repeat this (excluding the opening of the file and the setting of the variables)?
this is my code in python3
file = ('file.csv','r')
count = 0 #counts number of times i was equal to 1
i = 0 #column number
for line in file:
line = line.split(",")
if line[i] == 1:
count = count + 1
i = i+1
If I understand the question, try this and adjust the output format however you like. Replace NUM_COLUMNS with the number of columns you want to check.
# For each of the first NUM_COLUMNS columns, count the rows whose value in
# that column is "1" and print the count.
with open('file.csv', 'r') as file:
    # Read everything once so the rows can be scanned again for every column.
    data = file.readlines()
for i in range(NUM_COLUMNS):
    count = 0
    for line in data:
        # Drop the line terminator first; otherwise the last column reads
        # "1\n" and never compares equal to "1".
        fields = line.rstrip("\r\n").split(",")
        if fields[i] == "1":
            count = count + 1
    # print() as a function, since the question targets Python 3.
    print(count)
The following function will return the number of fields in the csv file file_name whose value is field_value, which is what I think you are trying to do:
import csv
def get_count(file_name, field_value):
    """Count the fields in a CSV file that are exactly equal to ``field_value``.

    Args:
        file_name: Path of the CSV file to read.
        field_value: String to look for. csv.reader yields every field as a
            string, so pass '1' rather than 1.

    Returns:
        The number of matching fields across all rows (0 if there are none).
    """
    # newline='' is what the csv module asks for: it lets the reader do its
    # own line handling so line breaks inside quoted fields stay untouched.
    with open(file_name, newline='') as f:
        return sum(row.count(field_value) for row in csv.reader(f))
print(get_count('file.csv', '1'))

Categories