I want to write a program file_stats.py that when run on the command line, accepts a text file name as an argument and outputs the number of characters, words, lines, and the length (in characters) of the longest line in the file. Does anyone know the proper syntax to do something like this if I want the output to look like this:
Characters: 553
Words: 81
Lines: 21
Longest line: 38
Assuming your file path is a string, something like this should work
file = "pathtofile.txt"
with open(file, "r") as f:
text = f.read()
lines = text.split("\n")
longest_line = 0
for l in lines:
if len(l) > longest_line:
longest_line = len(l)
print("Longest line: {}".format(longest_line))
The whole program
n_chars = 0
n_words = 0
n_lines = 0
longest_line = 0
with open('my_text_file') as f:
lines = f.readlines()
# Find the number of Lines
n_lines = len(lines)
# Find the Longest line
longest_line = max([len(line) for line in lines])
# Find the number of Words
words = []
line_words = [line.split() for line in lines]
for line in line_words:
for word in line:
words.append(word)
n_words = len(words)
# Find the number of Characters
chars = []
line_chars = [list(word) for word in words]
for line in line_chars:
for char in line:
chars.append(char)
n_chars = len(chars)
print("Characters: ", n_chars)
print("Words: ", n_words)
print("Lines: ", n_lines)
print("Longest: ", longest_line)
Related
Good day!
I have the following snippets:
words_count = 0
lines_count = 0
line_max = None
file = open("alice.txt", "r")
for line in file:
line = line.rstrip("\n")
words = line.split()
words_count += len(words)
if line_max == None or len(words) > len(line_max.split()):
line_max = line
lines.append(line)
file.close()
This is using rstrip method to get rid of the white spaces in the file, but my exam unit do not allow the method rstrip since it was not introduced. My question is: Is there any other way to get the same result of Total number of words: 26466 without using the rstrip?
Thank you guys!
Interestingly, this works for me without using str.rstrip:
import requests
wc = 0
content = requests.get('https://files.catbox.moe/dz39pw.txt').text
for line in content.split('\n'):
# line = line.rstrip("\n")
words = line.split()
wc += len(words)
assert wc == 26466
Note that a one-liner way of doing that in Python could be:
wc = sum(len(line.split()) for line in content.split('\n'))
I have an exercise that is asking to calculate the number of lines and words in an email file, excluding the subject line.
I can get the total number of lines and words with the following code:
file = "email.txt"
num_lines = 0
num_words = 0
with open(file, 'r') as f:
for line in f:
words = line.split()
if not line.startswith ('Subject'):
num_lines += 1
num_words += len(words)
print(num_lines)
print(num_words)
I would like to define a function to get the same information however, the second function for the word count is not return the desired value.
textFile = "email.txt"
def count_lines():
with open (textFile, 'r') as file:
num_lines = 0
for line in file:
words = line.split()
if not line.startswith ('Subject'):
num_lines = num_lines + 1
return num_lines
def count_words():
with open (textFile, 'r') as file:
num_words = 0
for words in file:
words = line.split()
if not line.startswith ('Subject'):
num_words = num_words + 1
return num_words
print(count_lines())
print(count_words())
I would suggest you another solution, using list comprehension:
with open(textFile, 'r') as f:
words_per_line = [len(line.split()) for line in f.readlines() if not line.startswith('Subject')]
total_lines = len(words_per_line)
total_words = sum(words_per_line)
Where words_per_line contains number of words per line in your file so if you count it (len) you will get the number of lines and if you sum it, you will get the total number of words.
I have the following code which runs as expected but has a few issues when counting the number of characters and the length of the longest line in characters. The follwoing is my code:
def stats(file_name):
n_chars = 0
n_words = 0
n_lines = 0
longest_line = 0
with open(file_name) as f:
lines = f.readlines()
n_lines = len(lines)
longest_line = max([len(line) for line in lines])
words = []
line_words = [line.split() for line in lines]
for line in line_words:
for word in line:
words.append(word)
n_words = len(words)
chars = []
line_chars = [list(word) for word in words]
for line in line_chars:
for char in line:
chars.append(char)
n_chars = len(chars)
f.close()
return n_chars, n_words, n_lines, longest_line
Can you guys see anything that would make the code not count the correct number of characters. The longest line always appears as one more than the correct answer.
The input is the following:
BEAUTIFUL Soup, so rich and green,
Waiting in a hot tureen!
Who for such dainties would not stoop?
Soup of the evening, beautiful Soup!
Soup of the evening, beautiful Soup!
Beau--ootiful Soo-oop!
Beau--ootiful Soo-oop!
Soo--oop of the e--e--evening,
Beautiful, beautiful Soup!
Beautiful Soup! Who cares for fish,
Game, or any other dish?
Who would not give all else for two
Pennyworth only of Beautiful Soup?
Pennyworth only of beautiful Soup?
Beau--ootiful Soo-oop!
Beau--ootiful Soo-oop!
Soo--oop of the e--e--evening,
Beautiful, beauti--FUL SOUP!
The expected output is the following:
Characters: 553
Words: 81
Lines: 21
Longest line: 38
The actual (failed) output:
characters: 469
words: 81
lines: 21
longest: 39
You only count the non-whitespace characters. Probably the wanted number of characters include whitespaces.
def stats(file_name):
n_chars = 0
n_words = 0
longest_line = 0
with open(file_name) as lines:
for n_lines, line in enumerate(lines, 1):
longest_line = max(longest_line, len(line.rstrip('\n'))
n_chars += len(line)
n_words += len(line.split())
return n_chars, n_words, n_lines, longest_line
To get the correct number of characters, you have to count spaces as well as the other characters. Otherwise you'll get much smaller value. Something like:
n_chars = sum( [len(line) for line in lines] )
How can I show words which length are 20 in a text file?
To show how to list all the word, I know I can use the following code:
#Program for searching words is in 20 words length in words.txt file
def main():
file = open("words.txt","r")
lines = file.readlines()
file.close()
for line in lines:
print (line)
return
main()
But I not sure how to focus and show all the words with 20 letters.
Big thanks
If your lines have lines of text and not just a single word per line, you would first have to split them, which returns a list of the words:
words = line.split(' ')
Then you can iterate over each word in this list and check whether its length is 20.
for word in words:
if len(word) == 20:
# Do what you want to do here
If each line has a single word, you can just operate on line directly and skip the for loop. You may need to strip the trailing end-of-line character though, word = line.strip('\n'). If you just want to collect them all, you can do this:
words_longer_than_20 = []
for word in words:
if len(word) > 20:
words_longer_than_20.append(word)
If your file has one word only per line, and you want only the words with 20 letters you can simply use:
with open("words.txt", "r") as f:
words = f.read().splitlines()
found = [x for x in words if len(x) == 20]
you can then print the list or print each word seperately
You can try this:
f = open('file.txt')
new_file = f.read().splitlines()
words = [i for i in f if len(i) == 20]
f.close()
This is what I have so far:
def stats(filename):
' prints the number of lines, words, and characters in file filename'
infile = open(filename)
lines = infile.readlines()
words = infile.read()
chars = infile.read()
infile.close()
print("line count:", len(lines))
print("word count:", len(words.split()))
print("character counter:", len(chars))
When executed, return the number of lines properly, but return 0 for words and character counts. Not sure why...
You can iterate through the file once and count lines, words and chars without seeking back to the beginning multiple times, which you would need to do with your approach because you exhaust the iterator when counting lines:
def stats(filename):
' prints the number of lines, words, and characters in file filename'
lines = chars = 0
words = []
with open(filename) as infile:
for line in infile:
lines += 1
words.extend(line.split())
chars += len(line)
print("line count:", lines)
print("word count:", len(words))
print("character counter:", chars)
return len(words) > len(set(words)) # Returns True if duplicate words
Or alternatively use the side effect that the file is at the end for chars:
def stats(filename):
' prints the number of lines, words, and characters in file filename'
words = []
with open(filename) as infile:
for lines, line in enumerate(infile, 1):
words.extend(line.split())
chars = infile.tell()
print("line count:", lines)
print("word count:", len(words))
print("character counter:", chars)
return len(words) > len(set(words)) # Returns True if duplicate words
you need to go back to beginning of file with infile.seek(0) after you read the position is at the end, seek(0) resets it to the start, so that you can read again.
infile = open('data')
lines = infile.readlines()
infile.seek(0)
print(lines)
words = infile.read()
infile.seek(0)
chars = infile.read()
infile.close()
print("line count:", len(lines))
print("word count:", len(words.split()))
print("character counter:", len(chars))
Output:
line count: 2
word count: 19
character counter: 113
other way of doing it....:
from collections import Counter
from itertools import chain
infile = open('data')
lines = infile.readlines()
cnt_lines = len(lines)
words = list(chain.from_iterable([x.split() for x in lines]))
cnt_words = len(words)
cnt_chars = len([ c for word in words for c in word])
# show words frequency
print(Counter(words))
You have exhausted the iterator after you call to readlines, you can seek back to the start but really you don't need to read all the file into memory at all:
def stats(filename):
chars, words, dupes = 0, 0, False
seen = set()
with open(filename) as f:
for i, line in enumerate(f, 1):
chars += len(line)
spl = line.split()
words += len(spl)
if dupes or not seen.isdisjoint(spl):
dupes = True
elif not dupes:
seen.update(spl)
return i, chars, words, dupes
Then assign the values by unpacking:
no_lines, no_chars, no_words, has_dupes = stats("your_file")
You may want to use chars += len(line.rstrip()) if you don't want to include the line endings. The code only stores exactly the amount of data needed, using readlines, read, dicts of full data etc.. means for large files your code won't be very practical
File_Name = 'file.txt'
line_count = 0
word_count = 0
char_count = 0
with open(File_Name,'r') as fh:
# This will produce a list of lines.
# Each line of the file will be an element of the list.
data = fh.readlines()
# Count of total number for list elements == total number of lines.
line_count = len(data)
for line in data:
word_count = word_count + len(line.split())
char_count = char_count + len(line)
print('Line Count : ' , line_count )
print('Word Count : ', word_count)
print('Char Count : ', char_count)