I'm running this code, but I keep getting an AttributeError and I don't know how to fix it. I've included the code as well as the shell output from when I run it!
# the Count class. The wordleFromObject function takes a Count object as
# input, and calls its getTopWords method.
import string


class Count:
    # method to initialize any data structures, such as a dictionary to
    # hold the counts for each word, and a list of stop words
    def __init__(self):
        #print("Initializing Word Counter")
        # set the attribute wordCounts to an empty dictionary
        self.wordCounts = {}
        infile = open("stop_words.txt", "r")
        self.stop_word_dict = {}
        for line in infile.readlines():
            self.stop_word_dict = 1

    # method to add one to the count for a word in the dictionary.
    # if the word is not yet in the dictionary, we'll need to add a
    # record for the word, with a count of one.
    def incCount(self, word):
        my_table = str.maketrans('', '', string.punctuation)
        self.wordCounts = {}
        if word in self.stop_word_dict.keys():
            return
        else:
            self.stop_word_dict += 1
            cleaned_word = word.translate(my_table).lower()
            if cleaned_word != '':
                if cleaned_word in self.wordCounts.keys():
                    self.wordCounts[cleaned_word] += 1
                else:
                    self.wordCounts[cleaned_word] = 1

    # method to look up the count for a word
    def lookUpCount(self, word):
        return self.wordCounts.get(word.lower(), 0)


def main():
    print("Initializing Word Counter")
    filename = input("Enter book file:")
    infile = open(filename, "r")
    counter = Count()
    for line in infile.readlines():
        words = [word.strip() for word in line.strip().split()]
        for word in words:
            counter.incCount(word)
    infile.close()

    # Test code for Part 2 and 3
    # Comment this code once you have completed part 3.
    print(counter.lookUpCount("alice"))
    print(counter.lookUpCount("rabbit"))
    print(counter.lookUpCount("and"))
    print(counter.lookUpCount("she"))
    return

    # Test code for Part 4 and 5
    # topTen = counter.getTopWords(10)
    # print(topTen)

    # Test code for Part 5
    # Import the wordle module and uncomment the call to the wordle function!
    # wordle.wordleFromObject(counter,30)


# run the main program
main()
Error Message:
Initializing Word Counter
Enter book file:Alice.txt
Traceback (most recent call last):
  line 69, in <module>
    main()
  line 50, in main
    counter.incCount(word)
  line 28, in incCount
    if word in self.stop_word_dict.keys():
AttributeError: 'int' object has no attribute 'keys'
for line in infile.readlines():
    self.stop_word_dict = 1

In these lines you change stop_word_dict from a dict to an int, and later in the code you try to access the dictionary's keys() method, which an int does not have.
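A minimal sketch of what that loop in __init__ could look like instead, assuming stop_words.txt holds one stop word per line (the exact file format is an assumption here):

# Sketch only: assumes stop_words.txt contains one stop word per line.
self.stop_word_dict = {}
for line in infile.readlines():
    stop_word = line.strip().lower()
    if stop_word:
        # the stored value is unused; the dict is only used for membership tests
        self.stop_word_dict[stop_word] = True
infile.close()

With stop_word_dict kept as a dictionary, the membership test in incCount no longer raises an AttributeError. Note that the self.stop_word_dict += 1 line in incCount would then fail for a similar reason (you cannot add 1 to a dict) and would also need to be removed.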
How can I store another value in the pre-existing list todos?

When I try to store the new item, I get the following error:

Traceback (most recent call last):
  File "E:\Coding\python projects\project 1\add_or_show.py", line 11, in <module>
    todos.append(todo)
    ^^^^^^^^^^^^
AttributeError: 'str' object has no attribute 'append'

And here is my code:
while True:
    action = input("what action you want add or show or exit: ")
    match action:
        case 'add':
            todo = input("Enter the name of a student: ") + '\n'

            file = open('pyt.txt', 'r')
            todos = file.readline()
            file.close()

            todos.append(todo)

            file = open('pyt.txt', 'w')
            file.writelines(todos)
        case 'show':
            for ind, expand in enumerate(todos):
                index = ind + 1
                print(f"{index} - {expand}")
            print("The length of Class is: ", len(todos))
        case 'exit':
            print("\n\nyour program exit succaessfully\n\nBye Bye!!!")
            break
        case 'edit':
            num = int(input('Enter number which you want to edit'))
            num_n = num - 1
            edt = todos[num_n]
            print(edt)
            put = ('Enter the word you want instead of', edt, ': ')
            newedt = input(put)
            todos[num_n] = newedt
            print("Thanks!, Entry edited Successfilly")
        case _:
            print('Invalid action, please write add or show or exit')
Try using this instead:

todos = file.readlines()

file.readlines() is a method of the built-in Python file object that reads all of the lines of the file and returns them as a list of strings. file.readline() reads only a single line and returns it as a string, and a string has no append method, which is why todos.append(todo) fails.
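A small illustration of the difference, assuming pyt.txt already exists and contains a few lines of text:

# Assumes pyt.txt exists and contains a few lines of text.
with open('pyt.txt', 'r') as f:
    todos = f.readlines()    # a list of strings, e.g. ['Alice\n', 'Bob\n']
todos.append('Carol\n')      # works, because todos is a list

with open('pyt.txt', 'r') as f:
    first = f.readline()     # a single string, e.g. 'Alice\n'
# first.append('Carol\n')    # would raise AttributeError: 'str' object has no attribute 'append'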
I'm trying to create a program to generate text with usernames from a txt file, but I keep getting a TypeError: 'int' object is not iterable. I know what this means, but I have no idea how to fix my issue. I even tried just doing y = 12 / 2 and passing y to the for loop, and the same error came up, so I'm really confused; if someone could help me that would be great.

This is my code:
def generateNum():
    # imports random
    from random import randint

    for _ in range(10):
        value = randint(0, 900000)
    return(str(value))


def getNumOfLines(file):
    # opens txt file
    with open(file) as f:
        Lines = f.readlines()

    count = 0
    # Strips the newline character
    for line in Lines:
        count += 1
    return(count)


class debug:
    def __init__(self, credsTxt, tagsTxt):
        self.credsTxt = credsTxt
        self.tagsTxt = tagsTxt
        self.numOfCreds = getNumOfLines(credsTxt)
        self.numOfTags = getNumOfLines(tagsTxt)
        self.ammountPerAccount = round(self.numOfTags / self.numOfCreds)

    def getComments(self):
        # initializes comment
        comment = ""
        # opens txt file
        file1 = open(self.tagsTxt, 'r')
        count = 0

        while True:
            count += 1

            # Get next line from file
            line = file1.readline()

            for i in self.ammountPerAccount:
                # if line is empty
                # end of file is reached
                if not line:
                    break
                comment += ' ' + line.strip() + ' ' + generateNum() + '.'
        return(comment)


print(debug('D:/FiverrWork/user/instagram-bot/textGen/assets/login_Info.txt', 'D:/FiverrWork/user/instagram-bot/textGen/assets/tags.txt').getComments())
This is my stack trace error:

Traceback (most recent call last):
  File "d:\FiverrWork\user\textgenerator\textgenerator\txt.py", line 57, in <module>
    print(debug('D:/FiverrWork/user/textgenerator/textgenerator/assets/login_Info.txt', 'D:/FiverrWork/user/textgenerator/textgenerator/assets/tags.txt').getComments())
  File "d:\FiverrWork\user\textgenerator\textgenerator\txt.py", line 47, in getComments
    for i in self.ammountPerAccount():
TypeError: 'int' object is not callable
Your for loop as posted cannot iterate over an int. You meant to iterate over a range():
for _ in range(self.ammountPerAccount):
    # if line is empty
    # end of file is reached
    if not line:
        break
    comment += ' ' + line.strip() + ' ' + generateNum() + '.'
I used _ as a placeholder variable since the actual value of i each time was not used.
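A minimal standalone demonstration of the distinction:

n = round(12 / 2)        # n is the int 6

# for i in n:            # TypeError: 'int' object is not iterable
# for i in n():          # TypeError: 'int' object is not callable

for i in range(n):       # i takes the values 0, 1, ..., 5
    print(i)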
I am trying to write an annotator which loops through a list of names, tagging a separate document when these names appear. These names can consist of one or two words.
The buffer on the program works, so it recognises whether it needs to look at one or both lines of the file for tagging, and tags when the name which comes up matches the candidate exactly.
However, instead of looping through all the names in the list for each candidate, it takes whichever name the loop is on in that particular round, and if that name does not match the candidate it writes the line and moves on to the next line (with the next name in the list). This obviously results in many names in the file not being tagged when they should be.
Below is my code:
import json
from tagging import import_names


def split_line(line):
    """Split a line into four parts, word, pos, lemma and tag."""
    # TODO: Speak to Diana about the spaces in the vert file - do they mean
    # anything?
    line = line.strip().split()
    if len(line) == 1:
        word = line[0]
        pos, lemma, tag = None, None, None
    elif len(line) == 3:
        word, pos, lemma = line
        tag = ''
    elif len(line) == 4:
        word, pos, lemma, tag = line
    return [word, pos, lemma, tag]


class MWUTagger(object):
    """Contains a buffer of lines split into word, pos, lemma, tag items."""

    def __init__(self, f_in, f_out, n, gnrd_file, indicators=None):
        """Populate the buffer."""
        # read the input vert file
        self.f_in = open(f_in, 'r')
        # populate the buffer (first n lines of the vert file)
        self.buffer = []
        for i in range(n):
            self.buffer.append(split_line(self.f_in.readline()))
        # read in list of names or save
        self.names = import_names(gnrd_file)
        # create the output vert file
        self.f_out = f_out

    def __iter__(self):
        return self

    def write_line(self):
        """Write out the oldest line in the buffer, and add a new line to the buffer."""
        # write the oldest line from the buffer
        tagged_line = self.buffer.pop(0)
        tagged_line = [i for i in tagged_line if i]
        with open(self.f_out, 'a') as f:
            if tagged_line[0].startswith('<') and tagged_line[-1].endswith('>'):
                f.write(' '.join(tagged_line) + '\n')
            else:
                f.write('\t'.join(tagged_line) + '\n')

    def __next__(self):
        """Write out the oldest line in the buffer and add a new line to the buffer."""
        # write the oldest line from the buffer
        self.write_line()
        # add a new line to the buffer (found an example here https://bufferoverflow.com/a/14797993/1706564)
        line = self.f_in.readline()
        if line:
            self.buffer.append(split_line(line))
        else:
            self.f_in.close()
            self.flush()
            raise StopIteration

    def flush(self):
        """Write all remaining lines from the buffer to the output file."""
        while self.buffer:
            self.write_line()

    def check_for_name(self, name):
        """Depending on length of name, check if the first n items in the buffer
        match name."""
        # check if tagged
        if self.buffer[0][-1] == 'SCI':
            return
        name = name.strip().split()
        name = [n + '-n' for n in name]
        n = len(name)
        # check if they match
        candidate = [line[2] for line in self.buffer[:n]]
        if name == candidate:
            # edit the tags in the first n items in the buffer if they do
            for i in range(n):
                self.buffer[i][-1] += "SCI%i" % (i + 1)
        # check if other names in the dictionary match


def main():
    mwutagger = MWUTagger('zenodo_test_untag.vert', 'zenodomwutagged.vert', 2, 'JSON_file_test.json')
    while True:
        try:
            for name in mwutagger.names:
                mwutagger.check_for_name(name)
                mwutagger.__next__()
        except StopIteration:
            break


if __name__ == '__main__':
    main()
I am unsure whether I need to add something to the check_for_name method to say that if candidate != name it should move on to the next name, continuing until the end of the list, and only write the line out after the last name has been checked, or whether this is simply not being handled adequately in the main method.
Can anyone give me advice on this?
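A rough sketch of the idea described above (not a confirmed fix, just one reading of it): check every name against the current buffer position first, and only then let __next__ write the oldest line out and pull in the next one, i.e. move the mwutagger.__next__() call outside the loop over names:

# Sketch only: advance the buffer once per position, after all names have been checked.
def main():
    mwutagger = MWUTagger('zenodo_test_untag.vert', 'zenodomwutagged.vert', 2, 'JSON_file_test.json')
    while True:
        try:
            # try every name against the current front of the buffer
            for name in mwutagger.names:
                mwutagger.check_for_name(name)
            # only now write the oldest line and read the next one in
            mwutagger.__next__()
        except StopIteration:
            break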
I am trying to do some simple processing of Twitter data, in which I want to count the most frequent words in the dataset.

However, I keep getting the following error on line 45:
IndexError                                Traceback (most recent call last)
<ipython-input-346-f03e745247f4> in <module>()
     43 for line in f:
     44     parts = re.split("^\d+\s", line)
---> 45     tweet = re.split("\s(Status)", parts[1])[0]
     46     tweet = tweet.replace("\\n"," ")
     47     terms_all = [term for term in process_tweet(tweet)]

IndexError: list index out of range
I have added my full code below for review. Can someone please advise?
import codecs
import re
from collections import Counter
from nltk.corpus import stopwords

word_counter = Counter()


def punctuation_symbols():
    return [".", "", "$", "%", "&", ";", ":", "-", "&", "?"]


def is_rt_marker(word):
    if word == "b\"rt" or word == "b'rt" or word == "rt":
        return True
    return False


def strip_quotes(word):
    if word.endswith(""):
        word = word[0:-1]
    if word.startswith(""):
        word = word[1:]
    return word


def process_tweet(tweet):
    keep = []
    for word in tweet.split(" "):
        word = word.lower()
        word = strip_quotes(word)
        if len(word) == 0:
            continue
        if word.startswith("https"):
            continue
        if word in stopwords.words('english'):
            continue
        if word in punctuation_symbols():
            continue
        if is_rt_marker(word):
            continue
        keep.append(word)
    return keep


with codecs.open("C:\\Users\\XXXXX\\Desktop\\USA_TWEETS-out.csv", "r", encoding="utf-8") as f:
    n = 0
    for line in f:
        parts = re.split("^\d+\s", line)
        tweet = re.split("\s(Status)", parts[1])[0]
        tweet = tweet.replace("\\n", " ")
        terms_all = [term for term in process_tweet(tweet)]
        word_counter.update(terms_all)
        n += 1
        if n == 50:
            break

print(word_counter.most_common(10))
parts = re.split("^\d+\s", line)
tweet = re.split("\s(Status)", parts[1])[0]

These are most likely the problematic lines. You assume that the split succeeded and that parts has more than one element, but re.split can fail to find the pattern in line, in which case parts is simply [line] and parts[1] raises an IndexError. Add a check before the second line, and print the line value to better understand what is happening.
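A sketch of what such a check could look like inside the reading loop (skipping the malformed line here is an assumption; the point is simply to guard the parts[1] access):

for line in f:
    parts = re.split("^\d+\s", line)
    if len(parts) < 2:
        # the leading "digits + whitespace" prefix was not found on this line
        print("Unexpected line format:", repr(line))
        continue
    tweet = re.split("\s(Status)", parts[1])[0]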
I'm trying to create a function that accepts a file as input and prints the number of lines that are full-line comments (i.e. the line begins with # followed by some comment text).

For example, a file that contains, say, the following lines should give the result 2:
abc
#some random comment
cde
fgh
#another random comment
So far I have tried something along the following lines, but it's just not picking up the hash symbol:
infile = open("code.py", "r")
line = infile.readline()

def countHashedLines(filename):
    while line != "":
        hashes = '#'
        value = line
        print(value)  # here you will get all
        #if(value == hashes): tried this but just wasn't working
        #    print("hi")
        for line in value:
            line = line.split('#', 1)[1]
            line = line.rstrip()
            print(value)
        line = infile.readline()
    return()
Thanks in advance,
Jemma
I re-worded a few statements for ease of use (subjective) but this will give you the desired output.
def countHashedLines(lines):
    tally = 0
    for line in lines:
        if line.startswith('#'): tally += 1
    return tally


infile = open('code.py', 'r')
all_lines = infile.readlines()
num_hash_nums = countHashedLines(all_lines)  # <- 2
infile.close()
...or if you want a compact and clean version of the function...
def countHashedLines(lines):
    return len([line for line in lines if line.startswith('#')])
I would pass the file through standard input:

import sys

count = 0
for line in sys.stdin:    # note: you could also open the file and iterate through it
    if line[0] == '#':    # every time a line begins with #
        count += 1        # increment
print(count)
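Assuming the snippet above is saved as, say, count_comments.py (the file name is just for illustration), it would be run as:

python count_comments.py < code.py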
Here is another solution that uses regular expressions; it will also detect full-line comments that have whitespace in front of them.
import re

def countFullLineComments(infile):
    count = 0
    p = re.compile(r"^\s*#.*$")
    for line in infile.readlines():
        m = p.match(line)
        if m:
            count += 1
            print(m.group(0))
    return count

infile = open("code.py", "r")
print(countFullLineComments(infile))
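A quick check of the difference between the two approaches (the sample strings are made up for illustration):

import re

p = re.compile(r"^\s*#.*$")
print(bool(p.match("    # indented comment")))      # True: leading whitespace is allowed
print("    # indented comment".startswith('#'))     # False: the simpler check misses it
print(bool(p.match("x = 1  # trailing comment")))   # False: not a full-line comment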