The program reads a file of keywords, each with a numeric value attached. It then reads a file of a couple of thousand tweets, each containing a latitude, a longitude and the text of the tweet. You have to sort the tweets into specific regions and then calculate a sentiment average for each region based on the keywords and values from the first file. The user has to input these two file names, and the code has to use a try statement with exception handlers. The functions work on their own and calculate the proper values, but when I put them inside the try statement I get these errors:
Traceback (most recent call last): line 129, main(); line 16, sortKeys(keys); and the last error, line 56: keyword[lines[0]] = int(lines[1]) IndexError: list index out of range
is there anything i can do to fix it?
List item
# Tweet texts grouped by the region their coordinates fall in.
eastern = []
central = []
mountain = []
pacific = []
# Maps each sentiment keyword to its integer value.
keyword = {}
# One average keyword value per tweet that contained at least one keyword.
easternsum = []
centralsum = []
mountainsum = []
pacificsum = []

# Bounds applied to the first coordinate of every tweet (shared by all regions).
# NOTE(review): the numeric ranges look like latitude first, longitude second.
_FIRST_MIN = 24.660845
_FIRST_MAX = 49.189787
# (lower bound, upper bound, destination list) for the second coordinate.
_REGIONS = (
    (-87.518395, -67.444574, eastern),
    (-101.998892, -87.518395, central),
    (-115.236428, -101.998892, mountain),
    (-125.242264, -115.236428, pacific),
)


def _reset():
    """Empty every module-level container so a retry starts from scratch."""
    keyword.clear()
    # Clear in place: _REGIONS holds references to these same list objects.
    for bucket in (eastern, central, mountain, pacific,
                   easternsum, centralsum, mountainsum, pacificsum):
        bucket.clear()


def _lines(source):
    """Yield lines from *source*: a file path (str) or any iterable of lines."""
    if isinstance(source, str):
        with open(source, "r") as handle:
            yield from handle
    else:
        yield from source


def _average(values):
    """Return the mean of *values*, or 0.0 when there are none."""
    return sum(values) / len(values) if values else 0.0


def main():
    """Prompt for the keyword and tweet files until both load, then report.

    For each region, prints the number of scored tweets and their mean score.
    File errors and malformed content are reported and the prompts repeat.
    """
    done = False
    while not done:
        try:
            _reset()
            keys = input("Enter file: ")
            with readkeys(keys) as key_file:
                sortKeys(key_file)
            tweets = input("Enter second file: ")
            with readtweets(tweets) as tweet_file:
                sorttweet(tweet_file)
            calcsentiment()
            report = (
                ("eastern", easternsum),
                ("central", centralsum),
                ("mountain", mountainsum),
                ("pacific", pacificsum),
            )
            for label, scores in report:
                print(f"The {label} amount of tweets is", len(scores))
                # _average avoids dividing by zero for a region with no scores.
                print(f"The {label} happiness score is", _average(scores))
            done = True
        except IOError:
            print("Error, file not found.")
        except ValueError:
            print("Invalid file.")
        except RuntimeError as error:
            print("Error", str(error))


def readkeys(keys):
    """Open the keyword file named *keys* for reading and return the file."""
    return open(keys, "r")


def readtweets(tweets):
    """Open the tweet file named *tweets* for reading and return the file."""
    return open(tweets, "r")


def sortKeys(keys):
    """Load "word,value" lines from *keys* into the ``keyword`` dict.

    *keys* may be an open file, any iterable of lines, or a file path.
    Blank lines are skipped.

    Raises:
        ValueError: a line has no comma or its value is not an integer.
    """
    for line in _lines(keys):
        if not line.strip():
            continue
        lines = line.split(",")
        if len(lines) < 2:
            raise ValueError(f"keyword line has no comma: {line!r}")
        keyword[lines[0]] = int(lines[1])


def sorttweet(tweets):
    """Append the text of each tweet in *tweets* to its region's list.

    *tweets* may be an open file, any iterable of lines, or a file path.
    Each line is split on single spaces into six fields: two bracketed
    coordinates, three fields that are ignored, and the tweet text.
    Tweets outside every region are dropped; blank lines are skipped.

    Raises:
        ValueError: a line has fewer than six fields or a bad coordinate.
    """
    for line in _lines(tweets):
        if not line.strip():
            continue
        stuff = line.split(" ", 5)
        if len(stuff) < 6:
            raise ValueError(f"tweet line has too few fields: {line!r}")
        first = float(stuff[0].strip("[,"))
        second = float(stuff[1].strip("],"))
        if not _FIRST_MIN < first < _FIRST_MAX:
            continue
        for lower, upper, bucket in _REGIONS:
            if lower < second < upper:
                bucket.append(stuff[5])
                break  # the ranges do not overlap, so one match is final


def _score_region(tweets, scores):
    """Append to *scores* the mean keyword value of each tweet in *tweets*."""
    for tweet in tweets:
        values = [keyword[word] for word in tweet.split() if word in keyword]
        # Tweets without any keyword contribute nothing to the region.
        if values:
            scores.append(sum(values) / len(values))


def calcsentiment():
    """Score every region's tweets into the matching ``*sum`` list."""
    for tweets, scores in ((eastern, easternsum), (central, centralsum),
                           (mountain, mountainsum), (pacific, pacificsum)):
        _score_region(tweets, scores)


if __name__ == "__main__":
    main()
You have a problem here:
# Quoted from the question: the statement that raises the IndexError.
def sortKeys(keys):
for line in keys :
lines = line.split(",")
# When the line contains no ",", split() returns a one-element list and lines[1] does not exist.
keyword[lines[0]] = int(lines[1])
when you split the line, you don't get 2 tokens, just one.
That happens when the line you are trying to split does not contain a ',' character.
Try something like "xxxx".split(",") in the Python console and you will see that the result is ["xxxx"], a list with just one element, while in your code lines[1] tries to access the second element of the list.
Related
I am trying to calculate the taxi and its trip using map reduce python program.
In Map program I have written the following code where it will assign each row a key.
import sys

# Mapper: for each comma-separated line on stdin, emit the first two fields
# (trip, km) and a count of "1", separated by tabs.
for line in sys.stdin:
    words = line.strip().split(",")
    if len(words) < 2:
        # Blank or malformed line: there is no second field to emit.
        continue
    trip, km = words[0], words[1]
    print('%s\t%s\t%s' % (trip, km, "1"))
Next while in reducer below is the program.
#!/usr/bin/env python3
# Reducer: reads records from stdin and keeps a running count and km total
# for the trip currently being processed.
# NOTE(review): indentation was lost in this listing, so which statements
# belong to the loop, the try and the if/else cannot be determined from here.
import sys
current_trip = None
current_km = 0
current_count = 0
trip = None
gender = None
for line in sys.stdin:
line = line.strip()
# Expects exactly three comma-separated fields; any other number raises ValueError on unpacking.
trip,gender,count = line.split(",")
try:
count = int(count)
except ValueError:
# Skip records whose count field is not an integer.
continue
if current_trip == trip:
# NOTE(review): `km` is never assigned anywhere in this script.
current_km = (km + current_km)
current_count += count
# NOTE(review): two %s placeholders but three values; the last value is a one-element set literal.
# This is the division reported in the traceback: it fails when current_count is 0.
print('%s\t%s' % (current_trip,current_count, {current_km/current_count}))
current_trip = trip
current_count = count
current_km = 0
else:
if current_trip == trip:
current_count += count
# NOTE(review): same placeholder/value mismatch as above.
print('%s\t%s' % (current_trip, current_count,km))
Here I am getting the error saying
Traceback (most recent call last):
File "reducer.py", line 23, in <module>
print('%s\t%s\t%s' % (current_trip, current_count, {current_km / current_count}))
ZeroDivisionError: division by zero
and I am not able to debug properly because if I include the print statement it is not printing in output.
Can someone please help
If the first line contains a count 0, or you have negative counts and at some point the current_count is 0, you will get this error. Try to add a condition before your print method to debug the problem:
# Only divide when there is a non-zero count; otherwise print the values that
# led to the zero so the offending input can be traced.
if current_count != 0:
    # Three placeholders for three values, and a plain number rather than a set.
    print('%s\t%s\t%s' % (current_trip, current_count, current_km / current_count))
else:
    print(f"error: the current_count is 0 and the count is {count}")
# First pass: print every student as "lname,fname".
f = open('studMarks.txt', 'r')
marks = 0
# Sort out names, split the words then sort which order
for line in f:
words = line.split()
fname = words[0]
lname = words[1]
print(f"{lname},{fname}")
f.close()
# Second pass: re-open the file and total the third column of every line.
# NOTE(review): this handle is never closed.
f = open('studMarks.txt', 'r')
# NOTE(review): the name `sum` shadows the built-in sum() from here on.
sum = 0
count = 0
for line in f:
count += 1
sum += float(line.split()[2])
n = []
# One average over all lines read, not one per student.
average = sum/count
print(f"{average}")
When using the for loop it seems to display a value of 64.3, which I believe is for the total of the whole student list and average for all marks.
I need to produce an output which displays the student names and their average on the same line. I can do it for the names, but I cannot do it for the average as I keep getting errors. I don't know what to put in.
Below is the full solution. The with open line is a context manager and ensures that the file will get closed as soon as you exit the block. You should get used to using this style as it's the safe way to do I/O. The rest is just bog standard Python.
# Accumulate (total score, number of scores) per student, then print each
# student's average.
marks = {}
with open('studMarks.txt', 'r') as f:
    for line in f:
        words = line.split()
        if len(words) < 3:
            # Blank or incomplete line: no score to record.
            continue
        fname, lname = words[0], words[1]
        # float() accepts whole and decimal marks alike.
        score = float(words[2])
        key = f'{fname} {lname}'
        total, count = marks.get(key, (0, 0))
        marks[key] = (total + score, count + 1)

for name, (total, count) in marks.items():
    print(f'{name} : {total / count}')
This is an interesting problem.
From what I understand you have a text file that looks like this:
Johnny Ly 90 100 Adam Best 80 30 Tim Smith 10 20 in a file called studentMarks2.txt
and want output like this:
Johnny_Ly 95.0 Adam_Best 55.0 Tim_Smith 15.0
if that is true then it can be done using code like this without pandas or csv
though those would make this a lot easier.
# Read the whole file and split it on whitespace into a flat list of tokens.
with open('studMarks2.txt', 'r') as f:
    fileContents = f.read().split()

# Name tokens build up the current student's key; every numeric token is
# appended to that student's list of scores.
students = {}
names = []
for content in fileContents:
    # Treat digits with at most one decimal point as a score.
    if content.replace('.', '', 1).isdecimal():
        students.setdefault('_'.join(names), []).append(float(content))
        continue
    if len(names) == 2:
        # A name token after a complete first/last pair starts the next student.
        names.clear()
    names.append(content)

# Print every student followed by the mean of their scores, all on one line.
print(' '.join(f'{student} {sum(scores) / len(scores)}'
               for student, scores in students.items()))
Broken down
This part reads the contents and splits on white space
# Read the whole file and split it on whitespace into a flat list of tokens.
with open('studMarks2.txt', 'r') as f:
    fileContents = f.read().split()
this part then iterates through the contents
storing the names as keys in a dictionary and putting the scores in a list
# Name tokens build up the current student's key; every numeric token is
# appended to that student's list of scores.
students = {}
names = []
for content in fileContents:
    # Treat digits with at most one decimal point as a score.
    if content.replace('.', '', 1).isdecimal():
        students.setdefault('_'.join(names), []).append(float(content))
        continue
    if len(names) == 2:
        # A name token after a complete first/last pair starts the next student.
        names.clear()
    names.append(content)
Lastly it iterates over the dictionary and output the avg on one line
# Print every student followed by the mean of their scores, all on one line.
print(' '.join(f'{student} {sum(scores) / len(scores)}'
               for student, scores in students.items()))
I'm trying to get the timecode from emails in an hh:mm:ss format and parse out the hour. Then I put it in a dictionary so it shows how many emails there are per hour, put that in a list, and print it out like so:
04 3
06 1
07 1
09 1
10 3
11 6
I need it sorted by the hour/value with the old x.sort() function for lists. The autograder the course uses doesn't support sorted(). At the moment it seems to be ignoring the .sort() command completely:
11 6
10 3
15 2
14 4
Here's my full code:
# Python 2 script from the question: counts 'From' lines per hour and prints the counts.
# NOTE(review): indentation was lost in this listing, so the nesting of the loops below is not recoverable here.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
line = line.rstrip()
words = line.split()
if words == [] : continue
if words[0] != 'From' : continue
# Slice (not index) so a short line yields an empty list instead of raising.
full = words[5:6]
allitems = ".".join(full)
# print allitems
# Iterating a string visits it one character at a time; the loop variable is overwritten on the next line anyway.
for linee in allitems:
linee = allitems.rstrip()
# if linee in counts : continue
wordss = linee.split(':')
# First ':'-separated piece, kept as a one-element list.
cutt = wordss[0:1]
print cutt
for val in cutt :
counts[val] = counts.get(val, 0 ) + 1
print counts
lst = list()
for hour, frequency in counts.items() :
newtup = (hour, frequency)
lst.append(newtup)
# NOTE: without parentheses this only references the method; the list is not sorted.
lst.sort
print lst
for howr, freq in lst:
print howr, freq
What's going wrong here?
You need to call the sort() method, as of now, you are just getting the reference to the list.sort function, without doing anything with it, Example -
lst.sort()
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

# Count 'From' lines per hour; the hour is the text before the first ':' in
# the sixth whitespace-separated field.
counts = dict()
with open(name) as handle:
    for line in handle:
        words = line.split()
        # Skip blank lines, other headers and lines too short to carry a time.
        if len(words) < 6 or words[0] != 'From':
            continue
        hour = words[5].split(':')[0]
        counts[hour] = counts.get(hour, 0) + 1

# list.sort() orders the (hour, frequency) tuples in place, by hour first.
lst = list(counts.items())
lst.sort()
for howr, freq in lst:
    print(howr, freq)
python
I'm taking an online class and we were assigned the following task:
"Write a program that prompts for a file name, then opens that file and reads through the file, looking for lines of the form:
X-DSPAM-Confidence: 0.8475
Count these lines and extract the floating point values from each of the lines and compute the average of those values and produce an output as shown below.
You can download the sample data at http://www.pythonlearn.com/code/mbox-short.txt when you are testing below enter mbox-short.txt as the file name."
The desired output is: "Average spam confidence: 0.750718518519"
Here is the code I've written:
# Python 2 script from the question; the notes below mark the problems the answers discuss.
fname = raw_input("Enter file name: ")
fh = open(fname)
# read() returns the whole file as one string ...
inp = fh.read()
# ... so this loop visits single characters, not lines.
for line in inp:
# The prefix includes one specific value (0.8475), so lines with other values are skipped.
if not line.strip().startswith("X-DSPAM-Confidence: 0.8475") : continue
pos = line.find(':')
num = float(line[pos+1:])
# Overwrites total with the latest number instead of accumulating.
total = float(num)
# Derived from total rather than incremented, so it is not a line counter.
count = float(total + 1)
print 'Average spam confidence: ', float( total / count )
The output I get is: "Average spam confidence: nan"
What am I missing?
# To prompt for the name instead: fname = input("Enter file name: ")
fname = "mbox-short.txt"
prefix = 'X-DSPAM-Confidence:'
with open(fname, 'r') as fh:
    # Everything after the header name is the number; float() ignores the
    # surrounding whitespace and newline.
    values = [float(line[len(prefix):]) for line in fh if line.startswith(prefix)]
if values:
    print('Average spam confidence: %f' % (sum(values) / len(values)))
else:
    # Avoid dividing by zero when the file has no matching header.
    print('No X-DSPAM-Confidence lines found')
I just tested this against the sample data it works just fine
#try the code below, it is working.
fname = input("Enter file name: ")
count = 0
total = 0.0  # running sum; named so the built-in sum() stays usable
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        # The number is whatever follows the first ':'.
        pos = line.find(':')
        total = total + float(line[pos + 1:])
        count = count + 1
if count:
    print("Average spam confidence:", total / count)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
My guess from the question is that the actual 0.8475 is actually just an example, and you should be finding all the X-DSPAM-Confidence: lines and reading those numbers.
Also, the indentation of the code you added puts all the calculations outside the for loop. I'm hoping that is just a formatting error from the upload; otherwise that would also be a problem.
As a matter of simplification you can also skip the
inp = fh.read()
line and just do
for line in fh:
Another thing to look at is that total will always only be the last number you read.
# Use the file name mbox-short.txt as the file name
fname = input("Enter file name: ")
prefix = "X-DSPAM-Confidence:"
count = 0
total = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith(prefix):
            continue
        count = count + 1
        # Slice by the prefix length rather than a hand-counted index.
        num = float(line[len(prefix):])
        total += num
if count:
    average = total / count
    print("Average spam confidence:", average)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
The way you're checking if it is the correct field is too specific. You need to look for the field title without a value (see code below). Also your counting and totaling needs to happen within the loop. Here is a simpler solution that makes use of python's built in functions. Using a list like this takes a little bit more space but makes the code easier to read in my opinion.
How about this? :D
with open(input("Enter file name: ")) as f:
    # Matching on the name plus ':' guarantees split(":")[1] exists.
    values = [float(line.split(":")[1]) for line in f
              if line.strip().startswith("X-DSPAM-Confidence:")]
if values:
    print('Average spam confidence: %f' % (sum(values) / len(values)))
else:
    # Avoid dividing by zero when the file has no matching header.
    print('No X-DSPAM-Confidence lines found')
My output:
Average spam confidence: 0.750719
If you need more precision on that float: Convert floating point number to certain precision, then copy to String
Edit: Since you're new to python that may be a little too pythonic :P Here is the same code expanded out a little bit:
fname = input("Enter file name: ")
values = []
with open(fname) as f:
    for line in f:
        # Matching on the name plus ':' guarantees split(":")[1] exists.
        if line.strip().startswith("X-DSPAM-Confidence:"):
            values.append(float(line.split(":")[1]))
if values:
    print('Average spam confidence: %f' % (sum(values) / len(values)))
else:
    # Avoid dividing by zero when the file has no matching header.
    print('No X-DSPAM-Confidence lines found')
fname = input("Enter file name: ")
prefix = "X-DSPAM-Confidence:"
x_count = 0
total_count = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith(prefix):
            continue
        x_count = x_count + 1
        # Slice right after the prefix: a fixed index of 21 skips the first
        # digit, so a value such as 1.0000 would be read as .0000.
        num = float(line[len(prefix):])
        total_count = num + total_count
if x_count:
    aver = total_count / x_count
    print("average spam confidence:", aver)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
def find_spam_confidence(data):
    """Print and return the mean X-DSPAM-Confidence value found in *data*.

    Args:
        data: iterable of text lines; only lines that start with
            "X-DSPAM-Confidence:" are used.

    Returns:
        The average as a float, or None when no line matches (a notice is
        printed instead of the average in that case).
    """
    prefix = "X-DSPAM-Confidence:"
    # float() ignores the whitespace around the number that follows the prefix.
    values = [float(line[len(prefix):]) for line in data if line.startswith(prefix)]
    if not values:
        print("No X-DSPAM-Confidence lines found")
        return None
    average = sum(values) / len(values)
    print("Average spam confidence:", str(average))
    return average


if __name__ == "__main__":
    user_data = input("Enter the file name: ")
    with open(user_data, 'r') as handle:
        lines_list = [line.strip("\n") for line in handle]
    find_spam_confidence(lines_list)
fname = input("Enter file name: ")
c = 0  # number of matching lines
t = 0  # sum of their values
with open(fname) as fh:
    for line in fh:
        if line.startswith("X-DSPAM-Confidence:"):
            c = c + 1
            # The number is whatever follows the first ':'.
            p = line.find(':')
            n = float(line[p + 1:])
            t = t + n
if c:
    print("Average spam confidence:", t / c)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
fname = input("Enter file name: ")
prefix = "X-DSPAM-Confidence:"
count = 0
add = 0
with open(fname) as fh:
    for line in fh:
        if line.startswith(prefix):
            count = count + 1
            pos = float(line[len(prefix):])
            add = add + pos
if count:
    # Divide the accumulated total `add`; `sum` here would be the built-in function.
    print("Average spam confidence:", add / count)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
fname = input('Enter the file name : ')  # file name is mbox-short.txt
try:
    fopen = open(fname, 'r')  # open the file to read through it
except OSError:
    # Only file-access failures mean a bad name; other errors should surface.
    print('Wrong file name')
    raise SystemExit

prefix = 'X-DSPAM-Confidence:'
count = 0  # number of 'X-DSPAM-Confidence:' lines
total = 0  # sum of the floating-point values
with fopen:  # closes the file once the loop is done
    for line in fopen:
        if line.startswith(prefix):
            count = count + 1
            # The text after the prefix is the value; float() ignores the
            # surrounding whitespace.
            total = total + float(line[len(prefix):])
if count:
    print('Average spam confidence:', total / count)
else:
    # Avoid dividing by zero when the file has no matching header.
    print('No X-DSPAM-Confidence lines found')
total = float(num)
You forgot here to sum the num floats.
It should have been
total = total+num
fname = input("Enter file name: ")
count = 0
avg = 0
cal = 0  # running sum of the confidence values
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # The number is whatever follows the first ':'.
        pos = line.find(':')
        num = float(line[pos + 1:])
        cal = cal + num
if count:
    avg = cal / count
    print("Average spam confidence:", avg)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
IT WORKS JUST FINE !!!
Use the file name mbox-short.txt as the file name
fname = input("Enter file name: ")
if len(fname) == 0:
    fname = 'mbox-short.txt'
prefix = "X-DSPAM-Confidence:"
count = 0
tot = 0
ans = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith(prefix):
            continue
        count = count + 1
        # Slice right after the prefix: a fixed index of 21 skips the first
        # digit, so a value such as 1.0000 would be read as .0000.
        num = float(line[len(prefix):])
        tot = num + tot
if count:
    ans = tot / count
    print("Average spam confidence:", ans)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
# Use the file name mbox-short.txt as the file name
fname = input("Enter file name: ")
count = 0
avg = 0.0
cal = 0.00  # running sum of the confidence values
with open(fname, 'r') as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # The number is whatever follows the first ':'.
        pos = line.find(':')
        num = float(line[pos + 1:])
        cal = cal + num
if count:
    avg = cal / count
    print("Average spam confidence:", avg)
else:
    # Avoid dividing by zero when the file has no matching header.
    print("No X-DSPAM-Confidence lines found")
fname = input("Enter file name: ")
total = 0
count = 0
with open(fname) as fh:
    # Iterate the file itself: looping over fh.read() would visit one
    # character at a time and never see a whole header line.
    for line in fh:
        # Match the header name only, so every confidence value is counted.
        if not line.strip().startswith("X-DSPAM-Confidence:"):
            continue
        pos = line.find(':')
        num = float(line[pos + 1:])
        total += num
        count += 1
if count:
    print('Average spam confidence: ', float(total / count))
else:
    # Avoid dividing by zero when the file has no matching header.
    print('No X-DSPAM-Confidence lines found')
When replying to an SMS, I have a limit of 160 characters. I currently have code set up to take a reply (which can be >160) and split it into a list of multiple texts each <160. It's also set up so that it keeps words whole. I included it:
# Splits `repl` on whitespace and packs the words, joined by single spaces, into the list `texts`.
# NOTE(review): the trailing `return` implies this is the body of a function whose def line is not shown.
repl='message to be sent. may be >160'
texts=[]
words=repl.split()
curtext=''
for word in words:
#for the first word, drop the space
if len(curtext)==0:
curtext+=word
#check if there's enough space left in the current message
# The word plus its separating space must fit within 155 characters.
# NOTE(review): presumably 155 leaves headroom under the 160 limit - confirm.
elif len(curtext)<=155-(len(word)+1):
curtext+=' '+word
#not enough space. make a new message
else:
texts.append(curtext)
curtext=word
# Flush the message still under construction when the words run out.
if curtext!='':
texts.append(curtext)
return texts
However, I now want to modify it so that it appends "reply m for more" to end of every second message. Any ideas on how to do this?
(I'm writing code in Python)
SMS_LIMIT = 160
MORE_SUFFIX = "\nreply m for more"


def split_reply(reply, limit=SMS_LIMIT, suffix=MORE_SUFFIX):
    """Split *reply* into messages of whole words, each at most *limit* long.

    Every second message (2nd, 4th, ...) that is followed by further text ends
    with *suffix*, and room for it is reserved while that message is filled.
    A single word longer than the available room is placed in a message of its
    own and may exceed the limit.

    Returns:
        The list of message strings; empty when *reply* contains no words.
    """
    texts = []
    current = []  # words of the message being built
    length = 0    # length of " ".join(current)
    for word in reply.split():
        # Messages at odd index carry the suffix, so their budget is smaller.
        budget = limit - len(suffix) if len(texts) % 2 else limit
        needed = length + 1 + len(word) if current else len(word)
        if current and needed > budget:
            text = " ".join(current)
            if len(texts) % 2:
                text += suffix
            texts.append(text)
            # The word that did not fit starts the next message.
            current, length = [word], len(word)
        else:
            current.append(word)
            length = needed
    if current:
        # The last message gets no suffix: nothing more follows it.
        texts.append(" ".join(current))
    return texts


reply = "text to be sent ...."
texts = split_reply(reply)
def sms_calculator(msg_text, limit=160):
    """Split *msg_text* into messages of whole words, each at most *limit* long.

    Words are joined with single spaces. A message of exactly *limit*
    characters is allowed.

    Args:
        msg_text: the text to split.
        limit: maximum length of one message (default 160).

    Returns:
        The list of messages. It is empty when *msg_text* has no words or
        when any single word is longer than *limit*.
    """
    words = msg_text.split()
    # Nothing to send, or a word that can never fit into one message.
    if not words or max(map(len, words)) > limit:
        return []
    sms_lst = []
    sms_string = words[0]
    for word in words[1:]:
        if len(sms_string) + 1 + len(word) <= limit:
            sms_string = sms_string + ' ' + word
        else:
            sms_lst.append(sms_string)
            sms_string = word
    sms_lst.append(sms_string)
    return sms_lst