Finding the rating of words using python - python

This is my program. It displays the value only if I give the complete name; for example, if I type eng, it shows only eng with its value.
import re
sent = "eng"
#sent=raw_input("Enter word")
#regex = re.compile('(^|\W)sent(?=(\W|$))')
for line in open("sir_try.txt").readlines():
if sent == line.split()[0].strip():
k = line.rsplit(',',1)[0].strip()
print k
gene name utr length
ensbta 24
ensg1 12
ensg24 30
ensg37 65
enscat 22
ensm 30
Actually, what I want is to search for the highest value in the text file, not just to look up words, and to delete from the text file all entries of the same word that have a value lower than the maximum. For example, from the text above it should delete 12 and 30 for ensg. Then it should find the minimum of the utr values and display it with its name.
What you are suggesting is something I have already done, as I mentioned before showing my program.

please try this
file=open("sir_try.txt","r")
list_line=file.readlines()
file.close()
all_text=""
dic={}
sent="ensg"
temp_list=[]
for line in list_line:
all_text=all_text+line
name= line.rsplit()[0].strip()
score=line.rsplit()[1].strip()
dic[name]=score
for i in dic.keys():
if sent in i:
temp_list.append(dic[i])
hiegh_score=max(temp_list)
def check(index):
reverse_text=all_text[index+1::-1]
index2=reverse_text.find("\n")
if sent==reverse_text[:index2+1][::-1][1:len(sent)+1]:
return False
else:
return True
list_to_min=dic.values()
for i in temp_list:
if i!=hiegh_score:
index=all_text.find(str(i))
while check(index):
index=all_text.find(str(i),index+len(str(i)))
all_text=all_text[0:index]+all_text[index+len(str(i)):]
list_to_min.remove(str(i))
#write all text to "sir_try.txt"
file2=open("sir_try.txt","w")
file2.write(all_text)
file2.close()
min_score= min(list_to_min)
for j in dic.keys():
if min_score==dic[j]:
print "min score is :"+str(min_score)+" for person "+j
The check function fixes a bug in this solution. To illustrate, suppose your file is:
gene name utr length
ali 12
ali87 30
ensbta 24
ensg1 12
ensg24 30
ensg37 65
enscat 22
ensm 30
Without the check function, the program would delete ali's score, even though ali is not one of the matched names.
Adding the check function solves this problem.
This version is my final answer.

Instead of if sent == ..., try if sent in (line.split()[0].strip()):
That checks whether the value of sent (for example, ensg) appears anywhere in the argument (line.split()[0].strip()) in this case.
If you're still trying to only take the highest value, I would just create a variable value, then something along the lines of
if line.split()[1].strip() > value:
value = line.split()[1].strip()
Test that out and let us know how it works for you.

To find out the name (first column) with the maximum value associated (second column), you need to first split the lines at the whitespace between name and value. Then you can find the maximum value using the built-in max() function. Let it take the value column as sorting criterion. You can then easily find out the corresponding name.
Example:
file_content = """
gene name utr length
ensbta 24
ensg1 12
ensg24 30
ensg37 65
enscat 22
ensm 30
"""


def utr_length(row):
    """Return the utr length (second column) of a [name, length] row as an int."""
    return int(row[1])


# split lines at whitespace
l = [line.split() for line in file_content.splitlines()]
# skip headline and empty lines: only data rows have exactly two columns
l = [line for line in l if len(line) == 2]
print(l)
# find the maximum of second column; the lengths are compared as integers,
# because as strings '9' would rank above '65'
max_utr_length_tuple = max(l, key=utr_length)
print(max_utr_length_tuple)
print(max_utr_length_tuple[0])
the output is:
$ python test.py
[['ensbta', '24'], ['ensg1', '12'], ['ensg24', '30'], ['ensg37', '65'], ['enscat', '22'], ['ensm', '30']]
['ensg37', '65']
ensg37

Short and sweet:
In [01]: t=file_content.split()[4:]
In [02]: b=((zip(t[0::2], t[1::2])))
In [03]: max(b, key=lambda x:x[1])
Out[03]: ('ensg37', '65')

import operator

# Read every line up front; the with-block closes the file afterwards.
with open('./sir_try.txt', 'r') as f:
    lines = f.readlines()

gene = {}
matched_gene = {}
# lines[1:] skips the "gene name utr length" header and tolerates an empty file
for line in lines[1:]:
    words = line.split()
    if len(words) < 2:
        continue  # blank or incomplete line
    # store the length as an int so the comparison below is numeric;
    # as strings, '9' would rank above '65'
    gene[words[0]] = int(words[1])

# getting user input
user_input = raw_input('Enter gene name: ')
for gene_name, utr_length in gene.items():
    if user_input in gene_name:
        matched_gene[gene_name] = utr_length

if matched_gene:
    m = max(matched_gene.items(), key=operator.itemgetter(1))[0]
    print('%s %s' % (m, matched_gene[m]))  # expected answer
    # code to remove redundant gene names as per requirement:
    # keep only the best match and drop every matching name from gene
    matched_gene = {m: matched_gene[m]}
    gene = dict((name, length) for name, length in gene.items()
                if user_input not in name)
else:
    # max() would raise ValueError on an empty mapping, so report it instead
    print('No gene name contains %r' % user_input)

final_gene = dict(gene)
final_gene.update(matched_gene)
with open('./output.txt', 'w') as out:
    out.write('gene name' + '\t\t' + 'utr length' + '\n\n')
    for key, value in final_gene.items():
        out.write(key + '\t\t\t\t' + str(value) + '\n')
Output:
Enter gene name: ensg
ensg37 65

Since you have tagged your question regex,
Here's something that you would want to see and it's the only one (at the moment) that uses regex!
import re

sent = 'ensg'  # your sequence
# regex that will "filter" the lines containing value of sent;
# re.escape keeps any regex metacharacters in sent literal
my_re = re.compile(r'(.*?%s.*?)\s+?(\d+)' % re.escape(sent))
with open('stack.txt') as f:
    lines = f.read()  # get data from file
filtered = my_re.findall(lines)  # "filter" your data
print(filtered)
# get the desired (tuple with maximum "utr length"); the captured lengths are
# strings, so convert them to int to compare numerically ('9' < '65')
max_tuple = max(filtered, key=lambda x: int(x[1]))
print(max_tuple)
Output:
[('ensg1', '12'), ('ensg24', '30'), ('ensg37', '65')]
('ensg37', '65')

Related

Find specific values in a txt file and adding them up with python

I have a txt file which looks like that:
[Chapter.Title1]
Irrevelent=90 B
Volt=0.10 ienl
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=999
Irrevelent=999
[Chapter.Title3]
Irrevelent=92 B
Volt=0.20 ienl
Watt=5 W
Ampere=6 A
Irrevelent=93 C
What I want is that it catches "Title1" and the values "0,1", "2" and "3". Then adds them up (which would be 5.1).
I don't care about the lines with "irrevelent" at the beginning.
And then the same with the third block. Catching "Title3" and adding "0.2", "5" and "6".
The second block with "Title2" does not contain "Volt", Watt" and "Ampere" and is therefore not relevant.
Can anyone please help me out with this?
Thank you and cheers
You can use regular expressions to get the values and the titles in lists, then use them.
txt = """[Chapter.Title1]
Irrevelent=90 B
Volt=1 V
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=92 B
Volt=4 V
Watt=5 W
Ampere=6 A
Irrevelent=93 C"""
#that's just the text
import re
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=(\d+)'
rxv2=r'Watt=(\d+)'
rxv3=r'Ampere=(\d+)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
print(res1)
print(resv1)
print(resv2)
print(resv3)
Here you get the titles and the interesting values you want :
['Title1', 'Title2']
['1', '4']
['2', '5']
['3', '6']
You can then use them as you want, for example :
for title_index in range(len(res1)):
print(res1[title_index])
value=int(resv1[title_index])+int(resv2[title_index])+int(resv3[title_index])
#use float() instead of int() if you have non integer values
print("the value is:", value)
You get :
Title1
the value is: 6
Title2
the value is: 15
Or you can store them in a dictionary or an other structure, for example :
#dict(zip(keys, values))
data= dict(zip(res1, [int(resv1[i])+int(resv2[i])+int(resv3[i]) for i in range(len(res1))] ))
print(data)
You get :
{'Title1': 6, 'Title2': 15}
Edit : added opening of the file
import re
with open('filename.txt', 'r') as file:
txt = file.read()
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
data= dict(zip(res1, [float(resv1[i])+float(resv2[i])+float(resv3[i]) for i in range(len(res1))] ))
print(data)
Edit 2 : ignoring missing values
import re
with open('filename.txt', 'r') as file:
txt = file.read()
#divide the text into parts starting with "chapter"
substr = "Chapter"
chunks_idex = [_.start() for _ in re.finditer(substr, txt)]
chunks = [txt[chunks_idex[i]:chunks_idex[i+1]-1] for i in range(len(chunks_idex)-1)]
chunks.append(txt[chunks_idex[-1]:]) #add the last chunk
#print(chunks)
keys=[]
values=[]
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
for chunk in chunks:
res1 = re.findall(rx1, chunk)
resv1 = re.findall(rxv1, chunk)
resv2 = re.findall(rxv2, chunk)
resv3 = re.findall(rxv3, chunk)
# check if we can find all of them by checking if the lists are not empty
if res1 and resv1 and resv2 and resv3 :
keys.append(res1[0])
values.append(float(resv1[0])+float(resv2[0])+float(resv3[0]))
data= dict(zip(keys, values ))
print(data)
Here's a quick and dirty way to do this, reading line by line, if the input file is predictable enough.
In the example I just print out the titles and the values; you can of course process them however you want.
f = open('file.dat','r')
for line in f.readlines():
## Catch the title of the line:
if '[Chapter' in line:
print(line[9:-2])
## catch the values of Volt, Watt, Amere parameters
elif line[:4] in ['Volt','Watt','Ampe']:
value = line[line.index('=')+1:line.index(' ')]
print(value)
## if line is "Irrelevant", or blank, do nothing
f.close()
There are many ways to achieve this. Here's one:
d = dict()
V = {'Volt', 'Watt', 'Ampere'}
with open('chapter.txt', encoding='utf-8') as f:
key = None
for line in f:
if line.startswith('[Chapter'):
d[key := line.strip()] = 0
elif key and len(t := line.split('=')) > 1 and t[0] in V:
d[key] += float(t[1].split()[0])
for k, v in d.items():
if v > 0:
print(f'Total for {k} = {v}')
Output:
Total for [Chapter.Title1] = 6.0
Total for [Chapter.Title2] = 15.0

How to find the average for a file then put it in another file

I want to find the average of the list inFile and then I would like to move it to the classscores.
classgrades.txt is:
Chapman 90 100 85 66 80 55
Cleese 80 90 85 88
Gilliam 78 82 80 80 75 77
Idle 91
Jones 68 90 22 100 0 80 85
Palin 80 90 80 90
classcores.txt is empty
This is what I have so far... what should I do?
inFile = open('classgrades.txt','r')
outFile = open('classscores.txt','w')
for line in inFile:
with open(r'classgrades.txt') as data:
total_stuff = #Don't know what to do past here
biggest = min(total_stuff)
smallest = max(total_stuff)
print(biggest - smallest)
print(sum(total_stuff)/len(total_stuff))
You will need to:
- split each line by whitespace and slice out all items but the first
- convert each string value in array to integer
- sum all of those integer values in the array
- add the sum for this line to total_sum
- add the length of those values (the number of numbers) to total_numbers
However, this is only part of the problem... I will leave the rest up to you. This code will not write to the new file, it will simply take an average of all the numbers in the first file. If this isn't exactly what you are asking for, then try playing around with this stuff and you should be able to figure it all out.
def average_grade(lines):
    """Return the integer average of every grade found in *lines*.

    Each line is expected to look like ``"Name 90 100 85"``: the first
    whitespace-separated item is the student name and is ignored, the
    remaining items are integer grades. Lines without grades are skipped.

    Raises:
        ValueError: if no grade is found at all, or a grade is not an integer.
    """
    total_sum = 0
    total_numbers = 0
    for line in lines:
        # split by whitespace and slice out everything after 1st item
        num_strings = line.split()[1:]
        # convert each string value to an integer
        values = [int(n) for n in num_strings]
        # add the sum of numbers in this line to the total_sum
        total_sum += sum(values)
        # add the number of values in this line to total_numbers
        total_numbers += len(values)
    if total_numbers == 0:
        raise ValueError('no grades found')
    return total_sum // total_numbers  # // is integer division in python3


if __name__ == '__main__':
    # Only the input file is opened: this snippet does not write anything, and
    # opening classscores.txt in 'w' mode would just truncate it.
    with open(r'classgrades.txt') as inFile:
        print(average_grade(inFile))
You don't need to open the file multiple times, and you should close the files at the end of your program. Below is what I tried; I hope it works for you:
def read_grades(lines):
    """Map each student name to the list of integer grades on their line.

    Blank lines are ignored. The grades are stored as a real list (not a
    ``map`` object) so they can be traversed more than once.
    """
    d1 = {}
    for line in lines:
        contents = line.split()
        if not contents:
            continue  # blank line
        # create mapping of student and his numbers
        d1[contents[0]] = [int(n) for n in contents[1:]]
    return d1


def score_spread(scores):
    """Return the difference between the highest and the lowest score."""
    biggest = max(scores)
    smallest = min(scores)
    return biggest - smallest


if __name__ == '__main__':
    with open(r'classgrades.txt', 'r') as fp:
        d1 = read_grades(fp)
    with open(r'classscores.txt', 'w') as fp:
        for key, item in d1.items():
            if not item:
                continue  # a name with no grades has no spread or average
            print(score_spread(item))
            # average of all numbers
            avg = sum(item) / len(item)
            fp.write("%s %s\n" % (key, avg))
Apologies if this is kind of advanced, I try to provide key words/phrases for you to search for to learn more.
Presuming you're looking for each student's separate average:
in_file = open('classgrades.txt', 'r') # python naming style is under_score
out_file = open('classcores.txt', 'w')
all_grades = [] # if you want a class-wide average as well as individual averages
for line in in_file:
# make a list of the things between spaces, like ["studentname", "90", "100"]
student = line.split(' ')[0]
# this next line includes "list comprehension" and "list slicing"
# it gets every item in the list aside from the 0th index (student name),
# and "casts" them to integers so we can do math on them.
grades = [int(g) for g in line.split(' ')[1:]]
# hanging on to every grade for later
all_grades += grades # lists can be +='d like numbers can
average = int(sum(grades) / len(grades))
# str.format() here is one way to do "string interpolation"
out_file.write('{} {}\n'.format(student, average))
total_avg = sum(all_grades) / len(all_grades)
print('Class average: {}'.format(total_avg))
in_file.close()
out_file.close()
As others pointed out, it is good to get in the habit of closing files.
Other responses here use with open() (as a "context manager") which is best practice because it automatically closes the file for you.
To work with two files without having a data container in between (like Amit's d1 dictionary), you would do something like:
with open('in.txt') as in_file:
with open('out.txt', 'w') as out_file:
... do things ...
This script should accomplish what you are trying to do I think:
# define a list data structure to store the classgrades
classgrades = []
with open( 'classgrades.txt', 'r' ) as infile:
for line in infile:
l = line.split()
# append a dict to the classgrades list with student as the key
# and value is list of the students scores.
classgrades.append({'name': l[0], 'scores': l[1:]})
with open( 'classscores.txt', 'w' ) as outfile:
for student in classgrades:
# get the students name out of dict.
name = student['name']
# get the students scores. use list comprehension to convert
# strings to ints so that scores is a list of ints.
scores = [int(s) for s in student['scores']]
# calc. total
total = sum(scores)
# get the number of scores.
count = len( student['scores'] )
# calc. average
average = total/count
biggest = max(scores)
smallest = min(scores)
diff = ( biggest - smallest )
outfile.write( "%s %s %s\n" % ( name, diff , average ))
Running the above code will create a file called classscores.txt which will contain this:
Chapman 45 79.33333333333333
Cleese 10 85.75
Gilliam 7 78.66666666666667
Idle 0 91.0
Jones 100 63.57142857142857
Palin 10 85.0

Creating a ranking for lines of text file and keeping only top lines

Let's say I have a text file with thousands of lines of the following form:
Word Number1 Number2
In this text file, the "Word" is indeed some word that changes from one line to another, and the numbers are likewise changing numbers. However, some of these words are the same... Consider the following example:
Hello 5 7
Hey 3 2
Hi 7 3
Hi 5 2
Hello 1 4
Hey 5 2
Hello 8 1
What would be a python script that reads the text file and keeps only the lines that contain the highest Number1 for any given Word (deleting all lines that do not satisfy this condition)? The output for the above example with such a script would be:
Hi 7 3
Hey 5 2
Hello 8 1
Note: the order of the lines in the output is irrelevant, all that matters is that the above condition is satisfied. Also, if for a given Word, the highest Number1 is the same for two or more lines, the output should keep only one of them, such that there is only one occurence of any Word in the output.
I've no clue how to approach the deletion aspect, but I can guess (perhaps incorrectly) that the first step would be to make a list from all the lines in the text file, i.e.
List1 = open("textfile.txt").readlines()
At any rate, many thanks in advance for the help!
You can try this:
# Read the rows first; the with-block closes the file before it is rewritten.
with open('the_file.txt') as source:
    f = [i.split() for i in source if i.strip()]  # skip blank lines

# word -> numbers of the row with the highest first number seen so far.
# Lists are stored (not map objects) so they can be indexed on Python 3 too.
other_f = {}
for i in f:
    numbers = [int(n) for n in i[1:]]
    if i[0] not in other_f or other_f[i[0]][0] < numbers[0]:
        other_f[i[0]] = numbers

with open('the_file.txt', 'w') as new_f:
    for a, b in other_f.items():
        new_f.write(a + " " + ' '.join(map(str, b)) + "\n")
Output:
Hi 7 3
Hello 8 1
Hey 5 2
You can store the lines in a dict, with the words as keys. To make things easier, you can store a tuple with the value of the first numeric field (converted to integer, otherwise you would sort by lexicographic order) and the line.
We use dict.setdefault in case we encounter the word for the first time.
highest = {}
with open('text.txt') as f:
for line in f:
name, val, _ = line.split(' ', 2)
val = int(val)
if val > highest.setdefault(name, (val, line))[0]:
highest[name] = (val, line)
out = [tup[1] for name, tup in highest.items()]
print(''.join(out))
# Hey 5 2
# Hello 8 1
# Hi 7 3
First sort the list from high to low, using the 1st and 2nd columns as the key;
then remove the duplicate items.
def keep_highest(lines):
    """Return one line per word: the one with the highest first number.

    *lines* are strings of the form ``"Word Number1 Number2"``; blank lines
    are ignored. The result is ordered by word, descending.
    """
    rows = [line for line in lines if line.strip()]
    # sort from high to low on (word, Number1) so that, for every word, the
    # line with the highest Number1 comes first
    ranked = sorted(rows, key=lambda x: (x.split()[0], int(x.split()[1])),
                    reverse=True)
    uniq_key = set()
    kept = []
    # build a new list instead of removing from the list being iterated,
    # which would skip the element following every removal
    for i in ranked:
        key = i.split()[0]
        if key not in uniq_key:
            uniq_key.add(key)
            kept.append(i)
    return kept


if __name__ == '__main__':
    with open(r'textfile.txt') as source:
        list1 = source.read().splitlines()
    output = keep_highest(list1)
>>> output
['Hi 7 3', 'Hey 5 2', 'Hello 8 1']
Because file objects are iterable, it is not necessary to do the readlines up front. So let's open the file and then just iterate over it using a for loop.
fin = open('sometext.txt')
We create a dictionary to hold the results, as we go.
topwords = dict()
Iterating now, over the lines in the file:
for line in fin:
We strip off the new line characters and split the lines into individual strings, based on where the spaces are (the default behavior for split()).
word, val1, val2 = line.strip().split()
val1 = int(val1)
We check to see if we have already seen the word, if yes, we then check to see if the first value is greater than the first value previously stored.
if word in topwords:
if val1 > topwords[word][0]:
topwords[word] = [val1, val2]
else:
topwords[word] = [val1, val2]
Once we finish parsing all the words, we go back and iterate over the top words and print the results to the screen.
for word in topwords:
output = '{} {} {}'.format(word, *topwords[word])
print(output)
The final script looks like this:
fin = open('sometext.txt')
topwords = dict()
for line in fin:
word, val1, val2 = line.strip().split()
val1 = int(val1)
if word in topwords:
if val1 > topwords[word][0]:
topwords[word] = [val1, val2]
else:
topwords[word] = [val1, val2]
for word in topwords:
output = '{} {} {}'.format(word, *topwords[word])
print(output)

Python File IO - building dictionary and finding max value

Problem is to return the name of the event that has the highest number of participants in this text file:
#Beyond the Imposter Syndrome
32 students
4 faculty
10 industries
#Diversifying Computing Panel
15 students
20 faculty
#Movie Night
52 students
So I figured I had to split it into a dictionary with the event names as the keys and, as the values, the sum of the integers at the beginning of the other lines. I'm having a lot of trouble, and I think I'm making it more complicated than it is.
This is what I have so far:
def most_attended(fname):
'''(str: filename, )'''
d = {}
f = open(fname)
lines = f.read().split(' \n')
print lines
indexes = []
count = 0
for i in range(len(lines)):
if lines[i].startswith('#'):
event = lines[i].strip('#').strip()
if event not in d:
d[event] = []
print d
indexes.append(i)
print indexes
if not lines[i].startswith('#') and indexes !=0:
num = lines[i].strip().split()[0]
print num
if num not in d[len(d)-1]:
d[len(d)-1] += [num]
print d
f.close()
import sys
from collections import defaultdict
from operator import itemgetter
def load_data(file_name):
    """Sum the participant counts listed under each event in *file_name*.

    A line starting with '#' names an event; every following line starts
    with an integer count that is added to that event. Blank lines are
    ignored, and so are count lines that appear before the first event.

    Returns:
        A defaultdict(int) mapping event name to total participants.

    Raises:
        ValueError: if a count line does not start with an integer.
    """
    events = defaultdict(int)
    current_event = None
    # the with-block closes the file even if a line fails to parse
    with open(file_name) as source:
        for line in source:
            line = line.strip()
            if not line:
                continue  # blank line: nothing to count
            if line.startswith('#'):
                current_event = line[1:].strip()
            elif current_event is not None:
                participants_count = int(line.split()[0])
                events[current_event] += participants_count
    return events
if __name__ == '__main__':
if len(sys.argv) < 2:
print('Usage:\n\t{} <file>\n'.format(sys.argv[0]))
else:
events = load_data(sys.argv[1])
print('{}: {}'.format(*max(events.items(), key=itemgetter(1))))
Here's how I would do it.
with open("test.txt", "r") as f:
docText = f.read()
eventsList = []
#start at one because we don't want what's before the first #
for item in docText.split("#")[1:]:
individualLines = item.split("\n")
#get the sum by finding everything after the name, name is the first line here
sumPeople = 0
#we don't want the title
for line in individualLines[1:]:
if not line == "":
sumPeople += int(line.split(" ")[0]) #add everything before the first space to the sum
#add to the list a tuple with (eventname, numpeopleatevent)
eventsList.append((individualLines[0], sumPeople))
#get the item in the list with the max number of people
print(max(eventsList, key=lambda x: x[1]))
Essentially you first want to split up the document by #, ignoring the first item because that's always going to be empty. Now you have a list of events. Now for each event you have to go through, and for every additional line in that event (except the first) you have to add that lines value to the sum. Then you create a list of tuples like (eventname) (numPeopleAtEvent). Finally you use max() to get the item with the maximum number of people.
This code prints ('Movie Night', 104) obviously you can format it to however you like
Similar answers to the ones above.
result = {}  # store the results: event name -> total participants
current_key = None  # placeholder to hold the current_key
for line in lines:
    # find what event we are currently stripping data for
    # if this line doesnt start with '#', we can assume that its going to be info for the last seen event
    if line.startswith("#"):
        # drop the '#' and any surrounding whitespace / trailing newline
        current_key = line[1:].strip()
        result[current_key] = 0
    elif current_key is not None:
        # pull the number out of the string
        number = [int(s) for s in line.split() if s.isdigit()]
        # make sure we actually got a number in the line
        if len(number) > 0:
            result[current_key] = result[current_key] + number[0]
# iterating a dict yields its keys, so rank the event names by their totals
# (ranking by x[1] would compare the second character of each name)
print(max(result, key=result.get))
This will print "Movie Night".
Your problem description says that you want to find the event with highest number of participants. I tried a solution which does not use list or dictionary.
Ps: I am new to Python.
bigEventName = ""
participants = 0
curEventName = ""
curEventParticipants = 0
# Use RegEx to split the file by lines
itr = re.finditer("^([#\w+].*)$", lines, flags = re.MULTILINE)
for m in itr:
if m.group(1).startswith("#"):
# Whenever a new group is encountered, check if the previous sum of
# participants is more than the recent event. If so, save the results.
if curEventParticipants > participants:
participants = curEventParticipants
bigEventName = curEventName
# Reset the current event name and sum as 0
curEventName = m.group(1)[1:]
curEventParticipants = 0
elif re.match("(\d+) .*", m.group(1)):
# If it is line which starts with number, extract the number and sum it
curEventParticipants += int(re.search("(\d+) .*", m.group(1)).group(1))
# This nasty code is needed to take care of the last event
bigEventName = curEventName if curEventParticipants > participants else bigEventName
# Here is the answer
print("Event: ", bigEventName)
You can do it without a dictionary and maybe make it a little simpler if just using lists:
with open('myfile.txt', 'r') as f:
    lines = [l.strip() for l in f]  # remove '\n' and surrounding whitespace

highest = 0  # largest participant total seen so far
event = ""  # name of the event with that total
current_event = None  # event whose lines are currently being summed
current_total = 0
for l in lines:
    if not l:
        continue  # blank line
    if l.startswith('#'):
        # a '#' line names the next event: settle the previous one first
        if current_event is not None and current_total > highest:
            highest = current_total
            event = current_event
        current_event = l[1:].strip()
        current_total = 0
    elif current_event is not None:
        # the count is the first item of the line, e.g. "32 students"
        current_total += int(l.split()[0])
# the last event has no following '#' line, so settle it here
if current_event is not None and current_total > highest:
    highest = current_total
    event = current_event
print(event)

Project Euler #22 Python, 2205 points missing?

I'm working on problem 22 of Project Euler:
Using names.txt (right click and 'Save Link/Target As...'), a 46K text file containing over five-thousand first names, begin by sorting it into alphabetical order. Then working out the alphabetical value for each name, multiply this value by its alphabetical position in the list to obtain a name score.
For example, when the list is sorted into alphabetical order, COLIN,
which is worth 3 + 15 + 12 + 9 + 14 = 53, is the 938th name in the
list. So, COLIN would obtain a score of 938 × 53 = 49714.
What is the total of all the name scores in the file?
http://projecteuler.net/problem=22
When I compile my code below, I get the answer 871196077. The correct answer should be 871198282.
import time
def euler_22():
## Creates a sorted list of the names in Py_Euler_22.txt
names = open('Py_Euler_22.txt', 'r')
names = names.read()
names = names.split('","')
names[0] = names[0][1:]
names[-1] = names[-1][:-2]
names = sorted(names)
## Creates a dictionary: letter -> value
value_letters = {}
start = ord("A")
for i in range(0, 26):
value_letters[chr(start+i)] = i+1
result = 0
for i in range(1, len(names)+1):
name = names[i-1]
sum_letters = 0
for letter in name:
sum_letters += value_letters[letter]*i
# = value of the letter multiplied with the name position
result += sum_letters
return result
tstart = time.time()
print euler_22()
print "Run time: " + str(time.time() - tstart)
I tried to find a program with a similar solution, but I only know Python, that limits the options.
I ran the program with simpler text-files, I created, where I can get the answer without a program and all of them worked. I googled the answer to the problem, but that didn't help either, since I cant find the missing points.
I'm a beginner, so I would really appreciate any tips regarding the program and Python, not only those, that will help me to solve the problem correctly.
Thanks a lot!
You have accidentally mangled one name.
Here qnames is the sorted list of names your code produces, and sorted_names is mine:
>>> for a,b in zip(qnames, sorted_names):
... if a != b:
... print a, b
...
ALONS ALONSO
For fun: a one-liner - nested list comprehensions, avast ye!
print sum ( [ (pos+1) * nv for pos, nv in enumerate([ sum ( [ ord(char) - 64 for char in name ] ) for name in sorted([name.strip('"') for name in open('names.txt','r').readline().split(",")]) ]) ] )
Or more readably:
print sum (
[(pos+1) * nv for pos, nv in
enumerate([ sum ([ ord(char) - 64 for char in name ] ) for name in
sorted([name.strip('"') for name in
open('names.txt','r').readline().split(",")]) ]) ] )
The black magic is that ASCII A is integer 65, ASCII B is integer 66, and so on - so ord(char) - 64 gets you the "letter value" of char.
Edit 2:
The full, human-readable, solution that I crammed into one line for your amusement.
with open('names.txt','r') as f:
data = f.readline();
names = [name.strip('"') for name in data.split(",")]
sorted_names = sorted(names)
name_values = [ sum ( [ ord(char) - 64 for char in name ] ) for name in sorted_names ]
name_position_values = [ (pos+1) * nv for pos, nv in enumerate(name_values) ]
total_sum = sum(name_position_values)
# debug output
from pprint import pprint
#position, word value, position * word value, word
pprint(zip(xrange(1,len(names)+1),name_values,name_position_values,sorted_names))
Note the heavy use of list comprehensions [x for x in list_of_xes] instead of loops, and the sum() function instead of for x in xes: sum += x.
There are some other tricks in here, but the take-home lesson is that list comprehensions and functions that process lists can make your code much simpler and easier to read.
Edit 3:
The pprint.pprint() function is a "pretty print()". It's great for debugging.
Edit 4:
Code golf version (142 chars):
print sum([(p+1)*v for p,v in enumerate([sum(map(ord,n))-64*len(n) for n in sorted([n[1:-1] for n in open('names.txt').read().split(",")])])])
I just cross-checked your code, and it looks like you're inadvertently chopping off the last character of the last word. To strip off the quotes from the last word, use:
names[-1] = names[-1][:-1]
Rather than trying to strip all the quotes from the names at once when you're converting the file contents to a list, strip them when you're processing the list.
# Project Euler Problem 22
# Name Scores
def score(name):
    """Return the alphabetical value of *name* (sum of its letter positions)."""
    # ord('A') is 65, so subtracting 64 scales the letters to A = 1, B = 2...
    return sum(ord(letter) - 64 for letter in name)
def main():
    """Print the total of all name scores in names.txt (Project Euler 22).

    The file holds comma-separated, double-quoted names. Each name's score
    is its alphabetical value multiplied by its 1-based position in the
    sorted list.
    """
    # Read the entire contents; the with-block closes the file even on error
    with open('names.txt', 'r') as infile:
        file_contents = infile.read()
    # Convert file contents to a list of quoted names and sort them.
    # Whitespace (such as a trailing newline) is stripped first so it can
    # neither affect the ordering nor shield the closing quote.
    list_of_names = [entry.strip() for entry in file_contents.split(',')]
    list_of_names.sort()
    total = 0
    for position, name in enumerate(list_of_names, start=1):
        name = name.strip('"')  # strip the quotes from names individually
        total += score(name) * position
    print(total)
if __name__ == "__main__":
main()

Categories