Python grouping and randomization - python

I just started playing around with Python and wanted to create a little
project. I have a dictionary "Teams" with names as keys and scores (1-5)
as values. I want to write code that randomly splits the 15 players into
3 teams of 5, balanced so that each team's average score is roughly the
same. Example: a team needs to include top players (score 5) and also
weaker players (score 1 to 3) in order to create an equilibrium.
If more details are needed please let me know :)
Cheers

Something similar to this perhaps?
import csv
import os
import random
# Kept from the original script; it is never populated or read here.
names = []

# Load every row of the spreadsheet from the current user's Desktop.
userhome = os.path.expanduser('~')
csvfile = userhome + r'/Desktop/uni_spread.csv'
with open(csvfile, 'r') as f:
    reader = csv.reader(f)
    data = list(reader)


def _split_entry(row):
    """Return the two '#'-separated halves of one stringified CSV row.

    NOTE(review): the whole row list is stringified, exactly as before, so
    the halves still carry the list's brackets and quotes (for example
    "['MIT" and "Team A']").  Confirm the CSV layout before switching to
    something like row[0].
    """
    uni, team = str(row).split('#')
    return uni, team


bracket1 = []
bracket2 = []
lenx = len(data) // 2

# Shuffle until no pairing puts the same university on both sides, giving up
# after 101 attempts.
reshuffle = True
shuffleCount = 0
while reshuffle and shuffleCount <= 100:
    random.shuffle(data)
    bracket1 = data[0:lenx]
    bracket2 = data[lenx:]
    # Check every pairing, including the last one; zip stops at the shorter
    # bracket, so an unpaired extra entry in bracket2 is ignored.
    reshuffle = any(
        _split_entry(row1)[0] == _split_entry(row2)[0]
        for row1, row2 in zip(bracket1, bracket2)
    )
    shuffleCount += 1

print('final shuffle')
print("""
""")
if len(bracket1) != len(bracket2):
    print('uneven teams, walkover granted to: ')
    # With an odd number of rows the extra entry is at the end of bracket2
    # (data[lenx:]), so that is the one left without an opponent.
    print(bracket2[-1])
print("""
team matchup
""")
for row1, row2 in zip(bracket1, bracket2):
    uni1, team1 = _split_entry(row1)
    uni2, team2 = _split_entry(row2)
    print( uni1, '', team1 , ' VS ', uni2, '', team2)
print('shuffle count is ', shuffleCount)

Related

I want to parallelize this code to execute faster for 800000 sentences

from app import getPhonemes
import pandas as pd
import sys
triphones = []
def phonemize(sentence):
    """Return the phonemes of ``sentence`` joined into one '$'-delimited string.

    The sentence is split on single spaces, the pieces are passed to
    ``getPhonemes``, and the strings it returns are joined with '$'.
    """
    words = sentence.split(' ')
    return '$'.join(getPhonemes(words))
def generateTriphones(phonemes):
    """Return every ordered triple of phonemes as a space-joined string.

    Triples are drawn with repetition, so ``len(phonemes) ** 3`` strings are
    produced, in the same order three nested loops over ``phonemes`` would
    yield them.

    Args:
        phonemes: sequence of phoneme strings.

    Returns:
        A list of strings of the form ``"p1 p2 p3"``.
    """
    from itertools import product

    return [' '.join(triple) for triple in product(phonemes, repeat=3)]
def scoreSentence(sentence,phonemes):
    """Score a phonemized sentence by the uncovered triphones it contains.

    Every entry of the module-level ``triphones`` list that occurs as a
    substring of one of the sentence's tokens counts one point and is
    removed from the list, so it cannot be counted again later.

    Args:
        sentence: phonemized text whose tokens are separated by '$'.
        phonemes: unused; kept so existing callers keep working.

    Returns:
        ``(score, flag)`` where ``score`` is the number of triphones removed
        by this call and ``flag`` is -1 once ``triphones`` is empty, else 0.
    """
    global triphones
    score = 0
    # Only tokens with at least two spaces can hold a three-part triphone.
    candidates = [
        token for token in set(sentence.split('$')) if token.count(' ') > 1
    ]
    for token in candidates:
        # Build the survivors in one pass instead of calling remove() while
        # iterating, which skipped the element after each removal and was
        # quadratic on a large list.
        remaining = []
        for triphone in triphones:
            if triphone in token:
                score += 1
            else:
                remaining.append(triphone)
        # Slice-assign so every reference to the shared list sees the update.
        triphones[:] = remaining
    flag = -1 if not triphones else 0
    return score, flag
def Process(fil):
    """Phonemize and score the sentences of ``<fil>.csv`` in 10000-row chunks.

    Reads the vocabulary from 'itudict/vocab.phoneme' (its first four lines
    are skipped), rebuilds the global ``triphones`` list from it, and writes
    one ``<fil>phonemized<i>.csv`` per chunk with added 'Phonemes' and
    'score' columns.  Processing stops after the chunk in which the last
    remaining triphone was covered.

    Args:
        fil: path of the input CSV without its '.csv' extension.  The file
            must contain 'sentence', 'score' and 'covered_vocab' columns.
    """
    global triphones
    with open('itudict/vocab.phoneme', 'r', encoding='utf-8') as vocab_file:
        vocab = [line.strip() for line in vocab_file]
    phonemes = vocab[4:]
    triphones = generateTriphones(phonemes)

    data = pd.read_csv(fil+'.csv')
    data = data.drop(['score','covered_vocab'],axis=1)
    i = 1
    while len(data) > 0:
        print('Processing File: '+str(i))
        # Copy the chunk so the new columns are added to an independent frame.
        chunk = data[:10000].copy()
        data = data[10000:]
        sentences = chunk['sentence'].tolist()
        phonemized = []
        scores = []
        all_covered = False
        for j, sentence in enumerate(sentences):
            if j%1000 == 0:
                print('Processing Sentence: '+str(j))
                print(len(triphones))
            phones = phonemize(sentence)
            score, flag = scoreSentence(phones,phonemes)
            if flag == -1:
                all_covered = True
            phonemized.append(phones)
            scores.append(score)
        # The results belong to the chunk just processed, not to the rows
        # still waiting in ``data``.
        chunk['Phonemes'] = phonemized
        chunk['score'] = scores
        chunk.to_csv(fil+'phonemized'+str(i)+'.csv', index=False)
        if all_covered:
            # Nothing left to score: drop the remaining rows to end the loop.
            data = data.iloc[0:0]
        i += 1


if __name__ == '__main__':
    Process(sys.argv[1])
I am trying to generate the phonemes for 800000 sentences. The model I am using is G2P, which phonemizes each sentence. After phonemization I calculate the scores. The phoneme array I use for calculating the scores has 2620000 entries.
There are 800000 sentences and the code is taking days to run. Can somebody parallelize this code or suggest another solution?
I want to parallelize this code so that it executes faster.

Syntax error calculating the average of student marks while reading from a text file

# Read the file once; each non-blank line is split into whitespace-separated
# fields: first name, last name, mark.
with open('studMarks.txt', 'r') as f:
    records = [line.split() for line in f if line.strip()]

# Print the names in "last,first" order.
for words in records:
    fname = words[0]
    lname = words[1]
    print(f"{lname},{fname}")

# Average the third field over every record.  ``total`` replaces the old
# ``sum`` variable, which shadowed the builtin of the same name.
total = 0
count = 0
for words in records:
    count += 1
    total += float(words[2])

# An empty file has no average; skip the division instead of raising.
if count:
    average = total/count
    print(f"{average}")
When using the for loop it displays a value of 64.3, which I believe is the average of all marks across the whole student list.
I need to produce output that displays each student's name and average on the same line. I can do this for the names, but not for the average, as I keep getting errors. I don't know what to put in.
Below is the full solution. The with open line is a context manager and ensures that the file will get closed as soon as you exit the block. You should get used to using this style as it's the safe way to do I/O. The rest is just bog standard Python.
# Running (total, count) per student, keyed by "first last".
marks=dict()
with open('studMarks.txt', 'r') as f:
    for line in f:
        words = line.split()
        if not words:
            # Skip blank lines instead of failing on words[0].
            continue
        fname = words[0]
        lname = words[1]
        # float() also accepts decimal marks such as "64.5", which int()
        # would reject.
        score = float(words[2])
        key = f'{fname} {lname}'
        prev_total, prev_count = marks.get(key, (0, 0))
        marks[key] = (prev_total + score, prev_count + 1)

# Print one "<name> : <average>" line per student.
for name, value in marks.items():
    print(f'{name} : {value[0]/value[1]}')
This is an interesting problem.
From what I understand you have a text file that looks like this:
Johnny Ly 90 100 Adam Best 80 30 Tim Smith 10 20 in a file called studentMarks2.txt
and want output like this:
Johnny_Ly 95.0 Adam_Best 55.0 Tim_Smith 15.0
if that is true then it can be done using code like this without pandas or csv
though those would make this a lot easier.
# Read the whole file and break it into whitespace-separated words.
with open('studMarks2.txt','r') as f:
    fileContents = f.read().split()

# Map "First_Last" -> list of scores.  ``names`` holds the name words seen
# most recently; it is reset when a third name word shows up.
students = {}
names = []
for content in fileContents:
    if not content.isnumeric():
        if len(names) == 2:
            names.clear()
        names.append(content)
        continue
    studentKey = '_'.join(names)
    students[studentKey] = students.get(studentKey, []) + [float(content)]

# Print every student's average on one line, separated by spaces.
for student in students:
    scores = students[student]
    avg = sum(scores)/len(scores)
    print(student,avg,end=' ')
Broken down
This part reads the contents and splits on white space
# Read the whole file, then split the text on whitespace.
with open('studMarks2.txt','r') as f:
    raw_text = f.read()
fileContents = raw_text.split()
this part then iterates through the contents
storing the names as keys in a dictionary and putting the scores in a list
# Map "First_Last" -> list of scores.  ``names`` holds the name words seen
# most recently; it is reset when a third name word shows up.
students = {}
names = []
for content in fileContents:
    if not content.isnumeric():
        if len(names) == 2:
            names.clear()
        names.append(content)
        continue
    studentKey = '_'.join(names)
    students[studentKey] = students.get(studentKey, []) + [float(content)]
Lastly it iterates over the dictionary and output the avg on one line
# Print "<name> <average>" for every student on one line, space-separated.
for student in students:
    scores = students[student]
    avg = sum(scores)/len(scores)
    print(student,avg,end=' ')

Am trying to get the average of a list from a text file

This is the link to the image of the text file. I need help with feature one (click here to see the image). For some reason, when I print out the average of the list, it only shows the number from the text file. I don't know where my mistake is. I think the issue might be where I am appending to the list.
# Read the file once, before the menu loop, so votes are not appended again
# on every pass.
with open("votes.txt") as f:
    lines = f.read().split('\n')

# issue -> list of votes, one dict per vote column.
voteAa = {}
voteBa = {}
for line in lines:
    col = line.split(' ')
    if len(col) < 3:
        # Skip blank or incomplete lines (e.g. the empty string after the
        # final newline).
        continue
    issue = col[0]
    voteA = float(col[1])
    voteB = float(col[2])
    # Key on the issue: the old ``voteA in voteAa`` test compared a vote
    # value with the keys, so each list was overwritten, not extended.
    voteAa.setdefault(issue, []).append(voteA)
    voteBa.setdefault(issue, []).append(voteB)

# Menu loop; only choice 1 is visible in this snippet.
while True:
    choice = int(input("Which choice? "))
    if choice == 1:
        for issue in voteAa:
            sumVote = sum(voteAa[issue])
            avg = sumVote / len(voteAa[issue])
            # Print the average, not the running sum.
            print("avg is ",avg)
how about this
# Read the votes file; each non-blank line holds an issue followed by the
# A vote and the B vote.
with open("votes.txt") as f:
    lines = f.read().split('\n')

issues = []
voteAa = []
voteBa = []
for line in lines:
    col = line.split()
    if len(col) < 3:
        # Skip blank or incomplete lines.
        continue
    issues.append(col[0])
    voteAa.append(float(col[1]))
    voteBa.append(float(col[2]))

if issues:
    # Average the votes themselves.  The earlier version counted zero votes
    # (``count(0)``), which is not a mean, and it did not parse: it had an
    # unclosed print( and an undefined name ``avbA``.
    avgA = sum(voteAa)/len(issues)
    avgB = sum(voteBa)/len(issues)
    if (avgA > avgB):
        print('A higher than B')
    elif (avgB > avgA):
        print('B higher than A')
Since you need average by issue
remove for issue loop
and compute average in one line with special mean function like below
avg = mean(voteAa.values())
or, if you prefer keep it close to your code
sumVote = sum(voteAa.values())
avg = sumVote / len(voteAa)
also correct print line to
print("avg is ", avg)
Also, you should just collect the votes; there is no need for the if statements in the first loop.
So resulting script is
# Read the votes file and close it as soon as the text is in memory.
with open("votes.txt") as f:
    lines = f.read().split('\n')

# issue -> vote; a later line for the same issue replaces the earlier one.
voteAa = {}
voteBa = {}
for line in lines:
    col = line.split()
    if len(col) < 3:
        # Skip blank or incomplete lines, such as the empty string after the
        # final newline, which used to raise IndexError.
        continue
    issue = col[0]
    voteAa[issue] = float(col[1])
    voteBa[issue] = float(col[2])

# Average of the A votes over all issues; nothing is printed when the file
# held no usable lines.
if voteAa:
    sumVote = sum(voteAa.values())
    avg = sumVote / len(voteAa)
    print("avg is ", avg)
I tried to keep close to your original code, and did not say replace dictionaries with arrays, it could be simplified even further if you like, say with pandas or even standard https://docs.python.org/3/library/csv.html

How to randomly pair data in text file

I have two files: file(student) where there are 20 students and file(lecturer) where there are 3 lecturers. I want to pair the students and lecturers randomly. For example:
lecturer(1) = student(2),student(3),student(19)
lecturer(3) = student(20),student(23)......
This is the code I have tried. It is not behaving in the manner I had hoped for:
import random
# Attempt to pair students with lecturers at random.  The asker reports it
# does not behave as hoped; the review notes below mark statements that
# cannot run as written.  The original indentation was lost, so the intended
# nesting is not recoverable from this view.
# Open both input files; the handles are only closed at the very end.
lecturer = open("lecturer.txt", "r")
students = open("students.txt", "r")
# One list entry per line of each file.
spliti = lecturer.read().split("\n")
splitis = students.read().split("\n")
# NOTE(review): this random pick is overwritten by the loop variable below.
stud = (random.choice(splitis))
for stud in splitis:
# NOTE(review): the name is built as stud + "txt", with no dot before the
# extension.
file = open(stud + "txt","w")
# NOTE(review): range() with no arguments raises TypeError.
for i in range():
questinss = random.choice(spliti)
# NOTE(review): ``lecturer`` is the file object, not a name, so adding a str
# raises TypeError; the pick stored in ``questinss`` is never used.
file.write(lecturer + "\n")
# NOTE(review): ``students`` is also a file object, so this concatenation
# raises TypeError; note the ",txt" suffix as well.
files = open(students + ",txt", "r")
file.close()
lecturer.close()
students.close()
Here is some code that you can use. I hope it gives you some ideas.
import random
# get the students
with open('student.txt','r') as f:
    students = f.read().split()
# get the lectures (the earlier version called ``.spilt()``, which raises
# AttributeError)
with open('lecture.txt','r') as f:
    lectures = f.read().split()

# Pick one lecture at random for every student.  random.choice works for any
# number of lectures, not just the three that were hard-coded before.
reflist = [random.choice(lectures) for _ in students]

# prepare for the print: one list of students per lecture, in file order
lecture_student = []
for lecture in lectures:
    ls = [
        student
        for student, assigned in zip(students, reflist)
        if assigned == lecture
    ]
    lecture_student.append(ls)

# now to file them: "<lecture>: <student>, <student>, ..." blocks separated
# by blank lines
with open('pro_lecture_student.txt','wt') as f:
    for lecture, assigned_students in zip(lectures, lecture_student):
        f.write(lecture)
        f.write(': ')
        # Separate the names; they used to be written back to back.
        f.write(', '.join(assigned_students))
        f.write('\n\n')

Generate a random integer or a predefined int

I am working with some data in which a specific column can only be formatted in one of three ways:
3884932039484 (this is randomly generated from my program)
0 (this is static and will never change)
-1 (this is static and will never change)
I want the program to randomly pick between option 1,2 or 3 and insert one of three options. This is what I currently have:
import datetime
import random

# Read every input row up front and close the file straight away.
with open(r'I:\PythonDataFiles\StandardFeedInput\standardfeed_test.tsv', 'r') as file:
    all_lines = file.readlines()

#date_time_answer = input('Please input a date and time(2015-09-15 00:00:00): ')
#if date_time_answer == '':
# Timestamp written into every row: now minus one day.
date_time_answer = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now() - datetime.timedelta(days = 1))

# Zero-based column positions within a tab-separated row.
date_time = 1
is_imp = 16
person_id = 19
aid = 44
line_id = 49
cid = 50
is_click = 60
app_id = 0

prev_app_id = ''
with open(r'I:\PythonDataFiles\Standard Feed Output\test2.txt', 'w') as new_file:
    for line in all_lines:
        # Strip the newline before splitting so that overwriting the last
        # column cannot lose the row terminator; it is added back on write.
        row = line.rstrip('\n').split('\t')
        row[date_time] = date_time_answer
        # One of: a fresh random 10-digit id, 0, or -1.  The earlier
        # ``str(x, 0, -1)`` call raised TypeError.
        row[person_id] = str(random.choice([random.randint(1000000000, 9999999999), 0, -1]))
        if row[app_id] == str(prev_app_id):
            row[is_imp] = str(0)
            row[is_click] = str(1)
        else:
            row[is_imp] = str(1)
        # Remember this row's value; the earlier code stored the column
        # index (0) instead.
        prev_app_id = row[app_id]
        print(row)
        new_file.write('\t'.join(row) + '\n')
Use random.choice() to pick one of the three options:
random.choice([random.randint(1000000000, 9999999999), 0, -1])

Categories