So this is a variation of the Knapsack Problem I came up with the other day.
It is like a 0-1 Knapsack Problem where there are multiple groups and each item belongs to only one group. The goal is to maximize the profits subject to the constraints. In this case, a fixed number of items from each group have to be chosen for each group.
It is similar to the Multiple Choice Knapsack Problem, but in that case you pick only 1 item from each group, whereas in this one you want to pick x items from each group.
So, each item has: value, weight and group
Each group has an item count (Ex: if group A (or 0) has 2, the final solution needs to have 2 items of group A, no more no less)
And you also have a maximum capacity (not related to the groups)
This translates into:
values[i] = The value of the ith element
weights[i] = The weight of the ith element
groups[i] = The group of the ith element
C = Capacity
n = Amount of elements
m = Amount of groups
count[j] = Amount of items of group j
I'm attempting a Recursive solution first and then I will try a Dynamic approach.
Any solution would be appreciated (preferably Python, but anything will do :) ).
Useful links I found:
Theoretical solution of a similar problem
First approach to the Multiple Choice Knapsack Problem
Multiple Choice Knapsack Problem solved in Python
Knapsack with count constraint
Full code also in: https://github.com/pabloroldan98/knapsack-football-formations
Explanation after the code.
This code is for an example where you have a Fantasy League with a playersDB where each player has price (weight), points (value) and position (group); there is a list of possible_formations (group variations); and a budget (W) you can't go over.
Full code:
main.py:
from group_knapsack import best_full_teams
# Sample player pool. Each Player carries a name, a price (used as the knapsack
# weight), points (used as the knapsack value) and a position (the group).
# NOTE(review): Player is not imported in this snippet - presumably it comes
# from player.py; confirm before running.
playersDB = [
Player(name="Keylor Navas", price=16, points=7.5, position="GK"),
Player(name="Laporte", price=23, points=7.2, position="DEF"),
Player(name="Modric", price=22, points=7.3, position="MID"),
Player(name="Messi", price=51, points=8.2, position="ATT"),
...
]
# Candidate formations. players_preproc reads each entry as
# [defenders, midfielders, attackers]; exactly one goalkeeper is always added.
possible_formations = [
[3, 4, 3],
[3, 5, 2],
[4, 3, 3],
[4, 4, 2],
[4, 5, 1],
[5, 3, 2],
[5, 4, 1],
]
# Spending cap, passed on as the knapsack capacity.
budget = 300
# Solve every formation and print the ranking.
best_full_teams(playersDB, possible_formations, budget)
group_knapsack.py:
import itertools
from MCKP import knapsack_multichoice_onepick
def best_full_teams(players_list, formations, budget):
    """Find the best team for every formation and print a ranking.

    For each formation the players are pre-processed into per-position
    combinations, the one-pick-per-group knapsack is solved within
    ``budget``, and the winning combinations are expanded back into
    players from ``players_list``.

    Args:
        players_list: Sequence of player objects (indexable).
        formations: Iterable of formations, each handed to ``players_preproc``.
        budget: Capacity handed to ``knapsack_multichoice_onepick``.

    Returns:
        A list of ``(formation, score, players)`` tuples, in the same order
        as ``formations`` (the printed ranking is sorted, the return is not).
    """
    formation_score_players = []
    for formation in formations:
        points, prices, comb_indexes = players_preproc(players_list, formation)
        score, picks = knapsack_multichoice_onepick(prices, points, budget)

        # Every pick is (group, combination); expand it into the original
        # player indexes and then into the players themselves.
        result_players = [
            players_list[player_index]
            for pick in picks
            for player_index in comb_indexes[pick[0]][pick[1]]
        ]
        formation_score_players.append((formation, score, result_players))

        print("With formation " + str(formation) + ": " + str(score))
        for player in result_players:
            print(player)
        print()
        print()

    # Summary: formations ranked from best to worst score.
    ranking = sorted(formation_score_players, key=lambda entry: entry[1],
                     reverse=True)
    for entry in ranking:
        print((entry[0], entry[1]))

    return formation_score_players
def players_preproc(players_list, formation):
    """Build the per-position combination tables for one formation.

    Args:
        players_list: Sequence of player objects.
        formation: Indexable with the number of DEF, MID and ATT players at
            positions 0, 1 and 2. One GK is always required.

    Returns:
        Three lists ``(values, weights, indexes)``, each holding one entry
        per position in the order GK, DEF, MID, ATT, as produced by
        ``group_preproc``.
    """
    required = (
        ("GK", 1),
        ("DEF", formation[0]),
        ("MID", formation[1]),
        ("ATT", formation[2]),
    )

    result_comb_values = []
    result_comb_weights = []
    result_comb_indexes = []
    for position, how_many in required:
        values, weights, indexes = generate_group(players_list, position)
        comb_values, comb_weights, comb_indexes = group_preproc(
            values, weights, indexes, how_many)
        result_comb_values.append(comb_values)
        result_comb_weights.append(comb_weights)
        result_comb_indexes.append(comb_indexes)

    return result_comb_values, result_comb_weights, result_comb_indexes
def generate_group(full_list, group):
    """Collect the points, prices and list positions of one position group.

    Args:
        full_list: Iterable of objects with ``position``, ``points`` and
            ``price`` attributes.
        group: Position to keep (compared with ``==`` against ``position``).

    Returns:
        ``(values, weights, indexes)`` as three parallel lists, where
        ``indexes`` are the positions of the kept items inside ``full_list``.
    """
    members = [
        (item.points, item.price, position)
        for position, item in enumerate(full_list)
        if item.position == group
    ]
    group_values = [points for points, _, _ in members]
    group_weights = [price for _, price, _ in members]
    group_indexes = [position for _, _, position in members]
    return group_values, group_weights, group_indexes
def group_preproc(group_values, group_weights, initial_indexes, r):
    """Turn a group into all of its ``r``-sized combinations.

    Args:
        group_values: Values of the group's items.
        group_weights: Weights of the group's items (parallel to the values).
        initial_indexes: Original indexes of the items (parallel as well).
        r: Number of items every combination must contain.

    Returns:
        ``(comb_values, comb_weights, comb_indexes)``: the summed value and
        summed weight of every combination, plus the tuple of original
        indexes that forms it. All three share the order produced by
        ``itertools.combinations``.
    """
    comb_indexes = list(itertools.combinations(initial_indexes, r))
    group_comb_values = [
        sum(combo) for combo in itertools.combinations(group_values, r)
    ]
    group_comb_weights = [
        sum(combo) for combo in itertools.combinations(group_weights, r)
    ]
    return group_comb_values, group_comb_weights, comb_indexes
MCKP.py:
import copy
def knapsack_multichoice_onepick(weights, values, max_weight):
    """Solve the multiple-choice knapsack: pick exactly one item per group.

    Args:
        weights: List of groups; ``weights[i][j]`` is the weight of item
            ``j`` of group ``i``. Weights are used as list indexes, so they
            must be non-negative integers.
        values: Same shape as ``weights``; ``values[i][j]`` is the value of
            that item. Values are assumed to be non-negative, because ``-1``
            marks an unreachable total weight.
        max_weight: Capacity (non-negative integer).

    Returns:
        ``(score, path)`` where ``path`` is a list of ``(group, item)``
        index pairs, one per group. With no groups the result is
        ``(0, [])``; when no feasible selection exists it is ``(-1, [])``.
    """
    if not weights:
        # Keep the return shape consistent so callers can always unpack.
        return 0, []

    # last_array[w] is the best value using exactly weight w with the groups
    # processed so far (-1 = unreachable); last_path[w] is how to get there.
    last_array = [-1] * (max_weight + 1)
    last_path = [[] for _ in range(max_weight + 1)]
    for i, weight in enumerate(weights[0]):
        # "<=": an item that uses the whole capacity is still a valid pick.
        if weight <= max_weight and last_array[weight] < values[0][i]:
            last_array[weight] = values[0][i]
            last_path[weight] = [(0, i)]

    for i in range(1, len(weights)):
        current_array = [-1] * (max_weight + 1)
        current_path = [[] for _ in range(max_weight + 1)]
        for j, weight in enumerate(weights[i]):
            value = values[i][j]
            for k in range(weight, max_weight + 1):
                previous = last_array[k - weight]
                # ">= 0": a reachable state may legitimately be worth 0.
                if previous >= 0 and current_array[k] < previous + value:
                    current_array[k] = previous + value
                    # Paths hold immutable tuples, so a shallow copy is enough.
                    current_path[k] = last_path[k - weight] + [(i, j)]
        last_array = current_array
        last_path = current_path

    solution, index_path = get_onepick_solution(last_array, last_path)
    return solution, index_path


def get_onepick_solution(scores, paths):
    """Return the ``(score, path)`` pair with the highest score.

    Ties are resolved in favour of the earliest entry, i.e. the lowest
    total weight.

    Raises:
        IndexError: If there are no entries to choose from.
    """
    scores_paths = list(zip(scores, paths))
    if not scores_paths:
        raise IndexError("no scores to choose from")
    best_score, best_path = max(scores_paths, key=lambda tup: tup[0])
    return best_score, best_path
player.py:
class Player:
    """A fantasy-league player: name, price, points and validated position."""

    # Position -> group number used by get_group.
    _GROUPS = {"GK": 0, "DEF": 1, "MID": 2}

    def __init__(
            self,
            name: str,
            price: float,
            points: float,
            position: str
    ):
        self.name = name
        self.price = price
        self.points = points
        # Goes through the property setter below, so it is validated.
        self.position = position

    def __str__(self):
        return f"({self.name}, {self.price}, {self.points}, {self.position})"

    @property
    def position(self):
        """The player's position: one of "GK", "DEF", "MID" or "ATT"."""
        return self._position

    @position.setter
    def position(self, pos):
        if pos not in ["GK", "DEF", "MID", "ATT"]:
            raise ValueError("Sorry, that's not a valid position")
        self._position = pos

    def get_group(self):
        """Return the group number: GK=0, DEF=1, MID=2, anything else 3."""
        return self._GROUPS.get(self.position, 3)
Explanation:
Okay,so I managed to find a solution translating what was here: Solving the Multiple Choice Knapsack Problem from C++ to Python. My solution also gives the path that got you to that solution. It uses Dynamic Programming and it's very fast.
The input data, instead of having groups[i], has the weights and the values as a list of lists, where every list inside represent the values of each group:
weights[i] = [weights_group_0, weights_group_1, ...]
values[i] = [values_group_0, values_group_1, ...]
Where:
weights_group_i[j] = The weight of the jth element of the ith group
values_group_i[j] = The value of the jth element of the ith group
Those would be the inputs of knapsack_multichoice_onepick. Here is an example:
# Example: three groups with two candidate items each; exactly one item is
# picked from every group without exceeding the capacity W.
values = [[6, 10], [12, 2], [2, 3]]
weights = [[1, 2], [6, 2], [3, 2]]
W = 7
print(knapsack_multichoice_onepick(weights, values, W)) # (15, [(0, 1), (1, 1), (2, 1)])
After that I followed @user3386109's suggestion and did the combinations with the indexes. The group preprocessing methods are players_preproc, generate_group and group_preproc.
Again, this code is for an example where you have a Fantasy League with a playersDB where each player has price (weight), points (value) and position (group); there is a list of possible_formations (group variations); and a budget (W) you can't go over.
The best_full_teams method prints everything and uses all the previous ones.
Related
I am trying to write a function that returns from and to bits in [from:to] format.
I am not quite sure how exactly it can be done (recursively?). The expected output is in incremental range of bits. Here is the piece of code to start with,
cntlist = [5,1,4,3,1]


def find_size(cnt):
    """Describe a bit field of ``cnt`` bits that starts at bit 0.

    Returns:
        ``(text, left, right)``. For ``cnt > 1`` the text is
        ``"[cnt-1:0]"`` and left/right are the two bounds. For ``cnt == 1``
        the text is ``"[1]"`` and both left and right are that same string.
        Every size is treated independently: no running offset is kept.
        Any other ``cnt`` leaves the result unbound, which raises
        UnboundLocalError.
    """
    if cnt == 1:
        a = "[%s]" % (cnt)
        left = right = a
    elif cnt > 1:
        left = cnt - 1
        right = cnt - cnt
        a = "[%s:%s]" % (left, right)
    return a, left, right


newlist = [find_size(size) for size in cntlist]
print(newlist)
Output:
[('[4:0]', 4, 0), ('[1]', '[1]', '[1]'), ('[3:0]', 3, 0), ('[2:0]', 2, 0), ('[1]', '[1]', '[1]')]
Expected output:
['[4:0]', '[5]', '[9:6]', '[12:10]', '[13]']
Note: If size is 1 in cntlist, the range will have only one element which will be +1 to previous range's left number.
IIUC, a simple loop should work:
def bitrange(cntlst):
    """Lay consecutive bit fields side by side and describe each one.

    Every width in ``cntlst`` takes the next free bits. A field of width 1
    is written as ``"[bit]"``, any other as ``"[high:low]"``.
    """
    out = []
    low = 0
    for width in cntlst:
        high = low + width - 1
        out.append(f'[{high}]' if width == 1 else f'[{high}:{low}]')
        low += width
    return out


bitrange([5,1,4,3,1])
output:
['[4:0]', '[5]', '[9:6]', '[12:10]', '[13]']
Background
We have a family tradition where my and my siblings' Christmas presents are identified by a code that can be solved using only numbers related to us. For example, the code could be birth month * age + graduation year (This is a simple one). If the numbers were 8 * 22 + 2020 = 2196, the number 2196 would be written on all my Christmas presents.
I've already created a Python class that solves the code with certain constraints, but I'm wondering if it's possible to do it recursively.
Current Code
The first function returns a result set for all possible combinations of numbers and operations that produce a value in target_values
#Master algorithm (Get the result set of all combinations of numbers and cartesian products of operations that reach a target_value, using only the number_of_numbers_in_solution)
#Example: sibling1.results[1] = [(3, 22, 4), (<built-in function add>, <built-in function add>), 29]. This means that 3 + 22 + 4 = 29, and 29 is in target_values
import operator
from itertools import product
from itertools import combinations
NUMBER_OF_OPERATIONS_IN_SOLUTION = 2 #Total numbers involved is this plus 1
NUMBER_OF_NUMBERS_IN_SOLUTION = NUMBER_OF_OPERATIONS_IN_SOLUTION + 1
TARGET_VALUES = {22,27,29,38,39}


def getresults( list ):
    """Find every number/operation combination that hits a target value.

    Each combination of NUMBER_OF_NUMBERS_IN_SOLUTION numbers is evaluated
    strictly left to right against every sequence of
    NUMBER_OF_OPERATIONS_IN_SOLUTION operations (+, -, *, /).

    Args:
        list: The candidate numbers. (The name shadows the builtin; it is
            kept only so existing keyword callers keep working.)

    Returns:
        A list of ``[numbers, operations, result]`` entries whose result is
        in TARGET_VALUES. The number of entries is also printed.
    """
    # Cartesian product of all possible operations. tuple() is used because
    # the builtin list() is hidden by the parameter name.
    opslist = [operator.add, operator.sub, operator.mul, operator.truediv]
    ops = tuple(product(opslist, repeat=NUMBER_OF_OPERATIONS_IN_SOLUTION))

    results = []
    for x in combinations(list, NUMBER_OF_NUMBERS_IN_SOLUTION):
        for y in ops:
            result = 0
            try:
                for z, operation in enumerate(y):
                    # The first operation combines the first two numbers;
                    # later ones combine the running result with the next.
                    left = x[z] if z == 0 else result
                    result = operation(left, x[z + 1])
            except ZeroDivisionError:
                # A zero divisor only invalidates this one expression.
                continue
            if result in TARGET_VALUES:
                results.append([x, y, result])
    print(len(results))
    return results
Then a class that takes in personal parameters for each person and gets the result set
def getalpha( str, inverse ):
    """Map every character to ``ord(char) - 96`` (so 'a' -> 1, 'b' -> 2, ...).

    With ``inverse`` truthy each number is mirrored as ``27 - number``
    (so 'a' -> 26). Returns the numbers as a list, one per character.
    """
    positions = (ord(char) - 96 for char in str)
    return [27 - alpha if inverse else alpha for alpha in positions]
class Person:
    """One sibling: the numbers tied to them plus their pre-computed results."""

    def __init__(self, name, middlename, birthmonth, birthday, birthyear, age, orderofbirth, gradyear, state, zip, workzip, cityfirst3):
        # Plain numbers first, in a fixed order ...
        personal_numbers = [
            birthmonth, birthday, birthyear, birthyear - 1900, age,
            orderofbirth, gradyear, gradyear - 2000, zip, workzip,
        ]
        # ... followed by the letter numbers of the city prefix (final list).
        self.listofnums = personal_numbers + getalpha(cityfirst3, False)
        self.results = getresults(self.listofnums)
Finally, a "solve code" method that takes from the result sets and finds any possible combinations that produce the full list of target_values.
#Element-wise equality of two sequences that must also have the same length
def compare(l1, l2):
    """Return True when l1 and l2 have equal length and equal items pairwise."""
    for first, second in zip(l1, l2):
        if not first == second:
            return False
    return len(l1) == len(l2)
#Check one result of sibling2 against one result of sibling1: it must give a
#target value not used yet, use the same operations, and draw its numbers
#from matching list positions
def comparetwosiblings(current_values, sibling1, sibling2, a, b):
    """Tell whether result ``b`` of sibling2 is compatible with result ``a`` of sibling1.

    Compatible means: sibling2's result value is not yet in
    ``current_values``, both results use the same operation sequence, and
    for every number slot there is at least one index at which sibling1's
    ``listofnums`` holds sibling1's number and sibling2's ``listofnums``
    holds sibling2's number.
    """
    candidate = sibling2.results[b]
    if candidate[2] in current_values:
        return False
    reference = sibling1.results[a]
    if not compare(reference[1], candidate[1]):
        return False

    for c in range(0, NUMBER_OF_NUMBERS_IN_SOLUTION):
        positions1 = {index for index, value in enumerate(sibling1.listofnums)
                      if value == reference[0][c]}
        positions2 = {index for index, value in enumerate(sibling2.listofnums)
                      if value == candidate[0][c]}
        # The number must sit at a common position in both lists.
        if not positions1 & positions2:
            return False
    return True
#For every result, we start by adding the result number to the current_values list for sibling1, then cycle through each person and see if a matching operator list leads to a different result number. (Matching indices as well)
#If there's a result set for everyone that leads to five different numbers in the code, the values will be added to the newresult set
def solvecode( sibling1, sibling2, sibling3, sibling4, sibling5 ):
    """Search for result combinations that are compatible across five siblings.

    Works as a backtracking search: a result of sibling1 fixes the
    operations, then one compatible result is chosen per further sibling
    (checked with comparetwosiblings against sibling1). current_values
    holds the target values used on the current path. Every complete
    combination is stored as the five number tuples followed by the shared
    operations. Nothing is returned; the count and the list are printed.
    """
    newresults = []
    current_values = []
    #For every result in sibling1
    for a in range(len(sibling1.results)):
        # Start a fresh path holding only sibling1's target value.
        current_values = []
        current_values.append(sibling1.results[a][2])
        for b in range(len(sibling2.results)):
            if comparetwosiblings(current_values, sibling1, sibling2, a, b):
                current_values.append(sibling2.results[b][2])
                for c in range(len(sibling3.results)):
                    if comparetwosiblings(current_values, sibling1, sibling3, a, c):
                        current_values.append(sibling3.results[c][2])
                        for d in range(len(sibling4.results)):
                            if comparetwosiblings(current_values, sibling1, sibling4, a, d):
                                current_values.append(sibling4.results[d][2])
                                for e in range(len(sibling5.results)):
                                    if comparetwosiblings(current_values, sibling1, sibling5, a, e):
                                        newresults.append([sibling1.results[a][0], sibling2.results[b][0], sibling3.results[c][0], sibling4.results[d][0], sibling5.results[e][0], sibling1.results[a][1]])
                                # Backtrack: release sibling4's value before trying the next d.
                                current_values.remove(sibling4.results[d][2])
                        # Backtrack: release sibling3's value before trying the next c.
                        current_values.remove(sibling3.results[c][2])
                # Backtrack: release sibling2's value before trying the next b.
                current_values.remove(sibling2.results[b][2])
    print(len(newresults))
    print(newresults)
It's the last "solvecode" method that I'm wondering if I can optimize and make into a recursive algorithm. In some cases it can be helpful to add or remove a sibling, which would look nice recursively (My mom sometimes makes a mistake with one sibling, or we get a new brother/sister-in-law)
Thank you for any and all help! I hope you at least get a laugh out of my weird family tradition.
Edit: In case you want to test the algorithm, here's an example group of siblings that result in exactly one correct solution
#ALL PERSONAL INFO CHANGED FOR STACKOVERFLOW
# Positional arguments follow Person.__init__: name, middlename, birthmonth,
# birthday, birthyear, age, orderofbirth, gradyear, state, zip, workzip,
# cityfirst3.
sibling1 = Person("sibling1", "horatio", 7, 8, 1998, 22, 5, 2020, "ma", 11111, 11111, "red")
sibling2 = Person("sibling2", "liem", 2, 21, 1995, 25, 4, 2018, "ma", 11111, 11111, "pho")
sibling3 = Person("sibling3", "kyle", 4, 21, 1993, 26, 3, 2016, "ma", 11111, 11111, "okl")
sibling4 = Person("sibling4", "jamal", 4, 7, 1991, 29, 2, 2014, "ma", 11111, 11111, "pla")
sibling5 = Person("sibling5", "roberto", 9, 23, 1990, 30, 1, 2012, "ma", 11111, 11111, "boe")
I just spent a while improving the code. Few things I need to mention:
It's not good practice to use the names of Python built-ins (like list, str and zip) as variable names: doing so shadows the built-in, which causes problems and makes the code harder to debug.
I feel like you should use the permutation function as combination gives unordered pairs while permutation gives ordered pairs which are more in number and will give more results. For example, for the sibling info you gave combination gives only 1 solution through solvecode() while permutation gives 12.
Because you are working with operators, there can be more cases with brackets. To solve that problem and to make the getresults() function a bit more optimized, I suggest you explore the reverse polish notation. Computerphile has an excellent video on it.
You don't need a compare function. list1==list2 works.
Here's the optimized code:
import operator
from itertools import product
from itertools import permutations
NUMBER_OF_OPERATIONS_IN_SOLUTION = 2 #Total numbers involved is this plus 1
NUMBER_OF_NUMBERS_IN_SOLUTION = NUMBER_OF_OPERATIONS_IN_SOLUTION + 1
TARGET_VALUES = {22,27,29,38,39}


def getresults(listofnums):
    """Find every ordered number/operation arrangement that hits a target.

    Each permutation of NUMBER_OF_NUMBERS_IN_SOLUTION numbers is evaluated
    strictly left to right against every sequence of
    NUMBER_OF_OPERATIONS_IN_SOLUTION operations (+, -, *, /).

    Args:
        listofnums: The candidate numbers.

    Returns:
        A list of ``[numbers, operations, result]`` entries whose result is
        in TARGET_VALUES.
    """
    # Cartesian product of all possible operations.
    opslist = [operator.add, operator.sub, operator.mul, operator.truediv]
    ops = list(product(opslist, repeat=NUMBER_OF_OPERATIONS_IN_SOLUTION))

    results = []
    for x in permutations(listofnums, NUMBER_OF_NUMBERS_IN_SOLUTION):
        for y in ops:
            try:
                # Fold left to right: ((x0 op0 x1) op1 x2) ...
                result = x[0]
                for operation, operand in zip(y, x[1:]):
                    result = operation(result, operand)
            except ZeroDivisionError:
                # A zero divisor only invalidates this one expression.
                continue
            if result in TARGET_VALUES:
                results.append([x, y, result])
    return results
def getalpha(string, inverse):
    """Return ``ord(char) - 96`` for each character ('a' -> 1, 'b' -> 2, ...).

    With ``inverse`` truthy every number is replaced by ``27 - number``.
    """
    def to_number(char):
        alpha = ord(char) - 96
        if inverse:
            return 27 - alpha
        return alpha

    return [to_number(char) for char in string]
class Person:
    """One sibling: the numbers tied to them plus their pre-computed results."""

    def __init__(self, name, middlename, birthmonth, birthday, birthyear, age, orderofbirth, gradyear, state, zipcode, workzip, cityfirst3):
        century_year = birthyear - 1900
        short_gradyear = gradyear - 2000
        #final list: the plain numbers followed by the city-prefix letter numbers
        self.listofnums = [
            birthmonth, birthday, birthyear, century_year, age,
            orderofbirth, gradyear, short_gradyear, zipcode, workzip,
            *getalpha(cityfirst3, False),
        ]
        self.results = getresults(self.listofnums)
#Check one result of sibling2 against one result of sibling1: unused target
#value, identical operations, and numbers taken from matching list positions
def comparetwosiblings(current_values, sibling1, sibling2, a, b):
    """Tell whether result ``b`` of sibling2 is compatible with result ``a`` of sibling1.

    Compatible means: sibling2's result value is not yet in
    ``current_values``, both results use equal operation sequences, and for
    every number slot some common index holds sibling1's number in
    sibling1's ``listofnums`` and sibling2's number in sibling2's.
    """
    theirs = sibling2.results[b]
    if theirs[2] in current_values:
        return False
    ours = sibling1.results[a]
    if not ours[1] == theirs[1]:
        return False

    for c in range(0, NUMBER_OF_NUMBERS_IN_SOLUTION):
        our_number = ours[0][c]
        their_number = theirs[0][c]
        # Walking both lists in lockstep finds an index present in both
        # position sets, if there is one.
        shares_position = any(
            left == our_number and right == their_number
            for left, right in zip(sibling1.listofnums, sibling2.listofnums)
        )
        if not shares_position:
            return False
    return True
And now, the million dollar function or should i say two functions:
# var contains the chosen result index of every sibling handled so far
# (var[0] belongs to arg[0]); depth is the index of the sibling being matched
def rec(arg, var, current_values, newresults, depth):
    """Recursively extend a partial match by one sibling.

    Args:
        arg: Sequence of siblings; each has ``results`` and ``listofnums``.
        var: Result indexes chosen for ``arg[0] .. arg[depth - 1]``.
            It is not modified.
        current_values: Target values used on the current path. Modified
            during the search and restored before returning.
        newresults: Output list. Every complete match appends one row: the
            number tuple of each sibling, followed by the operations of
            ``arg[0]``'s result.
        depth: Index in ``arg`` of the sibling to match now (>= 1).
    """
    sibling = arg[depth]
    is_last = depth == len(arg) - 1
    for i in range(len(sibling.results)):
        if not comparetwosiblings(current_values, arg[0], sibling, var[0], i):
            continue
        # Build a fresh index list per candidate, so one match can never
        # leak its index into the next one.
        chosen = var[:depth] + [i]
        if is_last:
            row = [person.results[index][0]
                   for person, index in zip(arg, chosen)]
            row.append(arg[0].results[var[0]][1])
            newresults.append(row)
        else:
            current_values.append(sibling.results[i][2])
            rec(arg, chosen, current_values, newresults, depth + 1)
            # Backtrack: the value appended above is the last one again.
            current_values.pop()
def solvecode(*arg):
    """Start the recursive search from every result of the first sibling.

    Prints the number of complete matches found and then the matches
    themselves. Returns nothing.
    """
    newresults = []
    for a, first_result in enumerate(arg[0].results):
        # The path starts with only the first sibling's target value.
        rec(arg, var=[a], current_values=[first_result[2]],
            newresults=newresults, depth=1)
    print(len(newresults))
    print(newresults)
There is a need for two functions as the first one is the recursive one and the second one is like a packaging. I've also fulfilled your second wish, that was being able to have variable number of siblings' data that can be input into the new solvecode function. I've checked the new functions and they work together exactly like the original solvecode function. Something to be noted is that there is no significant difference in the version's runtimes although the second one has 8 less lines of code. Hope this helped. lmao took me 3 hours.
One of the rugby coaches at my school has asked me to code a conditional rugby match draw for the upcoming games, with the task laid out something like this: given a list of teams numbered 1 - 12 split into 3 groups ([Group1 = 1, 2, 3, 4], [Group2 = 5, 6, 7, 8], [Group3 = 9, 10, 11, 12])
generate and print an 11 round-robin matchup with the conditions that:
Teams in Group1 does NOT verse teams in Group3
Teams in Group1 verses every other team in Group 1 twice (Eg. 1v2, 2v1, 1v3, 3v1, 1v4, 4v1, 1v5, 5v1.....)
This same rule applies to teams in Group3 as they verse other teams in Group3
Teams in Group2 verse every other team once.
Teams in Group1 and Group3 need one Bye Game.
I have attempted multiple times but inevitably become stuck, below are my 2 attempts:
Attempt 1:
import operator
import functools
import random
###First Generation (Flawed unclean round robin)
def fixtures(teams):
    """Build a shuffled circle-method rotation, one team ordering per round.

    The first team of the shuffled order stays fixed while the others
    rotate by one place each round. With an odd number of teams a
    'Day off' placeholder is added to the rotation; the caller's list is
    left untouched.

    Args:
        teams: Iterable of teams.

    Returns:
        A list of ``len(rotation) - 1`` orderings (lists of teams). The
        caller decides how to pair the entries of each ordering.
    """
    rotation = list(teams)  # work on a copy so the caller's list is not changed
    if len(rotation) % 2:
        rotation.append('Day off')  # odd team count - use 'Day off' as fake team
    random.shuffle(rotation)

    rounds = []
    for _ in range(len(rotation) - 1):
        rounds.append(rotation)
        # Keep the first team in place and move the last one next to it.
        rotation = [rotation[0]] + [rotation[-1]] + rotation[1:-1]
    return rounds
def main():
    """Demo: build the rotation and report Group A vs Group C clashes per round."""
    # demo code
    teams = ["Team1","Team2","Team3","Team4","Team5","Team6","Team7","Team8","Team9","Team10","Team11","Team12"]
    groupA = ["Team1","Team2","Team3","Team4"]
    groupB = ["Team5","Team6","Team7","Team8"]
    groupC = ["Team9","Team10","Team11","Team12"]

    # for one match each - use this block only
    matches = fixtures(teams)
    print("flawed matches:")
    for ordering in matches:
        # Even positions play at home, odd positions play away.
        homeTeams = ordering[::2]
        awayTeams = ordering[1::2]
        print("Home Teams:{}".format(homeTeams))
        print("Away Teams:{}".format(awayTeams))

        home_in_a = set(homeTeams).intersection(groupA)
        home_in_c = set(homeTeams).intersection(groupC)
        away_in_a = set(awayTeams).intersection(groupA)
        away_in_c = set(awayTeams).intersection(groupC)

        clashes = 0
        for home, away in zip(homeTeams, awayTeams):
            if home in home_in_a:
                forbidden_rivals = away_in_c
                message = "GroupA is versing either Group B or GroupA"
            elif home in home_in_c:
                forbidden_rivals = away_in_a
                message = "GroupC is versing either Group B or GroupC"
            else:
                # Home team belongs to neither Group A nor Group C.
                continue

            if away in forbidden_rivals:
                # A vs C pairing found: hand the round over for cleaning.
                position = awayTeams.index(away)
                clashes += 1
                RoundCleanUp(homeTeams, awayTeams, position, clashes)
            else:
                print(message)
def RoundCleanUp(HTeam, ATeam, AvsCPos, VSCounter):
    """Look up the home and away team at position ``AvsCPos``.

    The values are only read; nothing is returned or changed yet.
    """
    ##gets Value of List at position
    HTeamVal, ATeamVal = HTeam[AvsCPos], ATeam[AvsCPos]
main()  # run the demo as soon as the script is executed
Attempt 2:
import operator
import functools
import random
def make_round(rotation, num_teams, fixtures):
    """Append the home and away line-up of every round to ``fixtures``.

    Teams are the numbers ``1 .. num_teams``; the ``rotation`` argument is
    not read. For round ``r`` the first team stays fixed and the rest are
    rotated by ``r`` places. The first half of the order is appended as the
    home side and the reversed second half as the away side.

    Returns:
        The same ``fixtures`` list, now holding two entries per round.
    """
    half = num_teams // 2
    for shift in range(num_teams - 1):
        order = list(range(1, num_teams + 1))
        # A shift of 0 needs no rotation (and -0 would break the slicing).
        if shift:
            order = order[:1] + order[-shift:] + order[1:-shift]
        fixtures.append(order[:half])
        fixtures.append(order[half:][::-1])
    return fixtures
def make_schedule(teams):
    """Produces RoundRobin"""
    # number of teams must be even: an odd count is padded by one dummy slot
    team_count = len(teams)
    team_count += team_count % 2
    # build first round-robin
    return make_round(list(teams), team_count, [])
def homeAwayRotation(matches):
    """Print and validate every (home, away) pair of line-ups.

    ``matches`` alternates home and away line-ups; a trailing unpaired
    entry is ignored.
    """
    for round_number in range(len(matches) // 2):
        homeTeams = matches[2 * round_number]
        awayTeams = matches[2 * round_number + 1]
        print("Home Rotation: {}".format(homeTeams))
        print("Away Rotation: {}".format(awayTeams))
        validation(homeTeams, awayTeams)
def validation(homeTeams, awayTeams):
    """Check each home/away pairing for a Group A vs Group C clash.

    A clash is handed to ``cleanDirtyData`` together with the away team's
    position. Every other pairing just prints which case it fell into.
    """
    groupA = [1, 2, 3, 4]
    groupC = [9, 10, 11, 12]

    for home, away in zip(homeTeams, awayTeams):
        if home in groupA:
            # team 1-4: fine unless the opponent is from group C
            rivals = groupC
            harmless = "Group A vsing either itself or GroupB\n"
        elif home in groupC:
            # team 9-12: fine unless the opponent is from group A
            rivals = groupA
            harmless = "Group C vsing either itself or GroupB\n"
        else:
            # if this is returned it is a team in group B
            print("This is team B\n")
            continue

        if away in rivals:
            cleanDirtyData(homeTeams, awayTeams, awayTeams.index(away))
        else:
            print(harmless)
def cleanDirtyData(homeTeams, awayTeams, AvsCPosition):
    """Collect the clashing home and away team found at ``AvsCPosition``.

    The pair is only gathered in a local list; nothing is returned or
    changed yet.
    """
    Dirtlist = [homeTeams[AvsCPosition], awayTeams[AvsCPosition]]
def main():
    """Demo: build the schedule for twelve teams and print/validate it."""
    # demo code
    teams = ["Team1", "Team2", "Team3", "Team4", "Team5", "Team6",
             "Team7", "Team8", "Team9", "Team10", "Team11", "Team12"]
    # for one match each - use this block only
    schedule = make_schedule(teams)
    print("flawed matches:")
    homeAwayRotation(schedule)
main()  # run the demo as soon as the script is executed
My expected results would be printing each round showing which team is versing which and each team having a history a bit like this:
a team in Group1 has a verse history of: (in any random order)
1v2, 2v1, 1v3, 3v1, 1v4, 4v1, 1v5, 1v6, 1v7, 1v8, bye
a team in Group2 has a verse history of: (in any random order)
5v1, 5v2, 5v3, 5v4, 5v6, 5v7, 5v8, 5v9 5v10, 5v11, 5v12
a team in Group3 has a verse history of: (in any random order)
9v10, 10v9, 9v11, 11v9, 9v12, 12v9, 9v5, 9v6, 9v7, 9v8, bye
Any pointers or improvements I could possibly do would be greatly appreciated as I have been stuck on the final hurdle for the last 2 weeks
If I have understood the problem correct, then all you need is some combining of teams with every member in different groups.
I put some code together that should solve your problem:
def vs(team, group):
    """Pair ``team`` with every member of ``group`` except itself.

    Returns a list of ``(team, opponent)`` tuples in group order.
    """
    return [(team, opponent) for opponent in group if team != opponent]
def matches(teams):
    """List the match-ups of every team, one list per team.

    ``teams`` is cut into three consecutive groups of ``len(teams) // 3``
    teams (the last group takes any remainder). Teams of the first and
    third group meet their own group twice (home and away), meet every
    team of the middle group once, and get a "bye". All other teams meet
    every other team once.

    Returns:
        A list parallel to ``teams``; entry ``i`` holds the
        ``(team, opponent)`` tuples of ``teams[i]``.
    """
    group_size = len(teams) // 3
    # Make the groups, basically just splitting the team list in three parts
    groups = [teams[:group_size], teams[group_size:2*group_size], teams[2*group_size:]]

    matchups = []
    for index, team in enumerate(teams):
        group_index = index // group_size
        if group_index in (0, 2):
            # Home matches against the own group ...
            home_games = vs(team, groups[group_index])
            # ... the same matches again with the current team 'away' ...
            away_games = [(opponent, host) for host, opponent in home_games]
            # ... one match against everyone in group 2, and lastly the bye.
            schedule = (home_games + away_games + vs(team, groups[1])
                        + [(team, "bye")])
        else:
            # Just all matches against all other teams, once.
            schedule = vs(team, teams)
        matchups.append(schedule)
    return matchups
# This can be anything. Numbers, 'Team 1' or even 'The wondrous flying squirrels of death'
teams = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# Make matches
matchups = matches(teams)
# Just pretty print. The listing gets its own name so that the matches()
# function is not overwritten by a string.
for team, team_matchups in zip(teams, matchups):
    listing = '\n\t'.join(f'{str(m[0]).rjust(10)} vs {str(m[1]).ljust(10)}' for m in team_matchups)
    print(f"Team '{team}' matches:\n\t{listing}")
I have, for example, two lists of numbers:
List_numbers_1 = [3, 54, -30]
List_numbers_2 = [65, 8, 800]
I want to create a function that runs the following table of sums:
  3 +  65 =  68
 54 +   8 =  62
-30 + 800 = 770
The table is lined up, and that is my goal. In order to create that function I have created 3 other functions that may help me:
def max_width(List_numbers_1):
    '''Return the length of the longest number once written as text.'''
    return max(len(str(number)) for number in List_numbers_1)
Output: 3
def left_padded(number, width):
    '''Return the number as text, padded with spaces on the left up to width.'''
    text = str(number)
    return text.rjust(width)
left_padded(54, 5)
'   54'
left_padded(-56, 5)
'  -56'
def all_left_padded(List_numbers_1, width):
    '''Return a list with every number of the list left-padded to width.'''
    return [left_padded(number, width) for number in List_numbers_1]
all_left_padded(List_numbers_1, 5)
['    3', '   54', '  -30']
I think the above functions can be useful for my last function. Nevertheless, I will really appreciate any other idea. If it is possible I would prefer a function that use the return statement but with print() will be ok.
In fact I think this function has to contain return and print as well.
THANK YOU
Without numpy, you can zip the lists together and add them up:
[sum(i) for i in zip(lst1,lst2)]
It is easier to use a list comprehension rather than map
For formatting the numbers it is natural to use str.format().
As you don't know the widths of the numbers in advance, you first create
a format string, and the easiest way to do that is to use format
# maxlen returns the length of the longest element
def maxlen(l):
    """Return the length of the longest element of ``l`` written as text."""
    return max(len(str(i)) for i in l)


# sumtable returns a formatted multiline string containing the sums
# written in a human readable form.
def sumtable(l1,l2):
    """Format ``l1[i] + l2[i] = sum`` as one right-aligned line per pair.

    Args:
        l1: Integers for the left operand column.
        l2: Integers for the right operand column.

    Returns:
        A string with one newline-terminated line per pair (pairs stop at
        the shorter list), or an empty string when there are no pairs.
    """
    # sums contains the answers, and table is the full table of numbers in
    # the calculations as rows of (left, right, sum)
    sums = [sum(i) for i in zip(l1,l2)]
    if not sums:
        return ""
    table = zip(l1,l2,sums)
    # formatstring has a form like "{:3d} + {:5d} = {:5d}\n"
    formatstring = "{{:{}d}} + {{:{}d}} = {{:{}d}}\n".format(
        maxlen(l1), maxlen(l2), maxlen(sums))
    # unpack every row into the format string, however many rows there are
    return "".join(formatstring.format(*row) for row in table)


print(sumtable([3,54,-30],[65,8,800]))
#   3 +  65 =  68
#  54 +   8 =  62
# -30 + 800 = 770
If you can use a 3rd party library, this is trivial with numpy:
import numpy as np
# The two input columns.
lst1 = [3, 54, -30]
lst2 = [65, 8, 800]
# Stack both lists as rows and add them up column by column.
res = np.sum([lst1, lst2], axis=0)
# array([ 68, 62, 770])
def table(L1, L2):
    '''Build the rows of the table of sums, one formatted string per pair.

    Every column (left operand, right operand, sum) is left-padded to the
    width of its own longest entry before the row is formatted by str_sum.
    '''
    Sum = [x + y for x, y in zip(L1, L2)]
    widthL1 = max_width(L1)
    widthL2 = max_width(L2)
    widthSum = max_width(Sum)
    first_column = all_left_padded(L1, widthL1)
    second_column = all_left_padded(L2, widthL2)
    # Pad with the width computed above (the name is widthSum, capital S).
    third_column = all_left_padded(Sum, widthSum)
    triplets = zip(first_column, second_column, third_column)
    return [str_sum(triplet) for triplet in triplets]
# for displaying the table
def printable(L1, L2):
    '''Print the table of sums of L1 and L2, one row per line.'''
    for row in table(L1, L2):
        print(row)
😀
# the str_sum function was:
def str_sum(Tuple):
    '''Format a triplet (a, b, c) as "a + b = c".'''
    # "{}" is the positional field ("{ }" with a space is not valid), and
    # the triplet has to be unpacked into three arguments.
    return '{} + {} = {}'.format(*Tuple)
> str_sum((2,3,5))
'2 + 3 = 5'
# I was thinking: why instead summing two Lists, I will sum n lists?
# I would create two functions
>L1 = [1, 2]
>L2 = [2, 3]
> L3 = [4, 5]
> Group = (L1, L2, L3)
> Sum = [sum(l) for l in zip(*Group)]
>Sum
[7, 10]
# and for creating a Tuple with n elements because if the table has n Lists the Tuple must have n elements
def str_sum(Tuple):
    '''Format (a, b, ..., total) as "a + b + ... = total".

    Raises:
        ValueError: If fewer than two elements are given.
    '''
    n = len(Tuple)
    if n < 2:
        raise ValueError("str_sum needs at least one term and a total")
    # n - 2 terms are followed by " + "; the last term and the total close
    # the line. "{}" is the positional field ("{ }" is not valid).
    f = (n - 2) * '{} + ' + '{} = {}'
    return f.format(*Tuple)
>str_sum((1, 2, 3, 8, 7, 21))
('1 + 2 + 3 + 8 + 7 = 21')
I have a very large ndarray A, and a sorted list of points k (a small list, about 30 points).
For every element of A, I want to determine the closest element in the list of points k, together with the index. So something like:
>>> A = np.asarray([3, 4, 5, 6])
>>> k = np.asarray([4.1, 3])
>>> values, indices
[3, 4.1, 4.1, 4.1], [1, 0, 0, 0]
Now, the problem is that A is very very large. So I can't do something inefficient like adding one dimension to A, take the abs difference to k, and then take the minimum of each column.
For now I have been using np.searchsorted, as shown in the second answer here: Find nearest value in numpy array but even this is too slow. This is the code I used (modified to work with multiple values):
def find_nearest(A, k):
    """Find, for every element of ``A``, the closest entry of ``k``.

    Args:
        A: array of query values.
        k: 1-D array of candidate points; assumed sorted in ascending
            order, since ``np.searchsorted`` is applied to it.

    Returns:
        ``(values, indices)``: the closest entries of ``k`` and their
        positions in ``k``. On an exact tie the larger entry is chosen.
    """
    insert_at = np.searchsorted(k, A)
    # Elements beyond the last point get insertion index len(k); step back
    # by one there so that indexing k stays in bounds.
    past_end = insert_at == k.shape[0]
    right = insert_at - past_end
    left = insert_at - 1  # wraps to -1 where insert_at == 0; masked out below
    left_is_closer = np.abs(A - k[left]) < np.abs(A - k[right])
    step_back = (insert_at > 0) & (past_end | left_is_closer)
    nearest = insert_at - step_back
    return k[nearest], nearest
I then thought of using scipy.spatial.KDTree:
>>> d = scipy.spatial.KDTree(k)
>>> d.query(A)
This turns out to be much slower than the searchsorted solution.
On the other hand, the array A is always the same; only k changes. So it would be beneficial to use some auxiliary structure (like an "inverse KDTree") on A, and then query the results on the small array k.
Is there something like that?
Edit
At the moment I am using a variant of np.searchsorted that requires the array A to be sorted. We can do this in advance as a pre-processing step, but we still have to restore the original order after computing the indices. This variant is about twice as fast as the one above.
# Example data: a large array A and a small set of points k.
A = np.random.random(3000000)
k = np.random.random(30)
# Pre-processing done once for A: indices_sort orders A, sortedA is the
# ordered copy, and inv_indices_sort (the argsort of the permutation) maps
# results computed on sortedA back to the original order of A.
indices_sort = np.argsort(A)
sortedA = A[indices_sort]
inv_indices_sort = np.argsort(indices_sort)
# Sort k in place; find_nearest below takes midpoints of adjacent entries.
k.sort()
def find_nearest(sortedA, k):
    """Find the closest entry of ``k`` for every element of the data.

    Args:
        sortedA: the data array sorted in ascending order.
        k: 1-D array of points, sorted in ascending order.

    Returns:
        ``(values, indices)`` in the ORIGINAL order of the data. The order
        is restored through the module-level ``inv_indices_sort``.
    """
    # Positions in sortedA of the midpoints between adjacent points: all
    # elements between two consecutive positions share the same closest point.
    midpoints = k[:-1] + np.diff(k) / 2
    boundaries = np.searchsorted(sortedA, midpoints)

    # Keep one (end position, index into k) pair per distinct boundary;
    # a repeated boundary would only describe an empty run.
    run_ends = []
    previous = None
    for k_pos, boundary in enumerate(boundaries):
        if boundary != previous:
            run_ends.append((boundary, k_pos))
            previous = boundary

    labels = np.zeros(sortedA.shape, dtype=int)
    start = 0
    for stop, k_pos in run_ends:
        labels[start:stop] = k_pos
        start = stop
    # Everything after the last boundary is closest to the last point.
    labels[start:] = len(k) - 1

    # Undo the sort (reported by the author as the most expensive step).
    nearest = labels[inv_indices_sort]
    return k[nearest], nearest
The line that takes so much time is the line that brings the indices back to their original order.
Update:
The builtin function numpy.digitize can actually do exactly what you need. Only a small trick is required: digitize assigns values to bins. We can convert k to bins by sorting the array and setting the bin borders exactly in the middle between adjacent elements.
import numpy as np
A = np.asarray([3, 4, 5, 6])
k = np.asarray([4.1, 3, 1]) # added another value to show that sorting/binning works
# ki is the permutation that sorts k; ks is k in ascending order.
ki = np.argsort(k)
ks = k[ki]
# Bin edges sit halfway between adjacent sorted points, so the bin number
# returned by digitize is the position in ks of the closest point.
i = np.digitize(A, (ks[:-1] + ks[1:]) / 2)
# Translate positions in ks back to positions in the original, unsorted k.
indices = ki[i]
values = ks[i]
print(values, indices)
# [ 3. 4.1 4.1 4.1] [1 0 0 0]
Old answer:
I would take a brute-force approach to perform one vectorized pass over A for each element in k and update those locations where the current element improves the approximation.
import numpy as np
A = np.asarray([3, 4, 5, 6])
k = np.asarray([4.1, 3])

# Smallest distance found so far for each element of A. It starts at
# infinity so that the first candidate point always wins.
err = np.zeros_like(A) + np.inf
values = np.empty_like(A, dtype=k.dtype)
indices = np.empty_like(A, dtype=int)

# One vectorized pass over A per candidate point.
for i, v in enumerate(k):
    d = np.abs(A - v)
    mask = d < err  # positions where v beats the best candidate so far
    values[mask], indices[mask], err[mask] = v, i, d[mask]

print(values, indices)
# [ 3. 4.1 4.1 4.1] [1 0 0 0]
This approach requires three temporary variables of same size as A, so it will fail if not enough memory is available.
So, after some work and an idea from the scipy mailing list, I think that in my case (with a constant A and slowly varying k), the best way to do this is to use the following implementation.
class SearchSorted:
    """Nearest-point index lookup for a fixed array and changing points.

    The array is argsorted once in the constructor. Each ``query`` locates
    the midpoints between adjacent entries of ``k`` in the sorted array,
    which splits it into contiguous runs sharing the same nearest entry of
    ``k``. The run labels are then mapped back to the original order.
    """

    def __init__(self, tensor, use_k_optimization=True):
        """Pre-process ``tensor`` for repeated queries.

        use_k_optimization requires storing 4x the size of the tensor.
        If use_k_optimization is True, the class will assume that successive
        calls will be made with similar k. When this happens, the running
        time can be cut significantly by storing additional variables. If
        it will not be called with successive similar k, set the flag to
        False, as it would otherwise just consume more memory for no good
        reason.

        Args:
            tensor: the data array (assumed 1-D - TODO confirm).
            use_k_optimization: cache the last result and update it
                incrementally on the next query.
        """
        self.indices_sort = np.argsort(tensor)
        self.sorted_tensor = tensor[self.indices_sort]
        # Inverse permutation: maps results on the sorted array back to
        # the original element order.
        self.inv_indices_sort = np.argsort(self.indices_sort)
        self.use_k_optimization = use_k_optimization
        self.previous_indices_results = None
        self.prev_idx_A_k_pair = None

    def query(self, k):
        """Return, for every element of the tensor, the index of its
        nearest entry in ``k``.

        Args:
            k: 1-D array of points, assumed sorted in ascending order. On
                the incremental path it is also assumed to have the same
                length as in the previous call.

        Returns:
            Integer array in the original order of the tensor. Elements
            lying exactly on a midpoint are assigned to the larger point.
            When ``use_k_optimization`` is True the returned array is the
            internal cache and is updated in place by later calls, so copy
            it if an earlier result must be kept.
        """
        midpoints = k[:-1] + np.diff(k) / 2
        idx_count = np.searchsorted(self.sorted_tensor, midpoints)
        # One (end position in the sorted tensor, index into k) pair per
        # distinct boundary; repeated boundaries describe empty runs.
        idx_A_k_pair = []
        old_obj = 0
        for count, obj in enumerate(idx_count):
            if obj != old_obj:
                idx_A_k_pair.append((obj, count))
                old_obj = obj

        if not self.use_k_optimization or self.previous_indices_results is None:
            # creates the index matrix in the sorted case
            final_indices = self._create_indices_matrix(
                idx_A_k_pair, self.sorted_tensor.shape, len(k))
            # and now unsort it to match the original tensor position
            indicesClosest = final_indices[self.inv_indices_sort]
            if self.use_k_optimization:
                self.prev_idx_A_k_pair = idx_A_k_pair
                self.previous_indices_results = indicesClosest
            return indicesClosest

        # Incremental path: both matrices are in SORTED order; only the
        # positions whose label changed are written back to the cache.
        old_indices_unsorted = self._create_indices_matrix(
            self.prev_idx_A_k_pair, self.sorted_tensor.shape, len(k))
        new_indices_unsorted = self._create_indices_matrix(
            idx_A_k_pair, self.sorted_tensor.shape, len(k))
        mask = new_indices_unsorted != old_indices_unsorted
        self.prev_idx_A_k_pair = idx_A_k_pair
        self.previous_indices_results[self.indices_sort[mask]] = new_indices_unsorted[mask]
        indicesClosest = self.previous_indices_results
        return indicesClosest

    # Must be a real decorator: as a plain method, the calls through
    # ``self`` above would pass an extra argument and raise TypeError.
    @staticmethod
    def _create_indices_matrix(idx_A_k_pair, matrix_shape, len_quant_points):
        """Expand run boundaries into a full array of indices into ``k``.

        Args:
            idx_A_k_pair: ``(end position, index into k)`` pairs in
                increasing position order.
            matrix_shape: shape of the array to create.
            len_quant_points: number of points in ``k``; positions after
                the last boundary get ``len_quant_points - 1``.
        """
        old_idx = 0
        final_indices = np.zeros(matrix_shape, dtype=int)
        for idx_A, idx_k in idx_A_k_pair:
            final_indices[old_idx:idx_A] = idx_k
            old_idx = idx_A
        final_indices[old_idx:] = len_quant_points - 1
        return final_indices
The idea is to sort the array A beforehand, then use searchsorted of A on the midpoints of k. This gives the same information as before, in that it tells us exactly which points of A are closer to which points of k. The method _create_indices_matrix will create the full indices array from this information, and then we will unsort it to recover the original order of A. To take advantage of slowly varying k, we save the last indices and we determine which indices we have to change; we then change only those. For slowly varying k, this produces superior performance (at a considerably higher memory cost, however).
For random matrix A of 5 million elements and k of about 30 elements, and repeating the experiments 60 times, we get
Function search_sorted1; 15.72285795211792s
Function search_sorted2; 13.030786037445068s
Function query; 2.3306031227111816s <- the one with use_k_optimization = True
Function query; 4.81286096572876s <- with use_k_optimization = False
scipy.spatial.KDTree.query is too slow, so I did not time it (it takes more than 1 minute, though). This is the code used to do the timing; it also contains the implementations of search_sorted1 and search_sorted2.
import numpy as np
import scipy
import scipy.spatial
import time
# Benchmark data: a large array A and a small set of points k, both drawn
# uniformly at random.
A = np.random.rand(10000*500) #5 million elements
k = np.random.rand(32)
# Sort k in place before it is handed to the search functions below.
k.sort()
#first attempt, detailed in the answer, too
def search_sorted1(A, k):
    """Return, for every element of ``A``, the index of its closest entry
    in ``k``.

    Args:
        A: array of query values.
        k: 1-D array of candidate points; assumed sorted in ascending
            order, since ``np.searchsorted`` is applied to it.

    Returns:
        Integer array of positions in ``k``. On an exact tie the larger
        entry is chosen.
    """
    insert_at = np.searchsorted(k, A)
    # Elements beyond the last point get insertion index len(k); step back
    # by one there so that indexing k stays in bounds.
    past_end = insert_at == k.shape[0]
    right = insert_at - past_end
    left = insert_at - 1  # wraps to -1 where insert_at == 0; masked out below
    left_is_closer = np.abs(A - k[left]) < np.abs(A - k[right])
    step_back = (insert_at > 0) & (past_end | left_is_closer)
    return insert_at - step_back
#taken from @Divakar's answer linked in the comments under the question
def search_sorted2(A, k):
    """Return, for every element of ``A``, the index of its closest entry
    in ``k``.

    Args:
        A: array of query values.
        k: 1-D array of candidate points; assumed sorted in ascending
            order, since ``np.searchsorted`` is applied to it.

    Returns:
        Integer array of positions in ``k``. On an exact tie the larger
        entry is chosen.
    """
    # Clip so that values beyond the last point index the last point
    # instead of running out of bounds.
    indicesClosest = np.searchsorted(k, A, side="left").clip(max=k.size - 1)
    # After the clip an index can never equal len(k), so the former
    # ``indicesClosest == len(k)`` term was unreachable and is dropped.
    # ``indicesClosest - 1`` wraps to -1 at index 0; that case is masked out.
    left_is_closer = np.fabs(A - k[indicesClosest - 1]) < np.fabs(A - k[indicesClosest])
    mask = (indicesClosest > 0) & left_is_closer
    return indicesClosest - mask
def kdquery1(A, k):
    """Return, for every element of ``A``, the index of its nearest entry
    in ``k``, found with a k-d tree.

    Args:
        A: query values, either 1-D or already shaped ``(n, 1)``.
        k: candidate points, either 1-D or already shaped ``(m, 1)``.

    Returns:
        Integer array with one index into ``k`` per query value.
    """
    # cKDTree works on (n_points, n_dims) data, so 1-D inputs are turned
    # into single-column arrays; passing them as-is is rejected.
    points = k[:, np.newaxis] if k.ndim == 1 else k
    queries = A[:, np.newaxis] if A.ndim == 1 else A
    d = scipy.spatial.cKDTree(points, compact_nodes=False, balanced_tree=False)
    _, indices = d.query(queries)
    return indices
#After an idea on the scipy mailing list
class SearchSorted:
    """Nearest-point index lookup for a fixed array and changing points.

    The array is argsorted once in the constructor. Each ``query`` locates
    the midpoints between adjacent entries of ``k`` in the sorted array,
    which splits it into contiguous runs sharing the same nearest entry of
    ``k``. The run labels are then mapped back to the original order.
    """

    def __init__(self, tensor, use_k_optimization=True):
        """Pre-process ``tensor`` for repeated queries.

        Using this requires storing 4x the size of the tensor.
        If use_k_optimization is True, the class will assume that successive
        calls will be made with similar k. When this happens, the running
        time can be cut significantly by storing additional variables. If
        it will not be called with successive similar k, set the flag to
        False, as it would otherwise just consume more memory for no good
        reason.

        Args:
            tensor: the data array (assumed 1-D - TODO confirm).
            use_k_optimization: cache the last result and update it
                incrementally on the next query.
        """
        self.indices_sort = np.argsort(tensor)
        self.sorted_tensor = tensor[self.indices_sort]
        # Inverse permutation: maps results on the sorted array back to
        # the original element order.
        self.inv_indices_sort = np.argsort(self.indices_sort)
        self.use_k_optimization = use_k_optimization
        self.previous_indices_results = None
        self.prev_idx_A_k_pair = None

    def query(self, k):
        """Return, for every element of the tensor, the index of its
        nearest entry in ``k``.

        Args:
            k: 1-D array of points, assumed sorted in ascending order. On
                the incremental path it is also assumed to have the same
                length as in the previous call.

        Returns:
            Integer array in the original order of the tensor. Elements
            lying exactly on a midpoint are assigned to the larger point.
            When ``use_k_optimization`` is True the returned array is the
            internal cache and is updated in place by later calls, so copy
            it if an earlier result must be kept.
        """
        midpoints = k[:-1] + np.diff(k) / 2
        idx_count = np.searchsorted(self.sorted_tensor, midpoints)
        # One (end position in the sorted tensor, index into k) pair per
        # distinct boundary; repeated boundaries describe empty runs.
        idx_A_k_pair = []
        old_obj = 0
        for count, obj in enumerate(idx_count):
            if obj != old_obj:
                idx_A_k_pair.append((obj, count))
                old_obj = obj

        if not self.use_k_optimization or self.previous_indices_results is None:
            # creates the index matrix in the sorted case
            final_indices = self._create_indices_matrix(
                idx_A_k_pair, self.sorted_tensor.shape, len(k))
            # and now unsort it to match the original tensor position
            indicesClosest = final_indices[self.inv_indices_sort]
            if self.use_k_optimization:
                self.prev_idx_A_k_pair = idx_A_k_pair
                self.previous_indices_results = indicesClosest
            return indicesClosest

        # Incremental path: both matrices are in SORTED order; only the
        # positions whose label changed are written back to the cache.
        old_indices_unsorted = self._create_indices_matrix(
            self.prev_idx_A_k_pair, self.sorted_tensor.shape, len(k))
        new_indices_unsorted = self._create_indices_matrix(
            idx_A_k_pair, self.sorted_tensor.shape, len(k))
        mask = new_indices_unsorted != old_indices_unsorted
        self.prev_idx_A_k_pair = idx_A_k_pair
        self.previous_indices_results[self.indices_sort[mask]] = new_indices_unsorted[mask]
        indicesClosest = self.previous_indices_results
        return indicesClosest

    # Must be a real decorator: as a plain method, the calls through
    # ``self`` above would pass an extra argument and raise TypeError.
    @staticmethod
    def _create_indices_matrix(idx_A_k_pair, matrix_shape, len_quant_points):
        """Expand run boundaries into a full array of indices into ``k``.

        Args:
            idx_A_k_pair: ``(end position, index into k)`` pairs in
                increasing position order.
            matrix_shape: shape of the array to create.
            len_quant_points: number of points in ``k``; positions after
                the last boundary get ``len_quant_points - 1``.
        """
        old_idx = 0
        final_indices = np.zeros(matrix_shape, dtype=int)
        for idx_A, idx_k in idx_A_k_pair:
            final_indices[old_idx:idx_A] = idx_k
            old_idx = idx_A
        final_indices[old_idx:] = len_quant_points - 1
        return final_indices
# One searcher with the incremental cache and one without, so that both
# variants can be timed.
mySearchSorted = SearchSorted(A, use_k_optimization=True)
mySearchSorted2 = SearchSorted(A, use_k_optimization=False)
allFunctions = [search_sorted1, search_sorted2,
                mySearchSorted.query,
                mySearchSorted2.query]
# Sanity checks: every implementation should yield the same index array.
# kdquery1 and search_sorted2 return the index array itself, so it is
# compared whole; a trailing ``[1]`` would pick out a single element and
# make the comparison always False.
print(np.array_equal(mySearchSorted.query(k), kdquery1(A, k)))
print(np.array_equal(mySearchSorted.query(k), search_sorted2(A, k)))
print(np.array_equal(mySearchSorted2.query(k), search_sorted2(A, k)))
# Benchmark driver: times every function in allFunctions on a slowly
# drifting k and prints the total wall-clock time per function.
if __name__ == '__main__':
    num_to_average = 3
    for func in allFunctions:
        # A function named 'search_sorted3' would expect a pre-sorted array
        # and need its result mapped back to the original order. No such
        # function is in allFunctions at present.
        if func.__name__ == 'search_sorted3':
            indices_sort = np.argsort(A)
            sA = A[indices_sort].copy()
            inv_indices_sort = np.argsort(indices_sort)
        else:
            sA = A.copy()
        # The bound ``query`` methods already hold the array; the plain
        # functions take it as their first argument.
        if func.__name__ != 'query':
            func_to_use = lambda x: func(sA, x)
        else:
            func_to_use = func
        # Work on a copy: the in-place perturbation below would otherwise
        # modify the module-level k, and every function would start from
        # wherever the previous one left it.
        k_to_use = k.copy()
        start_time = time.time()
        for idx_average in range(num_to_average):
            for idx_repeat in range(10):
                k_to_use += (2*np.random.rand(*k.shape)-1)/100 #uniform between (-1/100, 1/100)
                k_to_use.sort()
                indices = func_to_use(k_to_use)
                if func.__name__ == 'search_sorted3':
                    indices = indices[inv_indices_sort]
                val = k_to_use[indices]
        end_time = time.time()
        total_time = end_time-start_time
        print('Function {}; {}s'.format(func.__name__, total_time))
I'm sure that it is still possible to do better (I use a lot of space for the SearchSorted class, so we could probably save something). If you have any ideas for an improvement, please let me know!