Is it possible to make this algorithm recursive? - python

Background
We have a family tradition where my and my siblings' Christmas presents are identified by a code that can be solved using only numbers related to us. For example, the code could be birth month * age + graduation year (This is a simple one). If the numbers were 8 * 22 + 2020 = 2196, the number 2196 would be written on all my Christmas presents.
I've already created a Python class that solves the code with certain constraints, but I'm wondering if it's possible to do it recursively.
Current Code
The first function returns a result set for all possible combinations of numbers and operations that produce a value in target_values
#Master algorithm (Get the result set of all combinations of numbers and cartesian products of operations that reach a target_value, using only the number_of_numbers_in_solution)
#Example: sibling1.results[1] = [(3, 22, 4), (<built-in function add>, <built-in function add>), 29]. This means that 3 + 22 + 4 = 29, and 29 is in target_values
import operator
from itertools import product
from itertools import combinations
NUMBER_OF_OPERATIONS_IN_SOLUTION = 2 #Total numbers involved is this plus 1
NUMBER_OF_NUMBERS_IN_SOLUTION = NUMBER_OF_OPERATIONS_IN_SOLUTION + 1
TARGET_VALUES = {22,27,29,38,39}
def getresults( list ):
#Add the cartesian product of all possible operations to a variable ops
ops = []
opslist = [operator.add, operator.sub, operator.mul, operator.truediv]
for val in product(opslist, repeat=NUMBER_OF_OPERATIONS_IN_SOLUTION):
ops.append(val)
#Get the result set of all combinations of numbers and cartesian products of operations that reach a target_value
results = []
for x in combinations(list, NUMBER_OF_NUMBERS_IN_SOLUTION):
for y in ops:
result = 0
for z in range(len(y)):
#On the first iteration, do the operation on the first two numbers (x[z] and x[z+1])
if (z == 0):
#print(y[z], x[z], x[z+1])
result = y[z](x[z], x[z+1])
#For all other iterations, do the operation on the current result and x[z+1])
else:
#print(y[z], result, x[z+1])
result = y[z](result, x[z+1])
if result in TARGET_VALUES:
results.append([x, y, result])
#print (x, y)
print(len(results))
return results
Then a class that takes in personal parameters for each person and gets the result set
def getalpha( str, inverse ):
"Converts string to alphanumeric array of chars"
array = []
for i in range(0, len(str)):
alpha = ord(str[i]) - 96
if inverse:
array.append(27 - alpha)
else:
array.append(alpha)
return array;
class Person:
def __init__(self, name, middlename, birthmonth, birthday, birthyear, age, orderofbirth, gradyear, state, zip, workzip, cityfirst3):
#final list
self.listofnums = []
self.listofnums.extend((birthmonth, birthday, birthyear, birthyear - 1900, age, orderofbirth, gradyear, gradyear - 2000, zip, workzip))
self.listofnums.extend(getalpha(cityfirst3, False))
self.results = getresults(self.listofnums)
Finally, a "solve code" method that takes from the result sets and finds any possible combinations that produce the full list of target_values.
#Compares the values of two sets
def compare(l1, l2):
result = all(map(lambda x, y: x == y, l1, l2))
return result and len(l1) == len(l2)
#Check every result in sibling2 with a different result target_value and equal operation sets
def comparetwosiblings(current_values, sibling1, sibling2, a, b):
if sibling2.results[b][2] not in current_values and compare(sibling1.results[a][1], sibling2.results[b][1]):
okay = True
#If the indexes aren't alphanumeric, ensure they're the same before adding to new result set
for c in range(0, NUMBER_OF_NUMBERS_IN_SOLUTION):
indexintersection = set([index for index, value in enumerate(sibling1.listofnums) if value == sibling1.results[a][0][c]]) & set([index for index, value in enumerate(sibling2.listofnums) if value == sibling2.results[b][0][c]])
if len(indexintersection) > 0:
okay = True
else:
okay = False
break
else:
okay = False
return okay
#For every result, we start by adding the result number to the current_values list for sibling1, then cycle through each person and see if a matching operator list leads to a different result number. (Matching indices as well)
#If there's a result set for everyone that leads to five different numbers in the code, the values will be added to the newresult set
def solvecode( sibling1, sibling2, sibling3, sibling4, sibling5 ):
newresults = []
current_values = []
#For every result in sibling1
for a in range(len(sibling1.results)):
current_values = []
current_values.append(sibling1.results[a][2])
for b in range(len(sibling2.results)):
if comparetwosiblings(current_values, sibling1, sibling2, a, b):
current_values.append(sibling2.results[b][2])
for c in range(len(sibling3.results)):
if comparetwosiblings(current_values, sibling1, sibling3, a, c):
current_values.append(sibling3.results[c][2])
for d in range(len(sibling4.results)):
if comparetwosiblings(current_values, sibling1, sibling4, a, d):
current_values.append(sibling4.results[d][2])
for e in range(len(sibling5.results)):
if comparetwosiblings(current_values, sibling1, sibling5, a, e):
newresults.append([sibling1.results[a][0], sibling2.results[b][0], sibling3.results[c][0], sibling4.results[d][0], sibling5.results[e][0], sibling1.results[a][1]])
current_values.remove(sibling4.results[d][2])
current_values.remove(sibling3.results[c][2])
current_values.remove(sibling2.results[b][2])
print(len(newresults))
print(newresults)
It's the last "solvecode" method that I'm wondering if I can optimize and make into a recursive algorithm. In some cases it can be helpful to add or remove a sibling, which would look nice recursively (My mom sometimes makes a mistake with one sibling, or we get a new brother/sister-in-law)
Thank you for any and all help! I hope you at least get a laugh out of my weird family tradition.
Edit: In case you want to test the algorithm, here's an example group of siblings that result in exactly one correct solution
#ALL PERSONAL INFO CHANGED FOR STACKOVERFLOW
sibling1 = Person("sibling1", "horatio", 7, 8, 1998, 22, 5, 2020, "ma", 11111, 11111, "red")
sibling2 = Person("sibling2", "liem", 2, 21, 1995, 25, 4, 2018, "ma", 11111, 11111, "pho")
sibling3 = Person("sibling3", "kyle", 4, 21, 1993, 26, 3, 2016, "ma", 11111, 11111, "okl")
sibling4 = Person("sibling4", "jamal", 4, 7, 1991, 29, 2, 2014, "ma", 11111, 11111, "pla")
sibling5 = Person("sibling5", "roberto", 9, 23, 1990, 30, 1, 2012, "ma", 11111, 11111, "boe")

I just spent a while improving the code. Few things I need to mention:
It's not good practice to use python keywords(like list, str and zip) as variables, it will give you problems and it makes it harder to debug.
I feel like you should use the permutation function as combination gives unordered pairs while permutation gives ordered pairs which are more in number and will give more results. For example, for the sibling info you gave combination gives only 1 solution through solvecode() while permutation gives 12.
Because you are working with operators, there can be more cases with brackets. To solve that problem and to make the getresults() function a bit more optimized, I suggest you explore the reverse polish notation. Computerphile has an excellent video on it.
You don't need a compare function. list1==list2 works.
Here's the optimized code:
import operator
from itertools import product
from itertools import permutations
NUMBER_OF_OPERATIONS_IN_SOLUTION = 2 #Total numbers involved is this plus 1
NUMBER_OF_NUMBERS_IN_SOLUTION = NUMBER_OF_OPERATIONS_IN_SOLUTION + 1
TARGET_VALUES = {22,27,29,38,39}
def getresults(listofnums):
#Add the cartesian product of all possible operations to a variable ops
ops = []
opslist = [operator.add, operator.sub, operator.mul, operator.truediv]
for val in product(opslist, repeat=NUMBER_OF_OPERATIONS_IN_SOLUTION):
ops.append(val)
#Get the result set of all combinations of numbers and cartesian products of operations that reach a target_value
results = []
for x in permutations(listofnums, NUMBER_OF_NUMBERS_IN_SOLUTION):
for y in ops:
result = y[0](x[0], x[1])
if NUMBER_OF_OPERATIONS_IN_SOLUTION>1:
for z in range(1, len(y)):
result = y[z](result, x[z+1])
if result in TARGET_VALUES:
results.append([x, y, result])
return results
def getalpha(string, inverse):
"Converts string to alphanumeric array of chars"
array = []
for i in range(0, len(string)):
alpha = ord(string[i]) - 96
array.append(27-alpha if inverse else alpha)
return array
class Person:
def __init__(self, name, middlename, birthmonth, birthday, birthyear, age, orderofbirth, gradyear, state, zipcode, workzip, cityfirst3):
#final list
self.listofnums = [birthmonth, birthday, birthyear, birthyear - 1900, age, orderofbirth, gradyear, gradyear - 2000, zipcode, workzip]
self.listofnums.extend(getalpha(cityfirst3, False))
self.results = getresults(self.listofnums)
#Check every result in sibling2 with a different result target_value and equal operation sets
def comparetwosiblings(current_values, sibling1, sibling2, a, b):
if sibling2.results[b][2] not in current_values and sibling1.results[a][1]==sibling2.results[b][1]:
okay = True
#If the indexes aren't alphanumeric, ensure they're the same before adding to new result set
for c in range(0, NUMBER_OF_NUMBERS_IN_SOLUTION):
indexintersection = set([index for index, value in enumerate(sibling1.listofnums) if value == sibling1.results[a][0][c]]) & set([index for index, value in enumerate(sibling2.listofnums) if value == sibling2.results[b][0][c]])
if len(indexintersection) > 0:
okay = True
else:
okay = False
break
else:
okay = False
return okay
And now, the million dollar function or should i say two functions:
# var contains the loop variables a-e, depth keeps track of sibling number
def rec(arg, var, current_values, newresults, depth):
for i in range(len(arg[depth].results)):
if comparetwosiblings(current_values, arg[0], arg[depth], var[0], i):
if depth<len(arg)-1:
current_values.append(arg[depth].results[i][2])
rec(arg, var[:depth]+[i], current_values, newresults, depth+1)
current_values.remove(arg[depth].results[i][2])
else:
var.extend([i])
newresults.append([arg[0].results[var[0]][0], arg[1].results[var[1]][0], arg[2].results[var[2]][0], arg[3].results[var[3]][0], arg[4].results[var[4]][0], arg[0].results[var[0]][1]])
def solvecode(*arg):
newresults = []
for a in range(len(arg[0].results)):
current_values = [arg[0].results[a][2]]
rec(arg, var=[a], current_values=current_values, newresults=newresults, depth=1)
print(len(newresults))
print(newresults)
There is a need for two functions as the first one is the recursive one and the second one is like a packaging. I've also fulfilled your second wish, that was being able to have variable number of siblings' data that can be input into the new solvecode function. I've checked the new functions and they work together exactly like the original solvecode function. Something to be noted is that there is no significant difference in the version's runtimes although the second one has 8 less lines of code. Hope this helped. lmao took me 3 hours.

Related

Most frequently overlapping range - Python3.x

I'm a beginner, trying to write code listing the most frequently overlapping ranges in a list of ranges.
So, input is various ranges (#1 through #7 in the example figure; https://prntscr.com/kj80xl) and I would like to find the most common range (in the example 3,000- 4,000 in 6 out of 7 - 86 %). Actually, I would like to find top 5 most frequent.
Not all ranges overlap. Ranges are always positive and given as integers with 1 distance (standard range).
What I have now is only code comparing one sequence to another and returning the overlap, but after that I'm stuck.
def range_overlap(range_x,range_y):
x = (range_x[0], (range_x[-1])+1)
y = (range_y[0], (range_y[-1])+1)
overlap = (max(x[0],y[0]),min(x[-1],(y[-1])))
if overlap[0] <= overlap[1]:
return range(overlap[0], overlap[1])
else:
return "Out of range"
I would be very grateful for any help.
Better solution
I came up with a simpler solution (at least IMHO) so here it is:
def get_abs_min(ranges):
return min([min(r) for r in ranges])
def get_abs_max(ranges):
return max([max(r) for r in ranges])
def count_appearances(i, ranges):
return sum([1 for r in ranges if i in r])
def create_histogram(ranges):
keys = [str(i) for i in range(len(ranges) + 1)]
histogram = dict.fromkeys(keys)
results = []
min = get_abs_min(range_list)
max = get_abs_max(range_list)
for i in range(min, max):
count = str(count_appearances(i, ranges))
if histogram[count] is None:
histogram[count] = dict(start=i, end=None)
elif histogram[count]['end'] is None:
histogram[count]['end'] = i
elif histogram[count]['end'] == i - 1:
histogram[count]['end'] = i
else:
start = histogram[count]['start']
end = histogram[count]['end']
results.append((range(start, end + 1), count))
histogram[count]['start'] = i
histogram[count]['end'] = None
for count, d in histogram.items():
if d is not None and d['start'] is not None and d['end'] is not None:
results.append((range(d['start'], d['end'] + 1), count))
return results
def main(ranges, top):
appearances = create_histogram(ranges)
return sorted(appearances, key=lambda t: t[1], reverse=True)[:top]
The idea here is as simple as iterating through a superposition of all the ranges and building a histogram of appearances (e.g. the number of original ranges this current i appears in)
After that just sort and slice according to the chosen size of the results.
Just call main with the ranges and the top number you want (or None if you want to see all results).
OLDER EDITS BELOW
I (almost) agree with #Kasramvd's answer.
here is my take on it:
from collections import Counter
from itertools import combinations
def range_overlap(x, y):
common_part = list(set(x) & set(y))
if common_part:
return range(common_part[0], common_part[-1] +1)
else:
return False
def get_most_common(range_list, top_frequent):
overlaps = Counter(range_overlap(i, j) for i, j in
combinations(list_of_ranges, 2))
return [(r, i) for (r, i) in overlaps.most_common(top_frequent) if r]
you need to input the range_list and the number of top_frequent you want.
EDIT
the previous answer solved this question for all 2's combinations over the range list.
This edit is tested against your input and results with the correct answer:
from collections import Counter
from itertools import combinations
def range_overlap(*args):
sets = [set(r) for r in args]
common_part = list(set(args[0]).intersection(*sets))
if common_part:
return range(common_part[0], common_part[-1] +1)
else:
return False
def get_all_possible_combinations(range_list):
all_combos = []
for i in range(2, len(range_list)):
all_combos.append(combinations(range_list, i))
all_combos = [list(combo) for combo in all_combos]
return all_combos
def get_most_common_for_combo(combo):
return list(filter(None, [range_overlap(*option) for option in combo]))
def get_most_common(range_list, top_frequent):
all_overlaps = []
combos = get_all_possible_combinations(range_list)
for combo in combos:
all_overlaps.extend(get_most_common_for_combo(combo))
return [r for (r, i) in Counter(all_overlaps).most_common(top_frequent) if r]
And to get the results just run get_most_common(range_list, top_frequent)
Tested on my machine (ubunut 16.04 with python 3.5.2) with your input range_list and top_frequent = 5 with the results:
[range(3000, 4000), range(2500, 4000), range(1500, 4000), range(3000, 6000), range(1, 4000)]
You can first change your function to return a valid range in both cases so that you can use it in a set of comparisons. Also, since Python's range objects are not already created iterables but smart objects that only get start, stop and step attributes of a range and create the range on-demand, you can do a little change on your function as well.
def range_overlap(range_x,range_y):
rng = range(max(range_x.start, range_y.start),
min(range_x.stop, range_y.stop)+1)
if rng.start < rng.stop:
return rng.start, rng.stop
Now, if you have a set of ranges and you want to compare all the pairs you can use itertools.combinations to get all the pairs and then using range_overlap and collections.Counter you can find the number of overlapped ranges.
from collections import Counter
from itertools import combinations
overlaps = Counter(range_overlap(i,j) for i, j in
combinations(list_of_ranges, 2))

Changing this Python program to have function def()

The following Python program flips a coin several times, then reports the longest series of heads and tails. I am trying to convert this program into a program that uses functions so it uses basically less code. I am very new to programming and my teacher requested this of us, but I have no idea how to do it. I know I'm supposed to have the function accept 2 parameters: a string or list, and a character to search for. The function should return, as the value of the function, an integer which is the longest sequence of that character in that string. The function shouldn't accept input or output from the user.
import random
print("This program flips a coin several times, \nthen reports the longest
series of heads and tails")
cointoss = int(input("Number of times to flip the coin: "))
varlist = []
i = 0
varstring = ' '
while i < cointoss:
r = random.choice('HT')
varlist.append(r)
varstring = varstring + r
i += 1
print(varstring)
print(varlist)
print("There's this many heads: ",varstring.count("H"))
print("There's this many tails: ",varstring.count("T"))
print("Processing input...")
i = 0
longest_h = 0
longest_t = 0
inarow = 0
prevIn = 0
while i < cointoss:
print(varlist[i])
if varlist[i] == 'H':
prevIn += 1
if prevIn > longest_h:
longest_h = prevIn
print("",longest_h,"")
inarow = 0
if varlist[i] == 'T':
inarow += 1
if inarow > longest_t:
longest_t = inarow
print("",longest_t,"")
prevIn = 0
i += 1
print ("The longest series of heads is: ",longest_h)
print ("The longest series of tails is: ",longest_t)
If this is asking too much, any explanatory help would be really nice instead. All I've got so far is:
def flip (a, b):
flipValue = random.randint
but it's barely anything.
import random
def Main():
numOfFlips=getFlips()
outcome=flipping(numOfFlips)
print(outcome)
def getFlips():
Flips=int(input("Enter number if flips:\n"))
return Flips
def flipping(numOfFlips):
longHeads=[]
longTails=[]
Tails=0
Heads=0
for flips in range(0,numOfFlips):
flipValue=random.randint(1,2)
print(flipValue)
if flipValue==1:
Tails+=1
longHeads.append(Heads) #recording value of Heads before resetting it
Heads=0
else:
Heads+=1
longTails.append(Tails)
Tails=0
longestHeads=max(longHeads) #chooses the greatest length from both lists
longestTails=max(longTails)
return "Longest heads:\t"+str(longestHeads)+"\nLongest tails:\t"+str(longestTails)
Main()
I did not quite understand how your code worked, so I made the code in functions that works just as well, there will probably be ways of improving my code alone but I have moved the code over to functions
First, you need a function that flips a coin x times. This would be one possible implementation, favoring random.choice over random.randint:
def flip(x):
result = []
for _ in range(x):
result.append(random.choice(("h", "t")))
return result
Of course, you could also pass from what exactly we are supposed to take a choice as a parameter.
Next, you need a function that finds the longest sequence of some value in some list:
def longest_series(some_value, some_list):
current, longest = 0, 0
for r in some_list:
if r == some_value:
current += 1
longest = max(current, longest)
else:
current = 0
return longest
And now you can call these in the right order:
# initialize the random number generator, so we get the same result
random.seed(5)
# toss a coin a hundred times
series = flip(100)
# count heads/tails
headflips = longest_series('h', series)
tailflips = longest_series('t', series)
# print the results
print("The longest series of heads is: " + str(headflips))
print("The longest series of tails is: " + str(tailflips))
Output:
>> The longest series of heads is: 8
>> The longest series of heads is: 5
edit: removed the flip implementation with yield, it made the code weird.
Counting the longest run
Let see what you have asked for
I'm supposed to have the function accept 2 parameters: a string or list,
or, generalizing just a bit, a sequence
and a character
again, we'd speak, generically, of an item
to search for. The function should return, as the value of the
function, an integer which is the longest sequence of that character
in that string.
My implementation of the function you are asking for, complete of doc
string, is
def longest_run(i, s):
'Counts the longest run of item "i" in sequence "s".'
c, m = 0, 0
for el in s:
if el==i:
c += 1
elif c:
m = m if m >= c else c
c = 0
return m
We initialize c (current run) and m (maximum run so far) to zero,
then we loop, looking at every element el of the argument sequence s.
The logic is straightforward but for elif c: whose block is executed at the end of a run (because c is greater than zero and logically True) but not when the previous item (not the current one) was not equal to i. The savings are small but are savings...
Flipping coins (and more...)
How can we simulate flipping n coins? We abstract the problem and recognize that flipping n coins corresponds to choosing from a collection of possible outcomes (for a coin, either head or tail) for n times.
As it happens, the random module of the standard library has the exact answer to this problem
In [52]: random.choices?
Signature: choices(population, weights=None, *, cum_weights=None, k=1)
Docstring:
Return a k sized list of population elements chosen with replacement.
If the relative weights or cumulative weights are not specified,
the selections are made with equal probability.
File: ~/lib/miniconda3/lib/python3.6/random.py
Type: method
Our implementation, aimed at hiding details, could be
def roll(n, l):
'''Rolls "n" times a dice/coin whose face values are listed in "l".
E.g., roll(2, range(1,21)) -> [12, 4] simulates rolling 2 icosahedron dices.
'''
from random import choices
return choices(l, k=n)
Putting this together
def longest_run(i, s):
'Counts the longest run of item "i" in sequence "s".'
c, m = 0, 0
for el in s:
if el==i:
c += 1
elif c:
m = m if m >= c else c
c = 0
return m
def roll(n, l):
'''Rolls "n" times a dice/coin whose face values are listed in "l".
E.g., roll(2, range(1,21)) -> [12, 4] simulates rolling 2 icosahedron dices.
'''
from random import choices
return choices(l, k=n)
N = 100 # n. of flipped coins
h_or_t = ['h', 't']
random_seq_of_h_or_t = flip(N, h_or_t)
max_h = longest_run('h', random_seq_of_h_or_t)
max_t = longest_run('t', random_seq_of_h_or_t)

Generate equation with the result value closest to the requested one, have speed problems

I am writing some quiz game and need computer to solve 1 game in the quiz if players fail to solve it.
Given data :
List of 6 numbers to use, for example 4, 8, 6, 2, 15, 50.
Targeted value, where 0 < value < 1000, for example 590.
Available operations are division, addition, multiplication and division.
Parentheses can be used.
Generate mathematical expression which evaluation is equal, or as close as possible, to the target value. For example for numbers given above, expression could be : (6 + 4) * 50 + 15 * (8 - 2) = 590
My algorithm is as follows :
Generate all permutations of all the subsets of the given numbers from (1) above
For each permutation generate all parenthesis and operator combinations
Track the closest value as algorithm runs
I can not think of any smart optimization to the brute-force algorithm above, which will speed it up by the order of magnitude. Also I must optimize for the worst case, because many quiz games will be run simultaneously on the server.
Code written today to solve this problem is (relevant stuff extracted from the project) :
from operator import add, sub, mul, div
import itertools
ops = ['+', '-', '/', '*']
op_map = {'+': add, '-': sub, '/': div, '*': mul}
# iterate over 1 permutation and generates parentheses and operator combinations
def iter_combinations(seq):
if len(seq) == 1:
yield seq[0], str(seq[0])
else:
for i in range(len(seq)):
left, right = seq[:i], seq[i:] # split input list at i`th place
# generate cartesian product
for l, l_str in iter_combinations(left):
for r, r_str in iter_combinations(right):
for op in ops:
if op_map[op] is div and r == 0: # cant divide by zero
continue
else:
yield op_map[op](float(l), r), \
('(' + l_str + op + r_str + ')')
numbers = [4, 8, 6, 2, 15, 50]
target = best_value = 590
best_item = None
for i in range(len(numbers)):
for current in itertools.permutations(numbers, i+1): # generate perms
for value, item in iter_combinations(list(current)):
if value < 0:
continue
if abs(target - value) < best_value:
best_value = abs(target - value)
best_item = item
print best_item
It prints : ((((4*6)+50)*8)-2). Tested it a little with different values and it seems to work correctly. Also I have a function to remove unnecessary parenthesis but it is not relevant to the question so it is not posted.
Problem is that this runs very slowly because of all this permutations, combinations and evaluations. On my mac book air it runs for a few minutes for 1 example. I would like to make it run in a few seconds tops on the same machine, because many quiz game instances will be run at the same time on the server. So the questions are :
Can I speed up current algorithm somehow (by orders of magnitude)?
Am I missing on some other algorithm for this problem which would run much faster?
You can build all the possible expression trees with the given numbers and evalate them. You don't need to keep them all in memory, just print them when the target number is found:
First we need a class to hold the expression. It is better to design it to be immutable, so its value can be precomputed. Something like this:
class Expr:
'''An Expr can be built with two different calls:
-Expr(number) to build a literal expression
-Expr(a, op, b) to build a complex expression.
There a and b will be of type Expr,
and op will be one of ('+','-', '*', '/').
'''
def __init__(self, *args):
if len(args) == 1:
self.left = self.right = self.op = None
self.value = args[0]
else:
self.left = args[0]
self.right = args[2]
self.op = args[1]
if self.op == '+':
self.value = self.left.value + self.right.value
elif self.op == '-':
self.value = self.left.value - self.right.value
elif self.op == '*':
self.value = self.left.value * self.right.value
elif self.op == '/':
self.value = self.left.value // self.right.value
def __str__(self):
'''It can be done smarter not to print redundant parentheses,
but that is out of the scope of this problem.
'''
if self.op:
return "({0}{1}{2})".format(self.left, self.op, self.right)
else:
return "{0}".format(self.value)
Now we can write a recursive function that builds all the possible expression trees with a given set of expressions, and prints the ones that equals our target value. We will use the itertools module, that's always fun.
We can use itertools.combinations() or itertools.permutations(), the difference is in the order. Some of our operations are commutative and some are not, so we can use permutations() and assume we will get many very simmilar solutions. Or we can use combinations() and manually reorder the values when the operation is not commutative.
import itertools
OPS = ('+', '-', '*', '/')
def SearchTrees(current, target):
''' current is the current set of expressions.
target is the target number.
'''
for a,b in itertools.combinations(current, 2):
current.remove(a)
current.remove(b)
for o in OPS:
# This checks whether this operation is commutative
if o == '-' or o == '/':
conmut = ((a,b), (b,a))
else:
conmut = ((a,b),)
for aa, bb in conmut:
# You do not specify what to do with the division.
# I'm assuming that only integer divisions are allowed.
if o == '/' and (bb.value == 0 or aa.value % bb.value != 0):
continue
e = Expr(aa, o, bb)
# If a solution is found, print it
if e.value == target:
print(e.value, '=', e)
current.add(e)
# Recursive call!
SearchTrees(current, target)
# Do not forget to leave the set as it were before
current.remove(e)
# Ditto
current.add(b)
current.add(a)
And then the main call:
NUMBERS = [4, 8, 6, 2, 15, 50]
TARGET = 590
initial = set(map(Expr, NUMBERS))
SearchTrees(initial, TARGET)
And done! With these data I'm getting 719 different solutions in just over 21 seconds! Of course many of them are trivial variations of the same expression.
24 game is 4 numbers to target 24, your game is 6 numbers to target x (0 < x < 1000).
That's much similar.
Here is the quick solution, get all results and print just one in my rMBP in about 1-3s, I think one solution print is ok in this game :), I will explain it later:
def mrange(mask):
#twice faster from Evgeny Kluev
x = 0
while x != mask:
x = (x - mask) & mask
yield x
def f( i ) :
global s
if s[i] :
#get cached group
return s[i]
for x in mrange(i & (i - 1)) :
#when x & i == x
#x is a child group in group i
#i-x is also a child group in group i
fk = fork( f(x), f(i-x) )
s[i] = merge( s[i], fk )
return s[i]
def merge( s1, s2 ) :
if not s1 :
return s2
if not s2 :
return s1
for i in s2 :
#print just one way quickly
s1[i] = s2[i]
#combine all ways, slowly
# if i in s1 :
# s1[i].update(s2[i])
# else :
# s1[i] = s2[i]
return s1
def fork( s1, s2 ) :
d = {}
#fork s1 s2
for i in s1 :
for j in s2 :
if not i + j in d :
d[i + j] = getExp( s1[i], s2[j], "+" )
if not i - j in d :
d[i - j] = getExp( s1[i], s2[j], "-" )
if not j - i in d :
d[j - i] = getExp( s2[j], s1[i], "-" )
if not i * j in d :
d[i * j] = getExp( s1[i], s2[j], "*" )
if j != 0 and not i / j in d :
d[i / j] = getExp( s1[i], s2[j], "/" )
if i != 0 and not j / i in d :
d[j / i] = getExp( s2[j], s1[i], "/" )
return d
def getExp( s1, s2, op ) :
exp = {}
for i in s1 :
for j in s2 :
exp['('+i+op+j+')'] = 1
#just print one way
break
#just print one way
break
return exp
def check( s ) :
num = 0
for i in xrange(target,0,-1):
if i in s :
if i == target :
print numbers, target, "\nFind ", len(s[i]), 'ways'
for exp in s[i]:
print exp, ' = ', i
else :
print numbers, target, "\nFind nearest ", i, 'in', len(s[i]), 'ways'
for exp in s[i]:
print exp, ' = ', i
break
print '\n'
def game( numbers, target ) :
global s
s = [None]*(2**len(numbers))
for i in xrange(0,len(numbers)) :
numbers[i] = float(numbers[i])
n = len(numbers)
for i in xrange(0,n) :
s[2**i] = { numbers[i]: {str(numbers[i]):1} }
for i in xrange(1,2**n) :
#we will get the f(numbers) in s[2**n-1]
s[i] = f(i)
check(s[2**n-1])
numbers = [4, 8, 6, 2, 2, 5]
s = [None]*(2**len(numbers))
target = 590
game( numbers, target )
numbers = [1,2,3,4,5,6]
target = 590
game( numbers, target )
Assume A is your 6 numbers list.
We define f(A) is all result that can calculate by all A numbers, if we search f(A), we will find if target is in it and get answer or the closest answer.
We can split A to two real child groups: A1 and A-A1 (A1 is not empty and not equal A) , which cut the problem from f(A) to f(A1) and f(A-A1). Because we know f(A) = Union( a+b, a-b, b-a, a*b, a/b(b!=0), b/a(a!=0) ), which a in A, b in A-A1.
We use fork f(A) = Union( fork(A1,A-A1) ) stands for such process. We can remove all duplicate value in fork(), so we can cut the range and make program faster.
So, if A = [1,2,3,4,5,6], then f(A) = fork( f([1]),f([2,3,4,5,6]) ) U ... U fork( f([1,2,3]), f([4,5,6]) ) U ... U stands for Union.
We will see f([2,3,4,5,6]) = fork( f([2,3]), f([4,5,6]) ) U ... , f([3,4,5,6]) = fork( f([3]), f([4,5,6]) ) U ..., the f([4,5,6]) used in both.
So if we can cache every f([...]) the program can be faster.
We can get 2^len(A) - 2 (A1,A-A1) in A. We can use binary to stands for that.
For example: A = [1,2,3,4,5,6], A1 = [1,2,3], then binary 000111(7) stands for A1. A2 = [1,3,5], binary 010101(21) stands for A2. A3 = [1], then binary 000001(1) stands for A3...
So we get a way stands for all groups in A, we can cache them and make all process faster!
All combinations for six number, four operations and parenthesis are up to 5 * 9! at least. So I think you should use some AI algorithm. Using genetic programming or optimization seems to be the path to follow.
In the book Programming Collective Intelligence in the chapter 11 Evolving Intelligence you will find exactly what you want and much more. That chapter explains how to find a mathematical function combining operations and numbers (as you want) to match a result. You will be surprised how easy is such task.
PD: The examples are written using Python.
I would try using an AST at least it will
make your expression generation part easier
(no need to mess with brackets).
http://en.wikipedia.org/wiki/Abstract_syntax_tree
1) Generate some tree with N nodes
(N = the count of numbers you have).
I've read before how many of those you
have, their size is serious as N grows.
By serious I mean more than polynomial to say the least.
2) Now just start changing the operations
in the non-leaf nodes and keep evaluating
the result.
But this is again backtracking and too much degree of freedom.
This is a computationally complex task you're posing. I believe if you
ask the question as you did: "let's generate a number K on the output
such that |K-V| is minimal" (here V is the pre-defined desired result,
i.e. 590 in your example) , then I guess this problem is even NP-complete.
Somebody please correct me if my intuition is lying to me.
So I think even the generation of all possible ASTs (assuming only 1 operation
is allowed) is NP complete as their count is not polynomial. Not to talk that more
than 1 operation is allowed here and not to talk of the minimal difference requirement (between result and desired result).
1. Fast entirely online algorithm
The idea is to search not for a single expression for target value,
but for an equation where target value is included in one part of the equation and
both parts have almost equal number of operations (2 and 3).
Since each part of the equation is relatively small, it does not take much time to
generate all possible expressions for given input values.
After both parts of equation are generated it is possible to scan a pair of sorted arrays
containing values of these expressions and find a pair of equal (or at least best matching)
values in them. After two matching values are found we could get corresponding expressions and
join them into a single expression (in other words, solve the equation).
To join two expression trees together we could descend from the root of one tree
to "target" leaf, for each node on this path invert corresponding operation
('*' to '/', '/' to '*' or '/', '+' to '-', '-' to '+' or '-'), and move "inverted"
root node to other tree (also as root node).
This algorithm is faster and easier to implement when all operations are invertible.
So it is best to use with floating point division (as in my implementation) or with
rational division. Truncating integer division is most difficult case because it produces same result for different inputs (42/25=1 and 25/25 is also 1). With zero-remainder integer division this algorithm gives result almost instantly when exact result is available, but needs some modifications to work correctly when approximate result is needed.
See implementation on Ideone.
2. Even faster approach with off-line pre-processing
As noticed by #WolframH, there are not so many possible input number combinations.
Only 3*3*(49+4-1) = 4455 if repetitions are possible.
Or 3*3*(49) = 1134 without duplicates. Which allows us to pre-process
all possible inputs off-line, store results in compact form, and when some particular result
is needed quickly unpack one of pre-processed values.
Pre-processing program should take array of 6 numbers and generate values for all possible
expressions. Then it should drop out-of-range values and find nearest result for all cases
where there is no exact match. All this could be performed by algorithm proposed by #Tim.
His code needs minimal modifications to do it. Also it is the fastest alternative (yet).
Since pre-processing is offline, we could use something better than interpreted Python.
One alternative is PyPy, other one is to use some fast interpreted language. Pre-processing
all possible inputs should not take more than several minutes.
Speaking about memory needed to store all pre-processed values, the only problem are the
resulting expressions. If stored in string form they will take up to 4455*999*30 bytes or 120Mb.
But each expression could be compressed. It may be represented in postfix notation like this:
arg1 arg2 + arg3 arg4 + *. To store this we need 10 bits to store all arguments' permutations,
10 bits to store 5 operations, and 8 bits to specify how arguments and operations are
interleaved (6 arguments + 5 operations - 3 pre-defined positions: first two are always
arguments, last one is always operation). 28 bits per tree or 4 bytes, which means it is only
20Mb for entire data set with duplicates or 5Mb without them.
3. Slow entirely online algorithm
There are some ways to speed up algorithm in OP:
Greatest speed improvement may be achieved if we avoid trying each commutative operation twice and make recursion tree less branchy.
Some optimization is possible by removing all branches where the result of division operation is zero.
Memorization (dynamic programming) cannot give significant speed boost here, still it may be useful.
After enhancing OP's approach with these ideas, approximately 30x speedup is achieved:
from itertools import combinations
numbers = [4, 8, 6, 2, 15, 50]
target = best_value = 590
best_item = None
subsets = {}
def get_best(value, item):
global best_value, target, best_item
if value >= 0 and abs(target - value) < best_value:
best_value = abs(target - value)
best_item = item
return value, item
def compare_one(value, op, left, right):
item = ('(' + left + op + right + ')')
return get_best(value, item)
def apply_one(left, right):
yield compare_one(left[0] + right[0], '+', left[1], right[1])
yield compare_one(left[0] * right[0], '*', left[1], right[1])
yield compare_one(left[0] - right[0], '-', left[1], right[1])
yield compare_one(right[0] - left[0], '-', right[1], left[1])
if right[0] != 0 and left[0] >= right[0]:
yield compare_one(left[0] / right[0], '/', left[1], right[1])
if left[0] != 0 and right[0] >= left[0]:
yield compare_one(right[0] / left[0], '/', right[1], left[1])
def memorize(seq):
fs = frozenset(seq)
if fs in subsets:
for x in subsets[fs].items():
yield x
else:
subsets[fs] = {}
for value, item in try_all(seq):
subsets[fs][value] = item
yield value, item
def apply_all(left, right):
for l in memorize(left):
for r in memorize(right):
for x in apply_one(l, r):
yield x;
def try_all(seq):
if len(seq) == 1:
yield get_best(numbers[seq[0]], str(numbers[seq[0]]))
for length in range(1, len(seq)):
for x in combinations(seq[1:], length):
for value, item in apply_all(list(x), list(set(seq) - set(x))):
yield value, item
for x, y in try_all([0, 1, 2, 3, 4, 5]): pass
print best_item
More speed improvements are possible if you add some constraints to the problem:
If integer division is only possible when the remainder is zero.
If all intermediate results are to be non-negative and/or below 1000.
Well I don't will give up. Following the line of all the answers to your question I come up with another algorithm. This algorithm gives the solution with a time average of 3 milliseconds.
#! -*- coding: utf-8 -*-
import copy
numbers = [4, 8, 6, 2, 15, 50]
target = 590
operations = {
'+': lambda x, y: x + y,
'-': lambda x, y: x - y,
'*': lambda x, y: x * y,
'/': lambda x, y: y == 0 and 1e30 or x / y # Handle zero division
}
def chain_op(target, numbers, result=None, expression=""):
if len(numbers) == 0:
return (expression, result)
else:
for choosen_number in numbers:
remaining_numbers = copy.copy(numbers)
remaining_numbers.remove(choosen_number)
if result is None:
return chain_op(target, remaining_numbers, choosen_number, str(choosen_number))
else:
incomming_results = []
for key, op in operations.items():
new_result = op(result, choosen_number)
new_expression = "%s%s%d" % (expression, key, choosen_number)
incomming_results.append(chain_op(target, remaining_numbers, new_result, new_expression))
diff = 1e30
selected = None
for exp_result in incomming_results:
exp, res = exp_result
if abs(res - target) < diff:
diff = abs(res - target)
selected = exp_result
if diff == 0:
break
return selected
if __name__ == '__main__':
print chain_op(target, numbers)
Erratum: This algorithm do not include the solutions containing parenthesis. It always hits the target or the closest result, my bad. Still is pretty fast. It can be adapted to support parenthesis without much work.
Actually there are two things that you can do to speed up the time to milliseconds.
You are trying to find a solution for given quiz, by generating the numbers and the target number. Instead you can generate the solution and just remove the operations. You can build some thing smart that will generate several quizzes and choose the most interesting one, how ever in this case you loose the as close as possible option.
Another way to go, is pre-calculation. Solve 100 quizes, use them as build-in in your application, and generate new one on the fly, try to keep your quiz stack at 100, also try to give the user only the new quizes. I had the same problem in my bible games, and I used this method to speed thing up. Instead of 10 sec for question it takes me milliseconds as I am generating new question in background and always keeping my stack to 100.
What about Dynamic programming, because you need same results to calculate other options?

fetching values from dictionary. Range match on keys

I created a dictionary myDict holding 10 million entries in the following form. Each entry in the dictionary represent {(id, age): code}
>>> myDict = {('1039', '68.0864'): '42731,42781,V4501',
('1039', '68.1704'): '4770,4778,V071',
('0845', '60.4476'): '2724,27800,4019',
('0983', '63.3936'): '41401,4168,4240,V1582,V7281'
}
A constant ageOffset is defined with value = 0.1
Given an (id,age) tuple, how can I fetch all values from myDict which have key (id, X) where:
age <= X <= age+ageOffset
I need to perform this fetch operation 20 billion times.
Examples:
1.
myTup = ('1039', '68.0')
the answer is: '42731,42781,V4501'
2.
myTup = ('0845', '60.0')
Ans : No value returned
Edit:
Can I create a sub-dictionary, on the basis of partial match on the first element of the Key. I mean, If first element of the tuple Key matched, then create a subdictionary. According to my data, this wont be longer than a couple of hundreds. And then perform linear range search comparing the second element in the tuple key and finding the corresponding values.
To do this operation 20 billion(!) times, you will have to preprocess your data a bit.
First, I would group by id:
def preprocess(data):
from collections import defaultdict # Python 2.5+ only
preprocessed = defaultdict(list)
# group by id
for (id, age), value in data.iteritems():
preprocessed[id].append((float(age), value))
# sort lists for binary search, see edit
for key, value in preprocessed.iteritems():
value.sort()
return preprocessed
Result should look like this:
>>> preprocess(myDict)
defaultdict(<type 'list'>, {
'0845': [(60.4476, '2724,27800,4019')],
'0983': [(63.3936, '41401,4168,4240,V1582,V7281')],
'1039': [(68.0864, '42731,42781,V4501'), (68.1704, '4770,4778,V071')]}
If relatively few items share the same id, thus resulting in short lists, you might get away with filtering the list.
def lookup(data, id, age, age_offset=0.1):
if id in data:
return [value for x, value in data[id] if age <= x <= age+age_offset]
else:
return None
lookup(preprocessed, '1039', 68.0) # Note that I use floats for age
['42731,42781,V4501']
However, if many items share the same id, you will have to traverse long lists, making the lookup relatively slow. In this case, you will have to apply further optimizations.
Edit: as suggested by #Andrey Petrov
from bisect import bisect_left
from itertools import islice, takewhile
def optimized_lookup(data, id, age, age_offset=0.1):
if id in data:
l = data[id]
idx = bisect_left(l, age)
return [a for a,v in takewhile(lambda (x, value): x <= age+age_offset, islice(l, idx, None))]
else:
return None
Here's a way to do it in numpy, and though I haven't tested it I'm pretty confident it will be vastly faster than looping over the dictionary. I replaced the dictionary structure with a Numpy record array, and used np.where to locate the rows where they match the parameters you gave.
import numpy as np
myDict = {('1039', '68.0864'): '42731,42781,V4501',
('1039', '68.1704'): '4770,4778,V071',
('0845', '60.4476'): '2724,27800,4019',
('0983', '63.3936'): '41401,4168,4240,V1582,V7281'
}
records=[]
for k,v in myDict.iteritems():
records.append([k[0], float(k[1]), v])
myArr = np.rec.fromrecords(records, formats='S10, f4, S100',
names="ID, Age, Code")
def findInMyArray(arr, requestedID, requestedAge, tolerance=0.1):
idx = np.where(((arr["Age"] - requestedAge) < tolerance) & (arr["ID"] == requestedID))
return idx
idx = findInMyArray(myArr, "1039", 68.0, tolerance=0.1)
print "The index found is: ", idx
print "The values are: ", myArr["Code"][idx[0]]
def getr(t):
id = float(t[0])
age = float(t[1])
os = 0.1
rs = []
correct_id=fixed[id]
for k in correct_id.keys():
if (k > age and k <= age + os):
rs.append(correct_id.get(k))
return rs
ct = {('1039', '68.0864'): '42731,42781,V4501',
('1039', '68.1704'): '4770,4778,V071',
('0845', '60.4476'): '2724,27800,4019',
('0983', '63.3936'): '41401,4168,4240,V1582,V7281' }
fixed={}
for k in ct:
if not(float(k[0]) in fixed):
fixed[float(k[0])]={}
fixed[float(k[0])][float(k[1])] = ct[k]
print "1"
myTup = ('1039', '68.0')
assert(getr(myTup) == ['42731,42781,V4501'])
#the answer is: '42731,42781,V4501'
print "2"
myTup = ('0845', '60.0')
assert(getr(myTup) == [])
#Ans : No value returned

How to make a random but partial shuffle in Python?

Instead of a complete shuffle, I am looking for a partial shuffle function in python.
Example : "string" must give rise to "stnrig", but not "nrsgit"
It would be better if I can define a specific "percentage" of characters that have to be rearranged.
Purpose is to test string comparison algorithms. I want to determine the "percentage of shuffle" beyond which an(my) algorithm will mark two (shuffled) strings as completely different.
Update :
Here is my code. Improvements are welcome !
import random
percent_to_shuffle = int(raw_input("Give the percent value to shuffle : "))
to_shuffle = list(raw_input("Give the string to be shuffled : "))
num_of_chars_to_shuffle = int((len(to_shuffle)*percent_to_shuffle)/100)
for i in range(0,num_of_chars_to_shuffle):
x=random.randint(0,(len(to_shuffle)-1))
y=random.randint(0,(len(to_shuffle)-1))
z=to_shuffle[x]
to_shuffle[x]=to_shuffle[y]
to_shuffle[y]=z
print ''.join(to_shuffle)
This is a problem simpler than it looks. And the language has the right tools not to stay between you and the idea,as usual:
import random
def pashuffle(string, perc=10):
data = list(string)
for index, letter in enumerate(data):
if random.randrange(0, 100) < perc/2:
new_index = random.randrange(0, len(data))
data[index], data[new_index] = data[new_index], data[index]
return "".join(data)
Your problem is tricky, because there are some edge cases to think about:
Strings with repeated characters (i.e. how would you shuffle "aaaab"?)
How do you measure chained character swaps or re arranging blocks?
In any case, the metric defined to shuffle strings up to a certain percentage is likely to be the same you are using in your algorithm to see how close they are.
My code to shuffle n characters:
import random
def shuffle_n(s, n):
idx = range(len(s))
random.shuffle(idx)
idx = idx[:n]
mapping = dict((idx[i], idx[i-1]) for i in range(n))
return ''.join(s[mapping.get(x,x)] for x in range(len(s)))
Basically chooses n positions to swap at random, and then exchanges each of them with the next in the list... This way it ensures that no inverse swaps are generated and exactly n characters are swapped (if there are characters repeated, bad luck).
Explained run with 'string', 3 as input:
idx is [0, 1, 2, 3, 4, 5]
we shuffle it, now it is [5, 3, 1, 4, 0, 2]
we take just the first 3 elements, now it is [5, 3, 1]
those are the characters that we are going to swap
s t r i n g
^ ^ ^
t (1) will be i (3)
i (3) will be g (5)
g (5) will be t (1)
the rest will remain unchanged
so we get 'sirgnt'
The bad thing about this method is that it does not generate all the possible variations, for example, it could not make 'gnrits' from 'string'. This could be fixed by making partitions of the indices to be shuffled, like this:
import random
def randparts(l):
n = len(l)
s = random.randint(0, n-1) + 1
if s >= 2 and n - s >= 2: # the split makes two valid parts
yield l[:s]
for p in randparts(l[s:]):
yield p
else: # the split would make a single cycle
yield l
def shuffle_n(s, n):
idx = range(len(s))
random.shuffle(idx)
mapping = dict((x[i], x[i-1])
for i in range(len(x))
for x in randparts(idx[:n]))
return ''.join(s[mapping.get(x,x)] for x in range(len(s)))
import random
def partial_shuffle(a, part=0.5):
# which characters are to be shuffled:
idx_todo = random.sample(xrange(len(a)), int(len(a) * part))
# what are the new positions of these to-be-shuffled characters:
idx_target = idx_todo[:]
random.shuffle(idx_target)
# map all "normal" character positions {0:0, 1:1, 2:2, ...}
mapper = dict((i, i) for i in xrange(len(a)))
# update with all shuffles in the string: {old_pos:new_pos, old_pos:new_pos, ...}
mapper.update(zip(idx_todo, idx_target))
# use mapper to modify the string:
return ''.join(a[mapper[i]] for i in xrange(len(a)))
for i in xrange(5):
print partial_shuffle('abcdefghijklmnopqrstuvwxyz', 0.2)
prints
abcdefghljkvmnopqrstuxwiyz
ajcdefghitklmnopqrsbuvwxyz
abcdefhwijklmnopqrsguvtxyz
aecdubghijklmnopqrstwvfxyz
abjdefgcitklmnopqrshuvwxyz
Evil and using a deprecated API:
import random
# adjust constant to taste
# 0 -> no effect, 0.5 -> completely shuffled, 1.0 -> reversed
# Of course this assumes your input is already sorted ;)
''.join(sorted(
'abcdefghijklmnopqrstuvwxyz',
cmp = lambda a, b: cmp(a, b) * (-1 if random.random() < 0.2 else 1)
))
maybe like so:
>>> s = 'string'
>>> shufflethis = list(s[2:])
>>> random.shuffle(shufflethis)
>>> s[:2]+''.join(shufflethis)
'stingr'
Taking from fortran's idea, i'm adding this to collection. It's pretty fast:
def partial_shuffle(st, p=20):
p = int(round(p/100.0*len(st)))
idx = range(len(s))
sample = random.sample(idx, p)
res=str()
samptrav = 1
for i in range(len(st)):
if i in sample:
res += st[sample[-samptrav]]
samptrav += 1
continue
res += st[i]
return res

Categories