Minimum set cover

Minimum set cover - python

I would like to solve a minimum set cover problem of the following sort. All the lists contain only 1s and 0s.
I say that a list A covers a list B if you can make B from A by inserting exactly x symbols.
Consider all 2^n lists of 1s and 0s of length n and set x = n/3. I would to compute a minimal set of lists of length 2n/3 that covers them all.
Here is a naive approach I have started on. For every possible list of length 2n/3 I create a set of all lists I can create from it using this function (written by DSM).
from itertools import product, combinations
def all_fill(source, num):
output_len = len(source) + num
for where in combinations(range(output_len), len(source)):
# start with every possibility
poss = [[0,1]] * output_len
# impose the source list
for w, s in zip(where, source):
poss[w] = [s]
# yield every remaining possibility
for tup in product(*poss):
yield tup
I then create the set of sets as follows using n = 6 as an example.
n = 6
shortn = 2*n/3
x = n/3
coversets=set()
for seq in product([0,1], repeat = shortn):
coversets.add(frozenset(all_fill(seq,x)))
I would like to find a minimal set of sets from coversets whose union is allset = set(product([0,1], repeat=n)).
In this case, set(all_fill([1,1,1,1],2)), set(all_fill([0,0,0,0],2)), set(all_fill([1,1,0,0],2)), set(all_fill([0,0,1,1],2)) will do.
My aim is to solve the problem for n = 12. I am happy to use external libraries if that will help and I expect the time to be exponential in n in the worst case.

I’ve written a small program to write an integer program to be solved by
CPLEX or another MIP solver. Below it is a solution for n=12.
from collections import defaultdict
from itertools import product, combinations
def all_fill(source, num):
output_len = (len(source) + num)
for where in combinations(range(output_len), len(source)):
poss = ([[0, 1]] * output_len)
for (w, s) in zip(where, source):
poss[w] = [s]
for tup in product(*poss):
(yield tup)
def variable_name(seq):
return ('x' + ''.join((str(s) for s in seq)))
n = 12
shortn = ((2 * n) // 3)
x = (n // 3)
all_seqs = list(product([0, 1], repeat=shortn))
hit_sets = defaultdict(set)
for seq in all_seqs:
for fill in all_fill(seq, x):
hit_sets[fill].add(seq)
print('Minimize')
print(' + '.join((variable_name(seq) for seq in all_seqs)))
print('Subject To')
for (fill, seqs) in hit_sets.items():
print(' + '.join((variable_name(seq) for seq in seqs)), '>=', 1)
print('Binary')
for seq in all_seqs:
print(variable_name(seq))
print('End')
MIP - Integer optimal solution: Objective = 1.0000000000e+01
Solution time = 7.66 sec. Iterations = 47411 Nodes = 337
CPLEX> Incumbent solution
Variable Name Solution Value
x00000000 1.000000
x00000111 1.000000
x00011110 1.000000
x00111011 1.000000
x10110001 1.000000
x11000100 1.000000
x11001110 1.000000
x11100001 1.000000
x11111000 1.000000
x11111111 1.000000
All other variables matching '*' are 0.
CPLEX>

Related

how to find 3 Numbers with Sum closest to a given number

I'm trying to write simple code for that problem. If I get an array and number I need to find the 3 numbers that their sum are close to the number that's given.
I've thought about first to pop out the last digit (the first number)
then I'll have a new array without this digit. So now I look for the second number who needs to be less the sum target. so I take only the small numbers that it's smaller them the second=sum-first number (but I don't know how to choose it.
The last number will be third=sum-first-second
I tried to write code but it's not working and it's very basic
def f(s,target):
s=sorted(s)
print(s)
print(s[0])
closest=s[0]+s[1]+s[2]
m=s[:-1]
print(m)
for i in range(len(s)):
for j in range(len(m)):
if (closest<=target-m[0]) and s[-1] + m[j] == target:
print (m[j])
n = m[:j] + nums[j+1:]
for z in range (len(z)):
if (closest<target-n[z]) and s[-1]+ m[j]+n[z] == target:
print (n[z])
s=[4,2,12,3,4,8,14]
target=20
f(s,target)
if you have idea what to change here. Please let me know
Thank you

Here is my solution I tried to maximize the performance of the code to not repeat any combinations. Let me know if you have any questions.
Good luck.
def find_3(s,target):
to_not_rep=[] #This list will store all combinations without repetation
close_to_0=abs(target - s[0]+s[1]+s[2]) #initile
There_is_one=False #False: don't have a combination equal to the target yet
for s1,first_n in enumerate(s):
for s2,second_n in enumerate(s):
if (s1==s2) : continue #to not take the same index
for s3,third_n in enumerate(s):
if (s1==s3) or (s2==s3) : continue #to not take the same index
val=sorted([first_n,second_n,third_n]) #sorting
if val in to_not_rep :continue #to not repeat the same combination with diffrent positions
to_not_rep.append(val)#adding all the combinations without repetation
sum_=sum(val) #the sum of the three numbers
# Good one
if sum_==target:
print(f"Found a possibility: {val[0]} + {val[1]} + {val[2]} = {target}")
There_is_one = True
if There_is_one is False: #No need if we found combination equal to the target
# close to the target
# We know that (target - sum) should equal to 0 otherwise :
# We are looking for the sum of closet combinations(in abs value) to 0
pos_n=abs(target-sum_)
if pos_n < close_to_0:
closet_one=f"The closet combination to the target is: {val[0]} + {val[1]} + {val[2]} = {sum_} almost {target} "
close_to_0=pos_n
# Print the closet combination to the target in case we did not find a combination equal to the target
if There_is_one is False: print(closet_one)
so we can test it :
s =[4,2,3,8,6,4,12,16,30,20,5]
target=20
find_3(s,target)
#Found a possibility: 4 + 4 + 12 = 20
#Found a possibility: 2 + 6 + 12 = 20
#Found a possibility: 3 + 5 + 12 = 20
another test :
s =[4,2,3,8,6,4,323,23,44]
find_3(s,target)
#The closet combination to the target is: 4 + 6 + 8 = 18 almost 20

This is a simple solution that returns all possibilites.
For your case it completed in 0.002019 secs
from itertools import combinations
import numpy as np
def f(s, target):
dic = {}
for tup in combinations(s, 3):
try:
dic[np.absolute(np.sum(tup) - target)].append(str(tup))
except KeyError:
dic[np.absolute(np.sum(tup) - target)] = [tup]
print(dic[min(dic.keys())])

Use itertools.combinations to get all combinations of your numbers without replacement of a certain length (three in your case). Then take the three-tuple for which the absolute value of the difference of the sum and target is minimal. min can take a key argument to specify the ordering of the iterable passed to the function.
from typing import Sequence, Tuple
def closest_to(seq: Sequence[float], target: float, length: int = 3) -> Tuple[float]:
from itertools import combinations
combs = combinations(seq, length)
diff = lambda x: abs(sum(x) - target)
return min(combs, key=diff)
closest_to([4,2,12,3,4,8,14], 20) # (4, 2, 14)
This is not the fastest or most efficient way to do it, but it's conceptionally simple and short.

Something like this?
import math
num_find = 1448
lst_Results = []
i_Number = num_find
while i_Number > 0:
num_Exp = math.floor(math.log(i_Number) / math.log(2))
lst_Results.append(dict({num_Exp: int(math.pow(2, num_Exp))}))
i_Number = i_Number - math.pow(2, num_Exp)
print(lst_Results)
In a sequence of numbers: for example 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, etc ...
The sum of the previous numbers is never greater than the next. This gives us the possibility of combinations, for example:
The number: 1448, there is no other combination than the sum of the previous numbers: 8 + 32 + 128 + 256 + 1024
Then you find the numbers whose sum is close to the number provided

Extracting total value of a tuple

I'm very new to Python, so please forgive my ignorance. I'm trying to calculate the total number of energy units in a system. For example, the Omega here will output both (0,0,0,1) and (2,2,2,1) along with a whole lot of other tuples. I want to extract from Omega how many tuples have a total value of 1 (like the first example) and how many have a total value of 7 (like the second example). How do I achieve this?
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
N = 4 ##The number of Oscillators
q = range(3) ## Range of number of possible energy units per oscillator
Omega = product(q, repeat = N)
print(list(product(q, repeat = N)))

try this:
Omega = product(q, repeat = N)
l = list(product(q, repeat = N))
l1 = [i for i in l if sum(i)==1]
l2 = [i for i in l if sum(i)==7]
print(l1,l2)

I believe you can use sum() on tuples as well as lists of integers/numbers.
Now you say omega is a list of tuples, is that correct? Something like
Omega = [(0,0,0,1), (2,2,2,1), ...)]
In that case I think you can do
sums_to_1 = [int_tuple for int_tuple in omega if sum(int_tuple) == 1]
If you want to have some default value for the tuples that don't sum to one you can put the if statement in the list comprehension in the beginning and do
sums_to_1 = [int_tuple if sum(int_tuple) == 1 else 'SomeDefaultValue' for int_tuple in omega]

Unique ordered ratio of integers

I have two ordered lists of consecutive integers m=0, 1, ... M and n=0, 1, 2, ... N. Each value of m has a probability pm, and each value of n has a probability pn. I am trying to find the ordered list of unique values r=n/m and their probabilities pr. I am aware that r is infinite if n=0 and can even be undefined if m=n=0.
In practice, I would like to run for M and N each be of the order of 2E4, meaning up to 4E8 values of r - which would mean 3 GB of floats (assuming 8 Bytes/float).
For this calculation, I have written the python code below.
The idea is to iterate over m and n, and for each new m/n, insert it in the right place with its probability if it isn't there yet, otherwise add its probability to the existing number. My assumption is that it is easier to sort things on the way instead of waiting until the end.
The cases related to 0 are added at the end of the loop.
I am using the Fraction class since we are dealing with fractions.
The code also tracks the multiplicity of each unique value of m/n.
I have tested up to M=N=100, and things are quite slow. Are there better approaches to the question, or more efficient ways to tackle the code?
Timing:
M=N=30: 1 s
M=N=50: 6 s
M=N=80: 30 s
M=N=100: 82 s
import numpy as np
from fractions import Fraction
import time # For timiing
start_time = time.time() # Timing
M, N = 6, 4
mList, nList = np.arange(1, M+1), np.arange(1, N+1) # From 1 to M inclusive, deal with 0 later
mProbList, nProbList = [1/(M+1)]*(M), [1/(N+1)]*(N) # Probabilities, here assumed equal (not general case)
# Deal with mn=0 later
pmZero, pnZero = 1/(M+1), 1/(N+1) # P(m=0) and P(n=0)
pNaN = pmZero * pnZero # P(0/0) = P(m=0)P(n=0)
pZero = pmZero * (1 - pnZero) # P(0) = P(m=0)P(n!=0)
pInf = pnZero * (1 - pmZero) # P(inf) = P(m!=0)P(n=0)
# Main list of r=m/n, P(r) and mult(r)
# Start with first line, m=1
rList = [Fraction(mList[0], n) for n in nList[::-1]] # Smallest first
rProbList = [mProbList[0] * nP for nP in nProbList[::-1]] # Start with first line
rMultList = [1] * len(rList) # Multiplicity of each element
# Main loop
for m, mP in zip(mList[1:], mProbList[1:]):
for n, nP in zip(nList[::-1], nProbList[::-1]): # Pick an n value
r, rP, rMult = Fraction(m, n), mP*nP, 1
for i in range(len(rList)-1): # See where it fits in existing list
if r < rList[i]:
rList.insert(i, r)
rProbList.insert(i, rP)
rMultList.insert(i, 1)
break
elif r == rList[i]:
rProbList[i] += rP
rMultList[i] += 1
break
elif r < rList[i+1]:
rList.insert(i+1, r)
rProbList.insert(i+1, rP)
rMultList.insert(i+1, 1)
break
elif r == rList[i+1]:
rProbList[i+1] += rP
rMultList[i+1] += 1
break
if r > rList[-1]:
rList.append(r)
rProbList.append(rP)
rMultList.append(1)
break
# Deal with 0
rList.insert(0, Fraction(0, 1))
rProbList.insert(0, pZero)
rMultList.insert(0, N)
# Deal with infty
rList.append(np.Inf)
rProbList.append(pInf)
rMultList.append(M)
# Deal with undefined case
rList.append(np.NAN)
rProbList.append(pNaN)
rMultList.append(1)
print(".... done in %s seconds." % round(time.time() - start_time, 2))
print("************** Final list\nr", 'Prob', 'Mult')
for r, rP, rM in zip(rList, rProbList, rMultList): print(r, rP, rM)
print("************** Checks")
print("mList", mList, 'nList', nList)
print("Sum of proba = ", np.sum(rProbList))
print("Sum of multi = ", np.sum(rMultList), "\t(M+1)*(N+1) = ", (M+1)*(N+1))

Based on the suggestion of #Prune, and on this thread about merging lists of tuples, I have modified the code as below. It's a lot easier to read, and runs about an order of magnitude faster for N=M=80 (I have omitted dealing with 0 - would be done same way as in original post). I assume there may be ways to tweak the merge and conversion back to lists further yet.
# Do calculations
data = [(Fraction(m, n), mProb(m) * nProb(n)) for n in range(1, N+1) for m in range(1, M+1)]
data.sort()
# Merge duplicates using a dictionary
d = {}
for r, p in data:
if not (r in d): d[r] = [0, 0]
d[r][0] += p
d[r][1] += 1
# Convert back to lists
rList, rProbList, rMultList = [], [], []
for k in d:
rList.append(k)
rProbList.append(d[k][0])
rMultList.append(d[k][1])

I expect that "things are quite slow" because you've chosen a known inefficient sort. A single list insertion is O(K) (later list elements have to be bumped over, and there is added storage allocation on a regular basis). Thus a full-list insertion sort is O(K^2). For your notation, that is O((M*N)^2).
If you want any sort of reasonable performance, research and use the best-know methods. The most straightforward way to do this is to make your non-exception results as a simple list comprehension, and use the built-in sort for your penultimate list. Simply append your n=0 cases, and you're done in O(K log K) time.
I the expression below, I've assumed functions for m and n probabilities.
This is a notational convenience; you know how to directly compute them, and can substitute those expressions if you wish.
data = [ (mProb(m) * nProb(n), Fraction(m, n))
for n in range(1, N+1)
for m in range(0, M+1) ]
data.sort()
data.extend([ # generate your "zero" cases here ])

Standard deviation of combinations of dices

I am trying to find stdev for a sequence of numbers that were extracted from combinations of dice (30) that sum up to 120. I am very new to Python, so this code makes the console freeze because the numbers are endless and I am not sure how to fit them all into a smaller, more efficient function. What I did is:
found all possible combinations of 30 dice;
filtered combinations that sum up to 120;
multiplied all items in the list within result list;
tried extracting standard deviation.
Here is the code:
import itertools
import numpy
dice = [1,2,3,4,5,6]
subset = itertools.product(dice, repeat = 30)
result = []
for x in subset:
if sum(x) == 120:
result.append(x)
my_result = numpy.product(result, axis = 1).tolist()
std = numpy.std(my_result)
print(std)

Note that D(X^2) = E(X^2) - E(X)^2, you can solve this problem analytically by following equations.
f[i][N] = sum(k*f[i-1][N-k]) (1<=k<=6)
g[i][N] = sum(k^2*g[i-1][N-k])
h[i][N] = sum(h[i-1][N-k])
f[1][k] = k ( 1<=k<=6)
g[1][k] = k^2 ( 1<=k<=6)
h[1][k] = 1 ( 1<=k<=6)
Sample implementation:
import numpy as np
Nmax = 120
nmax = 30
min_value = 1
max_value = 6
f = np.zeros((nmax+1, Nmax+1), dtype ='object')
g = np.zeros((nmax+1, Nmax+1), dtype ='object') # the intermediate results will be really huge, to keep them accurate we have to utilize python big-int
h = np.zeros((nmax+1, Nmax+1), dtype ='object')
for i in range(min_value, max_value+1):
f[1][i] = i
g[1][i] = i**2
h[1][i] = 1
for i in range(2, nmax+1):
for N in range(1, Nmax+1):
f[i][N] = 0
g[i][N] = 0
h[i][N] = 0
for k in range(min_value, max_value+1):
f[i][N] += k*f[i-1][N-k]
g[i][N] += (k**2)*g[i-1][N-k]
h[i][N] += h[i-1][N-k]
result = np.sqrt(float(g[nmax][Nmax]) / h[nmax][Nmax] - (float(f[nmax][Nmax]) / h[nmax][Nmax]) ** 2)
# result = 32128174994365296.0

You ask for a result of an unfiltered lengths of 630 = 2*1023, impossible to handle as such.
There are two possibilities that can be combined:
Include more thinking to pre-treat the problem, e.g. on how to sample only
those with sum 120.
Do a Monte Carlo simulation instead, i.e. don't sample all
combinations, but only a random couple of 1000 to obtain a representative
sample to determine std sufficiently accurate.
Now, I only apply (2), giving the brute force code:
N = 30 # number of dices
M = 100000 # number of samples
S = 120 # required sum
result = [[random.randint(1,6) for _ in xrange(N)] for _ in xrange(M)]
result = [s for s in result if sum(s) == S]
Now, that result should be comparable to your result before using numpy.product ... that part I couldn't follow, though...
Ok, if you are out after the standard deviation of the product of the 30 dices, that is what your code does. Then I need 1 000 000 samples to get roughly reproducible values for std (1 digit) - takes my PC about 20 seconds, still considerably less than 1 million years :-D.
Is a number like 3.22*1016 what you are looking for?
Edit after comments:
Well, sampling the frequency of numbers instead gives only 6 independent variables - even 4 actually, by substituting in the constraints (sum = 120, total number = 30). My current code looks like this:
def p2(b, s):
return 2**b * 3**s[0] * 4**s[1] * 5**s[2] * 6**s[3]
hits = range(31)
subset = itertools.product(hits, repeat=4) # only 3,4,5,6 frequencies
product = []
permutations = []
for s in subset:
b = 90 - (2*s[0] + 3*s[1] + 4*s[2] + 5*s[3]) # 2 frequency
a = 30 - (b + sum(s)) # 1 frequency
if 0 <= b <= 30 and 0 <= a <= 30:
product.append(p2(b, s))
permutations.append(1) # TODO: Replace 1 with possible permutations
print numpy.std(product) # TODO: calculate std manually, considering permutations
This computes in about 1 second, but the confusing part is that I get as a result 1.28737023733e+17. Either my previous approaches or this one has a bug - or both.
Sorry - not that easy: The sampling is not of the same probability - that is the problem here. Each sample has a different number of possible combinations, giving its weight, which has to be considered before taking the std-deviation. I have drafted that in the code above.

How to make a random but partial shuffle in Python?

Instead of a complete shuffle, I am looking for a partial shuffle function in python.
Example : "string" must give rise to "stnrig", but not "nrsgit"
It would be better if I can define a specific "percentage" of characters that have to be rearranged.
Purpose is to test string comparison algorithms. I want to determine the "percentage of shuffle" beyond which an(my) algorithm will mark two (shuffled) strings as completely different.
Update :
Here is my code. Improvements are welcome !
import random
percent_to_shuffle = int(raw_input("Give the percent value to shuffle : "))
to_shuffle = list(raw_input("Give the string to be shuffled : "))
num_of_chars_to_shuffle = int((len(to_shuffle)*percent_to_shuffle)/100)
for i in range(0,num_of_chars_to_shuffle):
x=random.randint(0,(len(to_shuffle)-1))
y=random.randint(0,(len(to_shuffle)-1))
z=to_shuffle[x]
to_shuffle[x]=to_shuffle[y]
to_shuffle[y]=z
print ''.join(to_shuffle)

This is a problem simpler than it looks. And the language has the right tools not to stay between you and the idea,as usual:
import random
def pashuffle(string, perc=10):
data = list(string)
for index, letter in enumerate(data):
if random.randrange(0, 100) < perc/2:
new_index = random.randrange(0, len(data))
data[index], data[new_index] = data[new_index], data[index]
return "".join(data)

Your problem is tricky, because there are some edge cases to think about:
Strings with repeated characters (i.e. how would you shuffle "aaaab"?)
How do you measure chained character swaps or re arranging blocks?
In any case, the metric defined to shuffle strings up to a certain percentage is likely to be the same you are using in your algorithm to see how close they are.
My code to shuffle n characters:
import random
def shuffle_n(s, n):
idx = range(len(s))
random.shuffle(idx)
idx = idx[:n]
mapping = dict((idx[i], idx[i-1]) for i in range(n))
return ''.join(s[mapping.get(x,x)] for x in range(len(s)))
Basically chooses n positions to swap at random, and then exchanges each of them with the next in the list... This way it ensures that no inverse swaps are generated and exactly n characters are swapped (if there are characters repeated, bad luck).
Explained run with 'string', 3 as input:
idx is [0, 1, 2, 3, 4, 5]
we shuffle it, now it is [5, 3, 1, 4, 0, 2]
we take just the first 3 elements, now it is [5, 3, 1]
those are the characters that we are going to swap
s t r i n g
^ ^ ^
t (1) will be i (3)
i (3) will be g (5)
g (5) will be t (1)
the rest will remain unchanged
so we get 'sirgnt'
The bad thing about this method is that it does not generate all the possible variations, for example, it could not make 'gnrits' from 'string'. This could be fixed by making partitions of the indices to be shuffled, like this:
import random
def randparts(l):
n = len(l)
s = random.randint(0, n-1) + 1
if s >= 2 and n - s >= 2: # the split makes two valid parts
yield l[:s]
for p in randparts(l[s:]):
yield p
else: # the split would make a single cycle
yield l
def shuffle_n(s, n):
idx = range(len(s))
random.shuffle(idx)
mapping = dict((x[i], x[i-1])
for i in range(len(x))
for x in randparts(idx[:n]))
return ''.join(s[mapping.get(x,x)] for x in range(len(s)))

import random
def partial_shuffle(a, part=0.5):
# which characters are to be shuffled:
idx_todo = random.sample(xrange(len(a)), int(len(a) * part))
# what are the new positions of these to-be-shuffled characters:
idx_target = idx_todo[:]
random.shuffle(idx_target)
# map all "normal" character positions {0:0, 1:1, 2:2, ...}
mapper = dict((i, i) for i in xrange(len(a)))
# update with all shuffles in the string: {old_pos:new_pos, old_pos:new_pos, ...}
mapper.update(zip(idx_todo, idx_target))
# use mapper to modify the string:
return ''.join(a[mapper[i]] for i in xrange(len(a)))
for i in xrange(5):
print partial_shuffle('abcdefghijklmnopqrstuvwxyz', 0.2)
prints
abcdefghljkvmnopqrstuxwiyz
ajcdefghitklmnopqrsbuvwxyz
abcdefhwijklmnopqrsguvtxyz
aecdubghijklmnopqrstwvfxyz
abjdefgcitklmnopqrshuvwxyz

Evil and using a deprecated API:
import random
# adjust constant to taste
# 0 -> no effect, 0.5 -> completely shuffled, 1.0 -> reversed
# Of course this assumes your input is already sorted ;)
''.join(sorted(
'abcdefghijklmnopqrstuvwxyz',
cmp = lambda a, b: cmp(a, b) * (-1 if random.random() < 0.2 else 1)
))

maybe like so:
>>> s = 'string'
>>> shufflethis = list(s[2:])
>>> random.shuffle(shufflethis)
>>> s[:2]+''.join(shufflethis)
'stingr'
Taking from fortran's idea, i'm adding this to collection. It's pretty fast:
def partial_shuffle(st, p=20):
p = int(round(p/100.0*len(st)))
idx = range(len(s))
sample = random.sample(idx, p)
res=str()
samptrav = 1
for i in range(len(st)):
if i in sample:
res += st[sample[-samptrav]]
samptrav += 1
continue
res += st[i]
return res

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Minimum set cover - python

Related

how to find 3 Numbers with Sum closest to a given number

Extracting total value of a tuple

Unique ordered ratio of integers

Standard deviation of combinations of dices

How to make a random but partial shuffle in Python?

Categories

Resources