python random.shuffle() in a while loop

I have a list:
k = [1,2,3,4,5]
Now I want 3 permutations of this list collected in another list, but when I do this:
x = []
i = 0
while i < 3:
    random.shuffle(k)
    x.append(k)
    i += 1
I end up with 3 times the same permutation of k in x, like this:
x = [[1,3,5,2,4], [1,3,5,2,4], [1,3,5,2,4]]
Instead, I would like something like this:
x = [[1,5,4,2,3], [1,3,5,2,4], [5,3,4,1,2]]
Note that, due to the way the data in k is gathered, it is not possible to create k inside the loop, although as far as I know that would solve the problem. The real code is this:
def create_random_chromosomes(genes):
    temp_chromosomes = []
    chromosomes = []
    i = 0
    while i < 2000:
        print(genes)
        random.shuffle(genes)
        temp_chromosomes.append(genes)
        i += 1
    print(temp_chromosomes)
    for element in temp_chromosomes:
        if element not in chromosomes:
            chromosomes.append(element)
    return chromosomes

Shuffling a list changes it in-place, and you are creating 3 references to the same list. Create a copy of the list before shuffling:
x = []
for i in range(3):
    kcopy = k[:]
    random.shuffle(kcopy)
    x.append(kcopy)
I've simplified your loop as well; just use for i in range(3). Or, to place this in the context of your full method:
def create_random_chromosomes(genes):
    temp_chromosomes = []
    chromosomes = []
    for i in range(2000):
        print(genes)
        randomgenes = genes[:]
        random.shuffle(randomgenes)
        temp_chromosomes.append(randomgenes)
    print(temp_chromosomes)
    for element in temp_chromosomes:
        if element not in chromosomes:
            chromosomes.append(element)
    return chromosomes
You can further simplify the above by using a set to weed out dupes:
def create_random_chromosomes(genes):
    chromosomes = set()
    randomgenes = genes[:]
    for i in range(2000):
        random.shuffle(randomgenes)
        chromosomes.add(tuple(randomgenes))
    return list(chromosomes)
This stores a tuple copy of the shuffled genes list, because set members must be hashable.
You can then even ensure that you return 2000 unique items regardless:
def create_random_chromosomes(genes):
    chromosomes = set()
    randomgenes = genes[:]
    while len(chromosomes) < 2000:
        random.shuffle(randomgenes)
        chromosomes.add(tuple(randomgenes))
    return list(chromosomes)
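For a quick sanity check of that last version, here is a minimal sketch with the target count turned into a parameter (the count argument is my addition; the hard-coded 2000 above assumes a genes list with at least 2000 distinct permutations):
import random

def create_random_chromosomes(genes, count):
    # collect unique permutations as tuples; shuffle a copy so the caller's list is untouched
    chromosomes = set()
    randomgenes = genes[:]
    while len(chromosomes) < count:
        random.shuffle(randomgenes)
        chromosomes.add(tuple(randomgenes))
    return list(chromosomes)

print(create_random_chromosomes([1, 2, 3, 4, 5], 3))
# e.g. [(2, 1, 5, 3, 4), (4, 5, 1, 2, 3), (1, 3, 5, 2, 4)]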

Related

How to speed up combination algorithm?

The code below finds the minimum number of items of list B that together form string A. Let's assume A='hello world how are you doing' and B=['hello world how', 'hello are', 'hello', 'hello are you doing']. Then, since the items with indices 0 and 3 contain all the words of string A, the answer is 2.
I converted all the strings to integers to speed up the algorithm, but since there are larger and more complicated test cases I need a more optimized algorithm. I'm wondering how to speed it up.
import itertools

A = 'hello world how are you doing'
B = ['hello world how', 'hello are', 'hello', 'hello are you doing']

d = {}
res_A = [d.setdefault(word, len(d)+1) for word in A.lower().split()]
mapping = dict(zip(A.split(), range(1, len(A) + 1)))
# find mappings of words in B
res_B = [[mapping[word] for word in s.split()] for s in B]
set_a = set(res_A)

solved = False
for L in range(0, len(res_B)+1):
    for subset in itertools.combinations(res_B, L):
        s = set(item for sublist in subset for item in sublist)
        if set_a.issubset(s):
            print(f'{L}')
            solved = True
            break
    if solved: break
I had a logic mistake in remove_sub; no idea why it still worked.
Try cleaning the data first and removing as many items from b as you can:
import itertools as it
import time
import numpy as np
from collections import Counter, defaultdict as dd
import copy

A = 'hello world how are you doing'
B = ['hello world how', 'hello are', 'hello', 'hello are you doing']

d = {}
res_A = [d.setdefault(word, len(d)+1) for word in A.lower().split()]
mapping = dict(zip(A.split(), range(1, len(A) + 1)))
# find mappings of words in B
res_B = [[mapping[word] for word in s.split()] for s in B]
set_a = set(res_A)

# my additions work on a list of sets
for i in range(len(res_B)):
    res_B[i] = set(res_B[i])

# a is a list of numbers, b is a list of sets of numbers;
# we are trying to cover a using the minimum number of items from b
a = np.random.randint(0, 50, size=30)
np_set_a = set(a)
b = []
for i in range(200):
    size = np.random.randint(0, 20)
    b.append(set(np.random.choice(a, size)))
# up to here: created a, b for a larger data test

def f1(set_a, b):
    solved = False
    for L in range(0, len(b)+1):
        for subset in it.combinations(b, L):
            s = set(item for sublist in subset for item in sublist)
            if set_a.issubset(s):
                print(f'{L}', '**************f1')
                solved = True
                break
        if solved: break

def rare(b):
    c = Counter()  # key: a number, value: how many times it appears across all sets in b
    items = dd(list)  # key: a number, value: list of indices in b where that number occurs
    for i in range(len(b)):
        c.update(b[i])
        for num in b[i]:
            items[num].append(i)
    rare = set()
    common = c.most_common()  # sorted list of (number, occurrence count) tuples
    # take all the numbers that appear only once in b; the items holding them must be in
    # the final combination, so they can be removed from b and their numbers from a,
    # because those numbers are already covered
    for i in range(1, len(common)-1):
        if common[-i][1] == 1:
            rare.add(common[-i][0])
            continue
        break
    rare_items = set()  # set of all indices that contain a rare number
    for k in rare:
        rare_items.update(items[k])
    values_from_rare_items = set()  # all the numbers in the items that hold rare numbers
    for i in rare_items:
        values_from_rare_items.update(b[i])
    # remove from b all the items with rare numbers; they have to be in the final
    # combination anyway, so there is no need to check them
    for i in reversed(sorted(rare_items)):
        b.pop(i)
    return values_from_rare_items, b, len(rare_items)

# check the sets in b: if two are equal remove one, if one is a subset of another remove it
def remove_sub(b):
    to_pop = set()
    t = copy.deepcopy(b)
    for i in range(len(b)):
        for j in range(len(t)):
            if i == j:
                continue
            if b[i] == t[j]:
                to_pop.add(i)
                continue
            if b[i].issubset(t[j]):
                to_pop.add(i)
            if t[j].issubset(b[i]):
                to_pop.add(j)
    for i in reversed(sorted(to_pop)):
        b.pop(i)
    return b

def f2(set_a, b):
    b1 = remove_sub(b)
    values_from_rare_items, b2, num_rare_items = rare(b)
    # remove from a all the numbers covered by the rare unique numbers; they are already covered
    a_without_rare = set_a - values_from_rare_items
    solved = False
    for L in range(0, len(b2)+1):
        for subset in it.combinations(b2, L):
            s = set(item for sublist in subset for item in sublist)
            if a_without_rare.issubset(s):
                length = L + num_rare_items
                print(f'{length}', "*********f2")
                solved = True
                break
        if solved: break

s = time.time()
f1(set_a, b)
print(time.time()-s, '********************f1')
s = time.time()
f2(set_a, b)
print(time.time()-s, '******************f2')
s = time.time()
f1(set_a, res_B)
print(time.time()-s, '********************f1')
s = time.time()
f2(set_a, res_B)
print(time.time()-s, '******************f2')
This is the output:
2 **************f1
0.16755199432373047 ********************f1 num_array
2 *********f2
0.09078240394592285 ******************f2 num_array
2 **************f1
0.0009989738464355469 ********************f1 your_data
2 *********f2
0.0009975433349609375 ******************f2 your_data
You can improve it further by taking all items that appear only a few times and treating them as if they appear once. In rare cases this will not give the true minimum, but the time improvement is significant.
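If an approximate answer is acceptable, another standard option (my addition, not part of the answer above) is the greedy set-cover heuristic: repeatedly pick the phrase that covers the most still-uncovered words. It is not guaranteed to find the true minimum, but it runs in polynomial time. A minimal sketch on the question's data:
def greedy_cover(a, b):
    # pick, at each step, the phrase covering the most uncovered words
    remaining = set(a.split())
    candidates = [set(p.split()) for p in b]
    chosen = 0
    while remaining:
        best = max(candidates, key=lambda s: len(s & remaining))
        if not best & remaining:
            return None  # the words of a cannot be covered at all
        remaining -= best
        chosen += 1
    return chosen

print(greedy_cover('hello world how are you doing',
                   ['hello world how', 'hello are', 'hello', 'hello are you doing']))
# 2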

Split a list into X evenly sized chunks?

I need to split a list of integers into nbChunks chunks that each have the sum given by the parameter max.
The function should be like:
def split(list, nbChunks, max):
and the result of split([25,5,10,13,7,30],3,30)
would be like [[10,7,13],[30],[25,5]] (I don't care about the order of the numbers).
I've already tried sorting the list first, but I never get the same sum.
If you have any idea, feel free to let me know.
Here is my current version:
def dispatchToServers(liste, nbServers, max):
    liste.sort(reverse=True)
    output = []
    for server in range(nbServers):
        contentOfServer = []
        for element in liste:
            if canAdd(element, contentOfServer, max):
                contentOfServer.append(element)
                liste.remove(element)
        output.append(contentOfServer)
    return output

def canAdd(element, serverContent, max):
    if sum(serverContent, element) > max:
        return False
    else:
        return True
This works by looking at all the possible arrangements of the data using the permutations function:
from itertools import permutations

def split_chunks(data, nb_chunks, desired_max):
    for ordering in permutations(data):
        groups = []
        group = []
        group_total = 0
        found = True
        for x in ordering:
            group.append(x)
            group_total += x
            if len(group) > nb_chunks or group_total > desired_max:
                found = False
                break
            elif group_total == desired_max:
                groups.append(group)
                group = []
                group_total = 0
        if found:
            return groups
    return None

split_chunks([25,5,10,13,7,30], 3, 30)
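With the example input, the very first ordering that permutations yields already splits cleanly, so the call above should return something like this (each chunk sums to 30). Keep in mind that trying every permutation is factorial in the length of the list, so this is only practical for short inputs.
print(split_chunks([25, 5, 10, 13, 7, 30], 3, 30))
# expected: [[25, 5], [10, 13, 7], [30]]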

removing numbers which are close to each other in a list

I have a list like
mylist = [75,75,76,77,78,79,154,155,154,156,260,262,263,550,551,551,552]
I need to remove numbers that are close to each other, within a maximum of four, i.e.:
num-4 <= x <= num+4
The list I need at the end should look like:
list = [75,154,260,550]
or
list = [76,156,263,551]
It doesn't really matter which of the close numbers stays in the list, as long as only one of them does.
I tried this, which gave me:
for i in range(len(l)):
    for j in range(len(l)):
        if i == j or i == j+1 or i == j+2 or i == j+3:
            pp = l.pop(j)
            print(pp)
print(l)
IndexError: pop index out of range
and this one, which doesn't work the way I need:
for q in li:
    for w in li:
        print(q, '////', w)
        if q == w or q == w+1 or q == w+2 or q == w+3:
            rem = li.remove(w)
thanks
The below uses groupby to identify runs from the iterable that start with a value start and contain values that differ from start by no more than 4. We then collect all of those start values into a list.
from itertools import groupby

def runs(difference=4):
    start = None
    def inner(n):
        nonlocal start
        if start is None:
            start = n
        elif abs(start - n) > difference:
            start = n
        return start
    return inner

print([next(g) for k, g in groupby(mylist, runs())])
# [75, 154, 260, 550]
This assumes that the input data is already sorted. If it's not, you'll have to sort it: groupby(sorted(mylist), runs()).
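For example, with a hypothetical unsorted input the same closure still works once the data is sorted:
data = [260, 75, 551, 156, 77, 154, 550, 79]  # hypothetical unsorted values
print([next(g) for k, g in groupby(sorted(data), runs())])
# [75, 154, 260, 550]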
You can accomplish this using a set or list; you don't need a dict.
usedValues = set()
newList = []
for v in mylist:
    if v not in usedValues:
        newList.append(v)
        for lv in range(v - 4, v + 5):
            usedValues.add(lv)
print(newList)
This method stores all values within 4 of every value you've kept so far. When you look at a new value from mylist, you only need to check usedValues to see whether something in its ballpark has already been kept.
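If you prefer this as a reusable function, here is a small sketch (the function name and the tolerance parameter are my additions):
def drop_close(values, tolerance=4):
    # keep a value only if nothing within +/- tolerance has already been kept
    used_values = set()
    kept = []
    for v in values:
        if v not in used_values:
            kept.append(v)
            used_values.update(range(v - tolerance, v + tolerance + 1))
    return kept

print(drop_close([75,75,76,77,78,79,154,155,154,156,260,262,263,550,551,551,552]))
# [75, 154, 260, 550]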

Out of range issue within a loop

I am trying to make a script that loops through a list (tmpList = openFiles(cop_node)). This list contains 5 sublists of 206 components each.
The last 200 components of each sublist are number strings (each component is a line of 200 number strings separated by spaces).
I need to loop through the main list and create a new list of 5 components, each containing the 200*200 values as floats.
My current code tries to add a second loop to older code that worked on the equivalent of one sublist, but Python returns an "Index out of range" error.
def valuesFiles(cop_node):
    tmpList = openFiles(cop_node)
    valueList = []
    valueListStr = []*len(tmpList)
    for j in range(len(tmpList)):
        tmpList = openFiles(cop_node)[j][6:]
        tmpList.reverse()
        for i in range(len(tmpList)):
            splitList = tmpList[i].split(' ')
            valueListStr[j].extend(splitList)
            #valueList.append(float(valueListStr[j][i]))
    return(valueList)
valueListStr = []*len(tmpList) does not do what you think it does; if you want a list of lists, use a list comprehension with range:
valueListStr = [[] for _ in range(len(tmpList))]
That will create a list of lists:
In [9]: valueListStr = [] * i
In [10]: valueListStr
Out[10]: []
In [11]: valueListStr = [[] for _ in range(i)]
In [12]: valueListStr
Out[12]: [[], [], [], []]
The reason you get an error is valueListStr[j].extend(splitList): you cannot index into an empty list.
You don't actually return the list anywhere, so I presume you want to return it. You can also just create the lists inside the loop as needed, and loop directly over openFiles(cop_node) instead of indexing:
def valuesFiles(cop_node):
    valueListStr = []
    for j in openFiles(cop_node):
        tmpList = j[6:]
        tmpList.reverse()
        tmp = []
        for s in tmpList:
            tmp.extend(s.split(' '))
        valueListStr.append(tmp)
    return valueListStr
Which, using itertools.chain, can become:
from itertools import chain

def values_files(cop_node):
    return [list(chain(*(s.split(' ') for s in reversed(sub[6:]))))
            for sub in openFiles(cop_node)]
After a little modification I got it to work as expected:
def valuesFiles(cop_node):
    valueList = []
    for j in range(len(openFiles(cop_node))):
        tmpList = openFiles(cop_node)[j][6:]
        tmpList.reverse()
        tmpStr = []
        for s in tmpList:
            tmpStr.extend(s.split(' '))
        tmp = []
        for t in tmpStr:
            tmp.append(float(t))
        valueList.append(tmp)
    return(valueList)
I don't understand why, but the first loop statement didn't work; at the end I had empty lists, like [[],[],[],[],[]]. That's why I changed the beginning. Finally, I converted the strings to floats.
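For what it's worth, the float conversion can also be folded straight into the comprehension version from the earlier answer; a small sketch, assuming openFiles(cop_node) returns the same structure (sublists whose elements from index 6 onward are space-separated number strings):
from itertools import chain

def values_files(cop_node):
    # one list of floats per sublist, reversing the lines as before
    return [[float(t) for t in chain(*(s.split(' ') for s in reversed(sub[6:])))]
            for sub in openFiles(cop_node)]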

Finding items that occur exactly once in an array

I have a 2-dimensional array. Each of the row vectors, in this case, is considered a quantity of interest. What I want to do is return all the rows that appear exactly once as one array, and all the rows that appear more than once as a second array.
For example, if the array was:
a=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
I would like to return two arrays:
nonsingles=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [5,1,6,0]]
singles= [[3,2,1,0], [4,4,1,0]]
It is important that the order stay preserved. The code I have written to do this is as follows:
def singles_nonsingles(array):
    #returns the elements that occur only once, and the elements
    #that occur more than once in the array
    singles = []
    nonsingles = []
    arrayhash = map(tuple, array)
    for x in arrayhash:
        if (arrayhash.count(x) == 1):
            singles.append(x)
        if (arrayhash.count(x) > 1):
            nonsingles.append(x)
    nonsingles = array(nonsingles)
    singles = array(singles)
    return {'singles':singles, 'nonsingles':nonsingles}
Now, I am happy to say that this works, but unhappy to say that it is extremely slow: a typical array I have is 30000 (rows) x 10 elements/row = 300000 elements. Can anyone give me some tips on how to speed this up? I apologize if this question is very simple; I am new to Python. Also, I am using NumPy/SciPy with Python 2.7, if that is any help.
In Python 2.7 or above, you can use collections.Counter to count the number of occurrences:
import collections

def unique_items(iterable):
    # materialize the tuples so they can be counted and then iterated again
    tuples = list(map(tuple, iterable))
    counts = collections.Counter(tuples)
    unique = []
    non_unique = []
    for t in tuples:
        if counts[t] == 1:
            unique.append(t)
        else:
            non_unique.append(t)
    return unique, non_unique
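Called on the example data from the question, this should give the following (note that the rows come back as tuples rather than lists):
a = [[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
unique, non_unique = unique_items(a)
print(unique)      # [(3, 2, 1, 0), (4, 4, 1, 0)]
print(non_unique)  # [(1, 1, 1, 0), (1, 1, 1, 0), (5, 1, 6, 0), (5, 1, 6, 0)]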
I think your problem is that you are doing a linear scan of a list (count()) for every row; that is O(n) work per row.
It should be faster to build a dict and then use that to figure out what to do with each row.
EDIT: The code had an unnecessary enumerate() in it; I stripped it out.
from collections import defaultdict

def singles_nonsingles(array):
    #returns the elements that occur only once, and the elements
    #that occur more than once in the array
    singles = []
    nonsingles = []
    d = defaultdict(int)
    t = [tuple(row) for row in array]
    for row in t:
        d[row] += 1
    for row in t:
        if d[row] == 1:
            singles.append(row)
        else:
            nonsingles.append(row)
    return {'singles':singles, 'nonsingles':nonsingles}
Here's a version that only returns unique rows:
from collections import defaultdict

def singles_nonsingles(array):
    #returns the elements that occur only once, and the elements
    #that occur more than once in the array
    singles = []
    nonsingles = []
    d = defaultdict(int)
    already_seen = set()
    t = [tuple(row) for row in array]
    for row in t:
        d[row] += 1
    for row in t:
        if row in already_seen:
            continue
        if d[row] == 1:
            singles.append(row)
        else:
            nonsingles.append(row)
        already_seen.add(row)
    return {'singles':singles, 'nonsingles':nonsingles}

a = [[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
x = singles_nonsingles(a)
print("Array: " + str(a))
print(x)
The first function returns the single/non-single rows without repetitions; the second returns them with repetitions:
def comp(multi):
    from collections import defaultdict
    res = defaultdict(int)
    for vect in multi:
        res[tuple(vect)] += 1
    singles = []
    no_singles = []
    for k in res:
        if res[k] > 1:
            no_singles.append(list(k))
        elif res[k] == 1:
            singles.append(list(k))
    return singles, no_singles

def count_w_repetitions(multi):
    from collections import defaultdict
    res = defaultdict(int)
    for vect in multi:
        res[tuple(vect)] += 1
    singles = []
    no_singles = []
    for k in res:
        if res[k] == 1:
            singles.append(list(k))
        else:
            for i in xrange(res[k]):
                no_singles.append(list(k))
    return singles, no_singles
from itertools import compress, imap

def has_all_unique(a):
    return len(a) == len(frozenset(a))

uniq = map(has_all_unique, a)
singles = list(compress(a, uniq))
notuniq = imap(lambda x: not x, uniq)
nonsingles = list(compress(a, notuniq))
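Since the question mentions NumPy, one more option (my addition, not from the answers above) is to let np.unique do the counting; this needs a NumPy recent enough to support the axis and return_counts arguments:
import numpy as np

def singles_nonsingles_np(arr):
    arr = np.asarray(arr)
    # count identical rows, then map each row's count back onto the original order
    uniq, inverse, counts = np.unique(arr, axis=0, return_inverse=True, return_counts=True)
    once = counts[inverse] == 1   # True for rows that appear exactly once
    return arr[once], arr[~once]  # original row order is preserved

a = [[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
singles, nonsingles = singles_nonsingles_np(a)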
