Split a list into X evenly sized chunks? - python

I need to split a list of integers into nbChunks chunks that all have the sum indicated in parameters(max).
The function should be like:
def split(list, nbChunks, max):
and the result of split([25,5,10,13,7,30],3,30)
would be like [[10,7,13],[30],[25,5]] (I don't care about the order of the numbers).
I've already tried it by sorting a long list, but never get the same sum.
If you have any idea, feel free to let me know.
Here is my current version:
def dispatchToServers(liste, nbServers, max):
liste.sort(reverse = True)
output = []
for server in range(nbServers):
contentOfServer = []
for element in liste:
if canAdd(element, contentOfServer,max):
contentOfServer.append(element)
liste.remove(element)
output.append(contentOfServer)
return output
def canAdd(element, serverContent,max):
if sum(serverContent, element) > max:
return False
else:
return True

this works by looking at all the possible arrangements of the data using the permutations function
from itertools import permutations
def split_chunks(data, nb_chunks, desired_max):
for ordering in permutations(data):
groups = []
group = []
group_total = 0
found = True
for x in ordering:
group.append(x)
group_total += x
if len(group) > nb_chunks or group_total > desired_max:
found = False
break
elif(group_total == desired_max):
groups.append(group)
group = []
group_total = 0
if found:
return groups
return None
split_chunks([25,5,10,13,7,30], 3, 30)

Related

removing numbers which are close to each other in a list

I have a list like
mylist = [75,75,76,77,78,79,154,155,154,156,260,262,263,550,551,551,552]
i need to remove numbers are close to each other by maxumim four number like:
num-4 <= x <= num +4
the list i need at the end should be like :
list = [75,154,260,550]
or
list = [76,156,263,551]
doesn't really matter which number to stay in the list , only one of those which are close.
i tried this which gave me :
for i in range(len(l)):
for j in range(len(l)):
if i==j or i==j+1 or i==j+2 or i == j+3:
pp= l.pop(j)
print(pp)
print(l)
IndexError: pop index out of range
and this one which doesn't work the way i need:
for q in li:
for w in li:
print(q,'////',w)
if q == w or q ==w+1 or q==w+2 or q==w+3:
rem = li.remove(w)
thanks
The below uses groupby to identify runs from the iterable that start with a value start and contain values that differ from start by no more than 4. We then collect all of those start values into a list.
from itertools import groupby
def runs(difference=4):
start = None
def inner(n):
nonlocal start
if start is None:
start = n
elif abs(start-n) > difference:
start = n
return start
return inner
print([next(g) for k, g in groupby(mylist, runs())])
# [75, 154, 260, 550]
This assumes that the input data is already sorted. If it's not, you'll have to sort it: groupby(sorted(mylist), runs()).
You can accomplish this using a set or list, you don't need a dict.
usedValues = set()
newList = []
for v in myList:
if v not in usedValues:
newList.append(v)
for lv in range(v - 4, v + 5):
usedValues.add(lv)
print(newList)
This method stores all values within 4 of every value you've seen so far. When you look at a new value from myList, you only need to check if you've seen something in it's ballpark before by checking usedValues.

Delete elements from list based on substring in Python

I have a huge list of strings where a couple of strings only differ in 2 or three characters like this:
ENSH-DFFEV1-5F
ENSH-DFFEV2-5F
ENSH-DFFEV3-5F
FVB.DFFVRV2-4T
FVB.DFFVRV3-4T
What I would like to do is to keep only those elements for which the number after the 'V' is the largest. From the above example I would like to have
ENSH-DFFEV3-5F
FVB.DFFVRV3-4T
Is there a simple way to do this in Python?
#stevieb is right, but anyway, I did the effort for you.
s = """
ENSH-DFFEV1-5F
ENSH-DFFEV2-5F
ENSH-DFFEV3-5F
FVB.DFFVRV2-4T
FVB.DFFVRV3-4T
""".split()
def custom_filter(s):
out = []
current_max = -1
for r in s:
v = int(r.rsplit('-', 1)[0][-1]) # <- you should probably edit this line to fit your data structure
if v > current_max:
current_max = v
out = []
if v == current_max:
out += [r]
return out
for e in custom_filter(s):
print e

How to find elements in a list is consecutive

Hi I have a list [tag1 tag2], I would like to find out whether the number following tag is incremental.
match_atr_value = re.search('(.+)\~(\w+)',each_line)
tags = match_atr_value.group(1).split('.')
print tag
Input:
[tag1 tag2]
[tag1 tag3]
[tag1 tag2 tag4]
Output:
Incremental
Not
Not
"Consecutive integers are integers that follow each other in order"
Is there a simpler way to do it? All I have to do is check if its incremental and if yes I should use them else I should throw an exception.
Thanks
You can extract all the digits followed by tag via re.findall() and then use enumerate() and all() to check if the numbers are consecutive:
import re
l = [
"[tag1 tag2]",
"[tag1 tag3]",
"[tag1 tag2 tag4]"
]
pattern = re.compile(r"tag(\d+)")
for item in l:
numbers = map(int, pattern.findall(item)) # if Python 3: call list() on that
result = all(index == item for index, item in enumerate(numbers, start=numbers[0]))
print(result)
Prints:
True
False
False
tag1val = int(re.search(r'\D*(\d+)', tag1).group(1))
tag2val = int(re.search(r'\D*(\d+)', tag2).group(1))
(tag1val - tag2val) == -1
True
tag3val = int(re.search(r'\D*(\d+)', tag3).group(1))
(tag1val - tag3val) == -1
False
(tag2val - tag3val) == -1
True
If you wanted to stick with regular expressions, you can go this route. If the difference is -1 then the one on the right is 1 greater than the one on the left.
If I were you, I would avoid a regex and do something like below. I get the number from the string, whether it's beginning, end, etc. and then compare it to the previous value and return False if it's not increasing
def isIncreasing( listy ):
prev = 0
for w in listy:
val = [''.join(s) for s in w if s.isdigit()]
cur = int(val[0])
if cur != prev+1:
return False
prev = cur
return True

python random.shuffle() in a while loop

I have a list:
k = [1,2,3,4,5]
Now I want 3 permutations of this list to be listed in another list but when I do this:
x = []
i = 0
while i < 3:
random.shuffle(k)
x.append(k)
i += 1
I end up with 3 times the same permutation of k in x, like this:
x = [[1,3,5,2,4], [1,3,5,2,4], [1,3,5,2,4]]
In stead of what I would like, something like this:
x = [[1,5,4,2,3], [1,3,5,2,4], [5,3,4,1,2]]
Note that it is not possible due to the way the data in k is gathered to place k inside the loop, as for I know this would solve the problem. The real code is this:
def create_random_chromosomes(genes):
temp_chromosomes = []
chromosomes = []
i = 0
while i < 2000:
print(genes)
random.shuffle(genes)
temp_chromosomes.append(genes)
i += 1
print(temp_chromosomes)
for element in temp_chromosomes:
if element not in chromosomes:
chromosomes.append(element)
return chromosomes
Shuffling a list changes it in-place, and you are creating 3 references to the same list. Create a copy of the list before shuffling:
x = []
for i in range(3):
kcopy = k[:]
random.shuffle(kcopy)
x.append(kcopy)
I've simplified your loop as well; just use for i in range(3). Or, to place this in the context of your full method:
def create_random_chromosomes(genes):
temp_chromosomes = []
chromosomes = []
for i in range(2000):
print(genes)
randomgenes = genes[:]
random.shuffle(randomgenes)
temp_chromosomes.append(randomgenes)
print(temp_chromosomes)
for element in temp_chromosomes:
if element not in chromosomes:
chromosomes.append(element)
return chromosomes
You can further simplify the above by using a set to weed out dupes:
def create_random_chromosomes(genes):
chromosomes = set()
randomgenes = genes[:]
for i in range(2000):
random.shuffle(randomgenes)
chromosomes.add(tuple(randomgenes))
return list(chromosomes)
This uses a tuple copy of the random genes list to fit the hashable constraint of set contents.
You can then even ensure that you return 2000 unique items regardless:
def create_random_chromosomes(genes):
chromosomes = set()
randomgenes = genes[:]
while len(chromosomes) < 2000:
random.shuffle(randomgenes)
chromosomes.add(tuple(randomgenes))
return list(chromosomes)

Finding items that occur exactly once in an array

I have an 2 dimensional array. Each of the row vectors, in this case, is considered a quantity of interest. What I want to do is return all the rows that appear exactly once as one array, and all the rows that appear more than once as a second array.
For example, if the array was:
a=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
I would like to return two arrays:
nonsingles=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [5,1,6,0]]
singles= [[3,2,1,0], [4,4,1,0]]
It is important that the order stay preserved. The code I have written to do this is as follows:
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
arrayhash=map(tuple, array)
for x in arrayhash:
if (arrayhash.count(x)==1):
singles.append(x)
if (arrayhash.count(x)>1):
nonsingles.append(x)
nonsingles=array(nonsingles)
singles=array(singles)
return {'singles':singles, 'nonsingles':nonsingles}
Now, I am happy to say that this works, but unhappy to say that it is extremely slow, as a typical array i have is 30000(rows)x10 elements/row=300000 elements. Can anyone give me some tips about how to speed this up?? I apologize if this question is very simple, I am new to Python. Also, I am using Numpy/Scipy with Python 2.7, if that is any help.
In Python 2.7 or above, you can use collections.Counter to count the number of occurrences:
def unique_items(iterable):
tuples = map(tuple, iterable)
counts = collections.Counter(tuples)
unique = []
non_unique = []
for t in tuples:
if counts[t] == 1:
unique.append(t)
else:
non_unique.append(t)
return unique, non_unique
I think your problem is that you are doing an in test on a list. This has O(n) performance.
It should be faster to build a dict and then use that to figure out what to do with each row.
EDIT: The code had an unnecessary enumerate() in it; I stripped it out.
from collections import defaultdict
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
d = defaultdict(int)
t = [tuple(row) for row in array]
for row in t:
d[row] += 1
for row in t:
if d[row] == 1:
singles.append(row)
else:
nonsingles.append(row)
return {'singles':singles, 'nonsingles':nonsingles}
Here's a version that only returns unique rows:
from collections import defaultdict
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
d = defaultdict(int)
already_seen = set()
t = [tuple(row) for row in array]
for row in t:
d[row] += 1
for row in t:
if row in already_seen:
continue
if d[row] == 1:
singles.append(row)
else:
nonsingles.append(row)
already_seen.add(row)
return {'singles':singles, 'nonsingles':nonsingles}
a=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
x = singles_nonsingles(a)
print("Array: " + str(a))
print(x)
The first return only the list of the single/no single arrays without repetitions, the second with repetitions
def comp (multi):
from collections import defaultdict
res = defaultdict(int)
for vect in multi:
res[tuple(vect)] += 1
singles = []
no_singles = []
for k in res:
if res[k] > 1:
no_singles.append(list(k))
elif res[k] == 1:
singles.append(list(k))
return singles, no_singles
def count_w_repetitions(multi):
from collections import defaultdict
res = defaultdict(int)
for vect in multi:
res[tuple(vect)] += 1
singles = []
no_singles = []
for k in res:
if res[k] == 1:
singles.append(list(k))
else:
for i in xrange(res[k]):
no_singles.append(list(k))
return singles, no_singles
from itertools import compress,imap
def has_all_unique(a):
return len(a) == len(frozenset(a))
uniq = map( has_all_unique,a)
singles = list(compress(a,uniq))
notuniq = imap(lambda x: not x,uniq)
nonsingles = list(compress(a,notuniq))

Categories