iterate through an array looking at multiple values - python

Test Array = [1, 2, 3, 1, 0.4, 1, 0.1, 0.4, 0.3, 1, 2]
I need to iterate through an array in order to find the first time that 3 consecutive entries are <0.5, and to return the index of this occurence.
Test Array = [1, 2, 3, 1, 0.4, 1, 0.1, 0.4, 0.3, 1, 2]
^ ^ ^ ^
(indices) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
^
So within this test array the index/value that is being looked for is 6
Alongside a proposed solution, it would be good to know what value gets returned if the '3 consecutive values <0.5' condition is not met - would it simply return nothing? or the last index number?
(I would like the returned value to be 0 if the condition is not met)

You can use zip and enumerate:
def solve(lis, num):
for i, (x,y,z) in enumerate(zip(lis, lis[1:], lis[2:])):
if all(k < num for k in (x,y,z)):
return i
#default return value if none of the items matched the condition
return -1 #or use None
...
>>> lis = [1, 2, 3, 1, 0.4, 1, 0.1, 0.4, 0.3, 1, 2]
>>> solve(lis, 0.5)
6
>>> solve(lis, 4) # for values >3 the answer is index 0,
0 # so 0 shouldn't be the default return value.
>>> solve(lis, .1)
-1
Use itertools.izip for memory efficient solution.

from itertools import groupby
items = [1, 2, 3, 1, 0.4, 1, 0.1, 0.4, 0.3, 1, 2]
def F(items, num, k):
# find first consecutive group < num of length k
groups = (list(g) for k, g in groupby(items, key=num.__gt__) if k)
return next((g[0] for g in groups if len(g) >= k), 0)
>>> F(items, 0.5, 3)
0.1

Related

Combinations with Replacement and maximal Occurrence Constraint

from itertools import *
import collections
for i in combinations_with_replacement(['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'],15):
b = (''.join(i))
freq = collections.Counter(b)
for k in freq:
if freq [k] < 5:
print(k)
this code most print chars what count if less than 5
what i try do , cheek if at string from join at fly if there is repeated any of characters les than x times at any possition of that string and print strings only what true to that.
Problem is no mater what i try do , or its print all and ignore if ... or print notting.
how do it right , or maybe at python exist simple solution ?
Result most be as example les than 5
False - fffaaffbbdd ( repeat 5 titemes f)
False - fffffaaaaac ( repeat 5 times a and f)
True - aaabbbccc11 ( no any character repeated more than 4 times )
More clear explain qustion - filter all string with characters more than x repetions before give to next function.
As examble - there is simple print that strings , and not print strings what not at rule.
If I understand you right, you want to print strings where each character is found only 4-times at maximum:
from collections import Counter
from itertools import combinations_with_replacement
for i in combinations_with_replacement(['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'],15):
c = Counter(i)
if c.most_common(1)[0][1] > 4:
continue
print(''.join(i))
Prints:
...
00002446899cccd
00002446899ccce
00002446899cccf
00002446899ccdd
...
a more constructive approach (meaning: i do not iterate over all possible combinations - i construct the valid combinations directly).
you need to have sympy installed for this to work.
in the example i only use the elements "abcdef" and restrict the repetitions to be strictly smaller than MAX = 4. i fix the length of the strings to be output at M = 6.
i start by getting all the partitions of M with restricted repetitions k=MAX - 1 and not constisting of more than m=N parts. i immediately convert those to a list:
{3: 2} [3, 3, 0, 0, 0, 0]
{3: 1, 2: 1, 1: 1} [3, 2, 1, 0, 0, 0]
{3: 1, 1: 3} [3, 1, 1, 1, 0, 0]
{2: 3} [2, 2, 2, 0, 0, 0]
{2: 2, 1: 2} [2, 2, 1, 1, 0, 0]
{2: 1, 1: 4} [2, 1, 1, 1, 1, 0]
{1: 6} [1, 1, 1, 1, 1, 1]
of those lists i iterate over the multiset permutations - i mean those to represent the elements that i select and how often they are repeated: e.g:
[2, 1, 2, 0, 0, 1] -> "aabccf" # 2*"a", 1*"b", ..., 0*"e", 1*"f"
the result you want is then the multiset permutation of those strings.
from sympy.utilities.iterables import multiset_permutations, partitions
MAX = 4 # (all counts < MAX)
elements = "abcdef"
N = len(elements)
M = 6 # output length
def dict_to_list(dct, N):
ret = [0] * N
j = 0
for k, v in dct.items():
ret[j:j + v] = [k] * v
j += v
return ret
for dct in partitions(M, k=MAX - 1, m=N):
lst = dict_to_list(dct, N)
for part in multiset_permutations(lst):
el = ''.join(n * v for n, v in zip(part, elements))
for msp in multiset_permutations(el):
print(''.join(msp))
for your case you'd then need to change:
MAX = 5 # (all counts < MAX)
elements = "0123456789abcdef"
M = 15 # output length
but the complexity of that is huge (but way better that the one of the original approach)!

Encode array of integers into unique int

I have a fixed amount of int arrays of the form:
[a,b,c,d,e]
for example:
[2,2,1,1,2]
where a and b can be ints from 0 to 2, c and d can be 0 or 1, and e can be ints from 0 to 2.
Therefore there are: 3 * 3 * 2 * 2 * 3: 108 possible arrays of this form.
I would like to assign to each of those arrays a unique integer code from 0 to 107.
I am stuck, i thought of adding each numbers in the array, but two arrays such as:
[0,0,0,0,1] and [1,0,0,0,0]
would both add to 1.
Any suggestion?
Thank you.
You could use np.ravel_multi_index:
>>> np.ravel_multi_index([1, 2, 0, 1, 2], (3, 3, 2, 2, 3))
65
Validation:
>>> {np.ravel_multi_index(j, (3, 3, 2, 2, 3)) for j in itertools.product(*map(range, (3,3,2,2,3)))} == set(range(np.prod((3, 3, 2, 2, 3))))
True
Going back the other way:
>>> np.unravel_index(65, dims=(3, 3, 2, 2, 3))
(1, 2, 0, 1, 2)
Just another way, similar to Horner's method for polynomials:
>>> array = [1, 2, 0, 1, 2]
>>> ranges = (3, 3, 2, 2, 3)
>>> reduce(lambda i, (a, r): i * r + a, zip(array, ranges), 0)
65
Unrolled that's ((((0 * 3 + 1) * 3 + 2) * 2 + 0) * 2 + 1) * 3 + 2 = 65.
This is a little like converting digits from a varying-size number base to a standard integer. In base-10, you could have five digits, each from 0 to 9, and then you would convert them to a single integer via i = a*10000 + b*1000 + c*100 + d*10 + e*1.
Equivalently, for the decimal conversion, you could write i = np.dot([a, b, c, d, e], bases), where bases = [10*10*10*10, 10*10*10, 10*10, 10, 1].
You can do the same thing with your bases, except that your positions introduce multipliers of [3, 3, 2, 2, 3] instead of [10, 10, 10, 10, 10]. So you could set bases = [3*2*2*3, 2*2*3, 2*3, 3, 1] (=[36, 12, 6, 3, 1]) and then use i = np.dot([a, b, c, d, e], bases). Note that this will always give answers in the range of 0 to 107 if a, b, c, d, and e fall in the ranges you specified.
To convert i back into a list of digits, you could use something like this:
digits = []
remainder = i
for base in bases:
digit, remainder = divmod(remainder, base)
digits.append(digit)
On the other hand, to keep your life simple, you are probably better off using Paul Panzer's answer, which pretty much does the same thing. (I never thought of an n-digit number as the coordinates of a cell in an n-dimensional grid before, but it turns out they're mathematically equivalent. And np.ravel is an easy way to assign a serial number to each cell.)
This data is small enough that you may simply enumerate them:
>>> L = [[a,b,c,d,e] for a in range(3) for b in range(3) for c in range(2) for d in range(2) for e in range(3)]
>>> L[0]
[0, 0, 0, 0, 0]
>>> L[107]
[2, 2, 1, 1, 2]
If you need to go the other way (from the array to the integer) make a lookup dict for it so that you will get O(1) instead of O(n):
>>> lookup = {tuple(x): i for i, x in enumerate(L)}
>>> lookup[1,1,1,1,1]
58
getting dot-product of your vectors as following:
In [210]: a1
Out[210]: array([2, 2, 1, 1, 2])
In [211]: a2
Out[211]: array([1, 0, 1, 1, 0])
In [212]: a1.dot(np.power(10, np.arange(5,0,-1)))
Out[212]: 221120
In [213]: a2.dot(np.power(10, np.arange(5,0,-1)))
Out[213]: 101100
should produce 108 unique numbers - use their indices...
If the array lenght is not very huge, you can calculate out the weight first, then use simple math formula to get the ID.
The code will be like:
#Test Case
test1 = [2, 2, 1, 1, 2]
test2 = [0, 2, 1, 1, 2]
test3 = [0, 0, 0, 0, 2]
def getUniqueID(target):
#calculate out the weights first;
#When Index=0; Weight[0]=1;
#When Index>0; Weight[Index] = Weight[Index-1]*(The count of Possible Values for Previous Index);
weight = [1, 3, 9, 18, 36]
return target[0]*weight[0] + target[1]*weight[1] + target[2]*weight[2] + target[3]*weight[3] + target[4]*weight[4]
print 'Test Case 1:', getUniqueID(test1)
print 'Test Case 2:', getUniqueID(test2)
print 'Test Case 3:', getUniqueID(test3)
#Output
#Test Case 1: 107
#Test Case 2: 105
#Test Case 3: 72
#[Finished in 0.335s]

python multiply list elements in a changing manner

I want to decay elements of a list such that on every 5 element, the elements will be reduced by half. For example, a list of ones with length 10 will become:
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
[1,1,1,1,1,0.5,0.5,0.5,0.5,0.5,0.25,0.25,0.25,0.25,0.25]
I tried list comprehensions and a basic for loop, but I couldn't construc the logic behind it.
Is this what you're looking for?
>>> x = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> r = [v*2**(-(i//5)) for i, v in enumerate(x)]
>>> r
[1, 1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25]
>>>
Think simple.
value = 1
result = []
for i in range(3):
for j in range(5):
result.append(value)
else:
value /= 2
print(result)
# [1, 1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25]
All other answers are great, i would like to add a stretched solution for this.
start_range = 0
end_range = 5
num = 1
x = [1 for _ in range(10)]
res = []
while start_range <= len(x):
for item in x[start_range:end_range]:
res.append(item*num)
start_range = end_range
end_range = start_range + 5
num /= float(2)
print res
# output: [1, 1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5]

Sum and aggregate over a list with equal values

I have a pair lists of the same length, the first containing int values and the second contains float values. I wish to replace these with another pair of lists which may be shorter, but still have the same length, in which the first list will contain only unique values, and the second list will contain the sums for each matching value. That is, if the i'th element of the first list in the new pair is x, and the indices in the first list of the original pair in which x has appeared are i_1,...,i_k, then the i'th element of the second list in the new pair should contain the sum of the values in indices i_1,...,i_k in the second list of the original pair.
An example will clarify.
Input:
([1, 2, 2, 1, 1, 3], [0.1, 0.2, 0.3, 0.4, 0.5, 1.0])
Ourput:
([1, 2, 3], [1.0, 0.5, 1.0])
I was trying to do some list comprehension trick here but failed. I can write a silly loop function for that, but I believe there should be something much nicer here.
Not a one-liner, but since you've not posted your solution I'll suggest this solution that is using collections.OrderedDict:
>>> from collections import OrderedDict
>>> a, b = ([1, 2, 2, 1, 1, 3], [0.1, 0.2, 0.3, 0.4, 0.5, 1.0])
>>> d = OrderedDict()
>>> for k, v in zip(a, b):
... d[k] = d.get(k, 0) + v
...
>>> d.keys(), d.values()
([1, 2, 3], [1.0, 0.5, 1.0])
Of course if order doesn't matter then it's better to use collections.defaultdict:
>>> from collections import defaultdict
>>> a, b = ([1, 'foo', 'foo', 1, 1, 3], [0.1, 0.2, 0.3, 0.4, 0.5, 1.0])
>>> d = defaultdict(int)
>>> for k, v in zip(a, b):
d[k] += + v
...
>>> d.keys(), d.values()
([3, 1, 'foo'], [1.0, 1.0, 0.5])
one way to go is using pandas:
>>> import pandas as pd
>>> df = pd.DataFrame({'tag':[1, 2, 2, 1, 1, 3],
'val':[0.1, 0.2, 0.3, 0.4, 0.5, 1.0]})
>>> df
tag val
0 1 0.1
1 2 0.2
2 2 0.3
3 1 0.4
4 1 0.5
5 3 1.0
>>> df.groupby('tag')['val'].aggregate('sum')
tag
1 1.0
2 0.5
3 1.0
Name: val, dtype: float64
Build a map with the keys:
la,lb = ([1, 2, 2, 1, 1, 3], [0.1, 0.2, 0.3, 0.4, 0.5, 1.0])
m = {k:0.0 for k in la}
And fill it with the summations:
for i in xrange(len(lb)):
m[la[i]] += lb[i]
Finally, from your map:
zip(*[(k,m[k]) for k in m]*1)

Sort a list based on a given distribution

Answering one Question, I ended up with a problem that I believe was a circumlocution way of solving which could have been done in a better way, but I was clueless
There are two list
percent = [0.23, 0.27, 0.4, 0.1]
optimal_partition = [3, 2, 2, 1]
optimal_partition, is one of the integer partition of the number 8 into 4 parts
I would like to sort optimal_partition, in a manner which matches the percentage distribution to as closest as possible which would mean, the individual partition should match the percent magnitude as closest as possible
So 3 -> 0.4, 2 -> 0.27 and 0.23 and 1 -> 0.1
So the final result should be
[2, 2, 3, 1]
The way I ended up solving this was
>>> percent = [0.23, 0.27, 0.4, 0.1]
>>> optimal_partition = [3, 2, 2, 1]
>>> optimal_partition_percent = zip(sorted(optimal_partition),
sorted(enumerate(percent),
key = itemgetter(1)))
>>> optimal_partition = [e for e, _ in sorted(optimal_partition_percent,
key = lambda e: e[1][0])]
>>> optimal_partition
[2, 2, 3, 1]
Can you suggest an easier way to solve this?
By easier I mean, without the need to implement multiple sorting, and storing and later rearranging based on index.
Couple of more examples:
percent = [0.25, 0.25, 0.4, 0.1]
optimal_partition = [3, 2, 2, 1]
result = [2, 2, 3, 1]
percent = [0.2, 0.2, 0.4, 0.2]
optimal_partition = [3, 2, 2, 1]
result = [1, 2, 3, 2]
from numpy import take,argsort
take(opt,argsort(argsort(perc)[::-1]))
or without imports:
zip(*sorted(zip(sorted(range(len(perc)), key=perc.__getitem__)[::-1],opt)))[1]
#Test
l=[([0.23, 0.27, 0.4, 0.1],[3, 2, 2, 1]),
([0.25, 0.25, 0.4, 0.1],[3, 2, 2, 1]),
([0.2, 0.2, 0.4, 0.2],[3, 2, 2, 1])]
def f1(perc,opt):
return take(opt,argsort(argsort(perc)[::-1]))
def f2(perc,opt):
return zip(*sorted(zip(sorted(range(len(perc)),
key=perc.__getitem__)[::-1],opt)))[1]
for i in l:
perc, opt = i
print f1(perc,opt), f2(perc,opt)
# output:
# [2 2 3 1] (2, 2, 3, 1)
# [2 2 3 1] (2, 2, 3, 1)
# [1 2 3 2] (1, 2, 3, 2)
Use the fact that the percentages sum to 1:
percent = [0.23, 0.27, 0.4, 0.1]
optimal_partition = [3, 2, 2, 1]
total = sum(optimal_partition)
output = [total*i for i in percent]
Now you need to figure out a way to redistribute the fractional components somehow. Thinking out loud:
from operator import itemgetter
intermediate = [(i[0], int(i[1]), i[1] - int(i[1])) for i in enumerate(output)]
# Sort the list by the fractional component
s = sorted(intermediate, key=itemgetter(2))
# Now, distribute the first item's fractional component to the rest, starting at the top:
for i, tup in enumerate(s):
fraction = tup[2]
# Go through the remaining items in reverse order
for index in range(len(s)-1, i, -1):
this_fraction = s[index][2]
if fraction + this_fraction >= 1:
# increment this item by 1, clear the fraction, carry the remainder
new_fraction = fraction + this_fraction -1
s[index][1] = s[index][1] + 1
s[index][2] = 0
fraction = new_fraction
else:
#just add the fraction to this element, clear the original element
s[index][2] = s[index][2] + fraction
Now, I'm not sure I'd say that's "easier". I haven't tested it, and I'm sure I got the logic wrong in that last section. In fact, I'm attempting assignment to tuples, so I know there's at least one error. But it's a different approach.

Categories