calculating the length of identical entries in list in python - python

I have a time series representing regular queries into the functionality of a system, where 1 = working and 0 = not working. For example, expressing the time series as a list
U = [0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,0]
I am interested in calculating things like the mean time to failure (how long the system stays up) and the mean time to repair (how long the system stays down) and other similar statistics so what I want to do is count the sequential 1 entries and the sequential 0 entries. I want to trim off the beginning and end sets since, for the example above, I don't know when the system went down initially nor when it will come back up in the future. So the output I'd be looking to generate in this case would be
uptime = [6, 4, 9, 2] # 6 ones followed by zeros, then 4 ones followed by zeros, etc.
downtime = [3, 3, 2] # like uptime but ignoring zeros at indices [0,1] and [-1]
I've written a script that does this but it seems a little bit awkward and I'm wondering if there's a better, more pythonic way to do it. Here's what I have.
def count_times(U, down=False):
if down:
U = [1 - u for u in U]
T = []
# Skip the first entry as you don't know when it started
m = U.index(0)
m += U[m:].index(1)
while m < len(U):
try:
T.append(U[m:].index(0))
m += U[m:].index(0)
m += U[m:].index(1)
except ValueError:
# skip the last entry as you don't know when it will end
return T
yielding:
print count_times(U)
# [6, 4, 9, 2]
print count_times(U, down = True)
# [3, 3, 2]
This works, but I can't help but wonder if there is a cleaner way to do this?

My approach is similar to Ruben's, but it initially keeps the up and down times in the same list after applying groupby, so it's easier to trim the beginning and ending sets.
import itertools
U = [0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,0]
run_lengths = [(value, len(list(group))) for value, group in itertools.groupby(U)]
#discard first and last runs
run_lengths = run_lengths[1:-1]
#split runs into separate up and down time lists
uptime = [length for value, length in run_lengths if value == 1]
downtime = [length for value, length in run_lengths if value == 0]
print uptime
print downtime
Result:
[6, 4, 9, 2]
[3, 3, 2]

You can use groupby from itertools module:
from itertools import groupby
testvalue = [0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,0]
def count_times(U, down=False):
if down:
return [len(list(group)) for key, group in groupby(U) if key == 0]
else:
return [len(list(group)) for key, group in groupby(U) if key == 1]
print count_times(testvalues, True) # [2, 3, 3, 2, 1]
print count_times(testvalues, False) # [6, 4, 9, 2]

Using reduce.
def groups(U,i):
a = reduce(lambda u,v: (u[0],u[1]+1) if v==i else (u[0] + [u[1]], 0) if u[1]>0 else u, U,([],0))[0]
if U[0]== i: a=a[1:] # truncate begining
if U[-1]==i: a=a[:-1] # truncate end
return a
U = [0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,0,0,1,1,0]
uptime = groups(U,1)
downtime = groups(U,0)

This is sometimes called run-length encoding. R has a nice built-in function for this rle(). Anyway here is my approach, originally thought about using takewhile() but this is the cleanest way I could think of:
from itertools import chain
def rle(x):
x = chain(x)
last = x.next()
i = 1
for item in x:
if item != last:
yield (last, i)
i = 1
else:
i += 1
last = item
yield (last, i)
Then you can get downtime or uptime like so:
[L for v,L in rle(U) if v == 1]
[L for v,L in rle(U) if v == 0]

Related

Find longest consecutive sub array (not sorted)-Python

v=[1,2,3,11,5,8,9,10,11,6,4] in the list above 1,2,3 are consecutive numbers (1st consecutive set). 8,9,10,11 are consecutive numbers (2nd set,largest one). How can I find this 2nd set? This code below gives the consecutive numbers:
for i in range(len(v)-1):
if v[i+1]==v[i]+1:
if v[i-1]!=v[i]-1:
print(v[i])
print(v[i]+1)
Output:1,2,3,8,9,10,11
I was thinking of using something like below and add the outputs in a new list and then find out max value of the list.I can't think of a logic to combining those 2 ideas.
for i in range(len(v)-1):
for j in range(i+1,len(v)):
if v[j]-v[i]
I looked at this example but I think that solution is different from what I am looking for. Thanks in advance for your time and suggestion.
You're pretty close. Store the current run as a list, update the best list when necessary and clear it whenever you break the run. Care should be taken to include the last grouping if it appears at the very end of the list.
v = [1,2,3,11,5,8,9,10,11,6,4]
best = []
run = []
for i in range(1, len(v) + 1):
run.append(v[i-1])
if i == len(v) or v[i-1] + 1 != v[i]:
if len(best) < len(run):
best = run
run = []
print(best)
Output:
[8, 9, 10, 11]
You can iterate over the list and keep appending the item to the potentially longest consecutive sub-list, and start a new one if the item not consecutive to the last item of the sub-list, and assign the sub-list as the new longest sub-list if it is longer than the current longest sub-list:
candidate = []
longest = []
for i in v:
if candidate and candidate[-1] != i - 1:
if len(candidate) > len(longest):
longest = candidate
candidate = []
candidate.append(i)
if len(candidate) > len(longest):
longest = candidate
longest becomes:
[8, 9, 10, 11]
You can use a sliding window shrinking the size and check if all numbers are in ascending order:
from itertools import islice
def window(seq, n=2):
"Returns a sliding window (of width n) over data from the iterable"
" s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ... "
it = iter(seq)
result = tuple(islice(it, n))
if len(result) == n:
yield result
for elem in it:
result = result[1:] + (elem,)
yield result
def longestConsecutiveSeq(s):
for seq in (window(s, i) for i in range(len(s)-1, 1, -1)):
for subseq in seq:
l = list(subseq)
if all((y-x) == 1 for (x, y) in zip(l, l[1:])):
return l
print(longestConsecutiveSeq([1,2,3,11,5,8,9,10,11,6,4]))
Result: [8, 9, 10, 11]
This algorithm will stop on the first encounter of biggest size.
You can use pandas:
import pandas as pd
v=[1,2,3,11,5,8,9,10,11,6,4]
s = pd.Series(v)
sgc = s.groupby(s.diff().ne(1).cumsum()).transform('count')
result = s[sgc == sgc.max()].tolist()
result
Output:
[8, 9, 10, 11]
Details:
Create a pandas series, use diff to calculate the difference from the previous value. Next, use ne to create a boolean series where the difference is not equal to 1, then cumsum this boolean series to create groups, where consective values are all grouped together. Use,groupby with transform to a count of the group size to each record. Lastly, use boolean indexing to only select parts of the series where the count in a group is equal to the max count of all groups. Then convert to array using tolist.
You can use differences between elements and their indices to group elements using the function ‘groupby()’:
from itertools import groupby
l = [1, 2, 3, 11, 5, 8, 9, 10, 11, 6, 4]
gb = groupby(enumerate(l), lambda x: x[0] - x[1])
max(([i for _, i in g] for _, g in gb), key=len)
# [8, 9, 10, 11]

Sorting a list depending on multiple elements within the list

is it possible to implement a python key for sorting depending on multiple list elements?
For example:
list = [1, 2, 3, 4]
And I want to sort the list depending on the difference between two elements, so that the delta is maximized between them.
Expected result:
list = [1, 4, 2, 3] # delta = 4-1 + 4-2 + 3-2 = 6
Other result would also be possible, but 1 is before 4 in the origin array so 1 should be taken first:
list = [4, 1, 3, 2] # delta = 4-1 + 3-1 + 3-2 = 6
I want to use python sorted like:
sorted(list, key=lambda e1, e2: abs(e1-e2))
Is there any possibility to do it this way? Maybe there is another library which could be used.
Since (as you showed us) there could be multiple different results - it means that this sorting/order is not deterministic and hence you can't apply a key function to it.
That said, it's easy to implement the sorting by yourself:
def my_sort(col):
res = []
while col:
_max = max(col)
col.remove(_max)
res.append(_max)
if col:
_min = min(col)
col.remove(_min)
res.append(_min)
return res
print(my_sort([1,2,3,4])) # [4, 1, 3, 2]
This solution runs in O(n^2) but it can be improved by sorting col in the beginning and then instead of looking for max and min we can extract the items in the beginning and the end of the list. By doing that we'll reduce the time complexity to O(n log(n))
EDIT
Per your comment below: if the index plays a role, again, it's not a "real" sorting :) that said, this solution can be engineered to keep the smaller index first and etc:
def my_sort(col):
res = []
while col:
_max = max(col)
max_index = col.index(_max)
col.remove(_max)
if col:
_min = min(col)
min_index = col.index(_min)
col.remove(_min)
if max_index < min_index:
res.extend([_max, _min])
else:
res.extend([_min, _max])
continue
res.append((_max))
return res
print(my_sort([1,2,3,4])) # [1, 4, 2, 3]
This solution is quite brute force; however, it is still a possibility:
from itertools import permutations
list = [1, 2, 3, 4]
final_list = ((i, sum(abs(i[b]-i[b+1]) for b in range(len(i)-1))) for i in permutations(list, len(list)))
final_lists = max(final_list, key=lambda x:x[-1])
Output:
((2, 4, 1, 3), 7)
Note that the output is in the form: (list, total_sum))

Identify least common number in a list python

With a list of numbers, each number can appear multiple times, i need to find the least common number in the list. If different numbers have the same lowest frequency, the result is the one occurring last in the list. An example, the least common integer in [1, 7, 2, 1, 2] is 7 (not 2, as originally said). And the list needs to stay unsorted
I have the following but it always sets the last entry to leastCommon
def least_common_in_unsorted(integers):
leastCommon = integers[0]
check = 1
appears = 1
for currentPosition in integers:
if currentPosition == leastCommon:
appears + 1
elif currentPosition != leastCommon:
if check <= appears:
check = 1
appears = 1
leastCommon = currentPosition
return leastCommon
Any help would be greatly appreciated
It is the simplest way come in my mind right now:
a = [1, 7, 2, 1, 2]
c, least = len(a), 0
for x in a:
if a.count(x) <= c :
c = a.count(x)
least = x
least # 7
and in two least items it will return the last occurrence one.
a = [1, 7, 2, 1, 2, 7] # least = 7
Using the Counter:
from collections import Counter
lst = [1, 7, 2, 1, 2]
cnt = Counter(lst)
mincnt = min(cnt.values())
minval = next(n for n in reversed(lst) if cnt[n] == mincnt)
print(minval) #7
This answer is based on #offtoffel to incorporate multiple items of the same number of occurrences while choosing the last occurring one:
def least_common(lst):
return min(lst, key=lambda x: (lst.count(x), lst[::-1].index(x)))
print(least_common([1,2,1,2]))
# 2
print(least_common([1,2,7,1,2]))
# 7
Edit: I noticed that there’s a even simpler solution, that is efficient and effective (just reverse the list in the beginning and min will keep the last value that has the minimum count):
def least_common(lst):
lst = lst[::-1]
return min(lst, key=lst.count)
print(least_common([1,2,1,2]))
# 2
print(least_common([1,2,7,1,2]))
# 7
Short but inefficient:
>>> min(a[::-1], key=a.count)
7
Efficient version using collections.Counter:
>>> min(a[::-1], key=Counter(a).get)
7
def least_common(lst):
return min(set(lst), key=lst.count)
Edit: sorry, this does not always take the last list item with least occurancy, as demanded by the user...it works on the example, but not for every instance.

longest decreasing sublist inside a given list

I wanted to find the longest decreasing sub sequence inside a given list for example L = [1, 2, 1, 2, 1, 2, 1, 2, 1], the result should be [2,1] however I cant seem to produce that result. Can someone tell me why it doesn't work ? The output is something [0,2,1,2,1,2,1,2,1] Nevermind the first zero but the result should produce [2,1].
Here's a code I tried
L = [1, 2, 1, 2, 1, 2, 1, 2, 1]
current = [0]
smallest = []
for i in range(len(L)):
if i < (len(L)-1):
if L[i] >= L[i+1]:
current.append(L[i])
else :
if L[i] < current[-1]:
current.append(L[i])
elif i>= (len(L)-1):
if L[-1]<L[i-1]:
current.append(L[i])
else:
current = [i]
if len(current) > len(smallest):
smallest = current
Result : [0,2,1,2,1,2,1,2,1]
Desired Result : [2,1]
There are so many ways to solve this. In Py3 - using itertools.accumulate for dynamic programming:
>>> import operator as op
>>> import itertools as it
>>> L = [1, 2, 1, 2, 1, 2, 1, 2, 1]
>>> dyn = it.accumulate(it.chain([0], zip(L, L[1:])), lambda x, y: (x+1)*(y[0]>y[1]))
>>> i, l = max(enumerate(dyn), key=op.itemgetter(1))
>>> L[i-l:i+1]
[2, 1]
when you say current = [0], it actually adds 0 to the list, maybe you want current = [L[0]].
See this:
def longest_decreasing_sublist(a):
lds, current = [], [a[0]]
for val in a[1:]:
if val < current[-1]: current.append(val)
else:
lds = current[:] if len(current) > len(lds) else lds
current = [val]
lds = current[:] if len(current) > len(lds) else lds
return lds
L = [1, 2, 1, 2, 1, 2, 1, 2, 1]
print (longest_decreasing_sublist(L))
# [2, 1]
You would be better off engineering your code for maintainability and readability up front, so that you can easily test components of your software individually rather than trying to solve the entire thing at once. This is known as functional decomposition.
Think about what you need at the topmost level.
First, you need a way to get the longest decreasing subset at a given point in the array, so that you can intelligently compare the various sequences.
You can do that with code such as:
def getNextSequence(myList, index):
# Beyond list end, return empty list.
if index > len(myList):
return []
# Construct initial list, keep adding until either
# an increase is found or no elements left.
sequence = [ myList[index] ]
addIdx = index + 1
while addIdx < len(myList) and myList[addIdx] <= myList[addIdx - 1]:
sequence.append(myList[addIdx])
addIdx += 1
# And there you have it, the sequence at a given point.
return sequence
Second, you need to be able to store the current longest one and check the lengths to see whether the current one is greater than the longest to date.
That breaks down to something like:
def getLongestSequence(myList):
# Initially no sequence, even a sequence of one
# will beat this.
longestSequence = []
# This index is where we are checking. Keep checking
# until all possibilities exhausted.
checkIndex = 0
while checkIndex < len(myList):
# Get current sequence, save it if it's longer
# than the currently longest one.
currentSequence = getNextSequence(myList, index)
if len(currentSequence) > len(longestSequence):
longestSequence = currentSequence
# Adjust index to next checking point.
checkIndex += len(currentSequence)
# All done, just return the longest sequence.
return longestSequence
The important thing there (other than readability, of course) is the changing of the index. Once you've established a decreasing sequence, you never need to look anywhere inside it since any partial sequence within it will naturally be shorter than the whole.
In other words, if you have 8, 7, 6, 5, 9, a sequence starting at 7 cannot be (by definition) longer than the sequence starting at 8. Hence you can skip straight to the 9.

How to perform a pairwise swap of a list?

I want to write a small code in python that Swap Elements in a list this program will accept a list, and will return a list that exchanges the positions of each pair of adjacent elements: positions 0 and 1, positions 2 and 3, and so on. If the list has an odd number of elements, then the element in the last position stays “in place”.
Before: [1,2,3,4,5]
After: [2,1,4,3,5]
This looks unpythonic. What is the Python way to do it?
Here is a neat one, if you are always guaranteed to have even numbers:
nums = [1,2,3,4,5,6]
print([nums[i^1] for i in range(len(nums))])
>>[2, 1, 4, 3, 6, 5]
Explanation:
print (0^1) #1
print (1^1) #0
print (2^1) #3
print (3^1) #2
print (4^1) #5
print (5^1) #4
As a refresher, the XOR has the following effect:
A B | Output
---------------
0 0 0
0 1 1
1 0 1
1 1 0
And the official description: Each bit of the output is the same as the corresponding bit in x if that bit in y is 0, and it's the complement of the bit in x if that bit in y is 1.
Most pythonic way:
def swappairwise(a):
l = len(a)&~1
a[1:l:2],a[:l:2] = a[:l:2],a[1:l:2]
Building on the answer above from #Arpegius, here a, hopefully, somewhat more readable solution. Uses the same approach.
def swap_list_pairwise(lis):
"""Pairwise swap of all elements in a list.
If the number of elements is odd, the leftover element
stays at its place.
"""
length = len(lis)
# Stop at second last if length is odd, otherwise use full list.
end = length - 1 if length % 2 else length
lis[1:end:2], lis[:end:2] = lis[:end:2], lis[1:end:2]
If you want "pythonic", try "How do you split a list into evenly sized chunks in Python?", followed by a map() that reverses every chunk. May not be too performant, though.
(Oh, forgot the flattening of the list at the end)
Here is a way:
def pairwise_swap(iterable):
for i, value in enumerate(iterable):
if i % 2 == 0:
saved = value
else:
yield value
yield saved
>>> list(pairwise_swap(range(10)))
[1, 0, 3, 2, 5, 4, 7, 6, 9, 8]
Nice approach by #Alok above. This should fix the missing last
element if the number of elements is odd.
def pairwise_swap(iterable):
"""Pairwise swap of all elements in an iterable.
If the number of elements is odd, the leftover element
stays at its place.
"""
for i, value in enumerate(iterable):
if i % 2 == 0:
saved = value
else:
yield value
yield saved
# Don't forget the last element if `iterable` has an odd
# number of elements. Since counting starts a zero, we need
# to test if `i` is even.
if iterable and i % 2 == 0:
yield value
How about trying this one out?
>>> l = [1,2,3,4,5,6]
>>> pl = [l[i:i+2] for i in range(0,len(l),2)]
>>> pl
[[1, 2], [3, 4], [5, 6]]
>>> for i in pl:
... i[0],i[1] = i[1],i[0]
... print i
...
[2, 1]
[4, 3]
[6, 5]
>>> pl
[[2, 1], [4, 3], [6, 5]]
>>>
>>> zpl = [i for sublist in pl for i in sublist]
>>> zpl
[2, 1, 4, 3, 6, 5]
>>>
I tried to resolve in the easiest way out.
It will work for both even and odd elements. if list elements are even
first part will work if not else will do his task.
a = list(input("Put your list here: "))
len_a = len(a)
last_element = len_a-1
result = 0
if len_a % 2==0:
for i in range(0, len(a), 2):
a[i], a[i + 1] = a[i + 1], a[i]
print("its if",a)
else:
a_n = a.pop()
for i in range(0, len(a), 2):
a[i], a[i + 1] = a[i + 1], a[i]
a.insert(0,a_n)
# a.insert(last_element,a_n) if you want last element remain unmodified
print("its else:",a)
This is what worked for me. Hopefully, it can help others. I think about using boxes. The value in the first position needs a temporary place to be before it can be swapped, so I assign it to x and then make the swap. Incrementing by 2 will skip over the last element, so it's okay with odd-numbered lists as well.
a = [int(s) for s in input().split()]
i = 0
while i in range(0, len(a) - 1):
x = a[i]
a[i] = a[i + 1]
a[i + 1] = x
i += 2
print(a)
Found this to be much simpler solution and handels the list with odd elements
elements = list(map(int,input().split()))
swap = []
i = 0
while i < len(elements):
if i+1 < len(elements):
swap.append(elements[i+1])
swap.append(elements[i])
else:
swap.append(elements[-1])
i = i+2
print(swap)

Categories