Ternary heapsort in python 2.7

Ternary heapsort in python 2.7 - python

I am trying to implement ternary heap sort, where every parent of the heap has 3 children, however I can not get my code to return a sorted list. I think I may be making a logical error somewhere but can not spot it, any help fixing this code would be much appreciated.
# Exchange the elements at positions i and j of the module-level list `sqc`.
def swap(i, j):
sqc[i], sqc[j] = sqc[j], sqc[i]
# Sift-down step of the question's ternary heapsort: compares sqc[i] with up to
# three candidate positions below `end`, swaps with the largest one if it is
# bigger, and recurses into that position.
# NOTE(review): the candidate positions are computed as 3*(i+1), 3*(i+2) and
# 3*(i+3). The corrected heapify in the answer further down uses
# firstchild = 3*i + 1, i.e. children 3*i+1, 3*i+2 and 3*i+3.
def heapify(end,i):
l=3 * (i + 1)
m=3 * (i + 2)
r=3 * (i + 3)
# NOTE(review): `max` shadows the builtin of the same name inside this function.
max=i
if l < end and sqc[i] < sqc[l]:
max = l
if r < end and sqc[max] < sqc[r]:
max = r
if m < end and sqc[max] < sqc[m]:
max = m
if max != i:
swap(i, max)
heapify(end, max)
# Driver of the question's heapsort, working on the module-level list `sqc`:
# first heapify every index from `start` down to 0, then repeatedly swap the
# root with the last unsorted slot and re-heapify the shortened prefix.
def heap_sort():
end = len(sqc)
# NOTE(review): `/` is integer division only on Python 2 (the version named in
# the title); on Python 3 it yields a float, which range() does not accept.
start = end / 2 - 1
for i in range(start, -1, -1):
heapify(end, i)
for i in range(end-1, 0, -1):
swap(i, 0)
heapify(i, 0)
# Sample input from the question; heap_sort() reads and reorders this global list.
sqc = [2, 7, 1, -2, 56, 5, 3]
heap_sort()
print(sqc)
[7, 1, -2, 56, 5, 2, 3] is what is returned, completely out of order.

Code
def heapSort3(a):
    """Sort ``a`` in place into DESCENDING order using a ternary min-heap.

    The children of heap slot ``k`` are slots ``3k + 1``, ``3k + 2`` and
    ``3k + 3``.  The smallest remaining item is repeatedly moved to the end
    of the shrinking heap, so the final order is largest-first.

    Returns the same list object that was passed in.
    """
    def sift(start, count):
        # Push a[start] down until it is no larger than its children in a[:count].
        root = start
        while True:
            first = 3 * root + 1
            if first >= count:
                return
            # Smallest child; ties resolve to the lowest index.
            child = min(range(first, min(first + 3, count)),
                        key=a.__getitem__)
            if not a[root] > a[child]:
                return
            a[root], a[child] = a[child], a[root]
            root = child

    count = len(a)
    # Only slots with at least one child need sifting; the last such slot is
    # (count - 2) // 3, which gives (count + 1) // 3 parents in total.
    for start in reversed(range((count + 1) // 3)):
        sift(start, count)
    # Move the current minimum behind the heap, then repair the shorter heap.
    for end in range(count - 1, 0, -1):
        a[end], a[0] = a[0], a[end]
        sift(0, end)
    return a
Test on sorted and reverse-sorted
# heapSort3 sorts in DESCENDING order, so both inputs must come back reversed.
# print() with one pre-formatted string gives the same output on Python 2 and 3.
for i in range(2, 25):
    print('-' * 30)
    expected = list(reversed(range(i)))
    data = list(range(i))
    sorted_data = heapSort3(data)
    print('{} {}'.format(i, sorted_data))
    assert sorted_data == expected
    data = list(reversed(range(i)))
    sorted_data = heapSort3(data)
    print('{} {}'.format(i, sorted_data))
    assert sorted_data == expected
Test on shuffled data
from random import shuffle

# Randomised check: every shuffle of range(i) must come back in descending order.
for i in range(1, 100):
    # One pre-formatted string prints identically on Python 2 and Python 3.
    print('{} {}'.format('-' * 30, i))
    expected = list(reversed(range(i)))
    for _ in range(5000):
        data = list(range(i))
        shuffle(data)
        sorted_data = heapSort3(data)
        assert sorted_data == expected
Reference
Adapted from this code. Sort of.

Here is a solution that merely corrects your heapify function:
def swap(series, i, j):
    """Exchange ``series[i]`` and ``series[j]`` in place."""
    series[i], series[j] = series[j], series[i]
def heapify(series, end, i):
    """Sift ``series[i]`` down so the subtree rooted at ``i`` is a max-heap.

    Only ``series[:end]`` belongs to the heap.  The children of slot ``k``
    are slots ``3k + 1``, ``3k + 2`` and ``3k + 3``.  Works in place and
    returns ``None``.
    """
    # Iterative form of the sift-down: follow the swapped element downwards.
    while True:
        firstchild = 3 * i + 1
        lastchild = min(firstchild + 3, end)
        candidates = [i] + list(range(firstchild, lastchild))
        # Largest (value, index) pair among the node and its children.
        maxindex = max(candidates, key=lambda j: (series[j], j))
        if maxindex == i:
            return
        series[i], series[maxindex] = series[maxindex], series[i]
        i = maxindex
def heap_sort(series):
    """Sort ``series`` in place in ascending order with a ternary max-heap.

    Returns the same list object.
    """
    end = len(series)
    # Floor division: plain `/` produces a float on Python 3, which range()
    # rejects with a TypeError.
    start = end // 2 - 1
    # Build the heap bottom-up.
    for i in range(start, -1, -1):
        heapify(series, end, i)
    # Move the current maximum behind the heap, then repair the shorter heap.
    for i in range(end - 1, 0, -1):
        swap(series, i, 0)
        heapify(series, i, 0)
    return series
heapsort changes
You can also change your heapsort code to this, but it is not necessary:
def heap_sort(series):
    """Sort ``series`` in place in ascending order with a ternary max-heap.

    Returns the same list object.
    """
    end = len(series)
    # In a ternary heap the last slot that has a child is (end - 2) // 3, so
    # there are (end + 1) // 3 parents; leaves need no sifting.
    for i in reversed(range((end + 1) // 3)):
        heapify(series, end, i)
    # Stop at 1: with a single element left there is nothing to swap or repair.
    for i in range(end - 1, 0, -1):
        swap(series, i, 0)
        heapify(series, i, 0)
    return series
Test on your data
>>> sqc = [2, 7, 1, -2, 56, 5, 3]
>>> print heap_sort(sqc)
[-2, 1, 2, 3, 5, 7, 56]
Test on random shuffled data
from random import shuffle

# Randomised check: every shuffle of range(i) must come back in ascending order.
for i in range(1, 100):
    # One pre-formatted string prints identically on Python 2 and Python 3.
    print('{} {}'.format('-' * 30, i))
    expected = list(range(i))
    for _ in range(5000):
        data = list(range(i))
        shuffle(data)
        sorted_data = heap_sort(data)
        assert sorted_data == expected

Related

Getting GCD of all numbers of an array except elements in a given range

I have written a program to calculate GCD of all numbers of an array except elements in a given range.
Given an integer array and a q number of queries.
Each query contains the number left and right. I need to find out the greatest common divisor of all the integers except within the given range of the query.
arr = [2, 6, 9]
query = [(0, 0), (1, 1), (1, 2)]
My Output: [3, 1, 1]
Expected Output: [3, 1, 2]
Code:
def computeGCD(a, b):
    """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm).

    ``computeGCD(a, 0)`` is ``a``.
    """
    # Replace (a, b) by (b, a mod b) until the remainder vanishes.
    while b != 0:
        a, b = b, a % b
    return a
# Question's attempt: for every (left, right) pair in `query`, combine a prefix
# GCD table and a suffix GCD table to get the GCD of all elements of `arr`
# outside that index range. Returns the list of per-query results.
def getGCD(arr, query):
n = len(arr)
# NOTE(review): chained assignment binds BOTH names to the same list object, so
# filling right_gcd below overwrites the values just stored through left_gcd.
left_gcd = right_gcd = [0 for i in range(n)]
left_gcd[0] = arr[0]
for i in range(1, n):
left_gcd[i] = computeGCD(left_gcd[i-1], arr[i])
# Debug output of the prefix table.
print(left_gcd)
right_gcd[n-1] = arr[n-1]
for i in range(n-2, -1, -1):
right_gcd[i] = computeGCD(right_gcd[i+1], arr[i])
# Debug output of the suffix table.
print(right_gcd)
res = []
for q in query:
if q[0] == 0:
# NOTE(review): for a query covering the whole array (q[1] == n-1) this reads
# right_gcd[n], which is past the end of the list.
gcd_outside_q = right_gcd[q[1]+1]
elif q[1] == n-1:
gcd_outside_q = left_gcd[q[0]-1]
else:
gcd_outside_q = computeGCD(left_gcd[q[0]-1], right_gcd[q[1]+1])
res.append(gcd_outside_q)
return res
getGCD(arr, query)
Please let me know what I am missing.

The problem is with line:
left_gcd = right_gcd = [0 for i in range(n)]
This way, left_gcd and right_gcd refer to the same list. If you change one of them, the other changes too. You can check this by printing left_gcd after the right_gcd block:
...
left_gcd[0] = arr[0]
for i in range(1, n):
left_gcd[i] = computeGCD(left_gcd[i-1], arr[i])
print(left_gcd)
right_gcd[n-1] = arr[n-1]
for i in range(n-2, -1, -1):
right_gcd[i] = computeGCD(right_gcd[i+1], arr[i])
print(right_gcd)
print(left_gcd)
...
The output is:
[2, 2, 1]
[1, 3, 9]
[1, 3, 9]
An easy way to fix this is to change it like so:
def getGCD(arr, query):
n = len(arr)
left_gcd = [0 for i in range(n)]
right_gcd = [0 for i in range(n)]
...

How to calculate the 1st and 3rd quartile of a list without import?

I need to write a function q1 that calculates the first quartile of a list called lst.
I am not allowed to use any imports!
When I run my function the answer is not correct. What can I change?
# Question's attempt at the first quartile of lst (sorts lst in place first).
def q1(lst):
lst.sort()
middle = len(lst)//2
# NOTE(review): median() receives the one-element list [middle], i.e. the index
# itself wrapped in a list, not a part of lst; no element of lst is passed on.
k1 = median([middle])
return k1
This is my median function:
def median(lst):
    """Return the median of the values in ``lst``.

    For an odd number of items this is the middle item of the sorted data;
    for an even number it is the mean of the two middle items.  The input is
    not modified.  An empty input raises ``IndexError``.
    """
    ordered = sorted(lst)  # sorted copy, so the caller's list keeps its order
    half = len(ordered) // 2
    # Indices `half` and `-half - 1` coincide for odd lengths and are the two
    # middle positions for even lengths.
    upper = ordered[half]
    lower = ordered[-half - 1]
    # Divide by 2.0 so Python 2 does not truncate the mean of two integers.
    return (upper + lower) / 2.0

The median is the middle item of the sorted list, i.e. the item at index len(lst)//2.
Accordingly, what is the first and third quartile of a list?
firstQIndex = len(lst)//4
thirdQIndex = (3*len(lst))//4
firstQ = lst[firstQIndex]
thirdQ = lst[thirdQIndex]

You can try this function (standalone, no imports):
def quantile(x, y):
    """Return the ``y``-quantile of the values in ``x`` (no interpolation).

    ``x`` is any non-empty sequence of comparable values; it is sorted
    internally and left unmodified.  ``y`` is expected to lie in [0, 1].

    The 1-based rank is ``floor(y * len(x)) + 1``, capped at ``len(x)``.
    This is what ``round(y * len(x) + 0.5)`` yields with round-half-up
    (Python 2), written so that the result does not depend on Python 3
    rounding ties to even, and so that ``y == 0`` selects the smallest item
    and ``y == 1`` the largest instead of indexing out of range.
    """
    ordered = sorted(x)
    size = len(ordered)
    # int() truncates, which equals floor() for the non-negative product.
    n = min(size, int(y * size) + 1)
    return ordered[n - 1]
And call like:
print(quantile(myList, 0.1))
print(quantile(myList, 0.3))

Implemented a simple quantile function that uses linear interpolation to get the value between two indices, if required.
def calc_quantile(lst, q):
    """Return the ``q``-quantile of ``lst`` using linear interpolation.

    The fractional position ``(len(lst) - 1) * q`` in the sorted data is
    located; when it falls between two items, the result is their weighted
    mean.  ``lst`` itself is not modified.
    """
    s_lst = sorted(lst)
    idx = (len(s_lst) - 1) * q
    int_idx = int(idx)
    remainder = idx % 1
    if remainder > 0:
        # Position lies between two items: weight them by distance.
        lower_val = s_lst[int_idx]
        upper_val = s_lst[int_idx + 1]
        return lower_val * (1 - remainder) + upper_val * remainder
    # Position hits an item exactly.
    return s_lst[int_idx]
Edited my reply with below extended code, to match your test-cases:
def calc_quantile(lst, q, method='linear'):
    """Return the ``q``-quantile of ``lst``.

    The fractional position ``(len(lst) - 1) * q`` in the sorted data is
    located.  When it falls between two items, ``method`` decides the result:

    * ``'linear'``    - weighted mean of the two neighbours (default)
    * ``'left-side'`` - the lower neighbour
    * ``'average'``   - plain mean of the two neighbours

    Raises:
        ValueError: if ``lst`` is empty or ``q`` is outside [0, 1].
        NotImplementedError: if ``method`` is not one of the names above.
    """
    # The checks must *raise*; returning the exception object would hand the
    # caller an exception instance as if it were the quantile.
    if not lst:
        raise ValueError("list is empty, cannot calculate quantile")
    if not 0 <= q <= 1:
        raise ValueError("q must be in the domain 0 to 1")
    if method not in ('linear', 'left-side', 'average'):
        raise NotImplementedError("the chosen method has not been implemented")

    s_lst = sorted(lst)
    idx = (len(s_lst) - 1) * q
    int_idx = int(idx)
    remainder = idx % 1
    if remainder > 0 and method != 'left-side':
        lower_val = s_lst[int_idx]
        upper_val = s_lst[int_idx + 1]
        if method == 'linear':
            return lower_val * (1 - remainder) + upper_val * remainder
        # 'average': divide by 2.0 so Python 2 does not truncate integers.
        return (lower_val + upper_val) / 2.0
    return s_lst[int_idx]
print(calc_quantile([1, 3, 4, 6, 4, 2], 0.25, 'left-side')) # 2
print(calc_quantile([1, 3, 5, 6, 1, 4, 2], 0.25, 'left-side')) # 1
print(calc_quantile([1, 3, 3, 5, 6, 2, 4, 1], 0.25, 'average')) # 1.5

O(n) solution for finding maximum sum of differences python 3.x?

I was wondering, given a list of integers, say l, and if we are allowed to select 3 integers from this list, say left, middle, right, where middle > left, right and left, middle, right appear in that order in the list (ie. index(left)<index(middle)<index(right)), does there exist an O(n) solution for finding the maximum of middle - left + middle - right? You may suppose that lists that do not satisfy these conditions do not appear (eg. [5, 0, 5] as pointed out by Eric Duminil)
Currently, I am able to come up with what I believe is (roughly) an O(n^2) solution (correct me if I am wrong).
Essentially, my current idea is to do:
# Question's brute-force attempt: every candidate middle index rescans both
# sides of the list with min(), so the work grows quadratically with the length.
# N and l are defined outside this excerpt (presumably N == len(l)).
maximum = 0
for idx in range(1, N - 1):
left = min(l[0: idx])
right = min(l[idx + 1:])
middle = l[idx]
if left < middle and right < middle:
new_max = middle - left + middle - right
maximum = max(new_max, maximum)
Help/hints would be greatly appreciated.
Thanks!

You can run through your numbers once, keeping a running minimum value, and storing it at each step, so that at the end you know what the minimum value is to the left of each index.
That's O(n).
Similarly, you can run through all your numbers once from right to left, and work out what the minimum value is to the right of each index. That is O(n).
Then you can run through each possible middle value, and take the left and right values from your earlier computations. That is O(n).
O(n) + O(n) + O(n) = O(n).

Here's a way to calculate the minimums, left and right of every index, in O(n):
import random

N = 10
l = [random.randrange(N) for _ in range(N)]
print(l)
# => [9, 9, 3, 4, 6, 7, 0, 0, 7, 6]

# min_lefts[i] is the smallest value in l[0..i] (running minimum, left to right).
min_lefts = []
min_left = float("inf")
for e in l:
    if e < min_left:
        min_left = e
    min_lefts.append(min_left)
print(min_lefts)
# => [9, 9, 3, 3, 3, 3, 0, 0, 0, 0]

# min_rights[i] is the smallest value in l[i..N-1] (running minimum, right to left).
min_rights = [None for _ in range(N)]
min_right = float("inf")
for i in range(N - 1, -1, -1):
    e = l[i]
    if e < min_right:
        min_right = e
    min_rights[i] = min_right
print(min_rights)
# => [0, 0, 0, 0, 0, 0, 0, 0, 6, 6]
You can now iterate over every middle element in l (idx between 1 and N-2), and find the maximum of 2 * l[idx] - min_rights[idx] - min_lefts[idx]. This operation is also O(n):
# Middle candidates are indices 1 .. N-2 inclusive, hence the exclusive stop N-1.
print(max(2 * l[i] - min_rights[i] - min_lefts[i] for i in range(1, N - 1)))
It outputs :
11
which is 2 * 7 - 0 - 3.

The trick is that the minimum value of the list is always part of the solution (as either left or right).
Find minimum of the list, which is O(n). Now this minimum element will be either left or right.
Find maximum of (2x-y), where idx(x) > idx(y), and idx(x) < idx(min), that is check the left part of the list
Find max(2x-y), where idx(x) < idx(y), and idx(x) > idx(min), that is check the right part of the list
Now take maximum of the steps 2 and 3, which is your left/middle (or right/middle).

Here are some timings! Feel free to edit the timing code and/or add new entries.
from timeit import timeit

# Each setupN string builds a random list of N integers below 1000; timeit
# executes it once before the timed statement.
setup10 = '''
import numpy.random as nprnd
lst = list(nprnd.randint(1000, size=10))
'''
setup100 = '''
import numpy.random as nprnd
lst = list(nprnd.randint(1000, size=100))
'''
setup1000 = '''
import numpy.random as nprnd
lst = list(nprnd.randint(1000, size=1000))
'''

# Source of the benchmarked functions.  timeit compiles this text as setup
# code, so the indentation inside the string is significant.
fsetup = '''
import sys
def f2(lst):
    N = len(lst)
    maximum = 0
    for idx in range(1, N - 1):
        left = min(lst[0: idx])
        right = min(lst[idx + 1:])
        middle = lst[idx]
        if left < middle and right < middle:
            new_max = middle - left + middle - right
            maximum = max(new_max, maximum)
    return maximum
def eric(lst):
    N = len(lst)
    min_lefts = []
    min_left = float("inf")
    min_rights = [None for _ in range(N)]
    min_right = float("inf")
    for i in range(N):
        e = lst[i]
        if e < min_left:
            min_left = e
        min_lefts.append(min_left)
    for i in range(N-1,-1,-1):
        e = lst[i]
        if e < min_right:
            min_right = e
        min_rights[i] = min_right
    return max(2 * lst[i] - min_rights[i] - min_lefts[i] for i in range(1, N-1))
def bpl(lst):
    res = -sys.maxsize
    a = sys.maxsize
    b = -sys.maxsize
    c = sys.maxsize
    for i, v in enumerate(lst[1:-1]):
        a = min(lst[i], a)
        c = min(lst[i + 2], c)
        b = max(lst[i], b)
        res = max(2 * b - a - c, res)
    return res
def meow(l):
    N = len(l)
    right_min = (N - 2) * [sys.maxsize]
    right_min[0] = l[N - 1]
    for i in range(3, N):
        right_min[i - 2] = min(right_min[i - 2], l[N - i + 1])
    left = l[2]
    maximum = 2*l[1] - left - right_min[N - 3]
    for idx in range(2, N - 1):
        left = min(left, l[idx-1])
        right = right_min[N - idx - 2]
        middle = l[idx]
        if left < middle and right < middle:
            new_max = middle - left + middle - right
            maximum = max(new_max, maximum)
    return maximum
'''
# Time every implementation on every input size (100 repetitions each).  The
# size in each label is taken from the loop so it cannot drift from the setup
# actually used.
for position, (size, setup) in enumerate(
        ((10, setup10), (100, setup100), (1000, setup1000))):
    if position:
        print()  # blank line between size groups
    for label, stmt in (('OP', 'f2(lst)'), ('eric', 'eric(lst)'),
                        ('bpl', 'bpl(lst)'), ('meow', 'meow(lst)')):
        elapsed = timeit(stmt=stmt, setup=setup + fsetup, number=100)
        print('{} with {}\t:{}'.format(label, size, elapsed))
10 elements on the list, 100 repetitions
OP :0.00102
eric :0.00117
bpl :0.00141
meow :0.00159
100 elements on the list, 100 repetitions
OP :0.03200
eric :0.00654
bpl :0.01023
meow :0.02011
1000 elements on the list, 100 repetitions
OP :2.34821
eric :0.06086
bpl :0.10305
meow :0.21190
And as a bonus an inefficient one-liner:
maximum = max(2*z -sum(x) for x, z in zip([[min(lst[:i+1]), min(lst[i+2:])] for i, _ in enumerate(lst[:-2])], lst[1:-1]))

possible solution:
import sys
import random

random.seed(1)
l = [random.randint(0, 100) for i in range(10)]
print(l)

n = len(l)
# min_right[j] is the smallest value strictly to the right of index j.
min_right = [sys.maxsize] * n
for j in range(n - 2, -1, -1):
    min_right[j] = min(l[j + 1], min_right[j + 1])

# Single left-to-right pass.  min_left is the smallest value strictly to the
# left of the current middle, so left, middle and right always appear in list
# order.  (Tracking independent running extremes does not guarantee that:
# [5, 1, 9, 1] must give 16, not 8.)
res = -sys.maxsize
min_left = sys.maxsize
for j in range(1, n - 1):
    min_left = min(min_left, l[j - 1])
    if min_left < l[j] > min_right[j]:
        res = max(res, 2 * l[j] - min_left - min_right[j])
print(res)
output:
[13, 85, 77, 25, 50, 45, 65, 79, 9, 2]
155

You are definitely on the right track, you just have to get rid of those min operations. So my hint for you is that you can pre-compute them (in linear time) beforehand, then look up the min in the loop, as you are already doing.
To clarify: you have to pre-compute min(list[0:i]) and min(list[i:n]) for all i's, before the part you already have. The idea is to store them in two arrays, say m1 and m2, such that m1[i] = min(list[0:i]) and m2[i] = min(list[i:n]). Then, use m1 and m2 in your loop
The challenge now is to compute m1 and m2 in linear time, meaning that you are not allowed to use the min function to compute them. If you have m1[i], how can you compute m1[i+1] using list[i+1]?

Arithmetic Sequences Slices in Python

I'm trying to write a function that takes a list of integers and finds all arithmetic sequences in it.
A = [-1, 1, 3, 3, 3, 2, 1, 0]
There are five arithmetic sequences in this list: (0, 2), (2,4), (4, 6), (4,7), (5,7) - these are indexes of first and last element of a sequence. A sequence is derived by the difference between elements.
As you see from the example above - the sequence must be longer than 2 elements (otherwise it would find a sequence between every two elements).
The function that I need to write must return the number of sequences it finds on the list - in this case it should return 5.
I'm kind of stuck - tried a few different approaches but failed miserably. The most recent thing I've done is:
# Question's attempt: walk the list with index x, compare the difference of the
# pair starting at x with the differences of later pairs, and record an "x,y"
# string in `slices` when the difference changes. Prints `slices` at the end.
def solution(A):
slices = []
x = 0
listlen = len(A)
while x < listlen-1:
print ("Current x:", x)
difference = A[x+1] - A[x]
#print ("1st diff: ", A[x+1], ",", A[x], " = ", difference)
for y in range(x+1, len(A)-1):
difference_2 = A[y+1] - A[y]
#print ("Next Items: ", A[y+1], A[y])
#print ("2nd diff: ", difference_2)
# NOTE(review): this `if` branch holds only a comment; a comment is not a
# statement, so Python needs at least `pass` here.
if (difference == difference_2):
#print ("I'm in a sequence, first element at index", x)
else:
#print ("I'm leaving a sequence, last element at index ", y)
slice = str(x) + "," + str(y)
slices.append(slice)
# NOTE(review): this is the only place x is advanced; presumably it sits inside
# the else branch, so x stays unchanged whenever the inner loop ends without
# reaching it - consistent with the endless loop the author reports.
x += 1
#print ("Changing X to find new slice: x:", x)
break
print (slices)
I messed something up with iterating X, at this point in time, it's an endless loop.

Maybe you can use a logic like this -
>>> A = [-1, 1, 3, 3, 3, 2, 1, 0]
>>> def indices(l):
... res = []
... for i in range(0,len(l)-2):
... diff = l[i+1] - l[i]
... for j in range(i+2,len(l)):
... if (l[j] - l[j-1]) == diff:
... res.append((i,j))
... else:
... break;
... return res
...
>>> indices(A)
[(0, 2), (2, 4), (4, 6), (4, 7), (5, 7)]

A brute-force approach is to check every slice of length >= 3: for each slice, take the difference between its first two elements and check that every x[i+1] - x[i] in the slice equals that difference:
def is_arith(x):
    """Return True when all consecutive differences in ``x`` are equal.

    Sequences with fewer than two items have no differences to compare and
    count as arithmetic.
    """
    if len(x) < 2:
        return True
    step = x[1] - x[0]  # computed once instead of once per element
    return all(x[i + 1] - x[i] == step for i in range(len(x) - 1))
def arith_count(A):
    """Count the slices of ``A`` with at least 3 items that are arithmetic.

    Brute force: every slice ``A[i:j]`` with ``j - i >= 3`` is tested with
    ``is_arith``.
    """
    n = len(A)
    return sum(is_arith(A[i:j])
               for i in range(n)
               for j in range(i + 3, n + 1))
A more efficient version:
def arith_sli(A):
    """Count the arithmetic slices (length >= 3) of ``A`` in one pass.

    The list is cut into maximal runs with a constant difference.  A run of
    length ``ln`` contains ``(ln - 2) * (ln - 1) // 2`` slices of length >= 3.
    """
    n = len(A)
    st, tot = 0, 0
    while st < n - 2:
        # Extend the run starting at st for as long as the step stays the same.
        end = st + 1
        dif = A[end] - A[st]
        while end < n - 1 and A[end + 1] - A[end] == dif:
            end += 1
        ln = end - st + 1
        if ln >= 3:
            tot += (ln - 2) * (ln - 1) // 2
        # The next run may start at the last element of this one.
        st = end
    return tot
(ln - 2) * (ln - 1) // 2 is the number of slices of length >= 3 contained in a progression of length ln. We then set st = end because two maximal progressions can share at most that one boundary element.
Both return the correct output, the latter is just considerably more efficient:
In [23]: A = [-1, 1, 3, 3, 3, 2, 1, 0]
In [24]: arith_sli(A)
Out[24]: 5
In [25]: arith_count(A)
Out[25]: 5
In [26]: A = [-1, 1, 3, 3, 4, 2, 1, 0,1,2]
In [27]: arith_sli(A)
Out[27]: 3
In [28]: arith_count(A)
Out[28]: 3

Get the wrong sort of an array (writing heap sort in Python)

I have written the following heap sort code and I get the wrong output (not sorted) at times and I can't seem to find why...any help will be much appreciated! (WARNING: I am still learning Python that might be the problem)
# Question's heapsort entry point: builds a max-heap from `a`, then is meant to
# swap the root with the last unsorted element and re-heapify repeatedly.
def heap_sort(self, a):
self.build_max_heap(a)
n = len(a)-1
i = len(a)-1
# NOTE(review): range(start, stop) counts upwards; for the 3-element test
# lists this is range(2, 1), which is empty, so the loop body never runs.
for i in range(len(a)-1, 1):
temp = a[0]
a[0] = a[i]
a[i] = temp
# NOTE(review): `heapsize` is not defined in this function, and the result is
# stored as an attribute on `a`.
a.heapsize = heapsize - 1
self.max_heapify(a, 0) #rebuild max heap at with new root
# Sift-down for a binary max-heap stored in `a`: if a child of i within
# a.heapsize is larger than a[i], swap with the largest child and recurse there.
# Reads the heap size from the attribute a.heapsize.
def max_heapify(self, a, i):
left = (2*(i+1))-1 #left child of i
right = 2*(i+1) #right child of i
largest = i
if left < a.heapsize and a[left] > a[i]:
largest = left
if right < a.heapsize and a[right] > a[largest]:
largest = right
if largest != i:
temp = a[largest]
a[largest] = a[i]
a[i] = temp
self.max_heapify(a, largest)
# Meant to turn `a` into a max-heap by calling max_heapify on the non-leaf
# positions.
def build_max_heap(self, a):
# NOTE(review): `heapsize` is only a local variable here; max_heapify reads
# a.heapsize instead.
heapsize = len(a)
i = int(heapsize/2)-1
# NOTE(review): range(i, 0) counts upwards, so it is empty whenever i >= 0
# (e.g. range(0, 0) for the 3-element test lists).
for i in range(i, 0):
self.max_heapify(a, i)
These are my tests:
# Each test below builds a small list, passes it to self.sort.heap_sort and
# prints whatever that call returns (Python 2 print statement).
#--Test for 0 in array--#
def zero_array(self):
a = [12,0,232]
print self.sort.heap_sort(a)
return
#--Test for duplicate in array--#
def duplicate_array(self):
a = [12, 12, 7]
print self.sort.heap_sort(a)
return
#--Test for all same values in array--#
def allsame_array(self):
a = [1,1,1]
print self.sort.heap_sort(a)
return
#--Test for negative values in array--#
def negative_array(self):
a = [-23, -2, 123]
print self.sort.heap_sort(a)
return
I get the following returned arrays (which are suppose to be sorted):
[12, 0, 232]
[12, 12, 7]
[1, 1, 1]
[-23, -2, 123]

I see one issue right away:
for i in range(len(a)-1, 1)
If you want to go down to 1 inclusive use:
for i in range(len(a)-1, 0, -1)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Ternary heapsort in python 2.7 - python

Related

Getting GCD of all numbers of an array except elements in a given range

How to calculate the 1st and 3rd quartile of a list without import?

O(n) solution for finding maximum sum of differences python 3.x?

Arithmetic Sequences Slices in Python

Get the wrong sort of an array (writing heap sort in Python)

Categories

Resources