Describing QuickSort algorithm - Python

I'm having a problem understanding this algorithm; how would you describe it, especially the while loops? I understand that this is an iterative function and that it uses the Hoare partition scheme. But what do the while loops do, and why does it use breaks?
def inplace_quick_sort_nonrecursive(S):
    if len(S) < 2:
        return S
    stack = []  # initialize the stack
    stack.append([0, len(S) - 1])
    while len(stack) > 0:  # loop till the stack is empty
        low, high = stack.pop()  # pop low and high indexes
        pivot = S[(low + high) // 2]  # pick a pivot; any S[] except S[high] works here
        i = low - 1  # Hoare partition
        j = high + 1
        while True:
            while True:  # emulates C's do { i++; } while (S[i] < pivot);
                i += 1
                if S[i] >= pivot:
                    break
            while True:  # emulates C's do { j--; } while (S[j] > pivot);
                j -= 1
                if S[j] <= pivot:
                    break
            if i >= j:  # if the indexes met or crossed, partitioning is done
                break
            S[i], S[j] = S[j], S[i]  # else swap the two out-of-place elements
        if j > low:  # push the sub-range indexes onto the stack
            stack.append([low, j])
        j += 1
        if high > j:
            stack.append([j, high])
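For anyone puzzling over the same thing: the inner while True loops emulate C's do-while, which Python lacks; the index must advance at least once before the test, hence the break. Below is a minimal sketch of just the partition step with the C idiom shown in comments (hoare_partition is an illustrative name, not part of the code above):

def hoare_partition(S, low, high):
    pivot = S[(low + high) // 2]
    i = low - 1
    j = high + 1
    while True:
        i += 1                       # C: do { i++; } while (S[i] < pivot);
        while S[i] < pivot:
            i += 1
        j -= 1                       # C: do { j--; } while (S[j] > pivot);
        while S[j] > pivot:
            j -= 1
        if i >= j:                   # indexes met or crossed: S[low..j] <= pivot <= S[j+1..high]
            return j
        S[i], S[j] = S[j], S[i]      # both elements are on the wrong side; swap them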

Related

Hello guys, I'm not experienced with programming and I would like to run the Mergesort algorithm in Python on a randomly generated list.

It takes the random list I generated, but the output is a lot different from what I expected. I think something goes wrong with the merging.

e.g.: Input --> [267, 168, 236, 190, 2, 500, 4, 45, 86]
Output --> [2, 2, 2, 2, 2, 4, 4, 4, 45, 45, 86]

Thank you in advance.
import numpy as np

def mergeSort(myList):
    if len(myList) > 1:
        mid = len(myList) // 2
        left = myList[:mid]
        right = myList[mid:]
        # Recursive call on each half
        mergeSort(left)
        mergeSort(right)
        # Two iterators for traversing the two halves
        i = 0
        j = 0
        # Iterator for the main list
        k = 0
        while i < len(left) and j < len(right):
            if left[i] <= right[j]:
                # The value from the left half has been used
                myList[k] = left[i]
                # Move the iterator forward
                i += 1
            else:
                myList[k] = right[j]
                j += 1
            # Move to the next slot
            k += 1
        # For all the remaining values
        while i < len(left):
            myList[k] = left[i]
            i += 1
            k += 1
        while j < len(right):
            myList[k] = right[j]
            j += 1
            k += 1

list1 = np.random.randint(low=1, high=800, size=100)
myList = list1
print("Given array is", end="\n")
print(myList)
mergeSort(myList)
print("Sorted array is: ", end="\n")
print(myList)
The problem with your code is the in-place manipulation of your original list: merge sort needs O(n) extra space, and because myList is a NumPy array, the slices myList[:mid] and myList[mid:] are views into the same buffer rather than independent copies, so writing into myList also overwrites left and right mid-merge.
You could simply rewrite your code like this to make it work (notice that it is your code; I only modified two lines, look for # changed ...):
import numpy as np

def mergeSort(myList):
    if len(myList) > 1:
        mid = len(myList) // 2
        left = myList[:mid].copy()  # changed this line
        right = myList[mid:].copy()  # changed this line
        # Recursive call on each half
        mergeSort(left)
        mergeSort(right)
        # Two iterators for traversing the two halves
        i = 0
        j = 0
        # Iterator for the main list
        k = 0
        while i < len(left) and j < len(right):
            if left[i] <= right[j]:
                # The value from the left half has been used
                myList[k] = left[i]
                # Move the iterator forward
                i += 1
            else:
                myList[k] = right[j]
                j += 1
            # Move to the next slot
            k += 1
        # For all the remaining values
        while i < len(left):
            myList[k] = left[i]
            i += 1
            k += 1
        while j < len(right):
            myList[k] = right[j]
            j += 1
            k += 1

myList = np.random.randint(low=1, high=800, size=100)
print("Given array is", end="\n")
print(myList)
mergeSort(myList)
print("Sorted array is: ", end="\n")
print(myList)
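The difference is easy to see in isolation: slicing a NumPy array returns a view that shares memory with the original, while slicing a plain Python list returns a new list. A small illustration (not part of the code above):

import numpy as np

a = np.array([3, 1, 2])
v = a[:2]      # NumPy slice: a view into a's buffer
v[0] = 99
print(a)       # [99  1  2] -- the original array changed

b = [3, 1, 2]
c = b[:2]      # list slice: an independent copy
c[0] = 99
print(b)       # [3, 1, 2] -- unchanged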
However, this is not a very optimized implementation of merge sort, especially in Python.
Here is a better implementation in pure Python (from: https://github.com/amirhm/algo-data-scratch/blob/main/Sorting/mergesort.ipynb):
def mergesort(l):
    def merge(l, r):
        lp, rp = 0, 0
        d = []
        while lp < len(l) and rp < len(r):
            if l[lp] < r[rp]:
                d.append(l[lp])
                lp += 1
            else:
                d.append(r[rp])
                rp += 1
        # append whichever half still has elements left
        if lp < len(l): d.extend(l[lp:])
        if rp < len(r): d.extend(r[rp:])
        return d
    if len(l) <= 1:
        return l
    n = len(l)
    return merge(mergesort(l[:n//2]), mergesort(l[n//2:]))
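A quick check against the input from the question:

print(mergesort([267, 168, 236, 190, 2, 500, 4, 45, 86]))
# [2, 4, 45, 86, 168, 190, 236, 267, 500]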
Or, much more abstract, in just a few lines:
def mergesort(l):
    def merge(l, r):
        res = []
        while l and r:  # pop whichever tail element is larger; res is built in descending order
            res.append(l.pop() if l[-1] > r[-1] else r.pop())
        return (l or r) + res[::-1]  # the untouched leftovers are the smallest elements
    if len(l) <= 1: return l
    return merge(mergesort(l[:len(l) // 2]), mergesort(l[len(l) // 2:]))

Optimising Performance of Codility Flags Python

I've written the algorithm below as a solution to Codility Flags. It passes the correctness checks, but it times out on most of the performance checks.
The complexity of this should be O(m**2), where m is the number of peaks in A and n is the length of A. However, the while potentialK > maxFlags loop should only execute until a number of flags satisfying the criteria is found. I'm not sure how to optimise this further for time complexity.
def solution(A):
    peaks = []
    distances = []
    if len(A) == 1: return 0
    for i in range(1, len(A) - 1):
        if A[i] > A[i-1] and A[i] > A[i+1]:
            if len(distances) == 0:
                distances.append(i)
            else:
                distances.append(i - peaks[-1])
            peaks.append(i)
    if len(peaks) == 0: return 0
    if len(peaks) == 1: return 1
    if len(peaks) == 2: return 2 if peaks[1] - peaks[0] >= 2 else 1
    potentialK = len(peaks)
    maxFlags = 0
    while potentialK > maxFlags:
        cumDistance = 0
        flags = 0
        firstFlag = True
        for i in range(1, len(distances)):
            cumDistance += distances[i]
            if cumDistance >= potentialK:
                if firstFlag:
                    flags += 2
                    firstFlag = False
                else:
                    flags += 1
                cumDistance = 0
        if flags > maxFlags and flags == potentialK:
            return flags
        elif flags > maxFlags and potentialK > flags:
            maxFlags = flags
        potentialK -= 1
    return maxFlags
Your algorithm is O(n^2), since there can be O(n) peaks in the input. Speeding it up relies on the fact that you know the input size in advance.
Observe that whenever at least one peak exists, the answer is an integer in the interval [1, ceil(sqrt(n))]. A distance requirement of less than 1 means you can't place any flags, and you can't place more than ceil(sqrt(n)) flags because of the distance requirement: k flags that are pairwise at least k apart span at least k*(k-1) positions, so k*(k-1) <= n, giving k = O(sqrt(n)) even if every element were somehow a peak (which isn't possible).
So one optimization you could make is that you only need to check O(sqrt(n)) potentialK values. (You posted this as an answer to your own question.) That brings the runtime down to O(n^(3/2)), since m is O(n), which is apparently fast enough to pass Codility's tests, but I think the runtime can still be improved (and so can the correctness).
We can make one more observation: there exists a positive integer i such that:
- for every positive integer j less than i, we can place j flags that are at least j apart, and
- for every positive integer j greater than i, we cannot place j flags that are at least j apart.
This enables us to use binary search:
import math

def does_distance_work(peaks, distance):
    peak_count = 1
    last_peak = peaks[0]
    for i in range(len(peaks)):
        if peaks[i] >= last_peak + distance:
            peak_count += 1
            last_peak = peaks[i]
    return peak_count >= distance

def solution(A):
    # Get the indices of the peaks.
    peaks = []
    for i in range(1, len(A) - 1):
        if A[i] > A[i - 1] and A[i] > A[i + 1]:
            peaks.append(i)
    # Return 0 if no peaks.
    if not peaks:
        return 0
    # Check the maximum possible value first.
    if does_distance_work(peaks, math.ceil(math.sqrt(len(A)))):
        return math.ceil(math.sqrt(len(A)))
    # If neither of the above two checks applies, find the largest i (as specified above) using binary search.
    low, high = 1, math.ceil(math.sqrt(len(A))) - 1
    while low <= high:
        mid = low + (high - low) // 2
        mid_valid_distance = does_distance_work(peaks, mid)
        mid_plus_one_valid_distance = does_distance_work(peaks, mid + 1)
        if not mid_valid_distance:
            high = mid
        elif mid_plus_one_valid_distance:
            low = mid + 1
        else:
            return mid
    # If we've reached this line, something has gone wrong.
    return -1
The binary search runs for O(log(sqrt(n))) iterations, with O(n) work per iteration, so the final runtime is O(n * log(sqrt(n))), which should (and does) pass the performance tests.
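As a quick sanity check (an example input of my own, not from the problem statement):

print(solution([1, 5, 3, 4, 3, 4, 1, 2, 3, 4, 6, 2]))  # peaks at indices 1, 3, 5, 10 -> prints 3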
I managed to optimize it as follows:
Since the distance between individual flags has to be >= the number of flags, the maximum number of flags is bounded by the square root of the distance between the first and last peak: sqrt(peaks[-1] - peaks[0]).
I was then able to update the initial value of potentialK to
potentialK = math.ceil(math.sqrt(peaks[-1] - peaks[0]))
which should substantially reduce the number of iterations of the outer while loop.
import math

def solution(A):
    peaks = []
    distances = []
    if len(A) == 1: return 0
    for i in range(1, len(A) - 1):
        if A[i] > A[i-1] and A[i] > A[i+1]:
            if len(distances) == 0:
                distances.append(i)
            else:
                distances.append(i - peaks[-1])
            peaks.append(i)
    if len(peaks) == 0: return 0
    if len(peaks) == 1: return 1
    if len(peaks) == 2: return 2 if peaks[1] - peaks[0] >= 2 else 1
    potentialK = math.ceil(math.sqrt(peaks[-1] - peaks[0]))
    maxFlags = 0
    while potentialK > maxFlags:
        cumDistance = 0
        flags = 0
        firstFlag = True
        for i in range(1, len(distances)):
            cumDistance += distances[i]
            if cumDistance >= potentialK:
                if firstFlag:
                    flags += 2
                    firstFlag = False
                else:
                    flags += 1
                cumDistance = 0
        if flags > maxFlags and flags == potentialK:
            return flags
        elif flags > maxFlags and potentialK > flags:
            maxFlags = flags
        potentialK -= 1
    return maxFlags
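The same sanity check as above also passes here:

print(solution([1, 5, 3, 4, 3, 4, 1, 2, 3, 4, 6, 2]))  # prints 3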

Merge sort in python: slicing vs iterating - impact on complexity

I want to check that my understanding of how Python handles slices is correct.
Here's my implementation of merge sort:
def merge_sort(L):
    def merge(a, b):
        i, j = 0, 0
        c = []
        while i < len(a) and j < len(b):
            if a[i] < b[j]:
                c.append(a[i])
                i += 1
            elif b[j] < a[i]:
                c.append(b[j])
                j += 1
        if a[i:]:
            c.extend(a[i:])
        if b[j:]:
            c.extend(b[j:])
        return c
    if len(L) <= 1:
        return L
    else:
        mid = len(L) // 2
        left = merge_sort(L[:mid])
        right = merge_sort(L[mid:])
        return merge(left, right)
Am I right in thinking that I could replace this:

if a[i:]:
    c.extend(a[i:])
if b[j:]:
    c.extend(b[j:])

With this:

while i < len(a):
    c.append(a[i])
    i += 1
while j < len(b):
    c.append(b[j])
    j += 1
And have exactly the same complexity? My understanding is that a slice costs time proportional to its length. Is that correct?
Does the fact that I'm taking the slice twice (first in the condition, then again inside it) double the cost?
Your implementation of merge sort has problems:
- in the merge function's main loop, you do nothing if the values a[i] and b[j] are equal, or more precisely if you have neither a[i] < b[j] nor b[j] < a[i]. This causes an infinite loop.
- there is no need to define merge as a local function; actually there is no need to make it a separate function at all. You could inline the code and save the overhead of a function call.
Here is a modified version:
def merge_sort(L):
    if len(L) <= 1:
        return L
    else:
        mid = len(L) // 2
        a = merge_sort(L[:mid])
        b = merge_sort(L[mid:])
        i, j = 0, 0
        c = []
        while i < len(a) and j < len(b):
            if a[i] <= b[j]:
                c.append(a[i])
                i += 1
            else:
                c.append(b[j])
                j += 1
        if a[i:]:
            c.extend(a[i:])
        else:
            c.extend(b[j:])
        return c
Regarding complexity, slicing or iterating makes no difference, since both operations have linear time cost.
Regarding performance, here are directions to try:
- replace the test if a[i:] with if i < len(a); creating the slice twice is costly
- perform the sort in place, avoiding the append operations
- restructure the main loop to have a single test per iteration
Here is a modified version:
def merge_sort(L):
    if len(L) <= 1:
        return L
    else:
        mid = len(L) // 2
        a = merge_sort(L[:mid])
        b = merge_sort(L[mid:])
        i, j, k = 0, 0, 0
        while True:
            if a[i] <= b[j]:
                L[k] = a[i]
                k += 1
                i += 1
                if i == len(a):
                    L[k:] = b[j:]
                    return L
            else:
                L[k] = b[j]
                k += 1
                j += 1
                if j == len(b):
                    L[k:] = a[i:]
                    return L
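A quick usage check of the in-place version:

print(merge_sort([5, 2, 4, 6, 1, 3]))  # [1, 2, 3, 4, 5, 6]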

How to print nodes in heap recursively

Given a heap stored as an array of keys, how do I print all the keys? So far I've got:

def heapPrint(arr, i):
    if len(arr) == 0 or len(arr) == 1:
        return
    if left(i) > len(arr) or right(i) > len(arr):
        return
    if left(i) < len(arr) and right(i) < len(arr):
        print(arr[left(i)], arr[right(i)])
        heapPrint(arr, left(i))
        heapPrint(arr, right(i))

Isn't there a better solution where I could print just one node instead of two? Thanks
I think you're looking for something like this:

def heapPrint(arr, i):
    # if the index is larger than the heap length
    if i >= len(arr):
        return
    # if it's a leaf node, then return
    if left(i) >= len(arr) and right(i) >= len(arr):
        return
    if left(i) < len(arr):
        print(arr[left(i)], end=' ')
    if right(i) < len(arr):
        print(arr[right(i)], end='')
    print('')
    # these calls will return immediately if the
    # index is too large.
    heapPrint(arr, left(i))
    heapPrint(arr, right(i))

My Python skills are minimal, so there might be some syntax errors there, but that's the basic idea.
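Both snippets assume left and right index helpers that aren't shown. A minimal sketch of those helpers for a 0-based array heap, plus a variant that prints exactly one key per call, root included (heap_print is an illustrative name under that indexing assumption):

def left(i):
    return 2 * i + 1   # index of the left child in a 0-based array heap

def right(i):
    return 2 * i + 2   # index of the right child

def heap_print(arr, i=0):
    if i >= len(arr):      # index falls outside the heap: nothing to print
        return
    print(arr[i])          # one key per call, starting at the root
    heap_print(arr, left(i))
    heap_print(arr, right(i))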

Quicksort intermediate list printing Python

The prompt for this HackerRank problem has me stumped. It is essentially a quicksort implementation, but with the requirement that you print the intermediate (semi-sorted) "original" array in its entirety on each iteration.
My working code without printing intermediates. It works as expected.
def quicksort(array):
    if len(array) > 1:
        left = 0
        right = len(array) - 2
        pivot = len(array) - 1
        while left <= right:
            while array[left] < array[pivot]:
                left += 1
            while array[right] > array[pivot]:
                right -= 1
            if left <= right:
                array[left], array[right] = array[right], array[left]
                left += 1
                right -= 1
        array[left], array[pivot] = array[pivot], array[left]
        return quicksort(array[0:left]) + quicksort(array[left::])
    else:
        # return single element arrays
        return array
And below is my attempt at implementing a way to print intermediates, i.e. I am trying to keep the indices separate instead of passing a sliced list like the example above, so that I always have access to the full array via the first function parameter.
def quicksort2(array, start=0, end=None):
    size = len(array[start:end])
    if size > 1:
        left = start
        right = len(array[start:end]) - 2
        pivot = len(array[start:end]) - 1
        print("Print")
        print("left: {}\nright: {}\npivot: {}".format(left, right, pivot))
        while left <= right:
            while array[left] < array[pivot]:
                left += 1
            while array[right] > array[pivot]:
                right -= 1
            if left <= right:
                array[left], array[right] = array[right], array[left]
                left += 1
                right -= 1
        array[left], array[pivot] = array[pivot], array[left]
        print(array)
        print("the end is {}".format(end))
        size = len(array[0:left])  # 3
        return quicksort2(array, start=0, end=left) + quicksort2(array, start=left, end=left+(size-len(array)))
    else:
        # return single element arrays
        return array

if __name__ == '__main__':
    size = int(input())  # size is 7
    arr = [int(i) for i in input().split()]
    print(quicksort2(arr, start=0, end=size))
However, now the list is not fully sorted in the second half of the input, so I am sure it has something to do with the end keyword parameter passed at the bottom of the quicksort2 definition.
Figured out what I was doing wrong. I really needed to use the Lomuto partition scheme in order to satisfy the requirements of the print statements.
Code for anyone searching for this in the future:
def partition(array, lo, hi):
    pivot_index = hi
    pivot_value = array[pivot_index]
    store_index = lo
    for i in range(lo, hi):
        if array[i] <= pivot_value:
            array[i], array[store_index] = array[store_index], array[i]
            store_index += 1
    array[pivot_index], array[store_index] = array[store_index], array[pivot_index]
    return store_index

def quicksort(array, lo, hi):
    if lo < hi:
        p = partition(array, lo, hi)
        print(' '.join([str(i) for i in array]))
        quicksort(array, lo, p - 1)
        quicksort(array, p + 1, hi)

if __name__ == '__main__':
    size = int(input())
    ar = [int(i) for i in input().split()]
    quicksort(ar, 0, size - 1)
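For example (a sample input of my own, not necessarily HackerRank's test data), feeding the program:

7
5 8 1 3 7 9 2

prints the whole array once per partition call:

1 2 5 3 7 9 8
1 2 5 3 7 8 9
1 2 5 3 7 8 9
1 2 3 5 7 8 9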
