using merge sort principle - python

Q- I have an array A with the sizes of apples and have to create another array S which would contain the indices of the apples in sorted order given that we cannot directly access or touch A only a function is_large(A,i,j) function can access it. It returns -1 is A[i] > A[j] and 1 if A[i] < A[j].
I wrote the program but it is giving incorrect results even for small arrays what is the problem? The main problem is that the first element is not changing positions because of that the whole array is unsorted.
def is_large_apples(apple_size, i, j):
""" Takes two indices and tells
which one of them is larger or smaller """
if apple_size[i] > apple_size[j]:
return 1
elif apple_size[i] < apple_size[j]:
return -1
def mergesort_apples(s, l, r):
""" This function takes indexed list and
makes recursive calls to sort the array """
if l < r:
mid = (l+r)//2
mergesort_apples(s, l, mid)
mergesort_apples(s, mid+1, r)
merge_apples(s, l, mid, r)
def merge_apples(s, l, mid, r):
""" This function takes the list and
the indices to merge them into the final array"""
nl = mid - l + 1
nr = r - mid
left, right = [], []
left = s[l:mid+1:1]
right = s[mid+1:r+1:1]
i, j, k = 0, 0, l;
while i < nl and j < nr:
print(s)
if is_large_apples(apple_size, i, j) == -1:
s[k] = left[i]
i += 1
else:
s[k] = right[j]
j += 1
k += 1
while i < nl:
s[k] = left[i]
k += 1
i += 1
while j < nr:
s[k] = right[j]
k += 1
j += 1
apple_size = [5, 7, 1,44,2,33] # Given list of sizes.
s = [x for x in range(0,len(apple_size))] # Original list of indices.
mergesort_apples(s, 0, len(s)-1)
print(s)

if is_large_apples(apple_size, left[i], right[j]) == -1:
Because you want to check not i and j position, but left[i] position and right[j] position.

Related

Hello guys, I'm not experienced with programming and I would like to run the Mergesort algorithm in Python for a default generated list

It takes the random list I generated but the output is something a lot different than I expected.
I think that something goes wrong with merging.
e.g. : Input --> [267,168,236,190,2,500,4,45,86]
Output --> [2,2,2,2,2,4,4,4,45,45,86]
Thank you in advance.
import numpy as np
def mergeSort(myList):
if len(myList) > 1:
mid = len(myList) // 2
left = myList[:mid]
right = myList[mid:]
# Recursive call on each half
mergeSort(left)
mergeSort(right)
# Two iterators for traversing the two halves
i = 0
j = 0
# Iterator for the main list
k = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
# The value from the left half has been used
myList[k] = left[i]
# Move the iterator forward
i += 1
else:
myList[k] = right[j]
j += 1
# Move to the next slot
k += 1
# For all the remaining values
while i < len(left):
myList[k] = left[i]
i += 1
k += 1
while j < len(right):
myList[k]=right[j]
j += 1
k += 1
list1 = np.random.randint(low=1, high=800, size=100)
myList = list1
print("Given array is", end="\n")
print(myList)
mergeSort(myList)
print("Sorted array is: ", end="\n")
print(myList)
The problem with your code is only with inplace manipulation of your original list. merge sort needs extra space O(n).
you could simply rewrite your code like this to work: (notice that it is your code, only I modified two lines, look for # changed ...)
import numpy as np
def mergeSort(myList):
if len(myList) > 1:
mid = len(myList) // 2
left = myList[:mid].copy() # changed this line
right = myList[mid:].copy() # changed this line
# Recursive call on each half
mergeSort(left)
mergeSort(right)
# Two iterators for traversing the two halves
i = 0
j = 0
# Iterator for the main list
k = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
# The value from the left half has been used
myList[k] = left[i]
# Move the iterator forward
i += 1
else:
myList[k] = right[j]
j += 1
# Move to the next slot
k += 1
# For all the remaining values
while i < len(left):
myList[k] = left[i]
i += 1
k += 1
while j < len(right):
myList[k]=right[j]
j += 1
k += 1
myList = np.random.randint(low=1, high=800, size=100)
print("Given array is", end="\n")
print(myList)
mergeSort(myList)
print("Sorted array is: ", end="\n")
print(myList)
However this is not very optimized version of implementation for the mergsort specially in python.
Here is better implementation, pure python (from: https://github.com/amirhm/algo-data-scratch/blob/main/Sorting/mergesort.ipynb)
def mergesort(l):
def merge(l, r):
lp, rp = 0 , 0
d = []
while lp < len(l) and rp < len(r):
if l[lp] < r[rp]:
d.append(l[lp])
lp += 1
else:
d.append(r[rp])
rp += 1
if rp < len(r): d.extend(l[lp:])
if lp < len(l): d.extend(r[rp:])
return d
if len(l) <= 1:
return l
n = len(l)
return merge(mergesort(l[:n//2]), mergesort(l[n//2:]))
or much more abstract only in 9 lines:
def mergesort(l):
def merge(l, r):
res = []
while l and r : ((res.append(l.pop())) if (l[-1] > r[-1]) else res.append(r.pop()))
#while r or l: res.append(r.pop()) if r else (res.append(l.pop()))
if r: res[::-1].extend(r)
if l: res[::-1].extend(l)
return res
if len(l) <= 1: return l
return merge(mergesort(l[:len(l) // 2]), mergesort(l[len(l) // 2:]))

Key comparisons in a merge-insertion hybrid sort

I was given the task with the merge-insertion sort described as(paraphrased):
Starting off with merge sort, once a threshold S(small positive integer) is reached, the algorithm will then sort the sub arrays with insertion sort.
We are tasked to find the optimal S value for varying length of inputs to achieve minimum key comparisons. I implemented the code by modifying what was available online to get:
def mergeSort(arr, l, r, cutoff):
if l < r:
m = l+(r-l)//2
if len(arr[l:r+1]) > cutoff:
return mergeSort(arr, l, m, cutoff)+mergeSort(arr, m+1, r, cutoff)+merge(arr, l, m, r)
else:
return insertionSort(arr, l, r+1)
return 0
def merge(arr, l, m, r):
comp = 0
n1 = m - l + 1
n2 = r - m
L = [0] * (n1)
R = [0] * (n2)
for i in range(0, n1):
L[i] = arr[l + i]
for j in range(0, n2):
R[j] = arr[m + 1 + j]
i = 0
j = 0
k = l
while i < n1 and j < n2:
if L[i] <= R[j]:
arr[k] = L[i]
i += 1
else:
arr[k] = R[j]
j += 1
k += 1
comp +=1
while i < n1:
arr[k] = L[i]
i += 1
k += 1
while j < n2:
arr[k] = R[j]
j += 1
k += 1
return comp
def insertionSort(arr, l, r):
comp = 0
for i in range(l+1, r):
key = arr[i]
j = i-1
while j >= l:
if key >= arr[j]:
comp += 1
break
arr[j + 1] = arr[j]
j -= 1
comp += 1
arr[j + 1] = key
return comp
However the graph I get for the minimum value of S against length is:
This means that a near-pure mergesort is almost always preferred over the hybrid. Which is against what is available online, saying that insertion sort will perform faster than mergesort at low values of S(~10-25). I can't seem to find any error with my code, so is hybrid sort really better than merge sort?
IMO the question is flawed.
Mergesort always performs N Lg(N) key comparisons, while Insertionsort takes N²/2 of them. Hence as of N=2, the comparison count favors Mergesort in all cases. (This is only approximate, as N does not always divide evenly).
But the number of moves as well as the overhead will tend to favor Insertionsort. So a more relevant metric is the actual running time which, unfortunately, will depend on the key length and type.

Merge sort in python: slicing vs iterating - impact on complexity

I want to check that my understanding of how python handles slices is correct.
Here's my implementation of merge sort:
def merge_sort(L):
def merge(a, b):
i, j = 0, 0
c = []
while i < len(a) and j < len(b):
if a[i] < b[j]:
c.append(a[i])
i += 1
elif b[j] < a[i]:
c.append(b[j])
j += 1
if a[i:]:
c.extend(a[i:])
if b[j:]:
c.extend(b[j:])
return c
if len(L) <= 1:
return L
else:
mid = len(L) // 2
left = merge_sort(L[:mid])
right = merge_sort(L[mid:])
return merge(left, right)
Am I right in thinking that I could replace this:
if a[i:]:
c.extend(a[i:])
if b[j:]:
c.extend(b[j:])
With this:
while i < len(a):
c.append(a[i])
i += 1
while j < len(b):
c.append(b[j])
j += 1
And have the exact same level of complexity? My understanding of slicing is that its complexity is equivalent to slice length? Is that correct?
Does the fact that I'm calling a slice twice (first in the condition, second time inside of it) make it 2x complexity?
Your implementation of mergesort has problems:
in the merge function's main loop, you do nothing if the values in a[i] and b[j] are equal, or more precisely if you have neither a[i] < b[i] nor a[i] > b[i]. This causes an infinite loop.
there is no need to define merge as a local function, actually there is no need to make it a separate function, you could inline the code and save the overhead of a function call.
Here is a modified version:
def merge_sort(L):
if len(L) <= 1:
return L
else:
mid = len(L) // 2
a = merge_sort(L[:mid])
b = merge_sort(L[mid:])
i, j = 0, 0
c = []
while i < len(a) and j < len(b):
if a[i] <= b[j]:
c.append(a[i])
i += 1
else:
c.append(b[j])
j += 1
if a[i:]:
c.extend(a[i:])
else:
c.extend(b[j:])
return c
Regarding performance, slicing or iterating has no impact on complexity since both operations have linear time cost.
Regarding performance, here are directions to try:
replace the test if a[i:] with if i < len(a). Creating the slice twice is costly.
perform the sort in place, avoiding the append operations
restructure the main loop to have a single test per iteration
Here is a modified version:
def merge_sort(L):
if len(L) <= 1:
return L
else:
mid = len(L) // 2
a = merge_sort(L[:mid])
b = merge_sort(L[mid:])
i, j, k = 0, 0, 0
while True:
if a[i] <= b[j]:
L[k] = a[i]
k += 1
i += 1
if (i == len(a)):
L[k:] = b[j:]
return L
else:
L[k] = b[j]
k += 1
j += 1
if (j == len(b)):
L[k:] = a[i:]
return L

Getting "object of type 'NoneType' has no len()" during implementation of mergeSort

I'm trying to implement mergeSort in python but getting Type error.
I tried debugging my code but didn't got any success.
def merge(L, R):
(C, m, n) = ([], len(L), len(R))
(i,j) = (0,0)
while i+j < m+n:
if i == m: # Case 1 -> List A is empty
C.append(R[j])
j += 1
elif j == n: # Case 2 -> List B is empty
C.append(L[i])
i += 1
elif L[i] <= R[j]: # Case 3 -> Head of A is smaller
C.append(L[i])
i += 1
elif L[i] > R[j]:
C.append(R[j])
j += 1
print(C)
def mergeSort(A, left, right):
if right - left <= 1: # Base Case
return(A[left:right])
if right - left > 1: # Recurive call
mid = (left+right)//2
L = mergeSort(A, left, mid)
R = mergeSort(A, mid, right)
return(merge(L, R))
If anyone know what I'm doing wrong, please guide me to the right way.
merge has to return C, not print it.
def merge(L, R):
(C, m, n) = ([], len(L), len(R))
(i,j) = (0,0)
while i+j < m+n:
if i == m: # Case 1 -> List A is empty
C.append(R[j])
j += 1
elif j == n: # Case 2 -> List B is empty
C.append(L[i])
i += 1
elif L[i] <= R[j]: # Case 3 -> Head of A is smaller
C.append(L[i])
i += 1
elif L[i] > R[j]:
C.append(R[j])
j += 1
return C
Is there any more efficient way to implement this
Top down merge sort using a pair of mutually recursive functions (msa2a, msa2b) to change the direction of merge and avoid copying of data:
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
msa2a(a, b, 0, len(a)) # merge sort a to a
def msa2a(a, b, low, end): # merge sort a to a
if((end - low) < 2): # if < 2 elements
return # return
mid = (low+end)//2 # set mid point
msa2b(a, b, low, mid) # merge sort left half to b
msa2b(a, b, mid, end) # merge sort right half to b
mrg(b, a, low, mid, end) # merge halves from b to a
def msa2b(a, b, low, end): # merge sort a to b
if((end - low) < 2): # if < 2 elements
b[low] = a[low] # copy 1 element from a to b
return # return
mid = (low+end)//2 # set mid point
msa2a(a, b, low, mid) # merge sort left half to a
msa2a(a, b, mid, end) # merge sort right half to a
mrg(a, b, low, mid, end) # merge halves from a to b
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
Bottom up merge sort is only slightly faster, but for this version if the number of passes would be odd, it swaps in place for the first pass, which helps further still. The merge function (mrg) is identical to the top down merge sort shown above.
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
mrgsrt(a, b, len(a))
def mrgsrt(a, b, n):
s = 1 # assume even pass count
if((passcnt(n) & 1) == 1): # if odd count
while(s < n): # swap pairs in place
if(a[s] < a[s-1]):
a[s-1],a[s] = a[s],a[s-1]
s = s + 2
s = 2
while(s < n):
ee = 0 # reset end index
while(ee < n): # setup for next pair of runs
ll = ee
rr = ll + s
if(rr >= n): # if only left run copy it
b[ll:n] = a[ll:n]
break
ee = rr + s
if(ee > n):
ee = n
mrg(a, b, ll, rr, ee)
a,b = b,a # swap(a, b)
s = s << 1 # double run size
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
def passcnt(n): # return # passes
i = 0
s = 1
while(s < n):
s = s << 1
i = i + 1
return(i)
Faster still would be a hybrid insertion + merge sort, using insertion sort for runs <= 64 elements (depends on element size). I don't have python code for an example of this. Since python is interpretive, it's slower, in the example merge sorts shown above, python takes about 64 times as long as essentially the same code compiled in C++.

Why doesn't this python implementation of mergesort work?

When I input [8,7,6,5,4,3,2,1] the output is => [4, 3, 2, 1, 8, 7, 6, 5].
It seems like the only thing different from a working solution (comparing here) is that instead of a sorted list, I have a k variable that I am incrementing, and update arr[k] in place of sorted.
Why doesn't this work? And how does updating arr[k] work? It seems like you would be losing data by updating the original input array.
def mergesort(arr):
if len(arr) == 1:
return
else:
mid = len(arr)/2
left = arr[0:mid]
right = arr[mid:len(arr)]
sorted = []
i = 0
j = 0
mergesort(left)
mergesort(right)
while i < len(left) and j < len(right):
if left[i] < right[j]:
sorted.append(left[i])
i += 1
else:
sorted.append(right[j])
j += 1
while i < len(left):
sorted.append(left[i])
i += 1
while j < len(right):
sorted.append(right[j])
j += 1
return sorted
You should just assign to left and right variable as you function return the sorted list after sorting also in the base case you should return a list and use // for integer division check this code
def mergesort(arr):
if len(arr) == 1:
return arr
else:
mid = len(arr)//2
left = arr[0:mid]
right = arr[mid:len(arr)]
sorted = []
i = 0
j = 0
left = mergesort(left) #left is now sorted
right = mergesort(right)
while i < len(left) and j < len(right):
if left[i] < right[j]:
sorted.append(left[i])
i += 1
else:
sorted.append(right[j])
j += 1
while i < len(left):
sorted.append(left[i])
i += 1
while j < len(right):
sorted.append(right[j])
j += 1
return sorted
print (mergesort([8,7,6,5,4,3,2,1,3]))

Categories