I have small python code that implements the quickselect discussed here.
import random
def Quickselect(A, k):
if not A:
return
pivot = random.choice(A)
i = 0
A1 = []
A2 = [] # Two new arrays A1, A2 to store the split lists
for i in range(len(A)):
if A[i] < pivot :
A1.append(A[i])
else:
A2.append(A[i])
if k < len(A1):
return Quickselect(A1, k)
if k > len(A) - len(A2):
return Quickselect(A2, k-(len(A) - len(A2)))
else:
return pivot
pass
def main():
A = [45,1,27,56,12,56,88]
print(Quickselect(A,2))
pass
I seem to be getting an randrange error. Is something amiss?
Edit: Implemented random.choice instead of random.randint.
The above code seems to work fine. Thanks to User Blender.
Your error occurs because randrange breaks when the range is empty (i.e. randrange(1, 1)).
Use random.choice instead and change k <= len(A1) to k < len(A1):
def quick_select(A, k):
pivot = random.choice(A)
A1 = []
A2 = []
for i in A:
if i < pivot:
A1.append(i)
elif i > pivot:
A2.append(i)
else:
pass # Do nothing
if k <= len(A1):
return Quickselect(A1, k)
elif k > len(A) - len(A2):
return Quickselect(A2, k - (len(A) - len(A2)))
else:
return pivot
Related
Given an array nums of n integers, are there elements a, b, c in nums such that a + b + c = 0? Find all unique triplets in the array which gives the sum of zero.
class Solution:
def threeSum(self, nums):
data = []
i = j = k =0
length = len(nums)
for i in range(length):
for j in range(length):
if j == i:
continue
for k in range(length):
if k == j or k == i:
continue
sorted_num = sorted([nums[i],nums[j],nums[k]])
if nums[i]+nums[j]+nums[k] == 0 and sorted_num not in data:
data.append(sorted_num)
return data
My soulution is working well but it appears that it may be too slow.
Is there a way to improve my codes without changing it significantly?
This is a O(n^2) solution with some optimization tricks:
import itertools
class Solution:
def findsum(self, lookup: dict, target: int):
for u in lookup:
v = target - u
# reduce duplication, we may enforce v <= u
try:
m = lookup[v]
if u != v or m > 1:
yield u, v
except KeyError:
pass
def threeSum(self, nums: List[int]) -> List[List[int]]:
lookup = {}
triplets = set()
for x in nums:
for y, z in self.findsum(lookup, -x):
triplets.add(tuple(sorted([x, y, z])))
lookup[x] = lookup.get(x, 0) + 1
return [list(triplet) for triplet in triplets]
First, you need a hash lookup to reduce your O(n^3) algorithm to O(n^2). This is the whole idea, and the rest are micro-optimizations:
the lookup table is build along with the scan on the array, so it is one-pass
the lookup table index on the unique items that seen before, so it handles duplicates efficiently, and by using that, we keep the iteration count of the second-level loop to the minimal
This is an optimized version, will pass through:
from typing import List
class Solution:
def threeSum(self, nums: List[int]) -> List[List[int]]:
unique_triplets = []
nums.sort()
for i in range(len(nums) - 2):
if i > 0 and nums[i] == nums[i - 1]:
continue
lo = i + 1
hi = len(nums) - 1
while lo < hi:
target_sum = nums[i] + nums[lo] + nums[hi]
if target_sum < 0:
lo += 1
if target_sum > 0:
hi -= 1
if target_sum == 0:
unique_triplets.append((nums[i], nums[lo], nums[hi]))
while lo < hi and nums[lo] == nums[lo + 1]:
lo += 1
while lo < hi and nums[hi] == nums[hi - 1]:
hi -= 1
lo += 1
hi -= 1
return unique_triplets
The TLE is most likely for those instances that fall into these two whiles:
while lo < hi and nums[lo] == nums[lo + 1]:
while lo < hi and nums[lo] == nums[lo + 1]:
References
For additional details, please see the Discussion Board where you can find plenty of well-explained accepted solutions with a variety of languages including low-complexity algorithms and asymptotic runtime/memory analysis1, 2.
I'd suggest:
for j in range(i+1, length):
This will save you len(nums)^2/2 steps and first if statement becomes redundant.
sorted_num = sorted([nums[i],nums[j],nums[k]])
if nums[i]+nums[j]+nums[k] == 0 and sorted_num not in data:
sorted_num = sorted([nums[i],nums[j],nums[k]])
data.append(sorted_num)
To avoid unneeded calls to sorted in the innermost loop.
Your solution is the brute force one, and the slowest one.
Better solutions can be:
Assume you start from an element from array. Consider using a Set for finding next two numbers from remaining array.
There is a 3rd better solution as well. See https://www.gyanblog.com/gyan/coding-interview/leetcode-three-sum/
I'm trying to code the optimal algorithm to select the ith element the bigger of the list.
For example, if array = [4,3,5,7] and I search for the 2nd one, the function would return 4.
I'm assuming the list has only distinct numbers here
Here is the problem:
The function sometimes return None.
And here is my code (the first function works well I think).
from random import shuffle
def partition(array, leftend, rightend, pivot):
""" I tested this one and it should work fine """
i = leftend
pivotindex = array.index(pivot) # only works if all values in array unique
array[pivotindex] = array[leftend]
array[leftend] = pivot
for j in range(leftend+1, rightend):
if array[j] < pivot:
temp = array[j]
array[j] = array[i]
array[i] = temp
i += 1
pivotindex = array.index(pivot) # only works if all values in array unique
leftendval = array[pivotindex] # Take the value of the pivot
array[pivotindex] = array[i]
array[i] = leftendval
return array
def RSelect(array, n, statistic_order):
""" list * int * int
statistic_order = the i th element i'm searching for """
new_array = [] # is used at the end of the function
if n == 1:
return array[0]
array_temp = array # Allows to have a shuffled list and
shuffle(array_temp)
pivot = array_temp[0] # Allows to have a random pivot
partition(array,0,n,pivot)
j = array.index(pivot)
if j == statistic_order:
return pivot
elif j > statistic_order:
for k in range(0,j):
new_array.append(array[k])
RSelect(new_array,j,statistic_order)
elif j < statistic_order:
for k in range(j+1,n):
new_array.append(array[k])
RSelect(new_array,(n-j)-1,statistic_order-j)
Well a few things were wrong :
You need to return results in a recursive method, in every cases !
When j < statistic_order, you recursively work on the right part of the array, and you discard j+1 numbers, not j. Remember indices begin at 0 in python, not 1 !
I also changed a few things like useless parameters, or for loops that can be written with slices.
Here is the final code, check the changes to be sure you understand it.
RSelect :
def RSelect(array, statistic_order):
""" list * int * int
statistic_order = the i th element i'm searching for """
n = len(array)
if n == 1:
return array[0]
array_temp = array # Allows to have a shuffled list and
shuffle(array_temp)
pivot = array_temp[0] # Allows to have a random pivot
array = partition(array,0,n,pivot) # Changes here, does not impact the result, but for readability
j = array.index(pivot)
# print(array, j, statistic_order, end = '\t')
if j == statistic_order:
return pivot
elif j > statistic_order:
assert j > 0
# print((array[0:j]), pivot)
return RSelect(array[0:j],statistic_order) # Changes here : return
elif j < statistic_order:
assert j+1 < n
# print(pivot, (array[j+1:n]))
return RSelect(array[j+1:n],statistic_order-j-1) # Changes here : return, -j-1
main :
if __name__ == "__main__":
from sys import argv
if len(argv) > 1:
n = int(argv[1])
arr = [2, 1, 3, 5, 4]
print(RSelect(arr[:], n))
It exists an other algorithm also in O(n) for this purpose : see this
EDIT : typos corrected & correction about complexity
the code works fine but still, the result starts from 0.
For example, if arr = [2,3,5,6] and I ask for RSelect(arr,4,2), the answer will be 5 and not 3. I don't know why.
Here is the code updated:
from random import shuffle
def partition(array, leftend, rightend, pivot):
i = leftend
pivotindex = array.index(pivot) # only works if all values in array unique
array[pivotindex] = array[leftend]
array[leftend] = pivot
for j in range(leftend+1, rightend):
if array[j] < pivot:
temp = array[j]
array[j] = array[i]
array[i] = temp
i += 1
pivotindex = array.index(pivot) # only works if all values in array unique
leftendval = array[pivotindex] # Take the value of the pivot
array[pivotindex] = array[i]
array[i] = leftendval
def RSelect(array, n, statistic_order):
""" list * int * int
statistic_order = the i th element i'm searching for """
if n == 1:
return array[0]
array_temp = array # Allows to have a shuffled list
shuffle(array_temp)
pivot = array_temp[0] # Allows to have a random pivot
partition(array,0,n,pivot)
j = array.index(pivot)
if j == statistic_order:
return pivot
elif j > statistic_order:
return RSelect(array[0:j],j,statistic_order)
elif j < statistic_order:
return RSelect(array[j+1:n],(n-j)-1,statistic_order-j-1)
I have been asked to sort a k messed array
I have below code. I have to reduce the complexity from nlogn to nlogk.
arr = [3,2,1,4,5,6,8,10,9]
k = 2
def sortKmessedarr(arr, k):
i = 1
j = 0
n = len(arr)
while i < n:
if arr[i] > arr[i-1]:
pass
else:
arr[i-1:i+k].sort() # How to sort elements between two specific indexs
i += 1
sortKmessedarr(arr, k)
print(arr)
I think if I apply this approach then it will become nlogk
But how to apply this sort() between two indexes.
I have also tried another approach like below:
arr = [3,2,1,4,5,6,8,10,9]
k = 2
def sortKmessedarr(arr, k):
def merge(arr):
arr.sort()
print(arr)
i = 1
j = 0
n = len(arr)
while i < n:
if arr[i] > arr[i-1]:
pass
else:
merge(arr[i-1:i+k])#.sort()
i += 1
sortKmessedarr(arr, k)
print(arr)
But still no luck
You can use sorted with slice assignment to get the intended effect syntactically, but I am unsure of the impact on performance (memory or speed):
arr[i-1:i+k] = sorted(a[i-1:i+k])
I have been looking at this for the past couple hours and can still not understand where I have messed up. I keep getting Index out of bounds errors like below:
Each small edit or change i have done, has run me into another error, then i end back up here after trying to simplify my code.
def quickSort(alist):
firstList = []
secondList = []
thirdList = []
if(len(alist) > 1):
#pivot = pivot_leftmost(alist)
#pivot = pivot_best_of_three(alist)
pivot = pivot_ninther(alist)
#pivot = pivot_random(alist)
for item in alist:
if(item < pivot):
firstList.append(item)
if(item == pivot):
secondList.append(item)
if(item > pivot):
thirdList.append(item)
sortedList = quickSort(firstList) + secondList + quickSort(thirdList)
return sortedList
else:
print("list:", alist)
print("sorted, nothing to do") #debug
print("") #debug
return alist
def pivot_ninther(alist):
listLength = int(len(alist))
third = int(listLength / 3)
leftList = alist[:third]
midlist = alist[third:(third * 2)]
lastlist = alist[(third * 2):(third * 3)]
leftBest = pivot_best_of_three(leftList)
midBest = pivot_best_of_three(midlist)
lastBest = pivot_best_of_three(lastlist)
pivots = [leftBest, midBest, lastBest]
return pivot_best_of_three(pivots)
I am pretty sure a fresh pair of eyes can easily find it, but i have been lookig at it for hours. Thanks!
UPDATE: (My Best_of_three function)
def pivot_best_of_three(alist):
leftmost = 0
middle = int(len(alist) / 2)
rightmost = len(alist) - 1
if (alist[leftmost] <= alist[middle] <= alist[rightmost] or alist[rightmost] <= alist[middle] <= alist[leftmost]):
return alist[middle]
elif (alist[middle] <= alist[leftmost] <= alist[rightmost] or alist[rightmost] <= alist[leftmost] <= alist[middle]):
return alist[leftmost]
else:
return alist[rightmost]
The IndexError occurs when pivot_best_of_three tries to find the rightmost member of a list of zero items. The simple way to fix that is to simply not pass it such lists. :)
Here are slightly modified versions of those functions. I've tested these versions with lists of various lengths, down to length zero, and they appear to function correctly.
def pivot_ninther(alist):
listLength = len(alist)
if listLength < 3:
return alist[0]
third = listLength // 3
leftList = alist[:third]
midlist = alist[third:-third]
lastlist = alist[-third:]
leftBest = pivot_best_of_three(leftList)
midBest = pivot_best_of_three(midlist)
lastBest = pivot_best_of_three(lastlist)
pivots = [leftBest, midBest, lastBest]
return pivot_best_of_three(pivots)
def pivot_best_of_three(alist):
leftmost = alist[0]
middle = alist[len(alist) // 2]
rightmost = alist[-1]
if (leftmost <= middle <= rightmost) or (rightmost <= middle <= leftmost):
return middle
elif (middle <= leftmost <= rightmost) or (rightmost <= leftmost <= middle):
return leftmost
else:
return rightmost
As you can see, I've simplified pivot_best_of_three so it doesn't index alist multiple times for the same value.
But it can be simplified further by using a simple sorting network:
def sort3(a, b, c):
if c < b: b, c = c, b
if b < a: a, b = b, a
if c < b: b, c = c, b
return a, b, c
def pivot_best_of_three(alist):
leftmost = alist[0]
middle = alist[len(alist) // 2]
rightmost = alist[-1]
return sort3(leftmost, middle, rightmost)[1]
Try a smaller count (<= 20) and check to see what happens in pivot_ninther() when third == 0 (at the deepest level of recursion)? Seems like it would create empty arrays and then try to index them.
The code should check to make sure length >= 9 before calling pivot_ninther(), then >= 3 if calling ...best_of_three(). If just one or two items, pick one.
Suggestion, after you get the quicksort to work, back up the source code and rather than make new arrays, the pivot function should work with the original array and first / middle / last indexes.
You can use swaps to simplify finding the median of 3. This will help in cases like starting with a reversed order array.
// median of 3
i = lo, j = (lo + hi)/2, k = hi;
if (a[k] < a[i])
swap(a[k], a[i]);
if (a[j] < a[i])
swap(a[j], a[i]);
if (a[k] < a[j])
swap(a[k], a[j]);
pivot = a[j];
wiki article:
http://en.wikipedia.org/wiki/Quicksort#Choice_of_pivot
Alright so I have developed a generic quickselect function and it is used to find the median of a list.
k = len(aList)//2 and the list is aList = [1,2,3,4,5]
So how would the program act differently if pivot started at the first item of the list each time. Do I have to make it at the center? Also where should I start the time.clock() in order to find the elapsed time of the function. Here is the code
def quickSelect(aList, k)
if len(aList)!=0:
pivot=aList[(len(aList)//2)]
smallerList = []
for i in aList:
if i<pivot:
smallerList.append(i)
largerList=[]
for i in aList:
if i>pivot:
largerList.append(i)
m=len(smallerList)
count=len(aList)-len(smallerList)-len(largerList)
if k >= m and k<m + count:
return pivot
elif m > k:
return quickSelect(smallerList,k)
else:
return quickSelect(largerList, k - m - count)
I don't see any issue in placing the pivot at the beginning. But that would be just to initialize the pivot. The whole idea of pivot is normally to find the middle element.
Please try this for your time calculation:
import time
start_time = 0
aList = [1,2,3,4,5]
k = len(aList)//2
def quickSelect(aList, k):
start_time = time.time()
# print "%10.6f"%start_time
# pivot = aList[0]
if len(aList) != 0:
pivot = aList[(len(aList) // 2)]
smallerList = []
for i in aList:
if i < pivot:
smallerList.append(i)
largerList = []
for i in aList:
if i > pivot:
largerList.append(i)
m = len(smallerList)
count = len(aList) - len(smallerList) - len(largerList)
if k >= m and k < m + count:
print "Pivot", pivot
# print "%10.6f"%time.time()
print "time: ", time.time() -start_time
return pivot
elif m > k:
return quickSelect(smallerList, k)
else:
return quickSelect(largerList, k - m - count)
quickSelect(aList, k)
In this case the time comes to be zero for your list is very small.
Please let me know, if I misinterpreted your question.
OUTPUT:
Pivot 3
time: 0.0