How would quickselectg act differently if pivot wasn't the middle term

How would quickselectg act differently if pivot wasn't the middle term - python

Alright so I have developed a generic quickselect function and it is used to find the median of a list.
k = len(aList)//2 and the list is aList = [1,2,3,4,5]
So how would the program act differently if pivot started at the first item of the list each time. Do I have to make it at the center? Also where should I start the time.clock() in order to find the elapsed time of the function. Here is the code
def quickSelect(aList, k)
if len(aList)!=0:
pivot=aList[(len(aList)//2)]
smallerList = []
for i in aList:
if i<pivot:
smallerList.append(i)
largerList=[]
for i in aList:
if i>pivot:
largerList.append(i)
m=len(smallerList)
count=len(aList)-len(smallerList)-len(largerList)
if k >= m and k<m + count:
return pivot
elif m > k:
return quickSelect(smallerList,k)
else:
return quickSelect(largerList, k - m - count)

I don't see any issue in placing the pivot at the beginning. But that would be just to initialize the pivot. The whole idea of pivot is normally to find the middle element.
Please try this for your time calculation:
import time
start_time = 0
aList = [1,2,3,4,5]
k = len(aList)//2
def quickSelect(aList, k):
start_time = time.time()
# print "%10.6f"%start_time
# pivot = aList[0]
if len(aList) != 0:
pivot = aList[(len(aList) // 2)]
smallerList = []
for i in aList:
if i < pivot:
smallerList.append(i)
largerList = []
for i in aList:
if i > pivot:
largerList.append(i)
m = len(smallerList)
count = len(aList) - len(smallerList) - len(largerList)
if k >= m and k < m + count:
print "Pivot", pivot
# print "%10.6f"%time.time()
print "time: ", time.time() -start_time
return pivot
elif m > k:
return quickSelect(smallerList, k)
else:
return quickSelect(largerList, k - m - count)
quickSelect(aList, k)
In this case the time comes to be zero for your list is very small.
Please let me know, if I misinterpreted your question.
OUTPUT:
Pivot 3
time: 0.0

Related

How many times can a list be split in a way that every element on the left is smaller than every element on the right?

For example if the list is: [2,1,2,5,7,6,9] there's 3 possible ways of splitting:
[2,1,2] [5,7,6,9]
[2,1,2,5] [7,6,9]
[2,1,2,5,7,6] [9]
I'm supposed to calculate how many times the list can be split in a way that every element on the left is smaller than every element on the right. So with this list, the output would be 3. Here's my current solution:
def count(t):
c= 0
for i in range(len(t)):
try:
if max(t[:i]) < min(t[i:]):
c+=1
except:
continue
return c
The above code does the right thing, but it's not of O(n) time complexity.
How could I achieve the same result, but faster?

Compute all prefix maxima and suffix minima in linear time. And combine them in linear time.
from itertools import accumulate as acc
from operator import lt
def count(t):
return sum(map(lt,
acc(t, max),
[*acc(t[:0:-1], min)][::-1]))
Jacques requested a benchmark:
1444.6 ms Jacques_Gaudin
5.0 ms Kelly_Bundy
1424.5 ms Jacques_Gaudin
4.4 ms Kelly_Bundy
1418.2 ms Jacques_Gaudin
4.7 ms Kelly_Bundy
Code (Try it online!):
from timeit import timeit
from itertools import accumulate as acc
from operator import lt
def Kelly_Bundy(t):
return sum(map(lt,
acc(t, max),
[*acc(t[:0:-1], min)][::-1]))
def Jacques_Gaudin(t):
if not t: return 0
v, left_max = list(t), max(t)
c, right_min = 0, left_max
while (item := v.pop()) and v:
if item == left_max:
left_max = max(v)
if item < right_min:
right_min = item
if left_max < right_min:
c += 1
return c
funcs = [
Jacques_Gaudin,
Kelly_Bundy,
]
t = list(range(12345))
for func in funcs * 3:
time = timeit(lambda: func(t), number=1)
print('%6.1f ms ' % (time * 1e3), func.__name__)

My answer turned out to be very similar to kelly's above - we both calculate the mins and maxs for valid split points then check the condition on each split.
I'm around +50% slower than Kelly's as it's not fully functional as Kelly's is.
from itertools import accumulate as acc
from typing import List
def paddy3118(lst: List[int]) -> int:
# min of RHS for any split
min_from_r = list(acc(lst[::-1], min))[::-1]
# max of LHS for any split
max_from_l = list(acc(lst, max))
# Condition for valid split
return sum(max_from_l[split] < min_from_r[split+1]
for split in range(len(lst) - 1))
The following function can generate interesting test data, (try count == swap for larger count arguments):
def _gen_swap(count, swaps):
ans = list(range(count))
for i in range(swaps):
s = random.randint(0, count - 2)
ans[s], ans[s+1] = ans[s+1], ans[s]
return ans

my attempt:
def count(t):
max_el = t[0]
min_el = min(t[1:])
res = 0
for i in range(len(t)-1):
if t[i] == min_el:
min_el = min(t[i+1:])
if max_el < t[i]:
max_el = t[i]
if max_el < min_el:
res +=1
return res
Pretty straightforward, only compute the max/min if it could be different.

Here's my final answer:
def count(t):
c = 0
maxx = max(t)
right = [0]*len(t)
left = [0]*len(t)
maxx = t[0]
for i in range(0, len(t)):
if maxx >= t[i]:
left[i] = maxx
if maxx < t[i]:
maxx = t[i]
left[i] = maxx
minn = t[-1]
for i in range(len(t)-1,-1,-1):
if minn <= t[i]:
right[i] = minn
if minn > t[i]:
minn = t[i]
right[i] = minn
for i in range(0, len(t)-1):
if left[i] < right[i+1] :
c += 1
return c

Leetcode question '3Sum' algorithm exceeds time limit, looking for improvement

Given an array nums of n integers, are there elements a, b, c in nums such that a + b + c = 0? Find all unique triplets in the array which gives the sum of zero.
class Solution:
def threeSum(self, nums):
data = []
i = j = k =0
length = len(nums)
for i in range(length):
for j in range(length):
if j == i:
continue
for k in range(length):
if k == j or k == i:
continue
sorted_num = sorted([nums[i],nums[j],nums[k]])
if nums[i]+nums[j]+nums[k] == 0 and sorted_num not in data:
data.append(sorted_num)
return data
My soulution is working well but it appears that it may be too slow.
Is there a way to improve my codes without changing it significantly?

This is a O(n^2) solution with some optimization tricks:
import itertools
class Solution:
def findsum(self, lookup: dict, target: int):
for u in lookup:
v = target - u
# reduce duplication, we may enforce v <= u
try:
m = lookup[v]
if u != v or m > 1:
yield u, v
except KeyError:
pass
def threeSum(self, nums: List[int]) -> List[List[int]]:
lookup = {}
triplets = set()
for x in nums:
for y, z in self.findsum(lookup, -x):
triplets.add(tuple(sorted([x, y, z])))
lookup[x] = lookup.get(x, 0) + 1
return [list(triplet) for triplet in triplets]
First, you need a hash lookup to reduce your O(n^3) algorithm to O(n^2). This is the whole idea, and the rest are micro-optimizations:
the lookup table is build along with the scan on the array, so it is one-pass
the lookup table index on the unique items that seen before, so it handles duplicates efficiently, and by using that, we keep the iteration count of the second-level loop to the minimal

This is an optimized version, will pass through:
from typing import List
class Solution:
def threeSum(self, nums: List[int]) -> List[List[int]]:
unique_triplets = []
nums.sort()
for i in range(len(nums) - 2):
if i > 0 and nums[i] == nums[i - 1]:
continue
lo = i + 1
hi = len(nums) - 1
while lo < hi:
target_sum = nums[i] + nums[lo] + nums[hi]
if target_sum < 0:
lo += 1
if target_sum > 0:
hi -= 1
if target_sum == 0:
unique_triplets.append((nums[i], nums[lo], nums[hi]))
while lo < hi and nums[lo] == nums[lo + 1]:
lo += 1
while lo < hi and nums[hi] == nums[hi - 1]:
hi -= 1
lo += 1
hi -= 1
return unique_triplets
The TLE is most likely for those instances that fall into these two whiles:
while lo < hi and nums[lo] == nums[lo + 1]:
while lo < hi and nums[lo] == nums[lo + 1]:
References
For additional details, please see the Discussion Board where you can find plenty of well-explained accepted solutions with a variety of languages including low-complexity algorithms and asymptotic runtime/memory analysis1, 2.

I'd suggest:
for j in range(i+1, length):
This will save you len(nums)^2/2 steps and first if statement becomes redundant.
sorted_num = sorted([nums[i],nums[j],nums[k]])
if nums[i]+nums[j]+nums[k] == 0 and sorted_num not in data:
sorted_num = sorted([nums[i],nums[j],nums[k]])
data.append(sorted_num)
To avoid unneeded calls to sorted in the innermost loop.

Your solution is the brute force one, and the slowest one.
Better solutions can be:
Assume you start from an element from array. Consider using a Set for finding next two numbers from remaining array.
There is a 3rd better solution as well. See https://www.gyanblog.com/gyan/coding-interview/leetcode-three-sum/

merge, heap, and quick sort counts are not coming out properly

import random, timeit
#Qucik sort
def quick_sort(A,first,last):
global Qs,Qc
if first>=last: return
left, right= first+1, last
pivot = A[first]
while left <= right:
while left <=last and A[left]<pivot:
Qc= Qc+1
left= left + 1
while right > first and A[right] >= pivot:
Qc=Qc+1
right = right -1
if left <= right:
A[left],A[right]=A[right],A[left]
Qs = Qs+1
left= left +1
right= right-1
A[first],A[right]=A[right],A[first]
Qs=Qs+1
quick_sort(A,first,right-1)
quick_sort(A,right+1,last)
#Merge sort
def merge_sort(A, first, last): # merge sort A[first] ~ A[last]
global Ms,Mc
if first >= last: return
middle = (first+last)//2
merge_sort(A, first, middle)
merge_sort(A, middle+1, last)
B = []
i = first
j = middle+1
while i <= middle and j <= last:
Mc=Mc+1
if A[i] <= A[j]:
B.append(A[i])
i += 1
else:
B.append(A[j])
j += 1
for i in range(i, middle+1):
B.append(A[i])
Ms=Ms+1
for j in range(j, last+1):
B.append(A[j])
for k in range(first, last+1): A[k] = B[k-first]
#Heap sort
def heap_sort(A):
global Hs, Hc
n = len(A)
for i in range(n - 1, -1, -1):
while 2 * i + 1 < n:
left, right = 2 * i + 1, 2 * i + 2
if left < n and A[left] > A[i]:
m = left
Hc += 1
else:
m = i
Hc += 1
if right < n and A[right] > A[m]:
m = right
Hc += 1
if m != i:
A[i], A[m] = A[m], A[i]
i = m
Hs += 1
else:
break
for i in range(n - 1, -1, -1):
A[0], A[i] = A[i], A[0]
n -= 1
k = 0
while 2 * k + 1 < n:
left, right = 2 * k + 1, 2 * k + 2
if left < n and A[left] > A[k]:
m = left
Hc += 1
else:
m = k
Hc += 1
if right < n and A[right] > A[m]:
m = right
Hc += 1
if m != k:
A[k], A[m] = A[m], A[k]
k = m
Hs += 1
else:
break
#
def check_sorted(A):
for i in range(n-1):
if A[i] > A[i+1]: return False
return True
#
#
Qc, Qs, Mc, Ms, Hc, Hs = 0, 0, 0, 0, 0, 0
n = int(input())
random.seed()
A = []
for i in range(n):
A.append(random.randint(-1000,1000))
B = A[:]
C = A[:]
print("")
print("Quick sort:")
print("time =", timeit.timeit("quick_sort(A, 0, n-1)", globals=globals(), number=1))
print(" comparisons = {:10d}, swaps = {:10d}\n".format(Qc, Qs))
print("Merge sort:")
print("time =", timeit.timeit("merge_sort(B, 0, n-1)", globals=globals(), number=1))
print(" comparisons = {:10d}, swaps = {:10d}\n".format(Mc, Ms))
print("Heap sort:")
print("time =", timeit.timeit("heap_sort(C)", globals=globals(), number=1))
print(" comparisons = {:10d}, swaps = {:10d}\n".format(Hc, Hs))
assert(check_sorted(A))
assert(check_sorted(B))
assert(check_sorted(C))
I made the code that tells how much time it takes to sort list size n(number input) with 3 ways of sorts. However, I found that my result is quite unexpected.
Quick sort:
time = 0.0001289689971599728
comparisons = 474, swaps = 168
Merge sort:
time = 0.00027709499408956617
comparisons = 541, swaps = 80
Heap sort:
time = 0.0002578190033091232
comparisons = 744, swaps = 478
Quick sort:
time = 1.1767549149953993
comparisons = 3489112, swaps = 352047
Merge sort:
time = 0.9040642600011779
comparisons = 1536584, swaps = 77011
Heap sort:
time = 1.665754442990874
comparisons = 2227949, swaps = 1474542
Quick sort:
time = 4.749891302999458
comparisons = 11884246, swaps = 709221
Merge sort:
time = 3.1966246420051903
comparisons = 3272492, swaps = 154723
Heap sort:
time = 6.2041203819972
comparisons = 4754829, swaps = 3148479
as you see, my results are very different from what I learned. Can you please tell me why quick sort is not the fastest in my code? and why merge is the fastest one.

I can see that you are choosing the first element of the array as the pivot in quicksort. Now, consider the order of the elements of the unsorted array. Is it random? How do you generate the input array?
You see, if the pivot was either the min or max value of the aray, or somewhere close to the mind/max value, the running time of quicksort in that case (worst case) will be in the order of O(n^2). That is because on each iteration, you are partitioning the arry by breaking off only one element.
For optimal quicksort performance of O(n log n), your pivot should be as close to the median value as possible. In order to increase the likelihood of that being the case, consider initially picking 3 values at random in from the array, and use the median value as the pivot. Obviously, the more values you choose the median from initially the better the probability that your pivot is more efficient, but you are adding extra moves by choosing those values to begin with, so it's a trade off. I imagine one would even be able to calculate exactly how many elements should be selected in relation to the size of the array for optimal performance.
Merge sort on the other hand, always has the complexity in the order of O(n log n) irrespective of input, which is why you got consistent results with it over different samples.
TL:DR my guess is that the input array's first element is very close to being the smallest or largest value of that array, and it ends up being the pivot of your quicksort algorithm.

Finding first pair of numbers in array that sum to value

Im trying to solve the following Codewars problem: https://www.codewars.com/kata/sum-of-pairs/train/python
Here is my current implementation in Python:
def sum_pairs(ints, s):
right = float("inf")
n = len(ints)
m = {}
dup = {}
for i, x in enumerate(ints):
if x not in m.keys():
m[x] = i # Track first index of x using hash map.
elif x in m.keys() and x not in dup.keys():
dup[x] = i
for x in m.keys():
if s - x in m.keys():
if x == s-x and x in dup.keys():
j = m[x]
k = dup[x]
else:
j = m[x]
k = m[s-x]
comp = max(j,k)
if comp < right and j!= k:
right = comp
if right > n:
return None
return [s - ints[right],ints[right]]
The code seems to produce correct results, however the input can consist of array with up to 10 000 000 elements, so the execution times out for large inputs. I need help with optimizing/modifying the code so that it can handle sufficiently large arrays.

Your code inefficient for large list test cases so it gives timeout error. Instead you can do:
def sum_pairs(lst, s):
seen = set()
for item in lst:
if s - item in seen:
return [s - item, item]
seen.add(item)
We put the values in seen until we find a value that produces the specified sum with one of the seen values.
For more information go: Referance link

Maybe this code:
def sum_pairs(lst, s):
c = 0
while c<len(lst)-1:
if c != len(lst)-1:
x= lst[c]
spam = c+1
while spam < len(lst):
nxt= lst[spam]
if nxt + x== s:
return [x, nxt]
spam += 1
else:
return None
c +=1
lst = [5, 6, 5, 8]
s = 14
print(sum_pairs(lst, s))
Output:
[6, 8]

This answer unfortunately still times out, even though it's supposed to run in O(n^3) (since it is dominated by the sort, the rest of the algorithm running in O(n)). I'm not sure how you can obtain better than this complexity, but I thought I might put this idea out there.
def sum_pairs(ints, s):
ints_with_idx = enumerate(ints)
# Sort the array of ints
ints_with_idx = sorted(ints_with_idx, key = lambda (idx, num) : num)
diff = 1000000
l = 0
r = len(ints) - 1
# Indexes of the sum operands in sorted array
lSum = 0
rSum = 0
while l < r:
# Compute the absolute difference between the current sum and the desired sum
sum = ints_with_idx[l][1] + ints_with_idx[r][1]
absDiff = abs(sum - s)
if absDiff < diff:
# Update the best difference
lSum = l
rSum = r
diff = absDiff
elif sum > s:
# Decrease the large value
r -= 1
else:
# Test to see if the indexes are better (more to the left) for the same difference
if absDiff == diff:
rightmostIdx = max(ints_with_idx[l][0], ints_with_idx[r][0])
if rightmostIdx < max(ints_with_idx[lSum][0], ints_with_idx[rSum][0]):
lSum = l
rSum = r
# Increase the small value
l += 1
# Retrieve indexes of sum operands
aSumIdx = ints_with_idx[lSum][0]
bSumIdx = ints_with_idx[rSum][0]
# Retrieve values of operands for sum in correct order
aSum = ints[min(aSumIdx, bSumIdx)]
bSum = ints[max(aSumIdx, bSumIdx)]
if aSum + bSum == s:
return [aSum, bSum]
else:
return None

Python based quickselect Implementation resulting in error

I have small python code that implements the quickselect discussed here.
import random
def Quickselect(A, k):
if not A:
return
pivot = random.choice(A)
i = 0
A1 = []
A2 = [] # Two new arrays A1, A2 to store the split lists
for i in range(len(A)):
if A[i] < pivot :
A1.append(A[i])
else:
A2.append(A[i])
if k < len(A1):
return Quickselect(A1, k)
if k > len(A) - len(A2):
return Quickselect(A2, k-(len(A) - len(A2)))
else:
return pivot
pass
def main():
A = [45,1,27,56,12,56,88]
print(Quickselect(A,2))
pass
I seem to be getting an randrange error. Is something amiss?
Edit: Implemented random.choice instead of random.randint.
The above code seems to work fine. Thanks to User Blender.

Your error occurs because randrange breaks when the range is empty (i.e. randrange(1, 1)).
Use random.choice instead and change k <= len(A1) to k < len(A1):
def quick_select(A, k):
pivot = random.choice(A)
A1 = []
A2 = []
for i in A:
if i < pivot:
A1.append(i)
elif i > pivot:
A2.append(i)
else:
pass # Do nothing
if k <= len(A1):
return Quickselect(A1, k)
elif k > len(A) - len(A2):
return Quickselect(A2, k - (len(A) - len(A2)))
else:
return pivot

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.