recursive binary search of a sorted sublist - python

I am trying to implement a recursive binary search algorithm which takes 4 arguments, a list, first, the integer index of the first item in the sorted sub-sequence, last, the integer index of the last item in the sorted sub-sequence and a target which will be compared to the values stored in the list.
The algorithm needs to return the position of the target within the sorted sub-sequence (if it exists) and if not return the position in which it should be placed within the sorted sub-sequence.
Here's what I have thus far;
def binary_search(a_list, first, last, target):
subMidpoint = (first + last) // 2
if a_list[subMidpoint] == target:
return subMidpoint
else:
if target < a_list[subMidpoint]:
last = subMidpoint -1
return binarySearch(a_list, first, last, target)
else:
first = subMidpoint +1
return binarySearch(a_list, first, last, target)
return first
I am struggling to wrap my head around how it will return the position if the item does not exist, any help would be greatly appreciated. The code currently compiles however is returning 'None' rather than an index position.
Many Thanks in advance.
Edit;
Thanks all for your help, I have managed to alter the final clause and it has passed some tests however it fails when the target is less than the smallest value in first and when the target is greater than the value in last.
Here's the altered final clause.
else:
if target < a_list[subMidpoint]:
last = subMidpoint -1
return binary_search(a_list, first, last, target)
else:
first = subMidpoint +1
return first

You almost have your answer in your description: if you get down to adjacent items, say positions 5 and 6, and you haven't found the item, then it would be inserted between those two. Since list indices grow to the upper end, you'd return the higher of the two -- 6, in this case.
Thus, your logic would be in your last clause
else:
if subMidpoint == first:
return last
else:
first = subMidpoint +1
return binarySearch(a_list, first, last, target)
Drop that return first at the bottom; you should not be able to reach that statement.
Learn the elif keyword; your program will be more readable.

Solved, thanks everyone. Not the cleanest solution but it works.
def binary_search(a_list, first, last, target):
    """Find ``target`` in the sorted sub-sequence ``a_list[first..last]``.

    ``first`` and ``last`` are inclusive indices into ``a_list``.

    Returns the index of ``target`` if it is present; otherwise the index at
    which it would have to be inserted to keep the sub-sequence sorted.
    """
    # An empty range has exactly one possible insertion point.
    if first > last:
        return first
    if target < a_list[first]:
        return first
    if target > a_list[last]:
        return last + 1

    sub_midpoint = (first + last) // 2
    if a_list[sub_midpoint] == target:
        return sub_midpoint
    if target < a_list[sub_midpoint]:
        return binary_search(a_list, first, sub_midpoint - 1, target)
    # Keep narrowing the upper half. Returning sub_midpoint + 1 immediately is
    # only right when the target belongs directly after the midpoint.
    return binary_search(a_list, sub_midpoint + 1, last, target)

Related

Binary search on array with duplicate

First time posting here, so apologies in advance if I am not following best practices. My algorithm is supposed to do the following in a sorted array with possible duplicates.
Return -1 if the element does not exist in the array
Return the smallest index where the element is present.
I have written a binary search algorithm for an array without duplicate. This returns a position of the element or -1. Based on blackbox testing, I know that the non-duplicate version of the binary search works. I have then recursively called that function via another function to search from 0 to position-1 to find the first incidence of the element, if any.
I am currently failing a black box test. I am getting a wrong answer error and not a time out error. I have tried most of the corner cases that I could think of and also ran a brute force test with the naive search algorithm and could not find an issue.
I am looking for some guidance on what might be wrong in the implementation rather than an alternate solution.
The format is as follow:
Input:
5 #array size
3 4 7 7 8 #array elements need to be sorted
5 #search query array size
3 7 2 8 4 #query elements
Output
0 2 -1 4 1
My code is shown below:
class BinarySearch:
def __init__(self,input_list,query):
self.array=input_list
self.length=len(input_list)
self.query=query
return
def binary_search(self,low,high):
'''
Implementing the binary search algorithm with distinct numbers on a
sorted input.
'''
#trivial case
if (self.query<self.array[low]) or (self.query>self.array[high-1]):
return -1
elif (low>=high-1) and self.array[low]!=self.query:
return -1
else:
m=low+int(np.floor((high-low)/2))
if self.array[low]==self.query:
return low
elif (self.array[m-1]>=self.query):
return self.binary_search(low,m)
elif self.array[high-1]==self.query:
return high-1
else:
return self.binary_search(m,high)
return
class DuplicateBinarySearch(BinarySearch):
def __init__(self,input_list,query):
BinarySearch.__init__(self,input_list,query)
def handle_duplicate(self,position):
'''
Function handles the duplicate number problem.
Input: position where query is identified.
Output: updated earlier position if it exists else return
original position.
'''
if position==-1:
return -1
elif position==0:
return 0
elif self.array[position-1]!=self.query:
return position
else:
new_position=self.binary_search(0,position)
if new_position==-1 or new_position>=position:
return position
else:
return self.handle_duplicate(new_position)
def naive_duplicate(self,position):
old_position=position
if position==-1:
return -1
else:
while position>=0 and self.array[position]==self.query:
position-=1
if position==-1:
return old_position
else:
return position+1
if __name__ == '__main__':
num_keys = int(input())
input_keys = list(map(int, input().split()))
assert len(input_keys) == num_keys
num_queries = int(input())
input_queries = list(map(int, input().split()))
assert len(input_queries) == num_queries
for q in input_queries:
item=DuplicateBinarySearch(input_keys,q)
#res=item.handle_duplicate(item.binary_search(0,item.length))
#res=item.naive_duplicate(item.binary_search(0,item.length))
#assert res_check==res
print(item.handle_duplicate(item.binary_search(0,item.length)), end=' ')
#print(item.naive_duplicate(item.binary_search(0,item.length)), end=' ')
When I run a naive duplicate algorithm, I get a time out error:
Failed case #56/57: time limit exceeded (Time used: 10.00/5.00, memory used: 42201088/536870912.)
When I run the binary search with duplicate algorithm, I get a wrong answer error on a different test case:
Failed case #24/57: Wrong answer
(Time used: 0.11/5.00, memory used: 42106880/536870912.)
The problem statement is as follows:
Problem Statement
Update:
I could make the code work by making the following change but I have not been able to create a test case to see why the code would fail in the first case.
Original binary search function that works with no duplicates but fails an unknown edge case when a handle_duplicate function calls it recursively. I changed the binary search function to the following:
def binary_search(self,low,high):
'''
Implementing the binary search algorithm with distinct numbers on a sorted input.
'''
#trivial case
if (low>=high-1) and self.array[low]!=self.query:
return -1
elif (self.query<self.array[low]) or (self.query>self.array[high-1]):
return -1
else:
m=low+(high-low)//2
if self.array[low]==self.query:
return low
elif (self.array[m-1]>=self.query):
return self.binary_search(low,m)
elif self.array[m]<=self.query:
return self.binary_search(m,high)
elif self.array[high-1]==self.query:
return high-1
else:
return -1
Since you are implementing binary search recursively, I would suggest adding a 'result' parameter that acts as the return value and holds the most recent index whose element equals the target value.
Here is an example:
def binarySearchRecursive(nums, left, right, target, result):
    """Recursively search sorted ``nums[left..right]`` for ``target``.

    ``result`` carries the best match seen so far; callers start it at -1.
    Returns the index of the first occurrence of ``target``, or the initial
    ``result`` unchanged when the target is not present.
    """
    # Nothing left to inspect: the last recorded match (if any) is the answer.
    if right < left:
        return result

    # Written as left + half-width rather than (left + right) // 2.
    mid = left + (right - left) // 2
    pivot = nums[mid]

    if pivot < target:
        return binarySearchRecursive(nums, mid + 1, right, target, result)

    # pivot >= target: record an exact hit, then keep looking further left,
    # because an earlier occurrence may still exist.
    if pivot == target:
        result = mid
    return binarySearchRecursive(nums, left, mid - 1, target, result)
if __name__ == '__main__':
nums = [2,4,4,4,7,7,9]
target = 4
(left, right) = (0, len(nums)-1)
result = -1 # Initial value
index = binarySearchRecursive(nums, left, right, target, result)
if index != -1:
print(index)
else:
print('Not found')
From your updated version, I still feel the exit point of your function is a little unintuitive.(Your "trivial case" section)
The only condition under which the search should stop is that you have searched every possible section of the list. That is when the search range is empty: there is no element left to be searched and checked. In the implementation, that is when left > right (or high < low) is true.
The 'result' variable, is initialized as -1 when the function first been called from main. And won't change if there is no match find. And after each successful matching, since we can not be sure if it is the first occurrence, we will just store this index into the result. If there are more 'left matching', then the value will be update. If there is not, then the value will be eventually returned. If the target is not in the list, the return will be -1, as its original initialized value.

finding a value in a list just less than or equal to key

given a sorted array i want to get the smallest element which is equal to or just less then the passed key
i already tried finding gap between every element and returning the one with least gap but that does not give the desired result as it also returns the value greater then the passed one
sorted_li= [25,22,15,14,12,6,4]
def find_nearest_small_value(key,sorted_li):
gap_current, gap_global, value = 0, key, sorted_li[0]
for i in sorted_li:
gap_current = abs(i-key)
if gap_global>=gap_current:
gap_global=gap_current
value=i
return value
For example, the above code with key=19 returns 22 because the gap there is 3, although it should return 15, as that is the value less than 19 with the least gap.
any help will be appreciated thank you :)
This will work for sorted and unsorted input sequences:
def find_nearest_small_value(key, sorted_li):
    """Return the largest element of ``sorted_li`` that is ``<= key``.

    Works for sorted and unsorted input alike. Returns ``None`` when no
    element qualifies (including an empty input), rather than letting
    ``max()`` raise ``ValueError`` on an empty sequence.
    """
    return max((i for i in sorted_li if i <= key), default=None)
it is very easily readable and a simple solution
Naive Approach
Traverse the elements of the given sorted list and return the first element that fulfils the condition. The worst-case complexity is O(n).
def find_nearest_small_value(key, sorted_li):
    """Return the first element of ``sorted_li`` that is ``<= key``.

    The scan runs front to back, so for a list sorted in descending order the
    first hit is also the largest qualifying value. Returns ``None`` when no
    element qualifies.
    """
    return next((item for item in sorted_li if item <= key), None)
Efficient Approach: Binary Search
Cut the search space in half each time we compare, and thus making the algorithm more efficient. The complexity is O(log n).
def find_nearest_small_value(key, sorted_li):
    """Binary-search a descending list for the largest element ``<= key``.

    ``sorted_li`` must be sorted from largest to smallest. Returns the
    matching element, or ``None`` if every element is greater than ``key``
    (or the list is empty).
    """
    element = None
    start = 0
    # Inclusive upper bound: starting at the last valid index means ``mid``
    # can never run off the end, so no out-of-bounds escape is needed.
    end = len(sorted_li) - 1
    while start <= end:
        mid = (start + end) // 2
        if sorted_li[mid] <= key:
            # Candidate found; larger candidates can only be further left.
            element = sorted_li[mid]
            end = mid - 1
        else:
            start = mid + 1
    return element
Why would anyone need a loop for that?? Return the first element as the smallest, if it's less than the key or None otherwise:
def find_nearest_small_value( key, sorted_list ) :
return sorted_list[0] if sorted_list[0] <= key else None
Answering the question below:
>>> def find_nearest_small_value( key, sorted_list ) :
... return sorted_list[0] if sorted_list[0] <= key else None
...
>>> find_nearest_small_value( 31, [22,25,28,39] )
22

How to do binary search in python (Recursion) , but not including max and min values of list?

I have been ,wanting to do binary search (by recursion) , and I dont know why my code is not working...
Can anyone pls, correct my code and tell the reason why it is not working??
As you can look in my code, i am slicing the string per recursion, and at last when the target is found i will return the position of the target..
def binary(n,target):
n.sort()
mid = (0 + (len(n)-1))//2
if target == n[mid]:
return mid
elif target < n[mid]:
return binary(n[:mid],target)
elif target > n[mid]:
return binary(n[mid:],target)
This is the error message i am recieving...
RecursionError: maximum recursion depth exceeded while calling a Python object.
There are two problems, both of them are on the last line:
def binary(n, target):
n.sort()
mid = (0 + (len(n) - 1)) // 2
if target == n[mid]:
return mid
elif target < n[mid]:
return binary(n[:mid], target)
elif target > n[mid]:
return mid + 1 + binary(n[mid + 1:],target)
^^^^^^^^ ^
since you're slicing, and in order to provide the index in the original sorted list, we need to keep track of the indexes we "lose" while recursing
The complement of n[:mid] is n[mid+1:] - not n[mid:] - because you have already checked the target against n[mid] and want to exclude it from further recursive calls. This is, by the way, what causes the infinite recursion!
Since we slice the list at mid+1 we need to add mid+1 before calling recursively, in order to preserve the index of the item on the right-side of the list:
[1,2,3,4,5]
^ say we slice here and get [4,5]
we want to save the indexes so we'll add mid (2) + 1 since now in [4,5] the item 4 will get the index zero
Comment: by calling n.sort() upon every iteration we "lose" all the advantage of binary search since, even after the list is sorted, to re-sort it will take at least O(n). So if we need to sort first, we might as well just iterate the array until the item is found/not-found. Or, if we insist on sorting, do it only once and only then call recursively:
n.sort()
binary(n, 2)
where binary doesn't include the sorting anymore:
def binary(n, target):
    """Recursively binary-search the already sorted list ``n`` for ``target``.

    Returns the index of ``target`` within ``n``, or ``-1`` if it is absent.
    """
    # An empty slice means the target is not in the list. Without this guard
    # the lookup below raises IndexError for any missing target.
    if not n:
        return -1
    mid = (len(n) - 1) // 2
    if target == n[mid]:
        return mid
    if target < n[mid]:
        return binary(n[:mid], target)
    # The right-hand slice starts at mid + 1, so shift the sub-result back
    # into this list's index space, but pass "not found" through unchanged.
    found = binary(n[mid + 1:], target)
    return -1 if found == -1 else mid + 1 + found
Your midpoint calculation is too aggressive:
Consider n = [1, 2, 3] and target = 3. mid (len(n)-1//2) will then be 0, when surely it should be 1 or 2. The issue stems from you subtracting 1 from the length, and then dividing the result by 2.
Since you anyways do integer division, there is no need for subtracting one:
def binary(n,target):
n.sort()
mid = len(n)//2
...
There is also the issue of sorting the list (segment) every time binary is entered, which is unnecessary. You could write a wrapper function that sorts the list once and then calls the inner recursive function:
def binary(n, target):
return _binary(sorted(n), target)
Then just rename your original function to _binary.
Finally, when cutting from the left, you'll need to make sure to keep track of the lost number that you need to add to the index:
elif target > n[mid]:
return mid + binary(n[mid:],target)

Binary search: weird middle point calculation

Regarding calculation of the list mid-point: why is there
i = (first +last) //2
and last is initialized to len(a_list) - 1? From my quick tests, this algorithm without -1 works correctly.
def binary_search(a_list, item):
    """Performs iterative binary search to find the position of an integer in a given, sorted, list.

    a_list -- sorted list of integers
    item -- integer you are searching for the position of

    Returns a message string stating either the position at which the item
    was found or that it is not in the list.
    """
    first = 0
    last = len(a_list) - 1
    while first <= last:
        # Floor division: "/" produces a float on Python 3, and a float
        # cannot be used as a list index.
        i = (first + last) // 2
        if a_list[i] == item:
            return '{item} found at position {i}'.format(item=item, i=i)
        elif a_list[i] > item:
            last = i - 1
        else:
            first = i + 1
    # The range is exhausted without a match.
    return '{item} not found in the list'.format(item=item)
The last legal index is len(a_list) - 1. The algorithm will work correctly, as first will always be no more than this, so that the truncated mean will never go out of bounds. However, without the -1, the midpoint computation will be one larger than optimum about half the time, resulting in a slight loss of speed.
Consider the case where the item you're searching for is greater than all the elements of the list. In that case the statement first = i + 1 gets executed repeatedly. Finally you get to the last iteration of the loop, where first == last. In that case i is also equal to last, but if last=len() then i is off the end of the list! The first if statement will fail with an index out of range.
See for yourself: https://ideone.com/yvdTzo
You have another error in that code too, but I'll let you find it for yourself.

Python: Recursive function to find the largest number in the list

I'm trying to do a lab work from the textbook Zelle Python Programming
The question asked me to "write and test a recursive function max() to find the largest number in a list. The max is the larger of the first item and the max of all the other items." I don't quite understand the question from the textbook.
def Max(list):
if len(list) <= 1:
else:
return list[0]
else:
m = Max(list[1:])
return m if m > list[0] else list[0]
def main():
list = eval(raw_input(" please enter a list of numbers: "))
print("the largest number is: ", Max(list))
main()
Or maybe I'm suppose to open a txt file with numbers in it and then use recursive?
I believe recursive works like this
def function()
> if something:
>>return 0
>else:
>>return function()
Your understanding of how recursion works seems fine.
Your if-block is messed up, you have two elses to one if and the alignment is out. You need to remove your first else and un-indent everything below the if one level. eg:
def Max(list):
    """Return the largest item of a non-empty list, found recursively.

    The maximum is the larger of the first item and the maximum of all the
    remaining items.
    """
    # A single-item list is its own maximum.
    if len(list) == 1:
        return list[0]
    rest_best = Max(list[1:])
    first = list[0]
    return rest_best if rest_best > first else first
def main():
list = eval(raw_input(" please enter a list of numbers: "))
print("the largest number is: ", Max(list))
main()
I post a different solution approach of the problem. Most of the answers manipulate the list using the slice operator in each recursive call. By the time the exercise does not provide a strict function prototype to be used, I also pass as function parameter the length of the list.
Suppose that we try to find and return the maximum element from a sequence S, of n elements.
Function prototype: Max(S, n)
Base case: If S contains only one item, return it. (Obviously the only item in the sequence is the max one.)
Recur: If not the base case, call Max each time for one less item, that is call Max(S, n-1). We then store the returning value to a variable called previous that indicate the previous element from the sequence and check that value with the next element in the sequence, which is the right most element in the current recursive call, and return the max of these values.
A recursion trace of the above procedure is given in the following figure. Suppose we try to find the max from a list that contains [5, 10, 20, 11, 3].
Note: To help you further, keep in mind that we recursively iterate the list from the right most element to the left most one.
Finally here is the working code:
def find_max_recursively(S, n):
    """Return the largest of the first ``n`` elements of sequence ``S``."""
    if n != 1:
        # Solve the prefix of length n - 1 first, then weigh it against the
        # right-most element of the current prefix.
        best_so_far = find_max_recursively(S, n - 1)
        rightmost = S[n - 1]
        return best_so_far if best_so_far > rightmost else rightmost
    # Only the left-most item remains.
    return S[0]
if __name__ == '__main__':
print(find_max_recursively([5, 10, 20, 11, 3], 5))
Note: By default, the recursive implementation only works with sequences of at most about 1000 elements.
To combat against infinite recursions, the designers of Python made an
intentional decision to limit the overall number of function
activations that can be simultaneously active. The precise value of
this limit depends upon the Python distribution, but a typical default
value is 1000. If this limit is reached, the Python interpreter
raises a RuntimeError with a message, maximum recursion depth exceeded.
Michael T. Goodrich (2013), Data Structures and Algorithms in Python, Wiley
To change the default value do:
import sys
sys.setrecursionlimit(1000000)
here is one more approach to solve above problem
def maximum(L):
    """Return the largest element of the non-empty list ``L``, recursively."""
    head = L[0]
    # With a single element there is nothing to compare against.
    if len(L) == 1:
        return head
    return max(head, maximum(L[1:]))
so example input and output:
L= [2,4,6,23,1,46]
print maximum(L)
produces
46
The basic approach is this.
If the list contains only a single element, that element is the max. Return it immediately.
Otherwise, the list contains multiple elements. Either the first element in the list is the maximum, or it is not.
The maximum of the first element is simply the first element in the list.
Recursively call Max on the rest (all but first element) to find the maximum of those elements.
Compare the results from step 3 and 4. The result is the number that is greater. Return it.
Right now you have some syntax errors. For example, you have two else clauses for a single if, and the indentation looks funny. You can only have one else for an if block. But if you follow these instructions, you should have a working algorithm.
def Max(lis, maxx=-float("inf")):
    """Return the largest item of non-empty ``lis``, carrying a running maximum.

    ``maxx`` is the largest value seen so far and defaults to negative
    infinity for the initial call.
    """
    if len(lis) != 1:
        head = lis[0]
        running = head if head > maxx else maxx
        # Pass the updated running maximum down with the rest of the list.
        return Max(lis[1:], running)
    # Last element: settle between it and the running maximum.
    only = lis[0]
    return maxx if maxx > only else only
print Max([1,2,39,4,5,6,7,8]) #prints 39
print Max([1,2,3,4,5,6,7,8]) #prints 8
These solutions fail after certain list size.
This is a better version:
def maximum2(a, n):
    """Return the largest of the ``n`` elements of sequence ``a``.

    Splits the sequence in half and recurses on both halves, so the recursion
    depth grows with log(n) rather than n. ``n`` must equal ``len(a)`` and be
    at least 1.
    """
    if n == 1:
        return a[0]
    half = n // 2
    # Both halves must be searched. Comparing only a[0] against the right
    # half would ignore every other element of the left half.
    left_best = maximum2(a[:half], half)
    right_best = maximum2(a[half:], n - half)
    return right_best if right_best > left_best else left_best


def maximum(a):
    """Return the largest element of the non-empty sequence ``a``."""
    return maximum2(a, len(a))
maximum(range(99999))
>>> 99998
One simple way would be to sort the list first then use indexing.
Here's a function that would work:
a = [1,233,12,34]
def find_max(a):
    """Return the largest element of ``a`` by sorting a copy and taking the last item."""
    ordered = list(a)
    ordered.sort()
    return ordered[-1]
def find_max(my_list, max):
    """Return the larger of ``max`` and the largest element of ``my_list``.

    ``max`` is the running maximum; callers typically seed it with
    ``my_list[0]``. (The parameter name shadows the built-in but is kept so
    existing callers are unaffected.)
    """
    # Stop only once the list is empty. Stopping at one remaining element
    # would return without ever comparing the last item.
    if not my_list:
        return max
    head = my_list[0]
    return find_max(my_list[1:], head if head > max else max)
if __name__ == '__main__':
my_list = [1, 5, 16, 9, 20, 40, 5]
print(find_max(my_list, my_list[0]))
def find_max(arr):
    """Find the maximum number in ``arr`` by recursion; an empty list yields 0."""
    if arr == []:
        return 0
    head = arr[0]
    # One element left: it is the maximum. Otherwise compare the head with
    # the maximum of everything after it.
    return head if len(arr) == 1 else max(head, find_max(arr[1:]))
def main():
print(find_max([2,3,5,6,7,1])) # will print max - 7
if __name__ == "__main__":
main()
You can also do it in this way:
def maximum(data, start, stop):
    """Return the largest element of ``data`` between ``start`` and ``stop``.

    Each step compares ``data[start]`` with ``data[stop - 1]`` and drops the
    smaller end from the window, until ``start >= stop``.
    """
    if start >= stop:
        return data[start]
    if data[start] >= data[stop - 1]:
        stop -= 1  # the right end cannot beat the left end; drop it
    else:
        start += 1  # the left end is smaller; drop it
    return maximum(data, start, stop)
def recursiveMax(a):
    """Return the largest element of the non-empty list ``a``, recursively."""
    if len(a) == 1:
        return a[0]
    # Evaluate the recursive call once and reuse it. Calling it both in the
    # comparison and in the result doubles the work at every level.
    rest_max = recursiveMax(a[1:])
    return a[0] if a[0] > rest_max else rest_max
Test:
print(recursiveMax([1, 2, 15, 6, 3, 2, 9]))
print(recursiveMax([98, 2, 1, 1, 1, 1, ]))
TLDR; This code will also work when the list passed to the function is empty!
@jam's answer is amazing. However, I found some problems with the conditions; I think @Blender was hinting at them.
That code will fail in the case when the list passed to the function is empty. There are two base cases:
When the list is empty -> return None
When the list has one item -> return list[0]
And then the recursive case ... to reduce any other case into the base case.
def recursive_max(arr):
    """Return the largest item of ``arr`` recursively, or ``None`` if it is empty."""
    size = len(arr)
    if size == 0:
        return None
    head = arr[0]
    if size == 1:
        return head
    # Reduce to the base cases by solving for everything after the head.
    tail_best = recursive_max(arr[1:])
    return tail_best if tail_best > head else head
Here is my answer, with a one line of code :))
def max_value(n_list):
    """Return the largest element of the non-empty list ``n_list``, recursively."""
    if len(n_list) == 1:
        return n_list[0]
    return max(n_list[0], max_value(n_list[1:]))
def getMaxNumber(numbers):
    """Return the largest of ``numbers``, or the string ``'N.A'`` when it is empty."""
    if len(numbers) != 0:
        return max(numbers)
    return 'N.A'

Categories