Get index of closest value with binary search - python

I want to do a binary search in python:
def binarySearch(data, val):
Where data is a sorted array and value is the value being searched for. If the value is found, I want to return the index (such that data[index] = val). If the value is not found, I want to return the index of the item that is closest to that value.
Here is what I've got:
def binarySearch(data, val):
    """Return the index of ``val`` in sorted ``data``, or of the closest item.

    Args:
        data: A non-empty sequence sorted in ascending order.
        val: The value to look for.

    Returns:
        An index ``i`` with ``data[i] == val`` when ``val`` is present,
        otherwise the index of the element nearest to ``val`` (the lower
        index wins when two neighbours are equally close).

    Raises:
        ValueError: If ``data`` is empty.
    """
    if not data:
        raise ValueError("data must not be empty")
    low = 0
    high = len(data) - 1
    # Shrink [low, high] until the bounds are adjacent.  Stopping there
    # (instead of looping forever) is what guarantees termination when
    # ``val`` is not in ``data``.
    while high - low > 1:
        index = (high + low) // 2  # floor division keeps the index an int
        if data[index] == val:
            return index
        if data[index] < val:
            low = index
        else:
            high = index
    # ``val`` is bracketed by (or outside of) the two remaining candidates.
    if abs(data[low] - val) <= abs(data[high] - val):
        return low
    return high

Here is the code that will return the index if the value is found, otherwise the index of the item that is closest to that value, hope it helps.
def binarySearch(data, val):
    """Return the index of ``val`` in sorted ``data``, or of the closest item.

    Every midpoint probed by the binary search is compared against the best
    candidate seen so far; the search path always passes through both
    neighbours of a missing value, so the nearest element is never missed.

    Args:
        data: A non-empty sequence sorted in ascending order.
        val: The value to look for.

    Returns:
        The index of ``val`` if present, otherwise the index of the element
        with the smallest absolute difference to ``val``.  When two elements
        are equally close the lower index is returned.

    Raises:
        ValueError: If ``data`` is empty (there is no valid index to return).
    """
    if not data:
        raise ValueError("data must not be empty")
    lo, hi = 0, len(data) - 1
    best_ind = lo
    while lo <= hi:
        mid = lo + (hi - lo) // 2
        if data[mid] == val:
            return mid
        # Check whether data[mid] is closer to val than data[best_ind];
        # break ties towards the lower index so the result does not depend
        # on the order in which the search happens to visit the candidates.
        mid_dist = abs(data[mid] - val)
        best_dist = abs(data[best_ind] - val)
        if mid_dist < best_dist or (mid_dist == best_dist and mid < best_ind):
            best_ind = mid
        if data[mid] < val:
            lo = mid + 1
        else:
            hi = mid - 1
    return best_ind
def main():
    """Demo: look up 6.1 in a small sorted list and print the closest hit."""
    data = [1, 2, 3, 4, 5, 6, 7]
    val = 6.1
    ind = binarySearch(data, val)
    # print() with a single argument behaves the same on Python 2 and 3.
    print('data[%d]=%d' % (ind, data[ind]))


if __name__ == '__main__':
    main()

Something like this should work. It returns an array with two indexes. If val is found, both values in the return array are the same. Otherwise, it returns the indexes of the two items closest to val.
def binarySearch(data, val):
    """Return the pair of indexes in sorted ``data`` that surround ``val``.

    Args:
        data: A non-empty sequence sorted in ascending order.
        val: The value to look for.

    Returns:
        A two-element list of indexes in ascending order.  If ``val`` is
        present both entries are its index.  Otherwise they are the indexes
        of the two adjacent items closest to ``val``; for a value outside the
        range of ``data`` that is the first or the last pair.  A one-element
        list can only ever yield ``[0, 0]``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    from bisect import bisect_left

    if not data:
        raise ValueError("data must not be empty")
    pos = bisect_left(data, val)
    if pos < len(data) and data[pos] == val:
        return [pos, pos]
    last = len(data) - 1
    if last == 0:
        return [0, 0]
    # ``pos`` is where val would be inserted; clamp it so that the pair
    # (upper - 1, upper) always consists of two valid, distinct indexes.
    upper = min(max(pos, 1), last)
    return [upper - 1, upper]

I know this is an old question, but it's high on Google's results and I had the same issue. There's a built-in to do this which uses binary search and allows you to feed in a reference array and a comparison array.
numpy.searchsorted(a, v, side='left', sorter=None)
a is the reference array (data in the original question) and v is the array of values to look up (val from the question). It returns an array the same size as v holding, for the nth element of v, the index at which that element would need to be inserted into a to preserve a's sort order. The side keyword determines whether elements of v are placed to the 'left' (before) or the 'right' (after) of equal values in a. Note that the result is an insertion point, not the closest index: compare the neighbours at index - 1 and index to pick the nearest value.
[documentation link as of July 2017]
https://docs.scipy.org/doc/numpy/reference/generated/numpy.searchsorted.html#numpy.searchsorted

Here's a sample implementation of binary search. I won't do all the (home?)work for you, I am sure you can figure out how to store and return the index of the closest value yourself.
# BINARY SEARCH: O(log n), search space halved each step
def biSearch(lst, find):  # expects sorted lst
    """Binary-search ``lst`` for ``find`` and count the probes made.

    Args:
        lst: A sequence sorted in ascending order (may be empty).
        find: The value to look for.

    Returns:
        A tuple ``(index, steps)``: ``index`` is the position of ``find`` or
        ``-1`` when it is absent, ``steps`` is the number of elements
        inspected.
    """
    lowIndex = 0
    highIndex = len(lst) - 1
    steps = 0
    # Looping on the bounds (rather than on "the midpoint stopped moving")
    # avoids redundant probes and never produces a negative midpoint.
    while lowIndex <= highIndex:
        steps += 1
        midIndex = (lowIndex + highIndex) // 2
        if lst[midIndex] == find:
            return (midIndex, steps)
        if lst[midIndex] < find:
            lowIndex = midIndex + 1
        else:
            highIndex = midIndex - 1
    return (-1, steps)

Not the answer to this question. But I landed here trying to figure out how to get the two surrounding values for a given target item in a sorted list.
If anyone else is looking, this is what I came up with based on some of the other answers here.
import random
def get_nearest(items, target):
    """Return the two values in sorted ``items`` that surround ``target``.

    Args:
        items: A non-empty sequence sorted in ascending order.
        target: The value to bracket.

    Returns:
        ``(target, target)`` when ``target`` occurs in ``items``; otherwise
        ``(lower, upper)``, the adjacent pair with ``lower < target < upper``.

    Raises:
        ValueError: If ``target`` lies outside ``items[0]..items[-1]``.
        IndexError: If ``items`` is empty.
    """
    from bisect import bisect_left

    print(f'looking for {target}')
    high_index = len(items) - 1
    low_index = 0
    if not items[low_index] <= target <= items[high_index]:
        raise ValueError(f'The target {target} is not in the range of'
                         f' provided items {items[low_index]}:{items[high_index]}')
    # bisect_left is an O(log n) lookup; a plain ``target in items`` test
    # would scan the whole list and defeat the purpose of the search.
    pos = bisect_left(items, target)
    if items[pos] == target:
        return target, target
    # The range check above guarantees 1 <= pos <= high_index here.
    return items[pos - 1], items[pos]
if __name__ == '__main__':
    # Demo on 100000 random values: an arbitrary target, the smallest and
    # largest possible targets, and finally a target known to be present.
    my_randoms = random.sample(range(10000000), 100000)
    my_randoms.sort()

    x = 340000
    print(get_nearest(my_randoms, x))

    x = 0
    my_randoms = [x] + my_randoms
    print(get_nearest(my_randoms, x))

    x = 10000000
    my_randoms.append(x)
    print(get_nearest(my_randoms, x))

    idx = random.randint(0, 100000)
    x = my_randoms[idx]
    print(get_nearest(my_randoms, x))

Related

return value was give as return -1, but output is displayed as none

This is a binary search program.
When the number doesn't exist, I expect a return value of -1, followed by the corresponding output statement. But here the return value is None and I don't know why. Can anyone please explain why this is happening?
# Recursive binary search over arr; called below with r = last index, l = 0.
# NOTE(review): the indentation of this listing was lost.  Judging by the
# discussion that follows it, the final `else: return -1` pairs with the
# inner if/elif chain, so nothing is returned (i.e. None) whenever `r>l`
# is false -- confirm against the original post.
def binary_serch(arr,element,r,l):
# A range with r == l is never inspected because the test is strict.
if r>l:
mid=l+(r-l)//2
if arr[mid]==element:
return mid
if arr[mid]>element:
# NOTE(review): element is smaller than arr[mid] here, yet the upper bound
# passed on is mid+1, which does not exclude mid.
return binary_serch(arr,element,mid+1,l)
elif arr[mid]<element:
# NOTE(review): element is larger than arr[mid] here, yet the upper bound is
# lowered to mid-1 and the lower bound l is passed through unchanged.
return binary_serch(arr,element,mid-1,l)
else:
return -1
# Driver: searches for a value (6) that is not in arr.
arr=[1,2,3,4,5]
element=6
result=binary_serch(arr,element,len(arr)-1,0)
num=len(arr)
print(result)
# NOTE(review): this test is also true for a result of -1, and it is false
# for the last valid index (num-1); comparing a None result with an int
# raises TypeError on Python 3.
if result<num-1:
print("Element is present at index ",result)
else:
print("not found")
In the binary_serch function, the condition if r > l needs an else branch: when r > l is false, the function falls off the end and implicitly returns None.
Also, it is better to change if arr[mid]>element to elif arr[mid]>element.
I suggest you see this code and compare it with your code:
def binary_search(arr, x):
    """Return the index of ``x`` in sorted ``arr``, or ``-1`` if it is absent.

    Args:
        arr: A sequence sorted in ascending order (may be empty).
        x: The value to look for.

    Returns:
        An index ``i`` with ``arr[i] == x``, or ``-1`` when no element matches.
    """
    low = 0
    high = len(arr) - 1
    while low <= high:
        mid = (high + low) // 2
        if arr[mid] < x:
            # x is greater, so ignore the left half
            low = mid + 1
        elif arr[mid] > x:
            # x is smaller, so ignore the right half
            high = mid - 1
        else:
            # x is present at mid
            return mid
    # The bounds crossed, so the element is not present
    return -1
# Demo: look up a value that is known to be in the sorted list.
arr = [2, 3, 4, 10, 40]
x = 10
result = binary_search(arr, x)
if result == -1:
    print("Element is not present in array")
else:
    print("Element is present at index", str(result))

why return value is -1 in this sorted list [2, 3, 12, 100, 200] target value is 100?

I am practising selection sort: I sort a list and then look for a number in it with binary search, but it is not working. Why does the output come out as index -1?
# Intended to sort lst in place and then binary-search it for target.
# NOTE(review): the indentation of this listing was lost, so the nesting
# described in the notes below is inferred from the reported behaviour.
def selectionsort(lst, target):
first = 0
last = len(lst) - 1
for step in range(len(lst)):
min_idx = step
for i in range(step + 1, len(lst)):
# min_idx is overwritten on every pass, so it is always just i.
min_idx = i
if lst[min_idx] < lst[step]:
#swap the elements
(lst[min_idx], lst[step]) = (lst[step], lst[min_idx])
# NOTE(review): if this return sits inside the sorting loop, the function
# exits with -1 before the search below ever runs -- that would match the
# output reported in the question; confirm.
return -1
while (first <= last):
mid = (first + last) // 2
if lst[mid] == target:
# Bare expression: the value is computed and discarded, nothing is returned.
mid
elif lst[mid] < target:
# Bare expression: `first` is never advanced.
mid - 1
else:
# Bare expression: `last` is never lowered.
mid + 1
return None
def verify(index):
    """Print whether a search succeeded.

    Args:
        index: The position returned by the search, or ``None`` when the
            target was not found.  Index ``0`` counts as found.
    """
    if index is not None:
        print("Target found at index", index)
    else:
        print("Target not found")
# Demo run: sort the sample list in place and report where 100 ends up.
data = [200, 12, 3, 100, 2]
target = 100
result = selectionsort(data, target)
verify(result)
print("The sorted list is \n", data)
Your code was missing some return statements and variable assignments. Also, the binary search should be outside the sorting loop, and the return -1 shouldn't be there. Here is the fixed code.
def selectionsort(lst, target):
    """Sort ``lst`` in place with selection sort, then locate ``target``.

    Args:
        lst: A mutable list of mutually comparable items; it is sorted in
            ascending order as a side effect.
        target: The value to look for after sorting.

    Returns:
        The index of ``target`` in the sorted list, or ``None`` if absent.
    """
    size = len(lst)
    for step in range(size - 1):
        # Find the smallest remaining element first and swap once per pass;
        # swapping on every comparison would be an exchange sort instead.
        min_idx = step
        for i in range(step + 1, size):
            if lst[i] < lst[min_idx]:
                min_idx = i
        if min_idx != step:
            lst[step], lst[min_idx] = lst[min_idx], lst[step]

    # Binary search on the now-sorted list.
    first = 0
    last = size - 1
    while first <= last:
        mid = (first + last) // 2
        if lst[mid] == target:
            return mid
        elif lst[mid] < target:
            first = mid + 1
        else:
            last = mid - 1
    return None
def verify(index):
    """Print whether a search succeeded.

    Args:
        index: The position returned by the search, or ``None`` when the
            target was not found.  Index ``0`` counts as found.
    """
    if index is not None:
        print("Target found at index", index)
    else:
        print("Target not found")
# Demo run: sort the sample list in place and report where 100 ends up.
data = [200, 12, 3, 100, 2]
target = 100
result = selectionsort(data, target)
verify(result)
print("The sorted list is \n", data)

Finding first pair of numbers in array that sum to value

Im trying to solve the following Codewars problem: https://www.codewars.com/kata/sum-of-pairs/train/python
Here is my current implementation in Python:
# Finds a pair of values in ints that adds up to s, preferring the pair whose
# later element has the smallest index; returns [first, second] or None.
def sum_pairs(ints, s):
# Smallest "later index" of any qualifying pair found so far.
right = float("inf")
n = len(ints)
# m: value -> index of its first occurrence.
m = {}
# dup: value -> index of its second occurrence (needed for pairs x + x).
dup = {}
for i, x in enumerate(ints):
# NOTE(review): on Python 2 `m.keys()` builds a list, making every
# `in m.keys()` test a linear scan; `x in m` is a hash lookup on both
# Python 2 and 3.  Possibly the cause of the timeouts -- confirm interpreter.
if x not in m.keys():
m[x] = i # Track first index of x using hash map.
elif x in m.keys() and x not in dup.keys():
dup[x] = i
for x in m.keys():
if s - x in m.keys():
if x == s-x and x in dup.keys():
j = m[x]
k = dup[x]
else:
j = m[x]
k = m[s-x]
comp = max(j,k)
# j != k rejects pairing a single occurrence of a value with itself.
if comp < right and j!= k:
right = comp
# right is still infinity here when no pair qualified.
if right > n:
return None
return [s - ints[right],ints[right]]
The code seems to produce correct results, however the input can consist of array with up to 10 000 000 elements, so the execution times out for large inputs. I need help with optimizing/modifying the code so that it can handle sufficiently large arrays.
Your code is inefficient for large lists, so the big test cases time out. Instead you can do:
def sum_pairs(lst, s):
    """Return the first pair of values in ``lst`` that adds up to ``s``.

    "First" means the pair whose second element appears earliest in ``lst``.

    Args:
        lst: An iterable of hashable numbers.
        s: The required sum.

    Returns:
        ``[earlier, later]`` for the first completed pair, or ``None`` when
        no two elements add up to ``s``.
    """
    seen = set()
    for item in lst:
        # The partner of ``item`` must already have been passed, which is
        # what makes the first hit the pair that completes earliest.
        if s - item in seen:
            return [s - item, item]
        seen.add(item)
    return None
We put the values in seen until we find a value that produces the specified sum with one of the seen values.
For more information, see: Reference link
Maybe this code:
def sum_pairs(lst, s):
    """Return the first pair of values in ``lst`` that adds up to ``s``.

    The list is scanned once from the left and the pair is reported as soon
    as its second element is reached, so the result is the pair that is
    completed earliest -- not merely the one with the leftmost first element.

    Args:
        lst: A sequence of hashable numbers.
        s: The required sum.

    Returns:
        ``[earlier, later]`` for the first completed pair, or ``None`` when
        no two elements add up to ``s``.
    """
    seen = set()
    for nxt in lst:
        x = s - nxt
        # A set lookup replaces the inner scan over the rest of the list,
        # turning the quadratic search into a single pass.
        if x in seen:
            return [x, nxt]
        seen.add(nxt)
    return None
# Example: 6 + 8 is the only pair in the list that adds up to 14.
lst, s = [5, 6, 5, 8], 14
print(sum_pairs(lst, s))
Output:
[6, 8]
This answer unfortunately still times out, even though it's supposed to run in O(n log n) (since it is dominated by the sort, the rest of the algorithm running in O(n)). I'm not sure how you can obtain better than this complexity, but I thought I might put this idea out there.
def sum_pairs(ints, s):
    """Look for two values in ``ints`` that add up to ``s`` (sort + two pointers).

    The values are sorted together with their original positions and scanned
    with a left and a right cursor, tracking the pair whose sum is closest to
    ``s``.  Among pairs that tie on that distance and are met while advancing
    the left cursor, the one whose later element has the smaller original
    index is preferred.

    NOTE: the scan finds *a* matching pair if one exists; it does not examine
    every matching pair, so the earliest-completed pair is not guaranteed.

    Args:
        ints: A sequence of numbers.
        s: The required sum.

    Returns:
        ``[a, b]`` in original list order when a pair sums to ``s``,
        otherwise ``None`` (including when ``ints`` has fewer than two items).
    """
    if len(ints) < 2:
        # Without this guard a single element would be paired with itself.
        return None

    # Sort the (original index, value) pairs by value.
    ints_with_idx = sorted(enumerate(ints), key=lambda pair: pair[1])

    diff = float("inf")
    l = 0
    r = len(ints) - 1

    # Indexes of the sum operands in the sorted array
    lSum = 0
    rSum = 0

    while l < r:
        # Absolute difference between the current sum and the desired sum
        total = ints_with_idx[l][1] + ints_with_idx[r][1]
        absDiff = abs(total - s)

        if absDiff < diff:
            # New best difference; the cursors move on the next iteration.
            lSum = l
            rSum = r
            diff = absDiff
        elif total > s:
            # Decrease the large value
            r -= 1
        else:
            # Same difference: prefer the pair that ends further to the left
            if absDiff == diff:
                rightmostIdx = max(ints_with_idx[l][0], ints_with_idx[r][0])
                if rightmostIdx < max(ints_with_idx[lSum][0], ints_with_idx[rSum][0]):
                    lSum = l
                    rSum = r
            # Increase the small value
            l += 1

    # Map the best pair back to original positions and original order.
    aSumIdx = ints_with_idx[lSum][0]
    bSumIdx = ints_with_idx[rSum][0]
    aSum = ints[min(aSumIdx, bSumIdx)]
    bSum = ints[max(aSumIdx, bSumIdx)]

    if aSum + bSum == s:
        return [aSum, bSum]
    return None

python3 binary search not working [duplicate]

I am trying to implement the binary search in python and have written it as follows. However, I can't make it stop whenever needle_element is larger than the largest element in the array.
Can you help? Thanks.
# Recursive binary search on list slices; returns the index of needle_element.
# NOTE(review): the indentation of this listing was lost.
def binary_search(array, needle_element):
# NOTE(review): `/` is true division on Python 3 and yields a float, which
# cannot be used as a list index; the listing presumably targets Python 2.
mid = (len(array)) / 2
if not len(array):
# NOTE(review): raising a plain string is not a valid exception on current
# Python versions.
raise "Error"
if needle_element == array[mid]:
return mid
elif needle_element > array[mid]:
# array[mid:] still contains element mid, so a one-element slice is passed
# on unchanged and the recursion never reaches the empty-list check above.
return mid + binary_search(array[mid:],needle_element)
elif needle_element < array[mid]:
return binary_search(array[:mid],needle_element)
else:
raise "Error"
It would be much better to work with a lower and upper indexes as Lasse V. Karlsen was suggesting in a comment to the question.
This would be the code:
def binary_search(array, target):
    """Return the index of ``target`` in sorted ``array``, or ``None``.

    Args:
        array: A sequence sorted in ascending order (may be empty).
        target: The value to look for.

    Returns:
        An index ``i`` with ``array[i] == target``, or ``None`` if the value
        is not present.
    """
    lower = 0
    upper = len(array)
    while lower < upper:   # use < instead of <=
        x = lower + (upper - lower) // 2
        val = array[x]
        if target == val:
            return x
        elif target > val:
            if lower == x:   # these two are the actual lines
                break        # you're looking for
            lower = x
        elif target < val:
            upper = x
    return None
lower < upper will stop once you have reached the smaller number (from the left side)
if lower == x: break will stop once you've reached the higher number (from the right side)
Example:
>>> binary_search([1,5,8,10], 5) # return 1
1
>>> binary_search([1,5,8,10], 0) # return None
>>> binary_search([1,5,8,10], 15) # return None
Why not use the bisect module? It should do the job you need---less code for you to maintain and test.
array[mid:] creates a new sub-copy everytime you call it = slow. Also you use recursion, which in Python is slow, too.
Try this:
def binarysearch(sequence, value):
    """Return the index of ``value`` in sorted ``sequence``, or ``None``.

    Only ``<`` is used to compare elements, so any type that defines
    ``__lt__`` works.

    Args:
        sequence: An indexable sequence sorted in ascending order.
        value: The value to look for.

    Returns:
        An index of a matching element, or ``None`` if there is none.
    """
    lo, hi = 0, len(sequence) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        if sequence[mid] < value:
            lo = mid + 1
        elif value < sequence[mid]:
            hi = mid - 1
        else:
            return mid
    return None
In the case that needle_element > array[mid], you currently pass array[mid:] to the recursive call. But you know that array[mid] is too small, so you can pass array[mid+1:] instead (and adjust the returned index accordingly).
If the needle is larger than all the elements in the array, doing it this way will eventually give you an empty array, and an error will be raised as expected.
Note: Creating a sub-array each time will result in bad performance for large arrays. It's better to pass in the bounds of the array instead.
You can improve your algorithm as the others suggested, but let's first look at why it doesn't work:
You're getting stuck in a loop because if needle_element > array[mid], you're including element mid in the bisected array you search next. So if needle is not in the array, you'll eventually be searching an array of length one forever. Pass array[mid+1:] instead (it's legal even if mid+1 is not a valid index), and you'll eventually call your function with an array of length zero. So len(array) == 0 means "not found", not an error. Handle it appropriately.
This is a tail recursive solution, I think this is cleaner than copying partial arrays and then keeping track of the indexes for returning:
def binarySearch(elem, arr):
    """Return the index at which ``elem`` lies in ``arr``, or ``False``.

    Precondition: ``arr`` must be sorted in ascending order.

    NOTE: a hit at index 0 and the not-found value ``False`` compare equal
    (``0 == False``); callers must test the result with ``is False``.
    """
    return binarySearchHelper(elem, arr, 0, len(arr) - 1)
def binarySearchHelper(elem, arr, start, end):
    """Search ``arr[start..end]`` (both bounds inclusive) for ``elem``.

    Args:
        elem: The value to look for.
        arr: A sequence sorted in ascending order.
        start: First index of the range still under consideration.
        end: Last index of the range still under consideration.

    Returns:
        The index of ``elem``, or ``False`` once the range is empty.  Test
        the result with ``is False``: index 0 is a valid hit and is falsy.
    """
    if start > end:
        return False

    mid = (start + end) // 2
    if arr[mid] == elem:
        return mid
    elif arr[mid] > elem:
        # recurse to the left of mid
        return binarySearchHelper(elem, arr, start, mid - 1)
    else:
        # recurse to the right of mid
        return binarySearchHelper(elem, arr, mid + 1, end)
def binary_search(array, target):
    """Print ``target`` for matches found by recursively halving ``array``.

    Both halves are always explored, so the list does not need to be sorted;
    within a sub-list the descent stops at the first midpoint that matches.

    Args:
        array: A list of values (may be empty).
        target: The value to look for.

    Returns:
        ``False`` for an empty list or a non-matching one-element list, the
        element itself for a matching one-element list, the midpoint index
        (relative to the slice being examined) when the midpoint matches, and
        ``None`` otherwise.  The results of the recursive calls are not
        propagated, so the printed output is the reliable signal.
    """
    if not array:
        return False
    low = 0
    mid = len(array) // 2  # floor division: the midpoint must be an int
    upper = len(array)

    if len(array) == 1:
        if array[0] == target:
            print(target)
            return array[0]
        else:
            return False
    if target == array[mid]:
        print(array[mid])
        return mid
    else:
        if mid > low:
            arrayl = array[0:mid]
            binary_search(arrayl, target)
        if upper > mid:
            arrayu = array[mid:len(array)]
            binary_search(arrayu, target)
if __name__ == "__main__":
    # Demo: print every 9 the halving search comes across.
    a = [3, 2, 9, 8, 4, 1, 9, 6, 5, 9, 7]
    binary_search(a, 9)
Using Recursion:
def binarySearch(arr, item):
    """Return the index of ``item`` in sorted ``arr``, or ``None`` if absent.

    Args:
        arr: A list sorted in ascending order (may be empty).
        item: The value to look for.

    Returns:
        The index of ``item`` within ``arr``, or ``None`` when not found.
    """
    if not arr:
        # An empty slice means the search space is exhausted.
        return None
    c = len(arr) // 2
    if item > arr[c]:
        ans = binarySearch(arr[c + 1:], item)
        # ``ans`` may legitimately be 0, so compare against None instead of
        # relying on truthiness.
        if ans is None:
            return None
        # Shift the index found in the right-hand slice back into ``arr``.
        return ans + c + 1
    elif item < arr[c]:
        return binarySearch(arr[:c], item)
    else:
        return c


binarySearch([1, 5, 8, 10, 20, 50, 60], 10)
All the answers above are true , but I think it would help to share my code
def binary_search(number, numbers_list=None):
    """Report where ``number`` sits (or would be inserted) in a sorted list.

    Args:
        number: The value to look for.
        numbers_list: A sequence sorted in ascending order.  Defaults to
            ``range(20, 100)``.

    Returns:
        The string ``'the index is N'`` where ``N`` is the leftmost position
        at which ``number`` could be inserted while keeping the list sorted.
        That is the index of ``number`` when it is present; no distinction is
        made for values that are absent.
    """
    if numbers_list is None:
        numbers_list = range(20, 100)
    i = 0
    j = len(numbers_list)
    while i < j:
        middle = (i + j) // 2
        if number > numbers_list[middle]:
            i = middle + 1
        else:
            j = middle
    return 'the index is ' + str(i)
If you're doing a binary search, I'm guessing the array is sorted. If that is true you should be able to compare the last element in the array to the needle_element. As octopus says, this can be done before the search begins.
You can just check to see that needle_element is in the bounds of the array before starting at all. This will make it more efficient also, since you won't have to do several steps to get to the end.
if needle_element < array[0] or needle_element > array[-1]:
# do something, raise error perhaps?
It returns the index of key in array by using recursive.
round() rounds the float midpoint to a whole number so that it can be used as an index, and halving the search range on every call gives the expected O(log n) running time.
A = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
low = 0
hi = len(A)
v = 3


def BS(A, low, hi, v):
    """Recursively binary-search sorted list ``A`` for ``v``.

    Args:
        A: A list sorted in ascending order.
        low: Lowest index still under consideration.
        hi: Highest index still under consideration; values past the end of
            the list (such as ``len(A)``) are clamped to the last index.
        v: The value to look for.

    Returns:
        The index of ``v`` in ``A``, or ``None`` when it is not present.
        A progress message is printed for every probe.
    """
    hi = min(hi, len(A) - 1)
    if low > hi:
        # Range exhausted: stop instead of recursing forever.
        return None
    mid = (hi + low) // 2
    # Compare against the element at mid, not against the index itself.
    if v == A[mid]:
        print("You have found dude!" + " " + "Index of v is ", mid)
        return mid
    elif v < A[mid]:
        print("Item is smaller than mid")
        return BS(A, low, mid - 1, v)
    else:
        print("Item is greater than mid")
        return BS(A, mid + 1, hi, v)


BS(A, low, hi, v)
Without the lower/upper indexes this should also do:
def exists_element(element, array):
    """Yield a single boolean: whether ``element`` occurs in sorted ``array``.

    This is a generator; use ``next(exists_element(x, xs))`` to obtain the
    answer.

    Args:
        element: The value to look for.
        array: A list sorted in ascending order (may be empty).

    Yields:
        ``True`` if ``element`` is present, otherwise ``False``.
    """
    if not array:
        yield False
        # Stop here: without this return, exhausting the generator would
        # continue below and index into the empty list.
        return
    mid = len(array) // 2
    if element == array[mid]:
        yield True
    elif element < array[mid]:
        yield from exists_element(element, array[:mid])
    else:
        yield from exists_element(element, array[mid + 1:])
Returning a boolean if the value is in the list.
Capture the first and last index of the list, loop and divide the list capturing the mid value.
In each loop will do the same, then compare if value input is equal to mid value.
def binarySearch(array, value):
    """Return ``True`` if ``value`` occurs in ``array``, else ``False``.

    The input is sorted into a new list first, so it does not need to be
    ordered and is left unmodified.  (The sort costs O(n log n); for data
    that is already sorted the search loop alone would suffice.)

    Args:
        array: An iterable of mutually comparable items.
        value: The value to look for.

    Returns:
        A boolean telling whether ``value`` was found.
    """
    array = sorted(array)
    first = 0
    last = len(array) - 1

    while first <= last:
        midIndex = (first + last) // 2
        midValue = array[midIndex]

        if value == midValue:
            return True
        elif value < midValue:
            last = midIndex - 1
        else:
            first = midIndex + 1
    return False

Converting phone number range list to prefix list

I have a phone number range, for example:
3331234-3332345
I need to write a function that converts it to list of prefixes:
3331234
...
3331239
333124
...
333129
33313
...
33319
33320
...
33322
333231
333232
333233
3332341
...
3332345
Question is not so easy. I don't need to get a list of numbers between range start and end.
My working code. It is not very quick, either. Optimizations welcome.
# Collapses the inclusive number range a..b into a list of string prefixes by
# repeatedly replacing every run of 10 consecutive entries that share all but
# their last character with that shared prefix.
# NOTE(review): the indentation of this listing was lost.
def diap_to_prefix(a, b):
# Every number in the range is materialised as a zero-padded string, all of
# the same width, so memory use grows with the size of the range.
lst = ['%0*d'%(max(len(str(a)), len(str(b))), x) for x in range(int(a), int(b)+1)]
new_lst = []
# Repeat the pass until one leaves the number of entries unchanged.
while len(lst) != len(new_lst):
# On the first pass new_lst is empty (falsy), so lst is kept as is.
lst = new_lst or lst
new_lst = []
# lst[0] raises IndexError when the range is empty (int(a) > int(b)).
c = lst[0]
# tmp_lst collects the current run of entries sharing c[:-1].
tmp_lst = [c]
for i in lst[1:]:
if c[:-1] == i[:-1]:
c = i
tmp_lst.append(c)
else:
# A complete run of 10 collapses to its prefix; shorter runs are kept.
if len(tmp_lst) == 10:
new_lst.append(c[:-1])
else:
new_lst.extend(tmp_lst)
c = i
tmp_lst = [c]
# Flush the final run.
if len(tmp_lst) == 10:
new_lst.append(c[:-1])
else:
new_lst.extend(tmp_lst)
return lst
My new more optimal solution (py3.4)
def diap_to_prefix(a, b):
    """Yield the shortest list of prefixes covering the number range a..b.

    Each yielded integer ``q`` stands for a block of ``10**k`` consecutive
    numbers that all start with the digits of ``q``; together the blocks
    cover exactly ``a..b`` (inclusive), in ascending order.  A prefix always
    keeps at least the leading digit of the longer bound.

    Args:
        a: First number of the range (int or numeric string).
        b: Last number of the range (int or numeric string).

    Yields:
        Integer prefixes, largest possible blocks first at each position.
    """
    a, b = int(a), int(b)
    # Biggest block a single prefix may represent.
    max_block = 10 ** (max(len(str(x)) for x in (a, b)) - 1)

    current = a
    while current <= b:
        # Grow the block while it stays aligned on a power of ten and does
        # not run past the end of the range.
        block = 1
        while (block < max_block
               and current % (block * 10) == 0
               and current + block * 10 - 1 <= b):
            block *= 10
        yield current // block
        current += block
You need to get the common prefix of the values separated by "-", so:
Use .split to get these and iterate through them until you find a difference
Complete the first value with zeros (to get the least number) until you get phone_len digits and do the same for the maximum (with nines)
Then, you have a simple range of numbers
Iterate through them and convert them to strings
Here it is:
# Expand a "low-high" phone-number range into the list of numbers it covers.
phone_len = 7
R = "3331234-3332345".split("-")

# Longest run of leading digits shared by both ends of the range.
prefix = ""
for low_digit, high_digit in zip(R[0], R[1]):
    if low_digit != high_digit:
        break
    prefix += low_digit

# Pad the lower bound with zeros and the upper bound with nines so that both
# have phone_len digits; each bound is padded according to its own length.
m = int(R[0] + "0" * (phone_len - len(R[0])))
M = int(R[1] + "9" * (phone_len - len(R[1])))

phones = [str(n) for n in range(m, M + 1)]
Here's a sketch of one way to handle this problem. I've used ellipses to mark the spots where you'll need to fill in the details explained in the comments. I'd write a function to derive the initial value of 'maxpower', everything else is simple enough to be written inline.
# Skeleton only: the `...` placeholders must be filled in before this runs.
# It walks from firstnumber to lastnumber, emitting one prefix per block.
firstnumber = 3331234
lastnumber = 3332345
current = firstnumber
while current <= lastnumber:
# Find the largest power of 10 that exactly divides 'current'.
# Call this value 'maxpower'. 'maxpower' is a candidate for the
# size of the block of numbers that will be represented by the
# next output value.
maxpower = ... # 1, 10, 100, 1000, 10000, and so on
# If a block of size 'maxpower' would take us past the
# 'lastnumber', we can't use that block size. We must try a
# smaller block. Divide 'maxpower' by 10 until the block size
# becomes acceptable.
while (current + maxpower) > ... :
# NOTE(review): on Python 3 `/=` produces a float; `//=` keeps maxpower an
# integer.
maxpower /= 10
# Now 'maxpower' is the largest acceptable size for the next
# block, so the desired prefix is 'current' divided by 'maxpower'.
# Emit that value, then add 'maxpower' to 'current' to get the new
# 'current' value for the next iteration.
print ...
current += maxpower
My working code. It is not very quick, but it works. Optimizations welcome.
# Inserts the digit string `prefix` into a tree of nested dicts rooted at
# `root`, one character per level, storing `value` at the leaf.  `parent` and
# `pkey` identify the entry the current call was reached through; once that
# entry has 10 items it is overwritten with `value`.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the `if pkey:` checks is not certain.
def fill(root, prefix, value, parent, pkey):
if len(prefix) > 1:
# NOTE(review): if root has already been replaced by `value` (a string in
# the demo below), this `in` is a substring test rather than a key lookup.
if prefix[0] in root:
fill(root[prefix[0]], prefix[1:], value, root, prefix[0])
if pkey:
# NOTE(review): len() also applies when parent[pkey] is already `value`;
# a value of length 10 would satisfy this test as well.
if len(parent[pkey]) == 10:
parent[pkey] = value
elif type(root) == type({}):
root[prefix[0]] = {}
fill(root[prefix[0]], prefix[1:], value, root, prefix[0])
if pkey:
if len(parent[pkey]) == 10:
parent[pkey] = value
elif type(root) == type({}):
# Last character of the prefix: store the value itself.
root[prefix[0]] = value
if pkey:
if len(parent[pkey]) == 10:
parent[pkey] = value
return root
def compact(prefixes, current):
    """Flatten a nested prefix tree into the list of prefixes it contains.

    Args:
        prefixes: Either a dict mapping one character to a sub-tree, or any
            non-dict leaf value marking the end of a prefix.
        current: The prefix string accumulated on the way to ``prefixes``.

    Returns:
        A list with one string per leaf, in dict iteration order.
    """
    if not isinstance(prefixes, dict):
        return [current]
    rlist = []
    # items() exists on both Python 2 and Python 3 dicts.
    for k, v in prefixes.items():
        rlist.extend(compact(v, current + k))
    return rlist
if __name__ == '__main__':
    # Demo: load a block of 50000 consecutive numbers and print the compacted
    # prefix list.
    plist = {}
    for x in range(4440000, 4490000):
        fill(plist, str(x), 'value', plist, None)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(compact(plist, ''))

Categories