I am trying to implement heapsort but I am getting unexpected results. I think this is due to something I don't understand about how Python handles variables (I am talking about side effects). Here's the code:
from math import *
def parent(i):
return floor((i+1)/2)-1
def left(i):
return 2*i+1
def right(i):
return 2*i+2
def maxheapify(A, i):
l = left(i)
r = right(i)
if l < len(A) and A[i] < A[l]:
largest = l
else:
largest = i
if r < len(A) and A[largest] < A[r]:
largest = r
if largest != i:
temp = A[i]
A[i] = A[largest]
A[largest] = temp
maxheapify(A, largest)
def buildmaxheap(A):
for i in range(int(floor(len(A)/2)), -1, -1):
maxheapify(A, i)
def heapsort(A):
n = len(A)
buildmaxheap(A)
for k in range(len(A), 0, -1):
temp = A[0]
A[0] = A[k-1]
A[k-1] = temp
C = A[0:k-1]
maxheapify(C, 0)
A = C + A[k-1:n]
print(A)
Now when I run
A = [2, 4, 1, 3, 7, 5, 9]
heapsort(A)
print(A)
I obtain two printed lines (one from inside the heapsort showing that the sorting worked and one from the last print):
[1, 2, 3, 4, 5, 7, 9]
[1, 7, 5, 3, 4, 2, 9]
Obviously, I'd like them both to be the same (which would mean that the sorting actually worked and A is sorted after calling heapsort(A))
So what I don't get is:
If A is correctly sorted (at the point of the last line in heapsort(A)), why doesn't this change persist after leaving the function block?
If this is due to some permanence of the variable A, why isn't the end result the original value of A, but the intermediate step in heapsort, which is the result of the maxheapify call?
At the start of the function, the list A inside the function is the same as the list outside of the function, and any modifications made to one will be reflected in the other (it's a mutable object).
When you do an assignment to a list, you're substituting a new list object for the old list object. This breaks the connection to the outside object.
Instead of assigning a new list to A, you can assign to a slice of A and the original object will be modified in place instead.
A[:] = C + A[k-1:n]
A = C + A[k-1:n]
This is the line responsible for the behaviour you're seeing. By setting A equal to A[0:k-1] + A[k-1:n] you are making a copy of all of A's elements. If you want your changes to persist within the list you passed in you must assign the list to all the elements of A like so:
A[:] = C + A[k-1:n]
The following implementation shows a rewrite of your code but includes an alternate solution above the last call to the print function. The commented-out line may replace the line directly above it, or you may choose to return a at the end of the heap_sort function and rebind the value of a in your main function instead.
def main():
a = [2, 4, 1, 3, 7, 5, 9]
heap_sort(a)
print(a)
parent = lambda i: (i + 1 >> 1) - 1
left = lambda i: (i << 1) + 1
right = lambda i: i + 1 << 1
def max_heapify(a, i, n):
l = left(i)
r = right(i)
largest = l if l < n and a[i] < a[l] else i
if r < n and a[largest] < a[r]:
largest = r
if largest != i:
a[i], a[largest] = a[largest], a[i]
max_heapify(a, largest, n)
def build_max_heap(a, n):
for i in reversed(range(n + 2 >> 1)):
max_heapify(a, i, n)
def heap_sort(a):
n = len(a)
build_max_heap(a, n)
for k in reversed(range(n)):
a[0], a[k] = a[k], a[0]
c = a[:k]
max_heapify(c, 0, k)
a[:k] = c
# the following would change "a" in this scope only
# a = c + a[k:]
# print(a)
if __name__ == '__main__':
main()
Related
How do I find the index of variable b from list a?
I suspect that the problem is in the data types.
a=[-202516736, '-202516736', '13886', '678280946', '14514', '330251838', '14511', '639566631', '14510', '542472303', '14506']
b=['678280946']
a_INT = [int(item) for item in a]
b_INT = [int(item) for item in b]
j = 0
while True:
try:
i = a_INT.index(b_INT, j)
print(i)
j = i + 1
except:
break
Let's take this a step further and add another value to the b list and also add a duplicate in the a list. Then:
a=[-202516736, '-202516736', '13886', '678280946', '14514', '678280946', '330251838', '14511', '639566631', '14510', '542472303', '14506']
b=['678280946', 13886]
ai = list(map(int, a))
for n in map(int, b):
offset = 0
r = []
while True:
try:
i = ai[offset:].index(n)
r.append(offset+i)
offset += i + 1
except ValueError:
break
print(f'{n} occurs at {r}')
Output:
678280946 occurs at [3, 5]
13886 occurs at [2]
Version 2:
The first piece of code is functionally correct. However, it could be very inefficient if the list being searched is very large.
Python's built-in sort function is very fast. So, let's build a list of 2-tuples each made up of a value from the list and its original index. Then sort the new list. Now that it's sorted we can perform a binary search and move on from there.
Added some more values to the OP's original list for demonstration purposes:
a = [-202516736, '-202516736', '13886', '678280946', '14514', '678280946',
'330251838', '14511', '639566631', '14510', '542472303', '14506', '678280946']
b = ['678280946', 13886, 14514, '-202516736', 99]
def bsearch(lst, x):
L = 0
R = len(lst) - 1
while L <= R:
m = (L + R) // 2
if (v := lst[m][0]) == x:
return m
if v < x:
L = m + 1
else:
R = m - 1
return -1
def findall(list_, n):
templist = sorted((v, i) for i, v in enumerate(list_))
result = None
if (i := bsearch(templist, n)) >= 0:
result = [templist[i][1]]
for j in range(i-1, -1, -1):
if templist[j][0] != n:
break
result.append(templist[j][1])
for j in range(i+1, len(templist)):
if templist[j][0] != n:
break
result.append(templist[j][1])
return result
ai = list(map(int, a))
for n in map(int, b):
print(f'{n} -> {findall(ai, n)}')
Output:
678280946 -> [5, 3, 12]
13886 -> [2]
14514 -> [4]
-202516736 -> [0, 1]
99 -> None
a=[-202516736, '-202516736', '13886', '678280946', '14514', '330251838', '14511', '639566631', '14510', '542472303', '14506']
b=['678280946']
for item in b:
print(a.index(item))
Since b has only one element the output is 3.
I have two sorted lists, e.g.
a = [1, 4, 7, 8]
b = [1, 2, 3, 4, 5, 6]
I want to know for each item in a if it is in b. For the above example, I want to find
a_in_b = [True, True, False, False]
(or having the indices where a_in_b is True would be fine too).
Now, both a and b are very large, so complexity is an issue. If M = len(a) and N = len(b). How can I do this with a complexity lower than M * O(N) by making use of the fact that both lists are sorted?
You can iterate over your b list manually within a loop over a. You'll want to advance the b iteration when the latest value you've seen from it is less than the current value from a.
from math import inf
result = []
b_iter = iter(b) # create an iterator over b
b_val = -inf
for a_val in a:
while b_val < a_val:
b_val = next(b_iter, inf) # manually iterate on it
result.append(a_val == b_val)
This should have a running time of O(M+N), since each list item gets iterated over at most once (b may not even need to be fully iterated).
You could avoid using floating point infinities if you want to, but you'd need to do a bit of extra work to handle some edge cases (e.g. if b is empty).
Exploiting sorted'ness is a red-herring for time complexity: The ideal case is to iterate both in lockstep for O(n+m) complexity. This is the same as converting b to a set for O(m), then searching the elements of a in the set for O(n).
>>> a = [1, 4, 7, 8]
>>> b = [1, 2, 3, 4, 5, 6]
>>> bs = set(b) # create set for O(len(b))
>>> [item in bs for item in a] # check O(len(a)) items "in set of b" for O(1) each
[True, True, False, False]
Since most of these operations are builtin, the only costly operation is the iteration over a which is needed in all solutions.
However, this will duplicate the references to the items in b. If b is treated as external to the algorithm, the space complexity is O(m+n) instead of the ideal case O(n) for just the answer.
Late answer, but a different approach to the problem using set() uniqueness and O(1) speed of len(), i. e. :
a_in_b = []
a = [1,4,7,8]
b = [1,2,3,4,5,6]
b_set = set(b)
for v in a:
l1 = len(b_set)
b_set.add(v)
a_in_b.append(l1 == len(b_set))
Unfortunately, my approach isn't the fastest:
mistermiyagi: 0.387 ms
tomerikoo: 0.442 ms
blckknght: 0.729 ms
lobito: 1.043 ms
semisecure: 1.87 ms
notnotparas: too long
lucky6qi: too long
Benchmark
Use Binary Search here:
def bs(b,aele,start,end):
if start > end:
return False
mid = (start + end) // 2
if ale == b[mid]:
return True
if ale < b[mid]:
return bs(b, aele, start, mid-1)
else:
return bs(b, aele, mid+1, end)
For each element in a check if it exists in b using this method.
Time Complexity: O(m*log(n))
Using sets the order doesn't even matter.
Turn b to a set (O(N)). Then iterate a (O(M)), and for each element check if it's in set_b (O(1)). This will give a time complexity of O(max(M, N)):
a = [1, 4, 7, 8]
b = [1, 2, 3, 4, 5, 6]
set_b = set(b)
res = []
for elem in a:
res.append(elem in set_b)
This can of-course be shortened to a nifty list-comp:
res = [elem in set_b for elem in a]
Both give:
[True, True, False, False]
For your parenthesized request, simply iterate with enumerate instead:
for i, elem in enumerate(a):
if elem in set_b:
res.append(i)
Which will give [0, 1].
You should use binary search algorithm(read about it if you don't know what it is).
The modified bin_search function has to return position right for which b[right] >= elem - the first element in b that is greater or equal than searched element from a. This position will be used as the left position for next bin_search call. Also bin_search returns True as a second argument if it have found elem in b
def bin_search(arr, elem, left):
right = len(arr)
while left < right:
mid = (left+right)//2
if arr[mid] == elem:
return (mid, True)
if arr[mid] < elem:
left = mid + 1
else:
right = mid
return (right, False)
def find_a_in_b(a, b):
new_left = 0
a_in_b = [False] * len(a)
# we could have used enumerate but size of a is too large
index = 0
for i in a:
new_left, a_in_b[index] = bin_search(b, i, new_left)
index += 1
return a_in_b
It's probably the best time
P.S. Forget it, i'm stupid and forgot about linear algorithm used in merge sort, so it's not the best
Go through a and b once:
a_in_b = []
bstart = 0
for ai in a:
print (ai,bstart)
if bstart == len(b):
a_in_b.append(False)
else:
for bi in b[bstart:]:
print (ai, bi, bstart)
if ai == bi:
a_in_b.append(True)
break
elif ai > bi:
if bstart < len(b):
bstart+=1
if bstart == len(b):
a_in_b.append(False)
continue
The obvious solution is actually O(M + N):
a = [1, 1, 4, 7, 8]
b = [1, 2, 3, 4, 5, 6]
c = [0] * len(a) # Or use a dict to stash hits ..
j = 0
for i in range(0, len(a)):
while j < len(b) - 1 and b[j] < a[i]:
j += 1
if b[j] == a[i]:
c[i] = 1
print(c)
For each i in 0 ... N where N is length of a, only a unique partition / sub-sequence of b plus one border number is checked, making it O(M + N) all together.
for el in a:
try:
b = b[b.index(el):]
a_in_b.append("True")
except:
a_in_b.append("False")
A simple solution is to convert the lists to a data frame and do an inner merge
The inner join matches like values on a specific column
Basically what I want to do is create something like this in python (this is basic idea and not actual code):
n = 3
i = n + 1
a = [1, 3, 3, 1]
b = [1, 2, 1]
while n > 1:
Check if n is even
- if n is even, then for all i in range(0,n), insert values into an array using the formula below
- b[n-i] = a[n-i-1] + a[n-i], this value will replace the previously given value of b[] above the code.
- Print out the array
- After each area is filled, n+=1, i=n+1 are applied, then the loop continues
Check if n is odd
- same process except formula is
- a[n-i] = b[n-i-1] + a[n-i], this value will replace the previously given value of a[] above the code.
- Print out the array
- After each area is filled, n+=1, i=n+1 are applied, then the loop continues
This process will loop and print each and continue on, the arrays will essentially look like this:
b = [1, 4, 6, 4, 1], a = [1 5, 10, 10, 5, 1], b = [1, 6, 15, 20, 20, 15, 6, 1], etc.
Here is the code that I currently have, however I'm getting an 'out of range' error.
n = 3
i = n + 1
b = [1, 2, 1]
a = [1, 3, 3, 1]
while n > 1:
if n%2==0:
print("even")
for i in range(0,n):
b[n-i].append(a[n-i-1]+a[n-i])
else:
print("odd")
for i in range(0,n):
print("yay")
a[n-i].append(b[n-i-1]+b[n-i])
if n%2==0:
print(b)
else:
print(a)
n +=1
i = n + 1
print("loop")
The random prints throughout the code are to test and see if it is even making it into the process. There were from a previous code and I just haven't removed them yet.
Hopefully you can help me, I can't find anything online about a loop that constantly increases the size of an array and fills it at the same time.
Sorry struggling with the code that's in the sample. From your description I can see that you want to generate Pascal's triangle. Here's a short snippet that will do this.
a = [1, 1]
for _ in range(10):
a = [1] + [x+y for (x,y) in zip(a[:-1], a[1:])] + [1]
print a
a[:-1] refers to the whole array except the last element and a[1:] refers to whole array except first element. zip combines first elements from each array into a tuple and so on. All that remains is to add them and pad the row with ones one the outside. _ is used to tell Python, I don't care about this variable - useful if you want to be explicit that you are not using the range value for anything except flow control.
Maria's answer is perfect, I think. If you want to start with your code, you can rewrite your code as below to get similar result. FYI.
n = 3
b = [1, 2, 1]
while 1 < n < 10:
if n % 2 == 0:
print("even")
b = [0] * (n + 1)
for i in range(0, n + 1):
if i == 0:
b[i] = a[0]
elif i == n:
b[i] = a[i - 1]
else:
b[n - i] = a[i - 1] + a[i]
else:
print("odd")
a = [0] * (n + 1)
for i in range(0, n + 1):
if i == 0:
a[i] = b[0]
elif i == n:
a[i] = b[i - 1]
else:
a[i] = b[i - 1] + b[i]
if n % 2 == 0:
print(b)
else:
print(a)
n += 1
print("loop")
I tried implementing Insertion sort with for loops only and wrote the following code:
def isort(L): #implementation with a for loop
for i in range(1,len(L)):
small = L[i]
M = range(i)
M.reverse()
for j in M:
if small<L[j]:
L[j+1]=L[j]
else:
break
L[j+1] = small
return L
L = [5,4,3,2,1]
M = isort(L)
print M
This gives the output [5,1,2,3,4]. Can someone please point out where I am making a mistake
Change (the fix shown in the question is easy, the one-off error was caused by one little +1 :)):
L[j+1] = small
To:
L[j] = small
Testing:
>>> isort([5, 4, 3, 2, 1])
[1, 2, 3, 4, 5]
However, there are some other things with your code, as illustrated- it will not work alot of the time. With a fair few tweaks, we can get it to work:
def isort(L):
for i in range(1,len(L)):
small = L[i]
M = range(-1, i)
M.reverse()
for j in M:
if j>=0 and small<L[j]:
L[j+1]=L[j]
else:
break
L[j+1] = small
return L
Testing:
>>> isort([4, 5, 3, 2, 1])
[1, 2, 3, 4, 5]
The post condition for the inner loop is that j is pointing for the first value that is smaller than small (this is achieved by the break call). However, the loop naturally exists when j=0, therefore in every last inner iteration, the condition is not what you'd expect.
To fix it, I suggest initializing M from -1:
M = range(-1, i)
But then, you have to check as well that j is positive (to avoid making changes you don't want to):
if j>=0 and small<L[j]:
L[j+1]=L[j]
This is little tricky :
I took the inner loop range function as range(j, -2, -1) , so the inner loop always breaks at one position ahead, so the last statement arr[j + 1] = key works perfectly
arr = [5, 4, 3, 2, 1]
for i in range(1, len(arr)):
j = i - 1
key = arr[i]
for j in range(j, -2, -1):
if j < 0 or key >= arr[j]:
break
else:
arr[j + 1] = arr[j]
arr[j + 1] = key
if __name__ == "__main__":
n = int(input("How many numbers ?\t"))
nums = [int(x) for x in input("Enter {} numbers\t".format(n)).split()]
for i in range(1,n):
val = nums[i]
for j in range(i-1,-2,-1):
if j < 0 : break
if nums[j] > val:
nums[j+1] = nums[j]
else:
break
nums[j+1] = val
for num in nums:
print(num,end=' ')
print()
I have written the following heap sort code and I get the wrong output (not sorted) at times and I can't seem to find why...any help will be much appreciated! (WARNING: I am still learning Python that might be the problem)
def heap_sort(self, a):
self.build_max_heap(a)
n = len(a)-1
i = len(a)-1
for i in range(len(a)-1, 1):
temp = a[0]
a[0] = a[i]
a[i] = temp
a.heapsize = heapsize - 1
self.max_heapify(a, 0) #rebuild max heap at with new root
def max_heapify(self, a, i):
left = (2*(i+1))-1 #left child of i
right = 2*(i+1) #right child of i
largest = i
if left < a.heapsize and a[left] > a[i]:
largest = left
if right < a.heapsize and a[right] > a[largest]:
largest = right
if largest != i:
temp = a[largest]
a[largest] = a[i]
a[i] = temp
self.max_heapify(a, largest)
def build_max_heap(self, a):
heapsize = len(a)
i = int(heapsize/2)-1
for i in range(i, 0):
self.max_heapify(a, i)
These are my tests:
#--Test for 0 in array--#
def zero_array(self):
a = [12,0,232]
print self.sort.heap_sort(a)
return
#--Test for duplicate in array--#
def duplicate_array(self):
a = [12, 12, 7]
print self.sort.heap_sort(a)
return
#--Test for all same values in array--#
def allsame_array(self):
a = [1,1,1]
print self.sort.heap_sort(a)
return
#--Test for negative values in array--#
def negative_array(self):
a = [-23, -2, 123]
print self.sort.heap_sort(a)
return
I get the following returned arrays (which are suppose to be sorted):
[12, 0, 232]
[12, 12, 7]
[1, 1, 1]
[-23, -2, 123]
I see one issue right away:
for i in range(len(a)-1, 1)
If you want to go down to 1 inclusive use:
for i in range(len(a)-1, 0, -1)