Merge sort in python without slicing using recursion - python

Here is my solution to a merge sort in python.
My notes:
The time is 2n(log(n)) instead of n(log(n)) because of the stack in recursion. I can't see any way to fix this.
A bottom up recursion method is actually easier and is n(log(n))
I have seen many solutions that use slicing. For example:
# Slicing-based split: the two slices build new lists before recursing.
lefthalf = alist[:mid]
righthalf = alist[mid:]
mergeSort(lefthalf)
mergeSort(righthalf)
However, these solutions ignore that slicing k elements takes k steps, because every element in the slice is copied. So the above code is actually equivalent to:
# What the two slices amount to: every element of alist is copied once.
lefthalf = []
righthalf = []
for i in range(mid):  # k times
    lefthalf.append(alist[i])
for i in range(mid, len(alist)):  # k times
    righthalf.append(alist[i])
My solution:
import random
def _merge_sort(indices, the_list):
    """Sort ``the_list[start..end]`` in place, where ``indices`` is (start, end).

    Both bounds are inclusive. Each half is sorted recursively and the two
    halves are then merged by ``sort_sub_list``.
    """
    start, end = indices[0], indices[1]
    half_way = start + (end - start) // 2
    if half_way > start:
        _merge_sort((start, half_way), the_list)
    # With exactly two items the right half is a single element, so there
    # is nothing to recurse into.
    if end >= half_way + 1 and end - start != 1:
        _merge_sort((half_way + 1, end), the_list)
    # a stack is created using log(n) number of recursions
    sort_sub_list(the_list, start, end)
def sort_sub_list(the_list, start, end):
    """Merge the two sorted halves of ``the_list[start..end]`` in place.

    ``end`` is inclusive. The first half runs up to the midpoint
    ``start + (end - start) // 2``; the second half begins right after it.
    """
    right_begin = start + (end - start) // 2 + 1
    left, right = start, right_begin
    merged = []
    while left < right_begin and right <= end:
        if the_list[left] > the_list[right]:
            merged.append(the_list[right])
            right += 1
        else:
            # Ties are taken from the left half.
            merged.append(the_list[left])
            left += 1
    # Drain whichever half still has items (index loops, no slicing).
    for idx in range(left, right_begin):
        merged.append(the_list[idx])
    for idx in range(right, end + 1):
        merged.append(the_list[idx])
    # Second pass over the n merged items: copy them back into place.
    for offset, value in enumerate(merged):
        the_list[start + offset] = value
def merge_sort(the_list):
    """Sort ``the_list`` in place; returns whatever ``_merge_sort`` returns."""
    last_index = len(the_list) - 1
    return _merge_sort((0, last_index), the_list)
if __name__ == '__main__':
    # Smoke test: shuffle 0..n-1 and check that sorting restores the order.
    for i in range(100):
        n = 100
        # list(...) is needed on Python 3, where range() is not a list and
        # cannot be shuffled in place or compared equal to one.
        l = list(range(n))
        random.shuffle(l)
        merge_sort(l)
        assert l == list(range(n))

Related

Python 3: Optimizing Project Euler Problem #14

I'm trying to solve the Hackerrank Project Euler Problem #14 (Longest Collatz sequence) using Python 3. Following is my implementation.
# Chain lengths are cached only for starting values below cache_limit.
cache_limit = 5000001
# lookup[n] holds the cached chain length of n; 0 means "not computed yet".
lookup = [0] * cache_limit
lookup[1] = 1
def collatz(num):
    """Return the term that follows ``num`` in the Collatz sequence.

    1 maps to itself, even numbers are halved, odd numbers become 3n + 1.
    """
    if num == 1:
        return 1
    if num % 2:
        return 3 * num + 1
    return num >> 1
def compute(start):
    """Return the Collatz chain length of ``start``, counting both endpoints.

    The sequence is followed until it reaches 1 or a value whose length is
    already stored in ``lookup``. The result is cached for ``start`` itself
    when ``start`` is below ``cache_limit``.
    """
    length = 1
    value = start
    while value > 1:
        length += 1
        known = lookup[value] if value < cache_limit else 0
        if known > 0:
            # length is now two more than the number of steps taken to
            # reach value, so subtract 2 when adding the cached length.
            length += known - 2
            break
        value = collatz(value)
    if start < cache_limit:
        lookup[start] = length
    return length
def main(tc):
    """Read ``tc`` bounds from stdin and print the answer for each one.

    The answer for a bound N is the starting number <= N with the longest
    chain; when lengths tie, the larger starting number wins.
    """
    test_cases = [int(input()) for _ in range(tc)]
    bound = max(test_cases)
    results = [0] * (bound + 1)
    best_start, best_len = 1, 1
    for candidate in range(1, bound + 1):
        chain_len = compute(candidate)
        if chain_len >= best_len:
            best_len, best_start = chain_len, candidate
        # results[candidate] answers any query whose bound is candidate.
        results[candidate] = best_start
    for query in test_cases:
        print(results[query])
if __name__ == "__main__":
    # The first line of stdin is the number of test cases that follow.
    tc = int(input())
    main(tc)
There are 12 test cases. The above implementation passes till test case #8 but fails for test cases #9 through #12 with the following reason.
Terminated due to timeout
I'm stuck with this for a while now. Not sure what else can be done here.
What else can be optimized here so that I stop getting timed out?
Any help will be appreciated :)
Note: Using the above implementation, I'm able to solve the actual Project Euler Problem #14. It is giving timeout only for those 4 test cases in hackerrank.
Yes, there are things you can do to your code to optimize it. But I think, more importantly, there is a mathematical observation you need to consider which is at the heart of the problem:
whenever n is odd, then 3 * n + 1 is always even.
Given this, one can always divide (3 * n + 1) by 2. And that saves one a fair bit of time...
Here is an improvement (it takes 1.6 seconds): there is no need to compute the sequence of every number. You can create a dictionary and store the number of the elements of a sequence. If a number that has appeared already comes up, the sequence is computed as dic[original_number] = dic[n] + count - 1. This saves a lot of time.
import time
# Timestamp taken when the module loads; the script section at the bottom
# reports the elapsed time relative to it.
start = time.time()
def main(n, dic):
    '''Record the Collatz chain length of n in dic and return dic.

    The chain is followed until it reaches 1 or drops below the starting
    number; in the latter case the length of that smaller number must
    already be stored in dic.
    '''
    count = 1
    original_number = n
    while True:
        if n < original_number:
            # -1 because n is counted both in count and in dic[n].
            dic[original_number] = dic[n] + count - 1
            break
        if n == 1:
            dic[original_number] = count
            break
        if n % 2 == 0:
            # Floor division keeps n an exact int. On Python 3, n / 2 gives
            # a float, which loses precision above 2**53.
            n //= 2
        else:
            n = 3 * n + 1
        count += 1
    return dic
limit = 10**6
# One slot per starting number 1..limit, initialised to 0.
dic = dict.fromkeys(range(1, limit + 1), 0)
if __name__ == '__main__':
    n = 1
    while n < limit:
        dic = main(n, dic)
        n += 1
    longest = max(dic.values())
    print('Longest chain: ', longest)
    best_start = max(dic, key=dic.get)
    print('Number that gives the longest chain: ', best_start)
    end = time.time()
    print('Time taken:', end-start)
The trick to solve this question is to compute the answers for only largest input and save the result as lookup for all smaller inputs rather than calculating for extreme upper bound.
Here is my implementation which passes all the Test Cases.(Python3)
# Largest starting value whose step count is memoised in steps.
MAX = int(5 * 1e6)
# ans[i] is filled in below with the best starting number <= i; index 0 is
# a placeholder so that indices line up with the bounds.
ans = [0]
# steps[n] caches the number of Collatz steps from n down to 1 (0 = unknown).
steps = [0]*(MAX+1)
def solve(N):
    """Return the number of Collatz steps needed to bring ``N`` down to 1.

    Results for ``N <= MAX`` are memoised in ``steps``.
    """
    cacheable = N <= MAX
    if cacheable and steps[N] != 0:
        return steps[N]
    if N == 1:
        return 0
    following = N >> 1 if N % 2 == 0 else 3 * N + 1
    result = 1 + solve(following)  # This is recursion
    if cacheable:
        steps[N] = result  # This is memoization
    return result
# The first stdin line is the number of queries; each following line is a bound.
inputs = [int(input()) for _ in range(int(input()))]
largest = max(inputs)
mx = 0
collatz = 1
# Sweep once up to the largest bound, recording the best start seen so far.
for i in range(1, largest + 1):
    curr_count = solve(i)
    if curr_count >= mx:
        mx, collatz = curr_count, i
    ans.append(collatz)
for query in inputs:
    print(ans[query])
This is my brute-force take:
# Brute force: walk every chain that starts below one million and remember
# the starting number with the most steps.
C = 0  # most steps seen so far
N = 0  # starting number that produced C
for i in range(1, 1000001):
    n = i
    c = 0
    while n != 1:
        if n % 2 == 0:
            # Floor division keeps the value an int; n / 2 would turn it
            # into a float on Python 3.
            _next = n // 2
        else:
            _next = 3 * n + 1
        c = c + 1
        n = _next
    if c > C:
        C = c
        N = i
print(N, C)
Here's my implementation(for the question specifically on Project Euler website):
num = 1
limit = int(input())
# seq_list[i] is the chain length (both endpoints counted) of i + 1.
seq_list = []
while num < limit:
    sequence_num = 0
    n = num
    if n == 1:
        sequence_num = 1
    else:
        while n != 1:
            # Floor division keeps n an int; n / 2 would produce a float
            # on Python 3.
            n = n // 2 if n % 2 == 0 else 3 * n + 1
            sequence_num += 1
        sequence_num += 1  # count the final 1 as well
    seq_list.append(sequence_num)
    num += 1
# index() returns the first position of the maximum, i.e. the smallest
# starting number among ties.
k = seq_list.index(max(seq_list))
print(k + 1)

Python - MergeSort Recursion Error

I made a MergeSort program in python using recursion and I keep getting errors about line 27,line 23,line 18 and a recursion error:
"RecursionError: maximum recursion depth exceeded in comparison" but i don't seem to find any obvious mistake in my code.
def merge(list, start, middle, end):
    """Merge two adjacent sorted runs of ``list`` in place.

    The runs are ``list[start..middle]`` and ``list[middle+1..end]`` (all
    bounds inclusive), which is how ``mergesort2`` splits a range.
    """
    # NOTE: the parameter shadows the built-in ``list``; the name is kept so
    # existing callers are unaffected.
    L = list[start:middle + 1]
    R = list[middle + 1:end + 1]
    i = j = 0
    for k in range(start, end + 1):
        # Explicit bounds checks replace the 99999999999 sentinel, which
        # gave wrong results for values larger than the sentinel.
        if j >= len(R) or (i < len(L) and L[i] <= R[j]):
            list[k] = L[i]
            i += 1
        else:
            list[k] = R[j]
            j += 1
def mergesort2(list, start, end):
    """Recursively sort ``list[start..end]`` (inclusive bounds) in place."""
    if start < end:
        middle = (start + end)//2
        # Recurse on the left half only. Passing ``end`` here repeated the
        # same call forever and ended in RecursionError.
        mergesort2(list, start, middle)
        mergesort2(list, middle+1, end)
        merge(list, start, middle, end)
def mergesort(list):
    """Sort ``list`` in place by running ``mergesort2`` over every index."""
    first, last = 0, len(list) - 1
    mergesort2(list, first, last)
# Demo: sort a sample list and print it afterwards.
mylist = [8,23,4,56,75,21,42,10,2,5]
mergesort(mylist)
print(mylist)
Thanks
def mergesort2(list, start, end):
    """Recursively sort ``list[start..end]`` (inclusive bounds) in place."""
    if start < end:
        middle = start + (end - start)//2
        mergesort2(list, start, middle)  # notice middle instead of end.
        mergesort2(list, middle+1, end)
        merge(list, start, middle, end)
You were recursing with the same list without reducing its size, thus it was never reaching the base case.
Edit:
Also, middle can be calculated as start + (end-start)//2 instead of (start+end)//2. In languages with fixed-width integers this avoids overflow on very large arrays; Python integers do not overflow, so here it is only a matter of habit.
Edit 2:
After analysing the code even more, I found that the output was wrong. I have tried to correct them and this is my code:
def merge(start, middle, end):
    """Merge the sorted runs ``l[start..middle]`` and ``l[middle+1..end]``.

    Works in place on the module-level list ``l``; all bounds are inclusive.
    """
    # Copy only the two runs being merged. Slicing the whole list at
    # ``middle`` and writing from index 0 ignored ``start`` and ``end``.
    L = l[start:middle + 1]
    R = l[middle + 1:end + 1]
    i = j = 0
    k = start
    while i < len(L) and j < len(R):
        if L[i] <= R[j]:
            l[k] = L[i]
            i += 1
        else:
            l[k] = R[j]
            j += 1
        k += 1
    # Copy whatever remains of the run that was not exhausted.
    while i < len(L):
        l[k] = L[i]
        i += 1
        k += 1
    while j < len(R):
        l[k] = R[j]
        j += 1
        k += 1
def mergesort2(start, end):
    """Recursively sort the index range start..end (inclusive) via ``merge``."""
    if start >= end:
        return  # zero or one element: nothing to do
    middle = start + (end - start) // 2
    mergesort2(start, middle)
    mergesort2(middle + 1, end)
    merge(start, middle, end)
def mergesort(l):
    """Start the sort over every index of ``l``.

    Only ``len(l)`` is used here; ``mergesort2`` is called with index
    bounds and does not receive the list itself.
    """
    last = len(l) - 1
    mergesort2(0, last)
# Demo: the module-level list l is what merge() reads and writes.
l = [8,23,4,56,75,21,42,10,2,5]
mergesort(l)
print(l)
A few points to be noted:
Changed the variable name from list to l to avoid shadowing the built-in name list.
There was no use passing the list to every function because it was already declared as a global variable.
merge() had some issues. The loop should actually run from 0 till the length of both the lists are not crossed. If crossed, then just copy the rest of the elements.
Used proper Python slicing techniques :-p

Why does set( ) make this code run so much faster?

I wrote some code for Project Euler Problem 35:
#Project Euler: Problem 35
import time
# Timestamp taken when the module loads; the elapsed time printed at the end
# of the script is measured from it.
start = time.time()
def sieve_erat(n):
    '''creates list of all primes < n

    Returns an empty list when n <= 2. Runs a sieve of Eratosthenes over a
    boolean table, so it behaves the same on Python 2 and Python 3.
    '''
    if n <= 2:
        # There are no primes below 2; indexing an empty candidate list
        # here used to raise IndexError.
        return []
    is_prime = [True] * n
    is_prime[0] = is_prime[1] = False
    p = 2
    # Every composite below n has a prime factor p with p * p < n.
    while p * p < n:
        if is_prime[p]:
            for multiple in range(p * p, n, p):
                is_prime[multiple] = False
        p += 1
    return [i for i in range(2, n) if is_prime[i]]
def circularPrimes(n):
    '''returns # of circular primes below n'''
    primes = sieve_erat(n)
    prime_set = set(primes)  # hashed lookups for the rotation checks
    count = 0
    for prime in primes:
        digits = str(prime)
        size = len(digits)
        # Rotation number shift moves the last shift digits to the front;
        # shift 0 is the number itself. all() stops at the first failure.
        if all(
            int(digits[size - shift:] + digits[:size - shift]) in prime_set
            for shift in range(size)
        ):
            count += 1
    return count
# Single-argument print calls in parentheses produce the same output on
# Python 2 and Python 3.
print(circularPrimes(1000000))
elapsed = (time.time() - start)
print("Found in %s seconds" % elapsed)
I am wondering why this code (above) runs so much faster when I set b = set(primes) in the circularPrimes function. The running time for this code is about 8 seconds. Initially, I did not set b = set(primes) and my circularPrimes function was this:
def circularPrimes(n):
    '''returns # of circular primes below n'''
    primes = sieve_erat(n)
    total = 0
    for prime in primes:
        rotated = str(prime)
        is_circular = True
        for _ in range(len(rotated)):
            # Membership is tested against the primes sequence itself.
            if int(rotated) not in primes:
                is_circular = False
                break
            # Move the last digit to the front for the next rotation.
            rotated = rotated[-1] + rotated[:-1]
        if is_circular:
            total += 1
    return total
My initial code (without b = set(primes)) ran so long that I didn't wait for it to finish. I am curious as to why there is such a large discrepancy in terms of running time between the two pieces of code as I do not believe that primes would have had any duplicates that would have made iterating through it take so much longer that iterating through set(primes). Maybe my idea of set( ) is wrong. Any help is welcome.
I believe the culprit here is if int(a) not in b:. Sets are implemented internally as hash tables, so a membership test hashes the value and looks at one bucket, which takes constant time on average. With a list, the same test has to compare against the elements one by one, so each lookup takes time proportional to the length of the list.
You can check out the innards of sets here.

How would quickselect act differently if pivot wasn't the middle term

Alright so I have developed a generic quickselect function and it is used to find the median of a list.
k = len(aList)//2 and the list is aList = [1,2,3,4,5]
So how would the program act differently if pivot started at the first item of the list each time. Do I have to make it at the center? Also where should I start the time.clock() in order to find the elapsed time of the function. Here is the code
def quickSelect(aList, k):
    """Return the item that would sit at index ``k`` of ``aList`` when sorted.

    The pivot is the middle element of the current list. Returns None when
    the list is empty, which is also where an out-of-range ``k`` ends up.
    """
    if len(aList) == 0:
        return None
    pivot = aList[len(aList) // 2]
    smallerList = [item for item in aList if item < pivot]
    largerList = [item for item in aList if item > pivot]
    m = len(smallerList)
    # Number of items equal to the pivot.
    count = len(aList) - m - len(largerList)
    if m <= k < m + count:
        return pivot
    if k < m:
        return quickSelect(smallerList, k)
    return quickSelect(largerList, k - m - count)
I don't see any issue in placing the pivot at the beginning. But that would be just to initialize the pivot. The whole idea of pivot is normally to find the middle element.
Please try this for your time calculation:
import time
# NOTE: quickSelect assigns its own local start_time, so this module-level
# value is never read by it.
start_time = 0
# Sample input; k is the middle index of aList.
aList = [1,2,3,4,5]
k = len(aList)//2
def quickSelect(aList, k):
    """Return the item at index ``k`` of sorted ``aList`` and print timing.

    Prints the selected pivot and the wall-clock time of the whole
    selection. Returns None, printing nothing, if the search runs out of
    items.
    """
    # Start the clock once. Restarting it in every recursive call timed
    # only the innermost call, so the search now narrows in a loop.
    start_time = time.time()
    items = aList
    while len(items) != 0:
        pivot = items[len(items) // 2]
        smallerList = [i for i in items if i < pivot]
        largerList = [i for i in items if i > pivot]
        m = len(smallerList)
        count = len(items) - m - len(largerList)  # items equal to pivot
        if m <= k < m + count:
            # %-formatting gives the same output on Python 2 and 3.
            print("%s %s" % ("Pivot", pivot))
            print("%s %s" % ("time: ", time.time() - start_time))
            return pivot
        if k < m:
            items = smallerList
        else:
            items = largerList
            k = k - m - count
    return None
# Run the selection on the sample input defined above.
quickSelect(aList, k)
In this case the time comes out as zero because your list is very small.
Please let me know, if I misinterpreted your question.
OUTPUT:
Pivot 3
time: 0.0

Bubble sort error

I am trying to write an optimized bubble sort, but my code does not actually sort right through the list. Any suggestions as to why it is stopping early?
#!/usr/bin/env python
from random import *
import time
def bubble(lst):
    """Sort ``lst`` in place with an early-stopping bubble sort.

    Returns ``counter``, used to compare against the unoptimized version.
    NOTE(review): the original indentation was lost; the counter is taken
    to count comparisons (one per inner-loop step). Confirm the intent.
    """
    counter = 0
    n = len(lst)
    while n > 0:
        temp = 0
        # Start at index 0; range(1, n - 1) never compared the first pair.
        for i in range(n - 1):
            if lst[i] > lst[i + 1]:
                lst[i], lst[i + 1] = lst[i + 1], lst[i]  # swap
                # Everything from i + 1 onwards is now in final position,
                # so the next pass only needs to cover the first i + 1 items.
                temp = i + 1
            counter += 1
        n = temp
    return counter
def main():
    """Shuffle 0..9, bubble-sort it, and print the list, count and time."""
    # list(...) is needed on Python 3, where range() cannot be shuffled.
    lst = list(range(10))
    shuffle(lst)
    print(lst)
    start = time.time()
    counter = bubble(lst)
    print(lst)
    end = time.time()
    print(counter)
    # %-formatting gives the same output on Python 2 and 3.
    print("%s %s" % ("time", end - start))
# Run the demo.
main()
This should fix it:
def bubble(lst):
    """Bubble-sort ``lst`` in place, shrinking the range after each pass.

    Returns the step counter.
    NOTE(review): the original indentation was lost; the counter is taken
    to advance once per comparison. Confirm the intent.
    """
    comparisons = 0
    unsorted_len = len(lst)
    while unsorted_len > 0:
        last_swap = 0
        for left in range(unsorted_len - 1):  # first element is at position 0
            right = left + 1
            if lst[left] > lst[right]:
                lst[left], lst[right] = lst[right], lst[left]
                last_swap = right  # the last swapped position
            comparisons += 1
        unsorted_len = last_swap
    return comparisons

Categories