I have to find the largest and second-largest numbers in a list using a divide-and-conquer algorithm. Everything works except the part where I use the indices a and b; I use them because it is faster and cheaper than slicing. Please don't rewrite the code or send other approaches; just help me fix this version if you can. Any help or ideas welcome. Thanks
#!/usr/local/bin/python2.7

def two_max(arr,a,b):
    n = len(arr)
    if n == 2:
        if arr[0] < arr[1]:
            return (arr[1], arr[0])
        else:
            return (arr[0], arr[1])
    (greatest_left, sec_greatest_left) = two_max(arr,a (a+b)/2)
    (greatest_right, sec_greatest_right) = two_max(arr,(a+b)/2,b)
    if greatest_left < greatest_right:
        greatest = greatest_right
        if greatest_left < sec_greatest_left:
            return (greatest, sec_greatest_left)
        else:
            return (greatest, greatest_left)
    else:
        greatest = greatest_left
        if greatest_right < sec_greatest_right:
            return (greatest, sec_greatest_right)
        else:
            return (greatest, greatest_right)
The biggest problem is that you never get any closer to your recursive base case.
The base case is len(arr) == 2. But every time you call yourself, you just pass arr as-is:
(greatest_left, sec_greatest_left) = two_max(arr,a,(a+b)/2)
(greatest_right, sec_greatest_right) = two_max(arr,(a+b)/2,b)
(Note that I'm guessing on the comma in the first one, because as you posted it, you're actually calling the number a as a function, which is unlikely to do anything useful…)
So, either your base case should take a and b into account, like this:
if b-a == 2:
    if arr[a] < arr[a+1]:
        return (arr[a+1], arr[a])
    else:
        return (arr[a], arr[a+1])
… or you should send a slice of arr instead of the whole thing—in which case you don't need a and b in the first place:
(greatest_left, sec_greatest_left) = two_max(arr[:len(arr)/2])
(greatest_right, sec_greatest_right) = two_max(arr[len(arr)/2:])
Either one will fix your first problem. Of course the function still doesn't work for most inputs. In fact, it only works if the length of the list is a power of two.
If that isn't a good enough hint for how to fix it: What happens if b-a is 3? Obviously you can't split it into two halves, both of which are of size 2 or greater. So, you'll need to write another base case for b-a == 1, and return something that will make the rest of the algorithm work.
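Putting those hints together, here is one possible sketch of the fixed function (my assembly of the above, not part of the original post; it assumes Python 2, where / on ints is floor division, uses negative infinity as a sentinel second value for one-element ranges, and is called as two_max(arr, 0, len(arr))):

def two_max(arr, a, b):
    if b - a == 1:
        # One element: no real runner-up, so return a sentinel
        # that loses every comparison.
        return (arr[a], float('-inf'))
    if b - a == 2:
        if arr[a] < arr[a+1]:
            return (arr[a+1], arr[a])
        else:
            return (arr[a], arr[a+1])
    mid = (a + b) / 2  # floor division on Python 2 ints
    great_l, sec_l = two_max(arr, a, mid)
    great_r, sec_r = two_max(arr, mid, b)
    # The runner-up is either the losing half's greatest or the
    # winning half's own runner-up.
    if great_l < great_r:
        return (great_r, max(great_l, sec_r))
    return (great_l, max(great_r, sec_l))

Note that the merge step also has to compare the losing greatest against the winner's second value, which the comparisons in the posted code didn't do.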
Why don't you do it this way:
>>> def getIlargest(arr, i):
...     if (i <= len(arr) and i > 0):
...         return sorted(arr)[-i]
...
>>> a = [1,3,51,4,6,23,53,2,532,5,2,6,7,5,4]
>>> getIlargest(a, 2)
53
I took it one step further and tested 3 methods:
Using counting sort - getIlargestVer2
Using python sorted function - getIlargestVer1
Using heap - heapIlargest as #abarnert suggested.
The results:
for arrays of size 1 to ~5000, sorted is the best; for larger arrays, the heapq.nlargest usage is the winner:
[Plot: timings for array sizes in [1*150, 55*150]]
[Plot: full scan for array sizes in [1*150, 300*150]]
The code I used is the following; the three method implementations are in the setup string:
setup = """
import heapq, random
a = random.sample(xrange(1<<30), 150)
a = a * factor
class ILargestFunctions:
# taken from [wiki][3] and was rewriting it.
def counting_sort(self, array, maxval):
m = maxval + 1
count = {}
for a in array:
if count.get(a, None) is None:
count[a] = 1
else:
count[a] += 1
i = 0
for key in count.keys():
for c in range(count[key]):
array[i] = key
i += 1
return array
def getIlargestVer1(self, arr, i):
if (i <= len(arr) and i > 0):
return sorted(arr)[-i]
def getIlargestVer2(self, arr, i):
if (i <= len(arr) and i > 0):
return self.counting_sort(arr, max(arr))[-i]
def heapIlargest(self, arr, i):
if (i <= len(arr) and i > 0):
return heapq.nlargest(i,arr)
n = ILargestFunctions()
"""
The main block that drives the performance counting and plots the collected data is:
import timeit
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    results = {}
    r1 = []; r2 = []; r3 = [];
    x = np.arange(1, 300, 1)
    for i in xrange(1, 300, 1):
        print i
        factorStr = "factor = " + str(i) + ";"
        newSetupStr = factorStr + setup
        r1.append(timeit.timeit('n.getIlargestVer1(a, 100)', number=200, setup=newSetupStr))
        r2.append(timeit.timeit('n.getIlargestVer2(a, 100)', number=200, setup=newSetupStr))
        r3.append(timeit.timeit('n.heapIlargest(a, 100)', number=200, setup=newSetupStr))
        results[i] = (r1, r2, r3)

    p1 = plt.plot(x, r1, 'r', label="getIlargestVer1")
    p2 = plt.plot(x, r2, 'b', label="getIlargestVer2")
    p3 = plt.plot(x, r3, 'g', label="heapIlargest")
    plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)
    plt.show()
#0x90 has the right idea, but he got it reversed.
def find_i_largest_element(seq, i):
    if (i <= len(seq) and i > 0):
        s = sorted(seq, reverse=True)
        return s[i-1]
By the way, is this a homework assignment? If so, what's the whole idea behind the algorithm you have to use?
I made myself an exercise with Python since I am new. I wanted to make a reverse LCM calculator (least common multiple), but for some reason something as simple as a print in a loop doesn't seem to work for me. I would appreciate some help since I've been stuck on this weird issue for 20 minutes now. Here is the code:
import random
import sys

def print_list():
    count_4_print = 0
    while count_4_print < len(values):
        print(values[count_4_print])
        count_4_print += 1

def lcm(x, y):
    if x > y:
        greater = x
    else:
        greater = y
    while True:
        if (greater % x == 0) and (greater % y == 0):
            lcm1 = greater
            break
        greater += 1
    return lcm1

def guess(index, first_guess, second_guess):
    num = 1
    while lcm(first_guess, second_guess) != values[num - 1]:
        first_guess = random.randrange(1, 1000000)
        second_guess = random.randrange(1, 1000000)
        num += 1
        num = 1
    if lcm(first_guess, second_guess) == values[num - 1]:
        return first_guess, second_guess
        num += 1

lineN = int(input())
values = []
count_4_add = 0
count_4_guess = 0
for x in range(lineN):
    values.append(int(input()))
    count_4_add += 1
    if count_4_add >= lineN:
        break

print_list()

for x in range(lineN + 1):
    first, second = guess(count_4_guess, 1, 1)
    count_4_guess += 1
    print(first + second)
    # this ^^^ doesn't work for some reason
Line 57 is the print call in the loop with count_4_guess, right above that comment: print(first + second).
Edit: The code is supposed to take in an int x and then prompt for x values. The outputs are two of the inputs (not counting x) such that LCM(output1, output2) is one of the values; this is done for each of the values, x times. What it actually does is just the first part: it takes x, prompts for x values, and then prints them, but it doesn't process the data (or it just doesn't print the result).
Note: From looking at your comments and edits it seems that you are lacking some basic knowledge and/or understanding of things. I strongly encourage you to study more programming, computer science and python before attempting to create entire programs like this.
It is tough to answer your question properly since many aspects are unclear, so I will update my answer to reflect any relevant changes in your post.
Now, onto my answer. First, I will go over some of your code and attempt to give feedback on what could be improved. Then, I will present two ways to compute the least common multiple (LCM) in Python.
Code review
Code:
def print_list():
    count_4_print = 0
    while count_4_print < len(values):
        print(values[count_4_print])
        count_4_print += 1
Notes:
Where are the parameters? It was already mentioned in a few comments, but the importance of this cannot be stressed enough! (see the note at the beginning of my comment)
It appears that you are trying to print each element of a list on a new line. You can do that with print(*my_list, sep='\n').
That while loop is not how you should iterate over the elements of a list. Instead, use a for loop: for element in my_list:.
Code:
def lcm(x, y):
    if x > y:
        greater = x
    else:
        greater = y
    while True:
        if (greater % x == 0) and (greater % y == 0):
            lcm1 = greater
            break
        greater += 1
    return lcm1
Notes:
This is not a correct algorithm for the LCM, since it crashes when either number is 0.
The comparison of x and y can be replaced with greater = max(x, y).
See the solution I posted below for a different way of writing this same algorithm.
Code:
def guess(index, first_guess, second_guess):
    num = 1
    while lcm(first_guess, second_guess) != values[num - 1]:
        first_guess = random.randrange(1, 1000000)
        second_guess = random.randrange(1, 1000000)
        num += 1
        num = 1
    if lcm(first_guess, second_guess) == values[num - 1]:
        return first_guess, second_guess
        num += 1
Notes:
The line num += 1 comes immediately after return first_guess, second_guess, which means it is never executed. Somehow the mistakes cancel each other out since, as far as I can tell, it wouldn't do anything anyway if it were executed.
if lcm(first_guess, second_guess) == values[num - 1]: is completely redundant, since the while loop above checks the exact same condition.
In fact, not only is it redundant it is also fundamentally broken, as mentioned in this comment by user b_c.
Unfortunately I cannot say much more on this function since it is too difficult for me to understand its purpose.
Code:
lineN = int(input())
values = []
count_4_add = 0
count_4_guess = 0
for x in range(lineN):
    values.append(int(input()))
    count_4_add += 1
    if count_4_add >= lineN:
        break

print_list()
Notes:
As explained previously, print_list() should not be a thing.
lineN should be changed to line_n, or even better, something like num_in_vals.
count_4_add will always be equal to lineN at the end of your for loop.
Building on the previous point, the check if count_4_add >= lineN is useless.
In conclusion, count_4_add and count_4_guess are completely unnecessary and detrimental to the program.
The for loop produces values in the variable x which is never used. You can replace an unused variable with _: for _ in range(10):.
Since your input code is simple you could probably get away with something like in_vals = [int(input(f'Enter value number {i}: ')) for i in range(1, num_in_vals+1)]. Again, this depends on what it is you're actually trying to do.
LCM Implementations
According to the Wikipedia article referenced earlier, the best way to calculate the LCM is using the greatest common divisor (GCD).
import math

def lcm(a: int, b: int) -> int:
    if a == b:
        res = a
    else:
        res = abs(a * b) // math.gcd(a, b)
    return res
This second method is one possible brute force solution, which is similar to how the one you are currently using should be written.
def lcm(a, b):
    if a == b:
        res = a
    else:
        max_mult = a * b
        res = max_mult
        great = max(a, b)
        small = min(a, b)
        for i in range(great, max_mult, great):
            if i % small == 0:
                res = i
                break
    return res
This final method works for any number of inputs.
import math
import functools

def lcm_simp(a: int, b: int) -> int:
    if a == b:
        res = a
    else:
        res = abs(a * b) // math.gcd(a, b)
    return res

def lcm(*args: int) -> int:
    return functools.reduce(lcm_simp, args)
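A quick usage check of the reduce-based version:

>>> lcm(4, 6)
12
>>> lcm(2, 3, 4, 5)
60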
Oof, that ended up being way longer than I expected. Anyway, let me know if anything is unclear, if I've made a mistake, or if you have any further questions! :)
After analyzing the fastest subset sum algorithm which runs in 2^(n/2) time, I noticed a slight optimization that can be done. I'm not sure if it really counts as an optimization and if it does, I'm wondering if it can be improved by recursion.
Basically from the original algorithm: http://en.wikipedia.org/wiki/Subset_sum_problem (see part with title Exponential time algorithm)
it takes the list and splits it into two
then it generates the sorted power sets of both in 2^(n/2) time
then it does a linear search in both lists to see if 1 value in both lists sum to x using a clever trick
In my version with the optimization
it takes the list and removes the last element (call it last)
then it splits the list in two
then it generates the sorted power sets of both in 2^((n-1)/2) time
then it does a linear search in both lists to see if 1 value in both lists sum to x or x-last (at same time with same running time) using a clever trick
If it finds either, then I will know it worked. I tried using Python's time functions to test with lists of size 22, and my version comes out about twice as fast.
After running the below code, it shows
0.050999879837 <- the original algorithm
0.0250000953674 <- my algorithm
My logic for the recursion part is: well, if it works for a size-n list in 2^((n-1)/2) time, can we not repeat this again and again?
Does any of this make sense, or am I totally wrong?
Thanks
I created this python code:
from math import log, ceil, floor
import helper # my own code
from random import randint, uniform
import time

# gets a list of unique random floats
# s = how many random numbers
# l = smallest float can be
# h = biggest float can be
def getRandomList(s, l, h):
    lst = []
    while len(lst) != s:
        r = uniform(l, h)
        if not r in lst:
            lst.append(r)
    return lst

# This just generates the two powerset sorted lists that the 2^(n/2) algorithm makes.
# This is just a lazy way of doing it, this running time is way worse, but since
# this can be done in 2^(n/2) time, I just pretend its that running time lol
def getSortedPowerSets(lst):
    n = len(lst)
    l1 = lst[:n/2]
    l2 = lst[n/2:]
    xs = range(2**(n/2))
    ys1 = helper.getNums(l1, xs)
    ys2 = helper.getNums(l2, xs)
    return ys1, ys2
# this just checks using the regular 2^(n/2) algorithm to see if two values
# sum to the specified value
def checkListRegular(lst, x):
    lst1, lst2 = getSortedPowerSets(lst)
    left = 0
    right = len(lst2) - 1
    while left < len(lst1) and right >= 0:
        sum = lst1[left] + lst2[right]
        if sum < x:
            left += 1
        elif sum > x:
            right -= 1
        else:
            return True
    return False
# this is my improved version of the above version
def checkListSmaller(lst, x):
    last = lst.pop()
    x1, x2 = x, x - last
    return checkhelper(lst, x1, x2)

# this is the same as the function 'checkListRegular', but it checks 2 values
# at the same time
def checkhelper(lst, x1, x2):
    lst1, lst2 = getSortedPowerSets(lst)
    left = [0, 0]
    right = [len(lst2) - 1, len(lst2) - 1]
    while 1:
        check = 0
        if left[0] < len(lst1) and right[0] >= 0:
            check += 1
            sum = lst1[left[0]] + lst2[right[0]]
            if sum < x1:
                left[0] += 1
            elif sum > x1:
                right[0] -= 1
            else:
                return True
        if left[1] < len(lst1) and right[1] >= 0:
            check += 1
            sum = lst1[left[1]] + lst2[right[1]]
            if sum < x2:
                left[1] += 1
            elif sum > x2:
                right[1] -= 1
            else:
                return True
        if check == 0:
            return False
n = 22
lst = getRandomList(n, 1, 3000)

startTime = time.time()
print checkListRegular(lst, -50) # -50 so it does worst case scenario
startTime2 = time.time()
print checkListSmaller(lst, -50) # -50 so it does worst case scenario
startTime3 = time.time()

print (startTime2 - startTime)
print (startTime3 - startTime2)
This is the helper library which I just use to generate the powerset list.
def dec_to_bin(x):
    return int(bin(x)[2:])

def getNums(lst, xs):
    sums = []
    n = len(lst)
    for i in xs:
        bin = str(dec_to_bin(i))
        bin = (n - len(bin)) * "0" + bin
        chosen_items = getList(bin, lst)
        sums.append(sum(chosen_items))
    sums.sort()
    return sums

def getList(binary, lst):
    s = []
    for i in range(len(binary)):
        if binary[i] == "1":
            s.append(float(lst[i]))
    return s
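As an aside (my variant, not from the question), the same sorted subset-sum list can be built more directly with itertools.combinations, which avoids the binary-string round trip and makes the xs argument unnecessary:

from itertools import combinations

def getNumsCombos(lst):
    # Sum every subset of lst (the empty subset contributes 0,
    # matching the i = 0 bitmask above), then sort.
    sums = [sum(combo)
            for r in range(len(lst) + 1)
            for combo in combinations(lst, r)]
    sums.sort()
    return sums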
then it generates the sorted power sets of both in 2^((n-1)/2) time
OK, since now the list has one less element. However, this is not a big deal; it's just a constant-factor improvement of 2^(1/2)...
then it does a linear search in both lists to see if 1 value in both lists sum to x or x-last (at same time with same running time) using a clever trick
... and this improvement will go away because now you do twice as many operations to check for both x and x-last sums instead of only for x
can we not repeat this again and again?
No you can't, for the same reason you couldn't split the original algorithm again and again. The trick only works once, because once you start looking for values in more than two lists you can't use the sorting trick anymore.
I wrote code to arrange numbers after taking user input. The ordering requires that the sum of adjacent numbers is prime. Up to an input of 10 the code works fine; if I go beyond that, the system hangs. Please let me know the steps to optimize it.
e.g., input 8
Answer should be: (1, 2, 3, 4, 7, 6, 5, 8)
Code as follows....
import itertools

x = raw_input("please enter a number")
range_x = range(int(x) + 1)
del range_x[0]
result = list(itertools.permutations(range_x))

def prime(x):
    for i in xrange(1, x, 2):
        if i == 1:
            i = i + 1
        if x % i == 0 and i < x:
            return False
        else:
            return True

def is_prime(a):
    for i in xrange(len(a)):
        print a
        if i < len(a) - 1:
            if prime(a[i] + a[i+1]):
                pass
            else:
                return False
        else:
            return True

for i in xrange(len(result)):
    if i < len(result) - 1:
        if is_prime(result[i]):
            print 'result is:'
            print result[i]
            break
    else:
        print 'result is'
        print result[i-1]
For posterity ;-), here's one more based on finding a Hamiltonian path. It's Python3 code. As written, it stops upon finding the first path, but can easily be changed to generate all paths. On my box, it finds a solution for all n in 1 through 900 inclusive in about one minute total. For n somewhat larger than 900, it exceeds the maximum recursion depth.
The prime generator (psieve()) is vast overkill for this particular problem, but I had it handy and didn't feel like writing another ;-)
The path finder (ham()) is a recursive backtracking search, using what's often (but not always) a very effective ordering heuristic: of all the vertices adjacent to the last vertex in the path so far, look first at those with the fewest remaining exits. For example, this is "the usual" heuristic applied to solving Knights Tour problems. In that context, it often finds a tour with no backtracking needed at all. Your problem appears to be a little tougher than that.
def psieve():
    import itertools
    yield from (2, 3, 5, 7)
    D = {}
    ps = psieve()
    next(ps)
    p = next(ps)
    assert p == 3
    psq = p*p
    for i in itertools.count(9, 2):
        if i in D:      # composite
            step = D.pop(i)
        elif i < psq:   # prime
            yield i
            continue
        else:           # composite, = p*p
            assert i == psq
            step = 2*p
            p = next(ps)
            psq = p*p
        i += step
        while i in D:
            i += step
        D[i] = step
def build_graph(n):
    primes = set()
    for p in psieve():
        if p > 2*n:
            break
        else:
            primes.add(p)
    np1 = n+1
    adj = [set() for i in range(np1)]
    for i in range(1, np1):
        for j in range(i+1, np1):
            if i+j in primes:
                adj[i].add(j)
                adj[j].add(i)
    return set(range(1, np1)), adj
def ham(nodes, adj):
    class EarlyExit(Exception):
        pass

    def inner(index):
        if index == n:
            raise EarlyExit
        avail = adj[result[index-1]] if index else nodes
        for i in sorted(avail, key=lambda j: len(adj[j])):
            # Remove vertex i from the graph. If this isolates
            # more than 1 vertex, no path is possible.
            result[index] = i
            nodes.remove(i)
            nisolated = 0
            for j in adj[i]:
                adj[j].remove(i)
                if not adj[j]:
                    nisolated += 1
                    if nisolated > 1:
                        break
            if nisolated < 2:
                inner(index + 1)
            nodes.add(i)
            for j in adj[i]:
                adj[j].add(i)

    n = len(nodes)
    result = [None] * n
    try:
        inner(0)
    except EarlyExit:
        return result

def solve(n):
    nodes, adj = build_graph(n)
    return ham(nodes, adj)
This answer is based on #Tim Peters' suggestion about Hamiltonian paths.
There are many possible solutions. To avoid excessive memory consumption for intermediate solutions, a random path can be generated. This also makes it easy to utilize multiple CPUs (each CPU generates its own paths in parallel).
import multiprocessing as mp
import sys

def main():
    number = int(sys.argv[1])
    # directed graph, vertices: 1..number (including ends)
    # there is an edge between i and j if (i+j) is prime
    vertices = range(1, number+1)
    G = {} # vertex -> adjacent vertices
    is_prime = sieve_of_eratosthenes(2*number+1)
    for i in vertices:
        G[i] = []
        for j in vertices:
            if is_prime[i + j]:
                G[i].append(j) # there is an edge from i to j in the graph

    # utilize multiple cpus
    q = mp.Queue()
    for _ in range(mp.cpu_count()):
        p = mp.Process(target=hamiltonian_random, args=[G, q])
        p.daemon = True # do not survive the main process
        p.start()
    print(q.get())

if __name__=="__main__":
    main()
where Sieve of Eratosthenes is:
def sieve_of_eratosthenes(limit):
    is_prime = [True]*limit
    is_prime[0] = is_prime[1] = False # zero and one are not primes
    for n in range(int(limit**.5 + .5)):
        if is_prime[n]:
            for composite in range(n*n, limit, n):
                is_prime[composite] = False
    return is_prime
and:
import random

def hamiltonian_random(graph, result_queue):
    """Build random paths until Hamiltonian path is found."""
    vertices = list(graph.keys())
    while True:
        # build a random path
        path = [random.choice(vertices)] # start with a random vertex
        while True: # until path can be extended with a random adjacent vertex
            neighbours = graph[path[-1]]
            random.shuffle(neighbours)
            for adjacent_vertex in neighbours:
                if adjacent_vertex not in path:
                    path.append(adjacent_vertex)
                    break
            else: # can't extend path
                break

        # check whether it is hamiltonian
        if len(path) == len(vertices):
            assert set(path) == set(vertices)
            result_queue.put(path) # found hamiltonian path
            return
Example
$ python order-adjacent-prime-sum.py 20
Output
[19, 18, 13, 10, 1, 4, 9, 14, 5, 6, 17, 2, 15, 16, 7, 12, 11, 8, 3, 20]
The output is a random sequence that satisfies the conditions (verified in the sketch below):
it is a permutation of the range from 1 to 20 (inclusive)
the sum of adjacent numbers is prime
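Both conditions can be checked mechanically; a quick sketch reusing sieve_of_eratosthenes from above:

path = [19, 18, 13, 10, 1, 4, 9, 14, 5, 6, 17, 2, 15, 16, 7, 12, 11, 8, 3, 20]
is_prime = sieve_of_eratosthenes(2 * 20 + 1)
assert sorted(path) == list(range(1, 21))                    # permutation of 1..20
assert all(is_prime[a + b] for a, b in zip(path, path[1:]))  # adjacent sums are prime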
Time performance
It takes around 10 seconds on average to get a result for n = 900; extrapolating the time as an exponential function, it should take around 20 seconds for n = 1000:
The image is generated using this code:
import numpy as np

figname = 'hamiltonian_random_noset-noseq-900-900'
Ns, Ts = np.loadtxt(figname+'.xy', unpack=True)

# use polyfit to fit the data
#   y = c*a**n
#   log y = log (c * a ** n)
#   log Ts = log c + Ns * log a
coeffs = np.polyfit(Ns, np.log2(Ts), deg=1)
poly = np.poly1d(coeffs, variable='Ns')

# use curve_fit to fit the data
from scipy.optimize import curve_fit

def func(x, a, c):
    return c*a**x

popt, pcov = curve_fit(func, Ns, Ts)
aa, cc = popt
a, c = 2**coeffs

# plot it
import matplotlib.pyplot as plt

plt.figure()
plt.plot(Ns, np.log2(Ts), 'ko', label='time measurements')
plt.plot(Ns, np.polyval(poly, Ns), 'r-',
         label=r'$time = %.2g\times %.4g^N$' % (c, a))
plt.plot(Ns, np.log2(func(Ns, *popt)), 'b-',
         label=r'$time = %.2g\times %.4g^N$' % (cc, aa))
plt.xlabel('N')
plt.ylabel('log2(time in seconds)')
plt.legend(loc='upper left')
plt.show()
Fitted values:
>>> c*a**np.array([900, 1000])
array([ 11.37200806, 21.56029156])
>>> func([900, 1000], *popt)
array([ 14.1521409 , 22.62916398])
Recursive backtracking to the rescue:
def is_prime(n):
    return all(n % i != 0 for i in range(2, n))

def order(numbers, current=[]):
    if not numbers:
        return current
    for i, n in enumerate(numbers):
        if current and not is_prime(n + current[-1]):
            continue
        result = order(numbers[:i] + numbers[i + 1:], current + [n])
        if result:
            return result
    return False

result = order(list(range(500)))  # list() so slicing and concatenation work on Python 3

for i in range(len(result) - 1):
    assert is_prime(result[i] + result[i + 1])
You can force it to work for even larger lists by increasing the maximum recursion depth.
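For instance (10000 is an arbitrary illustrative value; the default limit is typically 1000):

import sys
sys.setrecursionlimit(10000)  # raise the cap before calling order() on long lists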
Here's my take on a solution. As Tim Peters pointed out, this is a Hamiltonian path problem.
So the first step is to generate the graph in some form.
Well, the zeroth step in this case is to generate prime numbers. I'm going to use a sieve, but whatever prime test is fine. We need primes up to 2 * n since that is the largest any two numbers can sum to.
m = 8
n = m + 1 # Just so I don't have to worry about zero indexes and random +/- 1's
primelen = 2 * m
prime = [True] * primelen
prime[0] = prime[1] = False
for i in range(4, primelen, 2):
    prime[i] = False
for i in range(3, primelen, 2):
    if not prime[i]:
        continue
    for j in range(i * i, primelen, i):
        prime[j] = False
OK, now we can test for primality with prime[i]. Now it's easy to make the graph edges. If I have a number i, what numbers can come next? I'll also make use of the fact that i and j have opposite parity.
pairs = [set(j for j in range(i%2+1, n, 2) if prime[i+j])
         for i in range(n)]
So here pairs[i] is a set object whose elements are integers j such that i+j is prime.
Now we need to walk the graph. This is really where the time consuming part is and all further optimizations will be done here.
chains = [
    ([], set(range(1, n)))
]
chains is going to keep track of the valid paths as we walk them. The first element in the tuple will be your result. The second element is all the unused numbers, or unvisited nodes. The idea is to take one chain out of the queue, take a step down the path and put it back.
while chains:
    chain, unused = chains.pop()
    if not chain:
        # we haven't even started, all unused are valid
        valid_next = unused
    else:
        # We need numbers that are both unused and paired with the last node.
        # Using sets makes this easy
        valid_next = unused & pairs[chain[-1]]
    for num in valid_next:
        # Take a step to the new node and add the new path back to chains.
        # Reminder: it's important not to mutate anything here, always make new objs
        newchain = chain + [num]
        newunused = unused - set([num])
        chains.append( (newchain, newunused) )
        # are we done?
        if not newunused:
            print newchain
            chains = False
            break  # stop extending; the falsy chains also ends the outer while
Notice that if there is no valid next step, the path is removed without a replacement.
This is really memory inefficient, but runs in a reasonable time. The biggest performance bottleneck is walking the graph, so the next optimization would be popping and inserting paths in intelligent places to prioritize the most likely paths. It might be helpful to use a collections.deque or different container for your chains in that case.
EDIT
Here is an example of how you can implement your path priority. We will assign each path a score and keep the chains list sorted by this score. For a simple example I will suggest that paths containing "harder to use" nodes are worth more. That is, for each step on a path the score will increase by n - len(valid_next). The modified code will look something like this.
import bisect

chains = ...
chains_score = [0]
while chains:
    chain, unused = chains.pop()
    score = chains_score.pop()
    ...
    for num in valid_next:
        newchain = chain + [num]
        newunused = unused - set([num])
        newscore = score + n - len(valid_next)
        index = bisect.bisect(chains_score, newscore)
        chains.insert(index, (newchain, newunused))
        chains_score.insert(index, newscore)
Remember that insertion is O(n), so the overhead of adding this can be rather large. It's worth doing some analysis on your score algorithm to keep the queue length len(chains) manageable.
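One way to avoid those O(n) list insertions is a heap-based priority queue. Here is a sketch of the same search using heapq (my adaptation, not from the original answer; it assumes the pairs list and n from earlier, and negates the score because heapq pops the smallest entry while we want the highest-scoring path first):

import heapq
import itertools

def walk_prioritized(n, pairs):
    tie = itertools.count()  # tie-breaker so equal scores never compare the sets
    heap = [(0, next(tie), [], set(range(1, n)))]
    while heap:
        negscore, _, chain, unused = heapq.heappop(heap)
        valid_next = unused if not chain else unused & pairs[chain[-1]]
        for num in valid_next:
            newchain = chain + [num]
            newunused = unused - set([num])
            if not newunused:
                return newchain
            newscore = -negscore + n - len(valid_next)
            heapq.heappush(heap, (-newscore, next(tie), newchain, newunused))
    return None

Both push and pop are O(log n), so the queue maintenance no longer dominates as the number of live paths grows.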
def getPrimeList(check):
    storedprimes = []
    i = 2
    while i <= check:
        if isPrime(check):
            storedprimes = storedprimes + [i]
        i = i + 1
    return storedprimes

def getPrimeFact(check):
    primelist = getPrimeList(check)
    prime_fact = []
    i = 0
    while check != 1:
        if check % primelist[i] == 0:
            prime_fact = prime_fact + [primelist[i]]
            check = check / primelist[i]
        i = i + 1
        if i == len(primelist):
            i = 0
    return prime_fact

def getGCF(checks):
    a = 0
    listofprimefacts = []
    while a < len(checks):
        listofprimefacts = listofprimefacts + [getPrimeFact(checks[a])]
        a = a + 1
    b = 0
    storedprimes = []
    while b < len(primefactlist):
        c = 0
        while c < len(listofprimefacts[b]):
            if listofprimefacts[b][c] not in storedprimes:
                storedprimes = storedprimes + [listofprimefacts[b][c]]
            c = c + 1
        b = b + 1
    prime_exp = []
    d = 0
    while d < len(storedprimes):
        prime_exp = prime_exp + [0]
        d = d + 1
    e = 0
    while e < len(storedprimes):
        f = 0
        while f < len(listofprimefacts):
            if f == 0:
                prime_exp[e] = listofprimefacts[f].count(storedprimes[e])
            elif prime_exp[e] - (listofprimefacts[f].count(storedprimes[e])) > 0:
                prime_exp[e] = listofprimefacts[f].count(storedprimes[e])
            f = f + 1
        e = e + 1
    g = 0
    GCF = 1
    while g < len(primelist):
        GCF = GCF * (storedprime[g] ** prime_exp[g])
        g = g + 1
    return GCF
I am creating a program that will use these functions to calculate fractions; however, after testing my GCF function in the shell I keep getting a list indexing error. I have no idea where the error is coming from, considering I'm 99% sure there are no problems with my indexes. Usually I would not post such a "fixable" problem on SO, but this time I just have no idea what the problem is. Thanks again.
Oh, and here's the exact error:
File "<pyshell#1>", line 1, in <module>
getGCF(checks)
File "E:\CompProgramming\MidtermFuncts.py", line 31, in getGCF
listofprimefacts=listofprimefacts+[getPrimeFact(checks[a])]
File "E:\CompProgramming\MidtermFuncts.py", line 20, in getPrimeFact
if check%primelist[i]==0:
IndexError: list index out of range
You might want to re-think how you attack this problem. In its current form, your code is really hard to work with.
Here's how I'd do it:
def is_prime(n):
    for i in range(2, int(n ** 0.5) + 1):
        if n % i == 0:
            return False
    return True

def prime_factors(number):
    factors = []
    for i in range(2, number / 2):
        if number % i == 0 and is_prime(i):
            factors.append(i)
    return factors

def gcf(numbers):
    common_factors = prime_factors(numbers[0])
    for number in numbers[1:]:
        new_factors = prime_factors(number)
        common_factors = [factor for factor in common_factors if factor in new_factors]
    return max(common_factors)
This line right here:
common_factors = [factor for factor in common_factors if factor in new_factors]
Is a list comprehension. You can unroll it into another for loop:
temp = []
for factor in common_factors:
    if factor in new_factors:
        temp.append(factor)
common_factors = list(temp) # Pass by value, not by reference
You mixed up i and check in your getPrimeList() function; you test if check is a prime, not i; here is the correct function:
def getPrimeList(check):
    storedprimes = []
    i = 2
    while i <= check:
        if isPrime(i): # *not* `check`!
            storedprimes = storedprimes + [i]
        i = i + 1
    return storedprimes
Thus, primelist will be set to an empty list (as getPrimeList(check) returns an empty list) and your primelist[i] (for any i) will fail with an index error.
Another way for primelist to be empty is when isPrime() never returns True; you don't show us that function to verify it.
Your next error is in getGCF(); you define a listofprimefacts variable (a list) first, but later refer to a non-existing primefactlist variable, leading to a NameError. The next name error is going to be primelist further in that function.
You really want to re-read the Python tutorial; you are missing out on many python idioms in your code; specifically on how to create loops over sequences (hint: for check in checks: is easier to use than a while loop with an index variable) and how to append items to a list.
My personal toolkit defines this:
from math import sqrt

def prime_factors(num, start=2):
    """Return all prime factors (ordered) of num in a list"""
    candidates = xrange(start, int(sqrt(num)) + 1)
    factor = next((x for x in candidates if (num % x == 0)), None)
    return ([factor] + prime_factors(num / factor, factor) if factor else [num])
which doesn't need an isPrime() test.
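For example:

>>> prime_factors(360)
[2, 2, 2, 3, 3, 5]
>>> prime_factors(97)
[97]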
What is the fastest way to sort an array of whole integers bigger than 0 and less than 100000 in Python, without using built-in functions like sort?
I'm looking at the possibility of combining two sort functions depending on input size.
If you are interested in asymptotic time, then counting sort or radix sort provide good performance.
However, if you are interested in wall-clock time, you will need to compare performance between different algorithms using your particular data sets, as different algorithms perform differently with different datasets. In that case, it's always worth trying quicksort:
def qsort(inlist):
    if inlist == []:
        return []
    else:
        pivot = inlist[0]
        lesser = qsort([x for x in inlist[1:] if x < pivot])
        greater = qsort([x for x in inlist[1:] if x >= pivot])
        return lesser + [pivot] + greater
Source: http://rosettacode.org/wiki/Sorting_algorithms/Quicksort#Python
Since you know the range of numbers, you can use Counting Sort which will be linear in time.
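A minimal counting-sort sketch for this problem's stated value range (1 to 99999):

def counting_sort(items, max_value=100000):
    # O(n + max_value) time, O(max_value) extra space.
    counts = [0] * max_value
    for item in items:
        counts[item] += 1
    result = []
    for value, count in enumerate(counts):
        result.extend([value] * count)
    return result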
Radix sort theoretically runs in linear time (sort time grows roughly in direct proportion to array size), but in practice quicksort is probably more suited, unless you're sorting absolutely massive arrays.
If you want to make quicksort a bit faster, you can use insertion sort when the array size becomes small, as in the sketch below.
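A sketch of that hybrid, reusing the qsort shape from the answer above (the cutoff of 16 is an arbitrary illustrative choice):

def insertion_sort(lst):
    for i in range(1, len(lst)):
        key = lst[i]
        j = i - 1
        while j >= 0 and lst[j] > key:
            lst[j + 1] = lst[j]  # shift larger elements right
            j -= 1
        lst[j + 1] = key
    return lst

def hybrid_qsort(inlist, cutoff=16):
    if len(inlist) <= cutoff:
        return insertion_sort(list(inlist))  # small partitions: insertion sort
    pivot = inlist[0]
    lesser = hybrid_qsort([x for x in inlist[1:] if x < pivot], cutoff)
    greater = hybrid_qsort([x for x in inlist[1:] if x >= pivot], cutoff)
    return lesser + [pivot] + greater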
It would probably be helpful to understand the concepts of algorithmic complexity and Big-O notation too.
Early versions of Python used a hybrid of samplesort (a variant of quicksort with a large sample size) and binary insertion sort as the built-in sorting algorithm. This proved to be somewhat unstable, so Python 2.3 onward uses an adaptive mergesort algorithm (Timsort).
Order of mergesort (average) = O(n log n).
Order of mergesort (worst) = O(n log n).
But order of quicksort (worst) = O(n^2).
If you use list = [ ... ], then list.sort() uses the mergesort algorithm.
For a comparison between sorting algorithms you can read the wiki; for a detailed comparison, see comp.
I might be a little late to the show, but there's an interesting article that compares different sorts at https://www.linkedin.com/pulse/sorting-efficiently-python-lakshmi-prakash
One of the main takeaways is that while the default sort does great we can do a little better with a compiled version of quicksort. This requires the Numba package.
Here's a link to the Github repo:
https://github.com/lprakash/Sorting-Algorithms/blob/master/sorts.ipynb
We can use count sort with a dictionary to minimize the additional space usage and keep the running time low as well. The count sort is much slower for small input arrays because of the Python-vs-C implementation overhead; it starts to overtake the regular sort when the size of the array (COUNT) is about 1 million.
If you really want huge speedups for smaller size inputs, implement the count sort in C and call it from Python.
(Fixed a bug which Aaron (+1) helped catch ...)
The Python-only implementation below compares the two approaches...
import random
import time

COUNT = 3000000

array = [random.randint(1,100000) for i in range(COUNT)]
random.shuffle(array)

array1 = array[:]
start = time.time()
array1.sort()
end = time.time()
time1 = (end-start)
print 'Time to sort = ', time1*1000, 'ms'

array2 = array[:]
start = time.time()
ardict = {}
for a in array2:
    try:
        ardict[a] += 1
    except:
        ardict[a] = 1
indx = 0
for a in sorted(ardict.keys()):
    b = ardict[a]
    array2[indx:indx+b] = [a for i in xrange(b)]
    indx += b
end = time.time()
time2 = (end-start)
print 'Time to count sort = ', time2*1000, 'ms'
print 'Ratio =', time2/time1
The built-in functions are best, but since you can't use them, have a look at this:
http://en.wikipedia.org/wiki/Quicksort
def sort(l):
    p = 0
    while p < len(l) - 1:
        if l[p] > l[p+1]:
            l[p], l[p+1] = l[p+1], l[p]
            if not (p == 0):
                p = p - 1
        else:
            p += 1
    return l

This is an algorithm that I created, but it is really fast. Just do sort(l), where l is the list that you want to sort.
#fmark
Some benchmarking of a Python merge-sort implementation I wrote against the Python quicksorts from http://rosettacode.org/wiki/Sorting_algorithms/Quicksort#Python and from the top answer.
The size of the list and the size of the numbers in the list are irrelevant: merge sort wins, although it uses the builtin int() to floor the midpoint.
import numpy as np

x = list(np.random.rand(100))

# TEST 1, merge_sort
def merge(l, p, q, r):
    n1 = q - p + 1
    n2 = r - q
    left = l[p : p + n1]
    right = l[q + 1 : q + 1 + n2]
    i = 0
    j = 0
    k = p
    while k < r + 1:
        if i == n1:
            l[k] = right[j]
            j += 1
        elif j == n2:
            l[k] = left[i]
            i += 1
        elif left[i] <= right[j]:
            l[k] = left[i]
            i += 1
        else:
            l[k] = right[j]
            j += 1
        k += 1

def _merge_sort(l, p, r):
    if p < r:
        q = int((p + r)/2)
        _merge_sort(l, p, q)
        _merge_sort(l, q+1, r)
        merge(l, p, q, r)

def merge_sort(l):
    _merge_sort(l, 0, len(l)-1)

# TEST 2
def quicksort(array):
    _quicksort(array, 0, len(array) - 1)

def _quicksort(array, start, stop):
    if stop - start > 0:
        pivot, left, right = array[start], start, stop
        while left <= right:
            while array[left] < pivot:
                left += 1
            while array[right] > pivot:
                right -= 1
            if left <= right:
                array[left], array[right] = array[right], array[left]
                left += 1
                right -= 1
        _quicksort(array, start, right)
        _quicksort(array, left, stop)

# TEST 3
def qsort(inlist):
    if inlist == []:
        return []
    else:
        pivot = inlist[0]
        lesser = qsort([x for x in inlist[1:] if x < pivot])
        greater = qsort([x for x in inlist[1:] if x >= pivot])
        return lesser + [pivot] + greater

def test1():
    merge_sort(x)

def test2():
    quicksort(x)

def test3():
    qsort(x)

if __name__ == '__main__':
    import timeit
    print('merge_sort:', timeit.timeit("test1()", setup="from __main__ import test1, x;", number=10000))
    print('quicksort:', timeit.timeit("test2()", setup="from __main__ import test2, x;", number=10000))
    print('qsort:', timeit.timeit("test3()", setup="from __main__ import test3, x;", number=10000))
Bucket sort with bucket size = 1. Memory is O(m) where m = the range of values being sorted. Running time is O(n) where n = the number of items being sorted. When the integer type used to record counts is bounded, this approach will fail if any value appears more than MAXINT times.
def sort(items):
    seen = [0] * 100000
    for item in items:
        seen[item] += 1
    index = 0
    for value, count in enumerate(seen):
        for _ in range(count):
            items[index] = value
            index += 1
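Usage (the list is sorted in place, so the function returns nothing):

>>> items = [42, 7, 99999, 7, 1]
>>> sort(items)
>>> items
[1, 7, 7, 42, 99999]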