I am working on this problem where I need to find all of the combinations of the purchase items in a .csv file that equal a total. The purchase items can be positive and negative. When I run my code with practice data I can get the correct answer but when I use the real data which has hundreds of purchases it runs forever. I am wondering how to speed up the processing time of the following code.
import decimal
import pandas
import datetime
df = pandas.read_csv('negative_test.csv')
print(df)
# Index are the keys of the dictonary
index_list = list(df.index)
# Values are the values of the dictonary
values = df["Purchase"].to_list()
target = df["Target"].to_list()[0]
print(target)
def sanitize_values(list1):
sanitized_list = []
for item in list1:
sanitized_list.append(float(item.replace(",","")))
return sanitized_list
def merge(list1, list2):
merged_list = [(list1[i], list2[i]) for i in range(0, len(list1))]
return merged_list
merged_tuples = merge(index_list,sanitize_values(values))
def sum_tuples(tuples):
s = 0
for i in range(len(tuples)):
s += tuples[i][1]
#s += round(tuples[i][1],2)
return s
def subset_sum(numbers, target, partial=[], s=0, progress=0):
if(len(partial) > 0):
s += partial[len(partial)-1][1]
#s = sum_tuples(partial)
# check if the partial sum is equals to target
if round(s,2) == round(target,2):
print ((partial, target))
if len(numbers) == 0:
return
for i in range(len(numbers)):
#hacked in progress bar
if(len(partial) == 0):
progress += 1
printDateTime()
print("progress: " + str(progress))
#print("Starting from: " + str(i) + " Numbers: " + str(numbers) + "\n len: " + str(len(partial)) + "partial: " + str(partial))
n = numbers[i][1]
#n = round(numbers[i][1],2)
remaining = numbers[i+1:]
# print(remaining)
subset_sum(remaining, target, partial + [numbers[i]], s, progress)
def printDateTime():
now = datetime.datetime.now()
print ("Current date and time : ")
print (now.strftime("%Y-%m-%d %H:%M:%S"))
if __name__ == "__main__":
printDateTime()
subset_sum(merged_tuples,target)
printDateTime()
I am working on some practice exercises with linked lists and I got stuck with one function.
My program should create a Node class, take user input with create() function (number n and then takes in n number of elements), and has a function printLinkedList(p) to print it out. So far this works well but then I should create another function where I am going to be deleting the max element (if it occurs more than once, delete the first occurrence).
I found a function findMaxElement(p) that looks for the max, however, it doesn't work along my code (for example I get AttributeError: 'Node' object has no attribute 'head' error)
class Node:
def __init__(self, x = None):
self.data = x
self.next = None
def create():
n = int(input())
if n == 0:
return None
s = input().split()
p = Node(int(s[0]))
k = p
for i in range(1, n):
t = Node(int(s[i]))
k.next = t
k = t
return p
def printLinkedList(p):
if p == None:
print('Empty')
return
s = p
while s != None:
print(s.data, end = " ")
s = s.next
print()
def findMaxElement(p):
current = p.head
#Initializing max to initial node info
maximum = p.head.data
if(p.head == None):
print("List is empty")
else:
while(True):
#If current node's info is greater than max
#Then replace value of max with current node's info
if(maximum < current.info):
maximum = current.info
current= current.next
if(current == p.head):
break
return "Maximum value node in the list: "+ str(maximum)
#Driver code
a = create()
printLinkedList(a)
Input:
6
1 7 4 2 6 7
Expected result:
1 7 4 2 6 7
1 4 2 6 7
You could just define a findMaxElement() that traverses the linked-list in the same way that the printLinkedList() function is doing it (and finds the maximum value while doing so):
def findMaxElement(p):
if p == None:
return 'Empty List!'
current = p
maximum = p.data
while current != None: # Not end of list.
if current.data > maximum:
maximum = current.data
current = current.next
return "Maximum value node in the list: " + str(maximum)
Now, finding the shortest sequence of flips in pancake sorting is alone NP-hard, yet I'd like to find each and all of them, and count them.
Meaning for each permutation I'd like to find all the sequences of prefix reversals that restores the identity but not longer than the shortest one.
Here's what I've got so far:
#!/bin/env python3
# coding: utf-8
from math import factorial
import itertools
from multiprocessing import cpu_count, Manager, Pool
import numpy
import scipy.sparse
def flip(x, value):
return tuple(value[:x][::-1] + value[x:])
def rank(perm):
n = len(perm)
fact = factorial(n)
r = 0
for i in range(n):
fact //= n - i
r += len([x for x in perm[i:] if x < perm[i]]) * fact
return r
def unrank(i, items):
its = items[:]
perm = []
n = len(items)
fact = factorial(n)
r = i % fact
while its:
fact //= n
c, r = divmod(r, fact)
perm.append(its.pop(c))
n -= 1
return tuple(perm)
def get_colex_row(r, n, _fact):
row = scipy.sparse.dok_matrix((
1, _fact[n - 1]), dtype=numpy.int8)
perm = unrank(r, [i for i in range(n)])
for i in range(n):
column = r - r % _fact[i] + rank(perm[:-i - 2:-1])
row[0, column] = i + 1
return row
def get_colex_matrix(n):
fact = [factorial(i) for i in range(1, n + 1)]
m = scipy.sparse.dok_matrix(
(fact[n - 1], fact[n - 1]), dtype=numpy.int8)
items = [_ for _ in range(1, n + 1)]
for r in range(fact[n - 1]):
row = get_colex_row(r, n, fact)
m[r] = row
return m
def get_distance(n, items):
nfact = factorial(n)
stack = {unrank(i, items) for i in range(nfact)}
m = get_colex_matrix(n)
distance = {unrank(nfact - 1, items)[::-1] : 0}
new_distance = {nfact - 1}
d = 0
while distance.keys() != stack:
new_new_distance = set()
d += 1
for visiting in new_distance:
for i in range(2, n + 1):
key_index = m[visiting].tolist().index(i)
key = unrank(key_index, items)[::-1]
if key not in distance:
distance[key] = d
new_new_distance.add(key_index)
new_distance = new_new_distance
return distance
def get_paths_serial(items):
n = len(items)
nfact = factorial(n)
stack = {unrank(i, items) for i in range(nfact)}
m = get_colex_matrix(n)
distance = {unrank(nfact - 1, items)[::-1]: {()}}
new_distance = {nfact - 1}
while distance.keys() != stack:
new_new_distance = set()
for visiting_index in new_distance:
for i in range(2, n + 1):
key_index = m[visiting_index].tolist().index(i)
key = unrank(key_index, items)[::-1]
visiting = unrank(visiting_index, items)[::-1]
paths = distance[visiting]
prev_sample = next(iter(paths))
if key not in distance:
distance[key] = {path + (i,) for path in paths}
new_new_distance.add(key_index)
else:
curr_sample = next(iter(distance[key]))
if len(prev_sample) + 1 < len(curr_sample):
print("Shouldn't happen!")
distance[key] = {path + (i,) for path in paths}
elif len(prev_sample) + 1 == len(curr_sample):
distance[key] |= {path + (i,) for path in paths}
else:
# not relevant
pass
new_distance = new_new_distance
return distance
def _worker(ns, index):
row = get_colex_row(index, ns.n, ns.fact).toarray().tolist()[0]
visiting = unrank(index, ns.items)[::-1]
paths = ns.distance[visiting]
prev_sample = next(iter(paths))
out = {}
my_new_distance = set()
for i in range(2, ns.n + 1):
key_index = row.index(i)
key = unrank(key_index, ns.items)[::-1]
if key not in ns.distance:
out[key] = {path + (i,) for path in paths}
my_new_distance.add(key_index)
else:
curr_sample = next(iter(ns.distance[key]))
if len(prev_sample) + 1 < len(curr_sample):
print("Shouldn't happen!")
out[key] = {path + (i,) for path in paths}
elif len(prev_sample) + 1 == len(curr_sample):
out[key].update(path + (i,) for path in paths)
return my_new_distance, out
def get_paths_parallel(items):
n = len(items)
fact = [factorial(i) for i in range(1, n + 1)]
distance = {unrank(fact[n - 1] - 1, items)[::-1]: {()}}
stack = {unrank(i, items) for i in range(fact[n - 1])}
already_visited = set()
visiting = {fact[n - 1] - 1}
mgr = Manager()
namespace = mgr.Namespace()
namespace.fact = fact
namespace.distance = distance
namespace.items = items
namespace.n = n
with Pool(2 * cpu_count()) as pool:
while distance.keys() != stack:
result = pool.starmap(_worker, ((namespace, job)
for job in visiting))
visiting = set()
for next_to_visit, visited in result:
visiting |= next_to_visit
for k, v in visited.items():
if k in distance:
distance[k] |= v
else:
distance[k] = v
visiting -= already_visited
already_visited |= visiting
namespace.distance = distance
return distance
def colex(value, other):
for i in range(len(value) - 1, 0, -1):
if value[i] == other[i]:
continue
return value[i] > other[i]
return False
def ordered_by(order_cmp):
'Convert a cmp= function into a key= function'
if order_cmp is None:
return None
class K(object):
def __init__(self, obj):
self.value = obj
def __gt__(self, other):
if len(self.value) != len(other.value):
assert "Not the same length"
return order_cmp(self.value, other.value)
return K
def get_ordered(n, order):
return sorted(itertools.permutations(range(1, n + 1)),
key=ordered_by(order))
def get_matrix(n, order=None):
stack = get_ordered(n, order)
m = numpy.zeros((len(stack), len(stack)), numpy.int8)
for i,s in enumerate(stack):
for x in range(1, n + 1):
m[i, stack.index(flip(x, s))] = x
return m
I'm not sure what I'm doing wrong, but get_paths_parallel runs slower than get_paths_serial, please help!
I really should (and probably will soon) document my code better.
So for the time being, I'll say a few additional words:
It uses co-lexicographic ordering to rank the permutations and to find the indices in the adjacency matrix. Where I store the length of the flip that transforms the permutations, e.g. A(i,j) = k if performing a k length prefix reversal on the permutation with rank i results the ranked j permutation. In order to save on memory instead of storing the whole matrix I generate the rows on demand and limit the access by excluding already visited ones also I'm using scipy.sparse.dok_matrix for the same reason.
Other than these it's simply floods the graph till all permutations are reached.
There are some functions that doesn't use all or any of the consideration above like get_matrix, but presented only to validate that others, like get_colex_matrix are working as intended.
I'm creating the key function in a little bit convoluted manner, but that's just because I've tried other sorting before I've settled on co-lex.
Using multiprocessing.Manager to share data between processes makes them slow down.
Solution is to copy the needed data into each process's memory space (passing them as argument) or to use global variables for them.
Also using scipy.sparse.dok_matrix is overkill, dict would do.
I'll grab the literature I've found on the subject and link it hare later.
I'm trying to write a function that would recursively hash a key for n times, alternating between sha224 and sha256. Each iteration would be hash_256(hash_224)--a hash256 for the hash224 of the key--so that it would yield n * (hash_256(hash_224)). However, I'm new to coding and can't figure out how to write a recursive function with these parameters.
import hashlib
def shasum(key, n):
key = str(key).encode('utf-8')
hash_a = hashlib.sha224(key).hexdigest().encode('utf-8'))
hash_b = hashlib.sha256(hash_a).hexdigest()
if n == 0 or 1:
return hash_b #one iteration of 256(224)
else:
return n-1
return hash_b #stuck here
Edited: now it behaves like a number generator. What's wrong?
import hashlib
n = 0
def sha480(seed):
hashed_224 = str(hashlib.sha224(seed)).encode('utf-8')
hashed_256 = hashlib.sha256(hashed_224).hexdigest()
hashed_480 = str(hashed_256)
print("hash: " + hashed_480)
def repeater(key, n):
if n == 0:
return key
seed = str(key).encode('utf-8')
while n > 0:
return sha480(repeater(seed, n-1))
repeater('what', 2)
You have no recursive calls at all. You could change it to:
def hash_a(key):
return hashlib.sha224(key).hexdigest().encode('utf-8')
def hash_b(key):
return hashlib.sha256(key).hexdigest()
def shasum(key, n):
if n == 0: # base case: 0 iterations -> return key itself
return key
key = str(key).encode('utf-8')
return hash_b(hash_a(shasum(key, n - 1))) # recursve call
A side note: n == 0 or 1 is equivalent to (n == 0) or 1 which is always true. For that pattern, use n == 0 or n == 1 or shorter n in (0, 1)
Your code is nearly correct. just some minor issues fixed as below
import hashlib
def shasum(key, n):
print ("n: " + str(n))
key = str(key).encode('utf-8')
hash_a = hashlib.sha224(key).hexdigest().encode('utf-8')
print ("hash_a: " + str(hash_a))
hash_b = hashlib.sha256(hash_a).hexdigest()
print ("hash_b: " + str(hash_b))
if n == 0:
return hash_b #one iteration of 256(224)
else:
return shasum(hash_b, n-1)
I keep getting a type error for this. I am experimenting with decorative functions. Any help is appreciated
def primer(func):
def primes(n):
print (n)
return None
#primer
def find_prime(n):
while True:
count = 2
if (count == n):
z = ("PRIME")
return z
elif (n % count == 0):
z = n / count
return z
else:
count += 1
continue
prime = find_prime()
prime(10)
def primer(func):
def primes(n):
print(n)
#return None: dont know why this is here, you could do without it
return primes
#The nontype error is occuring because your code is returning none
#so to fix that all you have to do is return the inner function
#primer
def find_prime(n):
while True:
count = 2
if (count == n):
z = ("PRIME")
return z
elif (n % count == 0):
z = n / count
return z
else:
count += 1
continue
prime = find_prime
# if you want to turn a function into a variable you have to make sure it's
# callable, which means no parantheses around it
prime(15) # then you can call it