Python script slowing down as it progresses?

I have a simulation running that has this basic structure:
from time import time

def CSV(*args):
    # write *args to a .CSV file
    return

def timeleft(a, L, period):
    print(# details on how long the last period took, ETA #)

for L in range(0, 6, 2):
    for a in range(1, 100):
        timeA = time()
        for t in range(1, 1000):
            ## Manufacturer in Supply Chain ##
            inventory_accounting_lists.append(#simple calculations#)
            # Simulation to determine the optimal B-value (Basestock level)
            for B in range(1, 100):
                for tau in range(1, 1000):
                    ## simple inventory accounting operations ##
            ## Distributor in Supply Chain ##
            inventory_accounting_lists.append(#simple calculations#)
            # Simulation to determine the optimal B-value (Basestock level)
            for B in range(1, 100):
                for tau in range(1, 1000):
                    ## simple inventory accounting operations ##
            ## Wholesaler in Supply Chain ##
            inventory_accounting_lists.append(#simple calculations#)
            # Simulation to determine the optimal B-value (Basestock level)
            for B in range(1, 100):
                for tau in range(1, 1000):
                    ## simple inventory accounting operations ##
            ## Retailer in Supply Chain ##
            inventory_accounting_lists.append(#simple calculations#)
            # Simulation to determine the optimal B-value (Basestock level)
            for B in range(1, 100):
                for tau in range(1, 1000):
                    ## simple inventory accounting operations ##
        CSV(Simulation_Results)
        timeB = time()
        timeleft(a, L, timeB - timeA)
As the script continues, it seems to get slower and slower. Here are the timings at several points (the period time increases roughly linearly as a increases):
L = 0, a = 1: 1.15 minutes
L = 0, a = 99: 1.7 minutes
L = 2, a = 1: 2.7 minutes
L = 2, a = 99: 5.15 minutes
L = 4, a = 1: 4.5 minutes
L = 4, a = 15: 4.95 minutes (this is the latest value it has reached)
Why would each iteration take longer? Each iteration of the loop essentially resets everything except for a master global list, which is appended to each time. However, the loops inside each "period" aren't accessing this master list -- they access the same local lists every time.
EDIT 1: I will post the simulation code here, in case anyone wants to wade through it, but I warn you, it is rather long, and the variable names are probably unnecessarily confusing.
#########
a = 0.01
L = 0
total = 1000
sim = 500
inv_cost = 1
bl_cost = 4
#########

# Functions
import random
from time import time

time0 = time()

# function to report ETA etc.
def timeleft(a, L, period_time):
    if L == 0:
        periods_left = ((1-a)*100)-1+2*99
    if L == 2:
        periods_left = ((1-a)*100)-1+99
    if L == 4:
        periods_left = ((1-a)*100)-1+0*99
    minute_time = period_time/60
    minutes_left = (periods_left*period_time)/60
    hours_left = (periods_left*period_time)/3600
    percentage_complete = 100*((297-periods_left)/297)
    print("Time for last period = ", "%.2f" % minute_time, " minutes")
    print("%.2f" % percentage_complete, "% complete")
    if hours_left < 1:
        print("%.2f" % minutes_left, " minutes left")
    else:
        print("%.2f" % hours_left, " hours left")
    print("")
    return

def dcopy(inList):
    if isinstance(inList, list):
        return list(map(dcopy, inList))
    return inList

# Save values to .CSV file
def CSV(a, L, I_STD_1, I_STD_2, I_STD_3, I_STD_4, O_STD_0,
        O_STD_1, O_STD_2, O_STD_3, O_STD_4):
    pass

# Initialization
# These are the global, master lists of data
I_STD_1 = [[0],[0],[0]]
I_STD_2 = [[0],[0],[0]]
I_STD_3 = [[0],[0],[0]]
I_STD_4 = [[0],[0],[0]]
O_STD_0 = [[0],[0],[0]]
O_STD_1 = [[0],[0],[0]]
O_STD_2 = [[0],[0],[0]]
O_STD_3 = [[0],[0],[0]]
O_STD_4 = [[0],[0],[0]]
for L in range(0, 6, 2):
    # These are local lists that are appended to at the end of every period
    I_STD_1_L = []
    I_STD_2_L = []
    I_STD_3_L = []
    I_STD_4_L = []
    O_STD_0_L = []
    O_STD_1_L = []
    O_STD_2_L = []
    O_STD_3_L = []
    O_STD_4_L = []
    test = []
    for n in range(1, 100):  # THIS is the start of the 99 value loop
        a = n/100
        print("L=", L, ", alpha=", a)
        # Initialization for each Period
        F_1 = [0,10]  # Forecast
        F_2 = [0,10]
        F_3 = [0,10]
        F_4 = [0,10]
        R_0 = [10]  # Items Received
        R_1 = [10]
        R_2 = [10]
        R_3 = [10]
        R_4 = [10]
        for i in range(L):
            R_1.append(10)
            R_2.append(10)
            R_3.append(10)
            R_4.append(10)
        I_1 = [10]  # Final Inventory
        I_2 = [10]
        I_3 = [10]
        I_4 = [10]
        IP_1 = [10+10*L]  # Inventory Position
        IP_2 = [10+10*L]
        IP_3 = [10+10*L]
        IP_4 = [10+10*L]
        O_1 = [10]  # Items Ordered
        O_2 = [10]
        O_3 = [10]
        O_4 = [10]
        BL_1 = [0]  # Backlog
        BL_2 = [0]
        BL_3 = [0]
        BL_4 = [0]
        OH_1 = [20]  # Items on Hand
        OH_2 = [20]
        OH_3 = [20]
        OH_4 = [20]
        OR_1 = [10]  # Order received from customer
        OR_2 = [10]
        OR_3 = [10]
        OR_4 = [10]
        Db_1 = [10]  # Running Average Demand
        Db_2 = [10]
        Db_3 = [10]
        Db_4 = [10]
        var_1 = [0]  # Running Variance in Demand
        var_2 = [0]
        var_3 = [0]
        var_4 = [0]
        B_1 = [IP_1[0]+10]  # Optimal Basestock
        B_2 = [IP_2[0]+10]
        B_3 = [IP_3[0]+10]
        B_4 = [IP_4[0]+10]
        D = [0,10]  # End customer demand
        for i in range(total+1):
            D.append(9)
            D.append(12)
            D.append(8)
            D.append(11)
        period = [0]
        timeA = time()
        # 1000 time periods t
        for t in range(1, total+1):
            period.append(t)
            #### MANUFACTURER ####
            # Manufacturing order from previous time period put into production
            R_4.append(O_4[t-1])
            # receive shipment from supplier, calculate items on hand
            if I_4[t-1] < 0:
                OH_4.append(R_4[t])
            else:
                OH_4.append(I_4[t-1]+R_4[t])
            # Receive and dispatch order, update Inventory and Backlog for time t
            if (O_3[t-1] + BL_4[t-1]) <= OH_4[t]:  # No Backlog
                I_4.append(OH_4[t] - (O_3[t-1] + BL_4[t-1]))
                BL_4.append(0)
                R_3.append(O_3[t-1]+BL_4[t-1])
            else:
                I_4.append(OH_4[t] - (O_3[t-1] + BL_4[t-1]))  # Backlogged
                BL_4.append(-I_4[t])
                R_3.append(OH_4[t])
            # Update Inventory Position
            IP_4.append(IP_4[t-1] + O_4[t-1] - O_3[t-1])
            # Use exponential smoothing to forecast future demand
            future_demand = (1-a)*F_4[t] + a*O_3[t-1]
            F_4.append(future_demand)
            # Calculate D_bar(t) and Var(t)
            Db_4.append((1/t)*sum(O_3[0:t]))
            s = 0
            for i in range(0, t):
                s += (O_3[i]-Db_4[t])**2
            if t == 1:
                var_4.append(0)  # var(1) = 0
            else:
                var_4.append((1/(t-1))*s)
            # Simulation to determine B(t)
            S_BC_4 = [10000000000]*10
            Run_4 = [0]*10
            for B in range(10, 500):
                S_OH_4 = OH_4[:]
                S_I_4 = I_4[:]
                S_R_4 = R_4[:]
                S_BL_4 = BL_4[:]
                S_IP_4 = IP_4[:]
                S_O_4 = O_4[:]
                # Update O(t) (the period just before the simulation begins)
                # using the B value for the simulation
                if B - S_IP_4[t] > 0:
                    S_O_4.append(B - S_IP_4[t])
                else:
                    S_O_4.append(0)
                c = 0
                for i in range(t+1, t+sim+1):
                    S_R_4.append(S_O_4[i-1])
                    # simulate demand
                    demand = -1
                    while demand < 0:
                        demand = random.normalvariate(F_4[t+1], (var_4[t])**(.5))
                    # Receive simulated shipment, calculate simulated items on hand
                    if S_I_4[i-1] < 0:
                        S_OH_4.append(S_R_4[i])
                    else:
                        S_OH_4.append(S_I_4[i-1]+S_R_4[i])
                    # Receive and send order, update Inventory and Backlog (simulated)
                    owed = (demand + S_BL_4[i-1])
                    S_I_4.append(S_OH_4[i] - owed)
                    if owed <= S_OH_4[i]:  # No Backlog
                        S_BL_4.append(0)
                        c += inv_cost*S_I_4[i]
                    else:
                        S_BL_4.append(-S_I_4[i])  # Backlogged
                        c += bl_cost*S_BL_4[i]
                    # Update Inventory Position
                    S_IP_4.append(S_IP_4[i-1] + S_O_4[i-1] - demand)
                    # Update Order, Upstream member dispatches goods
                    if (B-S_IP_4[i]) > 0:
                        S_O_4.append(B - S_IP_4[i])
                    else:
                        S_O_4.append(0)
                # Log Simulation costs for that B-value
                S_BC_4.append(c)
                # If the simulated costs are increasing, stop
                if B > 11:
                    dummy = []
                    for i in range(0, 10):
                        dummy.append(S_BC_4[B-i]-S_BC_4[B-i-1])
                    Run_4.append(sum(dummy)/float(len(dummy)))
                    if Run_4[B-3] > 0 and B > 20:
                        break
                else:
                    Run_4.append(0)
            # Use minimum cost as new B(t)
            var = min((val, idx) for (idx, val) in enumerate(S_BC_4))
            optimal_B = var[1]
            B_4.append(optimal_B)
            # Calculate O(t)
            if B_4[t] - IP_4[t] > 0:
                O_4.append(B_4[t] - IP_4[t])
            else:
                O_4.append(0)
            #### DISTRIBUTOR ####
            # receive shipment from supplier, calculate items on hand
            if I_3[t-1] < 0:
                OH_3.append(R_3[t])
            else:
                OH_3.append(I_3[t-1]+R_3[t])
            # Receive and dispatch order, update Inventory and Backlog for time t
            if (O_2[t-1] + BL_3[t-1]) <= OH_3[t]:  # No Backlog
                I_3.append(OH_3[t] - (O_2[t-1] + BL_3[t-1]))
                BL_3.append(0)
                R_2.append(O_2[t-1]+BL_3[t-1])
            else:
                I_3.append(OH_3[t] - (O_2[t-1] + BL_3[t-1]))  # Backlogged
                BL_3.append(-I_3[t])
                R_2.append(OH_3[t])
            # Update Inventory Position
            IP_3.append(IP_3[t-1] + O_3[t-1] - O_2[t-1])
            # Use exponential smoothing to forecast future demand
            future_demand = (1-a)*F_3[t] + a*O_2[t-1]
            F_3.append(future_demand)
            # Calculate D_bar(t) and Var(t)
            Db_3.append((1/t)*sum(O_2[0:t]))
            s = 0
            for i in range(0, t):
                s += (O_2[i]-Db_3[t])**2
            if t == 1:
                var_3.append(0)  # var(1) = 0
            else:
                var_3.append((1/(t-1))*s)
            # Simulation to determine B(t)
            S_BC_3 = [10000000000]*10
            Run_3 = [0]*10
            for B in range(10, 500):
                S_OH_3 = OH_3[:]
                S_I_3 = I_3[:]
                S_R_3 = R_3[:]
                S_BL_3 = BL_3[:]
                S_IP_3 = IP_3[:]
                S_O_3 = O_3[:]
                # Update O(t) (the period just before the simulation begins)
                # using the B value for the simulation
                if B - S_IP_3[t] > 0:
                    S_O_3.append(B - S_IP_3[t])
                else:
                    S_O_3.append(0)
                c = 0
                for i in range(t+1, t+sim+1):
                    # simulate demand
                    demand = -1
                    while demand < 0:
                        demand = random.normalvariate(F_3[t+1], (var_3[t])**(.5))
                    S_R_3.append(S_O_3[i-1])
                    # Receive simulated shipment, calculate simulated items on hand
                    if S_I_3[i-1] < 0:
                        S_OH_3.append(S_R_3[i])
                    else:
                        S_OH_3.append(S_I_3[i-1]+S_R_3[i])
                    # Receive and send order, update Inventory and Backlog (simulated)
                    owed = (demand + S_BL_3[i-1])
                    S_I_3.append(S_OH_3[i] - owed)
                    if owed <= S_OH_3[i]:  # No Backlog
                        S_BL_3.append(0)
                        c += inv_cost*S_I_3[i]
                    else:
                        S_BL_3.append(-S_I_3[i])  # Backlogged
                        c += bl_cost*S_BL_3[i]
                    # Update Inventory Position
                    S_IP_3.append(S_IP_3[i-1] + S_O_3[i-1] - demand)
                    # Update Order, Upstream member dispatches goods
                    if (B-S_IP_3[i]) > 0:
                        S_O_3.append(B - S_IP_3[i])
                    else:
                        S_O_3.append(0)
                # Log Simulation costs for that B-value
                S_BC_3.append(c)
                # If the simulated costs are increasing, stop
                if B > 11:
                    dummy = []
                    for i in range(0, 10):
                        dummy.append(S_BC_3[B-i]-S_BC_3[B-i-1])
                    Run_3.append(sum(dummy)/float(len(dummy)))
                    if Run_3[B-3] > 0 and B > 20:
                        break
                else:
                    Run_3.append(0)
            # Use minimum cost as new B(t)
            var = min((val, idx) for (idx, val) in enumerate(S_BC_3))
            optimal_B = var[1]
            B_3.append(optimal_B)
            # Calculate O(t)
            if B_3[t] - IP_3[t] > 0:
                O_3.append(B_3[t] - IP_3[t])
            else:
                O_3.append(0)
            #### WHOLESALER ####
            # receive shipment from supplier, calculate items on hand
            if I_2[t-1] < 0:
                OH_2.append(R_2[t])
            else:
                OH_2.append(I_2[t-1]+R_2[t])
            # Receive and dispatch order, update Inventory and Backlog for time t
            if (O_1[t-1] + BL_2[t-1]) <= OH_2[t]:  # No Backlog
                I_2.append(OH_2[t] - (O_1[t-1] + BL_2[t-1]))
                BL_2.append(0)
                R_1.append(O_1[t-1]+BL_2[t-1])
            else:
                I_2.append(OH_2[t] - (O_1[t-1] + BL_2[t-1]))  # Backlogged
                BL_2.append(-I_2[t])
                R_1.append(OH_2[t])
            # Update Inventory Position
            IP_2.append(IP_2[t-1] + O_2[t-1] - O_1[t-1])
            # Use exponential smoothing to forecast future demand
            future_demand = (1-a)*F_2[t] + a*O_1[t-1]
            F_2.append(future_demand)
            # Calculate D_bar(t) and Var(t)
            Db_2.append((1/t)*sum(O_1[0:t]))
            s = 0
            for i in range(0, t):
                s += (O_1[i]-Db_2[t])**2
            if t == 1:
                var_2.append(0)  # var(1) = 0
            else:
                var_2.append((1/(t-1))*s)
            # Simulation to determine B(t)
            S_BC_2 = [10000000000]*10
            Run_2 = [0]*10
            for B in range(10, 500):
                S_OH_2 = OH_2[:]
                S_I_2 = I_2[:]
                S_R_2 = R_2[:]
                S_BL_2 = BL_2[:]
                S_IP_2 = IP_2[:]
                S_O_2 = O_2[:]
                # Update O(t) (the period just before the simulation begins)
                # using the B value for the simulation
                if B - S_IP_2[t] > 0:
                    S_O_2.append(B - S_IP_2[t])
                else:
                    S_O_2.append(0)
                c = 0
                for i in range(t+1, t+sim+1):
                    # simulate demand
                    demand = -1
                    while demand < 0:
                        demand = random.normalvariate(F_2[t+1], (var_2[t])**(.5))
                    # Receive simulated shipment, calculate simulated items on hand
                    S_R_2.append(S_O_2[i-1])
                    if S_I_2[i-1] < 0:
                        S_OH_2.append(S_R_2[i])
                    else:
                        S_OH_2.append(S_I_2[i-1]+S_R_2[i])
                    # Receive and send order, update Inventory and Backlog (simulated)
                    owed = (demand + S_BL_2[i-1])
                    S_I_2.append(S_OH_2[i] - owed)
                    if owed <= S_OH_2[i]:  # No Backlog
                        S_BL_2.append(0)
                        c += inv_cost*S_I_2[i]
                    else:
                        S_BL_2.append(-S_I_2[i])  # Backlogged
                        c += bl_cost*S_BL_2[i]
                    # Update Inventory Position
                    S_IP_2.append(S_IP_2[i-1] + S_O_2[i-1] - demand)
                    # Update Order, Upstream member dispatches goods
                    if (B-S_IP_2[i]) > 0:
                        S_O_2.append(B - S_IP_2[i])
                    else:
                        S_O_2.append(0)
                # Log Simulation costs for that B-value
                S_BC_2.append(c)
                # If the simulated costs are increasing, stop
                if B > 11:
                    dummy = []
                    for i in range(0, 10):
                        dummy.append(S_BC_2[B-i]-S_BC_2[B-i-1])
                    Run_2.append(sum(dummy)/float(len(dummy)))
                    if Run_2[B-3] > 0 and B > 20:
                        break
                else:
                    Run_2.append(0)
            # Use minimum cost as new B(t)
            var = min((val, idx) for (idx, val) in enumerate(S_BC_2))
            optimal_B = var[1]
            B_2.append(optimal_B)
            # Calculate O(t)
            if B_2[t] - IP_2[t] > 0:
                O_2.append(B_2[t] - IP_2[t])
            else:
                O_2.append(0)
            #### RETAILER ####
            # receive shipment from supplier, calculate items on hand
            if I_1[t-1] < 0:
                OH_1.append(R_1[t])
            else:
                OH_1.append(I_1[t-1]+R_1[t])
            # Receive and dispatch order, update Inventory and Backlog for time t
            if (D[t] + BL_1[t-1]) <= OH_1[t]:  # No Backlog
                I_1.append(OH_1[t] - (D[t] + BL_1[t-1]))
                BL_1.append(0)
                R_0.append(D[t]+BL_1[t-1])
            else:
                I_1.append(OH_1[t] - (D[t] + BL_1[t-1]))  # Backlogged
                BL_1.append(-I_1[t])
                R_0.append(OH_1[t])
            # Update Inventory Position
            IP_1.append(IP_1[t-1] + O_1[t-1] - D[t])
            # Use exponential smoothing to forecast future demand
            future_demand = (1-a)*F_1[t] + a*D[t]
            F_1.append(future_demand)
            # Calculate D_bar(t) and Var(t)
            Db_1.append((1/t)*sum(D[1:t+1]))
            s = 0
            for i in range(1, t+1):
                s += (D[i]-Db_1[t])**2
            if t == 1:  # Var(1) = 0
                var_1.append(0)
            else:
                var_1.append((1/(t-1))*s)
            # Simulation to determine B(t)
            S_BC_1 = [10000000000]*10
            Run_1 = [0]*10
            for B in range(10, 500):
                S_OH_1 = OH_1[:]
                S_I_1 = I_1[:]
                S_R_1 = R_1[:]
                S_BL_1 = BL_1[:]
                S_IP_1 = IP_1[:]
                S_O_1 = O_1[:]
                # Update O(t) (the period just before the simulation begins)
                # using the B value for the simulation
                if B - S_IP_1[t] > 0:
                    S_O_1.append(B - S_IP_1[t])
                else:
                    S_O_1.append(0)
                c = 0
                for i in range(t+1, t+sim+1):
                    # simulate demand
                    demand = -1
                    while demand < 0:
                        demand = random.normalvariate(F_1[t+1], (var_1[t])**(.5))
                    S_R_1.append(S_O_1[i-1])
                    # Receive simulated shipment, calculate simulated items on hand
                    if S_I_1[i-1] < 0:
                        S_OH_1.append(S_R_1[i])
                    else:
                        S_OH_1.append(S_I_1[i-1]+S_R_1[i])
                    # Receive and send order, update Inventory and Backlog (simulated)
                    owed = (demand + S_BL_1[i-1])
                    S_I_1.append(S_OH_1[i] - owed)
                    if owed <= S_OH_1[i]:  # No Backlog
                        S_BL_1.append(0)
                        c += inv_cost*S_I_1[i]
                    else:
                        S_BL_1.append(-S_I_1[i])  # Backlogged
                        c += bl_cost*S_BL_1[i]
                    # Update Inventory Position
                    S_IP_1.append(S_IP_1[i-1] + S_O_1[i-1] - demand)
                    # Update Order, Upstream member dispatches goods
                    if (B-S_IP_1[i]) > 0:
                        S_O_1.append(B - S_IP_1[i])
                    else:
                        S_O_1.append(0)
                # Log Simulation costs for that B-value
                S_BC_1.append(c)
                # If the simulated costs are increasing, stop
                if B > 11:
                    dummy = []
                    for i in range(0, 10):
                        dummy.append(S_BC_1[B-i]-S_BC_1[B-i-1])
                    Run_1.append(sum(dummy)/float(len(dummy)))
                    if Run_1[B-3] > 0 and B > 20:
                        break
                else:
                    Run_1.append(0)
            # Use minimum as your new B(t)
            var = min((val, idx) for (idx, val) in enumerate(S_BC_1))
            optimal_B = var[1]
            B_1.append(optimal_B)
            # Calculate O(t)
            if B_1[t] - IP_1[t] > 0:
                O_1.append(B_1[t] - IP_1[t])
            else:
                O_1.append(0)
        ### Calculate the Standard Deviation of the last half of time periods ###
        def STD(numbers):
            k = len(numbers)
            mean = sum(numbers) / k
            SD = (sum([dev*dev for dev in [x-mean for x in numbers]])/(k-1))**.5
            return SD
        start = (total//2)+1
        # Only use the last half of the time periods to calculate the standard deviation
        I_STD_1_L.append(STD(I_1[start:]))
        I_STD_2_L.append(STD(I_2[start:]))
        I_STD_3_L.append(STD(I_3[start:]))
        I_STD_4_L.append(STD(I_4[start:]))
        O_STD_0_L.append(STD(D[start:]))
        O_STD_1_L.append(STD(O_1[start:]))
        O_STD_2_L.append(STD(O_2[start:]))
        O_STD_3_L.append(STD(O_3[start:]))
        O_STD_4_L.append(STD(O_4[start:]))
        timeB = time()
        timeleft(a, L, timeB-timeA)
    I_STD_1[L//2] = I_STD_1_L[:]
    I_STD_2[L//2] = I_STD_2_L[:]
    I_STD_3[L//2] = I_STD_3_L[:]
    I_STD_4[L//2] = I_STD_4_L[:]
    O_STD_0[L//2] = O_STD_0_L[:]
    O_STD_1[L//2] = O_STD_1_L[:]
    O_STD_2[L//2] = O_STD_2_L[:]
    O_STD_3[L//2] = O_STD_3_L[:]
    O_STD_4[L//2] = O_STD_4_L[:]
    CSV(a, L, I_STD_1, I_STD_2, I_STD_3, I_STD_4, O_STD_0,
        O_STD_1, O_STD_2, O_STD_3, O_STD_4)

timeE = time()
print("Run Time: ", (timeE-time0)/3600, " hours")

This would be a good time to look at a profiler. You can profile the code to determine where the time is being spent. It seems likely that your issue is in the simulation code, but without being able to see that code, the best help you're likely to get is going to be vague.
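A quick way to do that with the standard library's cProfile (a sketch; replace the command with however you invoke your script):

python -m cProfile -s cumulative your_script.py

Or from inside the code, assuming the simulation is wrapped in a function (run_simulation is a hypothetical placeholder name here):

import cProfile
import pstats

# Profile one run, then print the 15 most expensive functions by cumulative time.
cProfile.run('run_simulation()', 'sim.prof')  # run_simulation() is hypothetical
pstats.Stats('sim.prof').sort_stats('cumulative').print_stats(15)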
Edit in light of added code:
You're doing a fair amount of copying of lists, which, while not terribly expensive individually, can consume a lot of time.
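To make that concrete: a slice copy like S_OH_4 = OH_4[:] costs O(len(list)), those history lists grow by one element every period t, and the copies sit inside the B loop, so each period pays for them up to ~490 times per echelon (fewer when the early break fires). A stripped-down sketch of the pattern:

history = []
for t in range(1, 1001):          # outer time loop, as in the posted code
    history.append(t)             # the history grows by one element per period
    for B in range(10, 500):      # inner basestock search
        snapshot = history[:]     # O(t) copy, repeated for every candidate B

Whether this particular cost is what dominates your runs is exactly what the profiler will tell you.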
I agree that your code is probably unnecessarily confusing and would advise you to clean it up. Changing the confusing names to meaningful ones may help you find where the problem is.
Finally, it may be the case that your simulation is simply computationally expensive. You might want to look into NumPy, SciPy, Pandas, or some other Python numerics package to get better performance and perhaps better tools for expressing the model you're simulating.
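As one concrete example of what such a package buys you, the running mean and sample variance that the posted code recomputes with a Python-level loop every period could be handed to NumPy (a sketch, not a drop-in patch for the whole model):

import numpy as np

orders = np.asarray(O_3[0:t], dtype=float)       # O_3 as in the posted code
d_bar = orders.mean()                            # replaces (1/t)*sum(O_3[0:t])
variance = orders.var(ddof=1) if t > 1 else 0.0  # sample variance; var(1) = 0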

I experienced a similar problem with a Python 3.x script I wrote. The script randomly generated 1,000,000 (one million) JSON objects, writing them out to a file.
My problem was that the program was growing progressively slower as time proceeded. Here is a timestamp trace every 10,000 objects:
So far: Mar23-17:56:46: 0
So far: Mar23-17:56:48: 10000 ( 2 seconds)
So far: Mar23-17:56:50: 20000 ( 2 seconds)
So far: Mar23-17:56:55: 30000 ( 5 seconds)
So far: Mar23-17:57:01: 40000 ( 6 seconds)
So far: Mar23-17:57:09: 50000 ( 8 seconds)
So far: Mar23-17:57:18: 60000 ( 8 seconds)
So far: Mar23-17:57:29: 70000 (11 seconds)
So far: Mar23-17:57:42: 80000 (13 seconds)
So far: Mar23-17:57:56: 90000 (14 seconds)
So far: Mar23-17:58:13: 100000 (17 seconds)
So far: Mar23-17:58:30: 110000 (17 seconds)
So far: Mar23-17:58:51: 120000 (21 seconds)
So far: Mar23-17:59:12: 130000 (21 seconds)
So far: Mar23-17:59:35: 140000 (23 seconds)
As can be seen, the script takes progressively longer to generate groups of 10,000 records.
In my case it turned out to be the way I was generating unique ID numbers, each in the range 10250000000000-10350000000000. To avoid generating the same ID twice, I stored each newly generated ID in a list, first checking that the ID did not already exist in the list:
import random

trekIdList = []

def GenerateRandomTrek():
    global trekIdList
    while True:
        r = random.randint(10250000000000, 10350000000000)
        if r not in trekIdList:
            trekIdList.append(r)
            return r
The problem is that an unsorted list takes O(n) to search. As newly generated IDs are appended to the list, the time needed to traverse/search the list grows.
The solution was to switch to a dictionary (or map):
trekIdList = {}
. . .
def GenerateRandomTrek():
    global trekIdList
    while True:
        r = random.randint(10250000000000, 10350000000000)
        if r not in trekIdList:
            trekIdList[r] = 1
            return r
The improvement was immediate:
So far: Mar23-18:11:30: 0
So far: Mar23-18:11:30: 10000
So far: Mar23-18:11:31: 20000
So far: Mar23-18:11:31: 30000
So far: Mar23-18:11:31: 40000
So far: Mar23-18:11:32: 50000
So far: Mar23-18:11:32: 60000
So far: Mar23-18:11:32: 70000
So far: Mar23-18:11:33: 80000
So far: Mar23-18:11:33: 90000
So far: Mar23-18:11:33: 100000
So far: Mar23-18:11:34: 110000
So far: Mar23-18:11:34: 120000
So far: Mar23-18:11:34: 130000
So far: Mar23-18:11:35: 140000
The reason is that accessing a value in a dictionary/map/hash is O(1).
Moral: When testing membership over large numbers of items, use a dictionary/map, or binary-search a sorted list, rather than an unordered list.
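A set is arguably an even more natural fit than a dict with dummy values, and it gives the same O(1) average-case membership test:

import random

trekIdSet = set()

def GenerateRandomTrek():
    while True:
        r = random.randint(10250000000000, 10350000000000)
        if r not in trekIdSet:    # O(1) average case, like the dict version
            trekIdSet.add(r)
            return r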

You can use cProfile and the like, but many times it will still be hard to spot the issue. However, knowing that the slowness grows linearly is a huge benefit, since you already roughly know what the problem is, just not exactly where it is.
I'd start by elimination and simplification:
Make a small, fast example that demonstrates the sluggishness as a separate file.
Run it and keep removing or commenting out large portions of the code.
Once you have narrowed things down enough, look for Python constructs such as values(), items(), in, for, and deepcopy as good candidates.
By continuously simplifying the example and re-running the test script, you will eventually get down to the core issue.
Once you have resolved one bottleneck, you might find that the sluggishness is still there when you bring back the old code. In that case there is most probably more than one bottleneck.
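A minimal timing harness for that kind of elimination work (a sketch; drop one simulation period, or whatever block you currently suspect, into the loop body):

import time

last = time.perf_counter()
for t in range(1, 1001):
    # ... the suspect block goes here ...
    if t % 100 == 0:
        now = time.perf_counter()
        print("periods %d-%d took %.3f s" % (t - 99, t, now - last))
        last = now

If each successive report takes longer than the one before, the block inside the loop is doing work proportional to something that grows with t, which is exactly the linear-slowdown signature described in the question.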

Related

Find the maximum product sales with planning schedules

I'm working on a dynamic programming problem, though I'm not quite sure it really is dynamic programming, since the moving average M is based on the previous M. No need to consider efficiency. The problem requires selling a product over T time periods and maximizing the total actual sale amount. The total number of products is N, and I plan to sell n_0, n_1, ..., n_{T-1} products over the successive periods, with ∑ n_i = N.
In short, the goal is to find the optimal schedule n_0, n_1, ..., n_{T-1} satisfying ∑ n_i = N that maximizes ∑ S_i.
The actual sale amount S_i depends on the current moving average M_i and on n_i.
Assume that α = 0.001 and π = 0.5.
Initialize M = 0. Then for i = 0, 1, ..., T-1:
Compute the new moving average M_i = ⌈0.5 * (M_{i-1} + n_i)⌉.
At time i we sell S_i = ⌈(1 - α * M_i^π) * n_i⌉ products.
Continue this process until the last time period. For example, assuming we already know n_i for all periods, the trading goes as below:
import math
import numpy as np

M = 0
T = 4
N = 10000
alpha = 1e-3
pi = 0.5
S = np.zeros(T, dtype='i')
n = np.array([5000, 1000, 2000, 2000])
print(n)
total = 0
for i in range(T):
    M = math.ceil(0.5*(M + n[i]))
    S[i] = math.ceil((1 - alpha*M**pi)*n[i])
    total += S[i]
    print('at time %d, M = %d and we sell %d products' % (i, M, S[i]))
print('total sold =', total)
My idea is to keep track of the state indexed by time period t, products left n, and moving average m, storing the achievable sales in a high-dimensional matrix. I think the upper bound for the moving average is just [0, n]. I'm still confused about how to program it. Could someone point out how to fix the problems in my code? Thank you very much.
Below is my crude attempt, but its output looks a little strange.
import math
import numpy as np

def DPtry(N, T, alpha, pi, S):
    schedule = np.zeros(T)
    M = 0
    for n in range(0, N+1):
        for m in range(0, n+1):
            S[T-1, n, m] = math.ceil((1 - alpha*m**pi)*n)
    for k in range(1, T):
        t = T - k - 1
        print("t = ", t)
        for n in range(0, N+1):
            for m in range(0, n+1):
                best = -1
                for plan in range(0, n+1):
                    salenow = math.ceil((1 - alpha*m**pi)*plan)
                    M = math.ceil(0.5*(m + plan))
                    salelater = S[t+1, n-plan, M]
                    candidate = salenow + salelater
                    if candidate > best:
                        best = candidate
                S[t, n, m] = best
    print(S[0, N, 0])

N = 100
T = 5
pi = .5
alpha = 1e-3
S = np.zeros((T, N+1, N+1))
DPtry(N, T, alpha, pi, S)

Python multiprocessing: how to create x number of processes and get return value back

I have a program that I created using threads, but then I learned that in Python threads don't run truly in parallel for CPU-bound work (only one thread executes Python bytecode at a time, because of the GIL), while processes do. As a result, I am trying to rewrite the program using multiprocessing, but I am having a hard time doing so. I have tried following several examples that show how to create processes and pools, but I don't think it's exactly what I want.
Below is my code with the attempts I have tried. The program tries to estimate the value of pi by randomly placing points on a graph that contains a circle. It takes two command-line arguments: the total number of points to try placing on the graph (N), and the number of threads/processes to create.
import math
import sys
from time import time
import concurrent.futures
import random
import multiprocessing as mp

def myThread(arg):
    # Take care of input argument
    n = int(arg)
    print("Thread received. n = ", n)
    # main calculation loop
    count = 0
    for i in range(0, n):
        x = random.uniform(0, 1)
        y = random.uniform(0, 1)
        d = math.sqrt(x * x + y * y)
        if (d < 1):
            count = count + 1
    print("Thread found ", count, " points inside circle.")
    return count
# end myThread

# receive command line arguments
if (len(sys.argv) == 3):
    N = sys.argv[1]  # original ex: 0.01
    N = int(N)
    totalThreads = sys.argv[2]
    totalThreads = int(totalThreads)
    print("N = ", N)
    print("totalThreads = ", totalThreads)
else:
    print("Incorrect number of arguments!")
    sys.exit(1)

if ((totalThreads == 1) or (totalThreads == 2) or (totalThreads == 4) or (totalThreads == 8)):
    print()
else:
    print("Invalid number of threads. Please use 1, 2, 4, or 8 threads.")
    sys.exit(1)

# start experiment
t = int(time() * 1000)  # begin run time
total = 0

# ATTEMPT 1
# processes = []
# for i in range(totalThreads):
#     process = mp.Process(target=myThread, args=(N/totalThreads))
#     processes.append(process)
#     process.start()
# for process in processes:
#     process.join()

# ATTEMPT 2
# pool = mp.Pool(mp.cpu_count())
# total = pool.map(myThread, [N/totalThreads])

# ATTEMPT 3
# for i in range(totalThreads):
#     total = total + pool.map(myThread, [N/totalThreads])
#     p = mp.Process(target=myThread, args=(N/totalThreads))
#     p.start()

# ATTEMPT 4
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     for i in range(totalThreads):
#         future = executor.submit(myThread, N/totalThreads)  # start thread
#         total = total + future.result()  # get result

# analyze results
pi = 4 * total / N
print("pi estimate =", pi)
delta_time = int(time() * 1000) - t  # calculate time required
print("Time =", delta_time, " milliseconds")
I thought that creating a loop from 0 to totalThreads that creates a process for each iteration would work. I also wanted to pass in N/totalThreads (to divide the work), but it seems that pools take an iterable of inputs rather than a single argument to pass to the function.
What is it I am missing with multiprocessing? Is it at all possible to even do what I want to do with processes?
Thank you in advance for any help, it is greatly appreciated :)
I have simplified your code and used some hard-coded values which may or may not be reasonable.
import math
import concurrent.futures
import random
from datetime import datetime

def myThread(arg):
    count = 0
    for i in range(0, arg[0]):
        x = random.uniform(0, 1)
        y = random.uniform(0, 1)
        d = math.sqrt(x * x + y * y)
        if (d < 1):
            count += 1
    return count

N = 10_000
T = 8
_start = datetime.now()
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = {executor.submit(myThread, (int(N / T),)): _ for _ in range(T)}
    total = 0
    for future in concurrent.futures.as_completed(futures):
        total += future.result()
_end = datetime.now()
print(f'Estimate for PI = {4 * total / N}')
print(f'Run duration = {_end-_start}')
A typical output on my machine looks like this:-
Estimate for PI = 3.1472
Run duration = 0:00:00.008895
Bear in mind that the number of threads you start is effectively managed by the ThreadPoolExecutor (TPE) when it is constructed with no parameters. It makes decisions about the number of threads that can run based on your machine's processing capacity (number of cores etc.). Therefore you could, if you really wanted to, set T to a very high number, and the TPE would block execution of any new threads until it determines that there is capacity.
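Since the question asked specifically about processes, note that concurrent.futures.ProcessPoolExecutor exposes the same submit/as_completed API, so a process-based variant of the code above is a small change (a sketch: the worker function must be defined at module top level, and the driver code must sit behind a __main__ guard so child processes can import the module safely):

if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(myThread, (int(N / T),)) for _ in range(T)]
        total = sum(f.result() for f in concurrent.futures.as_completed(futures))

For a CPU-bound loop like this one, processes sidestep the GIL and can run the work genuinely in parallel.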

Python Filtering a Point Cloud with PhotoScan Based on a Threshold Value - basic python help needed

I'm trying to implement a filter with Python to sort out the points of a point cloud generated by Agisoft PhotoScan. PhotoScan is photogrammetry software designed to be user friendly, but it also accepts Python commands through an API.
Below is my code so far, and I'm pretty sure there is a better way to write it, as I'm missing something. The code runs inside PhotoScan.
Objective:
Select and remove 10% of the points at a time, for points whose error falls within the defined range from 50 down to 10. Then, once those select-and-remove-10% passes are done, remove any remaining points within the error range that amount to less than 10% of the total. Immediately after every point removal, an optimization procedure should be run. The process should stop when no points are selectable, or when the selectable points amount to less than 1% of the current total and are not worth removing.
A drawing for better understanding: (image omitted)
Actual Code Under Construction (3 updates - see below for details):
import PhotoScan as PS
import math

doc = PS.app.document
chunk = doc.chunk

# using float with range; by setting i = 1 it steps 0.1 at a time
def precrange(a, b, i):
    if a < b:
        p = 10**i
        sr = a*p
        er = (b*p) + 1
        p = float(p)
        for n in range(sr, er):
            x = n/p
            yield x
    else:
        p = 10**i
        sr = b*p
        er = (a*p) + 1
        p = float(p)
        for n in range(sr, er):
            x = n/p
            yield x
"""
Determine if x is close to y:
x relates to nselected variable
y to p10 variable
math.isclose() Return True if the values a and b are close to each other and
False otherwise
var is the tolerance here setted as a relative tolerance:
rel_tol is the relative tolerance – it is the maximum allowed difference
between a and b, relative to the larger absolute value of a or b. For example,
to set a tolerance of 5%, pass rel_tol=0.05. The default tolerance is 1e-09,
which assures that the two values are the same within about 9 decimal digits.
rel_tol must be greater than zero.
"""
def test_isclose(x, y, var):
if math.isclose(x, y, rel_tol=var): # if variables are close return True
return True
else:
False
# 1. define filter limits
f_ReconstUncert = precrange(50, 10, 1)
# 2. count initial point number
tiePoints_0 = len(chunk.point_cloud.points) # storing info for later
# 3. call Filter() and init it
f = PS.PointCloud.Filter()
f.init(chunk, criterion=PS.PointCloud.Filter.ReconstructionUncertainty)
a = 0
"""
Way to restart for loop!
should_restart = True
while should_restart:
should_restart = False
for i in xrange(10):
print i
if i == 5:
should_restart = True
break
"""
restartLoop = True
while restartLoop:
    restartLoop = False
    for count, i in enumerate(f_ReconstUncert):  # for each threshold value
        # count points for every i
        tiePoints = len(chunk.point_cloud.points)
        p10 = int(round((10 / 100) * tiePoints, 0))  # 10% of the total
        f.selectPoints(i)  # selects points
        nselected = len([p for p in chunk.point_cloud.points if p.selected])
        percent = round(nselected * 100 / tiePoints, 2)
        if nselected == 0:
            print("For threshold {} there are no selectable points".format(i))
            break
        elif test_isclose(nselected, p10, 0.1):
            a += 1
            print("Threshold found in iteration: ", count)
            print("----------------------------------------------")
            print("# {} Removing points from cloud ".format(a))
            print("----------------------------------------------")
            print("# {}. Reconstruction Uncertainty:"
                  " {:.2f}".format(a, i))
            print("{} - {}"
                  " ({:.1f} %)\n".format(tiePoints,
                                         nselected, percent))
            f.removePoints(i)  # removes points
            # optimization procedure needed to refine camera positions
            print("--------------Optimizing cameras-------------\n")
            chunk.optimizeCameras(fit_f=True, fit_cx=True,
                                  fit_cy=True, fit_b1=False,
                                  fit_b2=False, fit_k1=True,
                                  fit_k2=True, fit_k3=True,
                                  fit_k4=False, fit_p1=True,
                                  fit_p2=True, fit_p3=False,
                                  fit_p4=False, adaptive_fitting=False)
            # count again number of points in point cloud
            tiePoints = len(chunk.point_cloud.points)
            print("= {} remaining points after"
                  " {} removal".format(tiePoints, a))
            # reassigning variable to get new 10% of remaining points
            p10 = int(round((10 / 100) * tiePoints, 0))
            percent = round(nselected * 100 / tiePoints, 2)
            print("----------------------------------------------\n\n")
            # restart loop to investigate from range start
            restartLoop = True
            break
        else:
            f.resetSelection()
            continue  # continue to next i
    else:
        f.resetSelection()
        print("for loop didn't work out")
        print("{} iterations done!".format(count))

tiePoints = len(chunk.point_cloud.points)
print("Tiepoints 0: ", tiePoints_0)
print("Tiepoints 1: ", tiePoints)
Problems:
A. Currently I'm stuck in endless processing because of a loop. I know it's down to my bad coding, but how do I implement my objective and avoid the infinite loops? ANSWER: Made the code less confusing and updated it above.
B. How do I start over (restart) my search for valid threshold values in the range(50, 20) after finding one of them? ANSWER: Stack Exchange: how to restart a for loop
C. How do I make the code more pythonic?
IMPORTANT UPDATE 1: altered above
Using a better float-capable range, adapted from Stack Overflow: how-to-use-a-decimal-range-step-value
# using float with range; by setting i = 1 it steps 0.1 at a time
def precrange(a, b, i):
    if a < b:
        p = 10**i
        sr = a*p
        er = (b*p) + 1
        p = float(p)
        return map(lambda x: x/p, range(sr, er))
    else:
        p = 10**i
        sr = b*p
        er = (a*p) + 1
        p = float(p)
        return map(lambda x: x/p, range(sr, er))

# some code
f_ReconstUncert = precrange(50, 20, 1)
And also using math.isclose() to determine whether the number of selected points is close to the 10% target, instead of a manual solution that assigns extra variables. This was implemented as follows:
"""
Determine if x is close to y:
x relates to nselected variable
y to p10 variable
math.isclose() Return True if the values a and b are close to each other and
False otherwise
var is the tolerance here setted as a relative tolerance:
rel_tol is the relative tolerance – it is the maximum allowed difference
between a and b, relative to the larger absolute value of a or b. For example,
to set a tolerance of 5%, pass rel_tol=0.05. The default tolerance is 1e-09,
which assures that the two values are the same within about 9 decimal digits.
rel_tol must be greater than zero.
"""
def test_threshold(x, y, var):
if math.isclose(x, y, rel_tol=var): # if variables are close return True
return True
else:
False
# some code
if test_threshold(nselected, p10, 0.1):
# if true then a valid threshold is found
# some code
UPDATE 2: altered in the code under construction
Minor fixes; the for loop now restarts from the beginning, following guidance from another Stack Exchange post on the subject. I still have to improve the range, or alter isclose(), to get more values.
restartLoop = True
while restartLoop:
    restartLoop = False
    for i in range(0, 10):
        if condition:
            restartLoop = True
            break
UPDATE 3: Code structure to achieve listed objectives:
threshold = range(0, 11, 1)
listx = []
for i in threshold:
    listx.append(i)

restart = 0
restartLoop = True
while restartLoop:
    restartLoop = False
    for idx, i in enumerate(listx):
        print("do something as printing i:", i)
        if i > 5:  # if this condition holds, restart the loop
            print("found value for condition: ", i)
            del listx[idx]
            restartLoop = True
            print("RESTARTING LOOP\n")
            restart += 1
            break  # break the inner for loop and restart it via the while
        else:
            continue
    else:
        # continue if the inner loop wasn't broken
        continue
print("restart - outer while", restart)

Python multiprocessing is slower than regular. How can I improve?

Basically, I have a script that combs a dataset of nodes/points to remove those that overlap. The actual script is more complicated, but I pared it down to a simple overlap check that does nothing with the result, for demonstration.
I tried a few variants with locks, queues, and pools, adding one job at a time versus adding in bulk. Some of the worst offenders were slower by a couple of orders of magnitude. Eventually I got it to the fastest I could.
The overlap-checking algorithm, which is sent to the individual processes:
def check_overlap(args):
    tolerance = args['tolerance']
    this_coords = args['this_coords']
    that_coords = args['that_coords']
    overlaps = False
    distance_x = this_coords[0] - that_coords[0]
    if distance_x <= tolerance:
        distance_x = pow(distance_x, 2)
        distance_y = this_coords[1] - that_coords[1]
        if distance_y <= tolerance:
            distance = pow(distance_x + pow(distance_y, 2), 0.5)
            if distance <= tolerance:
                overlaps = True
    return overlaps
The processing function:
def process_coords(coords, num_processors=1, tolerance=1):
    import multiprocessing as mp
    import time
    if num_processors > 1:
        pool = mp.Pool(num_processors)
        start = time.time()
        print "Start script w/ multiprocessing"
    else:
        num_processors = 0
        start = time.time()
        print "Start script w/ standard processing"
    total_overlap_count = 0
    # outer loop through nodes
    start_index = 0
    last_index = len(coords) - 1
    while start_index <= last_index:
        # nature of the original problem means we can process all pairs of a
        # single node at once, but not multiple, so batch jobs by outer loop
        batch_jobs = []
        # inner loop against all pairs for this node
        start_index += 1
        count_overlapping = 0
        for i in range(start_index, last_index+1, 1):
            if num_processors:
                # add job
                batch_jobs.append({
                    'tolerance': tolerance,
                    'this_coords': coords[start_index],
                    'that_coords': coords[i]
                })
            else:
                # synchronous processing
                this_coords = coords[start_index]
                that_coords = coords[i]
                distance_x = this_coords[0] - that_coords[0]
                if distance_x <= tolerance:
                    distance_x = pow(distance_x, 2)
                    distance_y = this_coords[1] - that_coords[1]
                    if distance_y <= tolerance:
                        distance = pow(distance_x + pow(distance_y, 2), 0.5)
                        if distance <= tolerance:
                            count_overlapping += 1
        if num_processors:
            res = pool.map_async(check_overlap, batch_jobs)
            results = res.get()
            for r in results:
                if r:
                    count_overlapping += 1
        # stuff normally happens here to process nodes connected to this node
        total_overlap_count += count_overlapping
    print total_overlap_count
    print " time: {0}".format(time.time() - start)
And the test harness:
from random import random

coords = []
num_coords = 1000
spread = 100.0
half_spread = 0.5*spread
for i in range(num_coords):
    coords.append([
        random()*spread-half_spread,
        random()*spread-half_spread
    ])
process_coords(coords, 1)
process_coords(coords, 4)
Still, the non-multiprocessing version consistently runs in less than 0.4 s, while the best I can get out of multiprocessing is just under 3.0 s as it stands above. I get that maybe the algorithm here is too simple to really reap the benefits, but considering the above case has nearly half a million iterations, and the real case has significantly more, it's weird to me that multiprocessing is an order of magnitude slower.
What am I missing / what can I do to improve?
Building O(N**2) 3-element dicts not used in the serialized code, and transmitting them over interprocess pipes, is a pretty good way to guarantee multiprocessing can't help ;-) Nothing comes for free - everything costs.
Below is a rewrite that executes much the same code regardless of whether it's run in serial or multiprocessing modes. No new dicts, etc. In general, the larger len(coords), the more benefit it gets from multiprocessing. On my box, at 20000 the multiprocessing run takes about a third of the wall-clock time.
Key to this is that all processes have their own copy of coords. This is done below by transmitting it just once, when the pool is created. That should work on all platforms. On Linux-y systems, it could happen "by magic" instead via forked process inheritance. Reducing the amount of data sent across processes from O(N**2) to O(N) is a huge improvement.
Getting more out of multiprocessing would require better load balancing. As is, a call to check_overlap(i) compares coords[i] to each value in coords[i+1:]. The larger i, the less work there is for it to do, and for the largest values of i just the cost of transmitting i between processes - and transmitting the result back - swamps the amount of time spent in check_overlap(i).
def init(*args):
    global _coords, _tolerance
    _coords, _tolerance = args

def check_overlap(start_index):
    coords, tolerance = _coords, _tolerance
    tsq = tolerance ** 2
    overlaps = 0
    start0, start1 = coords[start_index]
    for i in range(start_index + 1, len(coords)):
        that0, that1 = coords[i]
        dx = abs(that0 - start0)
        if dx <= tolerance:
            dy = abs(that1 - start1)
            if dy <= tolerance:
                if dx**2 + dy**2 <= tsq:
                    overlaps += 1
    return overlaps

def process_coords(coords, num_processors=1, tolerance=1):
    global _coords, _tolerance
    import multiprocessing as mp
    _coords, _tolerance = coords, tolerance
    import time
    if num_processors > 1:
        pool = mp.Pool(num_processors, initializer=init, initargs=(coords, tolerance))
        start = time.time()
        print("Start script w/ multiprocessing")
    else:
        num_processors = 0
        start = time.time()
        print("Start script w/ standard processing")
    N = len(coords)
    if num_processors:
        total_overlap_count = sum(pool.imap_unordered(check_overlap, range(N)))
    else:
        total_overlap_count = sum(check_overlap(i) for i in range(N))
    print(total_overlap_count)
    print(" time: {0}".format(time.time() - start))

if __name__ == "__main__":
    from random import random
    coords = []
    num_coords = 20000
    spread = 100.0
    half_spread = 0.5*spread
    for i in range(num_coords):
        coords.append([
            random()*spread-half_spread,
            random()*spread-half_spread
        ])
    process_coords(coords, 1)
    process_coords(coords, 4)
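One cheap mitigation for the per-item transmission cost described above is the chunksize argument to imap_unordered, which ships indices to workers in batches instead of one at a time (a sketch; the best value is a tuning knob, not a magic number):

# Batch indices across processes to reduce per-item pipe traffic.
total_overlap_count = sum(pool.imap_unordered(check_overlap, range(N), chunksize=64))

This doesn't fix the uneven work per index, but it cuts the number of pipe round-trips from N to roughly N/chunksize.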

Conditionally setting values with df.loc inside a loop

I'm querying an MS Access db to retrieve a set of leases. My task is to calculate monthly totals for base rent for the next 60 months. The leases have start and end dates, used to compute the correct number of periods in case a lease terminates before 60 periods. My current challenge comes when I attempt to increase the base rent by a certain amount whenever it's time to increment it for a specific lease. I'm at a beginner level with Python/pandas, so my approach is likely not optimal and the code rough-looking. A vectorized approach is probably better suited, but I'm not yet able to write such code.
Data:
Lease input & output
Code:
try:
    sql = 'SELECT * FROM [tbl_Leases]'
    #sql = 'SELECT * FROM [Copy Of tbl_Leases]'
    df = pd.read_sql(sql, conn)
    #print df
    #df.to_csv('lease_output.csv', index_label='IndexNo')

    df_fcst_periods = pd.DataFrame()

    # init increments
    periods = 0
    i = 0

    # create empty lists to store looped info from original df
    fcst_months = []
    fcst_lease_num = []
    fcst_base_rent = []
    fcst_method = []
    fcst_payment_int = []
    fcst_rate_inc_amt = []
    fcst_rate_inc_int = []
    fcst_rent_start = []

    # create arrays for period deltas, rent interval calc, pmt interval calc
    fcst_period_delta = []
    fcst_rate_int_bool = []
    fcst_pmt_int_bool = []

    for row in df.itertuples():
        # get min of forecast period or lease ending date
        min_period = min(fcst_periods, df.Lease_End_Date[i])
        # count periods to loop for future periods in new df_fcst
        periods = (min_period.year - currentMonth.year) * 12 + (min_period.month - currentMonth.month)
        for period in range(periods):
            nextMonth = (currentMonth + monthdelta(period))
            period_delta = (nextMonth.year - df.Rent_Start_Date[i].year) * 12 + (nextMonth.month - df.Rent_Start_Date[i].month)
            period_delta = float(period_delta)
            # period delta values allow us to divide by the payment & rent intervals looking for integers
            rate_int_calc = period_delta/df['Rate_Increase_Interval'][i]
            pmt_int_calc = period_delta/df['Payment_Interval'][i]
            # float.is_integer() method - returns bool
            rate_int_bool = rate_int_calc.is_integer()
            pmt_int_bool = pmt_int_calc.is_integer()
            # conditional logic to handle base rent increases
            if df['Forecast_Method'][i] == "Percentage" and rate_int_bool:
                rate_increase = df['Base_Rent'][i] * (1 + df['Rate_Increase_Amt'][i]/100)
                df.loc[df.index, "Base_Rent"] = rate_increase
                fcst_base_rent.append(df['Base_Rent'][i])
                print "Both True"
            else:
                fcst_base_rent.append(df['Base_Rent'][i])
                print rate_int_bool
            fcst_rate_int_bool.append(rate_int_bool)
            fcst_pmt_int_bool.append(pmt_int_bool)
            fcst_months.append(nextMonth)
            fcst_period_delta.append(period_delta)
            fcst_rent_start.append(df['Rent_Start_Date'][i])
            fcst_lease_num.append(df['Lease_Number'][i])
            #fcst_base_rent.append(df['Base_Rent'][i])
            fcst_method.append(df['Forecast_Method'][i])
            fcst_payment_int.append(df['Payment_Interval'][i])
            fcst_rate_inc_amt.append(df['Rate_Increase_Amt'][i])
            fcst_rate_inc_int.append(df['Rate_Increase_Interval'][i])
        i += 1

    df_fcst_periods['Month'] = fcst_months
    df_fcst_periods['Rent_Start_Date'] = fcst_rent_start
    df_fcst_periods['Lease_Number'] = fcst_lease_num
    df_fcst_periods['Base_Rent'] = fcst_base_rent
    df_fcst_periods['Forecast_Method'] = fcst_method
    df_fcst_periods['Payment_Interval'] = fcst_payment_int
    df_fcst_periods['Rate_Increase_Amt'] = fcst_rate_inc_amt
    df_fcst_periods['Rate_Increase_Interval'] = fcst_rate_inc_int
    df_fcst_periods['Period_Delta'] = fcst_period_delta
    df_fcst_periods['Rate_Increase_Interval_bool'] = fcst_rate_int_bool
    df_fcst_periods['Payment_Interval_bool'] = fcst_pmt_int_bool
except Exception, e:
    print str(e)

conn.close()
I ended up initializing variables before the periods loop, which allowed me to carry a running calculation through the loop and obtain the correct base rents for subsequent periods.
# init base rent, rate increase amount, new rate for leases
base_rent = df['Base_Rent'][i]
rate_inc_amt = float(df['Rate_Increase_Amt'][i])
new_rate = 0

for period in range(periods):
    nextMonth = (currentMonth + monthdelta(period))
    period_delta = (nextMonth.year - df.Rent_Start_Date[i].year) * 12 + (nextMonth.month - df.Rent_Start_Date[i].month)
    period_delta = float(period_delta)
    # period delta values allow us to divide by the payment & rent intervals looking for integers
    rate_int_calc = period_delta/df['Rate_Increase_Interval'][i]
    pmt_int_calc = period_delta/df['Payment_Interval'][i]
    # float.is_integer() method - returns bool
    rate_int_bool = rate_int_calc.is_integer()
    pmt_int_bool = pmt_int_calc.is_integer()
    # conditional logic to handle base rent increases
    if df['Forecast_Method'][i] == "Percentage" and rate_int_bool:
        new_rate = base_rent * (1 + rate_inc_amt/100)
        base_rent = new_rate
        fcst_base_rent.append(new_rate)
    elif df['Forecast_Method'][i] == "Manual" and rate_int_bool:
        new_rate = base_rent + rate_inc_amt
        base_rent = new_rate
        fcst_base_rent.append(new_rate)
    else:
        fcst_base_rent.append(base_rent)
Still open for any alternative approaches though!
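For instance, one vectorized angle on the percentage case (a sketch with hypothetical single-lease inputs, named after the question's columns): since the rent increases once every Rate_Increase_Interval months, the number of increases after period_delta months is floor(period_delta / interval), so the compounded rent for all 60 periods can be computed at once:

import numpy as np

base_rent = 1000.0             # Base_Rent (hypothetical value)
rate_inc_amt = 3.0             # Rate_Increase_Amt, in percent (hypothetical)
rate_inc_interval = 12         # Rate_Increase_Interval, in months (hypothetical)
period_delta = np.arange(60)   # months since Rent_Start_Date

n_increases = period_delta // rate_inc_interval   # increases seen so far
monthly_rent = base_rent * (1 + rate_inc_amt / 100) ** n_increases

Boundary handling (e.g. whether an increase should apply in the very first period) would need to match the is_integer() logic above.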
