How can I optimize the groupby.apply(function) in Python? - python

I have a function that uses deque.collections to track daily stock in based on FIFO. An order will be fulfilled if possible and is substracted from stock accordingly. I use a function in groupby.apply(my_function).
I have struggles where to place the second loop. Both loops work properly when run on their own. But I do not get them working combined.
The dataset is about 1.5 million rows.
Thanks.
DOS = 7
WIP = 1
df_fin['list_stock'] = 0
df_fin['stock_new'] = 0
def create_stocklist(x):
x['date_diff'] = x['dates'] - x['dates'].shift()
x['date_diff'] = x['date_diff'].fillna(0)
x['date_diff'] = (x['date_diff'] / np.timedelta64(1, 'D')).astype(int)
x['list_stock'] = x['list_stock'].astype(object)
x['stock_new'] = x['stock_new'].astype(object)
var_stock = DOS*[0]
sl = deque([0],maxlen=DOS)
for i in x.index:
order = x['order_bin'][i]
if x['date_diff'][i] > 0:
for p in range(0,x['date_diff'][i]):
if p == WIP:
sl.appendleft(x.return_bin[i-1])
else:
sl.appendleft(0)
sl_list = list(sl)
sl_list.reverse()
new_list = []
#from here the loop does not work as I wanted it to work.
#I want to loop over de created sl_list
#and then start the loop above with the outcome of the loop below.
for elem in sl_list:
while order > 0:
val = max(0,elem-order)
order = (abs(min(0,elem-order)))
new_list.append(val)
break
else:
new_list.append(elem)
new_list.reverse()
x.at[i,'list_stock'] = new_list
sl = deque(new_list)
return x
df_fin.groupby(by=['ID']).apply(create_stocklist)

You do not have access to sl_list inside the second loop, you should just define it in the upper scope: for example just after the first global for loop:
for i in x.index:
# define it just here
sl_list = []
order = x['order_bin'][i]

Related

Python blender API assigns material to only the first 10 objects within loop

In a loop, I'm generating a number of spheres, and assigning a material and a subsurf modifier to them:
for i in range(100):
x = coords[i,0]
y = coords[i,1]
z = coords[i,2]
type = types[i]
bpy.ops.mesh.primitive_uv_sphere_add(location=(x,y,z),radius=0.5)
bpy.data.objects[i].name = f"{i}_{type}"
m = bpy.data.objects[i].modifiers.new('subdiv','SUBSURF')
m.levels = 0
m.render_levels = 2
m.quality = 3
if type == 0:
mat = type_0_mat
else:
mat = type_1_mat
if bpy.data.objects[i].data.materials:
bpy.data.objects[i].data.materials[0] = mat
else:
bpy.data.objects[i].data.materials.append(mat)
However, this assigns the material and modifier to only the first 10 of the generated spheres. I have also noticed, after creating the spheres, that the indices and the names don't end up matching. For instance:
>>> bpy.data.objects[10]
bpy.data.objects['19_0.0']
I would have expected the 10th element to be 9_0.0. What is going on here? Why are the indices jumbled, and is this why only a few get elements assigned to them?

I'd like to cross-calculate the two formular over and over again in python

Q_optimal=((np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2))**(1/2)+((np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2+(2*np.array(order_cost)*np.array(demand_lt_avg)/np.array(carrying_cost))))
#get optimal value
while 1:
#new safety stock
new_safety_stock=((np.array(demand_lt_std))**2/(4*beta*np.array( Q_optimal)))-(beta*np.array( Q_optimal))
new_safety_stock[np.isnan(new_safety_stock)] = 0
new_safety_stock=new_safety_stock.tolist()
#delete 0
for i in range(len(order_cost)):
if new_safety_stock[i]< 0:
pos_1=np.where(np.array(new_safety_stock)<0)[0]
for i in pos_1:
new_safety_stock[i]=0
new_safety_stock=np.array(new_safety_stock)
Q_a_result = (np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2)**0.5
Q_b_result= (2*np.array(order_cost)*np.array(demand_lt_avg))/np.array(carrying_cost)
Q_c_result=(np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2+Q_b_result)**0.5
Q_d_result=Q_a_result+Q_c_result
#new Q
new_Q = Q_d_result
loop += 1
above there is all code
Please look at the cord below. first i caculate new_safety_stock using Q_optimal and i get new_Q.
After that, I want to do this repetition of using new_Q to get new_safety, and then using new_safety to get new_Q. I want to repeat it 10000 times, but I have no choice but to use Q_optimal when I get new_safety for the first time, but I wonder how to let you use new_Q again after that.
'''
while 1:
#new safety stock
new_safety_stock=((np.array(demand_lt_std))**2/(4*beta*np.array( Q_optimal)))-(beta*np.array( Q_optimal))
new_safety_stock[np.isnan(new_safety_stock)] = 0
new_safety_stock=new_safety_stock.tolist()
#delete 0
for i in range(len(order_cost)):
if new_safety_stock[i]< 0:
pos_1=np.where(np.array(new_safety_stock)<0)[0]
for i in pos_1:
new_safety_stock[i]=0
new_safety_stock=np.array(new_safety_stock)
Q_a_result = (np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2)**0.5
Q_b_result= (2*np.array(order_cost)*np.array(demand_lt_avg))/np.array(carrying_cost)
Q_c_result=(np.array(demand_lt_std)**2+np.array(Q_safety_stock)**2+Q_b_result)**0.5
Q_d_result=Q_a_result+Q_c_result
#new Q
new_Q = Q_d_result
loop += 1
'''

Can we put condition on REST API

I was giving hackerrank test where I got this problem.
Problem was to find number of football matches that are draw. i.e data[index]['team1goals']==data[index]['team2goals']
Here is an API you can play with it: https://jsonmock.hackerrank.com/api/football_matches?year=2011&page=1
This is what I tried:
import requests
year = 2011
draw = 0
r = requests.get('https://jsonmock.hackerrank.com/api/football_matches?year='+str(year)+'&page=1').json()
total_pages = r['total_pages']
per_page = r['per_page']
for page in range(1, total_pages+1):
r = requests.get('https://jsonmock.hackerrank.com/api/football_matches?year='+str(year)+'&page='+str(page)).json()
try:
for i in range(0, per_page):
if int(r['data'][i]['team1goals']) == int(r['data'][i]['team2goals']):
draw += 1
except:
pass
print(draw) #516
It is giving me correct answer. Since the data was big, it is facing time complexity which I don't want
Is it possible, Can we modify the REST API with condition like this:
https://jsonmock.hackerrank.com/api/football_matches?year=2011&team1goals==team2goals&page=1
OR
https://jsonmock.hackerrank.com/api/football_matches?year=2011&team1goals-gt-lt&team2goals&page=1
If the API allows these many calls, you can use a multiprocessing.pool.Pool function and iterate through each page parallelly to reduce time. This should work:
import requests
from functools import partial
from multiprocessing.pool import Pool
def loop(page,year,r,per_page):
r = requests.get('https://jsonmock.hackerrank.com/api/football_matches?year='+str(year)+'&page='+str(page)).json()
try:
for i in range(0, per_page):
if int(r['data'][i]['team1goals']) == int(r['data'][i]['team2goals']):
increase = 1
else:
increase = 0
except:
increase = 0
return increase
if __name__ == "__main__":
year = 2011
draw = []
r = requests.get('https://jsonmock.hackerrank.com/api/football_matches?year='+str(year)+'&page=1').json()
total_pages = r['total_pages']
per_page = r['per_page']
pages = range(1, total_pages+1)
pool = Pool()
f = pool.map(partial(loop,year=year,r=r,per_page=per_page),pages)
draw += f
final = 0
for x in draw:
x = int(x)
final += x
print(final) #516
You should use multithreading and make multiple requests in parallel.
You can do it other way.
def getNumDraws(year):
counter = 0
for z in range(0, 10):
# 10 - maximum goals, it's in the description of this task
link = f"https://jsonmock.hackerrank.com/api/football_matches?year={year}&team1goals={z}&team2goals={z}"
r = requests.get(link)
counter = counter + int(r.json()['total'])
return counter
Steps:
Get ['total'] from link where you add &team1goals={variable}&team2goals={variable}
Add ['total'] number to your counter.
Repeat 10 times. 10 times - because it's from task description that you can assume that there are 10 maximum goals scored.
So you call an api only 10 times

Python code not working as intended

I started learning Python < 2 weeks ago.
I'm trying to make a function to compute a 7 day moving average for data. Something wasn't going right so I tried it without the function.
moving_average = np.array([])
i = 0
for i in range(len(temp)-6):
sum_7 = np.array([])
avg_7 = 0
missing = 0
total = 7
j = 0
for j in range(i,i+7):
if pd.isnull(temp[j]):
total -= 1
missing += 1
if missing == 7:
moving_average = np.append(moving_average, np.nan)
break
if not pd.isnull(temp[j]):
sum_7 = np.append(sum_7, temp[j])
if j == (i+6):
avg_7 = sum(sum_7)/total
moving_average = np.append(moving_average, avg_7)
If I run this and look at the value of sum_7, it's just a single value in the numpy array which made all the moving_average values wrong. But if I remove the first for loop with the variable i and manually set i = 0 or any number in the range of the data set and run the exact same code from the inner for loop, sum_7 comes out as a length 7 numpy array. Originally, I just did sum += temp[j] but the same problem occurred, the total sum ended up as just the single value.
I've been staring at this trying to fix it for 3 hours and I'm clueless what's wrong. Originally I wrote the function in R so all I had to do was convert to python language and I don't know why sum_7 is coming up as a single value when there are two for loops. I tried to manually add an index variable to act as i to use it in the range(i, i+7) but got some weird error instead. I also don't know why that is.
https://gyazo.com/d900d1d7917074f336567b971c8a5cee
https://gyazo.com/132733df8bbdaf2847944d1be02e57d2
Hey you can using rolling() function and mean() function from pandas.
Link to the documentation :
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.rolling.html
df['moving_avg'] = df['your_column'].rolling(7).mean()
This would give you some NaN values also, but that is a part of rolling mean because you don't have all past 7 data points for first 6 values.
Seems like you misindented the important line:
moving_average = np.array([])
i = 0
for i in range(len(temp)-6):
sum_7 = np.array([])
avg_7 = 0
missing = 0
total = 7
j = 0
for j in range(i,i+7):
if pd.isnull(temp[j]):
total -= 1
missing += 1
if missing == 7:
moving_average = np.append(moving_average, np.nan)
break
# The following condition should be indented one more level
if not pd.isnull(temp[j]):
sum_7 = np.append(sum_7, temp[j])
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
if j == (i+6):
# this ^ condition does not do what you meant
# you should use a flag instead
avg_7 = sum(sum_7)/total
moving_average = np.append(moving_average, avg_7)
Instead of a flag you can use a for-else construct, but this is not readable. Here's the relevant documentation.
Shorter way to do this:
moving_average = np.array([])
for i in range(len(temp)-6):
ngram_7 = [t for t in temp[i:i+7] if not pd.isnull(t)]
average = (sum(ngram_7) / len(ngram_7)) if ngram_7 else np.nan
moving_average = np.append(moving_average, average)
This could be refactored further:
def average(ngram):
valid = [t for t in temp[i:i+7] if not pd.isnull(t)]
if not valid:
return np.nan
return sum(valid) / len(valid)
def ngrams(seq, n):
for i in range(len(seq) - n):
yield seq[i:i+n]
moving_average = [average(k) for k in ngrams(temp, 7)]

Can someone detect error in this code to implement dijkstra's algorithm using python?

I am trying to implement dijkstra's algorithm (on an undirected graph) to find the shortest path and my code is this.
Note: I am not using heap/priority queue or anything but an adjacency list, a dictionary to store weights and a bool list to avoid cycling in the loops/recursion forever. Also, the algorithm works for most test cases but fails for this particular one here: https://ideone.com/iBAT0q
Important : Graph can have multiple edges from v1 to v2 (or vice versa), you have to use the minimum weight.
import sys
sys.setrecursionlimit(10000)
def findMin(n):
for i in x[n]:
cost[n] = min(cost[n],cost[i]+w[(n,i)])
def dik(s):
for i in x[s]:
if done[i]:
findMin(i)
done[i] = False
dik(i)
return
q = int(input())
for _ in range(q):
n,e = map(int,input().split())
x = [[] for _ in range(n)]
done = [True]*n
w = {}
cost = [1000000000000000000]*n
for k in range(e):
i,j,c = map(int,input().split())
x[i-1].append(j-1)
x[j-1].append(i-1)
try: #Avoiding multiple edges
w[(i-1,j-1)] = min(c,w[(i-1,j-1)])
w[(j-1,i-1)] = w[(i-1,j-1)]
except:
try:
w[(i-1,j-1)] = min(c,w[(j-1,i-1)])
w[(j-1,i-1)] = w[(i-1,j-1)]
except:
w[(j-1,i-1)] = c
w[(i-1,j-1)] = c
src = int(input())-1
#for i in sorted(w.keys()):
# print(i,w[i])
done[src] = False
cost[src] = 0
dik(src) #First iteration assigns possible minimum to all nodes
done = [True]*n
dik(src) #Second iteration to ensure they are minimum
for val in cost:
if val == 1000000000000000000:
print(-1,end=' ')
continue
if val!=0:
print(val,end=' ')
print()
The optimum isn't always found in the second pass. If you add a third pass to your example, you get closer to the expected result and after the fourth iteration, you're there.
You could iterate until no more changes are made to the cost array:
done[src] = False
cost[src] = 0
dik(src)
while True:
ocost = list(cost) # copy for comparison
done = [True]*n
dik(src)
if cost == ocost:
break

Categories