Increase Loop Speed (Pandas dataframe) - python

Good Afternoon
I have created a function that assigns a value depending on the previous row in a dataframe:
# Function to calculate cycles
def new_cycle(dfTickets_CC, cicle, id, prev_id, prev_status):
    """Return the cycle number for the current row, as a string.

    The running value is kept in the module-level ``new_cicle`` so that a row
    can build on the value computed for the previous row.

    Args:
        dfTickets_CC: DataFrame with ``Ticket_ID`` and ``Cicle_lastNr`` columns.
        cicle: Cycle already stored on the row, or ``None`` if it must be derived.
        id: Ticket id of the current row.
        prev_id: Ticket id of the previous row (``None`` for the first row).
        prev_status: Status of the previous row (``None`` for the first row).

    Returns:
        The cycle as a string with any fractional part dropped
        (e.g. ``2.0`` becomes ``"2"``).
    """
    global new_cicle
    if cicle is not None:
        # The row already carries a cycle: use it as is.
        new_cicle = cicle
    elif id != prev_id:
        # First row of a new ticket: continue from the last recorded cycle,
        # or start at 1 when the ticket is unknown. Look the ticket up once.
        known = dfTickets_CC.loc[dfTickets_CC['Ticket_ID'] == id, 'Cicle_lastNr']
        new_cicle = known.values[0] + 1 if not known.empty else 1
    elif id == prev_id:
        # Same ticket as the previous row: a completed previous row opens a
        # new cycle, otherwise the running value is kept unchanged.
        if prev_status == "Completed":
            new_cicle = int(new_cicle) + 1
    return str(new_cicle).split(".")[0]
I call the function while iterating over the dataframe:
# Step 4, Calculating new cicle
# Pull the column arrays once instead of re-reading them on every iteration.
cicles = dfCompilate['Cicle'].values
ticket_ids = dfCompilate['Ticket_ID'].values
statuses = dfCompilate['Status'].values

ncicle = []
for i in range(len(dfCompilate.index)):
    if i == 0:
        # The first row has no predecessor to compare against.
        ncicle.append(new_cycle(dfTickets_CC, cicles[i], ticket_ids[i], None, None))
    else:
        ncicle.append(
            new_cycle(dfTickets_CC, cicles[i], ticket_ids[i],
                      ticket_ids[i - 1], statuses[i - 1])
        )
dfCompilate['New_cicle'] = ncicle
Problem is that, even though it works correctly, it takes a lot of time... For instance, it takes 2 hours to process a dataframe with 500,000 rows
Does anybody know how to make it faster?
Thanks in advance

Related

How to iterate through a list of class objects and total up how many attributes are the same?

I know the title is worded weird, I can't think of how to best word this.
Basically I have to create a survey in python with an option to view statistics of all of the submissions. I'm storing the submissions as objects in a list.
One of the questions in the survey is a set of radio buttons for choosing your ethnicity, and I want to total up how many submissions there are for each ethnicity.
I did get it to work using this:
# One counter per ethnicity, plus the overall number of submissions.
totalSubmissions = 0
totalWhite = 0
totalBlack = 0
totalAsian = 0
totalMixed = 0
totalOther = 0
for s in submissions:
    # Show the submission in the list widget and count it.
    submissionList.insert(END, s.getInfo())
    totalSubmissions += 1
    if s.ethnicity == "White":
        totalWhite += 1
    elif s.ethnicity == "Black":
        totalBlack += 1
    elif s.ethnicity == "Asian":
        totalAsian += 1
    elif s.ethnicity == "Mixed":
        totalMixed += 1
    elif s.ethnicity == "Other":
        totalOther += 1
But this feels really inefficient and I'm sure there must be a better way to do this using iteration or something.
I assume that you have a survey class as below
class Survey:
    """A single survey submission.

    Only ``ethnicity`` is modelled here; it is read from the keyword
    arguments and is ``None`` when not supplied.
    """

    def __init__(self, *args, **kwargs):
        # other attrs ..
        self.ethnicity = kwargs.get("ethnicity")
and then there is a list of submissions objects for example
# Sample data: eight submissions (3 White, 2 Other, 1 each of Black, Asian, Mixed).
submission_list = [
Survey(ethnicity="White"),
Survey(ethnicity="Black"),
Survey(ethnicity="Asian"),
Survey(ethnicity="Mixed"),
Survey(ethnicity="Other"),
Survey(ethnicity="White"),
Survey(ethnicity="White"),
Survey(ethnicity="Other"),
]
Now, you can get the total submission count as
# The overall count is just the length of the list; no loop is needed.
total_submission = len(submission_list)
print("total_submission: ", total_submission)
Then define a dict that holds the count for each ethnicity, loop through the submissions list, and increment the value stored under the key that matches each submission's ethnicity.
# Pre-seed the known categories so they are reported even when their count is 0.
total_dict = {
    "White": 0,
    "Black": 0,
    "Asian": 0,
    "Mixed": 0,
    "Other": 0,
}
for s in submission_list:
    # .get() keeps the loop from raising KeyError on a value that was not
    # pre-seeded (for example a submission whose ethnicity is None).
    total_dict[s.ethnicity] = total_dict.get(s.ethnicity, 0) + 1
print("total_dict: ", total_dict)
What is submissionList? There is no definition for it. Take another look at your code: many of the variables it uses are not defined.

Python pandas column operations

I'm trying to do some columnar operations on a dataframe and I'm stuck at one point. I'm new to pandas and now I'm unable to figure how to do this.
So wherever there is a "Yes" value in "Prevous_Line_Has_Br" buffer should be added to the "OldTop" value but whenever there is a "No" in between it should stop incrementing, take the previous row value and start incrementing when there is a "Yes" again.
I have tried something like this
# CheckBr holds the previous row's "Prevous_Line_Has_Br" value (shifted down by one).
temp_df["CheckBr"] = temp_df["Prevous_Line_Has_Br"].shift(1)
# The first row has no predecessor, so its missing value is replaced by a marker.
temp_df["CheckBr"] = temp_df["CheckBr"].fillna("dummy")
# New_ID is a plain 0..len-1 row counter inserted as the first column.
temp_df.insert(0, 'New_ID', range(0, 0 + len(temp_df)))
temp_df["NewTop"] = "NoIncr"
temp_df["MyTop"] = 0
# The three assignments below mark a row "Incr" whenever the current row is "Yes"
# and the previous row's flag is "Yes", "No" or "dummy" respectively.
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "Yes"), "NewTop"] = "Incr"
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "No"), "NewTop"] = "Incr"
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "dummy"), "NewTop"] = "Incr"
# For "Incr" rows, MyTop = new_top + New_ID * buffer (new_top is not defined in this snippet).
temp_df.loc[(temp_df["NewTop"]=="Incr"),"MyTop" ] = new_top + (temp_df.New_ID * temp_df.buffer)
# Rows whose previous flag was "Yes" and whose MyTop is still 0 take the previous row's MyTop.
temp_df.loc[(temp_df["CheckBr"] == "Yes") & (temp_df["MyTop"] == 0), "MyTop"] = temp_df["MyTop"].shift(1)
This is giving me the following output to achieve the same without the for loop:
Can someone please help achieve the values in the original dataframe using pandas?
This is what I want to achieve finally..
This would be fairly easy to do if you moved away from pandas and treated the columns as plain lists. If you still want to use the apply method, you can use a decorator to keep track of the previous row.
def apply_func_decorator(func):
    """Wrap a row function so it also receives the previously processed row.

    ``func`` is called as ``func(curr_row, prev_row)``, where ``prev_row`` is a
    dict holding the previous row's values plus the previous result under the
    key ``'NewTop'``. ``prev_row`` is empty on the very first call.

    Note: the remembered row lives in the closure, so it persists between
    separate ``DataFrame.apply`` calls that reuse the same decorated function.
    """
    import functools

    prev_row = {}

    @functools.wraps(func)
    def wrapper(curr_row, **kwargs):
        val = func(curr_row, prev_row)
        # Remember this row, then overwrite 'NewTop' with the freshly computed
        # value so the next row builds on it.
        prev_row.update(curr_row)
        prev_row['NewTop'] = val
        return val

    return wrapper


@apply_func_decorator
def add_buffer_and_top(curr_row, prev_row):
    """Return the new top for ``curr_row``.

    A row flagged ``'Yes'`` adds its ``buffer`` to the running top; any other
    row repeats the running top unchanged.
    """
    # On the first row there is no previous result yet, so start from the
    # row's own OldTop.
    # NOTE(review): starting from OldTop on a leading non-'Yes' row is an
    # assumption - confirm against the expected output.
    base = prev_row['NewTop'] if prev_row else curr_row.OldTop
    if curr_row.Prevous_Line_Has_Br == 'Yes':
        return curr_row.buffer + base
    return base
# Create the column first so every row passed to apply() already has a 'NewTop' field.
temp_df['NewTop'] = 0
# axis=1 calls the function once per row.
temp_df['NewTop'] = temp_df.apply(add_buffer_and_top, axis=1)
This is how I achieved the output I desired
# Boolean mask: True on rows whose flag is 'Yes'.
m = temp_df['Prevous_Line_Has_Br'].eq('Yes')
# Running count of 'Yes' rows; rows that are not 'Yes' are set to NaN.
temp_df['New_ID'] = m.cumsum().where(m,np.nan)
# Carry the last count forward through those NaN rows.
temp_df["New_ID"] = temp_df["New_ID"].ffill()
# Each counted 'Yes' adds one buffer on top of Old_Top.
temp_df["Top"] = temp_df['Old_Top'] + (temp_df['New_ID'] * temp_df['buffer'])
Column New_ID is incremented only on rows where column Prevous_Line_Has_Br has the value 'Yes'.

Undefined dictionaries in my main function

def monday_availability(openhours_M):  # openhours_M = number hours pool is open
    """Ask for a guard's working range and return one flag per open hour.

    Args:
        openhours_M: Number of hours the pool is open; hours are numbered
            from 1 to ``openhours_M``.

    Returns:
        A list of length ``openhours_M`` holding 1 for every hour inside the
        entered range (inclusive) and 0 for every other hour.
    """
    hourone = int(input('Input the first hour in the range of hours the guard can work'))
    hourlast = int(input('Input the last hour in the range of hours the guard'))
    hour = 1
    availability_M = []
    while hour <= openhours_M:
        # A chained comparison is needed here: `&` binds tighter than `>=`
        # and `<=`, so `hour >= hourone & hour <= hourlast` tests something else.
        if hourone <= hour <= hourlast:
            availability_M.append(1)
        else:
            availability_M.append(0)
        # Without this step the loop never ends.
        hour += 1
    return availability_M
Above is a function gathering the availability of a lifeguard and storing the hours a guard can work as a 1 in availability list or a 0 if they cannot. I return this list with the intent of adding it to a dictionary in the function below.
def guard_availability(guards, openhours_M, openhours_T, openhours_W,
                       openhours_R, openhours_F, openhours_S, openhours_Su):
    """Prompt for a guard's name and collect that guard's weekly availability.

    For every day with a non-zero number of open hours the matching
    ``<day>_availability`` function is called, and the per-day lists are then
    stored in ``days`` under the day's name.

    The logic is reproduced exactly as posted, because the discussion that
    follows refers to how it behaves.

    Returns:
        The ``days`` dict built for the last guard entered.
    """
    continueon = 1
    while continueon == 1:
        name = input('Input guards name of lifeguard to update availability' )
        availability = {}
        days = {}
        if openhours_M != 0:
            monday_availability(openhours_M)
        if openhours_T != 0:
            tuesday_availability(openhours_T)
        if openhours_W != 0:
            wednesday_availability(openhours_W)
        if openhours_R != 0:
            thursday_availability(openhours_R)
        if openhours_F != 0:
            friday_availability(openhours_F)
        if openhours_S != 0:
            saturday_availability(openhours_S)
        if openhours_Su != 0:
            sunday_availability(openhours_Su)
        days['Monday'] = availability_M
        days['Tuesday'] = availability_T
        days['Wednesday'] = availability_W
        days['Thursday'] = availability_R
        days['Friday'] = availability_F
        days['Saturday'] = availability_S
        days['Sunday'] = availability_Su
        availability[name]= days
        continueon = input('Enter 1 to add availability for another guard, 0 to stop: ')
    return days
When I run this code, I get an error saying my availability lists are undefined even though I returned them in the functions above. Where is the error in my understanding of returning in functions, and how can I remedy this problem.
monday_availability(openhours_M) returns a value.
Returning a variable does not assign it to anything outside the scope of that function.
If you renamed return availability_M to use return foo and update the other uses only within that function accordingly, would the error make more sense?
Now, actually capture the result
availability_M = monday_availability(openhours_M)
Or even just
days['Monday'] = monday_availability(openhours_M)
Also, not seeing how that function has anything to do with Mondays. Try to write DRY code
Your function returns the list, but you never assign that return value to a variable. You should do it like this:
# Capture each day's list. A closed day (0 open hours) gets an empty list, so
# every name is defined by the time the `days` dict is filled in.
availability_M = monday_availability(openhours_M) if openhours_M != 0 else []
availability_T = tuesday_availability(openhours_T) if openhours_T != 0 else []
availability_W = wednesday_availability(openhours_W) if openhours_W != 0 else []
availability_R = thursday_availability(openhours_R) if openhours_R != 0 else []
availability_F = friday_availability(openhours_F) if openhours_F != 0 else []
availability_S = saturday_availability(openhours_S) if openhours_S != 0 else []
availability_Su = sunday_availability(openhours_Su) if openhours_Su != 0 else []

Binary Search not working when it should be

I have a binary search that searches a list for an email address entered by the user. I get no errors, but I also get no output from it. I can't see where it's going wrong.
def BubbleSort(logindata):
    """Return the items of ``logindata`` as a tuple sorted in ascending order.

    The input is copied first, so the caller's sequence is left untouched.
    Passes over the data repeat until one completes without any swap.
    """
    items = list(logindata)
    N = len(items)
    swapped = True
    while swapped:
        swapped = False
        for Count in range(N - 1):
            if items[Count] > items[Count + 1]:
                items[Count], items[Count + 1] = items[Count + 1], items[Count]
                swapped = True
    return tuple(items)
def BinarySearch(logindata, ItemSought):
    """Search the sorted ``logindata`` for ``ItemSought`` and print on a match.

    The logic is reproduced exactly as posted, because the discussion that
    follows analyses how it behaves; only the structure has been restored.
    """
    First = 0
    Last = len(logindata) - 1
    ItemFound = False
    SearchFailed = False
    while ItemFound == False or SearchFailed == False:
        Midpoint = (First + Last) // 2
        if logindata[Midpoint] == ItemSought:
            print("Item Found")
            ItemFound = True
            print("Item Found")
            break
        elif logindata[Midpoint][0] > ItemSought:
            Last = Midpoint - 1
        else:
            First = Midpoint + 1
if __name__ == "__main__":
    # Each entry is a [username, password] pair; the commas make this a tuple of lists.
    logindata = ["tom#gmail.com", "Password1"], ["harry#gmail.com", "Password2"], ["jake#gmail.com", "Password3"]
    logindata = BubbleSort(logindata)
    print(logindata)
    ItemSought = input("Enter username")
    BinarySearch(logindata, ItemSought)
In
if logindata[Midpoint] == ItemSought:
you compare list with a string. So I think you need
if logindata[Midpoint][0] == ItemSought:
You never terminate the search. If the item isn't in the list, you get to a stable midpoint and loop infinitely. If you do find the item, you loop infinitely on that (see Yehven's answer).
I traced it with the additions you see here:
SearchFailed = False
iter = 0
while iter < 10 and (ItemFound == False or SearchFailed == False):
iter += 1
Midpoint = (First + Last) // 2
print (First, Midpoint, Last, ItemSought)
if logindata[Midpoint] == ItemSought:
Note that you don't ever change SearchFailed. For instance, when I search for "harry", the loop hits a stable infinite point at (0, -1, -2) for First, Middle, Last.
Is that enough of a hint to let you fix it yourself?

List index out of range when coding a valid move for board game

Hey everyone, I'm new here and I'm trying to make a game called HiQ. I have the board drawn and I can click on one of the pieces, but when I do, the piece does change color and I also get an error in the shell (listed below). I'm not sure why I'm getting this, and I was hoping you could give me some insight. My code is below as well; it is written in Python 3. Thank you.
builtins.IndexError: list index out of range
# 9x9 board laid out as a plus shape, indexed as boardcirc[row][col].
# NOTE(review): 0 looks like an off-board cell, 1 a peg and 2 the centre cell - confirm.
boardcirc =[[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0],
[1,1,1,1,1,1,1,1,1],
[1,1,1,1,2,1,1,1,1],
[1,1,1,1,1,1,1,1,1],
[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0]]
def HiQ():
    """Run the game: call splash_screen(), then make_board()."""
    splash_screen()
    make_board()
def make_board():
    """Call make_sqr() and make_circ(), then hand control to get_click()."""
    make_sqr()
    make_circ()
    get_click()
def get_click():
    """Loop forever: read a mouse click, map it to a board cell and act on it.

    The logic is reproduced exactly as posted; only the structure has been
    restored.
    """
    global count, boardcirc
    while 1!=0:
        count = count - 1
        displaymessage("Pieces: " + str(count))
        where = win.getMouse()
        # Integer-divide the click position by 90 to get a column and row index.
        col = where.x//90
        row = where.y//90
        valid_move(row,col)
        make_move(row,col)
def valid_move(row,col):
    """Report whether the cell at (row, col) has two 1-cells in a line next to it.

    Returns False for a cell holding 0, True when the two cells above, below,
    left or right are both 1, and None (implicitly) otherwise.

    The logic is reproduced exactly as posted; only the structure has been
    restored.
    """
    if boardcirc[row][col] == 0:
        return False
    if boardcirc[row-1][col] == 1 and boardcirc[row-2][col] == 1:
        return True
    if boardcirc[row+1][col] == 1 and boardcirc[row+2][col] == 1:
        return True
    if boardcirc[row][col-1] == 1 and boardcirc[row][col-2] == 1:
        return True
    if boardcirc[row][col+1] == 1 and boardcirc[row][col+2] == 1:
        return True
def make_move(row,col):
    """Draw a white circle over the clicked cell while valid_move() approves it.

    The logic is reproduced exactly as posted; only the structure has been
    restored. Note that ``row`` and ``col`` are overwritten with pixel
    coordinates inside the loop, and those values are what the next
    ``valid_move`` check receives.
    """
    while valid_move(row,col) == True:
        col = (col*85)+42
        row = (row*85)+42
        circ = Circle(Point(col,row),35)
        circ.setFill("white")
        circ.draw(win)
That's everything that applies to the error.
For your valid_move(row,col), you can't have all those if statements.
Instead of doing this, use elif's after the initial if statement, and don't forget to write an else statement
if boardcirc[row][col] == 0:
return False
if boardcirc[row-1][col] == 1 and boardcirc[row-2][col] == 1:
return True
elif boardcirc[row+1][col] == 1 and boardcirc[row+2][col] == 1:
return True
elif boardcirc[row][col-1] == 1 and boardcirc[row][col-2] == 1:
return True
elif boardcirc[row][col+1] == 1 and boardcirc[row][col+2] == 1:
return True
else:
return False

Categories