dataframe reparsing new columns in a loop

dataframe reparsing new columns in a loop - python

My dataframe is suppose to just create 1 modified copy of each int or float value column however it is modifying the modified column etc. I believe when I write for column in data, it thinks there are more columns than are actually present. Is their any way to fix this problem? error occurs at **
here is what is appearing
class simple_math:
def __init__(self, operand, operator):
self.operand=operand
self.operator=operator
if self.operator == '+' or self.operator=='-' or self.operator=='/':
print('this is correct character')
else:
print('You have entered the wrong character')
def user_op(self, user_input):
operand=self.operand
operator=self.operator
temp = operand
if operator == '+':
temp += user_input
return temp
if operator == '-':
temp -= user_input
return temp
if operator == '/':
temp /= user_input
return temp
test_data=sns.load_dataset('titanic')
df=test_data
df2=pd.DataFrame()
i = 0
for columns in df:
new_columns= []
if df[columns].dtypes == float or df[columns].dtypes == bool:
new_columns = df[columns]
df2.insert(i, columns, new_columns)
i=i+1
else:
pass
df2= df2.replace({True: 'TRUE', False: 'FALSE'})
df3 = df2.loc[df['fare']<70]
test_data = df3.dropna()
test_data
**class tester(simple_math):
def applicator(self, data):
data.reset_index(drop=True, inplace=True)
df = data
for columns in data:
try:
df['modified_%s' % columns]= simple_math.user_op(self, data[columns])
except:
print('unable to parse', columns)
pass
return df

Related

Set function in Tkinter failed to display correct value

I'm creating a calculator and here's part of the code:
def _digit_formatting(x):
numbers = '1234567890.'
start_idxs = []
end_idxs = []
is_start = True
try:
for idx, value in enumerate(x):
if value in numbers and is_start:
start_idxs.append(idx)
is_start = False
elif value in numbers and idx == len(x) - 1:
end_idxs.append(len(x) - 1)
elif value in numbers and not is_start:
pass
elif value not in numbers and len(start_idxs) > len(end_idxs):
end_idxs.append(idx-1)
is_start = True
except:
...
if len(start_idxs) > len(end_idxs):
end_idxs.append(start_idxs[-1])
start_idxs.reverse()
end_idxs.reverse()
x = list(x)
for idx in range(len(start_idxs)):
if start_idxs[idx] == end_idxs[idx]:
num = x[start_idxs[idx]:end_idxs[idx]+1]
else:
num = x[start_idxs[idx]:end_idxs[idx]+1]
num = ''.join(num)
x = ''.join(x)
x = x[::-1]
num = num[::-1]
x = x.replace(num, '', 1)
x = list(x)
x.reverse()
num = num[::-1]
temp = f'{int(num):,}'
x.insert(start_idxs[idx], temp)
x = ''.join(x)
return x
def calculate(sv):
# This function is called when there's changes in entry box
if self.input_string_var.get() == '':
self.result_string_var.set('')
# Start
real_result = self.input_string_var.get().replace(',', '')
percent_count = self.input_string_var.get().count('%')
# Formatting input string
x = _digit_formatting(real_result)
print(x)
self.input_string_var.set(x)
if percent_count != 0:
numbers = '0123456789.'
for cnt in range(percent_count):
percent_idx = real_result.find('%', 0)
limit_operator = 2
percent_number = ''
for i in range(percent_idx - 1, -1, -1):
if real_result[i] not in numbers:
limit_operator -= 1
if limit_operator == 0:
break
if limit_operator == 1:
if real_result[i] in '*/':
percent_number = ''
break
else:
percent_number += real_result[i]
if percent_number == '':
percent_number = '1'
else:
percent_number = percent_number[1:][::-1]
real_result = list(real_result)
real_result[percent_idx] = f'/100*{percent_number}'
real_result = ''.join(real_result)
else:
real_result = self.input_string_var.get().replace(',', '')
try:
if eval(real_result) == int(eval(real_result)):
self.result_string_var.set(f'{int(eval(real_result)):,}')
else:
self.result_string_var.set(f'{int(eval(real_result)):,}')
except:
None
if self.input_string_var.get() == '':
self.result_string_var.set('')
# Entry box string variable
self.input_string_var = tk.StringVar()
self.input_string_var.trace('w', lambda name, index, mode: calculate(self.input_string_var))
There is two functions, first is _digit_formatting which is to format the equation to put comma like thousands, million and billion. The calculate function is called every time there's changes on the input string variable. But when I try to set the string variable to equation after formatting there seems to be a mistake, but if I print the value, it is correct. Example if I enter 1200 the value I printed is 1,200 which is correct but the value on the text box is not correct. Sorry if the code is messy, I'm still learning to make a clean code.

I cannot reproduce your code.
If you can take a look of my example.
n = 1234567890
print(f"I have {n:,} Reputations")
Result:
I have 1,234,567,890 Reputations

How to create a function based on another dataframe column being True?

I have a dataframe shown below:
Name X Y
0 A False True
1 B True True
2 C True False
I want to create a function for example:
example_function("A") = "A is in Y"
example_function("B") = "B is in X and Y"
example_function("C") = "C is in X"
This is my code currently (incorrect and doesn't look very efficient):
def example_function(name):
for name in df['Name']:
if df['X'][name] == True and df['Y'][name] == False:
print(str(name) + "is in X")
elif df['X'][name] == False and df['Y'][name] == True:
print(str(name) + "is in Y")
else:
print(str(name) + "is in X and Y")
I eventually want to add more Boolean columns so it needs to be scalable. How can I do this? Would it be better to create a dictionary, rather than a dataframe?
Thanks!

If you really want a function you could do:
def example_function(label):
s = df.set_index('Name').loc[label]
l = s[s].index.to_list()
return f'{label} is in {" and ".join(l)}'
example_function('A')
'A is in Y'
example_function('B')
'B is in X and Y'
You can also compute all the solutions as dictionary:
s = (df.set_index('Name').replace({False: pd.NA}).stack()
.reset_index(level=0)['Name']
)
out = s.index.groupby(s)
output:
{'A': ['Y'], 'B': ['X', 'Y'], 'C': ['X']}

I think you can stay with a DataFrame, the same output can be obtained with a function like this:
def func (name, df):
# some checks to verify that the name is actually in the df
occurrences_name = np.sum(df['Name'] == name)
if occurrences_name == 0:
raise ValueError('Name not found')
elif occurrences_name > 1:
raise ValueError('More than one name found')
# get the index corresponding to the name you're looking for
# and select the corresponding row
index = df[df['Name'] == name].index[0]
row = df.drop(['Name'], axis=1).iloc[index]
outstring = '{} is in '.format(name)
for i in range(len(row)):
if row[i] == True:
if i != 0: outstring += ', '
outstring += '{}'.format(row.index[i])
return outstring
of course you can adapt this to the specific shape of your df, I'm assuming that the column containing names is actually 'Name'.

Python pandas column operations

I'm trying to do some columnar operations on a dataframe and I'm stuck at one point. I'm new to pandas and now I'm unable to figure how to do this.
So wherever there is a "Yes" value in "Prevous_Line_Has_Br" buffer should be added to the "OldTop" value but whenever there is a "No" in between it should stop incrementing, take the previous row value and start incrementing when there is a "Yes" again.
I have tried something like this
temp_df["CheckBr"] = temp_df["Prevous_Line_Has_Br"].shift(1)
temp_df["CheckBr"] = temp_df["CheckBr"].fillna("dummy")
temp_df.insert(0, 'New_ID', range(0, 0 + len(temp_df)))
temp_df["NewTop"] = "NoIncr"
temp_df["MyTop"] = 0
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "Yes"), "NewTop"] = "Incr"
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "No"), "NewTop"] = "Incr"
temp_df.loc[(temp_df["Prevous_Line_Has_Br"] == "Yes") & (temp_df["CheckBr"] == "dummy"), "NewTop"] = "Incr"
temp_df.loc[(temp_df["NewTop"]=="Incr"),"MyTop" ] = new_top + (temp_df.New_ID * temp_df.buffer)
temp_df.loc[(temp_df["CheckBr"] == "Yes") & (temp_df["MyTop"] == 0), "MyTop"] = temp_df["MyTop"].shift(1)
This is giving me the following output to achieve the same without the for loop:
Can someone please help achieve the values in the original dataframe using pandas?
This is what I want to achieve finally..

This would be fairly easy to do if you moved away from pandas, and treated the columns as just lists. If you want to still use the apply method, you can use to decorator to keep track of the last row.
def apply_func_decorator(func):
prev_row = {}
def wrapper(curr_row, **kwargs):
val = func(curr_row, prev_row)
prev_row.update(curr_row)
prev_row[new_col] = val
return val
return wrapper
#apply_func_decorator
def add_buffer_and_top(curr_row, prev_row):
if curr_row.Prevous_Line_Has_Br == 'Yes':
if prev_row:
return curr_row.buffer + prev_row['NewTop']
return curr_row.buffer + prev_row['OldTop']
return prev_row['NewTop']
temp_df['NewTop'] = 0
temp_df['NewTop'] = temp_df.apply(add_buffer_and_top, axis=1)

This is how I achieved the output I desired
m = temp_df['Prevous_Line_Has_Br'].eq('Yes')
temp_df['New_ID'] = m.cumsum().where(m,np.nan)
temp_df["New_ID"] = temp_df["New_ID"].ffill()
temp_df["Top"] = temp_df['Old_Top'] + (temp_df['New_ID'] * temp_df['buffer'])
Column New_ID was incremented only when there was a value 'Yes' in column Previous_Line_Has_br.

replacing value in specific columns in datafram by using python

this is code to calculate the weight of evidance
#good is zero bad is one
Weight of Evidance function for discrete unordered variables
df = pd.concat([df[the_categroical_name], My_target], axis = 1)
df = pd.concat([df.groupby(df.columns.values[0], as_index = False)[df.columns.values[1]].count(),
df.groupby(df.columns.values[0], as_index = False)[df.columns.values[1]].mean()], axis = 1)
df = df.iloc[:, [0, 1, 3]]
df.columns = [df.columns.values[0], 'Number_of_observation', 'Probation_good_taxPayer']
df['prop_Number_of_observation'] = df['Number_of_observation'] / df['Number_of_observation'].sum()
df['N_good'] = df['Probation_good_taxPayer'] * df['Number_of_observation']
df['n_bad'] = (1 - df['Probation_good_taxPayer']) * df['Number_of_observation']
df['prop_n_good'] = df['N_good'] / df['N_good'].sum()
df['prop_of_bad'] = df['n_bad'] / df['n_bad'].sum()
df['WoE'] = np.log(df['prop_n_good'] / df['prop_of_bad'])
df['PD']= ((df['N_good'])/(df['n_bad'] + df['N_good']))
df = df.sort_values(['WoE'])
df = df.reset_index(drop = True)
#df['diff_Probation_good_taxPayer'] = df['Probation_good_taxPayer'].diff().abs()
#df['diff_WoE'] = df['WoE'].diff().abs()
df['IV'] = (df['prop_n_good'] - df['prop_of_bad']) * df['WoE']
df['IV'] = df['IV'].sum()
return df
df_BUSINESS_CATEGORY = Weight_of_evidance(df_input, 'BUSINESS_CATEGORY', df_Label)
# We execute the function we defined with the necessary arguments: a dataframe, a string, and a dataframe.
# We store the result in a dataframe.
df_BUSINESS_CATEGORY
So for now if i want to replace any value in the business_category for instance A withtheir value in the column Woe is -0978021 stc for now i am using for loop like this below code
def flag_df_ISIC_4_ARAB(df_input):
if (df_input['BUSINESS_CATEGORY'] == 'A'):
return '-0.978021'
elif (df_input['BUSINESS_CATEGORY'] == 'اB'):
return '-0.977854'
elif (df_input['BUSINESS_CATEGORY'] == 'C'):
return '0.082918'
elif (df_input['BUSINESS_CATEGORY'] == 'D'):
return '0.772306'
elif (df_input['BUSINESS_CATEGORY'] == 'H'):
return '-0.176700'
elif (df_input['BUSINESS_CATEGORY'] == 'أخرى'):
return '0.955446'
else:
return '0'
df_input['BUSINESS_CATEGORY'] = df_input.apply(flag_df_ISIC_4_ARAB, axis = 1).astype(str)```
is there another way to replace the Woe with out using for loop

Create dictionary first, pass to Series.map and replace non matched values to '0':
d = {'A':'-0.978021','اB':'-0.977854', 'C':'0.082918',
'D':'0.772306', 'H': '-0.176700', 'أخرى': '0.955446'}
df_input['BUSINESS_CATEGORY'] = df_input['BUSINESS_CATEGORY'].map(d).fillna('0')

code optimization for memory game

I made this memory card matching game for class and feel like I wrote more code than I had to. Is there any way to optimize this? The assignment was just to get a working program but I feel like I need to learn a little bit of optimization.
import random
class Card(object):
""" A card object with value but not suit"""
def __init__(self, value):
"""Creates a card with the given value and suit."""
self.value = value
self.face = 0
def showFace(self):
'''flips the card over'''
self.face = 1
def __str__(self):
"""Returns the string representation of a card."""
return str(self.value)
class Deck(object):
""" A deck containing cards."""
def __init__(self, rows, columns):
"""Creates a full deck of cards."""
self._cards = []
self._rows = rows
self._columns = columns
for value in range(1,(int((self._rows*self._columns)/2))+1):
c = Card(value)
self._cards.append(c)
self._cards.append(c)
def shuffle(self):
"""Shuffles the cards."""
random.shuffle(self._cards)
def deal(self):
"""Removes and returns the top card or None
if the deck is empty."""
if len(self) == 0:
return None
else:
return self._cards.pop(0)
def __len__(self):
"""Returns the number of cards left in the deck."""
return len(self._cards)
def __str__(self):
"""Returns the string representation of a deck."""
self.result = ''
for c in self._cards:
self.result = self.result + str(c) + '\n'
return self.result
class Game(Deck,Card):
'''Runs the memory game'''
def __init__(self, rows, columns):
'''gets rows and columns for the game'''
self._rows = rows
self._columns = columns
def play(self):
'''starts the game'''
self._deck = Deck(self._rows, self._columns)
self._deck.shuffle()
Game.populateBoard(self)
matches = 0
#While the game is not finished
while True:
while True: #The First Card
try:
coor1 = input("Enter coordinates for first card ")
coor1 = coor1.split(' ') # Removes spaces
coor1 = [x for x in coor1 if x != ' '] # puts in list
coor1 = [int(x)-1 for x in coor1 if x == x] # converts
except IndexError:
print('***Invalid coordinates! Try again.***')
except ValueError:
print('***Invalid coordinates! Try again.***')
else:
try: #Check if coordinates are valid
if 0 > coor1[0] or coor1[0] > self._rows\
or 0 > coor1[1] or coor1[1] > self._columns:
print('***Invalid coordinates! Try again.***')
else:
guess1 = self._gameboard[coor1[0]][coor1[1]]
break
except IndexError:
print('***Invalid coordinates! Try again.***')
while True: # The Second Card
try:
coor2 = input("Enter coordinates for second card ")
coor2 = coor2.split(' ') # Removes Spaces
coor2 = [x for x in coor2 if x != ' '] # puts in list
coor2 = [int(x)-1 for x in coor2 if x == x]# converts
except IndexError:
print('***Invalid coordinates! Try again.***')
except ValueError:
print('***Invalid coordinates! Try again.***')
else:
try: #Check if coordinates are valid
if 0 > coor2[0] or coor2[0] > self._rows\
or 0 > coor2[1] or coor2[1] > self._columns:
print('***Invalid coordinates! Try again.***')
else:
guess2 = self._gameboard[coor2[0]][coor2[1]]
break
except IndexError:
print('***Invalid coordinates! Try again.***')
if guess1 == guess2\
and coor2[0]-coor1[0] == 0\
and coor2[1]-coor1[1] == 0:#User enters same input for 1 and 2
print("***That's the same card! Try again.***")
elif guess1 == guess2:
guess1.showFace()
Game.showBoard(self)
matches += 1
if matches == ((self._rows * self._columns)/2):
break
else:
Game.showBoard(self)
print('Not an identical pair. Found',guess1,'at ('+
str(coor1[0]+1)+','+ str(coor1[1]+1)+') and', guess2,
'at ('+str(coor2[0]+1)+','+str(coor2[1]+1)+')')
def populateBoard(self):
'''populates the game board'''
self._gameboard = []
for row in range(self._rows):
self._gameboard.append([0] * self._columns)
for row in range(len(self._gameboard)):
for column in range(len(self._gameboard[row])):
self._gameboard[row][column] = self._deck.deal()
Game.showBoard(self)
def showBoard(self):
'''shows the board'''
for row in self._gameboard:
for card in row:
if card.face == 0:
print('*', end =" ")
else:
print(card, end= " ")
print()
def main():
while True:
# Force user to enter valid value for number of rows
while True:
rows = input("Enter number of rows ")
if rows.isdigit() and ( 1 <= int(rows) <= 9):
rows = int(rows)
break
else:
print (" ***Number of rows must be between 1 and 9! Try again.***")
# Adding *** and indenting error message makes it easier for the user to see
# Force user to enter valid value for number of columns
while True:
columns = input("Enter number of columns ")
if columns.isdigit() and ( 1 <= int(columns) <= 9):
columns = int(columns)
break
else:
print (" ***Number of columns must be between 1 and 9! Try again.***")
if rows * columns % 2 == 0:
break
else:
print (" ***The value of rows X columns must be even. Try again.***")
game = Game(rows, columns)
game.play()
if __name__ == "__main__":
main()

Here is a couple of places the code can be simplified
def populateBoard(self):
'''populates the game board'''
self._gameboard = [self._deck.deal() for row in self._rows
for column in self._columns]
self.showBoard()
and
def showBoard(self):
'''shows the board'''
for row in self._gameboard:
print(*(card if card.face else '*' for card in row))

coor1[0] is computed many times over. Consider assigning the result to a local variable and then using this local variable.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

dataframe reparsing new columns in a loop - python

Related

Set function in Tkinter failed to display correct value

How to create a function based on another dataframe column being True?

Python pandas column operations

replacing value in specific columns in datafram by using python

code optimization for memory game

Categories

Resources