I am running a for loop in order to create a dataframe of 'New' values.
# Running totals; they are NOT reset between rows of `combined`.
New = 0
Approved = 0
df = pd.DataFrame()
for row, rowdata in enumerate(combined):
    # Walk the values of one record by position.
    for col, value in enumerate(rowdata.values()):
        if col == 0:
            print(value)
        if col == 2:
            New += value
            print('Original New')
            print(value)
        if col == 4:
            Approved = value
            if Approved > 0:
                New = New - Approved
            print('Updated New')
            print(New)
    # NOTE: `df` was created empty, so it has no index rows. Assigning a
    # scalar to a column broadcasts it over that empty index, which leaves
    # the 'New' column with zero rows instead of appending a value.
    # NOTE(review): the original indentation was lost; this statement is
    # assumed to run once per row of `combined` - confirm.
    df['New'] = New
Everything in this code seems to be working except for the last df['New'] = New statement. Any ideas on why that might be happening would be greatly appreciated.
df['New'] = New is a wrong way to insert a single row.
One way to fix it:
# One entry is collected per row of `combined`; the frame is built at the end.
all_rows = []
# Running totals; they are NOT reset between rows of `combined`.
New = 0
Approved = 0
for row, rowdata in enumerate(combined):
    # Walk the values of one record by position.
    for col, value in enumerate(rowdata.values()):
        if col == 0:
            print(value)
        if col == 2:
            New += value
            print('Original New')
            print(value)
        if col == 4:
            Approved = value
            if Approved > 0:
                New = New - Approved
            print('Updated New')
            print(New)
    # Accumulate all the rows
    all_rows.append(New)
# Finally create a dataframe
df = pd.DataFrame({'New': all_rows})
Related
I have a function which creates a dataframe, and a sub-function which does some diffs on that dataframe to come up with a result. How do I add this result as a column to my dataframe?
def custom_compare_eq(series, other):
    """Yield one boolean per element of *series* comparing it with *other*.

    Each cell is converted to text and evaluated back into a Python object
    before the comparison:

    * objects of different types never match;
    * lists match when they contain the same set of elements (order and
      duplicates are ignored);
    * everything else is compared with ``==``.

    Exactly one value is yielded per element of *series*, so the result can
    be assigned straight back to the frame as a new column.

    Args:
        series: object exposing ``.values`` (e.g. a pandas Series).
        other: object exposing ``.values`` with at least as many elements.

    Yields:
        bool: whether the pair of cells at the same position match.

    Raises:
        IndexError: if *other* has fewer elements than *series*.
    """
    for i, left_raw in enumerate(series.values):
        # NOTE(security): eval() executes arbitrary code. Only use this on
        # trusted data; ast.literal_eval is the safe choice for literals.
        r1 = eval(str(left_raw))
        r2 = eval(str(other.values[i]))
        if type(r1) is not type(r2):
            yield False
        elif type(r1) is list:
            yield set(r1) == set(r2)
        else:
            # Covers ints and every other type; without this branch nothing
            # was yielded for e.g. strings or floats and the output came out
            # shorter than the input.
            yield r1 == r2
# Run the comparison eagerly so there is a concrete list of results.
print('store the result output')
result = [*custom_compare_eq(df.a_series, df.b_series)]

# Attach the results as the 'match' column and show the frame.
print('add new match column to df')
df['match'] = result
print(df)
I expected the last print(df) to show the new 'match' column, but it does not :(
ValueError: 2 columns passed, passed data had 4 columns:
import pandas as pd
def customedata():
    """Prompt for column names and integer values, then print a DataFrame.

    NOTE: this is the version from the question and it still fails as
    described there. Every value entered for every column is appended to the
    single flat list ``rowName``, which is then passed as ONE row. With more
    than one row requested, that row holds more values than there are column
    names, and pandas raises ``ValueError`` ("N columns passed, passed data
    had M columns").
    """
    colnum = input("How many columns do you need? ")
    colnum = int(colnum)
    rownum = input("How many rows do you need? ")
    # user input column and row
    rownum = int(rownum)
    colName = []
    rowName = []
    # create an empty list
    for col in range(0, colnum):
        colValue = input('Enter the value for column name of column %s:' % (col + 1))
        colName.append(colValue)
        for row in range(0, rownum):
            rowValue = (int(input('Enter the value of row number %s:' % (row + 1))))
            # All columns share this one list - see the NOTE above.
            rowName.append(rowValue)
    df1 = pd.DataFrame([rowName], columns=colName)
    print(df1)
I tried to create a dataframe using user-input rows and columns, but I keep getting a ValueError. I thought that something was wrong with the nested loop, but I wasn't able to solve the problem.
I think it would be easier to create a pd.DataFrame from user input using a dictionary, as shown below. First you create an empty dict, then you use each column name as a key and the list of row values for that column as its value. Then you can use pd.DataFrame.from_dict() to transform your dict into a pd.DataFrame.
Hoping it helps :)
def customedata():
    """Interactively build a DataFrame from user-supplied columns.

    Asks for the number of columns and rows, then for each column asks for
    its name followed by one integer per row. The frame is printed and also
    returned.

    Returns:
        pandas.DataFrame: one column per entered name, ``rownum`` rows each.
        Entering the same column name twice keeps only the later values.

    Raises:
        ValueError: if a count or a row value is not a valid integer.
    """
    colnum = int(input("How many columns do you need? "))
    rownum = int(input("How many rows do you need? "))
    dictionary = {}
    for col in range(colnum):
        colValue = input('Enter the value for column name of column %s:' % (col + 1))
        # Start a fresh list for every column. Sharing one list across
        # columns made every column point at the same ever-growing values.
        rowName = []
        for row in range(rownum):
            rowValue = int(input('Enter the value of row number %s:' % (row + 1)))
            rowName.append(rowValue)
        dictionary[colValue] = rowName
    df1 = pd.DataFrame.from_dict(dictionary)
    print(df1)
    return df1
I have this kind of code, which checks the value in column A. If the condition is met, the code checks the value in another column of the same row and copies the value from that column to replace the value in column A:
# Build one entry per row of `data`, in row order, based on column 5.
counter = 0
list_of_winners = []
for each in data.iterrows():
    # `counter` tracks the positional row number used with iloc.
    winner = data.iloc[counter, 5]
    if winner == 'Red':
        # 'Red' -> take the value from column 0 of the same row.
        vitazr = data.iloc[counter, 0]
        list_of_winners.append(vitazr)
    elif winner == 'Blue':
        # 'Blue' -> take the value from column 1 of the same row.
        vitazb = data.iloc[counter, 1]
        list_of_winners.append(vitazb)
    elif winner == 'Draw':
        draw = str('Draw')
        list_of_winners.append(draw)
    else:
        # Any other value adds nothing, so the list can end up shorter
        # than `data`.
        pass
    counter += 1
The solution works for me: I am able to create a list, put that list into the original DataFrame, and replace the values I looped through.
What I want to ask: isn't there some other, more elegant and shorter way to address this problem?
You can do an np.select:
# Column 5 decides which value is picked for each row; read it once.
winner_col = data.iloc[:, 5]
# Conditions and choices are matched by position: 'Red' -> column 0,
# 'Blue' -> column 1, 'Draw' -> the literal 'Draw'. Rows matching none of
# the conditions get None.
list_of_winners = np.select(
    [winner_col == 'Red', winner_col == 'Blue', winner_col == 'Draw'],
    [data.iloc[:, 0], data.iloc[:, 1], 'Draw'],
    default=None,
)
I am trying to create some lag features by subtracting a month from each date in my datetime column and then assigning a column value from the past date to the current one.
This is my code:
# For every row, look up the row dated one month earlier and copy its
# 'count' into 'count_lag_month'.
for row_index in range(0, len(merger)):
    date = merger.loc[merger.index[row_index], 'datetime']
    prev = subtract_one_month(date)
    inde = merger.loc[merger['datetime'] == str(prev), 'count'].index.values.astype(int)
    # `inde` is a NumPy array, so emptiness has to be tested through its
    # size; comparing it with `[]` does not produce a usable boolean.
    if inde.size == 0:
        # No row exists for the earlier date; leave this row untouched.
        continue
    inde = inde[0]
    # NOTE(review): `inde` holds index labels while `merger.index[inde]`
    # treats it as a position; the two agree only for a default 0..n-1
    # index - confirm.
    merger.loc[merger.index[row_index], 'count_lag_month'] = merger.loc[merger.index[inde], 'count']
The inner if/else block is meant to deal with cases where the date I'm looking for doesn't exist.
The code above simply gives me a list of NaNs. I would appreciate any help.
I've changed my code to the following:
# One list per lag feature; each must receive exactly one entry per row of
# `merger` so that it can be assigned as a column afterwards.
first = []
mean = []
wrkday = []
count = []
for row_index in range(0, len(merger)):
    print(row_index)
    date = merger.loc[merger.index[row_index], 'datetime']

    # Values from the row dated one month earlier (0 when there is none).
    prev = subtract_one_month(date)
    inde = merger.loc[merger['datetime'] == str(prev)].index.values.astype(int)
    if inde.size == 0:
        first.append(0)
        mean.append(0)
        wrkday.append(0)
        # No `continue` here: skipping the rest of the iteration would leave
        # `count` one entry short and break the column assignment below.
    else:
        # NOTE(review): `inde` holds index labels while `merger.index[inde]`
        # treats it as a position; the two agree only for a default 0..n-1
        # index - confirm.
        inde = inde[0]
        first.append(merger.loc[merger.index[inde], 'count'])
        mean.append(merger.loc[merger.index[inde], 'monthly_mean_count'])
        wrkday.append(merger.loc[merger.index[inde], 'monthly_wrkday_mean_count'])

    # Value from the row dated one day earlier (0 when there is none).
    prev_day = subtract_one_day(date)
    inde = merger.loc[merger['datetime'] == str(prev_day)].index.values.astype(int)
    if inde.size == 0:
        count.append(0)
    else:
        inde = inde[0]
        count.append(merger.loc[merger.index[inde], 'count'])

merger['count_lag_month'] = first
merger['monthly_mean_count_lag_month'] = mean
merger['monthly_wrkday_mean_count_lag_month'] = wrkday
merger['count_lag_day'] = count
It uses lists instead and it seems to run at a decent speed. I'm not sure if it's the best approach though.
So here is my code, which updates several column values based on the split values of the column 'location'. The code works fine, but as it's iterating by row it's not efficient enough. Can anyone help me make this code run faster, please?
# Split each row's 'location' on ':' and distribute the parts over the
# electoral_district / county / province / dublin_postal_district columns.
for index, row in df.iterrows():
    print(index)
    location_split = row['location'].split(':')
    # The meaning of a part depends on what has already been seen in this
    # row, so these flags are reset for every row.
    after_county = False
    after_province = False
    for l in location_split:
        # df.loc[row, column] sets a single cell; `df[index, 'name'] = ...`
        # would instead create a new column keyed by the tuple.
        if l.strip().endswith('ED'):
            df.loc[index, 'electoral_district'] = l
        elif l.strip().startswith('County'):
            df.loc[index, 'county'] = l
            after_county = True
        elif after_province:
            if l.strip() != 'Ireland':
                df.loc[index, 'dublin_postal_district'] = l
        elif after_county:
            # The first part following the county is stored as the province.
            df.loc[index, 'province'] = l.strip()
            after_province = True
'map' was what I needed :)
def fill_county(column):
    """Return the first ':'-separated part of *column* starting with 'County'.

    Parts are stripped of surrounding whitespace before the check, and the
    stripped part is what gets returned.

    Args:
        column: the location string to scan.

    Returns:
        str: the first matching part, or ``''`` when no part matches.
    """
    for part in column.split(':'):
        part = part.strip()
        if part.startswith('County'):
            return part
    return ''
# Series.map applies fill_county to every 'location' value and returns a
# Series aligned with df's index. The builtin map() only returns a list on
# Python 2; on Python 3 it is a lazy iterator.
df['county'] = df['location'].map(fill_county)