Custom function - add as column to frame - python

I have a function which creates a dataframe, and a sub-function which does some diff's on that data frame to come up with a result - how do I add this result as a column to my dataframe.
def custom_compare_eq(series, other):
length = len(series.values)
for i in range(length):
r1 = eval(str(series.values[i]))
r2 = eval(str(other.values[i]))
if type(r1) != type(r2):
yield False
else:
if type(r1) == int:
yield r1 == r2
elif type(r1) == list:
yield set(r1) == set(r2)
print('store the result output')
result = list(custom_compare_eq(df.a_series, df.b_series))
print('add new match column to df')
df['match'] = result
print(df)
the last print(df) I expected to have the new 'match' column but it does not :(

Related

DataFrame not filling with value generate from Loop in Python

I am running a for loop in order to create a dataframe of 'New' values.
New = 0
Approved = 0
df = pd.DataFrame()
for row, rowdata in enumerate(combined):
for col, value in enumerate(rowdata.values()):
if col == 0:
print(value)
if col == 2:
New += value
print('Original New')
print(value)
if col == 4:
Approved = value
if Approved > 0:
New = New - Approved
print('Updated New')
print(New)
df['New'] = New
Everything in this code seems to be working except for the last df['New'] = New statement. Any ideas on why that might be happening would be greatly appreciate.
df['New'] = New is a wrong way to insert a single row.
One way to fix it:
all_rows = []
New = 0
Approved = 0
for row, rowdata in enumerate(combined):
for col, value in enumerate(rowdata.values()):
if col == 0:
print(value)
if col == 2:
New += value
print('Original New')
print(value)
if col == 4:
Approved = value
if Approved > 0:
New = New - Approved
print('Updated New')
print(New)
# Accumulate all the rows
all_rows.append(New)
# Finally create a dataframe
df = pd.DataFrame({'New': all_rows})

Why same code return list and tuple pandas?

if calculation_type == 'week':
df = df.groupby([pd.Grouper(key=order_date_field, freq='W-SUN')])[
display_fields].sum().reset_index().sort_values(order_date_field)
df[order_date_field] = df[order_date_field].apply(lambda x: x.date().isoformat())
else:
df = df.groupby([pd.Grouper(key=order_date_field, freq='M')])[
display_fields].sum().reset_index().sort_values(order_date_field)
df[order_date_field] = df[order_date_field].apply(lambda x: calendar.month_name[x.date().month])
return df.to_dict('records')
Question is next why if == week , return list and else return tuple?
df where 4 elems = list
df where 2 elems = tutple
It was mine fault when i call the function i put accidentally coma after the function
for e.i year = get_calculation(args),
and weeks week = get_calculation(args)

I want to get string from list

there is a matrix like below
[[a1, b, c],
[d2, e, a1],
[c1, dfd, u],
....
]
if matched with a specific word, then returns index from matrix.
And find row fixed column.
then I convert Matrix to list.
And I get the result like this ["RESULT DATA"].
I just want to get string RESULT DATA only without [''].
r, c = np.where(Matrix == SpecificWord)
find_row = np.where(c == 0)
result_data = Matrix[find_row,1].tolist()
print(str(result_data ))
print(result_data[0])
Solved
row, col = np.where(Terms == ElemData)
cList = col .tolist()
rLits = row.tolist()
if 0 not in cList:
return None
else:
num_col = cList.index(0)
num_row = rLits[num_col]
data = Terms[num_row, 1].tolist()
return data

How to change value in one cell to value in another (pandas)

I am trying to create some lag features by subtracting a month from each date in my datetime column and then assigning a column value from the past date to the current one.
This is my code:
for row_index in range(0,len(merger)):
date = merger.loc[merger.index[row_index],'datetime']
prev = subtract_one_month(date)
inde = merger.loc[merger['datetime'] == str(prev),'count'].index.values.astype(int)
if inde == []:
continue
else:
inde = inde[0]
merger.loc[merger.index[row_index], 'count_lag_month'] =
merger.loc[merger.index[inde], 'count']
The inner if else loop is meant to deal with cases where the date I'm looking for doesn't exist.
The code above simply gives me a list of NaNs. I would appreciate any help.
I've changed my
first = []
mean = []
wrkday = []
count = []
for row_index in range(0,len(merger)):
print(row_index)
date = merger.loc[merger.index[row_index],'datetime']
prev = subtract_one_month(date)
inde = merger.loc[merger['datetime'] == str(prev)].index.values.astype(int)
if inde.size == 0:
first.append(0)
mean.append(0)
wrkday.append(0)
continue
else:
inde = inde[0]
first.append(merger.loc[merger.index[inde], 'count'])
mean.append(merger.loc[merger.index[inde], 'monthly_mean_count'])
wrkday.append(merger.loc[merger.index[inde], 'monthly_wrkday_mean_count'])
prev_day = subtract_one_day(date)
inde = merger.loc[merger['datetime'] == str(prev_day)].index.values.astype(int)
if inde.size == 0:
count.append(0)
continue
else:
inde = inde[0]
count.append(merger.loc[merger.index[inde], 'count'])
merger['count_lag_month'] = first
merger['monthly_mean_count_lag_month'] = mean
merger['monthly_wrkday_mean_count_lag_month'] = wrkday
merger['count_lag_day'] = count
It uses lists instead and it seems to run at a decent speed. I'm not sure if it's the best approach though.

Efficiently update columns based on one of the columns split value

So here is my code updating many column values based on a condition of split values of the column 'location'. The code works fine, but as its iterating by row it's not efficient enough. Can anyone help me to make this code work faster please?
for index, row in df.iterrows():
print index
location_split =row['location'].split(':')
after_county=False
after_province=False
for l in location_split:
if l.strip().endswith('ED'):
df[index, 'electoral_district'] = l
elif l.strip().startswith('County'):
df[index, 'county'] = l
after_county = True
elif after_province ==True:
if l.strip()!='Ireland':
df[index, 'dublin_postal_district'] = l
elif after_county==True:
df[index, 'province'] = l.strip()
after_province = True
'map' was what I needed :)
def fill_county(column):
res = ''
location_split = column.split(':')
for l in location_split:
if l.strip().startswith('County'):
res= l.strip()
break
return res
df['county'] = map(fill_county, df['location'])

Categories