I would like to write a loop that builds a dataframe gathering the rows of an input dataframe that have values in common.
My problem: when I call the function, the output dataframe is empty...
Yet with a print(output) inside the loop, I can see that the program works. I do not understand why; I tried changing the position of the return statement, but that does not help.
Thank you in advance for your help !
def group(dataframe, identifiant, output):
    """Pair each referenced offender with the people fined in the same event.

    For every ID in ``identifiant["IDCTV"]``, each row of ``dataframe`` whose
    ``IDCONTREVENANT`` equals that ID is used as a reference row. Every
    *other* row that shares the reference row's ``DATE_INFRACTION`` and
    ``NOTRAIN`` yields one record with the keys ``IDREF``,
    ``CODEETATCIVILREF``, ``AGEREF``, ``IDCTV``, ``CODEETATCIVILCTV`` and
    ``AGECTV``.

    Args:
        dataframe: DataFrame with the columns ``IDCONTREVENANT``,
            ``DATE_INFRACTION``, ``NOTRAIN``, ``CODEETATCIVIL`` and ``AGE``.
            Its index labels must be unique (any unique index works, not
            only 0..n-1).
        identifiant: DataFrame with an ``IDCTV`` column.
        output: DataFrame the new records are added to. It is never
            modified in place.

    Returns:
        A new DataFrame holding the rows of ``output`` followed by the new
        records, with a fresh 0..n-1 index. When no record is produced,
        ``output`` itself is returned. The caller must keep the result,
        e.g. ``df_groups = group(df, IDCTV, df_groups)``.
    """
    # Local import: the file's import header is not visible from this block.
    import pandas as pd

    ids = dataframe["IDCONTREVENANT"]
    dates = dataframe["DATE_INFRACTION"]
    trains = dataframe["NOTRAIN"]
    genres = dataframe["CODEETATCIVIL"]
    ages = dataframe["AGE"]

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for ident in identifiant["IDCTV"]:
        for j in dataframe.index[(ids == ident).to_numpy()]:
            same_event = (
                (dates == dates.at[j]) & (trains == trains.at[j])
            ).to_numpy()
            for k in dataframe.index[same_event]:
                if k == j:
                    # A row is never paired with itself.
                    continue
                records.append({
                    "IDREF": ident,
                    "CODEETATCIVILREF": genres.at[j],
                    "AGEREF": ages.at[j],
                    "IDCTV": ids.at[k],
                    "CODEETATCIVILCTV": genres.at[k],
                    "AGECTV": ages.at[k],
                })

    if not records:
        return output

    new_rows = pd.DataFrame(records)
    if len(output) == 0:
        # Keep any columns the caller pre-declared on an empty frame without
        # concatenating an empty object.
        extra = [c for c in new_rows.columns if c not in output.columns]
        return new_rows.reindex(columns=list(output.columns) + extra)
    return pd.concat([output, new_rows], ignore_index=True)
# NOTE: group() hands its result back through its return value and does not
# modify df_groups in place. The return value is discarded on the next line,
# so the print below shows df_groups exactly as it was before the call.
group(df,IDCTV,df_groups)
print(df_groups)
I think you want to change
group(df,IDCTV,df_groups)
to
df_groups = group(df,IDCTV,df_groups)
Right now you're calling the group function and doing all that calculation, but you're not saving the output anywhere. So when you run print(df_groups), it prints whatever df_groups was before you called the function.
Related
I'm trying to do an iterative calculation that stores the result of each iteration by appending it to a dataframe.
However, when I change the input dataframe to a different one, I get the error KeyError: 0.
Here is my complete code:
# One result dict per funnel step; the DataFrame is built once at the end.
d = []
df_it = df_ofr
# Every column from position 3 onwards is treated as a funnel step.
step_columns = df_it.columns[3:]
last_col = len(step_columns) - 1
# Rows are read by POSITION (.iloc). The previous df_it[col][0] looked up the
# index LABEL 0 and raised "KeyError: 0" for any frame whose index does not
# contain that label (e.g. a filtered or re-indexed frame).
print("User Group : " + df_it['user_type'].iloc[0] + " " + df_it['user_status'].iloc[0])
for i, column in enumerate(step_columns):
    # First row is the baseline, second row is variant A.
    step_baseline = df_it[column].iloc[0]
    step_variant_a = df_it[column].iloc[1]
    if i == 0:
        # The first step only provides the denominators for later steps.
        step_1_baseline = step_baseline
        step_1_variant_a = step_variant_a
    else:
        convert_baseline = step_baseline
        convert_variant_a = step_variant_a
        if i == last_col:  # end to end conversion
            # The last column is measured against the first step instead of
            # the immediately preceding one.
            lead_baseline = step_1_baseline
            lead_variant_a = step_1_variant_a
        # perform proportion z test
        test_stat, p_value = proportions_ztest(
            [convert_baseline, convert_variant_a],
            [lead_baseline, lead_variant_a],
            alternative='smaller',
        )
        # perform bayesian ab test using aggregated data
        test = BinaryDataTest()
        test.add_variant_data_agg("Baseline", totals=lead_baseline, positives=convert_baseline)
        test.add_variant_data_agg("Variant A", totals=lead_variant_a, positives=convert_variant_a)
        bay_result = test.evaluate(seed=99)
        # append result
        d.append(
            {
                'Convert into': column,
                '# Users Baseline': lead_baseline,
                '# Users Variant A': lead_variant_a,
                '% CVR Baseline': convert_baseline / lead_baseline,
                '% CVR Variant A': convert_variant_a / lead_variant_a,
                'Z Test Stat': test_stat,
                'P-Value': p_value,
                'Prob Baseline being the Best': bay_result[0]['prob_being_best'],
                'Prob Variant A being the Best': bay_result[1]['prob_being_best'],
            }
        )
    # The current step becomes the denominator ("lead") of the next step.
    lead_baseline = step_baseline
    lead_variant_a = step_variant_a
pd.DataFrame(d)
The part that I'm trying to change is this line:
df_it = df_ofr
thanks for your help, really appreciate it
I'm trying to do iterative calculation that will store the result of each iteration by append into a dataframe
I'm new to Python and SQLAlchemy.
I already have a delete method that works if I construct the where conditions by hand.
Now I need to read the columns and values from an incoming request in YAML format and build the where conditions from them.
#enter data as yaml
items:
- item:
table: [MyTable,OtherTable]
filters:
field_id: 1234
#other_id: null
Here is what I try and can't go ahead:
# Build and run one DELETE per table listed in each configured item.
for i in use_case_cfg['items']:
    item = i.get('item')
    # A missing or empty `filters` mapping means "no WHERE clause".
    filters = item.get('filters') or {}
    for t in item['table']:
        # NOTE(review): assumes inv[t] is the mapped class for table name t
        # (it is already used that way to reach __table__) -- confirm.
        model = inv[t]
        to_delete = model.__table__.delete()
        for column, value in filters.items():
            # Comparing a column with a plain Python value lets SQLAlchemy
            # create AND fill the bind parameter itself. The previous code
            # named the parameter after the value and never supplied it,
            # hence "A value is required for bind parameter".
            # Successive .where() calls are joined with AND (a comma is not
            # valid between SQL conditions).
            to_delete = to_delete.where(getattr(model, column) == value)
        CoreData.session.execute(to_delete)
To me, it looks ok, but when I run, I got the error below:
sqlalchemy.exc.StatementError: (sqlalchemy.exc.InvalidRequestError) A value is required for bind parameter '9876'
[SQL: DELETE FROM MyTable WHERE "MyTable".field_id = %(1234)s]
[parameters: [{}]]
(Background on this error at: http://sqlalche.me/e/cd3x)
Can someone explain to me what is wrong or the proper way to do it?
Thanks.
There are two problems with the code.
Firstly,
str(getattr(t, column) == bindparam(value))
is binding the value as a placeholder, so you end up with
WHERE f2 = :Bob
but it should be the name that maps to the value in filters (so the column name in your case), so you end up with
WHERE f2 = :f2
Secondly, multiple WHERE conditions are being joined with a comma, but you should use AND or OR, depending on what you are trying to do.
Given a model Foo:
class Foo(Base):
    """Minimal mapped class used by the DELETE examples that follow."""

    __tablename__ = 'foo'

    # Integer primary key.
    id = sa.Column(sa.Integer, primary_key=True)
    # The two columns the example filters on.
    f1 = sa.Column(sa.Integer)
    f2 = sa.Column(sa.String)
Here's a working version of a segment of your code:
filters = {'f1': 2, 'f2': 'Bob'}
t = Foo
# One "<column> = :<column>" fragment per filter. The bind-parameter name is
# the column name, so `filters` itself can be passed as the parameter mapping
# at execution time. The fragments are combined with AND.
where_conditions = ' AND '.join(
    str(getattr(t, column) == sa.bindparam(column)) for column in filters
)
to_delete = t.__table__.delete().where(sa.text(where_conditions))
print(to_delete)
session.execute(to_delete, filters)
If you aren't obliged to construct the WHERE conditions as strings, you can do it like this:
# One "column == :column" expression per filter key; the values are supplied
# separately when the statement is executed.
where_conditions = list(
    getattr(t, name) == sa.bindparam(name) for name in filters
)
# Combine the expressions with AND and attach them to a DELETE on t's table.
combined = sa.and_(*where_conditions)
to_delete = t.__table__.delete().where(combined)
session.execute(to_delete, filters)
Two strings. My items name:
Parfume name EDT 50ml
And competitor's items name:
Parfume another name EDP 60ml
I have a long list of these names in one column and the competitors' names in another column, and I want to keep only those rows of the dataframe where my name and the competitor's name contain the same amount of ml, no matter what the rest of the strings look like. So how do I find a substring ending with 'ml' inside a bigger string? Then I could simply do
"**ml" in competitors_name
to see if they both contain the same amount of ml.
Thank you
UPDATE
'ml' is not always at the end of string. It might look like this
Parfume yet another great name 60ml EDP
Try this:
import re
def same_measurement(my_item, competitor_item, unit="ml"):
    """Return True when both item names state the same amount of ``unit``.

    The first number in each name that is immediately followed by ``unit``
    (e.g. ``"50ml"``) is extracted and the two numbers are compared as text.

    Args:
        my_item: Name of our item, e.g. ``"Parfume name EDT 50ml"``.
        competitor_item: Name of the competitor's item.
        unit: Literal unit suffix to look for. It is matched verbatim, not
            as a regular expression.

    Returns:
        ``True`` if both names contain an amount and the amounts are equal,
        ``False`` otherwise (including when either name has no amount).
    """
    # - re.escape: a unit such as ".l" or "%" must not act as regex syntax.
    # - (?:\.\d+)?: keep decimals together so "7.5ml" is not read as "5ml".
    # - (?!\w): the unit must end there, so "50mls" is not a match for "ml".
    matcher = re.compile(r"(\d+(?:\.\d+)?){}(?!\w)".format(re.escape(unit)))
    my_match = matcher.search(my_item)
    competitor_match = matcher.search(competitor_item)
    if my_match is None or competitor_match is None:
        return False
    return my_match.group(1) == competitor_match.group(1)


my_item = "Parfume name EDT 50ml"
competitor_item = "Parfume another name EDP 50ml"
assert same_measurement(my_item, competitor_item)
my_item = "Parfume name EDT 50ml"
competitor_item = "Parfume another name EDP 60ml"
assert not same_measurement(my_item, competitor_item)
You could use the python Regex library to select the 'xxml' values for each of your data rows and then do some logic to check if they match.
import re
data_rows = [["Parfume name EDT", "Parfume another name EDP 50ml"]]
# Compiled once, outside the loop. Any number of digits is accepted (the old
# {1,3} limit turned "1000ml" into "000ml") and letter case is ignored.
ml_pattern = re.compile(r'\d+ml', re.IGNORECASE)
for data_pairs in data_rows:
    # Check for my ml matches and set value
    my_ml_matches = ml_pattern.search(data_pairs[0])
    if my_ml_matches is not None:
        my_ml = my_ml_matches[0]
    else:
        my_ml = None
        print("my_ml has no ml")
    # Check for comp ml matches and set value
    comp_ml_matches = ml_pattern.search(data_pairs[1])
    if comp_ml_matches is not None:
        comp_ml = comp_ml_matches[0]
    else:
        comp_ml = None
        print("comp_ml has no ml")
    # Print outputs
    if my_ml is not None and comp_ml is not None:
        # The match ignores case, so the comparison must too: "50ML" and
        # "50ml" are the same volume.
        if my_ml.lower() == comp_ml.lower():
            print("my_ml: {0} == comp_ml: {1}".format(my_ml, comp_ml))
        else:
            print("my_ml: {0} != comp_ml: {1}".format(my_ml, comp_ml))
Where data_rows = each row in the data set
Where data_pairs = {your_item_name, competitor_item_name}
You could use a lambda function to do that.
import pandas as pd
import re
d = {
    'Us':
        ['Parfume one 50ml', 'Parfume two 100ml'],
    'Competitor':
        ['Parfume uno 50ml', 'Parfume dos 200ml'],
}
df = pd.DataFrame(data=d)
# Extract the digits in front of "ml" for the whole column at once. A name
# without an "ml" amount yields a missing value instead of raising
# AttributeError as re.search(...).group(1) did.
us_ml = df['Us'].str.extract(r'(\d+)ml', expand=False)
comp_ml = df['Competitor'].str.extract(r'(\d+)ml', expand=False)
# A row is "Yes" only when both amounts exist and are equal.
same_amount = (us_ml == comp_ml) & us_ml.notna() & comp_ml.notna()
df['Eq'] = same_amount.map({True: 'Yes', False: 'No'})
Result:
It doesn't matter whether 'ml' is at the end or in the middle of the string.
I am creating a school SAS (an online system for school marks) and I have one problem. I wrote a function to generate some marks, but when I remove the call to that function the marks just disappear.
I have two files; this is the one where we execute our functions:
import sas as s
# Fill the grade list of the `sas` module with randomly generated marks ...
s.generateGrades()
# ... then print the average for every subject.
s.completeAverage()
and in this, there are all of functions
import random
def generateGrades(count=30):
    """Append ``count`` randomly generated grades to the grade list.

    Each entry is a ``[subject, date, grade]`` list of strings: a random
    subject from ``subjects``, a random valid 2016 date in ``YYYY-MM-DD``
    form (the format ``addGrade`` asks the user for) and a grade from
    ``"1"`` to ``"5"``.

    Args:
        count: Number of grades to generate; defaults to 30.
    """
    # Local import: only `random` is imported at the top of this module.
    import calendar

    for _ in range(count):
        month = random.randint(1, 12)
        # Ask the calendar for the month length so that dates such as
        # "2016-2-30" are never produced and day 31 is possible.
        last_day = calendar.monthrange(2016, month)[1]
        day = random.randint(1, last_day)
        continuousClassification.append([
            random.choice(subjects),
            "2016-{:02d}-{:02d}".format(month, day),
            str(random.randint(1, 5)),
        ])
def addGrade():
    """Ask the user for a subject, a date and a grade and store the entry.

    The three answers are appended, unvalidated, to the module-level
    ``continuousClassification`` list as ``[subject, date, grade]``.
    """
    subject = input("Zadejte předmět zkratkou: ")
    date = input("Zadejte datum ve formátu RRRR-MM-DD : ")
    grade = input("Zadejte známku, pokud žák nepsal zadejte N :")
    continuousClassification.append([subject, date, grade])
def searchBy(typeOf, source):
    """Print the grades whose subject or date equals ``source``.

    Args:
        typeOf: ``"predmetu"`` searches by subject (first field of an
            entry); any other value searches by date (second field).
        source: The subject abbreviation or date string to look for.

    Subject matches print only the grade; date matches also print the
    entry's position in ``continuousClassification``.
    """
    if typeOf == "predmetu":
        for entry in continuousClassification:
            if entry[0] == source:
                print("Známka ", entry[2])
    else:
        for i, entry in enumerate(continuousClassification):
            if entry[1] == source:
                print(i, ".", "známka ", entry[2])
def averageOfSubject(subject):
    """Return the average grade for ``subject``, rounded to two decimals.

    Entries whose grade is not a number are skipped: ``addGrade`` tells the
    user to enter "N" for a pupil who did not take the test, and that value
    used to crash ``int()``.

    Args:
        subject: Subject abbreviation to average.

    Returns:
        The rounded average as a number, or a message string when the
        subject has no numeric grade.
    """
    grade_sum = 0
    grade_count = 0
    for entry in continuousClassification:
        if entry[0] != subject:
            continue
        try:
            value = int(entry[2])
        except (TypeError, ValueError):
            # Not a numeric grade (e.g. "N"): leave it out of the average.
            continue
        grade_sum += value
        grade_count += 1
    # Decide on the number of grades, not on their sum.
    if grade_count == 0:
        return "V předmětu "+subject+" nemáte žádnou známku"
    return round(grade_sum / grade_count, 2)
def completeAverage():
    """Print, for every entry of ``subjects``, what ``averageOfSubject`` returns."""
    for subject in subjects:
        print("Průměr z ", subject, " je ", averageOfSubject(subject))
# Subject abbreviations used when generating grades and printing averages.
subjects = ["MAT","CJL","DEJ","FYZ","TEV","ANJ","NEJ","PAD","GRW","TVY","ASW","TEA","ZAE"]
# In-memory grade store: the functions above append [subject, date, grade]
# lists to it. It is re-created empty each time this module is loaded and
# nothing here writes it to disk.
continuousClassification = []
I want to generate the marks once and have them remembered permanently, but that does not happen. When I run my script without generating new marks, the old ones are not loaded, and I have to generate them again.
I am trying to edit this function so the values of the dictionary will not be printed in parentheses and will be iterable:
def traverse_appended(key):
    """Map each item yielded by ``traverse_reg(key)`` to two registry values.

    Args:
        key: Passed unchanged to ``traverse_reg``.

    Returns:
        A dict whose values are 2-tuples of strings:
        ``(str(Displayversion value), str(DisplayName value))``. The tuple
        is what shows up in parentheses when the dict is printed; index or
        unpack it to get the individual strings.
    """
    reg_dict = {}
    # NOTE(review): `keypath` is not assigned in this function, so it must
    # exist at module level. The original snippet carried a commented-out
    # assignment pointing at the ...\CurrentVersion\Uninstall\ registry path
    # -- confirm it is defined before this is called.
    for item in traverse_reg(key):
        keypath_str = str(keypath + item)
        version = str(get_reg("Displayversion", keypath_str))
        name = str(get_reg("DisplayName", keypath_str))
        reg_dict[item] = (version, name)
    return reg_dict
the expected output is :
{'DXM_Runtime': 'None', 'None'}
The function output:
{'DXM_Runtime': ('None', 'None')}
#Consider traverse_appended returns following dict.
#I think, converting func_dict values which are tuple into string, will help you to get expected output.
func_dict = {"DXM_Runtime": ('None', 'None'),
             "TMP_KEY": ('A', 'B'),
             }
# Join each tuple of strings into a single comma-separated string.
# items() and print(...) behave the same on Python 2.7 and Python 3, unlike
# the Python 2-only viewitems() and print statement.
derived_dict = {k: ",".join(v) for k, v in func_dict.items()}
print(derived_dict)
#Output
E:\tmp_python>python tmp.py
{'DXM_Runtime': 'None,None', 'TMP_KEY': 'A,B'}
#If this doesn't help you, then please post the code for get_reg and traverse_reg function also.