python counts contributions of categories from csv - python

I'm trying to deduce the volume contributions of each category (like summarize by row value in excel's pivot table) from a csv file (which has two cols ['customer','category']) in to a dict in the following format,
foodict = {
'customer' : 'cust1' ,
'categories' : { 'cat1' : 50, 'cat2' : 55 } ,
'contribution' : {
'cat1' : cat1/(cat1+cat2) ,
'cat2' : cat2/(cat1+cat2)
}
}
so far, I've got like this, but unable to achieve the needful one. I'm trying to do it in pure python without using any other data libraries.
c = {}
for i in customer:
for j in category:
for k in db:
count = 0
if k['customer'] == i and k['category'] == j:
count += 1
if not i in c.keys():
c[i] = {'category' : j , 'counts' : count}
else:
pass

#len:
This satisfied my requirement, but is there any efficient way to group by get the count contributions like we do in the excel as 'Summarize by row values in the pivot table
foo_dict = {}
for name in cust:
for j in category:
foo_dict[name] = {
'total' : int(sum(1 for i in db if i['cust'] == name)),
'lob' : {pro : int(sum(1 for i in db if i['cust'] == name and i['category'] == pro)) for pro in category},
'conts' : {pro : round(
float(
sum(1 for i in db if i['cust'] == name and i['category'] == pro)/int(sum(1 for i in db if i['cust'] == name))),2) for pro in category},
'samples' : {pro : round(
float(
sum(1 for i in db if i['cust'] == name and i['category'] == pro)/int(sum(1 for i in db if i['cust'] == name))),0) for pro in category}
}

Related

Why does my python loop return Key Error : 0 when I change the input dataframe?

I'm trying to do iterative calculation that will store the result of each iteration by append into a dataframe
however when I try to change the input dataframe into something else, I got the key error : 0
here are my complete code
d = []
df_it = df_ofr
i = 0
last_col = len(df_it.iloc[:,3:].columns) - 1
print("User Group : " + df_it[['user_type'][0]][0] + " " + df_it[['user_status'][0]][0])
for column in df_it.iloc[:,3:]:
if i > 0 :
if i < last_col: # 1 step conversion
convert_baseline = df_it[[column][0]][0]
convert_variant_a = df_it[[column][0]][1]
elif i == last_col: # end to end conversion
convert_baseline = df_it[[column][0]][0]
convert_variant_a = df_it[[column][0]][1]
lead_baseline = step_1_baseline
lead_variant_a = step_1_variant_a
#perform proportion z test
test_stat, p_value = proportions_ztest([convert_baseline,convert_variant_a], [lead_baseline,lead_variant_a], alternative='smaller')
#perform bayesian ab test
#initialize a test
test = BinaryDataTest()
#add variant using aggregated data
test.add_variant_data_agg("Baseline", totals=lead_baseline, positives=convert_baseline)
test.add_variant_data_agg("Variant A", totals=lead_variant_a, positives=convert_variant_a)
bay_result = test.evaluate(seed=99)
#append result
d.append(
{
'Convert into': column,
'# Users Baseline': lead_baseline,
'# Users Variant A': lead_variant_a,
'% CVR Baseline' : convert_baseline / lead_baseline,
'% CVR Variant A' : convert_variant_a / lead_variant_a,
'Z Test Stat' : test_stat,
'P-Value' : p_value,
'Prob Baseline being the Best' : bay_result[0]['prob_being_best'],
'Prob Variant A being the Best' : bay_result[1]['prob_being_best']
}
)
elif i == 0:
step_1_baseline = df_it[[column][0]][0]
step_1_variant_a = df_it[[column][0]][1]
i = i+1
lead_baseline = df_it[[column][0]][0]
lead_variant_a = df_it[[column][0]][1]
pd.DataFrame(d)
the one that I'm trying to change is this part
df_it = df_ofr
thanks for your help, really appreciate it
I'm trying to do iterative calculation that will store the result of each iteration by append into a dataframe

Best simple and effective solution for data organization

I'm new with Python programming so I'm doing a bunch of practice exercise in order to improve my skills.
Therefore, I would like to show you guys my approach on this example and if you could let me know what you think I would be grateful!
Exercise:
Given a list of costumers IDs, I have to segment them by the following logic:
If ID is multiple of 7 and multiple of 3 then segment 'A'
If ID is multiple of 3 then segment 'B'
If ID is multiple of 7, then segment 'C'
Else, segment 'D'
What I've done:
from collections import Counter
from datetime import datetime
import os.path
import json
date = datetime.today().strftime('%Y-%m-%d')
customer_indices = [list of IDs] ex: [123981,12398,123157,12371...]
def segment(customer):
if customer % 7 == 0 & customer % 3 == 0:
return 'A'
elif customer % 7 == 0:
return 'B'
elif customer % 3 == 0:
return 'C'
else:
return 'D'
def split_customers(customers):
a = []
b = []
c = []
d = []
for customer in customers:
if customer % 7 == 0 & customer % 3 == 0:
a.append(customer)
elif customer % 7 == 0:
b.append(customer)
elif customer % 3 == 0:
c.append(customer)
else:
d.append(customer)
return a,b,c,d
segmentation = [segment(customer) for customer in customer_indices]
print('Segmentation list: ')
print(segmentation)
print('\n')
segmentation_counter = Counter(segmentation)
print('Count of clients per segment: ')
print(f"A: {segmentation_counter['A']}")
print(f"B: {segmentation_counter['B']}")
print(f"C: {segmentation_counter['C']}")
print(f"D: {segmentation_counter['D']}")
a, b, c, d = split_customers(customer_indices)
main_dict = {'Date': date,
'Segmentation': {
'A Clients': {
'Count': segmentation_counter['A'],
'Customers': a},
'B Clients': {
'Count': segmentation_counter['B'],
'Customers': b},
'C Clients': {
'Count': segmentation_counter['C'],
'Customers': c},
'D Clients': {
'Count': segmentation_counter['D'],
'Customers': d}}}
main_list = [main_dict]
if not os.path.exists('Data/customer_segmentation.json'):
os.makedirs('Data')
if os.path.isfile('Data/customer_segmentation.json'):
with open('Data/customer_segmentation.json') as file:
data = json.load(file)
file.close()
data.append(main_dict)
with open('Data/customer_segmentation.json', 'w') as file:
json.dump(data, file, indent=2)
file.close()
else:
file = open('Data/customer_segmentation.json', 'w')
json.dump(main_list, file, indent=2)
file.close()
The original code has a with open txt function at first that extracts the client's ID list from a txt within the same directory of this .py
The main idea of this solution it would be to execute this every day so the json file will update with a new list for each day that it's run, so if I want to do an analysis of the segmentation growth, it would be pretty easy to do so.
What do you think?

Django/Python Multiple records

I have a program that compares values from the database and from a CSV file. My program works like this.
Database has values.
User uploads a file (multiple users multiple
files).
The program compares the values from the database and the
CSV files and gives an output.
Which tells me that this particular value was found in this user's file.
But I want the program to show me that if the value was found in the other user's file or not.
Here is a working example.
DB Values = [1,23,33,445,6656,88]
Example values of the CSV files:
File 1 values = [1,23,445,77,66,556,54]
File 2 values = [1,23,45,77,366]
File 3 values = [1,23,5,77,5356,524]
Output needed:
{'1':[(user1, some value),(user2, some value)...]}
Here my code:
def LCR(request):
template = "LCR\LCRGen.html"
dest = Destination.objects.values_list('dest_num', flat=True)
ratelist = { }
csv_file = { }
data_set = { }
io_string = { }
vendor = RateFile.objects.values_list()
v_count = vendor.count()
for v_id, v_name, v_file in vendor:
vendor_name = str(v_name)
vendornames = str(v_name)
vendornames = { }
for desNum in dest:
desNum = str(desNum)
for countvar in range(v_count):
csv_file[vendor_name] = RateFile.objects.get(id=v_id).ven_file
data_set[vendor_name] = csv_file[vendor_name].read().decode("UTF-8")
io_string[vendor_name] = io.StringIO(data_set[vendor_name])
next(io_string[vendor_name])
for column in csv.reader(io_string[vendor_name], delimiter=str(u",")):
vendornames[column[0]] = column[1]
for venNum, venValue in vendornames.items():
venlen = len(venNum)
deslen = len(desNum)
if venlen >= deslen or venlen <= deslen:
if desNum[:-1] == venNum[:-1] and desNum[:-2] == venNum[:-2] and desNum[:-3] == venNum[:-3]:
ratelist[desNum] = [(vendor_name, venValue),]
if (vendor_name, venValue) in ratelist[desNum]:
ratelist[desNum] = [
(vendor_name, venValue),]
elif desNum[:-1] == venNum[:-2] and desNum[:-2] == venNum[:-3] and desNum[:-3] == venNum[:-4]:
ratelist[desNum] = [(vendor_name, venValue),]
if (vendor_name, venValue) in ratelist[desNum]:
ratelist[desNum] = [
(vendor_name, venValue),]
elif desNum[:-1] == desNum[:-3] and desNum[:-2] == venNum[:-4] and desNum[:-3] == venNum[:-5]:
ratelist[desNum] = [(vendor_name, venValue),]
elif desNum[:-1] == venNum[:-5] and desNum[:-2] == venNum[:-6]:
ratelist[desNum] = [(vendor_name, venValue),]
if (vendor_name, venValue) in ratelist[desNum]:
ratelist[desNum] = [
(vendor_name, venValue),]
else:
pass
print ( ratelist )
return render ( request, template, { "ratelist" : ratelist } )
Output
Zong, Tata are usernames and the float values is their respective value for the key value of the dictionary.
{'12': [('Zong', ' 0.026')], '213': [('Tata', ' 4.150')], '49': [('Tata', ' 0.531')], '30': [('Zong', ' 0.87')], '454': [('Tata', ' 0.531')], '374': [('Zong', ' 0.87')],
This is what you asked for:
### your data example
db = [1,23,33,445,66,556,88]
us1 = [1,23,445,77,66,556,54]
us2 = [1,23,45,77,366]
### create a list of usernames (to use the string name in dictionary)
userlist = [ "us1", "us2" ]
### intialize the dict for results
values_dict = {}
### open the loop on DB values
for value in db :
# open loop on userlist
for user in userlist :
# if value is found in user list of values
if value in eval(user) :
# if values still NOT a key of results dictionary create the key with the tuple list as values
if value not in values_dict :
values_dict.update({ value : [ ( user, value ) ] })
# else just append the tuple (username, value) to the results dictionary for the DB value corresponding key
else :
values_dict[value].append([ ( user, value ) ])
values_dict
### OUTPUT:
{1: [('us1', 1), [('us2', 1)]], 23: [('us1', 23), [('us2', 23)]], 445: [('us1', 445)], 66: [('us1', 66)], 556: [('us1', 556)]}
but it makes no sense cause it simply check if a value is in the user list of values and add a tuple just to confirm it, it doesn't require all this code, could be simplified a lot. But I'm thinking that I misunderstood your question (please review the english), probably you need to use the DB value as the key to retrieve another value from the user...please review and update

What is the most efficient way to a multiple variable in dictionary in python?

this my code, i'm looking, is other way to code this in most efficient way?
i have multiple variables and inserted to the dictionary.
please feel to suggest and other options like array and etc will do.
def momentEndSpan(span_type,max_combo,length):
if "simply supported" == span_type:
q = max_combo
force = {}
RA = {"PA" : q*length/2}
RB = {"PB" : q*length/2}
RA_moment = {"MA" : 0}
R_mid_moment = {"Mmid": (q*math.pow(length,2))/8 }
RB_moment = { "MB" : 0}
force.update(RA)
force.update(RB)
force.update(RA_moment)
force.update(R_mid_moment)
force.update(RB_moment)
return force
elif "one end continuous" == span_type:
q = max_combo
x = (3/8)*length
force = {}
RA = {"Phinge" : 3*q*length/8}
RB = {"Pfixed" : 5*q*length/8}
RA_moment = {"Mhinge" : 0}
R_mid_moment = {"Mmid": (q*math.pow(length,2))*(9/128) }
RB_moment = { "MB" : -1*(q*math.pow(length,2))/8 }
force.update(RA)
force.update(RB)
force.update(RA_moment)
force.update(R_mid_moment)
force.update(RB_moment)
return force
Thank you very much
The "More Pythonic" way is to create one dictionary and update once.
q = max_combo
force = {}
if "simply supported" == span_type:
new = {"PA" : q*length/2,
"PB" : q*length/2,
"MA" : 0, "Mmid": (q*math.pow(length,2))/8,
"MB" : 0}
elif "one end continuous" == span_type:
x = (3/8)*length
new = {"Phinge" : 3*q*length/8,
"Pfixed" : 5*q*length/8,
"Mhinge" : 0,
"Mmid": (q*math.pow(length,2))*(9/128),
"MB" : -1*(q*math.pow(length,2))/8 }
force.update(new)
Also, note that if the force dictionary doesn't contain any previously defined items you can simply return the new and/or just continue to update the new in your next operations if there are any. Or just use name force instead of new.
q = max_combo
if "simply supported" == span_type:
force = {...}
elif "one end continuous" == span_type:
x = (3/8)*length
force = {...}

List doesnt hold values inside and clear

I am creating to school SAS (internet online marks from school ) and I have one problem. I created function to generate some marks but when I delete the funcion the marks will just disapear.
I have two files,this is the one when we are executing our functions
import sas as s
s.generateGrades()
s.completeAverage()
and in this, there are all of functions
import random
def generateGrades() :
for i in range(30) :
continuousClassification.append([
subjects[random.randint(0,len(subjects)-1)],
"2016"+"-"+str(random.randint(1,12))+"-"+str(random.randint(1,30)),
str(random.randint(1,5)),
])
def addGrade() :
subject = input("Zadejte předmět zkratkou: ")
date = input("Zadejte datum ve formátu RRRR-MM-DD : ")
grade = input("Zadejte známku, pokud žák nepsal zadejte N :")
continuousClassification.append([subject,date,grade])
def searchBy(typeOf,source) :
if typeOf == "predmetu" :
for i in range(len(continuousClassification)) :
if(continuousClassification[i][0] == source) :
print("Známka ",continuousClassification[i][2])
else :
for i in range(len(continuousClassification)):
if (continuousClassification[i][1] == source):
print(i, ".", "známka ", continuousClassification[i][2])
def averageOfSubject(subject) :
all = 0
total = 0
for i in range(len(continuousClassification)) :
if continuousClassification[i][0] == subject :
all+=int(continuousClassification[i][2])
total+=1
if all == 0 :
return "V předmětu "+subject+" nemáte žádnou známku"
return round(all/total,2)
def completeAverage() :
for subject in subjects :
print("Průměr z ",subject," je ",averageOfSubject(subject))
subjects = ["MAT","CJL","DEJ","FYZ","TEV","ANJ","NEJ","PAD","GRW","TVY","ASW","TEA","ZAE"]
continuousClassification = []
I want to generate marks and I want to remember it all time, but it doesnt do it. When I run my script without generating new ones it just dont load them and I have to load new again

Categories