Python Classes and Object assignment - python

I need to write a program that does the following:
First, find the County that has the highest turnout, i.e. the highest percentage of the
population who voted, using the objects’ population and voters attributes
Then, return a tuple containing the name of the County with the highest turnout and the
percentage of the population who voted, in that order; the percentage should be
represented as a number between 0 and 1.
I took a crack at it, but am getting the following error:
Error on line 19:
allegheny = County("allegheny", 1000490, 645469)
TypeError: object() takes no parameters
Here is what I've done so far. Thank you so much for your help.
class County:
def __innit__(self, innit_name, innit_population, innit_voters) :
self.name = innit_name
self.population = innit_population
self.voters = innit_voters
def highest_turnout(data) :
highest_turnout = data[0]
for County in data:
if (county.voters / county.population) > (highest_turnout.voters / highest_turnout.population):
highest_turnout = county
return highest_turnout
# your program will be evaluated using these objects
# it is okay to change/remove these lines but your program
# will be evaluated using these as inputs
allegheny = County("allegheny", 1000490, 645469)
philadelphia = County("philadelphia", 1134081, 539069)
montgomery = County("montgomery", 568952, 399591)
lancaster = County("lancaster", 345367, 230278)
delaware = County("delaware", 414031, 284538)
chester = County("chester", 319919, 230823)
bucks = County("bucks", 444149, 319816)
data = [allegheny, philadelphia, montgomery, lancaster, delaware, chester, bucks]
result = highest_turnout(data) # do not change this line!
print(result) # prints the output of the function
# do not remove this line!

def __innit__(self, innit_name, innit_population, innit_voters) :
You mispelled __init__

Related

BeautifulSoup Data Scraping : Unable to fetch correct information from the page

I am trying to scrape data from:-
https://www.canadapharmacy.com/
below are a few pages that I need to scrape:-
https://www.canadapharmacy.com/products/abilify-tablet
https://www.canadapharmacy.com/products/accolate
https://www.canadapharmacy.com/products/abilify-mt
I need all the information from the page. I wrote the below code:-
base_url = 'https://www.canadapharmacy.com'
data = []
for i in tqdm(range(len(medicine_url))):
r = requests.get(base_url+medicine_url[i])
soup = BeautifulSoup(r.text,'lxml')
# Scraping medicine Name
try:
main_name = (soup.find('h1',{"class":"mn"}).text.lstrip()).rstrip()
except:
main_name = None
try:
sec_name = (soup.find('div',{"class":"product-name"}).find('h3').text.lstrip()).rstrip()
except:
sec_name = None
try:
generic_name = (soup.find('div',{"class":"card product generic strength equal"}).find('div').find('h3').text.lstrip()).rstrip()
except:
generic_name = None
# Description
try:
des1 = soup.find('div',{"class":"answer expanded"}).find_all('p')[1].text
except:
des1 = ''
try:
des2 = soup.find('div',{"class":"answer expanded"}).find('ul').text
except:
des2 = ''
try:
des3 = soup.find('div',{"class":"answer expanded"}).find_all('p')[2].text
except:
des3 = ''
desc = (des1+des2+des3).replace('\n',' ')
#Directions
try:
dir1 = soup.find('div',{"class":"answer expanded"}).find_all('h4')[1].text
except:
dir1 = ''
try:
dir2 = soup.find('div',{"class":"answer expanded"}).find_all('p')[5].text
except:
dir2 = ''
try:
dir3 = soup.find('div',{"class":"answer expanded"}).find_all('p')[6].text
except:
dir3 = ''
try:
dir4 = soup.find('div',{"class":"answer expanded"}).find_all('p')[7].text
except:
dir4 = ''
directions = dir1+dir2+dir3+dir4
#Ingredients
try:
ing = soup.find('div',{"class":"answer expanded"}).find_all('p')[9].text
except:
ing = None
#Cautions
try:
c1 = soup.find('div',{"class":"answer expanded"}).find_all('h4')[3].text
except:
c1 = None
try:
c2 = soup.find('div',{"class":"answer expanded"}).find_all('p')[11].text
except:
c2 = ''
try:
c3 = soup.find('div',{"class":"answer expanded"}).find_all('p')[12].text #//div[#class='answer expanded']//p[2]
except:
c3 = ''
try:
c4 = soup.find('div',{"class":"answer expanded"}).find_all('p')[13].text
except:
c4 = ''
try:
c5 = soup.find('div',{"class":"answer expanded"}).find_all('p')[14].text
except:
c5 = ''
try:
c6 = soup.find('div',{"class":"answer expanded"}).find_all('p')[15].text
except:
c6 = ''
caution = (c1+c2+c3+c4+c5+c6).replace('\xa0','')
#Side Effects
try:
se1 = soup.find('div',{"class":"answer expanded"}).find_all('h4')[4].text
except:
se1 = ''
try:
se2 = soup.find('div',{"class":"answer expanded"}).find_all('p')[18].text
except:
se2 = ''
try:
se3 = soup.find('div',{"class":"answer expanded"}).find_all('ul')[1].text
except:
se3 = ''
try:
se4 = soup.find('div',{"class":"answer expanded"}).find_all('p')[19].text
except:
se4 = ''
try:
se5 = soup.find('div',{"class":"post-author-bio"}).text
except:
se5 = ''
se = (se1 + se2 + se3 + se4 + se5).replace('\n',' ')
for j in soup.find('div',{"class":"answer expanded"}).find_all('h4'):
if 'Product Code' in j.text:
prod_code = j.text
#prod_code = soup.find('div',{"class":"answer expanded"}).find_all('h4')[5].text #//div[#class='answer expanded']//h4
pharma = {"primary_name":main_name,
"secondary_name":sec_name,
"Generic_Name":generic_name,
"Description":desc,
"Directions":directions,
"Ingredients":ing,
"Caution":caution,
"Side_Effects":se,
"Product_Code":prod_code}
data.append(pharma)
But, each page is having different positions for the tags hence not giving correct data. So, I tried:-
soup.find('div',{"class":"answer expanded"}).find_all('h4')
which gives me the output:-
[<h4>Description </h4>,
<h4>Directions</h4>,
<h4>Ingredients</h4>,
<h4>Cautions</h4>,
<h4>Side Effects</h4>,
<h4>Product Code : 5513 </h4>]
I want to create a data frame where the description contains all the information given in the description, directions contain all the information of directions given on the web page.
for i in soup.find('div',{"class":"answer expanded"}).find_all('h4'):
if 'Description' in i.text:
print(soup.find('div',{"class":"answer expanded"}).findAllNext('p'))
but it prints all the after the soup.find('div',{"class":"answer expanded"}).find_all('h4'). but I want only the tags are giving me the description of the medicine and no others.
Can anyone suggest how to do this? Also, how to scrape the rate table from the page as it gives me values in unappropriate fashion?
You can try the next working example:
import requests
from bs4 import BeautifulSoup
import pandas as pd
data = []
r = requests.get('https://www.canadapharmacy.com/products/abilify-tablet')
soup = BeautifulSoup(r.text,"lxml")
try:
card = ''.join([x.get_text(' ',strip=True) for x in soup.select('div.answer.expanded')])
des = card.split('Directions')[0].replace('Description','')
#print(des)
drc = card.split('Directions')[1].split('Ingredients')[0]
#print(drc)
ingre= card.split('Directions')[1].split('Ingredients')[1].split('Cautions')[0]
#print(ingre)
cau=card.split('Directions')[1].split('Ingredients')[1].split('Cautions')[1].split('Side Effects')[0]
#print(cau)
se= card.split('Directions')[1].split('Ingredients')[1].split('Cautions')[1].split('Side Effects')[1]
#print(se)
except:
pass
data.append({
'Description':des,
'Directions':drc,
'Ingredients':ingre,
'Cautions':cau,
'Side Effects':se
})
print(data)
# df = pd.DataFrame(data)
# print(df)
Output:
[{'Description': " Abilify Tablet (Aripiprazole) Abilify (Aripiprazole) is a medication prescribed to treat or manage different conditions, including: Agitation associated with schizophrenia or bipolar mania (injection formulation only) Irritability associated with autistic disorder Major depressive disorder , adjunctive treatment Mania and mixed episodes associated with Bipolar I disorder Tourette's disorder Schizophrenia Abilify works by activating different neurotransmitter receptors located in brain cells. Abilify activates D2 (dopamine) and 5-HT1A (serotonin) receptors and blocks 5-HT2A (serotonin) receptors. This combination of receptor activity is responsible for the treatment effects of Abilify. Conditions like schizophrenia, major depressive disorder, and bipolar disorder are caused by neurotransmitter imbalances in the brain. Abilify helps to correct these imbalances and return the normal functioning of neurons. ", 'Directions': ' Once you are prescribed and buy Abilify, then take Abilify exactly as prescribed by your
doctor. The dose will vary based on the condition that you are treating. The starting dose of Abilify ranges from 2-15 mg once daily, and the recommended dose for most conditions is between 5-15 mg once daily. The maximum dose is 30 mg once daily. Take Abilify with or without food. ', 'Ingredients': ' The active ingredient in Abilify medication is aripiprazole . ', 'Cautions': ' Abilify and other antipsychotic medications have been associated with an increased risk of death in elderly patients with dementia-related psychosis. When combined with other dopaminergic agents, Abilify can increase the risk of neuroleptic malignant syndrome. Abilify can cause metabolic changes and in some cases can induce high blood sugar in people with and without diabetes . Abilify can also weight gain and increased risk of dyslipidemia. Blood glucose should be monitored while taking Abilify. Monitor for low blood pressure and heart rate while taking Abilify; it can cause orthostatic hypertension which may lead to dizziness or fainting. Use with caution in patients with a history of seizures. ', 'Side Effects': ' The side effects of Abilify vary greatly depending
on what condition is being treated, what other medications are being used concurrently, and what dose is being taken. Speak with your doctor or pharmacist for a full list of side effects that apply to you. Some of the most common side effects include: Akathisia Blurred vision Constipation Dizziness Drooling Extrapyramidal disorder Fatigue Headache Insomnia Nausea Restlessness Sedation Somnolence Tremor Vomiting Buy Abilify online from Canada Pharmacy . Abilify can be purchased online with a valid prescription from a doctor. About Dr. Conor Sheehy (Page Author) Dr. Sheehy (BSc Molecular Biology, PharmD) works a clinical pharmacist specializing in cardiology, oncology, and ambulatory care. He’s a board-certified pharmacotherapy specialist (BCPS), and his experience working one-on-one with patients to fine tune their medication and therapy plans for optimal results makes him a valuable subject matter expert for our pharmacy. Read More.... IMPORTANT NOTE: The above information is intended to increase awareness of health information
and does not suggest treatment or diagnosis. This information is not a substitute for individual medical attention and should not be construed to indicate that use of the drug is safe, appropriate, or effective for you. See your health care professional for medical advice and treatment. Product Code : 5513'}]

Trying to find averages from a .txt but I keep getting ValueError: could not convert string to float: ''

I'm using the txt file: https://drive.google.com/file/d/1-VrWf7aqiqvnshVQ964zYsqaqRkcUoL1/view?usp=sharin
I'm running the script:
data = f.read()
ny_sum=0
ny_count=0
sf_sum=0
sf_count=0
for line in data.split('\n'):
print(line)
parts = line.split('\t')
city = parts[2]
amount = float(parts[4])
if city == 'San Francisco':
sf_sum = sf_sum + amount
elif city == 'New York':
ny_sum = ny_sum + amount
ny_count = ny_count + 1
ny_avg = ny_sum / ny_count
sf_avg = sf_sum / sf_count
#print(ny_avg, sf_avg)
f = open('result_file.txt', 'w')
f.write('The average transaction amount based on {} transactions in New York is {}\n'.format(ny_count, ny_avg))
f.write('The average transaction amount based on {} transactions in San Francisco is {}\n'.format(sf_count, sf_avg))
if ny_avg>sf_avg:
f.write('New York has higher average transaction amount than San Francisco\n')
else:
f.write('San Francisco has higher average transaction amount than New York\n')
f.close()
And I ALWAYS get the error:
ValueError: could not convert string to float: ''
I'm pretty new-ish to Python and I'm really not sure what I'm doing wrong here. I'm trying to get averages for New York and San Francisco, then export the results AND the comparison to a txt results file
This should give you what you're looking for:
from collections import defaultdict as DD
with open('New Purchases.txt') as pfile:
sums = DD(lambda: [0.0, 0])
for line in [line.split('\t') for line in pfile]:
try:
k = line[2]
sums[k][0] += float(line[4])
sums[k][1] += 1
except Exception:
pass
for k in ['San Francisco', 'New York']:
v = sums.get(k, [0.0, 1])
print(f'Average for {k} = ${v[0]/v[1]:.2f}')
I have re-arranged the code. I agree with BrutusFocus that the splits are making it difficult to read exactly the location on each row. I have set it so if it sees the location at any point in the row, it counts it.
with open("data.txt", "r") as f:
data = f.read()
ny_sum=0
ny_count=0
sf_sum=0
sf_count=0
for line in data.split('\n'):
parts = line.split('\t')
city = parts[2]
amount = float(parts[4])
print(city, amount)
if "New York" in line:
ny_sum = ny_sum + amount
ny_count = ny_count + 1
elif "San Francisco" in line:
sf_sum = sf_sum + amount
sf_count = sf_count + 1
ny_avg = ny_sum / ny_count
sf_avg = sf_sum / sf_count
#print(ny_avg, sf_avg)
f = open('result_file.txt', 'w')
f.write('The average transaction amount based on {} transactions in New York is
{}\n'.format(ny_count, ny_avg))
f.write('The average transaction amount based on {} transactions in San
Francisco is {}\n'.format(sf_count, sf_avg))
if ny_avg>sf_avg:
f.write('New York has higher average transaction amount than San Francisco\n')
else:
f.write('San Francisco has higher average transaction amount than New York\n')
f.close()

Trying to add object onto text file

I am trying to upload a bunch of objects onto a text file in an organized manner but I keep on getting an error. I am not sure about objects and how to arrange them so they appear in the text document.
class Customer:
def __init__(self, name, date, address, hkid, acc):
self.name = name
self.date = date
self.address = address
self.hkid = hkid
self.acc = acc
customer1 = Customer ("Sarah Parker","1/1/2000","Hong Kong, Tai Koo,Tai Koo Shing Block 22,Floor 10, Flat 1", "X1343434","2222")
customer2 = Customer ("Joe Momo","7/11/2002","Hong Kong, Tai Koo, Tai Koo Shing, Block 22, Floor 10, Flat 5", "C2327934","1234")
customer3 = Customer ("Brent Gamez","7/20/2002","Hong Kong, Tung Chung, Yun Tung, Block 33, Floor 10, Flat 9", "C1357434","2234")
customer4 = Customer ("Jose Gamez","7/20/2002","Hong Kong, Tung Chung, Yun Tung, Block 33, Floor 10, Flat 9", "C1357434","2234")
customer5 =Customer ("Viraj Ghuman","7/20/2002","Hong Kong, Heng Fa Chuen, 100 Shing Tai Road, Block 22, Floor 20, Flat 1", "C6969689","100000")
allCustom = [customer1, customer2, customer3, customer4, customer5]
def UpdateFile ():
global allCustom
OutFile = open("CustomInfo.txt","w")
for i in range (len(allCustom)):
for c in range (i):
OutFile.write(allCustom[i["\n","Name:",c.name,"\n"]])
OutFile.write(allCustom[i["Birth Date:",c.date,"\n"]])
OutFile.write(allCustom[i["Address:",c.address,"\n"]])
OutFile.write(allCustom[i["HKID:",c.hkid,"\n"]])
OutFile.write(allCustom[i["Account value:", c.acc,"\n"]])
OutFile.close()
i and c are integer list indexes. You can't use c.name because it's not a Customer object. And you can't index i[...] because it's not a container.
You don't need nested loops, just one loop over all the customers. Your loop iterates i from 0 to 4. On the first iteration it iterates 0 times, on the second iteration it processes c == 0, on the third iteration it processes c == 0 and c == 1, and so on.
Then you can use a formatting operator to put the attributes into the strings that you're writing to the file (I've used f-strings below, but you can also use the % operator or the .format() method).
def updateFile():
global allCustom;
with open("CustomInfo.txt", "w") as OutFile:
for c in allCustom:
OutFile.write(f"\nName:{c.name}\n")
OutFile.write(f"Birth Date:{c.date}\n")
OutFile.write(f"Address:{c.address}\n")
OutFile.write(f"HKID:{c.hkid}\n")
OutFile.write(f"Account value:{c.acc}\n")
You don't need two loops to get each object info. Maybe this is what you are looking for.
def UpdateFile():
global allCustom
Outfile = open("CustomInfo.txt", "w")
for i in allCustom:
Outfile.write(f'\nName: {i.name}\n')
...
Outfile.close()

Python voter turnout assignment - class and object

I need to write a code to turn out the name of the County which: (i) has the highest voter turnout and (ii) percentage of population voted. Can you help me because I'm so confused. Here is what I have done:
class County:
def __init__(self, init_name, init_population, init_voters) :
self.population = init_population
self.voters = init_voters
allegheny = County("allegheny", 1000490, 645469)
philadelphia = County("philadelphia", 1134081, 539069)
montgomery = County("montgomery", 568952, 399591)
lancaster = County("lancaster", 345367, 230278)
delaware = County("delaware", 414031, 284538)
chester = County("chester", 319919, 230823)
bucks = County("bucks", 444149, 319816)
data = [allegheny, philadelphia, montgomery, lancaster, delaware, chester, bucks]
def highest_turnout(self):
highest = self[0]
highest_voters = self[0].voters
for county in data:
if county.voters > highest_voters:
highest = county
result = highest_turnout(self)
print(result)
Summarizing suggestions from comments, the highest_turnout function needs to return the highest otherwise after the function finishes the highest value is lost.
Then instead of passing in self to highest_turnout pass in data:
class County:
def __init__(self, init_name, init_population, init_voters):
self.name = init_name
self.population = init_population
self.voters = init_voters
allegheny = County("allegheny", 1000490, 645469)
philadelphia = County("philadelphia", 1134081, 539069)
montgomery = County("montgomery", 568952, 399591)
lancaster = County("lancaster", 345367, 230278)
delaware = County("delaware", 414031, 284538)
chester = County("chester", 319919, 230823)
bucks = County("bucks", 444149, 319816)
data = [allegheny, philadelphia, montgomery, lancaster, delaware, chester, bucks]
def highest_voter_turnout(data):
'''iterate over county objects comparing county.voters values;
returns county object with max voters attribute'''
highest_voters = data[0]
for county in data:
if county.voters > highest_voters.voters:
highest_voters = county
return highest_voters
result_highest_voter_turnout = highest_voter_turnout(data)
print(result_highest_voter_turnout.name)
So far, this will return and display the name of the "County which: (i) has the highest voter turnout" (i.e. allegheny).
A similar function can now be created to compute the county with the highest "(ii) percentage of population voted" (one method also mentioned in comments).

Error when creating dictionaries from text files

I've been working on a function which will update two dictionaries (similar authors, and awards they've won) from an open text file. The text file looks something like this:
Brabudy, Ray
Hugo Award
Nebula Award
Saturn Award
Ellison, Harlan
Heinlein, Robert
Asimov, Isaac
Clarke, Arthur
Ellison, Harlan
Nebula Award
Hugo Award
Locus Award
Stephenson, Neil
Vonnegut, Kurt
Morgan, Richard
Adams, Douglas
And so on. The first name is an authors name (last name first, first name last), followed by awards they may have won, and then authors who are similar to them. This is what I've got so far:
def load_author_dicts(text_file, similar_authors, awards_authors):
name_of_author = True
awards = False
similar = False
for line in text_file:
if name_of_author:
author = line.split(', ')
nameA = author[1].strip() + ' ' + author[0].strip()
name_of_author = False
awards = True
continue
if awards:
if ',' in line:
awards = False
similar = True
else:
if nameA in awards_authors:
listawards = awards_authors[nameA]
listawards.append(line.strip())
else:
listawards = []
listawards.append(line.strip()
awards_authors[nameA] = listawards
if similar:
if line == '\n':
similar = False
name_of_author = True
else:
sim_author = line.split(', ')
nameS = sim_author[1].strip() + ' ' + sim_author[0].strip()
if nameA in similar_authors:
similar_list = similar_authors[nameA]
similar_list.append(nameS)
else:
similar_list = []
similar_list.append(nameS)
similar_authors[nameA] = similar_list
continue
This works great! However, if the text file contains an entry with just a name (i.e. no awards, and no similar authors), it screws the whole thing up, generating an IndexError: list index out of range at this part Zname = sim_author[1].strip()+" "+sim_author[0].strip() )
How can I fix this? Maybe with a 'try, except function' in that area?
Also, I wouldn't mind getting rid of those continue functions, I wasn't sure how else to keep it going. I'm still pretty new to this, so any help would be much appreciated! I keep trying stuff and it changes another section I didn't want changed, so I figured I'd ask the experts.
How about doing it this way, just to get the data in, then manipulate the dictionary any ways you want.
test.txt contains your data
Brabudy, Ray
Hugo Award
Nebula Award
Saturn Award
Ellison, Harlan
Heinlein, Robert
Asimov, Isaac
Clarke, Arthur
Ellison, Harlan
Nebula Award
Hugo Award
Locus Award
Stephenson, Neil
Vonnegut, Kurt
Morgan, Richard
Adams, Douglas
And my code to parse it.
award_parse.py
data = {}
name = ""
awards = []
f = open("test.txt")
for l in f:
# make sure the line is not blank don't process blank lines
if not l.strip() == "":
# if this is a name and we're not already working on an author then set the author
# otherwise treat this as a new author and set the existing author to a key in the dictionary
if "," in l and len(name) == 0:
name = l.strip()
elif "," in l and len(name) > 0:
# check to see if recipient is already in list, add to end of existing list if he/she already
# exists.
if not name.strip() in data:
data[name] = awards
else:
data[name].extend(awards)
name = l.strip()
awards = []
# process any lines that are not blank, and do not have a ,
else:
awards.append(l.strip())
f.close()
for k, v in data.items():
print("%s got the following awards: %s" % (k,v))

Categories