I have a data class:
class data:
    """Record for a single reading returned by the API.

    Stores the read time, the five pollutant concentrations, the matching
    per-pollutant AQI values and the overall AQI summary fields exactly as
    they are passed in; no validation or conversion is performed.
    """

    def __init__(self, ReadTime, Concentration_PM10, Concentration_SO2,
                 Concentration_O3, Concentration_NO2, Concentration_CO,
                 AQI_PM10, AQI_SO2, AQI_O3, AQI_NO2, AQI_CO, AQI_AQIIndex,
                 AQI_ContaminantParameter, AQI_State, AQI_Color):
        # Timestamp of the reading, as delivered by the API.
        self.ReadTime = ReadTime
        # Measured concentrations, one attribute per pollutant.
        self.Concentration_PM10 = Concentration_PM10
        self.Concentration_SO2 = Concentration_SO2
        self.Concentration_O3 = Concentration_O3
        self.Concentration_NO2 = Concentration_NO2
        self.Concentration_CO = Concentration_CO
        # AQI value per pollutant.
        self.AQI_PM10 = AQI_PM10
        self.AQI_SO2 = AQI_SO2
        self.AQI_O3 = AQI_O3
        self.AQI_NO2 = AQI_NO2
        self.AQI_CO = AQI_CO
        # Overall AQI fields of the reading.
        self.AQI_AQIIndex = AQI_AQIIndex
        self.AQI_ContaminantParameter = AQI_ContaminantParameter
        self.AQI_State = AQI_State
        self.AQI_Color = AQI_Color
I'm sending a request to an API and collecting the results into a list of `data` objects:
# NOTE: the name `list` shadows the built-in; it is kept because the INSERT
# snippet that follows refers to it by this name.
list = []
for i in result:
    # Each API row nests its values under 'Concentration' and 'AQI'.
    concentration = i['Concentration']
    aqi = i['AQI']
    list.append(data(
        i['ReadTime'],
        concentration['PM10'],
        concentration['SO2'],
        concentration['O3'],
        concentration['NO2'],
        concentration['CO'],
        aqi['PM10'],
        aqi['SO2'],
        aqi['O3'],
        aqi['NO2'],
        aqi['CO'],
        aqi['AQIIndex'],
        aqi['ContaminantParameter'],
        aqi['State'],
        aqi['Color'],
    ))
Then I want to insert this list into a table in PostgreSQL, but I get the error "can't adapt type 'data'":
# Build one "%s" placeholder per element of `list`, separated by commas.
list_record = ", ".join(["%s"] * len(list))
# The placeholders are spliced in directly after VALUES; the 15 target columns
# are named explicitly in the statement.
query_insert= (f"INSERT INTO hava_kalitesi (ReadTime, Concentration_PM10, Concentration_SO2, Concentration_O3, Concentration_NO2, Concentration_CO, AQI_PM10, AQI_SO2, AQI_O3, AQI_NO2, AQI_CO, AQI_AQIIndex, AQI_ContaminantParameter,AQI_State,AQI_Color) VALUES {list_record}"
)
# The `data` objects themselves are passed as the query parameters.
# NOTE(review): presumably this is the call that raises
# "can't adapt type 'data'" — confirm against the traceback.
cursor.execute(query_insert,list)
Related
For some reason, in my fruit scraper, I cannot access anything from the listify function.
I am getting an error, for example: NameError: name 'family' is not defined.
I can't figure out what is wrong with my code - is my function bad, or am I doing something wrong with the class?
import requests
import json
import random
import pickle
class FruitScraper():
    """Collect fruit records from the Fruityvice API into per-attribute lists."""

    def __init__(self):
        # One list per attribute; index k of every list belongs to the k-th
        # fruit stored by listify().
        self.name = []
        self.id = []
        self.family = []
        self.genus = []
        self.order = []
        self.carbohydrates = []
        self.protein = []
        self.fat = []
        self.calories = []
        self.sugar = []

    def scrape_all_fruits(self):
        """Fetch the fruits with IDs 1-9 and return their decoded JSON payloads.

        Scraping is best-effort: the first request that fails, or whose body
        is not valid JSON, ends the loop and whatever was collected up to
        that point is returned.
        """
        data_list = []
        try:
            for ID in range(1, 10):
                url = f'https://www.fruityvice.com/api/fruit/{ID}'
                response = requests.get(url)
                data_list.append(response.json())
        except (requests.RequestException, ValueError):
            # Keep the partial result instead of failing the whole scrape.
            pass
        return data_list

    def listify(self, stats):
        """Scrape the fruits and append their fields to the instance lists.

        A record that lacks any expected key is skipped as a whole, so the
        lists always stay the same length.  ``stats`` is not used by this
        method; it is returned unchanged.
        """
        for fruit in self.scrape_all_fruits():
            try:
                nutritions = fruit['nutritions']
                # Read every field before storing anything, so a missing key
                # cannot leave the lists misaligned.
                values = (
                    fruit['name'], fruit['id'], fruit['family'],
                    fruit['genus'], fruit['order'],
                    nutritions['carbohydrates'], nutritions['protein'],
                    nutritions['fat'], nutritions['calories'],
                    nutritions['sugar'],
                )
            except (KeyError, TypeError):
                continue
            columns = (
                self.name, self.id, self.family, self.genus, self.order,
                self.carbohydrates, self.protein, self.fat, self.calories,
                self.sugar,
            )
            for column, value in zip(columns, values):
                column.append(value)
        return stats

    def get_summary(self):
        """Scrape the fruits and return a text summary of the nutrient extremes.

        Returns None when no fruit could be stored.
        """
        # listify() is called for its side effect of filling the lists.
        self.listify(None)
        if not self.name:
            return None
        return (
            f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}'
            f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}'
            f'\nTotal fruits scraped: {len(self.name)}'
        )
# NOTE: each line below constructs a separate FruitScraper(), so nothing
# stored on one instance is visible to the next call.
Scraped_info = FruitScraper().scrape_all_fruits()
# `family` is not defined at module level; this is the name the reported
# NameError refers to.
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
It's my first time doing OOP.
Please consider changing this
Scraped_info = FruitScraper().scrape_all_fruits()
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
to
# Create the scraper once and reuse the same object for every call.
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
# Called without arguments: this matches the `def listify(self)` signature
# proposed further down, not the original `listify(self, stats)`.
myScraper.listify()
Fruits_statistics = myScraper.get_summary()
Otherwise you create three different objects of this class and discard them with all their attributes after running the individual method once.
It is also essential to define family before it is used in this line of the code:
Listified_info = myScraper.listify(family)
But I can't see how you intended to use the parameter stats in your method listify(). It is just received and returned. I suggest that you change:
def listify(self, stats):
to
def listify(self):
and remove
return stats
If you want listify() to return the lists stored inside the object, you may do the following (but this is not the OOP way of doing things):
import requests
import json
import copy
class FruitScraper():
    """Collect fruit records from the Fruityvice API into per-attribute lists."""

    def __init__(self):
        # One list per attribute; index k of every list belongs to the k-th
        # fruit stored by listify().
        self.name = []
        self.id = []
        self.family = []
        self.genus = []
        self.order = []
        self.carbohydrates = []
        self.protein = []
        self.fat = []
        self.calories = []
        self.sugar = []

    def collect_all_lists(self):
        """Bundle the attribute lists into ``self.allLists``, keyed by attribute name."""
        self.allLists = {
            'name': self.name,
            'id': self.id,
            'family': self.family,
            'genus': self.genus,
            'order': self.order,
            'carbohydrates': self.carbohydrates,
            'protein': self.protein,
            'fat': self.fat,
            'calories': self.calories,
            'sugar': self.sugar,
        }

    def scrape_all_fruits(self):
        """Fetch the fruits with IDs 1-9 and return their decoded JSON payloads.

        Scraping is best-effort: the first request that fails, or whose body
        is not valid JSON, ends the loop and whatever was collected up to
        that point is returned.
        """
        data_list = []
        try:
            for ID in range(1, 10):
                url = f'https://www.fruityvice.com/api/fruit/{ID}'
                response = requests.get(url)
                data_list.append(response.json())
        except (requests.RequestException, ValueError):
            # Keep the partial result instead of failing the whole scrape.
            pass
        return data_list

    def listify(self):
        """Scrape the fruits, store their fields and return a copy of all lists.

        The lists are emptied first, so calling this method more than once
        does not store the same fruit twice.  A record that lacks any
        expected key is skipped as a whole, which keeps the lists aligned.

        Returns:
            A deep copy of ``self.allLists`` (attribute name -> list), so the
            caller can modify it without touching the scraper's own lists.
        """
        columns = (
            self.name, self.id, self.family, self.genus, self.order,
            self.carbohydrates, self.protein, self.fat, self.calories,
            self.sugar,
        )
        for column in columns:
            column.clear()
        for fruit in self.scrape_all_fruits():
            try:
                nutritions = fruit['nutritions']
                # Read every field before storing anything, so a missing key
                # cannot leave the lists misaligned.
                values = (
                    fruit['name'], fruit['id'], fruit['family'],
                    fruit['genus'], fruit['order'],
                    nutritions['carbohydrates'], nutritions['protein'],
                    nutritions['fat'], nutritions['calories'],
                    nutritions['sugar'],
                )
            except (KeyError, TypeError):
                continue
            for column, value in zip(columns, values):
                column.append(value)
        self.collect_all_lists()
        return copy.deepcopy(self.allLists)

    def get_summary(self):
        """Return a text summary of the nutrient extremes of the stored fruits.

        The fruits are scraped only if nothing has been stored yet, so a
        preceding listify() call is reused.  Returns None when no fruit
        could be stored.
        """
        if not self.name:
            self.listify()
        if not self.name:
            return None
        return (
            f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}'
            f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}'
            f'\nTotal fruits scraped: {len(self.name)}'
        )
# A single shared instance is used for all three calls, so the lists stored
# on it by one call are still present for the next.
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
# listify() returns the dictionary of attribute lists built by the class.
Listified_info = myScraper.listify()
Fruits_statistics = myScraper.get_summary()
I have the following list :
['[infotype02]', 'lastModifiedOn = serial<customMapping>', 'customString18 = BADGE_NUMBER<move>', 'firstName = FIRST_NAME<move>', 'lastName = LAST_NAME<move>', 'customString29 = USER_NAME<move>', 'email = EMAIL_ADDRESS<move>', 'documenttype = DOC_TYPE<move>', 'documentnumber = DOC_SERIA<customMapping>', 'documentnumberx2 = DOC_NUMBER<customMapping>', 'issuedate = DOC_ISSUE_DATE<move>', 'issueauthority = DOC_ISSUER<move>', 'nationalId = CNP<move>', 'company = COMPANY<move>', 'phoneNumber = PHONE_NUMBER<move>', 'startDate = HIRE_DATE<customMapping>', 'startDatex2 = TERMINATION_DATE<customMapping>', '[/infotype02]', '[infotype02]', 'lastModifiedOn = serial<customMapping>', 'customString18 = BADGE_NUMBER<move>', 'firstName = FIRST_NAME<move>', 'lastName = LAST_NAME<move>', 'customString29 = USER_NAME<move>', 'email = EMAIL_ADDRESS<move>', 'documenttype = DOC_TYPE<move>', 'documentnumber = DOC_SERIA<customMapping>', 'documentnumberx2 = DOC_NUMBER<customMapping>', 'issuedate = DOC_ISSUE_DATE<move>', 'issueauthority = DOC_ISSUER<move>', 'nationalId = CNP<move>', 'company = COMPANY<move>', 'phoneNumber = PHONE_NUMBER<move>', 'startDate = HIRE_DATE<customMapping>', 'startDatex2 = TERMINATION_DATE<customMapping>', '[/infotype02]']
While iterating over the list with `for i in list`, the first value of i is '[infotype02]'.
I tried using a regular expression to get the string between the [] (expected result: infotype02):
# NOTE: the square brackets are not escaped, so '[(.*)]' is a character class
# that matches one single '(', '.', '*' or ')' and defines no capture group.
# A string such as '[infotype02]' contains none of these characters, so
# re.search() returns None for it.
result = re.search('[(.*)]', i)
Then I tried to append result.group(1) to a new list, and it raised an error:
lst.append(result.group(1))
AttributeError: 'NoneType' object has no attribute 'group'
I don't understand what is wrong with my regular expression and why it isn't finding the string between the [].
You can just use a simple for-loop to accomplish this:
data = ['[infotype02]', 'lastModifiedOn = serial<customMapping>', 'customString18 = BADGE_NUMBER<move>', 'firstName = FIRST_NAME<move>',
        'lastName = LAST_NAME<move>', 'customString29 = USER_NAME<move>', 'email = EMAIL_ADDRESS<move>', 'documenttype = DOC_TYPE<move>',
        'documentnumber = DOC_SERIA<customMapping>', 'documentnumberx2 = DOC_NUMBER<customMapping>', 'issuedate = DOC_ISSUE_DATE<move>',
        'issueauthority = DOC_ISSUER<move>', 'nationalId = CNP<move>', 'company = COMPANY<move>', 'phoneNumber = PHONE_NUMBER<move>',
        'startDate = HIRE_DATE<customMapping>', 'startDatex2 = TERMINATION_DATE<customMapping>', '[/infotype02]', '[infotype02]',
        'lastModifiedOn = serial<customMapping>', 'customString18 = BADGE_NUMBER<move>', 'firstName = FIRST_NAME<move>', 'lastName = LAST_NAME<move>',
        'customString29 = USER_NAME<move>', 'email = EMAIL_ADDRESS<move>', 'documenttype = DOC_TYPE<move>', 'documentnumber = DOC_SERIA<customMapping>',
        'documentnumberx2 = DOC_NUMBER<customMapping>', 'issuedate = DOC_ISSUE_DATE<move>', 'issueauthority = DOC_ISSUER<move>', 'nationalId = CNP<move>',
        'company = COMPANY<move>', 'phoneNumber = PHONE_NUMBER<move>', 'startDate = HIRE_DATE<customMapping>', 'startDatex2 = TERMINATION_DATE<customMapping>',
        '[/infotype02]']

# Keep only opening tags such as '[infotype02]': bracketed on both sides and
# not a closing tag like '[/infotype02]'.  str.startswith/endswith also work
# on empty or one-character strings, where d[0] / d[1] would raise IndexError.
# With `re`, a comparable test is re.match(r"\[([^/].*)\]", d); group(1) of
# the match is then the tag name.
new_list = [
    d[1:-1]
    for d in data
    if d.startswith('[') and d.endswith(']') and not d.startswith('[/')
]
print(new_list)
Output:
['infotype02', 'infotype02']
As you have 2 of these tags in your given list.
I'm working on a task where I have to get data from SQL Server. Because I'm running a time series analysis, I need to specify a date field, which can differ for every table or query. The source can be either a simple query or a stored procedure. I want to generalize my code below, which is specific to one field and one database. I thought I could define an empty dictionary in the class and then use it in the read_data method below, but I am not sure how to proceed.
class DataPrep:
    """Load rows from a database and derive calendar columns for time-series work."""

    def __init__(self, conn):
        """Store the connection settings and create the empty containers.

        Args:
            conn: either an already-open DB-API connection, or a dict of
                keyword arguments for ``pyodbc.connect`` (for example
                ``driver``, ``server``, ``database``, ``uid``, ``pwd``).
                A dict is turned into a connection on the first
                ``read_data`` call.
        """
        self.df = pd.DataFrame()
        self.mega_projects = set()
        self.mega_project_to_df = {}
        self.mega_project_to_df_pvt = {}
        self.conn = conn

    def read_data(self, query='''exec [dbo].[ML_WorkLoad]''',
                  date_field='CreatedDate',
                  date_format='%d/%m/%Y %H.%M.%S',
                  cutoff='2020-01-01'):
        """Run ``query`` and fill ``self.df`` with the rows plus calendar columns.

        Args:
            query: SQL text to execute; a plain SELECT or a stored-procedure
                call.
            date_field: name of the result column holding the date used for
                the time-series breakdown.
            date_format: ``strftime``-style format used to parse
                ``date_field``.
            cutoff: rows whose ``date_field`` is later than this date are
                dropped; pass None to keep every row.

        The added columns are ``month``, ``year``, ``quarter``, ``week``
        (ISO week number), ``dayorg`` (original day of month), ``day``
        (always 1), ``year_quarter``, ``year_month``, ``year_week`` and
        ``date`` (first day of the row's month).
        """
        if isinstance(self.conn, dict):
            # Connection settings were supplied: open the connection once
            # and reuse it on later calls.
            self.conn = pyodbc.connect(**self.conn)

        self.df = pd.read_sql_query(query, self.conn,
                                    parse_dates={date_field: date_format})

        dates = self.df[date_field]
        self.df['month'] = dates.dt.month
        self.df['year'] = dates.dt.year
        self.df['quarter'] = dates.dt.quarter
        # DatetimeIndex.week was removed in pandas 2.0; isocalendar() gives
        # the same ISO week number.
        self.df['week'] = dates.dt.isocalendar().week
        self.df['dayorg'] = dates.dt.day
        # A constant day of 1 makes `date` below the first day of the month.
        self.df['day'] = 1

        year_text = self.df['year'].astype(str)
        self.df['year_quarter'] = year_text + "_" + self.df['quarter'].astype(str)
        self.df['year_month'] = year_text + "_" + self.df['month'].astype(str)
        self.df['year_week'] = year_text + "_" + self.df['week'].astype(str)
        self.df['date'] = pd.to_datetime(self.df[['year', 'month', 'day']])

        if cutoff is not None:
            self.df = self.df[dates <= pd.Timestamp(cutoff)]
The maximum number of records in my input JSON is 100; however, there is a paging "next" link that provides the next 100 records. Below is what I have, but it returns a dict with only 100 entries, and I know there are more. How should I modify this function to get all the records?
def _store_comments(target, records, skip_message):
    """Add one entry per well-formed comment in ``records`` to ``target``."""
    for record in records:
        try:
            author = record['from']
            commentor_id = author['id']
            commentor_name = author['name']
            created_time = record['created_time']
            message = remove_non_ascii(record['message'])
            sentiment = return_sentiment_score(message)
        except (KeyError, TypeError):
            # Skip only the malformed comment, not the rest of the page.
            print(skip_message)
            continue
        target[commentor_id] = {'commentor_name': commentor_name,
                                'created_time': created_time,
                                'message': message,
                                'sentiment': sentiment}


def process_comment_json(comment_json):
    """Collect the comments of a post, following the paging 'next' links.

    Args:
        comment_json: decoded JSON of one post.  Comments are read from
            ``comment_json['comments']['data']`` and the link to the next
            page from ``comment_json['comments']['paging']['next']``.

    Returns:
        dict keyed by commenter id.  Each value holds ``commentor_name``,
        ``created_time``, ``message`` (as returned by ``remove_non_ascii``)
        and ``sentiment`` (as returned by ``return_sentiment_score``).
        Because the key is the commenter id, a later comment by the same
        commenter replaces the earlier one.  The dict is empty when the post
        has no 'comments' entry.
    """
    post_comment_dict = dict()
    if 'comments' not in comment_json:
        return post_comment_dict

    first_page = comment_json['comments']
    _store_comments(post_comment_dict, first_page.get('data', []),
                    "malformed data, skipping this comment in round1")

    next_link = first_page.get('paging', {}).get('next')
    if next_link:
        print('found_next appending')

    # Remember visited links so a repeated 'next' link cannot loop forever.
    visited = set()
    while next_link and next_link not in visited:
        visited.add(next_link)
        print("processing next_links")
        print("current len of post_comment_dict is:", len(post_comment_dict))
        nl_json = requests.get(next_link).json()
        # Read the comments of the page that was just fetched, not those of
        # the first page again.
        _store_comments(post_comment_dict, nl_json.get('data', []),
                        "malformed data, skipping this comment from the next_links list")
        # NOTE(review): assumes a follow-up page carries its own
        # 'paging'/'next' at the top level — confirm against the API.
        next_link = nl_json.get('paging', {}).get('next')
        if next_link:
            print('found_next appending')
    return post_comment_dict
To begin with, I want to say that I'm a newbie in Python and everything I have learned came from tutorials.
My problem concerns referring to a returned value. I'm writing a script which scrapes some information from web sites. I defined a function:
# Download the page for record number `count` (appended to the global `Link`)
# and evaluate every XPath extractor on the parsed document.
def MatchPattern(count):
# Fetch the raw HTML of the record page.
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
# NOTE: the document is parsed twice; the second assignment simply replaces
# the result of the first.
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
# Pretty-printed HTML of the parsed tree; `result` is not used again in this
# function.
result = etree.tostring(root, pretty_print=True, method="html")
# The two values the caller wants to check.
expr1 = check_reg(root)
expr2 = check_practice(root)
# Doctor fields.
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
# NOTE: a function exits at its first `return`, so only expr1 is ever
# returned; every `return` statement after this one is unreachable.
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
So now, inside the script, I want to check the value of expr1 returned by my function. I don't know how to do that. Can you help me? Is my function written correctly?
EDIT:
I can't add an answer, so I am editing my current post.
This is my whole script. Some comments are in my native language, but I have added some in English.
#! /usr/bin/env python
#encoding:UTF-8-
# ----------------------------- import the required libraries and scripts -----------------------
# ------------------------------------------------------------------------------------------------
import urllib
from lxml import etree, html
import sys
import re
import MySQLdb as mdb
from TOR_connections import *
from XPathSelection import *
import os
# ------------------------------ Define the XPath selectors ------------------------------------------
# --------------------------------------------------------------------------------------------------
# Every name below is a compiled etree.XPath object; each expression wraps
# its location path in string(), and MatchPattern() calls each object with
# the parsed document.
# -------Doctors -----
check_reg = etree.XPath("string(//html/body/div/table[1]/tr[3]/td[2]/text())") # condition: doctor
check_practice = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())") # condition: practice
no_ks = etree.XPath("string(//html/body/div/table[1]/tr[1]/td[2]/text())")
Registred_by = etree.XPath("string(//html/body/div/table[1]/tr[4]/td[2]/text())")
Name_doctor = etree.XPath("string(//html/body/div/table[2]/tr[2]/td[2]/text())")
Registration_no = etree.XPath("string(//html/body/div/table[2]/tr[3]/td[2]/text())")
PWZL = etree.XPath("string(//html/body/div/table[2]/tr[4]/td[2]/text())")
NIP = etree.XPath("string(//html/body/div/table[2]/tr[5]/td[2]/text())")
Spec = etree.XPath("string(//html/body/div/table[2]/tr[18]/td[2]/text())")
Start_date = etree.XPath("string(//html/body/div/table[2]/tr[20]/td[2]/text())")
#-----Reg_practice-----
Name_of_practise = etree.XPath("string(//html/body/div/table[2]/tr[1]/td[2]/text())")
TERYT = etree.XPath("string(//html/body/div/table[2]/tr[7]/td[2]/*/text())")
Street = etree.XPath("string(//html/body/div/table[2]/tr[8]/td[2]/text())")
House_no = etree.XPath("string(//html/body/div/table[2]/tr[9]/td[2]/*/text())")
Flat_no = etree.XPath("string(//html/body/div/table[2]/tr[10]/td[2]/*/text())")
Post_code = etree.XPath("string(//html/body/div/table[2]/tr[11]/td[2]/*/text())")
City = etree.XPath("string(//html/body/div/table[2]/tr[12]/td[2]/*/text())")
# NOTE: Practice_no uses exactly the same expression as check_practice above.
Practice_no = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())")
Kind_of_practice = etree.XPath("string(//html/body/div/table[3]/tr[5]/td[2]/text())")
#------Serv_practice -----
TERYT2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[2]/td[2]/*/text())")
Street2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[3]/td[2]/text())")
House_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[4]/td[2]/*/text())")
Flat_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[5]/td[2]/i/text())")
Post_code2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[6]/td[2]/*/text())")
City2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[7]/td[2]/*/text())")
Phone_no = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[8]/td[2]/text())")
# --------------------------- global variable declarations ----------------------------------
# ----------------------------------------------------------------------------------------------
# Countdown used by the main loop: decremented on every pass and set back to
# 9 after the pass in which it is 0.
decrease = 9
# NOTE(review): `No` is not referenced anywhere in the visible script.
No = 1
# Base URL; MatchPattern() appends the record number to it.
Link = "http://rpwdl.csioz.gov.pl/rpz/druk/wyswietlKsiegaServletPub?idKsiega="
# --------------------------- function definitions ----------------------------------
# ----------------------------------------------------------------------------------------------
# Download the page for record number `count` (appended to the global `Link`)
# and evaluate every XPath extractor on the parsed document.
def MatchPattern(count):
# Fetch the raw HTML of the record page.
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
# NOTE: the document is parsed twice; the second assignment simply replaces
# the result of the first.
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
# Pretty-printed HTML of the parsed tree; `result` is not used again in this
# function.
result = etree.tostring(root, pretty_print=True, method="html")
# The two values the caller wants to check.
expr1 = check_reg(root)
expr2 = check_practice(root)
# Doctor fields.
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
# NOTE: a function exits at its first `return`, so only expr1 is ever
# returned; every `return` statement after this one is unreachable.
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
# --------------------------- establish the database connection -----------------------------
# ----------------------------------------------------------------------------------------------
con = mdb.connect('localhost', 'root', '******', 'SANBROKER', charset='utf8');
# ---------------------------- start of the program -----------------------------------------------
# ----------------------------------------------------------------------------------------------
# Read the record number to start from out of the bookkeeping table.
with con:
cur = con.cursor()
cur.execute("SELECT Old_num FROM SANBROKER.Number_of_records;")
Old_num = cur.fetchone()
count = Old_num[0]
# NOTE(review): the print statements below indicate Python 2, where input()
# evaluates what is typed — TODO confirm that a number is always entered.
counter = input("Input number of rows: ")
# ----------------------- first connection to TOR ------------------------------------
# ----------------------------------------------------------------------------------------
#connectTor()
#conn = httplib.HTTPConnection("my-ip.heroku.com")
#conn.request("GET", "/")
#response = conn.getresponse()
#print(response.read())
while count <= counter: # every tenth number
# --------------- first write to the database, into Archive --------------------
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Archive_num=%s",(count))
# ---------------------------------------------------------------------------------
if decrease == 0:
# The value returned by MatchPattern() is discarded here.
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
#------- final steps:
# NOTE(review): count / 100 does not relate count to counter — presumably
# not the intended percentage; confirm.
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
# Restart the countdown at 9.
decrease = 10-1
count +=1
else:
# The value returned by MatchPattern() is discarded here.
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
# ------ final steps:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease -=1
count +=1
Well, I'm assuming check_reg is a function that returns a boolean (either True or False).
If that's the case, to check the return:
if expr1:
print "True."
else:
print "False"
There's more than one way to do it, but basically, if expr1: is all you need to do the checking.
To capture the return value of a function, assign the function to a name with an equal sign, like this:
# Bind the function's return value to a name so it can be inspected afterwards.
return_value = somefunction(some_value)
print('The return value is ',return_value)
Keep in mind that when the first return statement is encountered, the function will exit. So if you have more than one return statement after each other, only the first will execute.
If you want to return multiple things, add them to a list and then return the list.
Here is an improved version of your function:
def match_pattern(count):
    """Fetch the record page for ``count`` and evaluate every XPath extractor on it.

    Args:
        count: record number appended to the module-level ``Link`` URL.

    Returns:
        list with one result per extractor, in the order the extractors are
        listed below, so ``results[0]`` is the value of ``check_reg``.
    """
    sock = urllib.urlopen(Link + str(count))
    try:
        html_source = sock.read()
    finally:
        # Close the connection even if reading fails.
        sock.close()
    root = etree.HTML(html_source)

    extractors = [
        check_reg, check_practice,
        # Doctors
        no_ks, Registred_by, Name_doctor, Registration_no, PWZL, NIP, Spec,
        Start_date,
        # Reg_practice
        Name_of_practise, TERYT, Street, House_no, Flat_no, Post_code, City,
        Practice_no, Kind_of_practice,
        # Serv_practice
        TERYT2, Street2, House_no2, Flat_no2, Post_code2, City2, Phone_no,
    ]
    return [extractor(root) for extractor in extractors]
# Example call: the list returned for record 1 is kept in `r`.
r = match_pattern(1)
print r[0] # this will be the result of check_reg(root)
The code you have posted is quite ambiguous. Can you please fix the indentation so that we can tell which part belongs to the function and which part is the script?
A function can return only one value. You cannot do:
return something
return something_else
return ...
The function ends as soon as the first value is returned.
What you can do is return a list, tuple or dict containing all your values.
For instance:
return (something,something_else,...)
or
return [something,something_else,...]
In your case, it seems better to create a class that has all the values you want as attributes, and to turn this function into a method that sets the attribute values.
class Example(object):
    """Values extracted from one record page, exposed as attributes."""

    def __init__(self, link, count):
        """Download ``link + str(count)`` and store every extracted value.

        Args:
            link: base URL of the record pages.
            count: record number appended to ``link``.
        """
        sock = urllib.urlopen(link + str(count))
        try:
            htmlSource = sock.read()
        finally:
            # Close the connection even if reading fails.
            sock.close()
        root = etree.HTML(htmlSource)

        self.expr1 = check_reg(root)
        self.expr2 = check_practice(root)
        # Doctor fields.
        self.D_expr1 = no_ks(root)
        self.D_expr2 = Registred_by(root)
        self.D_expr3 = Name_doctor(root)
        self.D_expr4 = Registration_no(root)
        self.D_expr5 = PWZL(root)
        self.D_expr6 = NIP(root)
        self.D_expr7 = Spec(root)
        self.D_expr8 = Start_date(root)
        #-----Reg_practice-----
        self.R_expr1 = Name_of_practise(root)
        self.R_expr2 = TERYT(root)
        self.R_expr3 = Street(root)
        self.R_expr4 = House_no(root)
        self.R_expr5 = Flat_no(root)
        self.R_expr6 = Post_code(root)
        self.R_expr7 = City(root)
        self.R_expr8 = Practice_no(root)
        self.R_expr9 = Kind_of_practice(root)
        #------Serv_practice -----
        self.S_expr1 = TERYT2(root)
        self.S_expr2 = Street2(root)
        self.S_expr3 = House_no2(root)
        self.S_expr4 = Flat_no2(root)
        self.S_expr5 = Post_code2(root)
        self.S_expr6 = City2(root)
        self.S_expr7 = Phone_no(root)
Then you will be able to use this class like this:
# Building the instance downloads the page and fills in the attributes.
exampleInstance = Example ( "link you want to use" , 4 ) # the second argument is your 'count' value
# Now you can use attributes of your class to get the values you want
print exampleInstance . expr1
print exampleInstance . S_expr7