Need help please.
I want to split the inserts for each table into two additional functions instead of doing everything in one function, as in def insert_to_db(self) below.
Is creating two additional functions the right approach, should I leave it as it is, or is there a better way?
If I do create additional functions, how do I pass the db connection and cursor to them?
main.py
class ConfigMain:
    """Base configuration: holds a database handle and the input file path."""

    def __init__(self, filepath):
        """Open the database handle from environment settings and keep ``filepath``.

        ``get_env_params`` and ``connect_to_db`` are defined outside this
        snippet; whatever ``connect_to_db`` returns is stored on ``self.db``.
        """
        settings = get_env_params()
        # Collect the connection arguments first, then expand them in one call.
        connection_kwargs = dict(
            user=settings.user_name,
            password=settings.password,
            host=settings.host,
            port=settings.port,
            database=settings.database,
        )
        self.db = connect_to_db(**connection_kwargs)
        self.filepath = filepath
ConfigMain.py
from main import ConfigMain
import openpyxl
import mariadb
import logging
class DBConfig(ConfigMain):
    """Inserts workbook data into the ``order_invoice`` and ``order_item`` tables."""

    def insert_to_db(self):
        """Insert one ``order_invoice`` and one ``order_item`` row per data row of 'Sheet1'.

        Opens the workbook at ``self.filepath``, looks for the sheet named
        'Sheet1' and, for every row from 2 up to ``sheet.max_row``, executes
        two INSERT statements through the cursor taken from ``self.db``.

        Raises:
            mariadb.IntegrityError: Re-raised after printing the error and the
                table being written.
            mariadb.DataError: Re-raised after printing the error and the
                table being written.
        """
        # self.db is indexed here as (connection, cursor).
        # NOTE(review): conn is not used in the visible code and no commit is
        # visible either - confirm whether autocommit is enabled.
        conn = self.db[0]
        cursor = self.db[1]
        wb = openpyxl.load_workbook(filename=self.filepath, data_only=True)
        s = 0
        file_sheets = wb.sheetnames
        for s in range(len(file_sheets)):
            # order_invoice Sheet
            if file_sheets[s] == 'Sheet1':
                # table = 'order_invoice'
                # NOTE(review): num_rows_inserted_item is initialised but never
                # updated in the visible code.
                num_rows_inserted = 0
                num_rows_inserted_item = 0
                try:
                    # Make the matching sheet the active one and read its size.
                    wb.active = s
                    sheet = wb.active
                    row_count = sheet.max_row
                    column_count = sheet.max_column
                    # Starts at row 2 - presumably row 1 is a header; verify.
                    for r in range(2, row_count + 1):
                        # NOTE(review): order_invoice_id, billing_year,
                        # billing_month and order_invoice_item_id are not
                        # assigned anywhere in the visible code; the statements
                        # that read them from row r appear to have been elided.
                        order_invoice_id = order_invoice_id.replace(" ", "")
                        billing_cycle_id = str(billing_year) + str(billing_month)
                        # Assign values from each row
                        values = (order_invoice_id, billing_cycle_id)
                        table = 'order_invoice'
                        query = """INSERT INTO """ + table + """(order_invoice_id, billing_cycle_id) VALUES (%s, %s) """
                        # Execute sql Query
                        cursor.execute(query, values)
                        num_rows_inserted = num_rows_inserted + cursor.rowcount
                        # Second insert for the same row; `table` is also what
                        # the except handlers below report.
                        table = 'order_item'
                        # transformations
                        order_invoice_id = order_invoice_id.replace(" ", "")
                        order_invoice_item_id = str(order_invoice_id) + str(order_invoice_item_id)
                        # # Assign values from each row
                        values = ( order_invoice_item_id, order_invoice_id)
                        query = """INSERT INTO """ + table + """(order_invoice_item_id,
order_invoice_id) VALUES (%s,%s ) """
                        # Execute sql Query
                        cursor.execute(query, values)
                # Both handlers print the error with the current table name and
                # then propagate the exception unchanged.
                except mariadb.IntegrityError as e:
                    print(f"{e} - for table '{table}' ")
                    raise
                except mariadb.DataError as e:
                    print(f"{e} - for table '{table}' ")
                    raise
Thank you!
I'm writing a web scraping program to collect data from truecar.com.
My database table has 3 columns,
and when I run the program I get this error: list index out of range
Here is what I've done so far:
import mysql.connector
from bs4 import BeautifulSoup
import requests
import re
# Scrape make/model, price and mileage from a truecar.com listings page and
# insert them into the car_name table.

# take the car's name
requested_car_name = input()
# inject the car's name into the URL
my_request = requests.get('https://www.truecar.com/used-cars-for-sale/listings/' +
                          requested_car_name + '/location-holtsville-ny/?sort[]=best_match')
my_soup = BeautifulSoup(my_request.text, 'html.parser')
# ************ car_model column in database ******************
car_model = my_soup.find_all(
    'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
# we have a list of car models
car_list = []
# NOTE(review): range(20) indexes positions 0..19 unconditionally, so this
# raises IndexError whenever find_all() returned fewer than 20 elements.
for item in range(20):
    # appends car_model to car_list
    car_list.append(car_model[item].text)
# Builds the text '?, ?, ...' with one '?' per collected entry.
car_string = ', '.join('?' * len(car_list))
# ************** price column in database *****************************
price = my_soup.find_all(
    'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = []
# NOTE(review): same fixed-length indexing as above.
for item in range(20):
    # appends price to price_list
    price_list.append(price[item].text)
price_string = ', '.join('?' * len(price_list))
# ************** distance column in database ***************************
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = []
# NOTE(review): same fixed-length indexing as above.
for item in range(20):
    # appends distance to distance_list
    distance_list.append(distance[item].text)
distance_string = ', '.join('?' * len(distance_list))
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
    host="xxxxx",
    user="xxxxxx",
    password="xxxxxx",
    port='xxxxxx',
    database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
# NOTE(review): the % operator here pastes the three '?, ?, ...' strings into
# the SQL text itself, so the statement no longer contains three placeholders
# for three columns.
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s);' % (car_string, price_string, distance_string)
# values = (car_string, price_string, distance_string)
# NOTE(review): execute() is handed three separate lists; confirm against the
# connector's execute(operation, params) signature - one row per car would
# need executemany() with a sequence of (model, price, distance) tuples.
my_cursor.execute(insert_command, car_list, price_list, distance_list)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
I also have another problem: I can't insert a list into my columns. I have tried many ways, but unfortunately I wasn't able to get it working.
I think the problem is in this line:
IndexError Traceback (most recent call last)
<ipython-input-1-4a3930bf0f57> in <module>
23 for item in range(20):
24 # appends car_model to car_list
---> 25 car_list.append(car_model[item].text)
26
27 car_string = ', '.join('?' * len(car_list))
IndexError: list index out of range
I don't want to insert the whole list into a single row of the database. I want the price, model and mileage of the first 20 cars on truecar.com stored in my database.
Yes, you are hard-coding the length. Change how you iterate through your soup elements so that you loop over whatever was actually found:
import mysql.connector
from bs4 import BeautifulSoup
import requests
# Ask which make to search for.
requested_car_name = input('Enter car name: ')

# Build the listings URL around the requested make and download the page.
my_request = requests.get(
    'https://www.truecar.com/used-cars-for-sale/listings/'
    + requested_car_name
    + '/location-holtsville-ny/?sort[]=best_match'
)
my_soup = BeautifulSoup(my_request.text, 'html.parser')

# car_model column: text of every make/model header found on the page.
car_model = my_soup.find_all(
    'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
car_list = [tag.text for tag in car_model]

# price column: text of every pricing block found on the page.
price = my_soup.find_all(
    'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = [tag.text for tag in price]

# distance column: text of every mileage element found on the page.
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = [tag.text for tag in distance]

# One (model, price, distance) tuple per car; zip() stops at the shortest list.
values = list(zip(car_list, price_list, distance_list))

# Open the database connection.
print('CONNECTING ...')
mydb = mysql.connector.connect(
    host="xxxxx",
    user="xxxxxx",
    password="xxxxxx",
    port='xxxxxx',
    database='xxxxxx'
)
print('CONNECTED')

# Insert all rows with a single parameterized statement, then commit.
my_cursor = mydb.cursor(buffered=True)
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s)'
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
ALTERNATE:
There's also an API from which you can fetch the data directly:
import mysql.connector
import requests
import math
# Ask which make to search for.
requested_car_name = input('Enter car name: ')

# Listings API endpoint; the make is passed through the query parameters.
url = 'https://www.truecar.com/abp/api/vehicles/used/listings'
payload = {
    'city': 'holtsville',
    'collapse': 'true',
    'fallback': 'true',
    'include_incentives': 'true',
    'include_targeted_incentives': 'true',
    'make_slug': requested_car_name,
    'new_or_used': 'u',
    'per_page': '30',
    'postal_code': '',
    'search_event': 'true',
    'sort[]': 'best_match',
    'sponsored': 'true',
    'state': 'ny',
    'page':'1'}

# The first request doubles as the source of the total result count.
jsonData = requests.get(url, params=payload).json()
total = jsonData['total']
total_pages = math.ceil(total/30)

total_pages_input = input('There are %s pages to iterate.\nEnter the number of pages to go through or type ALL: ' %total_pages)
# 'ALL' keeps the computed page count; anything else must parse as an integer.
if total_pages_input.upper() != 'ALL':
    total_pages = int(total_pages_input)

# Keys copied straight from each listing's 'vehicle' object, in column order.
vehicle_keys = (
    'exterior_color', 'interior_color', 'location', 'list_price', 'make',
    'model', 'mileage', 'style', 'year', 'engine',
)
# Keys copied from vehicle['condition_history']['titleInfo'], in column order.
title_keys = (
    'isCleanTitle', 'isFrameDamaged', 'isLemon', 'isSalvage',
    'isTheftRecovered',
)

values = []
for page in range(1, total_pages + 1):
    # Page 1 is already in jsonData; later pages are fetched on demand.
    if page > 1:
        payload['page'] = str(page)
        jsonData = requests.get(url, params=payload).json()
    car_listings = jsonData['listings']

    for listing in car_listings:
        vehicle = listing['vehicle']
        record = tuple(vehicle[key] for key in vehicle_keys)
        history = vehicle['condition_history']
        record += (history['accidentCount'], history['ownerCount'])
        title_info = history['titleInfo']
        record += tuple(title_info[key] for key in title_keys)
        values.append(record)
    print('Completed: Page %s of %s' %(page,total_pages))

# Open the database connection.
print('CONNECTING ...')
mydb = mysql.connector.connect(
    host="xxxxx",
    user="xxxxxx",
    password="xxxxxx",
    port='xxxxxx',
    database='xxxxxx'
)
print('CONNECTED')

my_cursor = mydb.cursor(buffered=True)
# create_command = ''' create table car_information (exterior_color varchar(255), interior_color varchar(255),location varchar(255),price varchar(255),make varchar(255),model varchar(255),mileage varchar(255),
#     style varchar(255),year varchar(255),engine varchar(255),accidentCount varchar(255),ownerCount varchar(255),isCleanTitle varchar(255),isFrameDamaged varchar(255),
#     isLemon varchar(255), isSalvage varchar(255),isTheftRecovered varchar(255))'''
# my_cursor.execute(create_command)
# print('created')

# One placeholder per column; every collected tuple becomes one row.
insert_command = '''INSERT INTO car_name (exterior_color, interior_color,location,price,make,model,mileage,
style,year,engine,accidentCount,ownerCount,isCleanTitle,isFrameDamaged,
isLemon, isSalvage,isTheftRecovered) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
The problem seems to be that the list of car models has fewer than 20 entries.
# Indexes positions 0..19 regardless of how many elements car_model holds.
for item in range(20):
    car_list.append(car_model[item].text)
This always tries to append exactly 20 items to the car list. If there are fewer than 20 entries you get an error: with only 10 entries, for example, car_model[10] does not exist, so car_model[10].text raises IndexError. You can try:
# Iterate over the elements themselves: every element that was found is
# visited exactly once and no index can run past the end of the list.
for item in car_model:
    car_list.append(item.text)
I have the following code:
def execute_statement(stmt, params=None):
    """Run ``stmt`` on a fresh connection and return the first row of its result.

    Args:
        stmt: SQL text, optionally containing ``%(name)s`` placeholders.
        params: Optional values for the placeholders, bound by the database
            driver. Defaults to ``None`` (execute ``stmt`` as is), so existing
            single-argument callers keep working.

    Returns:
        The first row of the result set.

    Raises:
        IndexError: If the statement produced no rows.
        Exception: Whatever the driver raises while connecting or executing.
    """
    #create connection
    conn = psdb.connect(dbname='db', user='user', host='localhost', password='password')
    try:
        cur = conn.cursor()
        if params is None:
            cur.execute(stmt)
        else:
            # Values are handed to the driver separately from the SQL text.
            cur.execute(stmt, params)
        return cur.fetchall()[0]
    finally:
        # Close the connection even when execute()/fetchall() raises.
        conn.close()


def get_product_details(request):
    """Retrieve all information for a certain product, -> returns id, name, description and price

    The product id is taken from ``request.matchdict['product_id']`` and bound
    as a query parameter instead of being formatted into the SQL string, so
    URL input can no longer alter the statement.

    Returns:
        A ``Response`` whose JSON body holds either ``'product'`` (the query
        result) or ``'faultstring'`` (the text of the exception raised).

    NOTE: the aggregate query yields one row even when no product matches, so
    an unknown id produces ``'product': None`` rather than a fault - assumes
    PostgreSQL ``array_agg`` semantics; confirm.
    """
    #initiate faultstring
    faultstring = None
    row = None
    query = """SELECT array_to_json(array_agg(pr)) FROM (SELECT id, name, description, price FROM product WHERE product.id = %(product_id)s) pr"""
    #get product information from db
    try:
        # [0] picks the single aggregated column out of the returned row.
        row = execute_statement(
            query, {'product_id': request.matchdict['product_id']})[0]
    except Exception as e:
        faultstring = str(e)
    #create responseobject
    resp = {}
    if faultstring:
        resp['faultstring'] = faultstring
    else:
        resp['product'] = row
    # json_body serialises the dict itself; no dumps()/loads() round trip needed.
    return Response(json_body=resp)
Route:
# Register the route /getproductdetail/{product_id} and attach
# get_product_details to it as a JSON-rendered view.
config.add_route('getproductdetail', '/getproductdetail/{product_id}')
config.add_view(get_product_details, route_name='getproductdetail', renderer='json')
The try/except block in get_product_details() returns a faultstring if I pass a letter as the ID (e.g. localhost/getproductdetail/q).
If I request a product that does not exist, e.g. localhost/getproductdetail/500, the faultstring is not filled and the response is 'product': null, even though no such product exists. Why doesn't it handle an empty fetch the same way as it handles a value of the wrong datatype?
I have crawled a webpage to extract certain information such as the price, the header and so on.
Now my goal is to insert that information into a database. I have already set up the database with the fields that are needed.
This is my code:
def trade_spider(max_pages):
    """Crawl the search results for every region and store the hits in MySQL.

    For each entry of the module-level ``Region`` iterable, result pages are
    requested one after another, the ``activities`` HTML fragment of the JSON
    answer is parsed, and header, price and deeplink of each article are
    written to the ``prices_crawled`` table.

    Args:
        max_pages: Bound of the page loop. The loop increments before
            requesting, so pages 0 .. max_pages + 1 are requested.

    NOTE(review): the source had lost its indentation; the nesting below is a
    reconstruction and should be checked against the intended behaviour.
    """
    Language = "Japanese"
    # NOTE(review): La and Tokyo are bare names that must exist at module
    # level; they were presumably meant to be string literals - confirm.
    partner = La
    location = Tokyo
    already_printed = set()
    for reg in Region:
        count = 0
        count1 = 0
        page = -1
        while page <= max_pages:
            page += 1
            response = urllib.request.urlopen("http://www.jsox.de/s/search.json?q=" + str(reg) +"&page=" + str(page))
            jsondata = json.loads(response.read().decode("utf-8"))
            # Named activities_html so the builtin format() is not shadowed.
            activities_html = jsondata['activities']
            g_data = activities_html.strip("'<>()[]\"` ").replace('\'', '\"')
            soup = BeautifulSoup(g_data)
            articles = soup.find_all("article", {"class": "activity-card activity-card-horizontal "})

            try:
                connection = mysql.connector.connect(
                    host="localhost", user="root", passwd="", db="crawl")
            except mysql.connector.Error:
                # Only connection errors mean "no server"; anything else
                # should surface as a normal traceback.
                print("No connection to Server")
                sys.exit(0)
            cursor = connection.cursor()

            # Bind the values instead of concatenating them into the SQL text:
            # the concatenated form produced unquoted text such as
            # "Location=Tokyo", which the server does not read as a string.
            # NOTE(review): with this nesting the DELETE runs once per page
            # and also removes rows inserted for earlier pages - confirm
            # whether it should run only once before crawling.
            cursor.execute(
                "DELETE from prices_crawled where Location=%s and Partner=%s",
                (str(location), str(partner)))
            connection.commit()

            for article in articles:
                headers = article.find_all("h3", {"class": "activity"})
                for header in headers:
                    header_initial = header.text.strip()
                    if header_initial not in already_printed:
                        already_printed.add(header_initial)
                        header_final = header_initial

                prices = article.find_all("span", {"class": "price"})
                for price in prices:
                    # Drop thousands separators and the first two characters
                    # (presumably a currency prefix; verify).
                    price_end = price.text.strip().replace(",","")[2:]
                    count1 += 1
                    # NOTE(review): count stays 0 while count1 is incremented
                    # first, so this condition is never true in the visible
                    # code and price_final is never assigned here.
                    if count1 <= count:
                        price_final = price_end

                deeplinks = article.find_all("a", {"class": "activity-card"})
                for t in set(t.get("href") for t in deeplinks):
                    deeplink_initial = t
                    if deeplink_initial not in already_printed:
                        already_printed.add(deeplink_initial)
                        deeplink_final = deeplink_initial
                        # NOTE(review): price_id receives the string 'None',
                        # not SQL NULL - confirm this is intended.
                        cursor.execute(
                            "INSERT INTO prices_crawled (price_id, Header, Price, Deeplink, Partner, Location, Language) "
                            "VALUES(%s, %s, %s, %s, %s, %s, %s)",
                            ['None', header_final, price_final, deeplink_final,
                             partner, location, Language])
                        connection.commit()
            cursor.close()
            connection.close()


trade_spider(int(Spider))
The issue is that the information does not get into the database. Furthermore, I do not get any error message, so I do not know what I'm doing wrong.
Could you help me out? Any feedback is appreciated.
Is the delete statement working?
I think the problem is the way you pass your variables
Change your syntax like this:
# Keep the placeholders unquoted in the SQL text and pass the values
# separately, so the driver escapes and quotes them itself.
sql_insert_tx = "INSERT INTO euro_currencies (pk,currency,rate,date) values (null,'USD',%s,%s)"
cursor.execute(sql_insert_tx, (usd, date))
def websvc(currency):
    """Read all rows of myform_composantsserveur and return them as a list of dicts.

    Each dict uses the string keys '1'..'9'; key '2' holds the value of
    calculations_metric(...) multiplied by the exchange rate requested for
    ``currency``.
    """
    db = MySQLdb.connect("localhost", "root", "aqw", "PFE_Project")
    cursor = db.cursor()
    sql = "SELECT * FROM myform_composantsserveur"
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        currency_in = currency
        # NOTE(review): the % operator is applied to the object returned by
        # urlopen(), not to the URL string, so the currency never reaches the URL.
        req = urllib2.urlopen('http://rate-exchange.appspot.com/currency?from=USD&to=%s') % (currency_in)
        req1 = req.read()
        # NOTE(review): req1 is whatever read() returned and is indexed with
        # 'rate' without being parsed first; int() would also drop the
        # fractional part of a rate like the sample value below.
        rate = int(req1['rate'])
        # rate = 0.77112893299999996
        # servers is first assigned here, inside the try block: if any
        # statement above raises, the except branch runs and the final
        # `return servers` refers to a name that was never assigned.
        servers = []
        for row in results:
            result = {}
            result['1'] = row[1]
            result['3'] = int(row[2])
            result['4'] = int(row[3])
            result['5'] = int(row[4])
            result['6'] = row[5]
            result['7'] = int(row[6])
            result['8'] = row[7]
            result['9'] = row[8]
            p = rate * calculations_metric (int(row[2]), int(row[3]), int(row[4]), int(row[6]), row[7])
            result['2'] = p
            # NOTE(review): the sorted key list is not used afterwards.
            keys = result.keys()
            keys.sort()
            servers.append(result)
    # Bare except: every exception is reduced to the same printed message.
    except:
        print "Error: unable to fetch data"
    db.close()
    return servers
But I get this error when running the code:
Exception Type: UnboundLocalError
Exception Value: local variable
'servers' referenced before assignment
Exception Location: /home/amine/PFE Directory/mysite1/myform/Webservice.py in websvc, line 43 Python Executable: /usr/bin/python2.7
This code worked normally before I added a parameter to this function.
Your code never reaches the initialization of servers, which is why you get the error. Simply move the initialization before the try..except. Change it this way:
def websvc(currency):
    """Return every row of myform_composantsserveur priced in ``currency``.

    Args:
        currency: Target currency code inserted into the exchange-rate URL.

    Returns:
        A list with one dict per table row, keyed by the strings '1'..'9';
        key '2' holds ``rate * calculations_metric(...)``. The list is empty
        (or partial) when the query, the rate lookup or a row conversion
        fails; in that case "Error: unable to fetch data" is printed.
    """
    import json  # local import: the response body has to be parsed

    db = MySQLdb.connect("localhost", "root", "aqw", "PFE_Project")
    cursor = db.cursor()
    sql = "SELECT * FROM myform_composantsserveur"
    # Created before the try block so the final return is always valid.
    servers = []
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        # Format the URL string itself; % used to be applied to the object
        # returned by urlopen(), which raised before any row was processed.
        req = urllib2.urlopen(
            'http://rate-exchange.appspot.com/currency?from=USD&to=%s' % currency)
        # read() returns text, so parse it before looking up 'rate'
        # (assumes the service answers with a JSON object - confirm).
        # float() keeps the fraction: a rate such as 0.7711 would become 0
        # with int().
        rate = float(json.loads(req.read())['rate'])
        for row in results:
            price = rate * calculations_metric(
                int(row[2]), int(row[3]), int(row[4]), int(row[6]), row[7])
            servers.append({
                '1': row[1],
                '2': price,
                '3': int(row[2]),
                '4': int(row[3]),
                '5': int(row[4]),
                '6': row[5],
                '7': int(row[6]),
                '8': row[7],
                '9': row[8],
            })
    except Exception:
        # Best effort, as before: report and return what was collected.
        print("Error: unable to fetch data")
    finally:
        # Always release the connection, whatever happened above.
        db.close()
    return servers
I see the problem now you have edited it to add the missing parts. It's the exception handler.
If you have an error after try and before servers=[] it will jump to the except clause, then see return servers and fail.
You might want to use a list(), instead of using a dict() to emulate a list ...
You can also create the empty variable inside the try block, as long as you check whether it exists any time after the try block. Inside a function that check has to be made against locals(), because a function-local name such as servers never appears in globals(). This is no game changer in this code, since creating a new empty list will never fail, but the same technique lets you open a connection inside the try block so that a failure is caught by the except clause, and close that object in the finally block without having to create an empty placeholder before the try/except/finally block (tested).
def websvc(currency):
    """Return every row of myform_composantsserveur priced in ``currency``.

    Variant that creates ``servers`` inside the try block and checks for its
    existence afterwards.

    Args:
        currency: Target currency code inserted into the exchange-rate URL.

    Returns:
        A list with one dict per table row, keyed by the strings '1'..'9';
        key '2' holds ``rate * calculations_metric(...)``. An empty or partial
        list is returned when something fails; in that case
        "Error: unable to fetch data" is printed.
    """
    import json  # local import: the response body has to be parsed

    db = MySQLdb.connect("localhost", "root", "aqw", "PFE_Project")
    cursor = db.cursor()
    sql = "SELECT * FROM myform_composantsserveur"
    try:
        servers = []
        cursor.execute(sql)
        results = cursor.fetchall()
        # Format the URL string itself; % used to be applied to the object
        # returned by urlopen().
        req = urllib2.urlopen(
            'http://rate-exchange.appspot.com/currency?from=USD&to=%s' % currency)
        # read() returns text, so parse it before looking up 'rate'
        # (assumes the service answers with a JSON object - confirm).
        # float() keeps the fraction that int() would truncate.
        rate = float(json.loads(req.read())['rate'])
        for row in results:
            price = rate * calculations_metric(
                int(row[2]), int(row[3]), int(row[4]), int(row[6]), row[7])
            servers.append({
                '1': row[1],
                '2': price,
                '3': int(row[2]),
                '4': int(row[3]),
                '5': int(row[4]),
                '6': row[5],
                '7': int(row[6]),
                '8': row[7],
                '9': row[8],
            })
    except Exception:
        print("Error: unable to fetch data")
    finally:
        # Always release the connection, whatever happened above.
        db.close()
    # servers is local to this function, so its existence must be looked up
    # in locals(); globals() never contains it, which made the previous check
    # return [] on every call.
    if 'servers' in locals():
        return servers
    return []
This is untested. If it crashes at servers.append(result), try if 'servers' in globals(): right before that as well. Which would blow up the code of the try block, therefore, I hope that it is not needed, and in my example, I also did not have to do that when I used the called connection afterwards in the try block.
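Side remark: append() does not copy the list; it adds the element in place in amortized constant time. There is therefore no need to replace it with servers.extend([result]), even if the list grows large (which is unlikely when you only collect a few servers).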