I have built a program that uses a web scraper and geopy to extract venue names with their latitude/longitude, with the aim of putting the venues onto a map on a website.
I am currently using Django and need to begin creating the database for the website. Is it possible to incorporate my pre-existing program code as a Django model, and have the website draw from a database I have already written the code to create?
Here is the code I have written that scrapes, geocodes, and writes to the database (a sketch of a possible model follows the code below). Can it be integrated into my Django code as a model? What would I need to change to make it function well?
#cafeNames
import sqlite3

import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

def scrapecafes(city, area):
    #build the guide URL for the given city and area
    url = f"https://www.broadsheet.com.au/{city}/guides/best-cafes-{area}"
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    #scrape and clean the venue names
    cafeNames = soup.findAll('h2', attrs={"class": "venue-title"})
    cafeNamesClean = [cafe.text.strip() for cafe in cafeNames]
    #scrape and clean the addresses (same response, no need to re-parse)
    cafeAddresses = soup.findAll(attrs={"class": "address-content"})
    cafeAddressesClean = [address.text for address in cafeAddresses]
    ##geocode addresses, one try/except per address so a single failed
    ##lookup doesn't abort the rest or misalign the lists
    locator = Nominatim(user_agent="myGeocoder")
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
    lat = []
    long = []
    for address in cafeAddressesClean:
        try:
            location = geocode(address.strip().replace(',', ''))  #rate-limited wrapper
            lat.append(location.latitude)
            long.append(location.longitude)
        except AttributeError:
            #Nominatim returned None for this address
            lat.append(None)
            long.append(None)
    #zip up for table
    fortable = list(zip(cafeNamesClean, cafeAddressesClean, lat, long))
    print(fortable)
    ##connect to database
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Database created and successfully connected to 25july_database")
        sqlite_select_Query = "select sqlite_version();"
        cursor.execute(sqlite_select_Query)
        record = cursor.fetchall()
        print("SQLite Database Version is: ", record)
        cursor.close()
    except sqlite3.Error as error:
        print("Error while connecting to sqlite", error)
    #create table
    try:
        sqlite_create_table_query = '''CREATE TABLE IF NOT EXISTS test666 (
            name TEXT NOT NULL,
            address TEXT NOT NULL,
            latitude FLOAT,
            longitude FLOAT
        );'''
        cursor = sqliteConnection.cursor()
        print("Successfully connected to SQLite")
        cursor.execute(sqlite_create_table_query)
        sqliteConnection.commit()
        print("SQLite table created")
    except sqlite3.Error as error:
        print("Error while creating a sqlite table", error)
    ##enter data into table
    try:
        sqlite_insert_name_param = """INSERT INTO test666
            (name, address, latitude, longitude)
            VALUES (?,?,?,?);"""
        cursor.executemany(sqlite_insert_name_param, fortable)
        sqliteConnection.commit()
        print("Total", cursor.rowcount, "records inserted successfully into table")
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to insert data into sqlite table", error)
    finally:
        if sqliteConnection:
            sqliteConnection.close()
            print("The SQLite connection is closed")
I'm using the snscrape library to scrape Twitter data. I want to insert this data into my database, but I keep failing no matter what method I try. When I use a loop and build an SQL query after the loop to insert the values one by one, I get an IndexError and a TypeError. When I try to append the data to a list, I can't loop over each value one by one. Now I'm stuck and don't know what to do.
method 1
import psycopg2
import snscrape.modules.twitter as sntwitter

class Tweet_list():
    def tweets_list1(self):
        dbname = '******'
        user = '******'
        password = '******'
        host = '*******'
        port = ****
        cur = None
        conn = None
        try:
            conn = psycopg2.connect(
                dbname = dbname,
                user = user,
                password = password,
                host = host,
                port = port
            )
            cur = conn.cursor()
            cur.execute('DROP TABLE IF EXISTS Machine_twitter')
            create_table = '''CREATE TABLE IF NOT EXISTS Machine_twitter (
                id int PRIMARY KEY,
                Tweet text,
                Tweet_id int,
                Timestamp timestamp,
                Replys int,
                Retweets int,
                Likes int,
                Username char)'''
            cur.execute(create_table)
            for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
                if i > 5:
                    break
                insert_tweet = 'INSERT INTO Machine_twitter (Tweet, Tweet_id, Timestamp, Replys, Retweets, Likes, Username) VALUES (%s, %s, %s, %s, %s, %s, %s)'
                insert_values = (tweet.content, tweet.id, tweet.date, tweet.replyCount, tweet.retweetCount, tweet.likeCount, tweet.user.username)
                cur.execute(insert_tweet, insert_values)
                conn.commit()
            print('completed')
        except Exception as error:
            print(error)
        finally:
            if cur is not None:
                cur.close()
            if conn is not None:
                conn.close()

tweets = Tweet_list()
tweets2 = Tweet_list()
tweets2.tweets_list1()
error
IndexError: list index out of range
method 2
import pandas as pd

def update_list1(self):
    tweets_list2 = []
    for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
        if i > 100:
            break
        tweets_list2.append([tweet.content, tweet.id, tweet.likeCount, tweet.retweetCount, tweet.replyCount, tweet.user.username])
    tweet_df = pd.DataFrame(tweets_list2, columns=('tweet', 'tweet id', 'likeCount', 'retweetCount', 'replyCount', 'username'))
    tweet_df.head()
The problem with the second method is that after the list gets appended, I can't access the individual values (e.g. tweet.content) to insert them into the database. I've tried every method under the sun but I'm failing miserably. Can somebody help?
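One hedged suggestion, assuming the Machine_twitter table from method 1 (with the columns reordered to match the list): tweets_list2 is already a list of rows, so there is no need to access each tweet attribute again; the whole list can be handed to executemany:

#each element of tweets_list2 is one row in the order:
#content, id, likeCount, retweetCount, replyCount, username
insert_tweet = ('INSERT INTO Machine_twitter '
                '(Tweet, Tweet_id, Likes, Retweets, Replys, Username) '
                'VALUES (%s, %s, %s, %s, %s, %s)')
cur.executemany(insert_tweet, tweets_list2)
conn.commit()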
I am trying to drop many markers on a folium map. The coordinates are drawn from a SQLite3 Table but right now no map is displaying and no error is being thrown.
def maps():
    melbourne = (-37.840935, 144.946457)
    map = folium.Map(location = melbourne)
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Connected to SQLite")
        sqlite_select_query = """SELECT latitude, longitude FROM test555;"""
        cursor.execute(sqlite_select_query)
        items = cursor.fetchall()
        for item in items:
            folium.Marker(location = item)
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to read data from sqlite table", error)
    finally:
        if (sqliteConnection):
            sqliteConnection.close()
            print("The SQLite connection is closed")
I tried to make "item" a list, folium.Marker(location = [item]), but that threw the following error: ValueError: Expected two (lat, lon) values for location, instead got: [(-37.7650309, 144.9613659)].
This suggests to me that the variable is not wrong but something else is broken somewhere.
Thanks in advance!
Each row returned by fetchall() is already a (lat, lon) tuple, so you can pass it to the marker directly: folium.Marker(location = item). Wrapping it in another list, as in [item], is what produced the ValueError above.
You also need to add each marker to the map: folium.Marker(location = item).add_to(map).
In order to draw the map, you need to return it at the end of your function.
You will have something like this (it works in my Jupyter Notebook):
def maps():
    melbourne = (-37.840935, 144.946457)
    map = folium.Map(location = melbourne)
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Connected to SQLite")
        sqlite_select_query = """SELECT latitude, longitude FROM test555;"""
        cursor.execute(sqlite_select_query)
        items = cursor.fetchall()
        for item in items:
            #each item is already a (lat, lon) tuple
            folium.Marker(location = item).add_to(map)
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to read data from sqlite table", error)
    finally:
        if (sqliteConnection):
            sqliteConnection.close()
            print("The SQLite connection is closed")
    return map
N.B.: You should not use map as the name of your variable, because it shadows Python's built-in map() function.
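Outside a notebook (for instance, on the website mentioned in the first question), the returned map will not render by itself; a folium map can be written out as a self-contained HTML page with save(). A minimal sketch, assuming the output filename map.html:

m = maps()
m.save('map.html')  #writes an HTML page that can be served or embedded in a site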
I am trying to add data to my SQLite3 table, which is filled by a function that takes two arguments to find a city and a neighbourhood: def scrapecafes(city, area). Strangely, this works well with some of the arguments I am entering but not with others. For example, if I run scrapecafes('melbourne', 'thornbury') the code works fine, but if I run scrapecafes('melbourne', 'carlton') I get the following error: UnboundLocalError: local variable 'lat' referenced before assignment
I know the function definitely works, but I can't figure out why I am getting the UnboundLocalError for some arguments and not for others. Here is the code:
import folium
from bs4 import BeautifulSoup
import requests
from requests import get
import sqlite3
import geopandas
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

#cafeNames
def scrapecafes(city, area):
    #url = 'https://www.broadsheet.com.au/melbourne/guides/best-cafes-thornbury' #go to the website
    url = f"https://www.broadsheet.com.au/{city}/guides/best-cafes-{area}"
    response = requests.get(url, timeout=5)
    soup_cafe_names = BeautifulSoup(response.content, "html.parser")
    cafeNames = soup_cafe_names.findAll('h2', attrs={"class":"venue-title", }) #scrape the elements
    cafeNamesClean = [cafe.text.strip() for cafe in cafeNames] #clean the elements
    #cafeNameTuple = [(cafe,) for cafe in cafeNamesClean]
    #print(cafeNamesClean)
    #addresses
    soup_cafe_addresses = BeautifulSoup(response.content, "html.parser")
    cafeAddresses = soup_cafe_addresses.findAll( attrs={"class":"address-content" })
    cafeAddressesClean = [address.text for address in cafeAddresses]
    #cafeAddressesTuple = [(address,) for address in cafeAddressesClean]
    #print(cafeAddressesClean)
    ##geocode addresses
    locator = Nominatim(user_agent="myGeocoder")
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
    try:
        location = []
        for item in cafeAddressesClean:
            location.append(locator.geocode(item))
        lat = [loc.latitude for loc in location]
        long = [loc.longitude for loc in location]
    except:
        pass
    #zip up for table
    fortable = list(zip(cafeNamesClean, cafeAddressesClean, lat, long))
    print(fortable)
    ##connect to database
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Database created and Successfully Connected to 25july_database")
        sqlite_select_Query = "select sqlite_version();"
        cursor.execute(sqlite_select_Query)
        record = cursor.fetchall()
        print("SQLite Database Version is: ", record)
        cursor.close()
    except sqlite3.Error as error:
        print("Error while connecting to sqlite", error)
    #create table
    try:
        sqlite_create_table_query = '''CREATE TABLE IF NOT EXISTS test555 (
            name TEXT NOT NULL,
            address TEXT NOT NULL,
            latitude FLOAT NOT NULL,
            longitude FLOAT NOT NULL
        );'''
        cursor = sqliteConnection.cursor()
        print("Successfully Connected to SQLite")
        cursor.execute(sqlite_create_table_query)
        sqliteConnection.commit()
        print("SQLite table created")
    except sqlite3.Error as error:
        print("Error while creating a sqlite table", error)
    ##enter data into table
    try:
        sqlite_insert_name_param = """INSERT INTO test555
            (name, address, latitude, longitude)
            VALUES (?,?,?,?);"""
        cursor.executemany(sqlite_insert_name_param, fortable)
        sqliteConnection.commit()
        print("Total", cursor.rowcount, "Records inserted successfully into table")
        sqliteConnection.commit()
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to insert data into sqlite table", error)
    finally:
        if (sqliteConnection):
            sqliteConnection.close()
            print("The SQLite connection is closed")
The problem is that geopy doesn't have coordinates for Carlton, so lat is never assigned inside the try block. You should change your table schema and insert NULL in those cases.
When geopy doesn't have data, it returns None, and calling an attribute on None throws an exception; with the try/except around the whole loop, that exception skips the lat/long assignments entirely. You have to put the try/except block inside the for loop so a single failed lookup doesn't abort the rest:
from bs4 import BeautifulSoup
import requests
from requests import get
import sqlite3
import geopandas
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

#cafeNames
def scrapecafes(city, area):
    #url = 'https://www.broadsheet.com.au/melbourne/guides/best-cafes-thornbury' #go to the website
    url = f"https://www.broadsheet.com.au/{city}/guides/best-cafes-{area}"
    response = requests.get(url, timeout=5)
    soup_cafe_names = BeautifulSoup(response.content, "html.parser")
    cafeNames = soup_cafe_names.findAll('h2', attrs={"class":"venue-title", }) #scrape the elements
    cafeNamesClean = [cafe.text.strip() for cafe in cafeNames] #clean the elements
    #cafeNameTuple = [(cafe,) for cafe in cafeNamesClean]
    #addresses
    soup_cafe_addresses = BeautifulSoup(response.content, "html.parser")
    cafeAddresses = soup_cafe_addresses.findAll( attrs={"class":"address-content" })
    cafeAddressesClean = [address.text for address in cafeAddresses]
    #cafeAddressesTuple = [(address,) for address in cafeAddressesClean]
    ##geocode addresses
    locator = Nominatim(user_agent="myGeocoder")
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
    lat = []
    long = []
    for item in cafeAddressesClean:
        try:
            location = locator.geocode(item.strip().replace(',',''))
            lat.append(location.latitude)
            long.append(location.longitude)
        except:
            lat.append(None)
            long.append(None)
    #zip up for table
    fortable = list(zip(cafeNamesClean, cafeAddressesClean, lat, long))
    print(fortable)
    ##connect to database
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Database created and Successfully Connected to 25july_database")
        sqlite_select_Query = "select sqlite_version();"
        cursor.execute(sqlite_select_Query)
        record = cursor.fetchall()
        print("SQLite Database Version is: ", record)
        cursor.close()
    except sqlite3.Error as error:
        print("Error while connecting to sqlite", error)
    #create table
    try:
        sqlite_create_table_query = '''CREATE TABLE IF NOT EXISTS test (
            name TEXT NOT NULL,
            address TEXT NOT NULL,
            latitude FLOAT,
            longitude FLOAT
        );'''
        cursor = sqliteConnection.cursor()
        print("Successfully Connected to SQLite")
        cursor.execute(sqlite_create_table_query)
        sqliteConnection.commit()
        print("SQLite table created")
    except sqlite3.Error as error:
        print("Error while creating a sqlite table", error)
    ##enter data into table
    try:
        sqlite_insert_name_param = """INSERT INTO test
            (name, address, latitude, longitude)
            VALUES (?,?,?,?);"""
        cursor.executemany(sqlite_insert_name_param, fortable)
        sqliteConnection.commit()
        print("Total", cursor.rowcount, "Records inserted successfully into table")
        sqliteConnection.commit()
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to insert data into sqlite table", error)
    finally:
        if (sqliteConnection):
            sqliteConnection.close()
            print("The SQLite connection is closed")

scrapecafes('melbourne', 'carlton')
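As a follow-up, since failed lookups are now stored as NULL, the map code from the folium question above can simply skip those rows when it queries the table; a minimal sketch against the same test table:

cursor.execute("""SELECT latitude, longitude FROM test
                  WHERE latitude IS NOT NULL AND longitude IS NOT NULL;""")
items = cursor.fetchall()  #only rows with usable coordinates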
I have the following code:
def execute_statement(stmt):
    #create connection
    conn = psdb.connect(dbname='db', user='user', host='localhost', password='password')
    cur = conn.cursor()
    cur.execute(stmt)
    rows = cur.fetchall()[0]
    conn.close()
    return rows

def get_product_details(request):
    """Retrieve all information for a certain product -> returns id, name, description and price"""
    #initiate faultstring
    faultstring = None
    #get product information from db
    try:
        row = execute_statement("""SELECT array_to_json(array_agg(pr)) FROM (SELECT id, name, description, price FROM product WHERE product.id = %(product_id)s) pr""" % request.matchdict)[0]
    except Exception as e:
        faultstring = str(e)
    #create response object
    resp = {}
    if faultstring:
        resp['faultstring'] = faultstring
    else:
        resp['product'] = row
    respjson = json.dumps(resp)
    return Response(json_body=json.loads(respjson))
Route:
#get_product_details
config.add_route('getproductdetail', '/getproductdetail/{product_id}')
config.add_view(get_product_details, route_name='getproductdetail', renderer='json')
The try/except block in get_product_details() returns a faultstring if I pass a letter as the ID (e.g. localhost/getproductdetail/q).
If I try to get a product which does not exist, like localhost/getproductdetail/500, it does not fill the faultstring and returns 'product': null even though the error case exists. Why doesn't it handle the exception for an empty fetch the same way as it does for a faulty datatype?
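A likely explanation, offered as an assumption based on how array_agg behaves: an aggregate query always returns exactly one row, so for a missing product the result is (None,) rather than an empty set; fetchall()[0] succeeds and no exception is raised. Only a malformed id makes the SQL itself fail. The "not found" case therefore has to be checked explicitly; a minimal sketch:

row = execute_statement("""SELECT array_to_json(array_agg(pr)) FROM
    (SELECT id, name, description, price FROM product
     WHERE product.id = %(product_id)s) pr""" % request.matchdict)[0]
if row is None:
    faultstring = 'no product found for that id'  #hypothetical message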
I'm using Python 3, MySQL, Sequel Pro and BeautifulSoup.
Put simply, I want to create a SQL table and then insert my downloaded data into that table.
I've used this answer as a template to build the SQL part (Beautiful soup webscrape into mysql), but it won't work.
Errors thrown:
line 86, finally: SyntaxError: invalid syntax
When I comment out this last finally: (just to see if the rest of the code worked) I get:
InternalError: (1054, "Unknown column 'address' in 'field list'")
Another common error I got was:
ProgrammingError: (1146, "Table 'simple_scrape.simple3' doesn't exist")
though I can't remember the exact changes I made to end up with this error.
Finally: I started to learn programming (not just Python, but programming in general) less than four weeks ago, so if you're wondering why I've done something stupid or inefficient, it's almost certainly because that was the first way I got it to work!
Please help!
Code:
from selenium import webdriver

#Guess BER Number
for i in range(108053983, 108053985):
    try:
        # ber_try = 100000000
        ber_try =+ i
        #Open page & insert BER Number
        browser = webdriver.Firefox()
        browser.get('https://ndber.seai.ie/pass/ber/search.aspx')
        ber_send = browser.find_element_by_id('ctl00_DefaultContent_BERSearch_dfSearch_txtBERNumber')
        ber_send.send_keys(ber_try)
        #click search
        form = browser.find_element_by_id('ctl00_DefaultContent_BERSearch_dfSearch_Bottomsearch')
        form.click()
        #click intermediate page
        form = browser.find_element_by_id('ctl00_DefaultContent_BERSearch_gridRatings_gridview_ctl02_ViewDetails')
        form.click()
        #scrape the page
        import bs4
        soup = bs4.BeautifulSoup(browser.page_source)
        #First Section
        ber_dec = soup.find('fieldset', {'id': 'ctl00_DefaultContent_BERSearch_fsBER'})
        address = ber_dec.find('div', {'id': 'ctl00_DefaultContent_BERSearch_dfBER_div_PublishingAddress'})
        address = (address.get_text(', ').strip())
        print(address)
        date_issue = ber_dec.find('span', {'id': 'ctl00_DefaultContent_BERSearch_dfBER_container_DateOfIssue'})
        date_issue = date_issue.get_text().strip()
        print(date_issue)
    except:
        print('Invalid BER Number:', ber_try)
        browser.quit()
    #connecting to mysql
    finally:
        import pymysql.cursors
        from pymysql import connect, err, sys, cursors
        #Making the connection
        connection = pymysql.connect(host = '127.0.0.1',
                                     port = 3306,
                                     user = 'root',
                                     passwd = 'root11',
                                     db = 'simple_scrape',
                                     cursorclass = pymysql.cursors.DictCursor)
        with connection.cursor() as cursor:
            sql = """CREATE TABLE `simple3`(
                (
                `ID` INT AUTO_INCREMENT NOT NULL,
                `address` VARCHAR( 200 ) NOT NULL,
                `date_issue` VARCHAR( 200 ) NOT NULL,
                PRIMARY KEY ( `ID` )
                )Engine = MyISAM)"""
            sql = "INSERT INTO `simple3` (`address`, `date_issue`) VALUES (%s, %s)"
            cursor.execute(sql, (address, date_issue))
        connection.commit()
    finally:
        connection.close()
        browser.quit()
Issues:
You assign the CREATE TABLE statement to sql but never execute it; sql is immediately overwritten by the INSERT, so the table is never created (hence the 1146 "table doesn't exist" error). The DDL also has an extra opening parenthesis and a misplaced one around the Engine clause. Execute the corrected CREATE TABLE first, then run the insert:
sql = """CREATE TABLE simple3 (
    ID INT AUTO_INCREMENT NOT NULL,
    address VARCHAR( 200 ) NOT NULL,
    date_issue VARCHAR( 200 ) NOT NULL,
    PRIMARY KEY ( ID )
) ENGINE = MyISAM"""
# Added this line since your table was not being created.
cursor.execute(sql)
sql = "INSERT INTO simple3 (address, date_issue) VALUES (%s, %s)"
cursor.execute(sql, (address, date_issue))