Analysing quantity and membership levels of Facebook groups over the past year - python

I need to analyse the number of Facebook groups created in the past year related to a topic, and their membership numbers over the same period of time.
Currently I have followed a tutorial to scrape Facebook for all groups related to that one keyword using the following code:
from selenium import webdriver
import time

your_username = input("Please Enter Your Email/Login")
your_password = input("Please Enter Your Password")
query = input("Please enter a search query")

driver = webdriver.Chrome(r"C:\Python34\selenium\webdriver\chromedriver.exe")
print("Logging in...")
driver.get("http://facebook.com")
driver.find_element_by_id("email").send_keys(your_username)
driver.find_element_by_id("pass").send_keys(your_password)
driver.find_element_by_id("loginbutton").click()
print("Login Successful!")

driver.get("https://mobile.facebook.com/search/groups/?q=" + query)
time.sleep(2)  # Wait for page to load.

check = 0          # Counter checked after each pagination (scroll down)
last = 0           # What the last length of group_links was
time_to_sleep = 1  # Total time to sleep after each scroll down
group_links = []   # A list to store new group links

while check < 10:
    elems = driver.find_elements_by_xpath("//a[@href]")  # grab every anchor element on the page each loop
    for elem in elems:                                    # loop through each anchor element
        new_link = elem.get_attribute("href")             # grab the link from the anchor element
        if "facebook.com/groups/" in new_link:            # check whether it is a Facebook group link
            if new_link not in group_links:               # if this link is new, add it to our group links
                group_links.append(new_link)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(time_to_sleep)      # sleep here, let the newly scrolled content load
    if last == len(group_links):   # if the number of group links is the same as last time, add 1 to check
        print("Found Same Amount...")
        check += 1
    else:                          # Check out http://www.pythonhowto.com
        check = 0                  # if not, reset check back to 0
    last = len(group_links)        # update last to the current number of group links
    print("Total group links found =>", last)

print("Out of Loop")

filey = open("grouplinks.txt", "w")  # open file
for link in group_links:             # write each link found to the file
    filey.write(link + "\n")
filey.close()

driver.quit()  # exit the Selenium driver (it can sometimes hang in the background)
However, this only gives me the groups that exist today. Is it possible to run something similar to analyse the number of groups created since, let's say, 01/01/2017?
Sidenote: I have read that the Facebook Graph API is a more efficient method of carrying out tasks such as this when compared to scraping. Should I be doing this differently?
Lastly: this is for a college project. Ultimately, what I want to achieve is to compare the number of Facebook groups related to Bitcoin and their memberships over a period of time against the price of Bitcoin over the same period.

Related

How to not wait for a page to fully load, selenium python [duplicate]

This question already has answers here:
Don't wait for a page to load using Selenium in Python
I have code that would take around 360 hours to finish, almost entirely because of the slow servers of the website I'm scraping. But when I watch the website and the Python console at the same time, I can see that the elements I need have already loaded while Selenium is still waiting for ads and other things I don't care about to finish loading. So I was wondering if there is any way to start scraping as soon as the needed elements are loaded.
Another way of doing this would be to start scraping even if the page is not fully loaded and time it by hand with time.sleep. That approach has already been asked and answered on Stack Overflow, so if it is the only way, you can let me know in the comments; otherwise, a better way would be to wait only for the elements that need to be scraped, which would make this much easier.
I don't think my code is needed to answer my question, but I'll put it here just in case.
code:
# C:\Users\keibo\PycharmProjects\emergency ahanonline project
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium import webdriver
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
import time

t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(f'[{current_time}] Started.')

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# options.add_argument("--headless")
output = 'State, City, Group, Sub_Group, Address, Website, Description, Views'
browser = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

def tir():
    global output
    browser.get(
        'https://senf.ir/ListCompany/75483/%D8%A2%D9%87%D9%86-%D8%A2%D9%84%D8%A7%D8%AA-%D9%88-%D8%B6%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA')
    browser.find_element(By.ID, "ContentPlaceHolder2_rptPager_lnkPage_11").click()
    # The page count is text, so convert it to an int before doing arithmetic with it.
    pages = int(browser.find_element(By.ID, "ContentPlaceHolder2_rptPager_lnkPage_9").text)
    print(f'There are {pages} pages of 20 names which means there are {pages * 20} people to save.')
    for page in range(pages - 1):
        for person in range(19):
            browser.get(
                'https://senf.ir/ListCompany/75483/%D8%A2%D9%87%D9%86-%D8%A2%D9%84%D8%A7%D8%AA-%D9%88-%D8%B6%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA')
            browser.find_element(By.ID, f"ContentPlaceHolder2_grdProduct_HpCompany_{person}").click()
            try:
                state = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_rpParent_lblheaderCheild_0"])').text
                if state == '' or state == ' ':
                    state = None
            except:
                state = None
            try:
                city = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_rpParent_lblheaderCheild_1"])').text
                if city == '' or city == ' ':
                    city = None
            except:
                city = None
            try:
                group = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_rpParent_lblheaderCheild_2"])').text
                if group == '' or group == ' ':
                    group = None
            except:
                group = None
            try:
                sub_group = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_rpParent_lblheaderCheild_3"])').text
                if sub_group == '' or sub_group == ' ':
                    sub_group = None
            except:
                sub_group = None
            try:
                Address = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_txtAddress"])').text
                if Address == '' or Address == ' ':
                    Address = None
            except:
                Address = None
            try:
                ceo = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_LblManager"])').text
                if ceo == '' or ceo == ' ':
                    ceo = None
            except:
                ceo = None
            # print(browser.find_element(By.XPATH, '(.//span[@id = "ContentPlaceHolder2_ImgEmail"])').text)
            try:
                website = str(browser.find_element(
                    By.XPATH, '(.//a[@id = "ContentPlaceHolder2_hfWebsait"])').text)
                if website == '' or website == ' ':
                    website = None
            except:
                website = None
            try:
                Description = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_lblDesc"])').text
                if Description == '' or Description == ' ':
                    Description = None
            except:
                Description = None
            try:
                views = browser.find_element(
                    By.XPATH, '(.//span[@id = "ContentPlaceHolder2_lblVisit"])').text
                if views == '' or views == ' ':
                    views = None
            except:
                views = None
            # Append the row in the same column order as the header defined above.
            output += f'\n{state}, {city}, {group}, {sub_group}, {Address}, {website}, {Description}, {views}'
            print(output)
            print('--------------------------------------------')
        browser.find_element(By.ID, "ContentPlaceHolder2_rptPager_lnkPage_12").click()

tir()
print("End")
with open(r'Program Files\CSV pre built.txt', 'w') as file1:
    file1.write(output)
read_file1 = pd.read_csv(r'Program Files\CSV pre built.txt')
read_file1.to_csv('Output.csv', index=False)
try:
    pass
except Exception as e:
    browser.close()
    print('something went wrong ):')
    sees = input('Press enter to leave or press 1 and then enter to see error: ')
    if sees == '1':
        input(e)
If you want to prioritize locating specific elements over waiting for the whole page, try using an explicit wait. If you want a general-purpose wait that applies to every element lookup, use an implicit wait.
Explicit Wait
Where element is a desired search param & driver is your WebDriver:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
WebDriverWait(driver, timeout=5).until(lambda d: d.find_element(By.ID, 'element'))
Rather than waiting for the entire webpage to load, this call waits a given amount of time for a specific element. In this scenario, it waits a maximum of 5 seconds to find the element with an ID of "element." You can assign its return value to a variable to store the element it finds (if it is valid and discovered).
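For reference, the same wait can also be written with Selenium's expected_conditions helpers. This is an equivalent alternative I'm adding here, not part of the snippet above; it likewise waits up to 5 seconds for the element with ID "element":
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By

# Waits until the element is present in the DOM, then returns it.
element = WebDriverWait(driver, timeout=5).until(
    EC.presence_of_element_located((By.ID, 'element'))
)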
--
Implicit Wait
You mentioned using the sleep function time.sleep() to wait for the webpage to load. Selenium offers a method called Implicit Waiting for this. Rather than manually triggering a halt in the program, Selenium allows the driver to wait up to an imposed amount of time. The code is shown below, where driver is your WebDriver:
driver.implicitly_wait(5)
It is generally advised not to use time.sleep() as it "defeats the purpose of Automation." A more detailed explanation regarding implicit/static waits can be found in this post.
--
Explicit Wait Example
For a more direct answer to your question, we can apply an explicit wait to the first pagination click in your code snippet:
browser.find_element(By.ID, "ContentPlaceHolder2_rptPager_lnkPage_11").click()
An explicit wait can both locate an element and return it for later use. Declaring a variable for an element search is not strictly necessary, but I highly recommend it for elements that require more than one action. The line above becomes:
pagelink = WebDriverWait(browser, timeout=10).until(lambda b: b.find_element(By.ID, "ContentPlaceHolder2_rptPager_lnkPage_11"))
pagelink.click()
This explicit wait allows a grace period of up to 10 seconds to find the element by its ID. For more versatile use, the element is stored in the variable pagelink; Selenium then performs the click action on it.
--
Implicit Wait Example
Rather than applying waits to every single element, implicit waits are applied once and affect every lookup that follows. Let's apply this to the spot in your code where you navigate to the page and immediately look up an element:
browser.get(
'https://senf.ir/ListCompany/75483/...')
browser.find_element(By.ID, f"ContentPlaceHolder2...").click()
Directly after the get function, we can use an implicit wait for Selenium to wait for when elements load:
browser.get("https://senf.ir/ListCompany/...")
browser.implicitly_wait(10)
browser.find_element(By.ID, f"ContentPlaceHolder2...").click()
With the implicit wait set, every subsequent find_element call (such as the one here) polls the page for up to 10 seconds for the element to appear. If the element shows up sooner, the call returns immediately; if the 10 seconds run out, a NoSuchElementException is raised.
--
Documentation for Explicit and Implicit Waits can be found here.

Web scraping with Python

I am trying to extract data from the site below, but I don't know how to build the XPath inside the for loop, because the loop index has to be converted to a string inside the XPath. Could you do me a favour and help me?
Site: https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/imp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-01-01&r9=2022-05-01
from selenium import webdriver
import pandas as pd
import time

driver = webdriver.Chrome('C:\Webdriver\chromedriver.exe')
driver.get('https://www150.statcan.gc.ca/n1/pub/71-607-x/71-607-x2021004-eng.htm')
time.sleep(2)

# finding the button using ID
button = driver.find_element_by_xpath('//*[@id="cimt_import"]/p[1]/a')
# clicking on the button
button.click()
time.sleep(2)

# finding the button using ID
button = driver.find_element_by_xpath('//*[@id="topic3s"]')
# clicking on the button
button.click()
time.sleep(2)

# finding the start year:2022 from scroll
element_drop_down_startYear = driver.find_element_by_xpath('//*[@id="fromYear"]/option[1]')
element_drop_down_startYear.click()
# finding the start month from:January scroll
element_drop_down_startMonth = driver.find_element_by_xpath('//*[@id="fromMonth"]/option[1]')
element_drop_down_startMonth.click()
# finding the End year from scroll
element_drop_down_endYear = driver.find_element_by_xpath('//*[@id="toYear"]/option[1]')
element_drop_down_endYear.click()
# finding the End month from scroll
element_drop_down_endmonth = driver.find_element_by_xpath('//*[@id="toMonth"]/option[5]')
element_drop_down_endmonth.click()
# finding the specific Chapter
element_drop_down_specificChapter = driver.find_element_by_xpath('//*[@id="report_hs"]/option[1]')
element_drop_down_specificChapter.click()
time.sleep(1)
# finding the specific Commodity from the list
element_drop_down_specific_commodity = driver.find_element_by_xpath('//*[@id="report_hs"]/option[2]')
element_drop_down_specific_commodity.click()
# clicking the report button
element_drop_down_specific_button = driver.find_element_by_xpath('//*[@id="report"]/div[1]/div[3]/div[5]/p[2]/button')
element_drop_down_specific_button.click()
# --------------------------------------------------------------------
cel = 1
for cel in range(25):
    x = driver.find_element_by_xpath('//*[@id="report_table"]/tbody/tr[1]/td[2]/a')
    print(x)
    print("//*[@id="report_table"]/tbody/tr["+ cel +"]/td[4]")
    print("//*[@id="report_table"]/tbody/tr["+ cel +"]/td[7]")
    print("//*[@id="report_table"]/tbody/tr["+ cel +"]/td[8]/abbr")
    time.sleep(3)
You need to find the element before printing it; otherwise you're just printing a string. I think what you want is to locate those elements in each iteration of the for loop and print them? If so, find the elements like this, then print them.
for i in range(1, 26):  # XPath indices start at 1, not 0
    x = driver.find_element_by_xpath('//*[@id="report_table"]/tbody/tr[1]/td[2]/a')
    print(x.text)
    element_1 = driver.find_element_by_xpath(f'//*[@id="report_table"]/tbody/tr[{i}]/td[4]')
    element_2 = driver.find_element_by_xpath(f'//*[@id="report_table"]/tbody/tr[{i}]/td[7]')
    element_3 = driver.find_element_by_xpath(f'//*[@id="report_table"]/tbody/tr[{i}]/td[8]/abbr')
    print(element_1.text, element_2.text, element_3.text)
If you inspect the Network tab, you can see that the webpage pulls the table data from
https://www150.statcan.gc.ca//t1/cimt/rest/getReport/(1)/0/0/12/0/150000/1/0/2022-01-01/2022-05-01
Scrape that json page instead:
import requests
r = requests.get('https://www150.statcan.gc.ca//t1/cimt/rest/getReport/(1)/0/0/12/0/150000/1/0/2022-01-01/2022-05-01')
print(r.json())
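If you then want that JSON as a table, here is a minimal follow-up sketch (my addition, assuming pandas is available; the exact nesting of the payload should be inspected first):
import requests
import pandas as pd

url = ('https://www150.statcan.gc.ca//t1/cimt/rest/getReport/'
       '(1)/0/0/12/0/150000/1/0/2022-01-01/2022-05-01')
payload = requests.get(url).json()

# json_normalize flattens whatever records the payload contains into a DataFrame;
# inspect payload first and pass the right sub-list if the data is nested.
df = pd.json_normalize(payload)
print(df.head())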

How do I access the 2nd element with the same xpath in python in selenium

What I mean is that the website I'm using has two drop-down menus named province with the exact same id, so how do I tell Python which drop-down in particular I want to select? Of course, this is assuming that the issue is that Python always picks the first matching id it sees.
from selenium import webdriver
from selenium.webdriver.support.ui import Select

# There are two dropmenus with the same xpath. The first time it works fine,
# the 2nd time it throws an error about the element not being interactable.
Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
Prov.select_by_index(2)

def Start():
    # once opened it will fill in the confirm-your-age form
    Day = Select(web.find_element_by_xpath('//*[@id="bday_day"]'))
    Day.select_by_index(2)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_month"]'))
    Month.select_by_index(4)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_year"]'))
    Month.select_by_index(24)
    Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
    Prov.select_by_index(5)
    Button = web.find_element_by_xpath('//*[@id="popup-subscribe"]/button')
    Button.click()

# have to go through select your birthday
Start()
# 2 seconds is enough for the website to load
time.sleep(2)

# this throws an error.
Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
Prov.select_by_index(5)
Selenium has two families of locator functions:
find_element_by_... (without s in element) to get only the first matching element
find_elements_by_... (with s in elements) to get all matching elements
Selenium doc: 4. Locating Elements
So you can get all matching elements as a list (even if there is only one such element in the HTML);
if there are no matching elements, you get an empty list.
elements = web.find_elements_by_xpath('//*[@id="province"]')
and later slice it
first = elements[0]
second = elements[1]
last = elements[-1]
first_and_second = elements[:2]
EDIT:
You can also try to index directly in the XPath, like
(it starts counting at one, not zero)
'//*[@id="province"][2]'
or maybe
'(//*[@id="province"])[2]'
but I have never used this, so I can't confirm that it will work.
BTW:
All IDs should be unique - you shouldn't duplicate IDs.
If you check the documentation 4. Locating Elements, you will see that there is find_element_by_id (without the s in element) to get the first, and supposedly only, element with a given ID, but there is no find_elements_by_id (with the s in elements) to get more than one element with the same ID.
EDIT:
Minimal working code with example HTML in code
from selenium import webdriver
from selenium.webdriver.support.ui import Select

html = '''
<select id="province">
    <option value="value_A">A</option>
    <option value="value_B">B</option>
</select>

<select id="province">
    <option value="value_1">1</option>
    <option value="value_2">2</option>
</select>
'''

driver = webdriver.Firefox()
driver.get("data:text/html;charset=utf-8," + html)

all_elements = driver.find_elements_by_xpath('//*[@id="province"]')

first = all_elements[0]
second = all_elements[1]

prov1 = Select(first)
prov2 = Select(second)

print('--- first ---')
for item in prov1.options:
    print('option:', item.text, item.get_attribute('value'))
for item in prov1.all_selected_options:
    print('selected:', item.text, item.get_attribute('value'))

print('--- second ---')
for item in prov2.options:
    print('option:', item.text, item.get_attribute('value'))
for item in prov2.all_selected_options:
    print('selected:', item.text, item.get_attribute('value'))
EDIT:
There are two province elements.
When you use find_element in Start, you get the first province - the one in the popup - and you can fill it in. When you click the button, the popup closes, but the first province is not removed from the HTML - it is only hidden.
Later, when you use find_element again, you get that first, now-hidden province again - this time it is not visible, it can't be used, and that gives the error. You have to use the second province, as in this example.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time

def Start():
    # once opened it will fill in the confirm-your-age form
    Day = Select(web.find_element_by_xpath('//*[@id="bday_day"]'))
    Day.select_by_index(2)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_month"]'))
    Month.select_by_index(4)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_year"]'))
    Month.select_by_index(24)
    # it uses the first `province`
    Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
    Prov.select_by_index(5)
    Button = web.find_element_by_xpath('//*[@id="popup-subscribe"]/button')
    Button.click()

web = webdriver.Firefox()
web.get('https://www.tastyrewards.com/en-ca/contest/fritolaycontest/participate')

# have to go through select your birthday
Start()
# 2 seconds is enough for the website to load
time.sleep(2)

# `find_elements` with `s` - to get the second `province`
all_province = web.find_elements_by_xpath('//*[@id="province"]')
second_province = all_province[1]

Prov = Select(second_province)
Prov.select_by_index(5)

The element is not attached - Selenium in Python

I am trying to scrape data from a number of pages on a website using Selenium in Python. The script runs and scrapes data successfully on the first page, but after the second page it can't find the click button and stops scraping. I checked the HTML of the webpage, and the element on the second page is the same as the one on the first page. I found this question related to the same issue. I think the problem is that the reference to the button is lost after the DOM is changed, but I still can't fix the issue properly. I would appreciate any suggestions or solutions. The code and output are included below:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome(r"C:\Users\...\chromedriver.exe")
browser.get('https://fortune.com/global500/2019/walmart')
table = browser.find_element_by_css_selector('tbody')
data = []
# Use For Loop for Index
i = 1
while True:
    if i > 5:
        break
    try:
        print("Scraping Page no. " + str(i))
        i = i + 1
        # Select rows in the table
        for row in table.find_elements_by_css_selector('tr'):
            data.append([cell.text for cell in row.find_elements_by_css_selector('td')])
        try:
            WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//span[@class="singlePagination__icon--2KbZn"]')))
            time.sleep(10)
        finally:
            browser.find_element_by_xpath('//span[@class="singlePagination__icon--2KbZn"]').click()
    except Exception as e:
        print(e)
        break

data1 = pd.DataFrame(data, columns=['Labels', 'Value'])
print(data1)
browser.close()
output:
Scraping Page no. 1
Scraping Page no. 2
Message: stale element reference: element is not attached to the page document
(Session info: chrome=....)
Labels Value
0 (...) (...)
1 (...) (...)
Move the table = browser.find_element_by_css_selector('tbody') line into your while loop, so that you get a fresh reference to the table element on each iteration; then you should not see any stale element issue. A fuller sketch follows the snippet below.
while True:
    table = browser.find_element_by_css_selector('tbody')
    if i > 5:
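For context, here is a minimal sketch of the question's loop with that one change applied (locators and logic are taken from the question; the try/except error handling is omitted for brevity):
i = 1
data = []
while True:
    # Re-locate the table on every iteration so the reference never goes stale.
    table = browser.find_element_by_css_selector('tbody')
    if i > 5:
        break
    print("Scraping Page no. " + str(i))
    i += 1
    for row in table.find_elements_by_css_selector('tr'):
        data.append([cell.text for cell in row.find_elements_by_css_selector('td')])
    # Wait for the pagination control to become clickable, then click it.
    WebDriverWait(browser, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//span[@class="singlePagination__icon--2KbZn"]'))
    ).click()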

navigating through pagination with selenium in python

I'm scraping this website using Python and Selenium. I have the code working, but it currently only scrapes the first page. I would like to iterate through all the pages and scrape them all, but the site handles pagination in a weird way. How would I go through the pages and scrape them one by one?
Pagination HTML:
<div class="pagination">
First
Prev
1
<span class="current">2</span>
3
4
Next
Last
</div>
Scraper:
import re
import json
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options

options = Options()
# options.add_argument('--headless')
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(chrome_options=options,
                          executable_path=r'/Users/weaabduljamac/Downloads/chromedriver')

url = 'https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList'
driver.get(url)

def getData():
    data = []
    rows = driver.find_element_by_xpath('//*[@id="form1"]/table/tbody').find_elements_by_tag_name('tr')
    for row in rows:
        app_number = row.find_elements_by_tag_name('td')[1].text
        address = row.find_elements_by_tag_name('td')[2].text
        proposals = row.find_elements_by_tag_name('td')[3].text
        status = row.find_elements_by_tag_name('td')[4].text
        data.append({"CaseRef": app_number, "address": address, "proposals": proposals, "status": status})
    print(data)
    return data

def main():
    all_data = []
    select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
    list_options = select.options

    for item in range(len(list_options)):
        select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
        select.select_by_index(str(item))
        driver.find_element_by_css_selector("input.formbutton#csbtnSearch").click()
        all_data.extend(getData())
        driver.find_element_by_xpath('//*[@id="form1"]/div[3]/a[4]').click()
        driver.get(url)

    with open('wiltshire.json', 'w+') as f:
        json.dump(all_data, f)
    driver.quit()

if __name__ == "__main__":
    main()
Before moving on to automating any scenario, always write down the manual steps you would perform to execute it. The manual steps for what you want to do (as I understand from the question) are -
1) Go to site - https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList
2) Select first week option
3) Click search
4) Get the data from every page
5) Load the url again
6) Select second week option
7) Click search
8) Get the data from every page
.. and so on.
You have a loop that selects the different weeks, but inside each iteration for a week option you also need another loop that iterates over all the result pages for that week. Since you are not doing that, your code returns only the data from the first page. The nesting is sketched just below.
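In outline, the nesting looks like this (just a skeleton of my own; number_of_pages is derived in the next step and the full main() is given at the end):
for item in range(len(list_options)):        # outer loop: one iteration per week
    # select the week and click Search here ...
    for j in range(number_of_pages):         # inner loop: one iteration per result page
        all_data.extend(getData())           # scrape the current page
        if j < number_of_pages - 1:          # click "Next" on all but the last page
            driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    driver.get(url)                          # reload the search form for the next week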
Another problem is with how you are locating the 'Next' button -
driver.find_element_by_xpath('//*[#id="form1"]/div[3]/a[4]').click()
You are selecting the 4th <a> element, which is of course not robust, because on different pages the Next button's index will be different. Instead, use this better locator -
driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
Logic for creating the loop that will iterate through the pages -
First you will need the number of pages. I did that by locating the <a> immediately before the "Next" button: in the pagination markup, the text of that element is equal to the number of pages.
I did that using the following code -
number_of_pages = int(driver.find_element_by_xpath("//a[contains(text(),'Next')]/preceding-sibling::a[1]").text)
Now, once you have the number of pages stored in number_of_pages, you only need to click the "Next" button number_of_pages - 1 times!
Final code for your main function-
import time  # needed for the sleep between page clicks

def main():
    all_data = []
    select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
    list_options = select.options

    for item in range(len(list_options)):
        select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
        select.select_by_index(str(item))
        driver.find_element_by_css_selector("input.formbutton#csbtnSearch").click()
        number_of_pages = int(driver.find_element_by_xpath("//a[contains(text(),'Next')]/preceding-sibling::a[1]").text)
        for j in range(number_of_pages - 1):
            all_data.extend(getData())
            driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
            time.sleep(1)
        driver.get(url)

    with open('wiltshire.json', 'w+') as f:
        json.dump(all_data, f)
    driver.quit()
The following approach simply worked for me.
driver.find_element_by_link_text("3").click()
driver.find_element_by_link_text("4").click()
....
driver.find_element_by_link_text("Next").click()
First get the total number of pages in the pagination, using
from bs4 import BeautifulSoup  # needed for the parsing below

ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,1')
ins.find_element_by_class_name("pagination")
source = BeautifulSoup(ins.page_source)
div = source.find_all('div', {'class': 'pagination'})
all_as = div[0].find_all('a')
total = 0
for i in range(len(all_as)):
    if 'Next' in all_as[i].text:
        total = all_as[i - 1].text
        break
Now just loop through the range
for i in range(1, int(total) + 1):
    ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,{}'.format(i))
The loop increments the page number for you; get the source code for each page and then extract the data from it.
Note: Don't forget to sleep when clicking from one page to another.
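Putting that note together with the loop above, a minimal sketch (the per-page extraction is whatever BeautifulSoup parsing you need):
import time

for i in range(1, int(total) + 1):
    ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,{}'.format(i))
    time.sleep(1)                       # give the page a moment before reading it
    page = BeautifulSoup(ins.page_source)
    # ... extract this page's rows from `page` here ...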
