I am trying to write a Python program that clicks through to the next page until it reaches the last page. I followed some old Stack Overflow posts and wrote the following code:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome(executable_path="/Users/yasirmuhammad/Downloads/chromedriver")
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
while True:
    try:
        driver.find_element_by_link_text('next').click()
    except NoSuchElementException:
        break
However, when I run the program, it throws the following error:
selenium.common.exceptions.WebDriverException: Message: unknown error: Element ... is not clickable at point (1180, 566). Other element would receive the click: <html class="">...</html>
(Session info: chrome=68.0.3440.106)
I also followed a Stack Overflow thread (selenium exception: Element is not clickable at point) but had no luck.
You need to close this banner first -
Since Selenium opens a fresh browser instance, the website asks you to accept cookies every time you run the script. It is this exact banner which is getting in the way of Selenium clicking your "next" button. Use this code to click that banner's close button -
driver.find_element_by_xpath("//a[#class='grid--cell fc-white js-notice-close']").click()
Also, driver.find_element_by_link_text('next') will throw a StaleElementReferenceException. Use this locator instead -
driver.find_element_by_xpath("//span[contains(text(),'next')]").click()
Final code -
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
driver.find_element_by_xpath("//a[#class='grid--cell fc-white js-notice-close']").click()
while True:
try:
time.sleep(3)
driver.find_element_by_xpath("//span[contains(text(),'next')]").click()
except NoSuchElementException:
break
As per your question, to click through the next pages until the last page is reached, you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@class='grid--cell fc-white js-notice-close' and @aria-label='notice-dismiss']"))).click()
while True:
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='pager fr']//a[last()]/span[@class='page-numbers next']")))
        driver.find_element_by_xpath("//div[@class='pager fr']//a[last()]/span[@class='page-numbers next']").click()
    except (TimeoutException, NoSuchElementException, StaleElementReferenceException):
        print("Last page reached")
        break
driver.quit()
Console Output:
Last page reached
There are a couple of things that need to be taken care of:
It seems the element is hidden by the cookies banner. By scrolling the page, the element can be made available.
When you click on next, the page is reloaded, so you need to handle the StaleElementReferenceException.
Adding both of these, the code looks as follows:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
driver = webdriver.Chrome()
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")

while True:
    try:
        webdriver.ActionChains(driver).move_to_element(driver.find_element_by_link_text('next')).click().perform()
    except NoSuchElementException:
        break
    except StaleElementReferenceException:
        pass

print("Reached the last page")
driver.quit()
I met the same error, and the solution was not to scroll the window to the element (that may fix some cases, but it didn't in mine).
My solution is to click using JavaScript; the code is as follows:
click_goal = web.find_element_by_xpath('//*[@id="s_position_list"]/ul/li[1]/div[1]/div[1]/div[1]/a/h3')
web.execute_script("arguments[0].click();", click_goal)
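A JavaScript click bypasses Selenium's in-viewport and overlay checks, which is why it succeeds where a plain .click() raises "not clickable". If you need this workaround in several places, a small helper keeps things tidy; this is just a sketch, and the helper name is my own:

from selenium.webdriver.common.by import By

def js_click(driver, xpath):
    # Locate the element by XPath and click it via JavaScript,
    # skipping Selenium's visibility and overlay checks.
    element = driver.find_element(By.XPATH, xpath)
    driver.execute_script("arguments[0].click();", element)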
Related
I'm trying to simulate clicking the "Load more listings" button on the "https://empireflippers.com/marketplace/" webpage until the button no longer exists. I tried the following code, but it results in a "move target out of bounds" error.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains
HOME_PAGE_URL = "https://empireflippers.com/marketplace/"
driver = webdriver.Chrome('./chromedriver.exe')
driver.get(HOME_PAGE_URL)
while True:
    try:
        element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More Listings')]")))
        ActionChains(driver).move_to_element(element).click().perform()
    except Exception as e:
        print(e)
        break

print("Complete")
time.sleep(10)
page_source = driver.page_source
driver.quit()
I'm expecting to retrieve the HTML code of the full web page, without the Load More Listings button.
So it seems that the button you are trying to click is not visible on the screen. You could try this:
driver.execute_script("arguments[0].click();", driver.find_element(By.XPATH, "//button[contains(text(),'Load More Listings')]"))
To click the button.
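And since the goal is to keep clicking until the button is gone, one way (a sketch, reusing the same XPath and assuming a reasonable timeout) is to wrap the JavaScript click in a loop and break once the wait times out:

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

while True:
    try:
        button = WebDriverWait(driver, 10).until(EC.presence_of_element_located(
            (By.XPATH, "//button[contains(text(),'Load More Listings')]")))
        driver.execute_script("arguments[0].click();", button)
    except TimeoutException:
        break  # button no longer present, so all listings are loaded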
I have no idea why, but trying to click twice works for me. [I still get the same error if I try to click twice with ActionChains, and I'm not familiar enough with ActionChains to try to fix that; my usual approach is to use .execute_script to scroll to the element with JavaScript and then just apply .click() to the element, so that's what I've done below.]
while True:
    try:
        element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More Listings')]")))
        # ActionChains(driver).move_to_element(element).click().perform()
        driver.execute_script('arguments[0].scrollIntoView(false);', element)
        try:
            element.click()  # for some reason, the 1st click always fails
        except:
            element.click()  # but after the 1st attempt, the 2nd click works...
    except Exception as e:
        print(e)
        break
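If you would rather stay with ActionChains, newer Selenium releases (4.2+) add ActionChains.scroll_to_element, which may avoid the "move target out of bounds" error; I haven't verified this against the site, so treat it as a sketch:

from selenium.webdriver.common.action_chains import ActionChains

element = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More Listings')]")))
# Scroll the element into the viewport first, then click it in the same chain.
ActionChains(driver).scroll_to_element(element).pause(0.5).click(element).perform()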
I do not understand why I am getting this error:
raise exception_class(message, screen, stacktrace) selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of <a id="u153-popover-trigger--3926" class="udlite-custom-focus-visible browse-course-card--link--3KIkQ" href="/course/kafka-streams-real-time-stream-processing-master-class/"> is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed
I'm using WebDriverWait twice to check if the new page is loaded:
If the pagination link for the new page changes accordingly
If the new page's course list div element is loaded
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def waitForLoad(inputXPath):
    Wait = WebDriverWait(driver, 10)
    Wait.until(EC.presence_of_element_located((By.XPATH, inputXPath)))
options = Options()
options.headless = True
driver = webdriver.Firefox(options=options, service_log_path='NUL')
driver.get("https://www.udemy.com/courses/development/?sort=highest-rated")
courses = []
f = open("0udemy.txt","a", encoding="utf-8")
page = 1
try:
    waitForLoad("//div[@class='filter-panel--paginated-course-list--2F0x1']")
except TimeoutException as e:
    print("timeout!")
while True:
    ## I also tried this:
    #courses = driver.find_elements_by_xpath("//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")
    #for i in courses:
    #    f.write(f"{i.get_attribute('href')}\n")
    for i in range(16):
        f.write(driver.find_elements_by_xpath("//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")[i].get_attribute('href')+"\n")
    if len(driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")) == 0:
        break
    driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")[0].click()
    page += 1
    try:
        waitForLoad(f"//a[@class='udlite-btn udlite-btn-small udlite-btn-ghost udlite-heading-sm pagination--page--3FKqV pagination--active--3BrK7' and text()={page}]")
    except TimeoutException as e:
        print("timeout!")
        break
    try:
        waitForLoad("//div[@class='filter-panel--paginated-course-list--2F0x1']")
    except TimeoutException as e:
        print("timeout!")
        break
f.close()
driver.close()
I'm getting the stale error between pages 20 and 80. My world record is page 78.
EDIT:
Thanks to arundeep, I can continue the script from where it got the stale error, using a try/except.
When I encounter the error, I catch the exception, close the driver, create a new driver instance, load the page where I got the error, and continue the while loop from that page:
except StaleElementReferenceException as e:
    print('stale:', e, 'page: ', page)
    driver.close()
    driver = webdriver.Firefox(options=options, service_log_path='NUL')
    driver.get(f"https://www.udemy.com/courses/development/?sort=highest-rated&p={page}")
I still get a stale error every 10 pages or so.
And I'm still wondering why the error happens.
I have abandoned the idea; I always end up with duplicated course links.
If someone could explain the stale error to me...
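For context: a StaleElementReferenceException means the WebElement reference you are holding points at a DOM node that has since been replaced, which happens whenever the page or the course list re-renders. The usual defence is to re-locate elements on every iteration instead of reusing old references; a minimal sketch of that pattern, reusing the locators from the question:

while True:
    # Re-locate the links on every pass so no reference outlives a re-render.
    for link in driver.find_elements_by_xpath(
            "//div[@class='course-list--container--3zXPS']"
            "//a[contains(@class, 'browse-course-card--link--3KIkQ')]"):
        f.write(link.get_attribute('href') + "\n")
    next_buttons = driver.find_elements_by_xpath(
        "//a[contains(@class, 'pagination--next--5NrLo')]")
    if not next_buttons:
        break
    next_buttons[0].click()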
I just went through the pages one at a time and printed the links (you can use f.write instead). You need to add a time.sleep() so Selenium doesn't crash. This can go on until it runs out of pages, or you can specify if page == n: break to stop early. It can even survive the driver's life cycle, since the except block restarts the browser.
import time
from webdriver_manager.chrome import ChromeDriverManager  # provides ChromeDriverManager used below

while True:
    try:
        courses = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")))
        for course in courses:
            print(course.get_attribute('href')+"\n")
        driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")[0].click()
        page = page + 1
        time.sleep(5)
    except:
        url = driver.current_url
        driver.close()
        # assumes `options` is a ChromeOptions instance defined earlier
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        driver.get(url)
I'm writing my first real scraper and although in general it's been going well, I've hit a wall using Selenium. I can't get it to go to the next page.
Below is the head of my code. The output below this is just printing out data in the terminal for now, and that's all working fine. It just stops scraping at the end of page 1 and shows me my terminal prompt. It never starts on page 2. I would be so grateful if anyone could make a suggestion. I've tried selecting the button at the bottom of the page I'm trying to scrape using both the relative and full XPath (you're seeing the full one here), but neither works. I'm trying to click the right-arrow button.
I built in my own error message to indicate whether the driver successfully found the element by Xpath or not. The error message fires when I execute my code, so I guess it's not finding the element. I just can't understand why not.
# Importing libraries
import requests
import csv
import re
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
import time
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
driver = webdriver.Chrome("/path/to/driver", options=options)
# Yes, I do have the actual path to my driver in the original code
driver.get("https://uk.eu-supply.com/ctm/supplier/publictenders?B=UK")
time.sleep(5)
while True:
    try:
        driver.find_element_by_xpath('/html/body/div[1]/div[3]/div/div/form/div[3]/div/div/ul[1]/li[4]/a').click()
    except (TimeoutException, WebDriverException) as e:
        print("A timeout or webdriver exception occurred.")
        break
driver.quit()
What you can do is set up Selenium expected conditions (visibility_of_element_located, element_to_be_clickable) and use a relative XPath to select the next-page element. All of this goes in a loop whose range is the number of pages you have to deal with.
XPath for the next page link:
//div[@class='pagination ctm-pagination']/ul[1]/li[last()-1]/a
Code could look like:
## imports
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.get("https://uk.eu-supply.com/ctm/supplier/publictenders?B=UK")
## count the number of pages you have
els = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='pagination ctm-pagination']/ul[1]/li[last()]/a"))).get_attribute("data-current-page")

## loop. at the end of the loop, click on the following page
for i in range(int(els)):
    # *** scrape what you want here ***
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='pagination ctm-pagination']/ul[1]/li[last()-1]/a"))).click()
You were pretty close with the while True and try/except logic. To go to the next page using Selenium and Python, you have to induce WebDriverWait for element_to_be_clickable(), and you can use either of the following locator strategies:
Code Block:
driver.get("https://uk.eu-supply.com/ctm/supplier/publictenders?B=UK")
while True:
try:
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[contains(#class, 'state-active')]//following::li[1]/a[#href]"))).click()
print("Clicked for next page")
WebDriverWait(driver, 10).until(EC.staleness_of(driver.find_element_by_xpath("//a[contains(#class, 'state-active')]//following::li[1]/a[#href]")))
except (TimeoutException):
print("No more pages")
break
driver.quit()
Console Output:
Clicked for next page
No more pages
I have read several articles on this site regarding the StaleElementReferenceException and am aware that this error is caused by the element no longer being in the site's DOM. What I am trying to do is click the bottom links on this webpage in order to go on and see the next page's listings. I have tried a few ways to get around this exception, but none have worked. Here is an example of the code I have tried, and what I thought it might accomplish.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import StaleElementReferenceException
import time

driver = webdriver.Chrome(r'C:\Users\Hank\Desktop\chromedriver_win32\chromedriver.exe')
driver.get('https://steamcommunity.com/market/listings/440/Unusual%20Old%20Guadalajara')

action = ActionChains(driver)
page_links = wait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[class^=market_paging_pagelink]')))
try:
    action.move_to_element(page_links[1]).click().perform()
except StaleElementReferenceException:
    print("Exception received, trying again")
    time.sleep(5)
    page_links = wait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '[class^=market_paging_pagelink]')))
    action.move_to_element(page_links[1]).click().perform()
I was hoping that this code segment would attempt to move to the element at the bottom and click it, or catch the error, wait, and try again, succeeding the second time. Instead, the code simply throws the error again. If my question has already been answered, please direct me to the relevant link.
Thank you!
The approach I normally go for is to click Next page until the button gets disabled/invisible.
Here's a working example based on your page. You should obviously do whatever relevant in the while loop; I chose to capture prices for the sake of example.
url="https://steamcommunity.com/market/listings/440/Unusual%20Old%20Guadalajara"
driver.get(url)
next_button=wait(driver, 10).until(EC.presence_of_element_located((By.ID,'searchResults_btn_next')))
# capture the start value from "Showing x-xx of 22 results"
#need this to check against later
ref_val=wait(driver, 10).until(EC.presence_of_element_located((By.ID,'searchResults_start'))).text
while next_button.get_attribute('class') == 'pagebtn':
    next_button.click()

    # wait until ref_val has changed
    wait(driver, 10).until(lambda driver: wait(driver, 10).until(EC.presence_of_element_located((By.ID, 'searchResults_start'))).text != ref_val)

    # ====== Do whatever relevant here =============================
    page_num = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.market_paging_pagelink.active'))).text
    print(f"Prices from page {page_num}")
    prices = wait(driver, 10).until(EC.presence_of_all_elements_located(
        (By.XPATH, ".//span[@class='market_listing_price market_listing_price_with_fee']")))
    for price in prices:
        print(price.text)
    # ================================================================

    # get the new reference value
    ref_val = wait(driver, 10).until(EC.presence_of_element_located((By.ID, 'searchResults_start'))).text
I have been given a model to run a successful web scraper on a selected website; however, when I alter this to collect data from a second website, it keeps returning an error. I'm not sure if it is an error in the code or if the website is refusing my requests. Could you please look through this and see where my issue lies? Any help hugely appreciated!
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx") # load the page
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item highcharts-pie-series highcharts-color-0'))) # wait till relevant elements are on the page
except:
    driver.quit() # quit if there was an error getting the page or we've waited 15 seconds and the stats haven't appeared.

stat_elements = driver.find_elements_by_css_selector('.highcharts-legend-item highcharts-pie-series highcharts-color-0')

for el in stat_elements:
    print(el.find_element_by_css_selector('b').text)
    print(el.find_element_by_css_selector('br').text)

driver.quit()
First of all, you are passing the wrong CSS selector; it should be like this:
.highcharts-legend-item.highcharts-pie-series.highcharts-color-0
not as you have mentioned.
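In CSS, a space between tokens is the descendant combinator, while chaining the class names with dots selects one element carrying all of them. A quick illustration (the class names here are just placeholders):

# '.a .b'  -> elements with class "b" somewhere inside an element with class "a"
# '.a.b'   -> elements that have both class "a" and class "b" themselves
driver.find_elements_by_css_selector('.a .b')
driver.find_elements_by_css_selector('.a.b')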
Then, on a timeout you are quitting the browser and afterwards still trying to use it (and quit it again), which is what raises the error:
try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx") # load the page
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item.highcharts-pie-series.highcharts-color-0'))) # wait till relevant elements are on the page
except:
    driver.quit()
Next, on the list item you are fetching the text:
print(el.find_element_by_css_selector('b').text)
Debugged Code here:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome()
try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx") # load the page
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item.highcharts-pie-series.highcharts-color-0'))) # wait till relevant elements are on the page
    #driver.quit() # quit if there was an error getting the page or we've waited 15 seconds and the stats haven't appeared.
except TimeoutException:
    pass
finally:
    try:
        stat_elements = driver.find_elements_by_css_selector('.highcharts-legend-item.highcharts-pie-series.highcharts-color-0')
        for el in stat_elements:
            for i in el.find_elements_by_tag_name('b'):
                print(i.text)
            for i in el.find_elements_by_tag_name('br'):
                print(i.text)
    except NoSuchElementException:
        print("No Such Element Found")
    driver.quit()
I hope this has solved your problem; if not, then let me know.