Stale element even when using WebDriverWait - Python

I do not understand why I am getting this error:
raise exception_class(message, screen, stacktrace) selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of <a id="u153-popover-trigger--3926" class="udlite-custom-focus-visible browse-course-card--link--3KIkQ" href="/course/kafka-streams-real-time-stream-processing-master-class/"> is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed
I'm using WebDriverWait twice to check whether the new page has loaded:
1. whether the pagination link for the new page has changed accordingly
2. whether the new page's course list div element has loaded
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def waitForLoad(inputXPath):
    Wait = WebDriverWait(driver, 10)
    Wait.until(EC.presence_of_element_located((By.XPATH, inputXPath)))

options = Options()
options.headless = True
driver = webdriver.Firefox(options=options, service_log_path='NUL')
driver.get("https://www.udemy.com/courses/development/?sort=highest-rated")

courses = []
f = open("0udemy.txt", "a", encoding="utf-8")
page = 1

try:
    waitForLoad("//div[@class='filter-panel--paginated-course-list--2F0x1']")
except TimeoutException as e:
    print("timeout!")

while True:
    ## I also tried this:
    #courses = driver.find_elements_by_xpath("//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")
    #for i in courses:
    #    f.write(f"{i.get_attribute('href')}\n")
    for i in range(16):
        f.write(driver.find_elements_by_xpath("//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")[i].get_attribute('href') + "\n")
    if len(driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")) == 0:
        break
    driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")[0].click()
    page += 1
    try:
        waitForLoad(f"//a[@class='udlite-btn udlite-btn-small udlite-btn-ghost udlite-heading-sm pagination--page--3FKqV pagination--active--3BrK7' and text()={page}]")
    except TimeoutException as e:
        print("timeout!")
        break
    try:
        waitForLoad("//div[@class='filter-panel--paginated-course-list--2F0x1']")
    except TimeoutException as e:
        print("timeout!")
        break

f.close()
driver.close()
I'm getting the stale error somewhere between pages 20 and 80. My world record is page 78.
EDIT:
Thanks to arundeep, I can continue the script from where it got the stale error, using a try/except.
When I encounter the error, I'm catching the exception, closing the driver, creating a new driver instance, loading the page where I got the error, and continuing the while loop from that page:
# requires: from selenium.common.exceptions import StaleElementReferenceException
except StaleElementReferenceException as e:
    print('stale:', e, 'page: ', page)
    driver.close()
    driver = webdriver.Firefox(options=options, service_log_path='NUL')
    driver.get(f"https://www.udemy.com/courses/development/?sort=highest-rated&p={page}")
I still get a stale error every 10 pages or so.
And I'm still wondering why the error happens.
I've abandoned the idea; I always end up with duplicated course links.
If someone can explain the stale error to me...
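A likely explanation: clicking the pagination button re-renders the course list, so any WebElement captured before the click points at a detached DOM node, which is exactly what the stale message says. The standard workaround is to never hold on to elements across a page change: re-locate them on every read and retry if a re-render races with you. A minimal sketch of that idea (the XPath is the one from the question; collect_links is an illustrative helper, and the set also removes the duplicated links mentioned above):

from selenium.common.exceptions import StaleElementReferenceException

CARD_XPATH = ("//div[@class='course-list--container--3zXPS']"
              "//a[contains(@class, 'browse-course-card--link--3KIkQ')]")
seen = set()  # remembers every href already written, across pages and retries

def collect_links(driver, retries=3):
    # Always re-locate the anchors; never reuse WebElements across pages.
    for _ in range(retries):
        try:
            return [a.get_attribute('href')
                    for a in driver.find_elements_by_xpath(CARD_XPATH)]
        except StaleElementReferenceException:
            continue  # the list re-rendered mid-read; locate the elements again
    return []

for href in collect_links(driver):
    if href and href not in seen:
        seen.add(href)
        f.write(href + "\n")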

I just went through the pages one at a time and printed the links (you can use f.write). You need to add a time.sleep() so Selenium doesn't crash. This can go on until it runs out of pages, or you can stop it with an if page == n: break. It can even outlast a single driver's life cycle, since the except block creates a fresh one.
import time
from webdriver_manager.chrome import ChromeDriverManager  # provides ChromeDriverManager

while True:
    try:
        courses = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class='course-list--container--3zXPS']//a[contains(@class, 'browse-course-card--link--3KIkQ')]")))
        for course in courses:
            print(course.get_attribute('href') + "\n")
        driver.find_elements_by_xpath("//a[@class='udlite-btn udlite-btn-small udlite-btn-secondary udlite-heading-sm udlite-btn-icon udlite-btn-icon-small udlite-btn-icon-round pagination--next--5NrLo']")[0].click()
        page = page + 1
        time.sleep(5)
    except:
        # On any failure (stale element, timeout), restart the driver on the current URL.
        url = driver.current_url
        driver.close()
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        driver.get(url)

Related

How to solve "Move target out of bounds" Selenium error?

I'm trying to simulate clicking the "Load More Listings" button on the "https://empireflippers.com/marketplace/" webpage until the button no longer exists. I tried the following code, but it results in a "move target out of bounds" error.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains

HOME_PAGE_URL = "https://empireflippers.com/marketplace/"

driver = webdriver.Chrome('./chromedriver.exe')
driver.get(HOME_PAGE_URL)

while True:
    try:
        element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More Listings')]")))
        ActionChains(driver).move_to_element(element).click().perform()
    except Exception as e:
        print(e)
        break

print("Complete")
time.sleep(10)
page_source = driver.page_source
driver.quit()
I'm expecting to retrieve the HTML code of the full web page, without the Load More Listings button.
So it seems that the button that you are trying to click is not visible on the screen. You could try this:
driver.execute_script("arguments[0].click();", driver.find_element(By.XPATH, "//button[contains(text(),'Load More Listings')]"))
To click the button.
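A note on why this works: execute_script dispatches the click in the page's JavaScript context, so Selenium's checks that the element is scrolled into view and not covered by another element are skipped entirely. If you need it more than once, a tiny helper is easy to write; a sketch, where js_click is just an illustrative name:

def js_click(driver, by, locator):
    # Click via JavaScript, bypassing Selenium's in-viewport
    # and not-obscured checks that raise "move target out of bounds".
    element = driver.find_element(by, locator)
    driver.execute_script("arguments[0].click();", element)

# usage:
js_click(driver, By.XPATH, "//button[contains(text(),'Load More Listings')]")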
I have no idea why, but trying to click twice works for me. [I still get the same error if I try to click twice with ActionChains, and I'm not familiar enough with ActionChains to try to fix that; my usual approach is to use .execute_script to scroll to the element with JavaScript and then just apply .click() to the element, so that's what I've done below.]
while True:
    try:
        element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More Listings')]")))
        # ActionChains(driver).move_to_element(element).click().perform()
        driver.execute_script('arguments[0].scrollIntoView(false);', element)
        try:
            element.click()  # for some reason, the 1st click always fails
        except:
            element.click()  # but after the 1st attempt, the 2nd click works...
    except Exception as e:
        print(e)
        break

In selenium how to avoid TimeoutException error after multiple hits in sequence?

With Python 3 and Selenium I want to automate the search on a public information site. On this site it is necessary to enter the name of a person, then select the spelling chosen for that name (with or without accents or name variations), access a page with the list of lawsuits found, and from this list access the page of each case.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
import re

# Name that will be searched
name = 'JOSE ROBERTO ARRUDA'

# Create path, search start link, and empty list to store information
firefoxPath = "/home/abraji/Documentos/Code/geckodriver"
link = 'https://ww2.stj.jus.br/processo/pesquisa/?aplicacao=processos.ea'
processos = []

# Call driver and go to first search page
driver = webdriver.Firefox(executable_path=firefoxPath)
driver.get(link)

# Position cursor, fill and click
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idParteNome'))).click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="idParteNome"]').send_keys(name)
time.sleep(6)
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarFormularioExtendido'))).click()

# Mark all spelling possibilities for searching
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoMarcarTodos'))).click()
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarMarcados'))).click()
time.sleep(1)

# Check how many pages of data there are - to be used in "for range"
capta = driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]').text
print(capta)
paginas = int(re.search(r'\d+', capta).group(0))
paginas = int(paginas) + 1
print(paginas)

# Capture routine
for acumula in range(1, paginas):
    # Fill the field with the page number and press enter
    driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(acumula)
    driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(Keys.RETURN)
    time.sleep(2)
    # Captures the number of processes found on the current page - qt
    qt = driver.find_element_by_xpath('//*[@id="idDivBlocoMensagem"]/div/b').text
    qt = int(qt) + 2
    print(qt)
    # Iterate over the found number of processes
    for item in range(2, qt):
        # Build the XPath of each process link - starts at number 2
        vez = '//*[@id="idBlocoInternoLinhasProcesso"]/div[' + str(item) + ']/span[1]/span[1]/span[1]/span[2]/a'
        # Access the direct link and click
        time.sleep(2)
        WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, vez))).click()
        time.sleep(1)
        # Run tests to get data
        try:
            num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
        except NoSuchElementException:
            num_unico = "sem_numero_unico"
        try:
            nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
        except NoSuchElementException:
            nome_proc = "sem_nome_encontrado"
        try:
            data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
        except NoSuchElementException:
            data_autu = "sem_data_encontrada"
        # Fill dictionary and list
        dicionario = {"num_unico": num_unico,
                      "nome_proc": nome_proc,
                      "data_autu": data_autu
                      }
        processos.append(dicionario)
        # Return a page to click on next process
        driver.execute_script("window.history.go(-1)")

# Close driver
driver.quit()
After about 30 hits on the direct links with each case's information, I get this error:
---------------------------------------------------------------------------
TimeoutException Traceback (most recent call last)
<ipython-input-10-a901a514bd82> in <module>
16
17 time.sleep(2)
---> 18 WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, vez))).click()
19 time.sleep(1)
20
~/Documentos/Code/publique_se/lib/python3.6/site-packages/selenium/webdriver/support/wait.py in until(self, method, message)
78 if time.time() > end_time:
79 break
---> 80 raise TimeoutException(message, screen, stacktrace)
81
82 def until_not(self, method, message=''):
TimeoutException: Message:
Apparently the site gets slow and the script errors out because it can't find the information, right?
On sites where Selenium traverses multiple pages, is there a way to avoid this kind of error?
Can the website itself slow down when it notices hits in sequence?
You are using thread sleeps to wait, which is not a good approach and is causing the errors you are getting: you are using time.sleep(2) where you should use Selenium's explicit waits.
Example:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

element = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, "myXpath")))
element.click()
Note: you need to tune the timeout of 20 seconds to roughly the response time you observe in your application.
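Applied to the capture routine from the question, the change would look roughly like this; a sketch only, reusing the question's XPaths and IDs, with the 20-second timeout being an assumption you would tune:

wait = WebDriverWait(driver, 20)

for item in range(2, qt):
    vez = '//*[@id="idBlocoInternoLinhasProcesso"]/div[' + str(item) + ']/span[1]/span[1]/span[1]/span[2]/a'
    # Wait for the link to be clickable instead of sleeping a fixed 2 seconds.
    wait.until(EC.element_to_be_clickable((By.XPATH, vez))).click()
    # Wait for a known element of the case page instead of sleeping again.
    wait.until(EC.presence_of_element_located((By.ID, "idSpanClasseDescricao")))
    # ... scrape the fields as before ...
    driver.execute_script("window.history.go(-1)")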

Not able to avoid ElementNotVisibleException even though I'm using a try and except method

I want to download millions of Excel files from a website using Selenium. My current code tries to handle issues with ElementNotVisibleException, but my try/except approach seems to fall short.
I have tried to implement a try/except solution where, if the error message appears, Selenium is instructed to wait until the button appears.
import os
import time
import csv
from tqdm import tqdm
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import ElementClickInterceptedException

working_directory = r"xx"
os.chdir(working_directory)

options = webdriver.ChromeOptions()
prefs = {
    "download.default_directory": r"xx",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(r"C:xxx\chromedriver.exe", options=options)
driver.get("website")

# Login
driver.find_element_by_class_name("smallLoginBox").click()
driver.implicitly_wait(1)
time.sleep(2)
driver.find_element_by_id('loginFormUC_loginEmailTextBox').send_keys('EMAIL')
driver.find_element_by_id('loginFormUC_loginPasswordTextBox').send_keys('PASSWORD')
driver.find_element_by_xpath("//input[@value='Logg inn']").click()

# Get a custom list of firms
bedrifter = []
with open("./listwithIDs.csv") as csvDataFile:
    csvReader = csv.reader(csvDataFile)
    for row in csvReader:
        bedrifter.append(row[0])

# THE LOOP
for ID in tqdm(bedrifter):  # the original iterated bedrifter_gjenstår, which is never defined above
    driver.get("website" + ID)
    source = driver.page_source
    if not "Ingen data" in source:  # make sure there is an excel file; if not, loop continues to next ID
        # first click on button "download excel"
        try:
            driver.find_element_by_id("exportExcel").click()
        except ElementNotVisibleException:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.ID, "exportExcel")))
            driver.find_element_by_id("exportExcel").click()
        except ElementClickInterceptedException:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.ID, "exportExcel")))
            driver.find_element_by_id("exportExcel").click()
        # second click, choosing what format the excel file should be in
        try:
            driver.find_element_by_id("mainContentPlaceHolder_mainContentPlaceHolder_mainContentPlaceHolder_AccountingNumberTableUc_excelLinkButton").click()
        except ElementNotVisibleException:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.ID, "mainContentPlaceHolder_mainContentPlaceHolder_mainContentPlaceHolder_AccountingNumberTableUc_excelLinkButton")))
            driver.find_element_by_id("mainContentPlaceHolder_mainContentPlaceHolder_mainContentPlaceHolder_AccountingNumberTableUc_excelLinkButton").click()
        # code to switch between windows to remove download window and continue the code
        try:
            window_export = driver.window_handles[1]
        except IndexError:
            time.sleep(3)
            print("sleep")
            window_export = driver.window_handles[1]
        try:
            window_main = driver.window_handles[0]
        except IndexError:
            time.sleep(3)
            print("sleep")
            window_main = driver.window_handles[0]
        driver.switch_to.window(window_export)
        driver.close()
        driver.switch_to.window(window_main)
I expect the code to download all files if there are any, but either ElementNotVisibleException or ElementClickInterceptedException appears.
To make sure the element is not only present and visible but also clickable, check for clickability as a wait condition:
WebDriverWait(driver, 120).until(EC.element_to_be_clickable((By.ID, "exportExcel"))).click()
More on Python waits can be found here.
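For reference, the three common conditions form a ladder of increasingly strict checks; a quick sketch of the difference, using the exportExcel ID from the question:

# presence: the element exists in the DOM, even if hidden
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "exportExcel")))
# visibility: in the DOM and rendered (displayed, non-zero size)
WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "exportExcel")))
# clickable: visible and enabled, so a click should actually land
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "exportExcel")))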
Before performing any operation on an element, first check that it is visible.
Here is the example code:
wait = WebDriverWait(self.browser, 15)
wait.until(EC.visibility_of_element_located((By.XPATH, "element_path")))
driver.find_element_by_xpath("element_path").click()
You probably get these exceptions because they occur inside an except clause and not inside a try. Look at the traceback to determine which line threw the exception; this should tell you where the problem lies.
Also, if you want to wait for the element to be visible, you should use visibility_of_element_located instead of presence_of_element_located.
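Putting these answers together, one way to restructure the first click so the wait happens before the click rather than inside an except clause; a sketch under the assumption that a missing button simply means the ID should be skipped (wait_and_click is an illustrative helper):

from selenium.common.exceptions import TimeoutException

def wait_and_click(driver, element_id, timeout=120):
    # Wait until the element is clickable, then click it in one step.
    WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.ID, element_id))).click()

for ID in tqdm(bedrifter):
    driver.get("website" + ID)
    try:
        wait_and_click(driver, "exportExcel")
    except TimeoutException:
        continue  # no export button for this ID; move on to the next one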

Python: Element is not clickable selenium

I am trying to write a program in Python that clicks through to the next page until it reaches the last page. I followed some old posts on Stack Overflow and wrote the following code:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

driver = webdriver.Chrome(executable_path="/Users/yasirmuhammad/Downloads/chromedriver")
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")

while True:
    try:
        driver.find_element_by_link_text('next').click()
    except NoSuchElementException:
        break
However, when I run the program, it throws the following error:
selenium.common.exceptions.WebDriverException: Message: unknown error: Element ... is not clickable at point (1180, 566). Other element would receive the click: <html class="">...</html>
(Session info: chrome=68.0.3440.106)
I also followed a Stack Overflow thread (selenium exception: Element is not clickable at point) but no luck.
You need to close this banner first -
Since Selenium opens a fresh browser instance, the website will ask you to accept cookies every time you run the script. It is this exact banner which is getting in the way of Selenium clicking your "next" button. Use this code to click its close button -
driver.find_element_by_xpath("//a[@class='grid--cell fc-white js-notice-close']").click()
Also, driver.find_element_by_link_text('next') will throw a StaleElementReferenceException. Use this locator instead -
driver.find_element_by_xpath("//span[contains(text(),'next')]").click()
Final code -
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
driver.find_element_by_xpath("//a[@class='grid--cell fc-white js-notice-close']").click()

while True:
    try:
        time.sleep(3)
        driver.find_element_by_xpath("//span[contains(text(),'next')]").click()
    except NoSuchElementException:
        break
As per your question, to click through the next page until the last page is reached, you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@class='grid--cell fc-white js-notice-close' and @aria-label='notice-dismiss']"))).click()

while True:
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='pager fr']//a[last()]/span[@class='page-numbers next']")))
        driver.find_element_by_xpath("//div[@class='pager fr']//a[last()]/span[@class='page-numbers next']").click()
    except (TimeoutException, NoSuchElementException, StaleElementReferenceException):
        print("Last page reached")
        break

driver.quit()
Console Output:
Last page reached
There are a couple of things that need to be taken care of:
1. It seems the element is hidden by the cookies banner. By scrolling the page, the element can be made available.
2. When you click on next, the page is reloaded, so you need to handle the StaleElementReferenceException.
Adding both of these, the code looks as follows:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException

driver = webdriver.Chrome()
driver.get("https://stackoverflow.com/users/37181/alex-gaynor?tab=tags")
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")

while True:
    try:
        webdriver.ActionChains(driver).move_to_element(driver.find_element_by_link_text('next')).click().perform()
    except NoSuchElementException:
        break
    except StaleElementReferenceException:
        pass

print("Reached the last page")
driver.quit()
I met the same error, and the solution is not to scroll the window to the object (maybe that fixes some errors, but not in my case).
My solution is using JavaScript; the code is as follows:
click_goal = web.find_element_by_xpath('//*[@id="s_position_list"]/ul/li[1]/div[1]/div[1]/div[1]/a/h3')
web.execute_script("arguments[0].click();", click_goal)

Issues when scraping a web page using selenium in python

I have been given a model that runs a successful web scraper on a selected website; however, when I alter it to collect data from a second website, it keeps returning an error. I'm not sure if there is an error in the code or if the website is refusing my requests. Could you please look through this and see where my issue lies? Any help hugely appreciated!
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()

try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx")  # load the page
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item highcharts-pie-series highcharts-color-0')))  # wait till relevant elements are on the page
except:
    driver.quit()  # quit if there was an error getting the page or we've waited 15 seconds and the stats haven't appeared.

stat_elements = driver.find_elements_by_css_selector('.highcharts-legend-item highcharts-pie-series highcharts-color-0')
for el in stat_elements:
    print(el.find_element_by_css_selector('b').text)
    print(el.find_element_by_css_selector('br').text)

driver.quit()
First of all, you are passing the wrong CSS selector; it should be like this:
.highcharts-legend-item.highcharts-pie-series.highcharts-color-0
not as you have mentioned.
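The reason, as an aside: a class attribute containing spaces means the element carries several classes, and in a CSS selector those must be chained with dots, because a space in a selector means "descendant" instead. A small illustration:

# class="highcharts-legend-item highcharts-pie-series highcharts-color-0"
# means one element with three classes, so chain them with dots:
driver.find_elements_by_css_selector('.highcharts-legend-item.highcharts-pie-series.highcharts-color-0')
# With spaces, the selector asks for <highcharts-pie-series> tags nested
# inside .highcharts-legend-item, which don't exist, so nothing matches:
# '.highcharts-legend-item highcharts-pie-series highcharts-color-0'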
Then you are closing the browser in the except block and afterwards trying to use and close it again, which raises the error:
try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx")  # load the page
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item.highcharts-pie-series.highcharts-color-0')))  # wait till relevant elements are on the page
except:
    driver.quit()
Next, on the list items you are fetching the text:
print(el.find_element_by_css_selector('b').text)
Debugged Code here:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException

driver = webdriver.Chrome()

try:
    driver.get("http://www.caiso.com/TodaysOutlook/Pages/supply.aspx")  # load the page
    WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.highcharts-legend-item.highcharts-pie-series.highcharts-color-0')))  # wait till relevant elements are on the page
    #driver.quit() # quit if there was an error getting the page or we've waited 15 seconds and the stats haven't appeared.
except TimeoutException:
    pass
finally:
    try:
        stat_elements = driver.find_elements_by_css_selector('.highcharts-legend-item.highcharts-pie-series.highcharts-color-0')
        for el in stat_elements:
            for i in el.find_elements_by_tag_name('b'):
                print(i.text)
            for i in el.find_elements_by_tag_name('br'):
                print(i.text)
    except NoSuchElementException:
        print("No Such Element Found")
    driver.quit()
I hope this has solved your problem if not then let me know.
