Opening multiple URLs with Selenium (Python)

Windows 10 x64 / Python 2.7 / Selenium
I am trying to build a tool to scrape my ticket queue for unassigned tickets, open them and look for keywords, then do other things. But for now, I can't seem to figure out how to get the code to open more than the first URL. I keep getting a StaleElementReferenceException error, and I do not understand why.
I'm building on top of examples I have found. This may not even be a good way to go about this. I am open to a new direction as well.
The goal for this will be to have something scrape the queue every X and, when certain keywords are found, offer me a prompt to assign it. It needs to run on its own while I am doing other tasks, so it cannot interfere with my keystrokes.
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
import unittest

class LoginTest(unittest.TestCase):
    def setUp(self):
        self.driver = webdriver.Firefox()
        self.driver.get("https://TICKETS.COMPANY.COM/TEAM/MINE")

    def test_Login(self):
        driver = self.driver
        table = "UNASSIGNED-TICKETS"
        # Select the UNASSIGNED-TICKETS tab
        selectUnassignedTab = WebDriverWait(driver, 10).until(lambda driver: driver.find_element_by_link_text('Unassigned'))
        selectUnassignedTab.click()
        # Grab all of the ticket URLs
        unlockedTickets = WebDriverWait(driver, 10).until(lambda driver: driver.find_elements_by_xpath("//table[@id='unlocked-tickets']/tbody/tr[@role='row']/td[@class='nowrap']/a[@href]"))
        counter = 1
        dictURLs = {}
        for ticket in unlockedTickets:
            ticketUrl = ticket.get_attribute('href')
            # Troubleshooting: make sure URLs are grabbed.
            print ticket.get_attribute('href')
            # Stuff them in a dict
            dictURLs["string{0}".format(counter)] = ticketUrl
            # Open each ticket (NOT WORKING)
            # driver.get(ticketUrl)  # <--- causes the StaleElementReferenceException
            if counter == 1:
                # Wait for the user and pass fields to load, then assign them.
                emailFieldElement = WebDriverWait(driver, 10).until(lambda driver: driver.find_element_by_name('username'))
                passFieldElement = WebDriverWait(driver, 10).until(lambda driver: driver.find_element_by_name('password'))
                # Log in information.
                emailFieldElement.clear()
                emailFieldElement.send_keys("USERNAME-HERE")
                passFieldElement.clear()
                passFieldElement.send_keys("PASSWORD-HERE")
                passFieldElement.submit()
            counter = counter + 1

    def tearDown(self):
        sleep(15)
        self.driver.quit()

if __name__ == '__main__':
    unittest.main()
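For reference, the commented-out driver.get(ticketUrl) above fails because the WebElement references in unlockedTickets die as soon as the browser navigates away. A minimal sketch of the collect-first pattern that avoids the StaleElementReferenceException (same locators as above):

# Harvest the href strings first: plain strings survive navigation,
# WebElement references do not.
ticketUrls = [ticket.get_attribute('href') for ticket in unlockedTickets]
for ticketUrl in ticketUrls:
    driver.get(ticketUrl)  # safe: no element from the old page is touched
    # ... look for keywords here ...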

I used this code to open multiple windows and it worked like a charm:
from selenium import webdriver
import threading
import time

def test_logic():
    driver = webdriver.Firefox()
    url = 'https://www.google.co.in'
    driver.get(url)
    # Implement your test logic
    time.sleep(2)
    driver.quit()

N = 5  # Number of browsers to spawn
thread_list = list()

# Start test
for i in range(N):
    t = threading.Thread(name='Test {}'.format(i), target=test_logic)
    t.start()
    time.sleep(1)
    print t.name + ' started!'
    thread_list.append(t)

# Wait for all threads to complete
for thread in thread_list:
    thread.join()

print 'Test completed!'
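Since the original question also requires the scraper not to interfere with keystrokes, a headless variant of the same spawn (a sketch, assuming the Selenium 3.x-era Firefox options API):

from selenium import webdriver
from selenium.webdriver.firefox.options import Options

def test_logic_headless():
    options = Options()
    options.headless = True  # no visible window, so no stolen focus
    driver = webdriver.Firefox(options=options)
    driver.get('https://www.google.co.in')
    # Implement your test logic
    driver.quit()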

Related

Can't send keys using multithreading with selenium

I'm trying to use a multi-threading strategy with Selenium. In short, I'm trying to fill an input field with IDs.
This is my script:
from concurrent.futures import ThreadPoolExecutor
from selenium.webdriver.common.by import By
import numpy as np
import sys
from selenium import webdriver

def driver_setup():
    path = "geckodriver.exe"
    options = webdriver.FirefoxOptions()
    options.add_argument('--incognito')
    # options.add_argument('--headless')
    driver = webdriver.Firefox(options=options, executable_path=path)
    return driver

def fetcher(id, driver):
    print(id)  # this works
    # this doesnt work
    driver.get(
        "https://www.roboform.com/filling-test-all-fields")
    driver.find_element(By.XPATH, '//input[@name="30_user_id"]').send_keys(id)
    time.sleep(2)
    print(i, " sent")
    #return data

def crawler(ids):
    for id in ids:
        print(i)
        results = fetcher(id, driver_setup())

drivers = [driver_setup() for _ in range(4)]
ids = list(range(0, 50))  # generates ids
print(ids)
chunks = np.array_split(np.array(ids), 4)  # splits the id list into 4 chunks

with ThreadPoolExecutor(max_workers=4) as executor:
    bucket = executor.map(crawler, chunks)
    #results = [item for block in bucket for item in block]

[driver.quit() for driver in drivers]
Everything seems to work except the send_keys method. Both print() calls work, so it seems the ids are sent to both functions. Weirdly, I don't get an error message (I get PyCharm's "Process finished with exit code 0" notice), so I don't know what I'm doing wrong.
Any idea what is missing?
I used this example, if it helps: https://blog.devgenius.io/multi-threaded-web-scraping-with-selenium-dbcfb0635e83
When using threading, watch out for exceptions, as they get embedded into the futures.
For example, change your code to the tweaked version below (don't change any other line yet):
with ThreadPoolExecutor(max_workers=4) as executor:
    bucket = executor.map(crawler, chunks)
    # bucket is a lazy iterator; exceptions only surface when it is consumed
    for e_buck in bucket:  # added for demo
        print(e_buck)
You will see that you get exception errors like:
i is not defined: look at the statements print(i, " sent") in fetcher and print(i) in crawler.
Once you fix the above error, the next error will be in the id passed to send_keys(id): id is of type numpy.int64. Change it to str by typecast, str(), i.e. send_keys(str(id)).
So your code, after the fixes, will look like:
from concurrent.futures import ThreadPoolExecutor
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains as AC
from selenium.webdriver.common.keys import Keys
import numpy as np
import sys
import time

def driver_setup():
    path = "geckodriver.exe"
    options = webdriver.FirefoxOptions()
    options.add_argument('--incognito')
    # options.add_argument('--headless')
    driver = webdriver.Firefox(options=options, executable_path=path)
    return driver

def fetcher(id, driver):
    print(id)  # this works
    # this doesnt work - it will work now :)
    driver.get(
        "https://www.roboform.com/filling-test-all-fields")
    driver.find_element(By.XPATH, '//input[@name="30_user_id"]').send_keys(str(id))
    time.sleep(2)
    print(id, " sent")
    #return data

def crawler(ids):
    for id in ids:
        print(id)
        results = fetcher(id, driver_setup())

#drivers = [driver_setup() for _ in range(4)]
ids = list(range(0, 50))  # generates ids
print(ids)
chunks = np.array_split(np.array(ids), 4)  # splits the id list into 4 chunks

with ThreadPoolExecutor(max_workers=4) as executor:
    bucket = executor.map(crawler, chunks)
    # bucket is a lazy iterator, so let's try to print it
    for e_buck in bucket:  # added for demo
        print(e_buck)  # check what prints; the first time you will get:
        # i is not defined -- look at print(i, " sent") and print(i) in crawler.
        # once you fix the above error, the next error will be in send_keys(id):
        # id is of type numpy.int64. change it to str by typecast, str(), send_keys(str(id))
    #results = [item for block in bucket for item in block]

#[driver.quit() for driver in drivers]
Possibly you are trying to invoke send_keys() too early, even before the <input> field has rendered completely.
Solution
Ideally, to send a character sequence to the element, you need to induce WebDriverWait for element_to_be_clickable(), and you can use any of the following locator strategies:
Using NAME:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.NAME, "30_user_id"))).send_keys(id)
Using CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='30_user_id']"))).send_keys(id)
Using XPATH:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='30_user_id']"))).send_keys(id)
Note: You have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
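Putting the two answers together, a sketch of fetcher() with both the str() cast and the explicit wait (same page and field name as in the question):

def fetcher(id, driver):
    driver.get("https://www.roboform.com/filling-test-all-fields")
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.NAME, "30_user_id"))
    ).send_keys(str(id))  # str() cast because id arrives as numpy.int64
    driver.quit()  # close the per-id driver that crawler() spawned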

Selenium: I am able to open a new tab with Selenium but cannot perform any action on this tab

Here is what I am trying to do:
go to https://accounts.google.com/signup
click on the 'Help' button to open up a new tab
enter 'Done' in the 'Describe your issue' text field (that field is displayed on the tab I just opened)
Here is my code:
from selenium import webdriver
import time
from seleniumpagefactory.Pagefactory import PageFactory
from src.pages.home_page import HomePage

class Switch_Me(PageFactory):
    def __init__(self, driver):
        self.driver = driver

    locators = {
        # locators
        "help_btn": ('XPATH', '//*[@id="initialView"]/footer/ul/li[1]/a'),
        "my_problem_on_help_page": ("XPATH", '//*[@id="search-form"]/input[1]')
    }

    def switch(self):
        self.help_btn.click_button()
        time.sleep(2)
        # prints parent window title
        print("Parent window title: " + self.driver.current_url)
        # get current window handle
        p = self.driver.current_window_handle
        print('parent index is : ' + p)
        # get all the tabs opened indexes
        chwd = self.driver.window_handles
        print('the tabs indexes opened are: ')
        print(chwd)
        for w in chwd:
            #print (chwd)
            # switch focus to child window
            if (w != p):
                print("I am on the child tab")
                self.driver.switch_to.window(w)
                time.sleep(20)
                self.my_problem_on_help_page.set_text("done")
                break
        time.sleep(0.9)
        #print("Child window title: " + self.driver.title)
        #driver.quit()
Here is my unittest code:
from src.pages.sample import Switch_Me
import unittest
from selenium import webdriver
import time

class TestLogin(unittest.TestCase):
    # the function is executed before any test
    def setUp(self):
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()
        self.driver.delete_all_cookies()
        self.driver.get("https://accounts.google.com/signup")
        print("I am executed before any test")

    def tearDown(self):
        #self.driver.close()
        print("executed after each test")

    def test_sam(self):
        sa = Switch_Me(self.driver)
        time.sleep(5)
        sa.switch()
        time.sleep(5)
        print("okay switch test")

if __name__ == "__main__":
    unittest.main()
Here is the output when I ran that function (as you can see, it doesn't execute the first line of the if statement, i.e. it cannot switch to the second tab when I hit the 'Help' button).
Regards.
I would start by simplifying your code until you get it to work. The first thing I would do is ignore the page objects for now and write a simple script that does what you want. Once you have it working, start adding back in complexity until something breaks. That will help you isolate the problem.
I would also make sure that your code is giving the site/browser time to open the new tab before moving on. The expected_conditions module has a method new_window_is_opened() that does just that.
Start with simple code. I added some debugging prints.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.maximize_window()
driver.delete_all_cookies()
driver.get("https://accounts.google.com/signup")
wait = WebDriverWait(driver, 10)
window_handles = driver.window_handles # get current window handles for later reference
print("window handles before click: ", window_handles)
driver.find_element(By.LINK_TEXT, "Help").click() # click the Help link
wait.until(EC.new_window_is_opened(window_handles)) # wait for the new window to open
print("window handles after click and wait: ", driver.window_handles)
new_window = [x for x in driver.window_handles if x not in window_handles] # get new window handle
print("new window handle: ", new_window)
driver.switch_to.window(new_window[0])
driver.find_element(By.CSS_SELECTOR, "[name='q']").send_keys("name of issue") # describe the issue
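If the test then needs to continue on the original tab, switching back is symmetric; a small follow-on sketch reusing the window_handles list captured above:

driver.switch_to.window(window_handles[0])  # back to the parent tab handle
print("back on: ", driver.current_url)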

Selenium scrolling problem - element skipping

I am trying to scrape my Deezer music, but when I scroll the site, Selenium skips a lot of tracks: it skips the first 30, displays 10, then skips another 30, and so on until the end of the page.
Here is the code:
import selenium
from selenium import webdriver
path = "./chromedriver"
driver = webdriver.Chrome(executable_path=path)
url = 'https://www.deezer.com/fr/playlist/2560242784'
driver.get(url)
for i in range(0, 20):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        musics = driver.find_elements_by_class_name('BT3T6')
        for music in musics:
            print(music.text)
    except Exception as e:
        print(e)
I've tried to scrape the page based on your code and succeeded. I decided to scroll the page by 500px per step and then remove all duplicates and empty strings.
import selenium
import time
from selenium import webdriver

path = "./chromedriver"
driver = webdriver.Chrome(executable_path=path)
url = 'https://www.deezer.com/fr/playlist/2560242784'
driver.get(url)

all_music = []
last_scroll_y = driver.execute_script("return window.scrollY")
for i in range(0, 100):
    try:
        # first scrape
        musics = driver.find_elements_by_class_name('BT3T6')
        for music in musics:
            all_music.append(music.text)
        # then scroll down +500px
        driver.execute_script("window.scrollTo(0, window.scrollY+500);")
        time.sleep(0.2)  # some wait for the new content (200ms)
        current_scroll_y = driver.execute_script("return window.scrollY")
        # exit the loop if the page is not scrolled any more
        if current_scroll_y == last_scroll_y:
            break
        last_scroll_y = current_scroll_y
    except Exception as e:
        print(e)

# this removes all empty strings
all_music = list(filter(None, all_music))
# this removes all duplications, but keeps the order
# based on https://stackoverflow.com/a/17016257/5226491
# python 3.7 required
all_music = list(dict.fromkeys(all_music))
# this also removes all duplications, but the order will be changed
#all_music = list(set(all_music))

for m in all_music:
    print(m)
print('Total music found: ' + str(len(all_music)))
This runs for about 60-90 seconds and scrapes 1000+ items.
Note: it works fine with the active window, and also works in headless mode, but it stops scraping when I minimize the browser window. So run this with the headless Chrome option:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.headless = True
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
or do not minimize the window.

Selenium can't use the find_element_by_css_selector()

The Situation:
Firstly, I would like to say that I am new to Selenium and decided to pick it up to practice some Python. I am currently following a tutorial online and decided to make a YouTube bot.
The Code:
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
import time
import random
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

def login_with_username_and_password(browser, username, password):
    # FILL UP THE LOGIN FORM
    email_input = browser.find_element_by_css_selector('input[type=email]')
    email = username
    for letter in email:
        email_input.send_keys(letter)
        wait_time = random.randint(0, 1000) / 1000
        time.sleep(wait_time)
    next_button = browser.find_elements_by_css_selector("button")
    time.sleep(2)
    next_button[2].click()
    time.sleep(2)
    password_input = browser.find_element_by_css_selector('input[type=password]')
    password = password
    for letter in password:
        password_input.send_keys(letter)
        wait_time = random.randint(0, 1000) / 1000
        time.sleep(wait_time)
    next_button = browser.find_elements_by_css_selector("button")
    time.sleep(2)
    next_button[1].click()
    confirm_button = browser.find_elements_by_css_selector("div[role=button]")
    time.sleep(2)
    if len(confirm_button) > 0:
        confirm_button[1].click()

def click_on_agree_and_signin(browser):
    # agree_button = browser.find_element_by_css_selector('button')
    # time.sleep(2)
    # agree_button.click()
    signin_buttons = browser.find_elements_by_css_selector(".signin")
    time.sleep(6)  # Wait longer so the message pops up
    while len(signin_buttons) == 0:
        signin_buttons = browser.find_elements_by_css_selector(".signin")
        time.sleep(1)
    signin_buttons[0].click()

def enter_search_term(browser, search_term):
    # Enter text on the search term
    search_input = browser.find_element_by_id("search")
    for letter in search_term:
        search_input.send_keys(letter)
        wait_time = random.randint(0, 1000) / 1000
        time.sleep(wait_time)
    search_input.send_keys(Keys.ENTER)

def enter_comment(browser, comment):
    comment_input = browser.find_element_by_css_selector("ytd-comment-simplebox-renderer")
    entering_comment_actions = ActionChains(browser)
    entering_comment_actions.move_to_element(comment_input)
    entering_comment_actions.click()
    for letter in comment:
        entering_comment_actions.send_keys(letter)
        wait_time = random.randint(0, 1000) / 1000
        entering_comment_actions.pause(wait_time)
    entering_comment_actions.perform()
    time.sleep(1)
    send_comment_button = browser.find_element_by_id("submit-button")
    send_comment_button.click()

###########################################
#             BOT STARTS HERE             #
###########################################
driver = webdriver.Chrome()
driver.maximize_window()
driver.get("https://www.youtube.com/")
all_search_terms = ['online marketing']

# Click Agree and Sign In
click_on_agree_and_signin(driver)

# Sign In
login_with_username_and_password(driver, "hey289895@gmail.com", "-1qa2ws3ed4rf-")

for search_term in all_search_terms:
    enter_search_term(driver, search_term)
    time.sleep(2)
    thumbnails = driver.find_elements_by_css_selector("ytd-video-renderer")
    for index in range(1, 6):
        thumbnails[index].click()
        time.sleep(6)
        enter_comment(driver, "love it")
        driver.execute_script("window.history.go(-1)")
        thumbnails = driver.find_elements_by_css_selector("ytd-video-renderer")
        time.sleep(1)

driver.close()
The Problem:
When running this code, it produces an error related to the find_element_by_css_selector method, most frequently during the sign-in phase, as shown here.
The Question:
Can anybody explain what is going on here and where I am going wrong, as well as how I can fix this, please?
First: it is only a warning, not an error.
In my version, 3.141.0, I can use both methods
from selenium import webdriver
driver = webdriver.Chrome() # Firefox()
driver.find_elements_by_css_selector(...)
driver.find_elements_by_xpath(...)
# etc.
and
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome() # Firefox()
driver.find_elements(By.CSS_SELECTOR, ...)
driver.find_elements(By.XPATH, ...)
# etc.
but it seems they plan to remove the find_elements_by_... functions in future versions (4.x); for now find_elements_by_... still works, but it shows a warning that you should use the second method, find_elements(By.CSS_SELECTOR, ...).
You could use the warnings module to hide these warnings, but it is better to start using only the second method. A sketch of the warnings approach follows.
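For completeness, a sketch of hiding those messages with the warnings module (assuming they are emitted as DeprecationWarning, which is what Selenium's shim uses):

import warnings
# silence the find_elements_by_... deprecation messages
warnings.filterwarnings("ignore", category=DeprecationWarning)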
BTW:
In the source code for find_elements_by_css_selector you can see that it calls warnings.warn(...) and then runs find_elements(By.CSS_SELECTOR, ...).
To check which version you use:
import selenium
print(selenium.__version__)

Wait until page is loaded with Selenium WebDriver for Python

I want to scrape all the data of a page implemented with infinite scroll. The following Python code works.
for i in range(100):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)
This means every time I scroll down to the bottom, I need to wait 5 seconds, which is generally enough for the page to finish loading the newly generated contents. But, this may not be time efficient. The page may finish loading the new contents within 5 seconds. How can I detect whether the page finished loading the new contents every time I scroll down? If I can detect this, I can scroll down again to see more contents once I know the page finished loading. This is more time efficient.
The webdriver will wait for a page to load by default via the .get() method.
As you may be looking for some specific element, as @user227215 said, you should use WebDriverWait to wait for an element located on your page:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

browser = webdriver.Firefox()
browser.get("url")
delay = 3  # seconds
try:
    myElem = WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.ID, 'IdOfMyElement')))
    print "Page is ready!"
except TimeoutException:
    print "Loading took too much time!"
I have used it for checking alerts. You can use any of the other By methods to find the locator.
EDIT 1:
I should mention that the webdriver will wait for a page to load by default. It does not wait for loading inside frames or for ajax requests. It means when you use .get('url'), your browser will wait until the page is completely loaded and then go to the next command in the code. But when you are posting an ajax request, webdriver does not wait and it's your responsibility to wait an appropriate amount of time for the page or a part of page to load; so there is a module named expected_conditions.
Trying to pass find_element_by_id to the constructor for presence_of_element_located (as shown in the accepted answer) caused NoSuchElementException to be raised. I had to use the syntax in fragles' comment:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
driver.get('url')
timeout = 5
try:
    element_present = EC.presence_of_element_located((By.ID, 'element_id'))
    WebDriverWait(driver, timeout).until(element_present)
except TimeoutException:
    print "Timed out waiting for page to load"
This matches the example in the documentation. Here is a link to the documentation for By.
Find below 3 methods:
readyState
Checking page readyState (not reliable):
def page_has_loaded(self):
    self.log.info("Checking if {} page is loaded.".format(self.driver.current_url))
    page_state = self.driver.execute_script('return document.readyState;')
    return page_state == 'complete'
The wait_for helper function is good, but unfortunately click_through_to_new_page is open to the race condition where we manage to execute the script in the old page, before the browser has started processing the click, and page_has_loaded just returns true straight away.
id
Comparing new page ids with the old one:
def page_has_loaded_id(self):
    self.log.info("Checking if {} page is loaded.".format(self.driver.current_url))
    try:
        new_page = browser.find_element_by_tag_name('html')
        return new_page.id != old_page.id
    except NoSuchElementException:
        return False
It's possible that comparing ids is not as effective as waiting for stale reference exceptions.
staleness_of
Using staleness_of method:
@contextlib.contextmanager
def wait_for_page_load(self, timeout=10):
    self.log.debug("Waiting for page to load at {}.".format(self.driver.current_url))
    old_page = self.find_element_by_tag_name('html')
    yield
    WebDriverWait(self, timeout).until(staleness_of(old_page))
For more details, check Harry's blog.
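A hypothetical usage of that context manager, assuming it lives on a class that wraps the driver as in Harry's blog:

with self.wait_for_page_load(timeout=10):
    self.find_element_by_link_text('Next').click()
# execution resumes only once the old <html> element has gone stale,
# i.e. after the new page has replaced it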
As mentioned in the answer from David Cullen, I've always seen recommendations to use a line like the following one:
element_present = EC.presence_of_element_located((By.ID, 'element_id'))
WebDriverWait(driver, timeout).until(element_present)
It was difficult for me to find in one place all the possible locators that can be used with By, so I thought it would be useful to provide the list here (with a short usage sketch after it).
According to Web Scraping with Python by Ryan Mitchell:
ID
Used in the example; finds elements by their HTML id attribute.
CLASS_NAME
Used to find elements by their HTML class attribute. Why is this function CLASS_NAME and not simply CLASS? Using the form object.CLASS would create problems for Selenium's Java library, where .class is a reserved method. In order to keep the Selenium syntax consistent between different languages, CLASS_NAME was used instead.
CSS_SELECTOR
Finds elements by their class, id, or tag name, using the #idName, .className, tagName convention.
LINK_TEXT
Finds HTML tags by the text they contain. For example, a link that says "Next" can be selected using (By.LINK_TEXT, "Next").
PARTIAL_LINK_TEXT
Similar to LINK_TEXT, but matches on a partial string.
NAME
Finds HTML tags by their name attribute. This is handy for HTML forms.
TAG_NAME
Finds HTML tags by their tag name.
XPATH
Uses an XPath expression ... to select matching elements.
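A quick sketch exercising a few of these locators with the same wait pattern (the locator values are illustrative, not from a real page):

WebDriverWait(driver, timeout).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'result')))
WebDriverWait(driver, timeout).until(
    EC.presence_of_element_located((By.LINK_TEXT, 'Next')))
WebDriverWait(driver, timeout).until(
    EC.presence_of_element_located((By.XPATH, "//input[@name='q']")))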
From selenium/webdriver/support/wait.py
driver = ...
from selenium.webdriver.support.wait import WebDriverWait

element = WebDriverWait(driver, 10).until(
    lambda x: x.find_element_by_id("someId"))
On a side note, instead of scrolling down 100 times, you can check if there are no more modifications to the DOM (we are in the case of the bottom of the page being AJAX lazy-loaded)
def scrollDown(driver, value):
    driver.execute_script("window.scrollBy(0," + str(value) + ")")

# Scroll down the page
def scrollDownAllTheWay(driver):
    old_page = driver.page_source
    while True:
        logging.debug("Scrolling loop")
        for i in range(2):
            scrollDown(driver, 500)
            time.sleep(2)
        new_page = driver.page_source
        if new_page != old_page:
            old_page = new_page
        else:
            break
    return True
Have you tried driver.implicitly_wait? It is like a setting for the driver, so you only call it once per session, and it basically tells the driver to wait the given amount of time for each command to be able to execute.
driver = webdriver.Chrome()
driver.implicitly_wait(10)
So if you set a wait time of 10 seconds it will execute the command as soon as possible, waiting 10 seconds before it gives up. I've used this in similar scroll-down scenarios so I don't see why it wouldn't work in your case. Hope this is helpful.
Be sure to use a lowercase 'w' in implicitly_wait.
Here I did it using a rather simple form:
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("url")
searchTxt = ''
while not searchTxt:
    try:
        searchTxt = browser.find_element_by_name('NAME OF ELEMENT')
        searchTxt.send_keys("USERNAME")
    except:
        continue
Solution for AJAX pages that continuously load data. The previously stated methods do not work. What we can do instead is grab the page DOM, hash it, and compare old and new hash values over a delta time.
import time
from selenium import webdriver

def page_has_loaded(driver, sleep_time=2):
    '''
    Waits for page to completely load by comparing current page hash values.
    '''
    def get_page_hash(driver):
        '''
        Returns html dom hash
        '''
        # can find element by either 'html' tag or by the html 'root' id
        dom = driver.find_element_by_tag_name('html').get_attribute('innerHTML')
        # dom = driver.find_element_by_id('root').get_attribute('innerHTML')
        dom_hash = hash(dom.encode('utf-8'))
        return dom_hash

    page_hash = 'empty'
    page_hash_new = ''
    # comparing old and new page DOM hash together to verify the page is fully loaded
    while page_hash != page_hash_new:
        page_hash = get_page_hash(driver)
        time.sleep(sleep_time)
        page_hash_new = get_page_hash(driver)
        print('<page_has_loaded> - page not loaded')
    print('<page_has_loaded> - page loaded: {}'.format(driver.current_url))
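A hypothetical usage of the helper above: call it right after a navigation, and it blocks until two DOM snapshots taken sleep_time apart match (the URL is illustrative):

driver = webdriver.Chrome()
driver.get('https://example.com/dynamic-page')
page_has_loaded(driver, sleep_time=2)
print('safe to scrape now')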
How about putting WebDriverWait in a while loop and catching the exceptions?
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

browser = webdriver.Firefox()
browser.get("url")
delay = 3  # seconds
while True:
    try:
        WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.ID, 'IdOfMyElement')))
        print "Page is ready!"
        break  # it will break from the loop once the specific element is present.
    except TimeoutException:
        print "Loading took too much time! Try again"
You can do that very simply with this function:

def page_is_loading(driver):
    '''
    Polls document.readyState until the browser reports the page is complete.
    '''
    while driver.execute_script("return document.readyState") != "complete":
        pass
    return True

and when you want to do something after the page has finished loading, you can use:

driver = webdriver.Firefox(executable_path='geckodriver.exe')
driver.get("https://www.google.com/")
page_is_loading(driver)
driver.execute_script("alert('page is loaded')")
Use this in your code:
from selenium import webdriver
driver = webdriver.Firefox() # or Chrome()
driver.implicitly_wait(10) # seconds
driver.get("http://www.......")
Or you can use this code if you are looking for a specific tag:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()  # or Chrome()
driver.get("http://www.......")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "tag_id"))
    )
finally:
    driver.quit()
Very good answers here. Quick example of wait for XPATH.
# wait for sizes to load - 2s timeout
try:
    WebDriverWait(driver, 2).until(expected_conditions.presence_of_element_located(
        (By.XPATH, "//div[@id='stockSizes']//a")))
except TimeoutException:
    pass
I struggled a bit to get this working, as it didn't work for me as expected. Anyone who is still struggling to get it working may check this.
I want to wait for an element to be present on the webpage before proceeding with my manipulations.
We can use WebDriverWait(driver, 10, 1).until(), but the catch is that until() expects a function which it can execute every 1 second (the poll frequency) for the duration of the timeout provided (in our case, 10 seconds). So keeping it like below worked for me:
element_found = wait_for_element.until(lambda x: x.find_element_by_class_name("MY_ELEMENT_CLASS_NAME").is_displayed())
Here is what until() does behind the scenes:
def until(self, method, message=''):
    """Calls the method provided with the driver as an argument until the \
    return value is not False."""
    screen = None
    stacktrace = None
    end_time = time.time() + self._timeout
    while True:
        try:
            value = method(self._driver)
            if value:
                return value
        except self._ignored_exceptions as exc:
            screen = getattr(exc, 'screen', None)
            stacktrace = getattr(exc, 'stacktrace', None)
        time.sleep(self._poll)
        if time.time() > end_time:
            break
    raise TimeoutException(message, screen, stacktrace)
If you are trying to scroll and find all items on a page, you can consider using the following. This is a combination of a few methods mentioned by others here, and it did the job for me:
while True:
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.implicitly_wait(30)
        time.sleep(4)
        elem1 = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "element-name")))
        len_elem_1 = len(elem1)
        print(f"A list Length {len_elem_1}")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.implicitly_wait(30)
        time.sleep(4)
        elem2 = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "element-name")))
        len_elem_2 = len(elem2)
        print(f"B list Length {len_elem_2}")
        if len_elem_1 == len_elem_2:
            print(f"final length = {len_elem_1}")
            break
    except TimeoutException:
        print("Loading took too much time!")
Selenium can't detect whether the page is fully loaded, but JavaScript can. I suggest you try this:
from selenium.webdriver.support.ui import WebDriverWait
WebDriverWait(driver, 100).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
This executes JavaScript code instead of Python, because JavaScript can detect when a page is fully loaded: document.readyState will show 'complete'. This code means: for up to 100 seconds, keep polling document.readyState until 'complete' shows up.
nono = driver.current_url
driver.find_element(By.XPATH, "//button[@value='Send']").click()
while driver.current_url == nono:
    pass
print("page loaded.")
