I have been trying to understand why the following code does not work. I intend to send a key to a search input that is on a different web tab and then press a button. After reading similar questions in the forum, I think the element might be either hidden or wrapped. How can I find out whether that is the case? Also, sometimes the find calls return the elements in a list that has to be accessed by indexing, and the examples I have studied are not like that. Any help will be appreciated.
# Search a barcode on MusicMagpie's "start selling" page and click submit.
from selenium import webdriver
from selenium.common.exceptions import *

webdriver_path = "C:/Users/escob/Documents/Projects/WebScrapingExample/chromedriver.exe"
magpie_url = 'https://www.musicmagpie.co.uk/start-selling/'
search_item = "9781912047734"

options = webdriver.ChromeOptions()
browser = webdriver.Chrome(webdriver_path, options=options)
browser.get(magpie_url)

# Open the "Media" tab first; the search input lives on that tab.
sell_tab = browser.find_element_by_id('pills-media-tab')
sell_tab.click()

# find_element_by_* (singular) returns ONE element -- no indexing needed.
# find_elements_by_* (plural) is what returns a list.  Also note that XPath
# attribute tests use '@', so it is @name, not #name.
search_bar = browser.find_element_by_xpath("//input[@name='searchString']")
search_bar.send_keys(search_item)

# The class name must be a quoted string literal; unquoted it is parsed as the
# expression `submit - media - search` and raises NameError.
button = browser.find_element_by_class_name('submit-media-search')
button.click()
Thank you so much for your help from a beginner Seleniumista.
I'm not 100% sure what you're trying to accomplish with search, so I can only provide guidance on searching for your string 9781912047734. The code below will enter the search string and click the add button.
I noted that the page has an "accept cookies button", so I added the code to bypass this.
Please let me know if this code helps you.
# Answer script: dismiss MusicMagpie's cookie banner, open the Media tab,
# type the ISBN into the search box and click the add button.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-popup-blocking")
# Hide the "Chrome is being controlled by automated test software" banner
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
driver = webdriver.Chrome('/usr/local/bin/chromedriver', options=chrome_options)
url = 'https://www.musicmagpie.co.uk/start-selling'
response = driver.get(url)  # NOTE(review): get() returns None; `response` is unused
driver.implicitly_wait(15)
# Explicitly wait (up to 120 s) for the cookie bar to appear in the DOM,
# then accept it only if it is actually visible.
hidden_element = WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, "cookieBar")))
if hidden_element.is_displayed():
    driver.implicitly_wait(30)
    driver.find_element_by_link_text('Accept all cookies').click()
else:
    pass
tab = driver.find_element_by_id('pills-media-tab')
tab.click()
# NOTE(review): implicitly_wait only sets the element-lookup timeout; it does
# not block here "waiting for the page to fully load" as originally claimed.
driver.implicitly_wait(60)
# this xpath can likely be refined
enter_barcode = driver.find_element_by_xpath("*//div[3]/div/form/div/div[1]/div[1]/input")
enter_barcode.send_keys("9781912047734")
# NOTE(review): send_keys is synchronous; this call merely changes the implicit
# lookup timeout and does not "wait for the keys to be sent".
driver.implicitly_wait(20)
add_button = driver.find_element_by_css_selector("div.form:nth-child(19) > "
                                                 "div:nth-child(1) > form:nth-child(1) > "
                                                 "div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > "
                                                 "input:nth-child(1)")
add_button.click()
# do something else
# call close when finished
driver.close()
Related
I'm trying to create a script that shows only Pikachus on the Singapore Poke Map; the rest of the code iterates over the matching elements, extracts the coordinates of each one, and prints the list.
I'm trying for a long time many suggestions I've seen here but still unable to make the checkbox be set with the latest code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time


def find_pokemon():
    """Open sgpokemap.com, clear all filters and try to enable only Pikachu."""
    links = []  # NOTE(review): never used in the visible part of this function
    service = ChromeService(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    driver.get('https://sgpokemap.com/index.html?fbclid=IwAR2p_93Ll6K9b923VlyfaiTglgeog4uWHOsQksvzQejxo2fkOj4JN_t-MN8')
    driver.find_element(By.ID, 'filter_link').click()
    driver.find_element(By.ID, 'deselect_all_btn').click()
    # NOTE(review): the page has no element with id 'search_pokemon' (the search
    # box uses name='search_pokemon'), so this lookup raises NoSuchElementException.
    driver.find_element(By.ID, 'search_pokemon').send_keys("pika")
    # NOTE(review): switch_to.frame() expects a single element, but
    # find_elements (plural) returns a list -- and there is no frame here at all.
    driver.switch_to.frame(driver.find_elements(By.ID, "filter"))
    driver.find_element(By.ID, 'checkbox_25').click()
The second part of the code is working when I'm checking the box manually after putting a breakpoint and ignoring the checkbox click() exception.
Do you have any suggestions what can I try?
Bonus question, how can I determine and close the donate view:
There are several problems with your code:
There is no element with ID = 'search_pokemon'
There is no frame there to switch into it.
You need to use WebDriverWait expected_conditions to wait for elements to be clickable.
And generally you need to learn how to create correct locators.
The following code works:
# Answer: filter the Singapore Poke Map down to Pikachu using explicit waits.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
# Raw string: '\w' in a plain literal is an invalid escape (DeprecationWarning).
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 30)

url = "https://sgpokemap.com/index.html?fbclid=IwAR2p_93Ll6K9b923VlyfaiTglgeog4uWHOsQksvzQejxo2fkOj4JN_t-MN8"
driver.get(url)
# The donation dialog appears only sometimes; dismissing it is best effort.
# Catch Exception explicitly rather than a bare 'except:'.
try:
    wait.until(EC.element_to_be_clickable((By.ID, 'close_donation_button'))).click()
except Exception:
    pass
wait.until(EC.element_to_be_clickable((By.ID, 'filter_link'))).click()
wait.until(EC.element_to_be_clickable((By.ID, "deselect_all_btn"))).click()
# The search box is located by its name attribute; it has no id on this page.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "[name='search_pokemon']"))).send_keys("pika")
# Hidden checkboxes carry style="display: none;"; only the matching one lacks a
# style attribute, hence [not(@style)].  XPath attributes use '@', not '#'.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='filter_checkbox'][not(@style)]//label"))).click()
The result is:
UPD
This time I saw the donation dialog so I added the mechanism to close it.
I still can't see an element there with ID = 'search_pokemon', as you mentioned.
As for the XPath to find the relevant checkbox: when a pokemon name is entered, you can see in the dev tools that there are a lot of checkboxes, but all of them are invisible while only one (in our case) is visible. The invisible elements all have the attribute style="display: none;", while the enabled element has no style attribute. This is why [not(@style)] appears there. So I'm looking for a parent element //div[@class='filter_checkbox'] that also has no style attribute — in XPath terms, //div[@class='filter_checkbox'][not(@style)] — and then I just look for its label child to click it. This can also be done with CSS selectors as well.
The list of invisible elements with the enabled one:
With the help and answers from @Prophet, here is the current code for crawling the map and getting all of the Pikachus' coordinates:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from keep import saveToKeep


def find_pokemon():
    """Crawl sgpokemap.com for visible Pikachu icons and save their map links.

    Retries itself recursively when fewer than two coordinates were collected,
    then writes the results to Google Keep via saveToKeep().
    """
    links = []
    options = Options()
    # Headless mode in the hope of better performance.
    options.add_argument("--headless")
    options.add_argument("disable-infobars")
    # Raw string: '\w' in a plain literal is an invalid escape.
    webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
    driver = webdriver.Chrome(options=options, service=webdriver_service)
    wait = WebDriverWait(driver, 30)
    driver.get('https://sgpokemap.com')
    # The donation dialog appears only sometimes; closing it is best effort.
    try:
        wait.until(EC.element_to_be_clickable((By.ID, 'close_donation_button'))).click()
    except Exception:
        pass
    wait.until(EC.element_to_be_clickable((By.ID, 'filter_link'))).click()
    wait.until(EC.element_to_be_clickable((By.ID, "deselect_all_btn"))).click()
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "[name='search_pokemon']"))).send_keys("pika")
    # Hidden checkboxes have style="display: none;"; only the match lacks the
    # style attribute, hence [not(@style)].  XPath attributes use '@', not '#'.
    wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='filter_checkbox'][not(@style)]//label"))).click()
    # Icons are not always rendered right away; wait until at least one shows.
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'pokemon_icon_img')))
    pokeList = driver.find_elements(By.CLASS_NAME, 'pokemon_icon_img')
    for poke in pokeList:
        # Stale/obscured icons throw on click; skip them best-effort.
        try:
            poke.click()
            links.append(driver.find_element(By.LINK_TEXT, "Maps").get_attribute('href'))
        except Exception:
            pass
    # Each link looks like "...?q=<lat,lng>"; keep only the coordinate part.
    res = []
    for link in links:
        res.append(link.split("=")[1].replace("'", ""))
    if len(res) > 1:
        saveToKeep(res)
        print("success")
    else:
        print("unsuccessful")
        # Retry: the page sometimes fails to render icons on the first pass.
        find_pokemon()


if __name__ == '__main__':
    find_pokemon()
Used the headless chrome option in hope to achieve better
performance.
Commented out 'count' in case I want to limit list results
(currently I'm getting like 15 results tops when unlimited
although there are many more...weird :( )
the following code wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'pokemon_icon_img'))) is needed for now since it's not always
showing icons right away, so it's either that or adding a constant
time delay.
Have made this method recursive in case it's unsuccessful(sometimes it still gives out exceptions)
Lastly, saveToKeep(res) method is a simple method I'm using to open
and write results into my google keep notes. Needed to get an app
password within google security settings and I'm using it with my google account credentials for login.
Any comments or regards for improvements are welcomed :D
I am using selenium WebDriver to collect the URL's to images from a website that is loaded with JavaScript. It appears as though my following code returns only 160 out of the about 240 links. Why might this be - because of the JavaScript rendering?
Is there a way to adjust my code to get around this?
# Collect image URLs from the JS-rendered gallery.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
driver.get('https://www.politicsanddesign.com/')
# XPath attribute tests use '@class'; '#class' is not valid XPath and would
# raise InvalidSelectorException.
img_url = driver.find_elements_by_xpath("//div[@class='responsive-image-wrapper']/img")
img_url2 = []
for element in img_url:
    # srcset is protocol-relative ("//host/... 400w, ..."); keep the 400w
    # variant and prepend the scheme.
    new_srcset = 'https:' + element.get_attribute("srcset").split(' 400w', 1)[0]
    img_url2.append(new_srcset)
You need to wait for all those elements to be loaded.
The recommended approach is to use WebDriverWait expected_conditions explicit waits.
This code is giving me 760-880 elements in the img_url2 list:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
# Raw string: '\w' in a plain literal is an invalid escape.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)

url = "https://www.politicsanddesign.com/"
driver.get(url)  # once the browser opens, turn off the year filter and scroll all the way to the bottom as the page does not load all elements on rendering
# Explicitly wait for the lazily-rendered image wrappers.  XPath attribute
# tests use '@class'; '#class' is invalid XPath.
wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='responsive-image-wrapper']/img")))
# time.sleep(2)  # uncomment if grabbing the elements immediately proves flaky
img_url = driver.find_elements(By.XPATH, "//div[@class='responsive-image-wrapper']/img")
img_url2 = []
for element in img_url:
    # srcset is protocol-relative; keep the 400w variant and add the scheme.
    new_srcset = 'https:' + element.get_attribute("srcset").split(' 400w', 1)[0]
    img_url2.append(new_srcset)
I'm not sure if this code is stable enough, so if needed you can activate the delay between the wait line and the next line grabbing all those img_url.
EDIT:
Once the browser opens, you'll need to turn off the page's filter and then scroll all the way to the bottom of the page, as it does not automatically load all of the elements when it renders — only once you've worked with the page a little bit.
I click on a specific button on a page, but for some reason there is one of the buttons that I can't click on, even though it's positioned exactly like the other elements like it that I can click on.
The code below as you will notice, it opens a page, then clicks to access another page, do this step because only then can you be redirected to the real url that has the //int.
import datetime
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Spoof the user agent stored on disk and mute the browser.
with open('my_user_agent.txt') as f:
    my_user_agent = f.read()
headers = {
    'User-Agent': my_user_agent
}
options = Options()
options.set_preference("general.useragent.override", my_user_agent)
options.set_preference("media.volume_scale", "0.0")
# 'eager' hands control back at DOMContentLoaded instead of full page load.
options.page_load_strategy = 'eager'
driver = webdriver.Firefox(options=options)

today = datetime.datetime.now().strftime("%Y/%m/%d")
driver.get(f"https://int.soccerway.com/matches/{today}/")
# Pick the international locale so we land on the //int URL.
# XPath attribute tests use '@' (e.g. @class, @href); '#class' is invalid.
driver.find_element(by=By.XPATH, value="//div[contains(@class,'language-picker-trigger')]").click()
time.sleep(3)
driver.find_element(by=By.XPATH, value="//li/a[contains(@href,'https://int.soccerway.com')]").click()
time.sleep(3)
# The "read more" button is not always present; expanding it is best effort.
try:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]")))
    driver.find_element(by=By.XPATH, value="//a[contains(@class,'tbl-read-more-btn')]").click()
    time.sleep(0.1)
except Exception:
    pass
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[@data-exponload='']//button[contains(@class,'expand-icon')]")))
for btn in driver.find_elements(by=By.XPATH, value="//div[@data-exponload='']//button[contains(@class,'expand-icon')]"):
    btn.click()
    time.sleep(0.1)
I've tried adding btn.location_once_scrolled_into_view before each click to make sure the button is correctly in the click position, but the problem still persists.
I also tried using the options mentioned here:
Selenium python Error: element could not be scrolled into view
But the essence of the case kept persisting in error, I couldn't understand what the flaw in the case was.
Error text:
selenium.common.exceptions.ElementNotInteractableException: Message: Element <button class="expand-icon"> could not be scrolled into view
Stacktrace:
RemoteError#chrome://remote/content/shared/RemoteError.jsm:12:1
WebDriverError#chrome://remote/content/shared/webdriver/Errors.jsm:192:5
ElementNotInteractableError#chrome://remote/content/shared/webdriver/Errors.jsm:302:5
webdriverClickElement#chrome://remote/content/marionette/interaction.js:156:11
interaction.clickElement#chrome://remote/content/marionette/interaction.js:125:11
clickElement#chrome://remote/content/marionette/actors/MarionetteCommandsChild.jsm:204:29
receiveMessage#chrome://remote/content/marionette/actors/MarionetteCommandsChild.jsm:92:31
Edit 1:
I noticed that the error only happens when the element is colored orange (when they are colored orange it means that one of the competition games is happening now, in other words it is live).
But the button is still the same, it keeps the same element, so I don't know why it's not being clicked.
See the color difference:
Edit 2:
If you open the browser normally or without the settings I put in my code, the elements in orange are loaded already expanded, but using the settings I need to use, they don't come expanded. So please use the settings I use in the code so that the page opens the same.
What you're missing here is wrapping the command inside the loop that opens those sections with a try-except block.
the following code works. I tried running is several times.
import datetime
import time
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
# Match the asker's 'eager' page-load strategy so the page behaves the same.
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
# Raw string: '\w' in a plain literal is an invalid escape.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options, desired_capabilities=caps)
wait = WebDriverWait(driver, 10)

today = datetime.datetime.now().strftime("%Y/%m/%d")
driver.get(f"https://int.soccerway.com/matches/{today}/")
# XPath attribute tests use '@' (e.g. @class, @href); '#class' is invalid.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(@class,'language-picker-trigger')]"))).click()
time.sleep(5)
wait.until(EC.element_to_be_clickable((By.XPATH, "//li/a[contains(@href,'https://int.soccerway.com')]"))).click()
time.sleep(5)
# The "read more" button is not always present; expanding it is best effort.
try:
    wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]")))
    driver.find_element(By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]").click()
    time.sleep(0.1)
except Exception:
    pass
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@data-exponload='']//button[contains(@class,'expand-icon')]")))
# Only click the collapse buttons of sections that are NOT currently live
# ('status-playing' sections load already expanded); this avoids the
# ElementNotInteractableException on the orange live rows.
for btn in driver.find_elements(By.XPATH, "//div[@data-exponload='' and not(contains(@class,'status-playing'))]//button[contains(@class,'expand-icon')]"):
    btn.click()
    time.sleep(0.1)
UPD
We need to open only the closed elements; the already-opened sections should stay open. In this case the click will always work without throwing exceptions. To do so, we just add that condition to the locator: click only the buttons that are not inside a section whose status is currently "playing".
I want to download U.S. Department of Housing and Urban Development data using Python's Selenium. Here's my code.
# Download HUD's deteriorated-paint-index dataset: open the explorer page,
# expand the side panel, then click the CSV download card.
import os
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select

chrome_opts = webdriver.ChromeOptions()
# Save downloads into the current working directory.
download_prefs = {"download.default_directory": os.getcwd(), "directory_upgrade": True}
chrome_opts.add_experimental_option("prefs", download_prefs)
# chrome_opts.headless = True  # enable for headless runs
chrome_opts.add_experimental_option('excludeSwitches', ['enable-logging'])

url = "https://hudgis-hud.opendata.arcgis.com/datasets/deteriorated-paint-index-by-county/explore"

# webdriver-manager resolves the matching chromedriver binary.
browser = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_opts)
waiter = WebDriverWait(browser, 60)
browser.maximize_window()
browser.get(url)

# First expand the side panel, then click the download button inside it.
for selector in ("#ember97", "calcite-card > div > calcite-button"):
    waiter.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector))).click()
I can click the button to expand the side panel, where the CSV file button is located, but I cannot click the CSV file itself to download it. My first thought was to check for if the side panel existed within an IFRAME, so I did
# Probe for iframes on the page; this returns an empty list, confirming the
# side panel is not inside an iframe (it lives in a shadow DOM instead).
seq = driver.find_elements_by_tag_name('iframe')
seq
And it returned nothing. The content is nested in a class called side-panel-ref. Is there a way to switch to this somehow so I can click that content, when iframes aren't there? What might I be missing?
Your button is inside a shadowroot.
You see this when you inspect in devtools:
The quickest and easiest way to handle this is with some JS. This is your script slightly refactored, plus the JS call:
url = "https://hudgis-hud.opendata.arcgis.com/datasets/deteriorated-paint-index-by-county/explore"
wait = WebDriverWait(driver, 60)
driver.maximize_window()
driver.get(url)
# Expand the side panel that holds the download card.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#ember97'))).click()
# Wait for the download-card host element, then click the button that lives
# inside its shadow root.  Selenium CSS selectors cannot pierce a shadow root,
# so the final click is performed with JavaScript via shadowRoot.querySelector.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.dataset-download-card > hub-download-card")))
driver.execute_script('document.querySelector("div.dataset-download-card > hub-download-card").shadowRoot.querySelector("calcite-card > div > calcite-button").click()')
It's a fairly lengthy JS call. It's reasonably self explanatory if you read it, there are 5 parts to it: document.querySelector(..).shadowRoot.querySelector(..).click() - but just ask if you need more support.
Please also be aware that selenium is bad at downloading files. There's no API that exposes the downloads progress. You'll need to ensure your browser remains open while you download the file.
It seems a pretty quick download so you might get away with a hard coded sleep.
Also worth a mention - If you're not a fan of the long JS, you can also break it down like so:
# Equivalent without the long one-line JS: locate the shadow host, ask the
# browser for its shadowRoot, then search inside that root for the button.
container = driver.find_element_by_css_selector("div.dataset-download-card > hub-download-card")
shadowRoot = driver.execute_script("return arguments[0].shadowRoot", container)
shadowRoot.find_element_by_css_selector("calcite-card > div > calcite-button").click()
Selenium does not seem to register that I manually go to the publish0x.com page.
Does anyone know a solution?
My goal is to manually do the captcha at the login page and afterwards, when I log in and land on the main page I want the script to resume.
# Log in to publish0x with a manually-solved captcha, then open the newest post.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import datetime
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import sys


def waitForLoad(inputXPath):
    """Block until the element at inputXPath is present in the DOM (10 s max)."""
    Wait = WebDriverWait(driver, 10)
    Wait.until(EC.presence_of_element_located((By.XPATH, inputXPath)))


# Fill in real credentials.  The originals were unterminated string literals
# (email = ' / password = '), which is a SyntaxError.
email = ''
password = ''

options = Options()
options.add_experimental_option("detach", True)
options.add_argument("--window-size=1920,1080")
## options.add_argument("user-data-dir=/Users/vadim/Library/Application Support/BraveSoftware/Brave-Browser")
options.binary_location = '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'
driver_path = '/usr/local/bin/chromedriver'
driver = webdriver.Chrome(options=options, executable_path=driver_path)

driver.get('https://www.publish0x.com/login')
# XPath attribute tests use '@id'; '#id' is invalid XPath.
waitForLoad('//*[@id="email"]')
E_Mail_vak = driver.find_element_by_xpath('//*[@id="email"]')
E_Mail_vak.send_keys(email)
Pass_vak = driver.find_element_by_xpath('//*[@id="password"]')
Pass_vak.send_keys(password)

# The reCAPTCHA widget lives in its own iframe; switch in to tick the anchor.
frame = driver.find_element_by_xpath('//iframe[contains(@src, "recaptcha")]')
driver.switch_to.frame(frame)
Captcha = driver.find_element_by_xpath("//*[@id='recaptcha-anchor']")
Captcha.click()
# Leave the captcha iframe so later lookups target the main document again.
driver.switch_to.default_content()

wait = WebDriverWait(driver, 500)
# NOTE(review): url_to_be compares the FULL current url, so the bare domain
# "publish0x.com" can never match and this wait always times out after 500 s.
# Consider EC.url_to_be("https://www.publish0x.com/") or EC.url_changes(...).
wait.until(EC.url_to_be("publish0x.com"))

driver.get('https://www.publish0x.com/newposts')
post = driver.find_element_by_css_selector('#main > div.infinite-scroll > div:nth-child(1) > div.content')
# Keep the anchor ELEMENT: .text is a plain string and has no .click() method,
# so the original `title.click()` raised AttributeError.
title_link = post.find_element_by_css_selector('h2 > a')
title = title_link.text
author = post.find_element_by_css_selector('p.text-secondary > small:nth-child(4) > a').text
title_link.click()
slider = driver.find_element_by_xpath('//*[@id="tipslider"]')
There are two ways I can think of, one by adding an Input statement like this:
options.add_argument('--disable-gpu')  # for properly seeing the outputs
# Pause the script until the user has solved the captcha.  The original call
# was missing its closing quote, which is a SyntaxError.
input("Please do the captcha and press any key...")
In this way, the user would complete the data and then press any key for the script to continue.
The other way is by adding a try and except statement.
try:
    # Probe for an element that exists ONLY after a successful login
    # (e.g. a profile or settings link); replace "some-element" accordingly.
    # Requires: from selenium.common.exceptions import NoSuchElementException
    driver.find_element_by_id("some-element")
except NoSuchElementException:
    # do something like
    print("Captcha Failed or Incomplete...")
In this, replace the element id "some-element" with any element that is present and only present after the user logs in, for e.g elements like Profile_nav or Settings are only present when someone logs in. So if the element doesn't exist then it would mean that the user didn't complete the captcha.