The code clicks on an element that may or may not exist on the page, and then needs to click on every element of the same class:
# Click the optional "read more" button (if present), then expand every
# collapsible group header on the Soccerway matches page.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
import time

my_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'

options = Options()
options.set_preference("general.useragent.override", my_user_agent)
options.page_load_strategy = 'eager'  # don't wait for every sub-resource
options.add_argument('--headless')

driver = webdriver.Firefox(options=options)
driver.get("https://int.soccerway.com/matches/2022/07/23/")

try:
    # The "read more" button may not exist; give it at most 3 seconds.
    # XPath attribute syntax is @class, not #class.  Click the element
    # returned by the wait directly instead of locating it a second time.
    WebDriverWait(driver, 3).until(
        EC.presence_of_element_located((By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]"))
    ).click()
    time.sleep(0.1)
except TimeoutException:
    pass  # button absent on this page -- nothing to do

WebDriverWait(driver, 3).until(
    EC.presence_of_element_located((By.XPATH, "//tr[contains(@class,'group-head')]"))
)
for btn in driver.find_elements(By.XPATH, "//tr[contains(@class,'group-head')]"):
    btn.click()
    time.sleep(0.1)
But this whole job takes 90 seconds, and when I remove the time.sleep calls it drops to 65 seconds; however, without them I notice that at seemingly random times some of the elements that should be clicked are skipped.
Is there any way to do this same service but clicking all the elements at the same time to speed up the process?
Buttons to click on visual examples:
Expected Result after clicks (open the boxes):
In order to speed up the process you can click on all the competition-link one by one in a sequence using either of the following Locator Strategies:
Using CSS_SELECTOR:
# Click every competition-link anchor, one after another (CSS selector variant).
buttons = driver.find_elements(By.CSS_SELECTOR, "tr.group-head.clickable th.competition-link>a")
for button in buttons:
    button.click()
Using XPATH:
# Click every competition-link anchor, one after another (XPath variant).
# XPath attribute syntax is @class, not #class.
for ele in driver.find_elements(By.XPATH, "//tr[@class='group-head clickable ']//th[@class='competition-link']/a"):
    ele.click()
Related
I try to extract data from the website below, but when Selenium clicks the "search" button (the last step of the code) an error is returned — it seems to be blocked by the server. (Everything is fine when I access the website manually; the attached error message only appears when I click the "search" button from the automated Chrome browser.) How can I get around this?
# Accept the risk warning on the Dah Sing fund platform, switch into the
# content frame, set the page size to "all", pick a fund house, and search.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

ser = Service(r"C:\Users\shekc\Documents\chromedriver.exe")

options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
# BUG FIX: the original switch began with an en-dash ("–Referer");
# Chrome command-line switches need a double ASCII hyphen.
options.add_argument("--referer=https://www.dahsing.com/jsp/fundPlatform/index_e.jsp")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36")

driver = webdriver.Chrome(options=options, service=ser)
url = "https://www.dahsing.com/jsp/fundPlatform/risk_warning_e.jsp"
driver.get(url)
time.sleep(3)

# Click "Agree" on the risk-warning page, then enter the content frame.
driver.find_element(By.LINK_TEXT, "Agree").click()
driver.switch_to.default_content()
driver.switch_to.frame(1)

# Count the fund houses.  XPath attribute syntax is @id, not #id.
# BUG FIX: the original rebound the name `Select` to an instance
# (`Select = Select(...)`), so every later `Select(...)` call would fail.
fund_house_select = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlFundHouse"]'))
FH_No = len(fund_house_select.options)

# Select "all per page" (the last option of the page-size dropdown).
page_select = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlPageNumber"]'))
page_select.select_by_index(len(page_select.options) - 1)

# Pick the first real fund house and remember its name.
fund_house_select = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlFundHouse"]'))
fund_house_select.select_by_index(1)
FH_name = fund_house_select.first_selected_option.text

# Click "Search".
driver.find_element(By.LINK_TEXT, "Search").click()
I would like to parse addresses from the following website: https://filialen.migros.ch/de/center:46.8202,6.9575/zoom:8/
So far I am able to go to the website and click away any pop-ups. But then I need to select the drop-down menu with "1163 STANDORTE" which I am not able to locate with my code.
My code so far:
# Open the Migros store-finder page and dismiss any pop-up that appears.
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import time
import itertools
import os
import numpy as np
import csv
import pdb

os.chdir("Directory")

options = webdriver.ChromeOptions()
options.add_argument("--incognito")
# BUG FIX: `options` was built but never passed to the driver, so the
# incognito flag had no effect.  (Positional driver paths are deprecated in
# Selenium 4; prefer service=Service("Directory/chromedriver.exe").)
driver = webdriver.Chrome('Directory/chromedriver.exe', options=options)
driver.get("https://filialen.migros.ch/de/center:46.8202,6.9575/zoom:8/")
time.sleep(1)

try:
    # Dismiss a possible pop-up; XPath attribute syntax is @class, not #class.
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//*[@class='close-icon']"))
    ).click()
except TimeoutException:
    pass  # no pop-up this time
time.sleep(4)
Then my attempts using the span and button element and several options of navigation:
# Attempted locator strategies for the "1163 STANDORTE" button.
# XPath attribute syntax is @class, not #class, throughout.
# Version 1
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='sc-hKFxyN jdMjfs']"))).click()
# Version 2 -- By.CLASS_NAME accepts a single class name only; compound
# classes need a CSS selector instead.
element = driver.find_element(By.CSS_SELECTOR, ".sc-eCApnc.kiXUNl.sc-jSFjdj.lcZmPE")
driver.execute_script("arguments[0].scrollIntoView();", element)
driver.execute_script("arguments[0].click();", element)
# Version 3
element = driver.find_element(By.CSS_SELECTOR, ".sc-eCApnc.kiXUNl.sc-jSFjdj.lcZmPE")
driver.execute_script("arguments[0].click();", element)
# Version 4
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[@class='sc-eCApnc kiXUNl sc-jSFjdj lcZmPE']"))).click()
# Version 5
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[2]/div/main/nav/header/button[1]"))).click()
# Version 6
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[text()='1163 STANDORTE']"))).click()
Actually, there are three problems:
If I just open the link on my Chrome manually, "1163 STANDORTE" appears, whereas if I open the link on Chrome using python, fewer STANDORTE appear, but I cannot zoom out. So I crucially need ALL 1163 STANDORTE to appear.
I cannot locate the button using class and XPATH.
Behind the button is a probably linked XML file, and the information of the addresses only appears after having clicked on the button. In the end I want to scrape text, written on the XML file linked to that button.
Any suggestions?
My question is similar to these previous questions: How to parse several attributes of website with same class name in python? and to Selenium-Debugging: Element is not clickable at point (X,Y)
Few points :
Launch browser in full screen mode.
Use explicit waits.
Use this xpath //span[contains(#aria-label, 'Standorte anzeigen')]/..
Sample code :
# Maximize the window, dismiss the pop-up if present, then click the
# "Standorte anzeigen" button via its aria-label.
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome(driver_path)
driver.maximize_window()
# driver.implicitly_wait(50)
wait = WebDriverWait(driver, 20)
driver.get("https://filialen.migros.ch/de/center:46.8202,6.9575/zoom:8/")
try:
    # XPath attribute syntax is @class, not #class.
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//*[@class='close-icon']"))
    ).click()
except TimeoutException:
    pass  # no pop-up to close
wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@aria-label, 'Standorte anzeigen')]/.."))).click()
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
PS : Please check in the dev tools (Google chrome) if we have unique entry in HTML DOM or not.
Steps to check:
Press F12 in Chrome -> go to element section -> do a CTRL + F -> then paste the xpath and see, if your desired element is getting highlighted with 1/1 matching node.
The data you are looking for is based of fetch / xhr call.
You can get it without scraping. See below.
# The store list is served by an XHR endpoint -- fetch it directly,
# no browser automation needed.
import requests

headers = {
    'Origin': 'https://filialen.migros.ch',
    # BUG FIX: the original value had the whole User-Agent pasted twice
    # (".../537.36User-Agent: Mozilla/5.0 ..."); send it once.
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
}
r = requests.get(
    'https://web-api.migros.ch/widgets/stores?key=loh7Diephiengaiv&aggregation_options[empty_buckets]=true&filters[markets][0][0]=super&filters[markets][0][1]=mno&filters[markets][0][2]=voi&filters[markets][0][3]=mp&filters[markets][0][4]=out&filters[markets][0][5]=spx&filters[markets][0][6]=doi&filters[markets][0][7]=mec&filters[markets][0][8]=mica&filters[markets][0][9]=res&filters[markets][0][10]=flori&filters[markets][0][11]=gour&filters[markets][0][12]=alna&filters[markets][0][13]=cof&filters[markets][0][14]=chng&verbosity=store&offset=0&limit=5000',
    headers=headers)
if r.status_code == 200:
    print('stores data below:')
    data = r.json()
    print(data)
else:
    # Typo fixed: "Statud" -> "Status".
    print(f'Oops. Status code is {r.status_code}')
I'm trying to build a bot for Nike.com.
I'm rotating user agent, automation blink is hidden and have done everything needed (Even using VPN).
URL : https://www.nike.com/ca/t/air-force-1-pixel-shoe-txmVNP/CK6649-100
Size:2
# Nike bot: pick a size, add to bag, then open the cart.
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import requests
import sys
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

ua = UserAgent()
userAgent = ua.random
options.add_argument("--log-level=3")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument(f'user-agent={userAgent}')

driver = webdriver.Chrome(options=options)
driver.minimize_window()
# Hide navigator.webdriver and pin a fixed UA at the network layer.
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})

URL = input("Enter URL : ")
SIZE = input("ENter Size : ")

driver.maximize_window()
driver.get(URL)
time.sleep(5)
print("Starting again")

# Select the requested size, then click "Add to Bag".
# NOTE: find_element_by_xpath was removed in Selenium 4 -- use
# find_element(By.XPATH, ...); XPath attribute syntax is @id, not #id.
while True:
    s_size = driver.find_element(By.XPATH, '//*[@id="buyTools"]/div[1]/fieldset/div/div[' + SIZE + ']/label')
    s_size.click()
    time.sleep(1)
    print('here')
    time.sleep(5)
    add_to_bag = driver.find_element(By.XPATH, '//*[@id="floating-atc-wrapper"]/div/button[1]')
    time.sleep(3)
    add_to_bag.click()
    print('1')
    break

time.sleep(1)

# Poll until the cart icon exists, then open the cart.  Catch only the
# exception we expect instead of a bare except.
while True:
    try:
        move_to_cart = driver.find_element(By.XPATH, '//*[@id="nav-cart"]/a/div/span')
        move_to_cart.click()
        break
    except NoSuchElementException:
        time.sleep(1)
This code is selecting the required size, and also Clicks on Add to Bag button with clicking animation on website but after that also nothing happens even when I manually click on Add To Bag button or reload website nothing happen.
The only way out is open link in new tab and do all things manually
Can anyone give me a workaround for this.
I think selenium is doing its work its getting blocked by website
When I run the code below, I get selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="i0"]/input"}
(Session info: chrome=83.0.4103.97).
However, in my terminal I can access the required element:
>>> steam_pressure_field = browser.find_element_by_xpath('//*[@id="i0"]/input')
>>> steam_pressure_field.get_attribute('value')
'0'
As you can see in the comment section of the code, I have tried waiting for a delay of 3 seconds to see if that made a difference. However, after doing this several times, the page stopped loading when I used the delay (maybe some anti-bot feature?)
So I'm trying to understand what is going on here, and how I can successfully access the fields I require. Is there something I am missing please?
BTW, The div IDs don't seem to be dynamic.
# Read the initial value of the steam-pressure input on the TLV calculator.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
options = webdriver.ChromeOptions()
options.add_argument(f'user-agent={user_agent}')
browser = webdriver.Chrome(options=options)

url = 'https://www.tlv.com/global/US/calculator/superheated-steam-table.html?advanced=off'
browser.get(url)

# Wait for the field instead of an immediate lookup (the original raised
# NoSuchElementException because the page had not finished rendering).
# XPath attribute syntax is @id, not #id, and find_element_by_xpath was
# removed in Selenium 4.
steam_pressure_field = WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="i0"]/input'))
)
print(steam_pressure_field.get_attribute('value'))
To print the value 0 you have to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following Locator Strategies:
Using CSS_SELECTOR:
# Wait for the steam-pressure input to be visible, then print its value.
driver.get('https://www.tlv.com/global/US/calculator/superheated-steam-table.html?advanced=off')
pressure_input = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "div.inputPanel>div.Controlpanel input.inputText"))
)
print(pressure_input.get_attribute("value"))
Using XPATH:
# Wait for the steam-pressure input, then print its value.
# XPath attribute syntax is @class, not #class.
driver.get('https://www.tlv.com/global/US/calculator/superheated-steam-table.html?advanced=off')
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='InputControlLabel' and text()='Steam Pressure']//following-sibling::input[1]"))).get_attribute("value"))
Console Output:
0
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I'm learning to scrape with selenium, but I'm having trouble connecting to this site 'http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720'
it does not load the content of the site
I would like to learn how to connect to this site to request images and data
my code is simple because I'm learning, I looked for ways to make the connection but without success
# Open the Festo catalogue page with an overridden user agent and look up
# the product image element.
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import time  # BUG FIX: time.sleep is used below but was never imported

ff_profile = FirefoxProfile()
ff_profile.set_preference("general.useragent.override", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36")

driver = webdriver.Firefox(firefox_profile=ff_profile)
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(5)

# Returns [] rather than raising when the id is absent.
campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)
As the the desired element is within an <iframe> so to invoke extract the src attribute of the desired element you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired visibility_of_element_located().
You can use the following Locator Strategies:
# The element lives inside an <iframe>: switch into the frame first,
# then read the image's src.  XPath attribute syntax is @id/@name, not #.
driver = webdriver.Firefox(executable_path=r'C:\Utility\BrowserDrivers\geckodriver.exe')
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@id='CamosIFId' and @name='CamosIF']")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//img[@id='of132']"))).get_attribute("src"))
However as in one of the comments #google mentioned, it seems the browsing experiance is better with ChromeDriver / Chrome and you can use the following solution:
# Same flow with ChromeDriver / Chrome.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
# Typo fixed: "WWebDriverWait" -> "WebDriverWait".
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe#CamosIFId[name='CamosIF']")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "img#of132"))).get_attribute("src"))
Note : You have to add the following imports :
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Console Output:
https://www.festo.com/cfp/camosHtml/i?SIG=0020e295a546f45d9acb6844231fd8ff31ca817a_64_64.png
Here you can find a relevant discussion on Ways to deal with #document under iframe
try this
for more information here
FIREFOX_DRIVER_PATH = "your_geckodriver_path"

firefox_options = FirefoxOptions()
firefox_options.headless = True
# Flags useful when running in restricted (e.g. container) environments.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-setuid-sandbox")
firefox_options.add_argument('--disable-dev-shm-usage')
firefox_options.add_argument("--window-size=1920,1080")

driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=FIREFOX_DRIVER_PATH)
# BUG FIX: the query separator "?" was missing before CurrentPartNo.
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(5)

campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)
download the driver from this link
and place it a folder and copy the complete path and paste below
FIREFOX_DRIVER_PATH = "driver_path"

firefox_options = FirefoxOptions()
# Set headless to False (or comment this out) to watch the browser GUI.
firefox_options.headless = True

driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=FIREFOX_DRIVER_PATH)
# BUG FIX: the query separator "?" was missing before CurrentPartNo.
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(3)

campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)