Python Selenium: How to avoid being detected/ blocked? - python

The error message: I try to extract data from the website below. But when Selenium clicks the "search" button (the last step of the code), an error is returned; it seems to be blocked by the server. (It is totally fine when I access the website manually, but when I use the automated Chrome browser, the attached error message is returned when I click the "search" button.) How should I get around this?
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

ser = Service(r"C:\Users\shekc\Documents\chromedriver.exe")

options = webdriver.ChromeOptions()
# Hide the "controlled by automated software" infobar and the automation
# extension -- two of the signals sites use to detect Selenium.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
# NOTE: Chrome has no "--Referer" command-line switch; the original passed
# "–Referer=..." with an en-dash, which Chrome silently ignores.  A Referer
# header cannot be set through ChromeOptions, so that line is dropped.
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36")

driver = webdriver.Chrome(options=options, service=ser)

url = "https://www.dahsing.com/jsp/fundPlatform/risk_warning_e.jsp"
driver.get(url)
time.sleep(3)

# Accept the risk-warning page.
driver.find_element(By.LINK_TEXT, "Agree").click()

# The fund search form lives inside the page's second frame.
driver.switch_to.default_content()
driver.switch_to.frame(1)

# Count the fund houses.  XPath attribute tests use @id -- the original
# "#id" is CSS shorthand and is invalid XPath.  Also avoid shadowing the
# Select class with a local variable of the same name.
fund_house = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlFundHouse"]'))
FH_No = len(fund_house.options)

# Show all results on one page by choosing the last "per page" option.
per_page = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlPageNumber"]'))
per_page.select_by_index(len(per_page.options) - 1)

# Select the first real fund house and remember its display name.
fund_house = Select(driver.find_element(By.XPATH, '//*[@id="mainContent_ddlFundHouse"]'))
fund_house.select_by_index(1)
FH_name = fund_house.first_selected_option.text

# Submit the search.
driver.find_element(By.LINK_TEXT, "Search").click()

Related

Click on all elements of the same class at the same time

The code clicks on an element that may or may not exist on the page and then needs to click on all elements of the same class:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
import time

my_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'

options = Options()
options.set_preference("general.useragent.override", my_user_agent)
# 'eager' returns control at DOMContentLoaded instead of full page load.
options.page_load_strategy = 'eager'
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)

driver.get("https://int.soccerway.com/matches/2022/07/23/")

# Best-effort click on the "read more" button, which may be absent.
# XPath attribute tests must use @class; the original "#class" is invalid
# XPath and raises InvalidSelectorException.
try:
    WebDriverWait(driver, 3).until(
        EC.presence_of_element_located((By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]"))
    )
    driver.find_element(By.XPATH, "//a[contains(@class,'tbl-read-more-btn')]").click()
    time.sleep(0.1)
except Exception:
    # Button not present / not clickable in time -- deliberately ignored.
    pass

# Expand every collapsed group header row.
WebDriverWait(driver, 3).until(
    EC.presence_of_element_located((By.XPATH, "//tr[contains(@class,'group-head')]"))
)
for btn in driver.find_elements(By.XPATH, "//tr[contains(@class,'group-head')]"):
    btn.click()
    time.sleep(0.1)
But this takes 90 seconds to run, and when I remove the time.sleep it drops to 65 seconds; however, if I remove it I notice that at some very random times some of the elements that should be clicked are skipped.
Is there any way to do this same service but clicking all the elements at the same time to speed up the process?
Buttons to click on visual examples:
Expected Result after clicks (open the boxes):
In order to speed up the process you can click on all the competition-link one by one in a sequence using either of the following Locator Strategies:
Using CSS_SELECTOR:
# Click each competition link in sequence (loop body indentation restored).
for ele in driver.find_elements(By.CSS_SELECTOR, "tr.group-head.clickable th.competition-link>a"):
    ele.click()
Using XPATH:
# XPath attribute tests use @class ("#class" is invalid XPath).  Note the
# trailing space inside 'group-head clickable ' matches the page's markup.
for ele in driver.find_elements(By.XPATH, "//tr[@class='group-head clickable ']//th[@class='competition-link']/a"):
    ele.click()

.click() button doing nothing in Selenium python

I'm trying to build a bot for Nike.com.
I'm rotating the user agent, the automation Blink feature is hidden, and I have done everything needed (even using a VPN).
URL : https://www.nike.com/ca/t/air-force-1-pixel-shoe-txmVNP/CK6649-100
Size:2
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import requests
import sys
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

options = webdriver.ChromeOptions()
# Hide the automation infobar/extension and Blink's AutomationControlled flag.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
ua = UserAgent()
userAgent = ua.random
options.add_argument("--log-level=3")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument(f'user-agent={userAgent}')

driver = webdriver.Chrome(options=options)
driver.minimize_window()
# Mask navigator.webdriver and pin a fixed user agent over CDP.
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})

URL = input("Enter URL : ")
SIZE = input("ENter Size : ")
driver.maximize_window()
driver.get(URL)
time.sleep(5)
print("Starting again")

# Pick the requested size, then press "Add to Bag".
# find_element_by_xpath was removed in Selenium 4 -- use find_element(By.XPATH, ...).
# XPath attribute tests use @id ("#id" is invalid XPath syntax).
while True:
    s_size = driver.find_element(By.XPATH, '//*[@id="buyTools"]/div[1]/fieldset/div/div[' + SIZE + ']/label')
    s_size.click()
    time.sleep(1)
    print('here')
    time.sleep(5)
    add_to_bag = driver.find_element(By.XPATH, '//*[@id="floating-atc-wrapper"]/div/button[1]')
    time.sleep(3)
    add_to_bag.click()
    print('1')
    break

# Open the cart, retrying once per second until the cart icon is found.
while True:
    try:
        move_to_cart = driver.find_element(By.XPATH, '//*[@id="nav-cart"]/a/div/span')
        move_to_cart.click()
        break
    except Exception:
        time.sleep(1)
This code selects the required size and also clicks on the Add to Bag button (the clicking animation shows on the website), but after that nothing happens — even when I manually click the Add To Bag button or reload the website, nothing happens.
The only way out is open link in new tab and do all things manually
Can anyone give me a workaround for this.
I think selenium is doing its work its getting blocked by website

Learning to scrape with Selenium and Python

I'm learning to scrape with selenium, but I'm having trouble connecting to this site 'http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720'
it does not load the content of the site
I would like to learn how to connect to this site to request images and data
my code is simple because I'm learning, I looked for ways to make the connection but without success
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import time  # required by time.sleep() below; missing from the original

# Override the user agent through a Firefox profile (Selenium 3-era API).
ff_profile = FirefoxProfile()
ff_profile.set_preference("general.useragent.override", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36")
driver = webdriver.Firefox(firefox_profile = ff_profile)

driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(5)

# find_elements_* returns a (possibly empty) list of WebElements, so this
# prints the element objects, not their content.
campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)
As the desired element is within an <iframe>, to extract the src attribute of the desired element you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired visibility_of_element_located().
You can use the following Locator Strategies:
driver = webdriver.Firefox(executable_path=r'C:\Utility\BrowserDrivers\geckodriver.exe')
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
# Wait for the iframe, switch into it, then read the image's src attribute.
# XPath attribute tests use @id/@name ("#id"/"#name" is invalid XPath).
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@id='CamosIFId' and @name='CamosIF']")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//img[@id='of132']"))).get_attribute("src"))
However, as mentioned in one of the comments, it seems the browsing experience is better with ChromeDriver / Chrome, and you can use the following solution:
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
# Wait for the iframe and switch into it.  The original "WWebDriverWait"
# (double W) is a typo that would raise NameError at runtime.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe#CamosIFId[name='CamosIF']")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "img#of132"))).get_attribute("src"))
Note : You have to add the following imports :
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Console Output:
https://www.festo.com/cfp/camosHtml/i?SIG=0020e295a546f45d9acb6844231fd8ff31ca817a_64_64.png
Here you can find a relevant discussion on Ways to deal with #document under iframe
try this
for more information here
FIREFOX_DRIVER_PATH = "your_geckodriver_path"

firefox_options = FirefoxOptions()
firefox_options.headless = True
# Common hardening flags for running headless in containers/CI.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-setuid-sandbox")
firefox_options.add_argument('--disable-dev-shm-usage')
firefox_options.add_argument("--window-size=1920,1080")

driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=FIREFOX_DRIVER_PATH)
# "?" restored before the query parameter -- the original URL ran the path
# and "CurrentPartNo" together, requesting a nonexistent page.
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(5)
campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)
download the driver from this link
and place it a folder and copy the complete path and paste below
FIREFOX_DRIVER_PATH = "driver_path"

firefox_options = FirefoxOptions()
# Headless: no GUI; set to False (or comment out) to watch the browser.
firefox_options.headless = True

driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=FIREFOX_DRIVER_PATH)
# "?" restored before the query parameter (missing in the original URL).
driver.get('http://www.festo.com/cat/it_it/products_VUVG_S?CurrentPartNo=8043720')
time.sleep(3)
campo_busca = driver.find_elements_by_id('of132')
print(campo_busca)

Selenium Python 2.7: how to fill in the gaps?

everyone! I'm new to coding and I'm trying to write a webcrawler using Selenium with Python 2.7. I'm still on an early stage, however I've been having problems filling in the gaps of the website. It's https://comtrade.un.org/data/ from the UN. I've already tried inspecting the webpage elements and using different methods (find_element_by_id, find_element_by_class, find_element_by_name, send_keys, etc.), but none seems to work.
Here's my code thus far:
import selenium.webdriver.support.ui as ui
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
HEADER = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
BASE_URL = 'https://comtrade.un.org/data/'


def initialize_webdriver():
    """Return a Chrome driver with a custom user agent and fixed window size.

    The original mixed the Firefox-profile API into Chrome:
    ``webdriver.Chrome()`` has no ``set_preference`` method, and passing a
    driver object as the first argument of ``webdriver.Chrome`` is wrong.
    Chrome takes the user agent through ChromeOptions instead.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('user-agent=' + HEADER)
    driver = webdriver.Chrome(chrome_options=options)
    # driver.implicitly_wait(30)
    driver.set_window_size(1180, 980)
    return driver


def main():
    # driver = initialize_webdriver()
    driver = webdriver.Chrome()
    driver.get(BASE_URL)
    # ``find_element_by_class`` does not exist in the Selenium API; use the
    # plural find_elements_by_class_name so the loop iterates WebElements.
    for period in driver.find_elements_by_class_name('s2id_periods'):
        period.clear()
        period.send_keys('2013')
Can anyone help me out? Thanks in advance!

Python | PhantomJS not clicking on element

I have been trying to solve this for an entire week now and this is my last shot at this (asking stackoverflow).
I use phantomjs with selenium to go to the login page of YouTube and fill in the credentials and log in.
I get to the login page and it manages to fill in the email, but no matter what I try, it won't click on the "next" button.
# PhantomJS + Selenium: open YouTube, start the Google sign-in flow.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
# Spoof the user agent through PhantomJS desired capabilities.
# NOTE(review): the capability key and UA value are wrapped in "-" characters;
# presumably a formatting artifact of "phantomjs.page.settings.userAgent" and a
# normal Mozilla UA string -- verify before running.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["-phantomjs.page.settings.userAgent-"] = (
"-Mozilla-5.0 (Windows NT 6.3; WOW64) AppleWebKit-537.36 (KHTML, like Gecko) Chrome-34.0.1847.137 Safari-537.36-"
)
driver = webdriver.PhantomJS(desired_capabilities=dcap)
# Desktop-sized viewport so the desktop layout (and its buttons) renders.
driver.set_window_size(1920,1080)
driver.get("https://youtube.com")
# Click the "Sign in" button on the YouTube home page.
driver.find_element_by_class_name("yt-uix-button-content").click()
print("Logging in...")
# Type the account e-mail into the Google login form.
driver.find_element_by_id("identifierId").send_keys("email")
time.sleep(1)
# Attempt to click the "Next" button by its class name.
driver.find_element_by_class_name("ZFr60d").click()
# Screenshot for debugging what the headless browser actually rendered.
driver.save_screenshot('testing4.png')
Now I have tried all these
# Alternative locators tried (each on its own).  In XPath, attributes are
# addressed with @id; the original "#id" is invalid XPath syntax.
driver.find_element_by_xpath("""//*[@id="identifierNext"]/content/span""").click()
driver.find_element_by_css_selector("#identifierNext>content>span").click()
webdriver.ActionChains(driver).move_to_element(element).click(element).perform()
driver.find_element_by_id("identifierNext").click()
and none of these works. I tried the JavaScript command as well.
I would also like to add that clicking on the element works perfectly fine with selenium without PhantomJS.
I would really appreciate it if anyone here could help me.
EDIT:
This info might be helpful. After clicking "Next", it takes about a second to get to the password part. It's a sliding animation.
This question has yet not been answered.
Here is the Answer to your Question:
A couple of words:
The locator you have used to identify the Sign in button is not unique. Consider constructing an unique xpath for the Sign in button.
The locator you have used to identify the Email or phone also needs to be modified a bit.
You can consider to use the locator id to identify and click on the Next button.
Here is the code block which does the same and prints out Clicked on Next Button on the console.
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Spoof the user agent through PhantomJS desired capabilities.
# NOTE(review): the key/value are wrapped in "-" characters; presumably a
# formatting artifact of "phantomjs.page.settings.userAgent" -- verify.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["-phantomjs.page.settings.userAgent-"] = (
"-Mozilla-5.0 (Windows NT 6.3; WOW64) AppleWebKit-537.36 (KHTML, like Gecko) Chrome-34.0.1847.137 Safari-537.36-"
)
driver = webdriver.PhantomJS(desired_capabilities=dcap, executable_path="C:\\Utility\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe")
driver.get("https://youtube.com")

# Unique XPath for the "Sign in" button.  XPath attribute tests use @class;
# the original "#class"/"#id" is invalid XPath syntax.
driver.find_element_by_xpath("//button[@class='yt-uix-button yt-uix-button-size-default yt-uix-button-primary']/span[@class='yt-uix-button-content']").click()
print("Logging in...")

# Fill in "Email or phone", then click the "Next" button by its id.
email_phone = driver.find_element_by_xpath("//input[@id='identifierId']")
email_phone.send_keys("debanjanb")
driver.find_element_by_id("identifierNext").click()
print("Clicked on Next Button")
Let me know if this Answers your Query.

Categories