last problem when scraping bet365.com with selenium - python

After searching the community, I found a post saying that the following code worked until a few days ago:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("window-size=1920,1080")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
browser=webdriver.Chrome(options=options,executable_path=r"chromedriver.exe")
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
browser.execute_cdp_cmd('Network.setUserAgentOverride',
{"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4240.198 Safari/537.36'})
browser.get('https://www.bet365.com')
After that, the following worked as a solution:
Open the file chromedriver.exe with Notepad++, search for "cdc_", replace it with "xyz_", and save the file. Then add this line to the chromedriver options: options.add_argument('--disable-blink-features=AutomationControlled')
I don't know why this doesn't work for me. I am using Chrome 88.0.4324.146 and chromedriver version 88.0.4324.96, and I am executing this code:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("window-size=1920,1080")
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
browser=webdriver.Chrome(options=options,executable_path=r"chromedriver.exe")
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
browser.execute_cdp_cmd('Network.setUserAgentOverride',
{"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4240.198 Safari/537.36'})
browser.get('https://www.bet365.com')
But after executing it, the page gets stuck loading until it crashes.

import subprocess
# other imports (e.g. `from selenium import webdriver`)

# Start a normal Chrome instance with the DevTools remote-debugging port open.
# The command is passed as an argument list (no shell=True), so the space in
# "Program Files" needs no extra quoting and no shell is involved.
subprocess.Popen([
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    "--remote-debugging-port=9222",
])

# Attach Selenium to that already-running browser via its debugger address
# instead of letting chromedriver launch a new one.
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get('https://www.bet365.com')
It seems that the site detects the automation somehow. A workaround is to open Chrome with a remote debugging address and then connect Selenium to it using the code above. Change the path to chrome.exe according to your environment.
Note: Make sure you close all Chrome browser windows before running this script.

Related

While I'm scraping with Selenium it keeps telling me that I'm an unusual browser and that I have to enable JavaScript

I just started learning programming and began with scraping using Python Selenium, but when I get the URL and interact with elements, the website keeps sending me (Your browser is a bit unusual...
Try disabling ad blockers and other extensions, enabling javascript, or using a different web browser.)
I tried some of the solutions provided on the site, but none of them solved my problem.
Can you explain and solve the problem with python please?
import selenium
from selenium import webdriver
from time import sleep
# Configure every Chrome option BEFORE the driver is created: the options
# object is handed to webdriver.Chrome() once, so switches added to it after
# that call do not reach the browser session that is already running.
options = webdriver.ChromeOptions()
options.add_argument("--incognito")
options.add_argument("--headless")
options.add_experimental_option('excludeSwitches', ['enable-logging'])

driver = webdriver.Chrome('chromedriver.exe', options=options)

# Per-session settings that are applied to the live driver.
driver.set_window_size(620, 720)
driver.delete_all_cookies()
driver.implicitly_wait(5)
# Override the user agent reported by the browser through the DevTools protocol.
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})
driver.get('https://sso.godaddy.com/v1/account/create?realm=idp&path=%2Fcontact%2Fvalidate%3FcontactType%3DphoneMobile%26app%3Dsso%26path%3Dprofile%252Fedit%26profileUpdate%3DTrue%26userInteraction%3DPROFILE_UPDATE&app=sso&auth_reason=1&iframe=false')

Selenium python can't scrape a site

I need to scrape a website, but it displays "Checking your browser before accessing" and prevents access to the site.
Do I have to define a cookie, or is there another solution?
from selenium import webdriver
from time import sleep
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920,1080")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/20100101 Firefox/95.0")
mainbrowser = webdriver.Chrome(chrome_options=options)
mainbrowser.get('https://trade.kraken.com/charts/KRAKEN:BTC-USDT')
sleep(20)
I have used the following options recently to avoid captcha detection on certain sites:
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("./chrome_data") # Chrome Profile data (moved from ~/Library/Application Support/Google/Chrome)
options.add_argument("--user-data-dir=chrome-data")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
Furthermore I've made use of the library selenium-stealth (https://pypi.org/project/selenium-stealth/) which has incorporated many of the techniques used to avoid detection into a package:
driver = webdriver.Chrome(options=options)
stealth(
driver,
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36',
languages = ["en-US", "en"],
vendor = "Google Inc.",
platform = "Win32",
webgl_vendor = "Intel Inc.",
renderer = "Intel Iris OpenGL Engine",
fix_hairline = True,
run_on_insecure_origins = True)

Python Selenium - Disable Chromedriver log message in cmd

I can't disable the chromedriver logging message "DevTools listening on ws:......" in cmd. I've tried some methods, like:
options.add_argument("log-level=3")
options.add_argument("disable-logging")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
but the message "DevTools listening on ws:....." still appears in the cmd.
My code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep
options = webdriver.ChromeOptions()
options.add_argument("log-level=3")
options.add_argument("start-maximized")
options.add_argument("disable-logging")
# add_experimental_option keeps a single value per option name, so a second
# call with "excludeSwitches" would replace the first one and silently drop
# "enable-logging". Both switches are therefore passed together in one list.
options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
# Turn off Chrome's password-manager prompts.
options.add_experimental_option("prefs", {"credentials_enable_service": False, "profile.password_manager_enabled": False})
browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()), service_log_path = "NUL", options = options)
browser.set_window_size(360, 720)
browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
browser.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36"})
browser.implicitly_wait(5)
browser.get("https://www.instagram.com/")
sleep(5)
Maybe a little late, but this worked for me:
import logging
logger = logging.getLogger('urllib3.connectionpool')
logger.setLevel(logging.INFO)
logger = logging.getLogger('selenium.webdriver.remote.remote_connection')
logger.setLevel(logging.WARNING)
Hope this helps.

Unable to access site with python selenium webdriver

I have been trying to build a universal scraper, but there are some sites that I am unable to access for some reason.
I have tried various options found on the internet to avoid the bot-detection flag, but the site apparently still "detects" that I am a bot.
Here are the options I have been using.
```options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-data-dir=" + r'C:\Users\JEGSTUDIO\AppData\Local\Google\Chrome\selenium')
options.add_argument("window-size=1280,800")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)```
I inspected and compared the cookies, and based on the cookie naming it looks like this site is using Cloudflare's JS challenge.
https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
Here is the full code so you guys can try
```from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-data-dir=" + r'C:\Users\JEGSTUDIO\AppData\Local\Google\Chrome\selenium')
options.add_argument("window-size=1280,800")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\Users\JEGSTUDIO\Gochi\Scraping Project\Scraper - AITOPIA v2\chromedriver88crack.exe')
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.get("https://google.com")
input('Next site')
driver.get("https://www.harrods.com/")
input('enter to quit')
driver.quit()```
Any clue would be appreciated
options.add_argument("--remote-debugging-port=9222")
options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get("https://www.harrods.com/")
Adding the remote debugging port makes the site work.

selenium twitter login without new device notification

Is there a way to log in to twitter with selenium/python without getting the notification "login from a new device" on Twitter?
Current code is
import urllib.request
import selenium
from bs4 import BeautifulSoup
import requests
import re
import json
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path='./chromedriver')
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})
print(driver.execute_script("return navigator.userAgent;"))
def twitter_login(driver, username, password):
    """Open the Twitter login page and submit the given credentials.

    Args:
        driver: A Selenium WebDriver instance used to drive the browser.
        username: Text typed into the input whose ``name`` contains 'username'.
        password: Text typed into the input whose ``name`` contains 'password'.
    """
    driver.get('https://twitter.com/login')
    # XPath attribute tests use '@name'; '#name' is not valid XPath syntax.
    driver.find_element_by_xpath("//input[contains(@name,'username')]").send_keys(username)
    driver.find_element_by_xpath("//input[contains(@name,'password')]").send_keys(password)
    # Of the nodes whose text contains 'Log in', the second match is clicked.
    driver.find_elements_by_xpath("//*[contains(text(), 'Log in')]")[1].click()
twitter_login(driver, username, password)
You can start Selenium with a Chrome profile that is already familiar with your Twitter account. That way, you won't be bothered by any notifications.
options.add_argument(r"--user-data-dir=C:\Users\Me\AppData\Local\Google\Chrome\User Data") # Enter 'chrome://version' in the address bar and paste in the Profile Path
options.add_argument(r"--profile-directory=Profile 4") # The profile name can be found in the last bit of the Profile Path

Categories