Python Selenium - Disable Chromedriver log message in cmd - python

I can't disable chromedriver logging message "DevTools listening on ws:......" in cmd, i've used some methods like,
options.add_argument("log-level=3")
options.add_argument("disable-logging")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
but the message "DevTools listening on ws:....." still appear in the cmd.
My code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep
options = webdriver.ChromeOptions()
options.add_argument("log-level=3")
options.add_argument("start-maximized")
options.add_argument("disable-logging")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
options.add_experimental_option("prefs", {"credentials_enable_service": False, "profile.password_manager_enabled": False})
browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()), service_log_path = "NUL", options = options)
browser.set_window_size(360, 720)
browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
browser.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36"})
browser.implicitly_wait(5)
browser.get("https://www.instagram.com/")
sleep(5)

maybe a little late but this worked for me
import logging
logger = logging.getLogger('urllib3.connectionpool')
logger.setLevel(logging.INFO)
logger = logging.getLogger('selenium.webdriver.remote.remote_connection')
logger.setLevel(logging.WARNING)
Hope this help

Related

How to turn off webdriver mode in selenium python?

I found an option that turns it off:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'
options = webdriver.FirefoxOptions()
options.binary_location = r"C:\Program Files\Mozilla Firefox\firefox.exe"
options.set_preference('devtools.jsonview.enabled', False)
options.set_preference("general.useragent.override", ua)
options.set_preference("dom.webdriver.enabled", False)
options.set_preference('devtools.jsonview.enabled', False)
options.set_preference("useAutomationExtension", False)
driver = webdriver.Firefox(options=options, executable_path='geckodriver.exe')
action=ActionChains(driver)
driver.get('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')
But any way I get this:
How to turn it off and make browser undetectable?

Max threads for selenium [duplicate]

This is the error traceback after several hours of scraping:
The process started from chrome location /usr/bin/google-chrome is no longer running, so ChromeDriver is assuming that Chrome has crashed.
This is my setup of selenium python:
#scrape.py
from selenium.common.exceptions import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
def run_scrape(link):
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument("--headless")
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("--lang=en")
chrome_options.add_argument("--start-maximized")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
chrome_options.binary_location = "/usr/bin/google-chrome"
browser = webdriver.Chrome(executable_path=r'/usr/local/bin/chromedriver', options=chrome_options)
browser.get(<link passed here>)
try:
#scrape process
except:
#other stuffs
browser.quit()
#multiprocess.py
import time,
from multiprocessing import Pool
from scrape import *
if __name__ == '__main__':
start_time = time.time()
#links = list of links to be scraped
pool = Pool(20)
results = pool.map(run_scrape, links)
pool.close()
print("Total Time Processed: "+"--- %s seconds ---" % (time.time() - start_time))
Chrome, ChromeDriver Setup, Selenium Version
ChromeDriver 79.0.3945.36 (3582db32b33893869b8c1339e8f4d9ed1816f143-refs/branch-heads/3945#{#614})
Google Chrome 79.0.3945.79
Selenium Version: 4.0.0a3
Im wondering why is the chrome is closing but other processes are working?
I took your code, modified it a bit to suit to my Test Environment and here is the execution results:
Code Block:
multiprocess.py:
import time
from multiprocessing import Pool
from multiprocessingPool.scrape import run_scrape
if __name__ == '__main__':
start_time = time.time()
links = ["https://selenium.dev/downloads/", "https://selenium.dev/documentation/en/"]
pool = Pool(2)
results = pool.map(run_scrape, links)
pool.close()
print("Total Time Processed: "+"--- %s seconds ---" % (time.time() - start_time))
scrape.py:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
def run_scrape(link):
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument("--headless")
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("--lang=en")
chrome_options.add_argument("--start-maximized")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
chrome_options.binary_location=r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
browser = webdriver.Chrome(executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe', options=chrome_options)
browser.get(link)
try:
print(browser.title)
except (NoSuchElementException, TimeoutException):
print("Error")
browser.quit()
Console Output:
Downloads
The Selenium Browser Automation Project :: Documentation for Selenium
Total Time Processed: --- 10.248600006103516 seconds ---
Conclusion
It is pretty much evident your program is logically flawless and just perfect.
This usecase
As you mentioned this error surfaces after several hours of scraping, I suspect this due to the fact that WebDriver is not thread-safe. Having said that, if you can serialize access to the underlying driver instance, you can share a reference in more than one thread. This is not advisable. But you can always instantiate one WebDriver instance for each thread.
Ideally the issue of thread-safety isn't in your code but in the actual browser bindings. They all assume there will only be one command at a time (e.g. like a real user). But on the other hand you can always instantiate one WebDriver instance for each thread which will launch multiple browsing tabs/windows. Till this point it seems your program is perfect.
Now, different threads can be run on same Webdriver, but then the results of the tests would not be what you expect. The reason behind is, when you use multi-threading to run different tests on different tabs/windows a little bit of thread safety coding is required or else the actions you will perform like click() or send_keys() will go to the opened tab/window that is currently having the focus regardless of the thread you expect to be running. Which essentially means all the test will run simultaneously on the same tab/window that has focus but not on the intended tab/window.
Right now im using this threading module to instantiate one Webdriver each thread
import threading
threadLocal = threading.local()
def get_driver():
browser = getattr(threadLocal, 'browser', None)
if browser is None:
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument("--headless")
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("--lang=en")
chrome_options.add_argument("--start-maximized")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
chrome_options.binary_location = "/usr/bin/google-chrome"
browser = webdriver.Chrome(executable_path=r'/usr/local/bin/chromedriver', options=chrome_options)
setattr(threadLocal, 'browser', browser)
return browser
and it really helps me to scrape faster than executing one driver at a time.

Getting blocked by a website with selenium and chromedriver

I'm having some trouble trying to access a web site (bet365.com) with a chrome driver and selenium (I'm quite being "blocked").
I can access the site with my ordinary chrome but when I try with chrome driver, it doesn't work.
I had this problem before and corrected it by using some options as below (python):
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'PATH_TO\chromedriver.exe')
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
driver.execute_cdp_cmd("Network.enable", {})
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})
driver.get("https://www.bet365.com/")
Now, the problem came back and this code is not working anymore to bypass the protection.
Can someone help me?
In case the Selenium driven ChromeDriver initiated google-chrome Browsing Context is getting detected a potential solution would be to use the undetected-chromedriver to initialize the Chrome Browsing Context.
undetected-chromedriver is an optimized Selenium Chromedriver patch which does not trigger anti-bot services like Distill Network / Imperva / DataDome / Botprotect.io. It automatically downloads the driver binary and patches it.
Code Block:
import undetected_chromedriver as uc
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = uc.Chrome(options=options)
driver.get('https://bet365.com')
References
You can find a couple of relevant detailed discussions in:
Undetected Chromedriver not loading correctly

last problem when scraping bet365.com with selenium

After looking for information in the community, I have seen in a post that the next code worked until some days ago:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("window-size=1920,1080")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
browser=webdriver.Chrome(options=options,executable_path=r"chromedriver.exe")
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
browser.execute_cdp_cmd('Network.setUserAgentOverride',
{"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4240.198 Safari/537.36'})
browser.get('https://www.bet365.com')
After that, the next worked as a solution:
Open the file chromedriver.exe with Notepad ++ and searched and replaced "cdc_" with "xyz_" and saved the file. And add this line to the options of the chromedriver: options.add_argument('--disable-blink-features=AutomationControlled')
I don't know why this don't work for me. I am using Chrome 88.0.4324.146 and the chromedriver version 88.0.4324.96, and executing this code:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("window-size=1920,1080")
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
browser=webdriver.Chrome(options=options,executable_path=r"chromedriver.exe")
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
browser.execute_cdp_cmd('Network.setUserAgentOverride',
{"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4240.198 Safari/537.36'})
browser.get('https://www.bet365.com')
But after executing the page gets stuck loading until it crash.
import subprocess
#other imports
subprocess.Popen(
'"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" --remote-debugging-port=9222', shell=True)
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get('https://www.bet365.com')
It seems that the site detects the automation some how , work around is to open chrome using debug address and then connect selenium to this using above code . Change the chrome.exe according to your environment
Note: Make sure you close all the chrome browsers before running this script

selenium twitter login without new device notification

Is there a way to log in to twitter with selenium/python without getting the notification "login from a new device" on Twitter?
Current code is
import urllib.request
import selenium
from bs4 import BeautifulSoup
import requests
import re
import json
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path='./chromedriver')
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})
print(driver.execute_script("return navigator.userAgent;"))
def twitter_login(driver, username, password):
driver.get('https://twitter.com/login')
driver.find_element_by_xpath("//input[contains(#name,'username')]").send_keys(username)
driver.find_element_by_xpath("//input[contains(#name,'password')]").send_keys(password)
driver.find_elements_by_xpath("//*[contains(text(), 'Log in')]")[1].click()
twitter_login(driver, username, password)
You can start Selenium with a Chrome profile that is already familiar with your Twitter account. That way, you won't be bothered by any notifications.
options.add_argument(r"--user-data-dir=C:\Users\Me\AppData\Local\Google\Chrome\User Data") # Enter 'chrome://version' in the adress bar and paste in the Profile Path
options.add_argument(r"--profile-directory=Profile 4") # The profile name can be found in the last bit of the Profile Path

Categories