I found an option that turns it off:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'
options = webdriver.FirefoxOptions()
options.binary_location = r"C:\Program Files\Mozilla Firefox\firefox.exe"
options.set_preference('devtools.jsonview.enabled', False)
options.set_preference("general.useragent.override", ua)
options.set_preference("dom.webdriver.enabled", False)
options.set_preference("useAutomationExtension", False)
driver = webdriver.Firefox(options=options, executable_path='geckodriver.exe')
action = ActionChains(driver)
driver.get('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')
But anyway I still get this:
How do I turn it off and make the browser undetectable?
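One way to check what the test page actually sees (a minimal diagnostic sketch, assuming the driver from the snippet above is still open; this is my addition, not part of the original script):
```
# If the preferences took effect, navigator.webdriver should no longer report true
# and the user agent should be the overridden string.
print(driver.execute_script("return navigator.webdriver"))
print(driver.execute_script("return navigator.userAgent"))
```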
I'm trying to get access to a web site, but in headless mode I get this:
<html><head>
<title>Access Denied</title>
</head><body>
<h1>Access Denied</h1>
You don't have permission to access "http://www.bybit.com/fiat/trade/otc/?" on this server.<p>
Reference #18.3f62645f.1657282455.2f87631
</p></body></html>
So I set a user agent by adding this:
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
chrome_options.add_argument(f'user-agent={user_agent}')
It works well on my local machine, but when I deployed it on a Heroku server, I got the same issue.
Main part of the code:
from selenium import webdriver
from time import sleep
import os

chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-features=NetworkService")
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
chrome_options.add_argument('--allow-running-insecure-content')
chrome_options.add_argument("window-size=800,600")
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
chrome_options.add_argument(f'user-agent={user_agent}')
driver = webdriver.Chrome(options=chrome_options)
bybit_url = 'https://www.bybit.com/fiat/trade/otc/?actionType=0&token=USDT&fiat=RUB&paymentMethod=75'
driver.get(bybit_url)
sleep(5)
print(driver.page_source)
Perhaps access is denied because the Selenium-driven, ChromeDriver-initiated google-chrome browsing context is being detected as a bot.
To avoid the detection, you need to use the argument --disable-blink-features=AutomationControlled as follows:
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
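A minimal sketch of how that argument slots into the options from the question (Heroku-specific flags omitted; the user agent is just the one from the question):
```
from selenium import webdriver

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
chrome_options.add_argument(f'user-agent={user_agent}')

driver = webdriver.Chrome(options=chrome_options)
driver.get('https://www.bybit.com/fiat/trade/otc/?actionType=0&token=USDT&fiat=RUB&paymentMethod=75')
print(driver.page_source[:200])  # check whether the Access Denied page is still returned
driver.quit()
```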
I had exactly the same problem on Heroku trying to access bybit sites with selenium.
I used this user agent and it works for me:
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
After many other trial-and-error attempts, I arrived at this user agent by comparing the user agents used locally and on Heroku.
Locally (without --headless):
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
#options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://www.bybit.com/fiat/trade/otc/?actionType=0&token=USDT&fiat=RUB&paymentMethod=75")
print(driver.page_source)
print(driver.execute_script("return navigator.userAgent"))
driver.quit()
Result:
(...)</iframe></html>
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36
Locally (with --headless):
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://www.bybit.com/fiat/trade/otc/?actionType=0&token=USDT&fiat=RUB&paymentMethod=75")
print(driver.page_source)
print(driver.execute_script("return navigator.userAgent"))
driver.quit()
Result:
<html><head>
<title>Access Denied</title>
</head><body>
<h1>Access Denied</h1>
You don't have permission to access "http://www.bybit.com/fiat/trade/otc/?" on this server.<p>
Reference #18.17bd2f17.1664927640.82fbfed7
</p></body></html>
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/106.0.5249.91 Safari/537.36
On Heroku (with --headless):
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
print(driver.execute_script("return navigator.userAgent"))
driver.quit()
Result:
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/106.0.5249.91 Safari/537.36
On Heroku (with --headless and the user-agent override):
from selenium import webdriver
import os
options = webdriver.ChromeOptions()
options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
options.add_argument("--headless")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
user_agent ='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
options.add_argument('user-agent={0}'.format(user_agent))
driver = webdriver.Chrome(options=options)
driver.get("https://www.bybit.com/fiat/trade/otc/?actionType=0&token=USDT&fiat=RUB&paymentMethod=75")
print(driver.page_source)
print(driver.execute_script("return navigator.userAgent"))
driver.quit()
Winning result:
(...)</iframe></html>
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36
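If you prefer not to hard-code the Chrome version, one variant (my own sketch, not from the answer above) is to read the headless user agent once and strip the Headless token before creating the real driver:
```
from selenium import webdriver

# First, probe what user agent headless Chrome reports by default.
probe_options = webdriver.ChromeOptions()
probe_options.add_argument("--headless")
probe = webdriver.Chrome(options=probe_options)
headless_ua = probe.execute_script("return navigator.userAgent")  # contains "HeadlessChrome/..."
probe.quit()

# Then reuse it with "HeadlessChrome" rewritten to "Chrome".
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("user-agent=" + headless_ua.replace("HeadlessChrome", "Chrome"))
driver = webdriver.Chrome(options=options)
```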
I tried to log in to zedrun using Selenium but failed.
I want to use the MetaMask extension there, so I am using profiles,
but the website catches me every time.
options = webdriver.ChromeOptions()
options.add_argument("user-data-dir=C:\\Users\\code\\AppData\\Local\\Google\\Chrome\\User Data")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument(
"user-agent= Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36")
options.add_argument("cookie: cookie here")
driver = webdriver.Chrome(options=options)
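One detail worth noting: passing "cookie: ..." through add_argument has no effect. Cookies are normally added with driver.add_cookie() after navigating to the target domain (a minimal sketch with placeholder values, not the question's actual cookie):
```
# Cookies can only be added for the domain that is currently loaded.
driver.get("https://example.com/")  # placeholder: navigate to the target site first
driver.add_cookie({"name": "session", "value": "cookie value here"})  # placeholder name/value
driver.refresh()  # reload so the site sees the cookie
```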
I need to scrape a website, but it displays "Checking your browser before accessing" and prevents access to the site.
Do I have to define a cookie, or is there another solution?
from selenium import webdriver
from time import sleep
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920,1080")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/20100101 Firefox/95.0")
mainbrowser = webdriver.Chrome(options=options)
mainbrowser.get('https://trade.kraken.com/charts/KRAKEN:BTC-USDT')
sleep(20)
I have used the following options recently to avoid captcha detection on certain sites:
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("./chrome_data") # Chrome Profile data (moved from ~/Library/Application Support/Google/Chrome)
options.add_argument("--user-data-dir=chrome-data")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
Furthermore, I've made use of the selenium-stealth library (https://pypi.org/project/selenium-stealth/), which packages up many of the techniques used to avoid detection:
from selenium_stealth import stealth

driver = webdriver.Chrome(options=options)
stealth(
driver,
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36',
languages = ["en-US", "en"],
vendor = "Google Inc.",
platform = "Win32",
webgl_vendor = "Intel Inc.",
renderer = "Intel Iris OpenGL Engine",
fix_hairline = True,
run_on_insecure_origins = True)
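To double-check that the patches were applied (my addition, not part of the answer), a few of the spoofed values can be read back:
```
# After stealth() has run, these should match the values passed above.
print(driver.execute_script("return navigator.webdriver"))  # expected: None
print(driver.execute_script("return navigator.vendor"))     # expected: "Google Inc."
print(driver.execute_script("return navigator.platform"))   # expected: "Win32"
```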
I have been trying to build a universal scraper, but there is one site that I am unable to access for some reason.
I have tried various options suggested on the internet to avoid the bot-detection flag, but the site still apparently "detects" that I am a bot.
Here are the options I have been using:
```options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-data-dir=" + r'C:\Users\JEGSTUDIO\AppData\Local\Google\Chrome\selenium')
options.add_argument("window-size=1280,800")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
```
I inspected and compared the cookies, and it looks like this site is using Cloudflare's JS challenge, based on the cookie naming.
https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
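For reference, a quick way to list those cookie names from Selenium (my addition; cf_clearance and __cf_bm are typical Cloudflare challenge cookies):
```
# Print the cookie names set for the current page to spot Cloudflare cookies.
for cookie in driver.get_cookies():
    print(cookie["name"])
```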
Here is the full code so you can try it:
```
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-data-dir=" + r'C:\Users\JEGSTUDIO\AppData\Local\Google\Chrome\selenium')
options.add_argument("window-size=1280,800")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\Users\JEGSTUDIO\Gochi\Scraping Project\Scraper - AITOPIA v2\chromedriver88crack.exe')
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.get("https://google.com")
input('Next site')
driver.get("https://www.harrods.com/")
input('enter to quit')
driver.quit()
```
Any clue would be appreciated.
options.add_argument("--remote-debugging-port=9222")
options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get("https://www.harrods.com/")
Adding the remote debugging port makes the site work.
This is my code:
from os.path import abspath, dirname
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
WINDOW_SIZE = "1920,1080"
path_profile = "C:\\Users\\xxxx\\AppData\\Local\\Google\\Chrome\\User Data"
chrome_options.add_argument("--user-data-dir="+path_profile)
chrome_options.add_argument("--window-size=%s" % WINDOW_SIZE)
chrome_options.add_argument("--enable-javascript")
chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--ignore-ssl-errors')
chrome_options.headless = True
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=chrome_options, executable_path=xxxxx)
driver.get('https://www.youtube.com/upload')
time.sleep(10)
driver.save_screenshot(dirname(abspath(__file__))+'/screen_shot.png')
driver.close()
In my profile, I have an ad-blocker extension and cookies for my YouTube login.
But when I take the screenshot,
I realize Selenium has not used the profile. Is there a way to do this? Thanks.
Set the path_profile value to the full path of your profile and add your profile name at the end, e.g. Profile 1:
path_profile = "C:\\Users\\xxxx\\AppData\\Local\\Google\\Chrome\\User Data\\Profile 1"