The bot I have made keeps getting detected - python

Hey there, I am trying to automate a buying process, but there is some abnormal behaviour: sometimes (very rarely) the bot makes it to the end, and the rest of the time it is detected in the middle or at the beginning of the process. I have heard that the IP I am using rotates in the middle of the process and that this can be the reason. I am attaching the code I use for Selenium initialization. I have tried everything I could think of; please help.
def __init__(self,url):
self.url = url
def get_selenium_res(self):
try:
software_names = [SoftwareName.CHROME.value]
operating_systems = [OperatingSystem.WINDOWS.value,OperatingSystem.LINUX.value]
user_agent_rotator = UserAgent(software_names=software_names,operating_systems=operating_systems,limit=1000)
user_agent = user_agent_rotator.get_random_user_agent()
print(user_agent)
options = Options()
options.add_argument('--no-sandbox')
options.add_argument("start-maximized")
options.add_argument('--disable-gpu')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(f'user-agent={user_agent}')
PROXY = "http://pl.smartproxy.com:20000"
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.autodetect = False
capabilities = webdriver.DesiredCapabilities.CHROME
proxy.http_proxy = PROXY
proxy.ssl_proxy = PROXY
proxy.add_to_capabilities(capabilities)
browser = uc.Chrome(executable_path='chromedriver.exe',chrome_options=options)#Remove navigator.webdriver Flag using JavaScript
browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

Related

Proxy in Selenium Python with Firefox

This is the code that I'm using. I've tried numerous other solutions from Stack Overflow, but every time I make a request to httpbin.org/ip I see my own IP.
# Selenium Logging: only report errors from the 'seleniumwire' logger
selenium_logger = logging.getLogger('seleniumwire')
selenium_logger.setLevel(logging.ERROR)
# Web driver settings
# Profile: accept certificates that would otherwise be rejected as untrusted
profile = webdriver.FirefoxProfile()
profile.accept_untrusted_certs = True
# Capabilities
firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
firefox_capabilities['marionette'] = True
# Options: headless is switched off, so a browser window is shown
options = Options()
options.headless = False
brightdata_proxy = "personal_brighdata_url"
# NOTE(review): only ssl_proxy is assigned on this Proxy object; proxy_type and
# http_proxy are never set in this snippet - confirm whether that is why the
# proxy is not applied.
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = brightdata_proxy
firefox_proxies.add_to_capabilities(firefox_capabilities)
# The driver receives the profile, the capabilities (carrying the proxy) and the options.
self._driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(),
firefox_profile=profile,
capabilities=firefox_capabilities,
options=options,
)
Can anybody help me figure out these proxies?

Selenium Chrome Headless failing immediately

When I set "options.headless = True", this fails because the EC condition is never met, as if the menus are not actually being clicked and the page title is not changing. When not in headless mode, this works absolutely fine and the EC condition is nicely met, implying the top level menu selections worked fine and took me to the right page.....
This is my first attempt at headless mode, but I am assuming it just builds a virtual dom, and so the navigation through the menus should take me to the same page, with the same title as non-headless?
class OrganizationsAutomate():
    """Navigate the application's menus to the Organizations page.

    The wrapped ``driver`` must expose ``ui.click_nav_menu(...)`` and be
    accepted by ``WebDriverWait`` as its driver argument.
    """

    def __init__(self, driver: "ERVAutomate"):
        """Keep a reference to the automation driver used by every task.

        The annotation is a string so that defining this class does not
        require ``ERVAutomate`` to be resolvable at class-creation time.
        """
        self.driver = driver

    def task_navigate_to_organizations(self):
        """Click Products, then Organizations, and wait for the page title.

        Raises:
            Exception: whatever ``WebDriverWait.until`` raised when the title
                did not contain "Organizations" within the 30 second wait;
                it is printed and then re-raised unchanged.
        """
        print("Navigating to Organizations...")
        print("Clicking Products menu...")
        self.driver.ui.click_nav_menu(selector_type="CSS", selector_text="a.vv-navbar-link",
                                      menu_name="Products", time_to_sleep=2)
        print("Clicking Organizations menu...")
        self.driver.ui.click_nav_menu(selector_type="CSS", selector_text="a.vv-navbar-link",
                                      menu_name="Organizations", time_to_sleep=2)
        try:
            # Only the success/failure of the wait matters; its return value is not used.
            WebDriverWait(self.driver, 30).until(
                EC.title_contains("Organizations")
            )
        except Exception as e:
            print("Do not appear to be on the Organizations page....", e)
            # Bare raise re-raises the active exception as-is.
            raise
        print("Navigated to Organizations....")
Update: I am having more luck by not maximising the window in Headless mode, and just setting a window size.
def __init__(self, driver_path, url, user_name, user_password, implicit_wait=5, teardown=False):
    """Store the session settings and start a headless Chrome webdriver.

    Args:
        driver_path: Directory containing the Chrome driver; appended to the
            process ``PATH`` so the webdriver base class can locate it.
        url: Stored on the instance; not used inside this constructor.
        user_name: Stored on the instance; not used inside this constructor.
        user_password: Stored on the instance; not used inside this constructor.
        implicit_wait: Value passed to ``implicitly_wait`` after start-up.
        teardown: Stored on the instance; not used inside this constructor.
    """
    self.__driver_path = driver_path
    self.__url = url
    self.__user_name = user_name
    self.__user_password = user_password
    self.__implicit_wait = implicit_wait
    self.__teardown = teardown
    self.__ui = UIInteract(self)
    # Add the chrome driver directory to PATH. The separator is required:
    # without it the directory is fused onto the last existing PATH entry and
    # neither location resolves. A caller that already supplies a leading
    # separator keeps working unchanged.
    separator = "" if self.__driver_path.startswith(os.pathsep) else os.pathsep
    os.environ['PATH'] += separator + self.__driver_path
    options = webdriver.ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.headless = True
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-infobars")
    # Headless mode gets an explicit window size rather than being maximised.
    options.add_argument("--window-size=1920,1080")
    # initialise webdriver super class
    super(ERVAutomate, self).__init__(options=options)
    # implicit wait time for finding an element in selenium
    self.implicitly_wait(self.__implicit_wait)
In headless mode, you can add the arguments below to make it more stable.
# Build the Chrome options for a headless run.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# NOTE(review): --start-maximized and --window-size are both passed here;
# confirm which one takes effect when running headless.
options.add_argument("--start-maximized")
options.add_argument("--disable-extensions")
options.add_argument("--disable-infobars")
options.add_argument('--window-size=1920,1080')
# Start Chrome with an explicit driver path (driver_path is defined elsewhere).
driver = webdriver.Chrome(executable_path = driver_path, options = options)
or
driver = webdriver.Chrome(options = options)
Also try adding a short delay; using explicit waits is the way to go with Selenium.
Also read here for more details.
You can try this argument as well as headless mode.
options.add_argument('--no-sandbox')

Using proxy with Chromedriver within Google Cloud Engine

I'm trying to use a proxy within Google Cloud Engine with chromedriver.
I've tried many of the suggested solutions (see below), but every time the IP was still the Google server's.
Attempt 1:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Attempt 1: hand the proxy to Chrome through DesiredCapabilities.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
# NOTE(review): other snippets write the size as "1920,1080"; confirm Chrome
# accepts the "1920x1080" form.
chrome_options.add_argument("--window-size=1920x1080")
chrome_options.add_argument("--ignore-certificate-errors")
myproxy = '207.157.25.44:80'
# NOTE(review): Proxy and ProxyType are not imported by the two import lines of
# this snippet - confirm they are imported elsewhere.
prox = Proxy()
prox.proxy_type = ProxyType.MANUAL
prox.http_proxy = myproxy
prox.ssl_proxy = myproxy
capabilities = webdriver.DesiredCapabilities.CHROME
prox.add_to_capabilities(capabilities)
driver = webdriver.Chrome(options=chrome_options,
executable_path="/user/sebastien/chromedriver",
desired_capabilities=capabilities)
driver.get("https://www.whatismyip.com/")
# get_location() is called with no arguments and does not receive the driver.
get_location()
Attempt 2:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Attempt 2: set the proxy through the 'prefs' experimental option.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
# NOTE(review): other snippets write the size as "1920,1080"; confirm Chrome
# accepts the "1920x1080" form.
chrome_options.add_argument("--window-size=1920x1080")
chrome_options.add_argument("--ignore-certificate-errors")
myproxy = '207.157.25.44:80'
# NOTE(review): the "network.proxy.*" keys look like Firefox preference names -
# confirm whether Chrome honours them.
prefs = {}
prefs["network.proxy.type"] = 1
prefs["network.proxy.http"] = myproxy
prefs["network.proxy.ssl"] = myproxy
chrome_options.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome(options=chrome_options,
executable_path="/user/sebastien/chromedriver")
driver.get("https://www.whatismyip.com/")
# get_location() is called with no arguments and does not receive the driver.
get_location()
Attempt 3:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Attempt 3: pass the proxy as a --proxy-server command-line argument.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
# NOTE(review): other snippets write the size as "1920,1080"; confirm Chrome
# accepts the "1920x1080" form.
chrome_options.add_argument("--window-size=1920x1080")
chrome_options.add_argument("--ignore-certificate-errors")
myproxy = '207.157.25.44:80'
# The proxy address is prefixed with the http:// scheme here.
chrome_options.add_argument("--proxy-server=http://%s" % myproxy)
driver = webdriver.Chrome(options=chrome_options,
executable_path="/user/sebastien/chromedriver")
driver.get("https://www.whatismyip.com/")
# get_location() is called with no arguments and does not receive the driver.
get_location()
None of them would reach the website with the desired IP.
Again, this issue is happening when running the code on GCP Compute Engine, Canonical, Ubuntu, 16.04 LTS, amd64 xenial.
Below the function to test the IP:
import json
from urllib.request import urlopen
def get_location(ip=False):
    """Print the location details that ipinfo.io reports for an address.

    The request is made with ``urlopen`` from this Python process, not
    through a Selenium-controlled browser, so proxy settings applied to a
    webdriver have no influence on what is printed.

    Args:
        ip: Address to look up; anything after the first ``:`` (such as a
            port) is dropped. When falsy, ``http://ipinfo.io/json`` is
            queried without an address.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
    """
    if ip:
        url = f"http://ipinfo.io/{str(ip).split(':')[0]}/json"
    else:
        url = "http://ipinfo.io/json"
    # The context manager closes the HTTP response even if decoding fails.
    with urlopen(url) as response:
        data = json.loads(response.read().decode('utf-8'))
    # A field may be absent from the reply; print a placeholder rather than
    # abort the whole report with a KeyError.
    address = data.get('ip', 'n/a')
    org = data.get('org', 'n/a')
    city = data.get('city', 'n/a')
    country = data.get('country', 'n/a')
    region = data.get('region', 'n/a')
    print('IP detail')
    print(f'IP : {address} \nRegion : {region} \nCountry : {country} \nCity : {city} \nOrg : {org}')
Thanks for reading !
I don't think the issue that you're having is related to your code implementation. I'm fairly sure it is related to your use of a free proxy. These types of proxies are notorious for having connection issues, such as timeouts related to latency. These proxies can also be intermittent, which means that they can go down at any time. And sometimes they are abused, so they can get blocked.
Your proxy is 207.157.25.44:80, which is shown in the image below.
When I tested this code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Same proxy address as in the question, passed to Chrome via --proxy-server.
proxy_server = '207.157.25.44:80'
chrome_options = Options()
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument('--proxy-server=%s' % proxy_server)
# disable the banner "Chrome is being controlled by automated test software"
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
# The chromedriver path is passed as the first positional argument.
driver = webdriver.Chrome('/usr/local/bin/chromedriver', options=chrome_options)
driver.get('https://www.whatismyip.com/')
The Chrome browser opens, but it does not display any content.
If I check the address 207.157.25.44:80 via an online proxy checker service, I get mixed results.
This image below shows that the proxy is not responding to any query types (HTTP, HTTPS, SOCKS4, SOCKS5).
When I do the same check 5 minutes later the proxy is up on HTTP, but has latency issues.
If I selected another proxy from the free proxy website:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# A different proxy address, passed to Chrome via --proxy-server.
proxy_server = '47.184.133.79:3128'
chrome_options = Options()
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument('--proxy-server=%s' % proxy_server)
# disable the banner "Chrome is being controlled by automated test software"
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
# The chromedriver path is passed as the first positional argument.
driver = webdriver.Chrome('/usr/local/bin/chromedriver', options=chrome_options)
driver.get('https://www.whatismyip.com/')
I get a CloudFlare challenge page when connecting to the website whatismyip.
But if I try the same proxy on the website nordvpn.com/what-is-my-ip I get the proxy's IP address.
I would highly recommend testing any free proxy IP address multiple times to see whether the address has any issues. Additionally, you need to add some error handling to your code to catch the case where a proxy goes offline, because they can drop at any time.
If you need to use a proxy, I would strongly recommend using a commercial proxy service, because they are more reliable than the free proxy services.
oxylabs.io
bright data

Detach for keeping web browser open in selenium webdriver python not working

Okay, so I'm trying to keep the browser open once the script has finished executing. I want to be able to close it manually so the browser doesn't close before the user is finished. I'll leave my code below and hopefully someone can help me see where I'm going wrong. I'm using Chrome as the browser, version 90.0.4430.85.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
# Launch the browser with 4 tabs.
def _hover_and_click(driver, element):
    """Move the pointer onto ``element`` and click it through ActionChains."""
    ActionChains(driver).move_to_element(element).click(element).perform()


def launchBrowser():
    """Open Chrome with three tabs and dismiss the cookie dialogs.

    Exactly one browser is started, and only after the options have been
    built, so every option (including ``detach``) applies to the window that
    is actually used. No second, unconfigured Chrome is left running.

    Returns:
        The Chrome webdriver, so the caller can keep a reference to the
        session after this function returns.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('start-maximized')
    # Disables infobar: 'Chrome is being controlled by automation blah blah.' Check: https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md#--enable-automation if anything goes wrong.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # Ignores certificate and ssl errors.
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors')
    # Ask for the browser to be left open once the script is done.
    options.add_experimental_option("detach", True)
    # Reject chrome notifications.
    prefs = {'profile.default_content_setting_values.notifications': 2}
    options.add_experimental_option('prefs', prefs)
    # The only driver created in this function; it carries all options above.
    driver = webdriver.Chrome(options=options, executable_path=r'C:\Users\Bernardo\Desktop\Python Projects\Automations\chromedriver.exe')

    # Main browser, first tab - River.
    driver.get('https://lapaginamillonaria.com/')
    cookies_button = driver.find_element_by_class_name('fc-button.fc-cta-manage-options.fc-secondary-button')
    _hover_and_click(driver, cookies_button)
    interes_legitimo = driver.find_element_by_class_name('fc-preference-legitimate-interest.purpose')
    if interes_legitimo.get_attribute('checked') == 'true':
        _hover_and_click(driver, interes_legitimo)
        enviar_button = driver.find_element_by_class_name('fc-button.fc-save-continue.fc-primary-button')
        _hover_and_click(driver, enviar_button)
    else:
        print("Couldn't proceed with our task, master Rari.")

    # Second tab - La Nación.
    driver.execute_script("window.open('about:blank', 'tab2');")
    driver.switch_to.window("tab2")
    driver.get('https://www.lanacion.com.ar/')

    # Third tab - BBC.
    driver.execute_script("window.open('about:blank', 'tab3')")
    driver.switch_to.window("tab3")
    driver.get('https://www.bbc.com/')
    cookies_button = driver.find_element_by_class_name('fc-button.fc-cta-manage-options.fc-secondary-button')
    _hover_and_click(driver, cookies_button)
    interes_legitimo = driver.find_element_by_class_name('fc-preference-legitimate-interest.purpose')
    if interes_legitimo.get_attribute('checked') == 'true':
        _hover_and_click(driver, interes_legitimo)
        enviar_button = driver.find_element_by_class_name('fc-button.fc-save-continue.fc-primary-button')
        _hover_and_click(driver, enviar_button)
        accept_continue = driver.find_element_by_class_name('continue-button.banner-button')
        _hover_and_click(driver, accept_continue)
    else:
        print("Couldn't proceed with our task.")
    take_me_to_news = driver.find_element_by_class_name('orb-nav-newsdotcom')
    _hover_and_click(driver, take_me_to_news)
    return driver
launching = launchBrowser()

Python Selenium: Quit Browser if Idle

I'm trying to make a list of headless Chrome webdrivers the following way:
from selenium import webdriver
# Chrome options used for every browser that add_browser() starts.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--log-level=3')
# Registry of started browsers, keyed by browser id.
browsers = {}
def add_browser(browser_id):
    """Start a Chrome webdriver, register it under ``browser_id`` and load ``URL``."""
    session = webdriver.Chrome(options=options, executable_path=chromedriver)
    # Register first, so the entry exists even if loading the page fails.
    browsers[browser_id] = session
    session.get(URL)
However, I would like to close browsers that have been active and idle for too long. How may I implement that?
If it helps - this is being used for a Flask app.
from datetime import datetime
def add_browser(browser_id):
    """Start a Chrome webdriver and register it with its last-activity time.

    The registry entry is a dict with two string keys: ``'browser'`` (the
    webdriver) and ``'last_active'`` (a ``datetime`` set at creation).
    The keys must be quoted; bare names would raise NameError.
    """
    browsers[browser_id] = {
        'browser': webdriver.Chrome(executable_path=chromedriver, options=options),
        'last_active': datetime.now(),
    }
    browsers[browser_id]['browser'].get(URL)
    # do some stuff... scrape links? navigate through pages? input text?
Then when writing your script in the //do some stuff section, you can now do one of two things:
# "check in" to confirm the session is still active
browsers[browser_id]['last_active'] = datetime.now()
# during a loop that you are worried about getting stuck in due to the browser being idle
idle_time = datetime.now() - browsers[browser_id]['last_active']
# total_seconds() covers the whole interval; the .seconds attribute holds only
# the seconds component and starts again from zero after each full day.
if idle_time.total_seconds() > maximum_idle_time:
    browsers[browser_id]['browser'].quit()

Categories