Headless Selenium is not extracting data in Python using Chrome driver - python

While navigating to the page, It is able to extract the data.
Code:
from selenium.webdriver.chrome.options import Options
options1 = Options()
options1.headless = True
driver = webdriver.Chrome(os.getcwd() +"/chromedriver",options = options1)
this is how I am getting the chrome driver
content = BeautifulSoup(driver.page_source,"html.parser")
this is how I am getting the content from the navigated page. While we are navigating to the page able to extract the data.

Can you try it like below?
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(os.getcwd() +"/chromedriver", chrome_options=options)

Related

Selenium with Python not able to fully open the website

I have tried the following code and tried to open the website as mentioned:
driver = webdriver.Chrome(r"..\chromedriver_win32\chromedriver.exe")
driver.get("https://example.com")
The website opens with the Chrome Browser but not with the Selenium using Python.
Please let me know what should I do to open the website completely.
You can run it with chrome options. I am able to launch your application with below code:
from time import sleep
from selenium import webdriver
PATH = "chromedriver path"
option = webdriver.ChromeOptions()
option.add_argument('--disable-blink-features=AutomationControlled')
option.add_argument("start-maximized")
option.add_experimental_option(
"excludeSwitches", ["enable-automation"])
option.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(PATH, options=option)
url = 'https://example.com'
driver.get(url)
driver.maximize_window()
sleep(20)
output:

selenium chrome options (running in backgroud)

I want to get some information from a website and Chrome should run in the background to fulfill that task. Down there you can see my code. It works so far and I get the desired output, but when I add the chrome_options so that Chrome is hidden I don't get the output anymore.
What's the problem and how can I fix this?
from selenium import webdriver
def get_stockname(wkn):
PATH = r"***placeholder***chromedriver.exe"
url = "***placeholder***"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("headless")
browser = webdriver.Chrome(PATH, options=chrome_options)
browser.get(url)
search_box = browser.find_element_by_class_name('input-field__text-input')
search_box.send_keys(wkn)
search_box.submit()
name = browser.find_element_by_xpath("/html/body/div[2]/div[1]/div[2]/div[13]/div[2]/div[1]/h2").text
name = name[13:]
print(name)
try like that, but with replacing your url:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
url = "https://google.com"
chrome_options = Options()
chrome_options.add_argument("--headless")
browser = webdriver.Chrome(options=chrome_options)
browser.get(url)
search_box = browser.find_element_by_class_name('input-field__text-input')
search_box.send_keys(wkn)
search_box.submit()
name = browser.find_element_by_xpath("(//h2[#class='box-headline'])[2]").get_attribute('innerText')
name = name[13:]
print(name)
By the way, can you show DOM code snipped to make element more unique with such external relation or even URL and element what info you want to get out?

How to bypass the message-"your connection is not private" on non-secure page using Selenium?

I'm trying to interact with the page "Your connection is not private".
The solution of using options.add_argument('--ignore-certificate-errors') is not helpful for two reasons:
I'm using an already open window.
Even if I was using a "selenium opened window" the script runs non stop, and the issue I'm trying to solve is when my browser disconnects from a splunk dashboard and I want it to automatically connect again(and it pops the private connection window).
How do I click on "Advanced" and then click on "Proceed to splunk_server (unsafe)?
For chrome:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--ignore-ssl-errors=yes')
options.add_argument('--ignore-certificate-errors')
driver = webdriver.Chrome(options=options)
If not work then this:
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
options = webdriver.ChromeOptions()
options.add_argument('--allow-insecure-localhost') # differ on driver version. can ignore.
caps = options.to_capabilities()
caps["acceptInsecureCerts"] = True
driver = webdriver.Chrome(desired_capabilities=caps)
For firefox:
from selenium import webdriver
profile = webdriver.FirefoxProfile()
profile.accept_untrusted_certs = True
driver = webdriver.Firefox(firefox_profile=profile)
driver.get('https://cacert.org/')
driver.close()
If not work then this:
capabilities = webdriver.DesiredCapabilities().FIREFOX
capabilities['acceptSslCerts'] = True
driver = webdriver.Firefox(capabilities=capabilities)
driver.get('https://cacert.org/')
driver.close()
Above all worked for me!
This is how i handle this problem:
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.remote.CapabilityType;
ChromeOptions capability = new ChromeOptions();
capability.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);
capability.setCapability(CapabilityType.ACCEPT_INSECURE_CERTS,true);
WebDriver driver = new ChromeDriver(capability);
This chrome option is the silver bullet for me:
chromeOptions.addArguments("--allow-running-insecure-content");
If you need more, Open chrome & paste this URL:
chrome://flags/
One will find all the options and their impact on the chrome.
Either of below 2 solutions worked for me using Python Chrome Selenium Webdriver:
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
capabilities = DesiredCapabilities.CHROME.copy()
capabilities["acceptInsecureCerts"] = True
driver = webdriver.Chrome(desired_capabilities=capabilities)
And accepted solution:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--ignore-ssl-errors=yes')
options.add_argument('--ignore-certificate-errors')
driver = webdriver.Chrome(options=options)

Using selenium: How to keep logged in after closing Driver in Python

I want to get my Whatsapp web (web.whatsapp.com) logged in, at the second time opening the Whatsapp web on chrome driver. Following is my code based on Python need your help.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_path = r"chromedriver.exe"
options = Options();
options.add_argument("user-data-
dir=C:/Users/Username/AppData/Local/Google/Chrome/User Data");
#options.add_argument("--start-maximized");
driver = webdriver.Chrome(chrome_path,chrome_options=options);
#driver = webdriver.Chrome();
driver.get('https://web.whatsapp.com/')
I tried on my Mac, below code and it worked perfectly fine, I don't need to login again
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("user-data-dir=/tmp/tarun")
driver = webdriver.Chrome(chrome_options=options)
driver.get('https://web.whatsapp.com/')
driver.quit()
For window you can try changing the path as below
options.add_argument("user-data-dir=C:\\Users\\Username\\AppData\\Local\\Google\\Chrome\\User Data")
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("user-data-dir=C:\\Users\\Username\\AppData\\Local\\Google\\Chrome\\User Data")
driver = webdriver.Chrome(chrome_options=options)
driver.get('https://web.whatsapp.com/')
driver.quit()
Here it is for Windows. Works perfect on Python 3.6

How can I get the page source, without showing the page opened, using selenium with chromedriver and python?

I'm using selenium with Chrome driver; How can I get the page source, without showing the page opened? What I should specify in webdriver.ChromeOptions()?
Here the code:
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("???")
bowser = webdriver.Chrome(chrome_options=chrome_options)
browser = webdriver.Chrome()
try:
browser.get("www.google.com")
html_content = browser.page_source
#do stuff
browser.quit()
except WebDriverException:
print "Invalid URL"
You should not use ChromeDriver but some headless Webdriver like HtmlUnitDriver, explained here
If you are adamant to use selenium, then you can use any of the headless browsers such as htmlunit driver.
Else you can can just send a get request on the URL and get the response text.
Selenium / Chrome has a headless option, which allows you to load webpages from code:
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--headless")
browser = Chrome(options=chrome_options, executable_path='path_to_chromedriver')
browser.get('https://wwww.mywebsite.com')

Categories