Headless Chrome Navigate to Page, click to linked page, then printToPDF - python

I have to print all HTML pages linked to from a product information page:
http://prod.adv-bio.com/ProductDetail.aspx?ProdNo=1197
I have set up a test script to see if I could do this for just one of the HTML files. I used PyChromeDevTools and Selenium, hoping I could use the printToPDF function in DevTools while using Selenium to click the links I need to click.
Here is the code I have so far:
import requests
import time
import re
import websocket
import PyChromeDevTools
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--remote-debugging-port=9222')
chrome_options.add_argument('--window-size=1200x600')
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome('C:\\Users\\me\\AppData\\Local\\Programs\\Python\\Python36- 32\\selenium\\webdriver\\chromedriver_win32\\chromedriver.exe', chrome_options = chrome_options)
driver.get("http://prod.adv-bio.com/ProductDetail.aspx?ProdNo=1197")
browser = PyChromeDevTools.ChromeInterface()
browser.Network.enable()
browser.Page.enable()
driver.implicitly_wait(20)
driver.get("http://prod.adv-bio.com/ProductDetail.aspx?ProdNo=1197")
time.sleep(3)
url = driver.find_element_by_partial_link_text("Continuing")
time.sleep(3)
ActionChains(driver).click(url).perform()
time.sleep(3)
Page.printToPDF()
time.sleep(5)
and I am getting this error
Traceback (most recent call last):
File "C:\Users\me\SOtestfile.py", line 29, in <module>
driver = webdriver.Chrome('C:\\Python27\\Lib\\site-
packages\\chromedriver_win32\\chromedriver.exe', chrome_options =
chromeOptions, desired_capabilities=capabilities)
File "C:\Python27\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 69, in __init__
desired_capabilities=desired_capabilities)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 98, in __init__
self.start_session(desired_capabilities, browser_profile)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 188, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 256, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
WebDriverException: Message: chrome not reachable
(Driver info: chromedriver=2.31.488763 (092de99f48a300323ecf8c2a4e2e7cab51de5ba8),platform=Windows NT 6.1.7601 SP1 x86_64)
I have updated chromedriver to 2.31 and have chrome version 60, should I be using canary? I appreciate any help on this.

from selenium.webdriver.common.desired_capabilities import
DesiredCapabilities
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument("headless")
chromeOptions.add_argument("window-size=1366,768")
capabilities = DesiredCapabilities.CHROME.copy()
capabilities["browserName"] = "chrome"
driver = webdriver.Chrome(chrome_options=chromeOptions,
desired_capabilities=capabilities)
Worked here!! (Chrome 60 + Chromedriver 2.31) Hope this helps!!

Related

Selenium Firefox webdriver issues

Below is the python code that I am trying to run
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
binary = '/usr/bin/firefox'
options = webdriver.FirefoxOptions()
options.binary = binary
options.add_argument('start-maximized')
options.add_argument('--headless')
options.headless=False
options.binary_location = '/usr/bin/firefox'
driverService = Service('/usr/bin/geckodriver')
options.add_argument('window-size=1920x1080')
options.add_argument('--no-sandbox')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--disable-dev-shm-usage')
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
driver = Firefox(capabilities=cap, options=options, service=driverService)
driver.capabilities['browserVersion']
print(driver.capabilities['browserVersion'])
I don't know what version of firefox I'm using and I don't know how to find that out, selenium is 4.1.5 and geckodriver is 0.31
On running this, I get
driver = Firefox(capabilities=cap, options=options, service=driverService)
Traceback (most recent call last):
File "/usr/lib/python3.9/site-packages/xtesting/ci/./ui_testing_firefox.py", line 30, in <module>
driver = Firefox(capabilities=cap, options=options, service=driverService)
File "/usr/lib/python3.9/site-packages/selenium/webdriver/firefox/webdriver.py", line 180, in __init__
RemoteWebDriver.__init__(
File "/usr/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 275, in __init__
self.start_session(capabilities, browser_profile)
File "/usr/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 365, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/usr/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 430, in execute
self.error_handler.check_response(response)
File "/usr/lib/python3.9/site-packages/selenium/webdriver/remote/errorhandler.py", line 247, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status signal
I am unable to find a specific solution to resolve this issue, I would really appreciate the help.

unknown error: failed to wait for extension background page to load: chrome-extension error loading an extension to Chrome Headless using Selenium

I try to run chromedriver via selenium in headless mode.
IMPORTANT
The code runs perfectly fine if I eliminate the following code lines (but is not headless):
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
This is the error I get when I try to implement the headless argument:
Traceback (most recent call last):
File "camel.py", line 83, in <module>
executable_path=executable_path)
File "/home/.local/lib/python3.6/site-packages/selenium/webdriver/chrome/webdriver.py", line 81, in __init__
desired_capabilities=desired_capabilities)
File "/home/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/home/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/.local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: failed to wait for extension background page to load: chrome-extension://jkompbllimaoekaogchhkmkdogpkhojg/_generated_background_page.html
from unknown error: page could not be found: chrome-extension://jkompbllimaoekaogchhkmkdogpkhojg/_generated_background_page.html
This are the lines 81, 82 and 83
chrome_options.add_extension(extension_path)
driver = webdriver.Chrome(options=chrome_options,
executable_path=executable_path)
This is the code (the crhomedriver execution parts):
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.expected_conditions import presence_of_element_located
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
log_path = os.path.join(BASE_DIR, 'cronJobChromeDriver.log')
executable_path = os.path.join(BASE_DIR, 'chromedriver_linux64/chromedriver')
extension_path = os.path.join(
BASE_DIR, 'chromedriver_linux64/extension_2_8_9_0.crx')
print('executable_path', executable_path)
The bottom line is, No, google-chrome-headless doesn't supports extensions.
In the one of his comment, alexclarke#chromium.org mentioned:
I realize a lot of folks would like to use extensions with headless but unfortunately that's a large project which we have /no plans to do/. The problem is Headless Chromium is a content embedder which means it doesn't have access to anything from other content embedders such as chrome and unfortunately extensions are a chrome feature.
In another comment he further added, if you're using Selenium through DevTools you can build a proxy. Next you can filter URLs and modify headers via Network.setRequestInterception and Network.continueInterceptedRequest.
Reference
You can find a relevant detailed discussion in:
Is it possible to run Google Chrome in headless mode with extensions?
this is now possible by modifying the following flag:
chrome_options.add_argument('--headless=chrome')
I tested it successfully.
I found it here: https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
Chrome does not support headless, but apparently Firefox does.
Some relevant discussions:
https://sqa.stackexchange.com/questions/32611/selenium-chromedriver-headless-chrome-failed-to-wait-for-extension-backgro
Is it possible to run Google Chrome in headless mode with extensions?

Could Not Open Site In Firefox Through Python

I tried to open a site in firefox through selenium python, when i run the code it opens firefox after that nothing happens ,
WATCH THIS --> HOW TO USE FIREFOX IN PYTHON & HOW TO SET VALUES IN DROP-DOWN LISTS?
here is the error
bash-3.1$ C:/Users/user/AppData/Local/Programs/Python/Python35-32/python.exe d:/PYTHONS/EXTRACT-NEWS/FFD
RIVER.py
Traceback (most recent call last):
File "d:/PYTHONS/EXTRACT-NEWS/FFDRIVER.py", line 23, in <module>
executable_path=r"D:\\PYTHONS\\DRIVERS\\geckodriver.exe")
File "c:\Users\user\AppData\Local\Programs\Python\Python35-32\lib\site-packages\selenium\webdriver\firefox\webdriver.py", line 174, in __init__
keep_alive=True)
File "c:\Users\user\AppData\Local\Programs\Python\Python35-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "c:\Users\user\AppData\Local\Programs\Python\Python35-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "c:\Users\user\AppData\Local\Programs\Python\Python35-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "c:\Users\user\AppData\Local\Programs\Python\Python35-32\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: newSession
WHEN I RUN THE CODE I GOT THIS ERR
HOW TO USE FF IN PYTHON?
THX IN ADVANCE
# Import Libraries
import os
import sys
import time
from selenium import webdriver
import selenium.webdriver.firefox.options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Firefox Options
profile = webdriver.Firefox(
executable_path=r"D:\\PYTHONS\\DRIVERS\\geckodriver.exe")
# 0 means to download to the desktop, 1 means to download to the default "Downloads" directory, 2 means to use the directory
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.dir", download_path)
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference(
"browser.helperApps.neverAsk.saveToDisk", "application/x-gzip/text/csv")
os.system("cls")
# firefox_profile=profile
driver = webdriver.Firefox(firefox_profile=profile)
driver.get('https://www.google.com')
print(driver.title)
# driver.quit()
[1]: https://i.stack.imgur.com/T3Bf8.png
To set preference you have to use FirefoxProfile() instead of Firefox()
from selenium import webdriver
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.dir", '.')
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/x-gzip/text/csv")
driver = webdriver.Firefox(firefox_profile=profile, executable_path="D:\\PYTHONS\\DRIVERS\\geckodriver.exe")
driver.get('https://www.google.com')
print(driver.title)
BTW: and don't run webdriver.Firefox() two times beacuse Selenium doesn't know how to run two browsers at the same time - and this gave you error message.

python, chrome driver errors

I have this file test.py:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chrome_options=options)
driver.get("https://www.google.com")
print(driver.page_source)
driver.quit()
I want to get the page source from the page with this python code. I'm using chrome driver selenium and python. But when I start this script I've got this error:
File "test2.py", line 8, in <module>
driver = webdriver.Chrome(chrome_options=options)
File "/usr/local/python3/lib/python3.6/site-packages/selenium/webdriver/chrome/webdriver.py", line 81, in __init__
desired_capabilities=desired_capabilities)
File "/usr/local/python3/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/usr/local/python3/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/usr/local/python3/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/usr/local/python3/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: cannot create temp dir for user data dir
I tried almost every solution on the internet but every time I'm just having other errors, does someone know what should I do in this situation, any advice on how I can fix this problem?
make sure you have correct chrome browser installed and correct supportive version of chromedriver
and here is some code edit suggestions
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.headless = True
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)
driver.get("https://www.google.com")
print(driver.page_source)
driver.quit()
Try set the path driver , like bellow .... locate with this command :
whereis chromedriver
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('/usr/bin/chromedriver',chrome_options=chrome_options)

Error logging into website with selenium webdriver

I know that there are many questions with the same problem but none of them have helped me.
This is my script. I am trying to login into the willhaben website. I am from austria that is why I am using willhaben.at
Python version 3.5.4
Selenium version 3.8.0
PhantomJS version 2.1
#need some of the libraries for later code
from bs4 import BeautifulSoup as soup
import requests
import sys
import re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
input("If ready press 'enter'")
loginurl = 'https://www.willhaben.at/iad/?islogout=true&logoff.y=10'
chrome_options = Options()
chrome_options.add_argument('disable-infobars')
driver = webdriver.Chrome(executable_path="C:\\Users\\laure\\Downloads\\chromedriver_win32\\chromedriver.exe", chrome_options=chrome_options, service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
USERNAME = 'notgonnagiveit :)'
PASSWORD = 'notgonnagiveit'
driver.get(loginurl)
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//*[#id="username"]'))
)
username = driver.find_element_by_xpath('//*[#id="username"]')
password = driver.find_element_by_xpath('//*[#id="password"]')
password.send_keys(PASSWORD)
username.send_keys(USERNAME)
driver.quit()
Error:
File "D:\OneDrive\Dokumente\Willhaben\Willhaben.py", line 73, in <module>
password.send_keys(PASSWORD)
File "C:\Users\laure\AppData\Local\Programs\Python\Python35\lib\site-packages\selenium\webdriver\remote\webelement.py", line 352, in send_keys
'value': keys_to_typing(value)})
File "C:\Users\laure\AppData\Local\Programs\Python\Python35\lib\site-packages\selenium\webdriver\remote\webelement.py", line 501, in _execute
return self._parent.execute(command, params)
File "C:\Users\laure\AppData\Local\Programs\Python\Python35\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 311, in execute
self.error_handler.check_response(response)
File "C:\Users\laure\AppData\Local\Programs\Python\Python35\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 237, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException: Message: element not visible
(Session info: chrome=63.0.3239.84)
(Driver info: chromedriver=2.34.522940 (1a76f96f66e3ca7b8e57d503b4dd3bccfba87af1),platform=Windows NT 10.0.16299 x86_64)
Try to use :
username = driver.find_element_by_xpath("//input[#id='username']")
password = driver.find_element_by_xpath("//input[#id='password']")

Categories