How to add webdriver extension arguments on super() in python selenium? - python

I tried to add chrome extension when open chrome webdriver in python.
OK.py
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Path of the extension to load.
extension_path = r'C:/Users/path/to/extension'
chrome_options = Options()
# Hand the extension path to Chrome through the load-extension argument.
chrome_options.add_argument('load-extension=' + extension_path)
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-infobars")
# Both the driver path and the options object are passed by keyword here.
browser = webdriver.Chrome(executable_path=r'C:/Users/path/to/chromedriver.exe', options = chrome_options)
The code runs fine and the extension is loaded. However, it stops working when I split it into two files and use a class.
variables.py
from selenium.webdriver.chrome.options import Options
# Shared settings imported by the script that defines the driver subclass.
driver_path = r'C:/Users/path/to/chromedriver.exe'
extension_path = r'C:/Users/path/to/extension'
# A single Options object, built once when this module is imported.
chrome_options = Options()
chrome_options.add_argument('load-extension=' + extension_path)
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-infobars")
broken.py
import variables as vb
from selenium import webdriver
class nokair(webdriver.Chrome):
    """Chrome driver subclass from the question's first attempt (broken.py).

    The indentation lost from the snippet is restored so it is valid Python.
    The behaviour is deliberately left as posted, because it is the behaviour
    the question asks about.
    """

    def __init__(self, executable_path = vb.driver_path, options = vb.chrome_options):
        # The arguments are only stored as attributes on the instance ...
        self.driver_path = executable_path
        self.options = options
        # ... and nothing is forwarded to webdriver.Chrome, which is called
        # here without any arguments.
        super().__init__()
init = nokair()
When launched, the webdriver started without the extension. I also tried another workaround:
import variables as vb
from selenium import webdriver
class nokair(webdriver.Chrome):
    """Chrome driver subclass from the question's second attempt.

    The indentation lost from the snippet is restored so it is valid Python.
    The behaviour is deliberately left as posted, because it produces the
    TypeError shown in the traceback that follows.
    """

    def __init__(self, executable_path = vb.driver_path, options = vb.chrome_options):
        self.driver_path = executable_path
        self.options = options
        # Both values are passed positionally, so ``options`` lands in the
        # second positional parameter of webdriver.Chrome rather than in its
        # ``options`` keyword.
        super().__init__(executable_path, options)
init = nokair()
This threw an error. What did I do wrong?
Traceback (most recent call last):
File "c:\Users\Kittinun\Desktop\VS workspace\Playground\Flight\nokair.py", line 93, in <module>
nok = nokair()
File "c:\Users\Kittinun\Desktop\VS workspace\Playground\Flight\nokair.py", line 14, in __init__
super(nokair, self).__init__(executable_path, options)
File "C:\ProgramData\Miniconda3\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 73, in __init__
self.service.start()
File "C:\ProgramData\Miniconda3\lib\site-packages\selenium\webdriver\common\service.py", line 71, in start
cmd.extend(self.command_line_args())
File "C:\ProgramData\Miniconda3\lib\site-packages\selenium\webdriver\chrome\service.py", line 45, in command_line_args
return ["--port=%d" % self.port] + self.service_args
TypeError: %d format: a number is required, not Options

broken.py does not work because assigning self.options on the instance has no effect on webdriver.Chrome: the options are never passed to super().__init__(), hence they are not used by the driver.
The latter case does not work because you are not using named (keyword) arguments, so Python assigns them positionally. The second positional parameter of the constructor is port, but you pass the Options object to it.
Workable code would be
import variables as vb
from selenium import webdriver
class nokair(webdriver.Chrome):
    """Chrome driver subclass that always uses the settings from ``variables``.

    The indentation lost from the snippet is restored so it is valid Python.
    """

    def __init__(self):
        # Forward both values by keyword so each one reaches the intended
        # parameter of webdriver.Chrome regardless of positional order.
        super().__init__(executable_path=vb.driver_path, options=vb.chrome_options)
init = nokair()
In my example I left the nokair constructor without any parameters to keep things simple and avoid mixing up parameter names.

Related

selenium AttributeError: 'str' object has no attribute 'start'

I'm trying to make a program that works like this, but I keep getting this error:
Traceback (most recent call last):
File "/workspaces/vscode-remote-try-python/VirtualBrowser/main.py", line 7, in <module>
driver = webdriver.Chrome(service=r'VirtualBrowser/chromedriver')
File "/home/vscode/.local/lib/python3.9/site-packages/selenium/webdriver/chrome/webdriver.py", line 80, in __init__
super().__init__(
File "/home/vscode/.local/lib/python3.9/site-packages/selenium/webdriver/chromium/webdriver.py", line 101, in __init__
self.service.start()
AttributeError: 'str' object has no attribute 'start'
this is my code
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
chromeOptions = webdriver.ChromeOptions()
# NOTE(review): binary_location is pointed at the chromedriver file here, and
# chromeOptions is never passed to webdriver.Chrome below.
chromeOptions.binary_location = "/workspaces/vscode-remote-try-python/VirtualBrowser/chromedriver"
chromeDriver = 'VirtualBrowser/chromedriver'
# The service keyword receives a plain string here; this is the line the
# traceback above points at.
driver = webdriver.Chrome(service=chromeDriver)
driver.get("https://google.com")
# Block on user input so the script does not exit immediately.
input("Running...")
I'm not exactly the greatest at debugging, and most of the code consists of fixes that I've tried, which have at least reduced the number of errors. Any help would be appreciated, thanks.
I think the problem is in driver = webdriver.Chrome(service=chromeDriver): the service keyword argument should be a Service object, not a string path.
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
# ...
# Options object with no extra arguments set.
chrome_options = Options()
# Wrap the chromedriver path in a Service object ("path" is a placeholder).
chrome_service = Service("path")
# Pass the Service object, not a plain string, as the service keyword.
driver = webdriver.Chrome(options=chrome_options, service=chrome_service)

Use chrome instead of chrome driver

I'm using Python with Selenium but I need to use it with an extension (and probably with cookies). The extension is loaded from a ZIP file and I need to change some of its settings after installation, so it would be impractical to re-upload the extension every time the project starts. Is there any option to use it like that? I tried to use a profile from my normal Chrome but it doesn't work for me.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Point Chrome at an existing user-data directory and profile.
service = Service('D:\\chromedriver.exe') # your driver path
chrome_options = Options()
chrome_options.add_argument \
(r"--user-data-dir=C:\\Users\\czarn\\AppData\\Local\\Google\\Chrome\\User Data") # your chrome user data directory
chrome_options.add_argument(r'--profile-directory=Member') # the profile with the extensions loaded
# The service keyword used here is the one the traceback below rejects.
window = webdriver.Chrome(service=service, options=chrome_options)
I have this error:
Traceback (most recent call last):
File "C:\Users\czarn\PycharmProjects\trening\main.py", line 16, in <module>
window = webdriver.Chrome(service=service, options=chrome_options)
TypeError: __init__() got an unexpected keyword argument 'service'
As you mentioned, you can use an already existing Chrome Profile.
The example code below uses paths from my machine, but it should be easy to adapt to your use case.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Point Chrome at an existing user-data directory and profile.
service = Service('C:\\Users\\nicoc\\PycharmProjects\\WriteAI\\chromedriver.exe') # your driver path
chrome_options = Options()
chrome_options.add_argument \
(r"--user-data-dir=C:\\Users\\nicoc\\AppData\\Local\\Google\\Chrome\\User Data") # your chrome user data directory
chrome_options.add_argument(r'--profile-directory=Default') # the profile with the extensions loaded
# Driver location goes in through the Service object, browser settings through options.
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get("https://google.com/")
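The code requires Selenium v4 (still in beta at the time of writing, but it works fine); the service keyword argument is not accepted by older Selenium releases, which is what the TypeError above indicates.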

AttributeError: 'Options' object has no attribute 'binary' error invoking Headless Firefox using GeckoDriver through Selenium

# The import that binds FirefoxOptions is not shown in this snippet.
options = FirefoxOptions()
options.add_argument("--headless")
# firefox_options is the keyword that the DeprecationWarning below complains about.
driver = webdriver.Firefox(firefox_options=options, executable_path='/Users/toprak/Desktop/geckodriver')
driver.get("https://twitter.com/login?lang=en")
When I try to run my code, I get this error:
Warning (from warnings module):
File "/Users/toprak/Desktop/topla.py", line 19
driver = webdriver.Firefox(firefox_options=options, executable_path='/Users/toprak/Desktop/geckodriver')
DeprecationWarning: use options instead of firefox_options
Traceback (most recent call last):
File "/Users/toprak/Desktop/topla.py", line 19, in <module>
driver = webdriver.Firefox(firefox_options=options, executable_path='/Users/toprak/Desktop/geckodriver')
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/selenium/webdriver/firefox/webdriver.py", line 137, in __init__
if options.binary is not None:
AttributeError: 'Options' object has no attribute 'binary'
When I delete the lines which are about options and take out "firefox_options=options", the code works fine. What should I do to fix this?
Instead of the firefox_options keyword argument you need to use the options keyword argument, as the DeprecationWarning says. Additionally, you need to use the headless attribute. So your effective code block will be:
options = FirefoxOptions()
# Headless mode is set through the attribute rather than a command-line argument.
options.headless = True
# The options object is passed with the options keyword instead of firefox_options.
driver = webdriver.Firefox(executable_path='/Users/toprak/Desktop/geckodriver', options=options)
driver.get("https://twitter.com/login?lang=en")
References
You can find a couple of relevant detailed discussions in:
How to make Firefox headless programmatically in Selenium with Python?
The --headless argument works fine in Firefox (geckodriver) these days.
If you're getting the error mentioned in the title, then you're probably accidentally creating or passing a Chrome-based Options object rather than a Firefox-based Options object.
To avoid that mistake, it's best to create an import alias for both of them so that they're easier to distinguish.
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
# Chrome: options built from the Chrome-specific class go to the Chrome driver.
chrome_options = ChromeOptions()
chrome_options.add_argument('--headless')
chrome_driver = webdriver.Chrome(executable_path = r"..\mypath\chromedriver.exe", options=chrome_options)
# Firefox: options built from the Firefox-specific class go to the Firefox driver.
firefox_options = FirefoxOptions()
firefox_options.add_argument('--headless')
firefox_driver = webdriver.Firefox(executable_path = r"..\mypath\geckodriver.exe", options=firefox_options)

Scraping URLs with Python and selenium

I am trying to get a python selenium script working that should do the following:
Take text file, BookTitle.txt that is a list of Book Titles.
Using Python/Selenium then searches the site, GoodReads.com for that title.
Takes the URL for the result and makes a new .CSV file with column 1=book title and column 2=Site URL
I hope that we can get this working, then please help me with step by step to get it to run.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options
from pyvirtualdisplay import Display
#from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common import keys
import csv
import time
import json
class Book:
    """A book title paired with the URL found for it.

    Iterating over an instance yields ``title`` then ``url``, so the object
    can be turned into a CSV row with ``list(book)``.
    """

    def __init__(self, title, url):
        self.title = title
        self.url = url

    def __iter__(self):
        return iter([self.title, self.url])
url = 'https://www.goodreads.com/'
def create_csv_file():
    """Create (or truncate) the output CSV file and write its header row.

    The indentation lost from the snippet is restored. The file is opened
    with ``newline=''`` as the csv module documentation recommends, so the
    writer alone decides the line endings.
    """
    header = ['Title', 'URL']
    with open('/home/l/gDrive/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv', 'w+', newline='', encoding='utf-8') as csv_file:
        wr = csv.writer(csv_file, delimiter=',')
        wr.writerow(header)
def read_from_txt_file():
    """Return the lines of BookTitles.txt as a list of strings.

    Only the trailing newline of each line is removed. The file is read
    inside a ``with`` block so the handle is closed once the list is built
    (the original left the file object open).
    """
    with open('/home/l/gDrive/AudioBookReviews/WebScraping/BookTitles.txt', encoding='utf-8') as titles_file:
        lines = [line.rstrip('\n') for line in titles_file]
    return lines
def init_selenium():
    """Start the module-level Chrome ``driver`` and open the Goodreads search page.

    The indentation lost from the snippet is restored so it is valid Python.
    The logic is deliberately left as posted, because the traceback and the
    answers that follow refer to it.
    """
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # NOTE: this second Options object receives --headless but is never
    # passed to the driver; only chrome_options is used below.
    options = Options()
    options.add_argument('--headless')
    global driver
    driver = webdriver.Chrome("/home/l/gDrive/AudioBookReviews/WebScraping/chromedriver", chrome_options=chrome_options)
    driver.get(url)
    # Fixed 30-second pause after loading the start page.
    time.sleep(30)
    driver.get('https://www.goodreads.com/search?q=')
def search_for_title(title):
    """Type ``title`` into the search field and click the search button.

    Uses the module-level ``driver``. The indentation lost from the snippet
    is restored, and the XPath attribute test is written with ``@id``
    (``#id`` is not valid XPath).
    """
    search_field = driver.find_element_by_xpath('//*[@id="search_query_main"]')
    search_field.clear()
    search_field.send_keys(title)
    search_button = driver.find_element_by_xpath('/html/body/div[2]/div[3]/div[1]/div[1]/div[2]/form/div[1]/input[3]')
    search_button.click()
def scrape_url():
    """Return the href of the first ``a.bookTitle`` link, or ``"N/A"``.

    Uses the module-level ``driver``. Any ordinary exception raised by the
    lookup results in ``"N/A"``; ``except Exception`` is used instead of a
    bare ``except`` so that KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        url = driver.find_element_by_css_selector('a.bookTitle').get_attribute('href')
    except Exception:
        url = "N/A"
    return url
def write_into_csv_file(vendor):
    """Append one row to the output CSV file.

    ``vendor`` is any iterable; its items become the columns of the row.
    The indentation lost from the snippet is restored. The file is opened
    with ``newline=''`` as the csv module documentation recommends.
    """
    with open('/home/l/gDrive/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv', 'a', newline='', encoding='utf-8') as csv_file:
        wr = csv.writer(csv_file, delimiter=',')
        wr.writerow(list(vendor))
# Script entry: write the CSV header, load the titles, start the browser,
# then search for each title and append one (title, url) row per book.
create_csv_file()
titles = read_from_txt_file()
init_selenium()
for title in titles:
    search_for_title(title)
    url = scrape_url()
    book = Book(title, url)
    write_into_csv_file(book)
Running the above, I get the following errors:
Traceback (most recent call last): File
"/home/l/gDrive/AudioBookReviews/WebScraping/GoodreadsScraper.py",
line 68, in
init_selenium() File "/home/l/gDrive/AudioBookReviews/WebScraping/GoodreadsScraper.py",
line 41, in init_selenium
driver = webdriver.Chrome("/home/l/gDrive/AudioBookReviews/WebScraping/chromedriver",
chrome_options=chrome_options) File
"/usr/local/lib/python3.6/dist-packages/selenium/webdriver/chrome/webdriver.py",
line 81, in init
desired_capabilities=desired_capabilities) File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/webdriver.py",
line 157, in init
self.start_session(capabilities, browser_profile) File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/webdriver.py",
line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters) File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/webdriver.py",
line 321, in execute
self.error_handler.check_response(response) File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/errorhandler.py",
line 242, in check_response
raise exception_class(message, screen, stacktrace) selenium.common.exceptions.WebDriverException: Message: unknown error:
Chrome failed to start: exited abnormally (unknown error:
DevToolsActivePort file doesn't exist) (The process started from
chrome location /usr/bin/google-chrome is no longer running, so
ChromeDriver is assuming that Chrome has crashed.) (Driver info:
chromedriver=2.44.609551
(5d576e9a44fe4c5b6a07e568f1ebc753f1214634),platform=Linux
4.15.0-60-generic x86_64)
There are a couple of errors I can see for now:
1) You have to uncomment the Chrome Options import and comment out the Firefox one, as you're passing the chromedriver later in the code:
# from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
Btw, that pyvirtualdisplay is an alternative for headless chrome, you don't need it imported.
2) you have instantiated Options two times and you're using only the first one. Change your code to:
def init_selenium():
    # Excerpt: build one Options object and attach all three arguments to it.
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--headless')
I guess these two are just for start, edit your question when you encounter the next problem you can't solve.
You are using the Chrome driver, but the Chrome Options import is commented out. Use:
from selenium.webdriver.chrome.options import Options
In the search function, the process is:
get page -> find search box -> input value -> enter keys -> grab results.
Something like this:
def search_for_title(title):
    """Search for ``title`` and print the href of the element found.

    Uses the module-level ``driver``. The indentation lost from the snippet
    is restored so it is valid Python.
    """
    driver.get('https://www.goodreads.com/search?q=')
    search_field = driver.find_element_by_name('q')
    search_field.clear()
    search_field.send_keys(title)
    search_field.send_keys(keys.Keys.RETURN) # you missed this part
    url = driver.find_element_by_xpath(
        '/html/body/div[2]/div[3]/div[1]/div[2]/div[2]/table/tbody/tr[1]/td[2]/a')
    print(url.get_attribute('href'))

How to start Chrome Browser through Chromedriver and Selenium

I am suddenly getting errors with Selenium and the chromedriver. I haven't changed a single thing, yet I am met with these error messages. The script literally worked hours ago and now, without any tweaks, it's not working.
traceback (most recent call last):
File "email.py", line 3, in <module>
from selenium import webdriver
File "C:\ProgramData\Anaconda3\lib\site-packages\selenium\webdriver\__init__.py", line 18, in <module>
from .firefox.webdriver import WebDriver as Firefox # noqa
File "C:\ProgramData\Anaconda3\lib\site-packages\selenium\webdriver\firefox\webdriver.py", line 20, in <module>
import http.client as http_client
File "C:\ProgramData\Anaconda3\lib\http\client.py", line 71, in <module>
import email.parser
File "C:\Users\Doe Labs\Desktop\Austin\Scripts\email.py", line 12, in <module>
options = webdriver.ChromeOptions()
Here is my corresponding code:
import pyautogui
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
caps = DesiredCapabilities().CHROME
#caps["pageLoadStrategy"] = "eager"
options = webdriver.ChromeOptions()
# Pass the extension path to Chrome through the load-extension argument.
options.add_argument(r'load-extension=C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\facebookpixelhelper')
#options.add_argument('start-fullscreen')
options.add_argument('disable-infobars')
# Capabilities, driver path and options are all passed by keyword.
driver=webdriver.Chrome(desired_capabilities = caps, executable_path=r'C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\chromedriver', chrome_options=options)
driver.get('http://www.doelabs.com/')
driver.maximize_window()
Even stranger, when I open a new terminal, start Python, and type from selenium import webdriver, I don't get any errors. But when I navigate to the folder where the script lives, start Python, and type from selenium import webdriver, I get the error message shown above. I hope this gives some insight into my current predicament.
A few words about the solution :
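email is the name of a standard-library module in Python (it is not a reserved word). Because your script is named email.py, the import email.parser inside http/client.py picks up your script instead of the standard library, as the traceback shows. Avoid naming your own files after standard-library modules such as email.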
pageLoadStrategy as eager is yet to be implemented in ChromeDriver, use either none or normal instead as per your requirement.
To maximize the Chrome Browser Window instead of maximize_window() use the argument start-maximized through ChromeOptions()
To load an extension use ChromeOptions as follows :
# Python API: add_extension() takes the path of a packed .crx file
# (addExtensions(new File(...)) is the Java binding).
options.add_extension("/path/to/extension.crx")
Here are the four methods to initialize Chrome Browser through ChromeDriver :
Vanilla Method:
from selenium import webdriver
# Start Chrome (not Firefox) through the given chromedriver executable; the
# path is passed by keyword, as in the other examples in this answer.
driver = webdriver.Chrome(executable_path=r'C:\path\to\chromedriver.exe')
driver.get('http://www.doelabs.com/')
print("Page Title is : %s" %driver.title)
driver.quit()
Arguments as ChromeOptions :
from selenium import webdriver
options = webdriver.ChromeOptions()
# Python API for loading a packed .crx; the raw string keeps the Windows
# backslashes literal (a plain "C:\Users..." literal is a syntax error).
options.add_extension(r"C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\facebookpixelhelper.crx")
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\path\to\chromedriver.exe')
driver.get('http://www.doelabs.com/')
print("Page Title is : %s" %driver.title)
driver.quit()
Capabilities as DesiredCapabilities :
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Work on a copy so the shared CHROME capabilities dict is not modified.
caps = DesiredCapabilities().CHROME.copy()
caps["pageLoadStrategy"] = "normal"
driver = webdriver.Chrome(executable_path=r'C:\path\to\chromedriver.exe', desired_capabilities=caps)
driver.get('http://www.doelabs.com/')
print("Page Title is : %s" %driver.title)
driver.quit()
Arguments as ChromeOptions and Capabilities as DesiredCapabilities :
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Work on a copy so the shared CHROME capabilities dict is not modified.
caps = DesiredCapabilities().CHROME.copy()
caps["pageLoadStrategy"] = "normal"
options = webdriver.ChromeOptions()
# Python API for loading a packed .crx; the raw string keeps the Windows
# backslashes literal (a plain "C:\Users..." literal is a syntax error).
options.add_extension(r"C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\facebookpixelhelper.crx")
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\path\to\chromedriver.exe', desired_capabilities=caps)
driver.get('http://www.doelabs.com/')
print("Page Title is : %s" %driver.title)
driver.quit()
You might want to change
executable_path=r'C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\chromedriver',
to
executable_path=r'C:\Users\Doe Labs\Desktop\Austin\sales_prospecting\chromedriver.exe',
You seem to have missed .exe, the extension of the executable file.

Categories