from bs4 import BeautifulSoup
import requests
from urllib.request import urlopen
import json
import time
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementNotVisibleException
# Configure Chrome. The "detach" experimental option is intended to leave the
# browser window open once the driver session ends; excluding the
# "enable-logging" switch is presumably meant to silence console log noise.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Build the Naver Map search URL by appending the search phrase to the base URL.
search_key = '성남 스터디카페'
url = "https://map.naver.com/v5/search/" + search_key
# Relative path to the ChromeDriver executable (resolved against the working directory).
driverPath = "chromedriver.exe"
# NOTE(review): `webdriver` is imported from seleniumwire above, not from
# selenium; whether "detach" still keeps the window open in that setup once the
# Python process exits should be confirmed.
driver = webdriver.Chrome(driverPath, options=chrome_options)
driver.get(url)
I am making a web crawler. The page I requested opens and I can see it, but the browser keeps closing after 2–3 seconds.
I also used the detach option.
My Chrome version is 91.0.4472.124, so I downloaded the matching version 91 ChromeDriver, but the browser still closes.
Is there a problem with my code?
The browser closes because the main Python process exits once the script has finished running.
If you want to keep the browser open, simply add this at the end:
time.sleep(1000)
Related
type hefrom selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
# Ask Chrome to stay open after the script finishes.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
# Raw strings keep the backslashes of the Windows path literal; the resulting
# values are identical to the originals, without the invalid "\c" escape.
s = Service(r'I:\chromedriver_win32\chromedriver.exe')
path = r'I:\chromedriver_win32\chromedriver.exe'
# Website to scrape
website = 'https://www.adamchoi.co.uk/overs/detailed'
driver = webdriver.Chrome(service=s, options=chrome_options)
driver.get(website)
# Locating and clicking an element.
# NOTE: the name below is bound to the return value of .click(), not to the
# element itself, and it is not used afterwards.
all_matches_button = driver.find_element(by='xpath', value="//label[normalize-space()='All matches']").click()
# NOTE(review): the XPath 'tr' has no leading slashes; the answer that follows
# this question recommends '//tr' to match table rows anywhere in the page.
matches = driver.find_elements(by="xpath", value='tr')
# Print the visible text of every matched row.
for match in matches:
    print(match.text)
Error:"USB: usb_device_handle_win.cc:1045 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)" and "Bluetooth: bluetooth_adapter_winrt.cc:1074 Getting Default Adapter failed."
I need a solution to my problem.
How did you get the USB and Bluetooth errors? What is going on?
Where did you copy and paste `type hefrom selenium import webdriver` from?
The variable all_matches_button is never used in your code, and there is no need to save anything to a variable when you click on an element.
#Locating and clicking an element
# The assignment binds the result of the trailing .click() call, not the
# element that find_element located.
all_matches_button=driver.find_element(by='xpath',value="//label[normalize-space()='All matches']").click()
Here is working code to start from and dig deeper:
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
# Ask Chrome to stay open after the script finishes.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
website = 'https://www.adamchoi.co.uk/overs/detailed'
# No driver path is given, so the driver executable must be discoverable by
# Selenium itself.
driver = webdriver.Chrome(options=chrome_options)
driver.get(website)
# Fixed pause before querying the page, presumably to let it finish rendering.
sleep(4)
# '//tr' matches every table row anywhere in the document.
tr_elements = driver.find_elements(By.XPATH, "//tr")
# Print each row's tag name together with its raw text content.
for tr in tr_elements:
    print(tr.tag_name, tr.get_attribute('textContent'))
However, this message shouldn't prevent you from getting the scraping result. Try to correct the XPath in your last line by adding slashes:
matches=driver.find_elements(by="xpath",value='//tr')
I am just trying to send keys to the #inputPlaylist text field on this website, https://youtubemultidownloader.net/playlists.html, but Selenium is being detected in some way.
I tried the most common driver arguments and JS scripts, but in vain.
Can anyone handle this website?
@Othman Alkhatib, I tried the following and it is working. Let me know if this is what you were looking for. The last line, time.sleep(5), is kept just so that anyone running this can see what is happening before the browser gets closed:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Launch Chrome with default options, using the driver binary that
# webdriver_manager installs, then type into the playlist input field.
driver_service = Service(ChromeDriverManager().install())
default_options = Options()
driver = webdriver.Chrome(service=driver_service, options=default_options)
driver.get("https://youtubemultidownloader.net/playlists.html")
playlist_input = driver.find_element(By.ID, "inputPlaylist")
playlist_input.send_keys("ABC")
# Pause so the result can be observed before the script ends.
time.sleep(5)
I'm pretty new to programming, so this might be an easy question, but I don't understand why the browser opened by Selenium closes at the end of the code.
from lib2to3.pgen2 import driver
from selenium import webdriver
def Online_PLatform():
    """Open the e-learning login page, fill in the login form and submit it.

    The driver is a local variable, so nothing keeps a reference to the
    browser session once this function returns.
    """
    Driver = webdriver.Chrome()
    Driver.get('https://elearningmarikina.ph/')
    # First form field: the account e-mail.
    # NOTE(review): the '#' in the address is probably a garbled '@' - confirm.
    Gmail = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[1]/input')
    Gmail.send_keys('958rectin#depedmarikina.com')
    # Second form field: the password.
    Pass = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[2]/input')
    Pass.send_keys('33112')
    # Submit the form.
    Button = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[3]/button')
    Button.click()
You can use 2 approaches in order to keep your driver open.
1.
Add the 'detach' option to your driver settings:
from selenium.webdriver.chrome.options import Options
# Create the Chrome options object and enable the "detach" experimental option.
# NOTE: the options only take effect once they are passed to the driver,
# e.g. webdriver.Chrome(options=chrome_options).
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
2. Simply add a delay at the end of your test code (a less elegant but simpler approach):
from lib2to3.pgen2 import driver
from selenium import webdriver
import time
def Online_PLatform():
    """Log in to the e-learning site, then pause so the browser stays visible.

    The final sleep sits inside the function so that the local driver object
    is still referenced while the delay runs.
    """
    Driver = webdriver.Chrome()
    Driver.get('https://elearningmarikina.ph/')
    # First form field: the account e-mail.
    # NOTE(review): the '#' in the address is probably a garbled '@' - confirm.
    Gmail = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[1]/input')
    Gmail.send_keys('958rectin#depedmarikina.com')
    # Second form field: the password.
    Pass = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[2]/input')
    Pass.send_keys('33112')
    # Submit the form.
    Button = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[3]/button')
    Button.click()
    # Keep the script (and therefore the browser) alive for 50 seconds.
    time.sleep(50)
This is because, after all the functions have run, the code stops running, and that's why Selenium exits.
You can use the time module to add a delay.
import time
from lib2to3.pgen2 import driver
from selenium import webdriver
def Online_PLatform():
    """Log in to the e-learning site and then wait before returning.

    The delay is part of the function body, so the browser session created
    here is still referenced for the duration of the wait.
    """
    Driver = webdriver.Chrome()
    Driver.get('https://elearningmarikina.ph/')
    # First form field: the account e-mail.
    # NOTE(review): the '#' in the address is probably a garbled '@' - confirm.
    Gmail = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[1]/input')
    Gmail.send_keys('958rectin#depedmarikina.com')
    # Second form field: the password.
    Pass = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[2]/input')
    Pass.send_keys('33112')
    # Submit the form.
    Button = Driver.find_element_by_xpath('/html/body/div[1]/div/div/div[1]/div[2]/div/div/form/div[3]/button')
    Button.click()
    time.sleep(50)  # ---> 50 second delay
I am trying to learn Selenium to scrape some Javascript heavy websites. I can locate and extract information just fine. However, I find that for some sites I need to switch my user agent. I did it the following way to test it:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
# Location of the ChromeDriver executable handed to webdriver.Chrome below.
PATH ="C:/my/path/to/chromedriver.exe"
# Obtain a user-agent string from fake_useragent and print it for reference.
ua = UserAgent()
userAgent = ua.random
print(userAgent)
# Pass the chosen user agent to Chrome as a command-line argument.
options = Options()
options.add_argument(f'user-agent={userAgent}')
# NOTE(review): the `chrome_options=` and `executable_path=` keywords belong to
# the older Selenium API - confirm they are accepted by the installed version.
driver = webdriver.Chrome(chrome_options=options, executable_path=PATH)
# Open a page used here to check which user agent the browser reports.
driver.get("https://www.whatismybrowser.com/detect/what-is-my-user-agent")
The code works and my user agent is switched, however there is one bug that occurs now which did not occur before. The webdriver/browser (Chrome driver) automatically closes after displaying the website for a second without me specifying the driver.quit() argument. When I do not switch my user agent it does not close unless I do and I want to study the page a bit before closing it. I have tried to wait using time.sleep() but this doesn't work.
How can I make the webdriver not close until specified?
Answers are greatly appreciated, preferably with a code example of how to implement the solution.
This should do you nicely:
options.add_experimental_option("detach", True)
in your code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
# Location of the ChromeDriver executable handed to webdriver.Chrome below.
PATH ="C:/my/path/to/chromedriver.exe"
# Obtain a user-agent string from fake_useragent and print it for reference.
ua = UserAgent()
userAgent = ua.random
print(userAgent)
options = Options()
options.add_argument(f'user-agent={userAgent}')
# The one line added to the asker's code: "detach" is intended to leave the
# browser open after the script ends. It is set before the driver is created.
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(chrome_options=options, executable_path=PATH)
driver.get("https://www.whatismybrowser.com/detect/what-is-my-user-agent")
I am not sure if it is possible that the problem is related to the webdriver version you are using or not but when I tried to add time.sleep(n) to your code while using webdriver_manager library to download most recent version of ChromeWebDriver I had the chance to look at the website and the browser didn't close until the timer finished.
My code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
from webdriver_manager.chrome import ChromeDriverManager
import time
# The hard-coded driver path is disabled; the driver binary comes from
# ChromeDriverManager().install() below instead.
# PATH ="C:/my/path/to/chromedriver.exe"
# Obtain a user-agent string from fake_useragent and print it for reference.
ua = UserAgent()
userAgent = ua.random
print(userAgent)
options = Options()
options.add_argument(f'user-agent={userAgent}')
# The installed driver path is passed positionally as the first argument.
driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)
driver.get("https://www.whatismybrowser.com/detect/what-is-my-user-agent")
# Block the script for 100 seconds so the page can be inspected before exit.
time.sleep(100)
I am doing a simple experiment with Amazon and Webdriver. However, using Webdriver Headless cannot find elements and errors out, but non-headless works.
Any suggestions how to get it working headless?
There is a comment right above the --headless flag.
from selenium import webdriver
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def get_inventory(url):
    """Add the product at *url* to the Amazon cart and set its quantity to 2.

    Runs Chrome (Canary binary) in headless mode with the JavaScript content
    setting set to 2. The browser session is always shut down when the
    function finishes, whether or not an element lookup fails.

    Args:
        url: Product page to open first.
    """
    chrome_options = Options()
    # Making it headless breaks. Commenting
    # this line, making it non-headless works.
    chrome_options.add_argument("--headless")
    chrome_options.add_experimental_option(
        "prefs", {'profile.managed_default_content_settings.javascript': 2})
    chrome_options.binary_location = '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary'
    driver = webdriver.Chrome(executable_path=os.path.abspath(
        'chromedriver'), chrome_options=chrome_options)
    driver.set_window_size(1200, 1000)
    try:
        driver.get(url)
        # XPath attribute tests use '@' (e.g. @id).
        add_to_cart_button_xp = '//*[@id="add-to-cart-button"]'
        add_to_cart_button = driver.find_element_by_xpath(add_to_cart_button_xp)
        add_to_cart_button.click()
        driver.get('https://www.amazon.com/gp/cart/view.html/ref=lh_cart')
        qty_field_xp = '//div/input[starts-with(@name, "quantity.") and @type="text"]'
        qty_field = driver.find_element_by_xpath(qty_field_xp)
        qty_field.clear()
        qty_field.send_keys("2")
        update_link_xp = '//input[@value="Update" and @type="submit"]'
        update_link = driver.find_element_by_xpath(update_link_xp)
        update_link.click()
    finally:
        # Release the browser and driver process even when a lookup raises.
        driver.quit()


url = 'https://www.amazon.com/Pexio-Professional-Stainless-Food-Safe-Dishwasher/dp/B07BGBSY9F'
get_inventory(url)
I think you just had some selector issues. I checked the elements and updated the quantity setting; everything else should be pretty much the same, aside from the binary locations.
from selenium import webdriver
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def get_inventory(url):
    """Add the product at *url* to the Amazon cart and pick quantity 2.

    Runs headless Chrome, saves a screenshot of the product page and another
    after the quantity change, and prints any exception raised along the way
    instead of propagating it. The browser session is always shut down.

    Args:
        url: Product page to open first.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(
        executable_path='/usr/bin/chromedriver',
        chrome_options=chrome_options,
    )
    # NOTE(review): the two settings below are applied after the driver has
    # already been created from chrome_options, so they presumably do not
    # affect this session - confirm whether they are still wanted.
    chrome_options.add_experimental_option(
        "prefs",
        {'profile.managed_default_content_settings.javascript': 2},
    )
    chrome_options.binary_location = '/usr/bin'
    driver.set_window_size(1200, 1000)
    try:
        driver.get(url)
        driver.save_screenshot("/tmp/x1.png")
        # XPath attribute tests use '@' (e.g. @id, @class).
        driver.find_element_by_xpath('//*[@id="add-to-cart-button"]').click()
        driver.get('https://www.amazon.com/gp/cart/view.html/ref=lh_cart')
        # Open the quantity dropdown and choose the entry containing "2".
        driver.find_element_by_xpath("//span[@data-action='a-dropdown-button']").click()
        driver.find_element_by_xpath("//*[@class='a-dropdown-link'][text()[contains(., '2')]]").click()
        # Return to the home page so the screenshot can confirm the change.
        driver.find_element_by_class_name("nav-logo-base").click()
        driver.save_screenshot("/tmp/confirm.png")
    except Exception as e:
        # Best-effort demo script: report the failure rather than crash.
        print(e)
    finally:
        # Shut the session down on both the success and the failure path.
        driver.quit()


url = 'https://www.amazon.com/Pexio-Professional-Stainless-Food-Safe-Dishwasher/dp/B07BGBSY9F'
get_inventory(url)
I've run this with and without --headless and it's working fine for me. I navigated to the homepage at the end so you can confirm the quantity change worked (hence the screenshot).
What is the behavior you see?
When I enabled headless, scripts started failing because running headless slows execution down.
I currently run chrome with these options:
'--no-sandbox', '--headless', '--window-size=1920,1080', '--proxy-server="direct://"', '--proxy-bypass-list=*'
The last two options supposedly help with the slowness, but I didn't see any difference.
Hope this helps.
I verified your claim on my Mac (using /Applications/Google Chrome.app/Contents/MacOS/Google Chrome).
My guess is that, since you are moving from an item page to the cart page of Amazon, the cookies are lost, so that the cart page won't show any item, and therefore won't contain any text input with a name starting with “quantity”, which is what the exception is about.
Googling for headless chrome cookies yields this page, which in turn points to this page, the content of which could also be about your problem. Be it this, or be it a particularly smart behavior of the Amazon website, the fact remains: the cookie that stores the cart (or a key thereof, but the result is the same) is not read by the cart page when in headless mode.