I wrote a simple scraper some time ago which opens up the Chrome browser and scrapes some data from a website. However, now every time I run that script it does not open the URL I provide but instead redirects to a GDPR consent website. I removed --incognito mode from the options but it is still the same. Chrome opens, then the script crashes because it is automatically redirected to that GDPR consent webpage.
How can I go around this issue?
Here is the code to reproduce the error.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Launch Chrome through a local chromedriver binary and scrape the
# key-statistics table cells for AAPL from Yahoo Finance.
option = webdriver.ChromeOptions()
#option.add_argument("--incognito")
browser = webdriver.Chrome(executable_path='chromedriverpath', chrome_options=option)
rval=[]
browser.get("https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL")
timeout = 10
# NOTE(review): this only constructs the wait object; without a following
# .until(...) call nothing is actually waited for.
WebDriverWait(browser, timeout)
# XPath attribute tests are written with "@" ("#class" is not valid XPath).
values_element = browser.find_elements_by_xpath("//td[@class='Fz(s) Fw(500) Ta(end)']")
print(browser)
values = [x.text for x in values_element]
# Keep only the ninth matching cell; raises IndexError when fewer cells match
# (e.g. when the consent page was served instead of the statistics page).
rval.append(values[8])
# NOTE(review): `stockname` is not defined anywhere in this snippet.
for title, value in zip(stockname, rval):
    print(title + ': ' + value)
evdict=dict(zip(stockname, rval))
So, to bypass the popup window which blocks Selenium from scraping the data, I needed to add:
# Click the consent form's "OK" submit button (XPath attribute tests use "@").
browser.find_element_by_xpath("//input[@type='submit' and @value='OK']").click()
Which would click on the proper button and close the window for me. Then Selenium works without further problems.
Related
I'm a beginner in web scraping and I've followed a few YouTube videos about how to do this, but regardless of what I try I can't get my code to accept the cookies.
This is the code I have so far:
from selenium import webdriver
import time
# Open the Zoopla listing page in Safari and try to accept the cookie banner,
# which lives inside the iframe with id "gdpr-consent-notice".
driver = webdriver.Safari()
URL = "https://www.zoopla.co.uk/new-homes/property/london/?q=London&results_sort=newest_listings&search_source=new-homes&page_size=25&pn=1&view_type=list"
driver.get(URL)
time.sleep(2) # Wait a couple of seconds, so the website doesn't suspect you are a bot
try:
    driver.switch_to_frame('gdpr-consent-notice') # This is the id of the frame
    # XPath attribute tests are written with "@" ("#id" is not valid XPath).
    accept_cookies_button = driver.find_element_by_xpath('//*[@id="save"]')
    accept_cookies_button.click()
except AttributeError: # If you have the latest version of selenium, the code above won't run because the "switch_to_frame" is deprecated
    driver.switch_to.frame('gdpr-consent-notice') # This is the id of the frame
    accept_cookies_button = driver.find_element_by_xpath('//*[@id="save"]')
    accept_cookies_button.click()
except Exception:
    # Catch Exception rather than using a bare "except:", so that
    # KeyboardInterrupt / SystemExit are not swallowed as well.
    pass # If there is no cookies button, we won't find it, so we can pass
I don't have the Safari webdriver, only the Chrome webdriver, but I think they work similarly. On Chrome you can close the cookie banner with this code:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By  # By.ID / By.CSS_SELECTOR are used below
driver.get(URL)
# wait no more than 20 seconds for the `iframe` with id `gdpr-consent-notice` to appear, then switch to it
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.ID, "gdpr-consent-notice")))
# click accept cookies button
driver.find_element(By.CSS_SELECTOR, '#save').click()
I've tried the following way, but every time I click on the "Next" button in the login, chromedriver opens a new tab and redirects me to this page.
from selenium import webdriver
path = "https://login.yahoo.com/config/login?.src=finance&.intl=us&.lang=en-US&.done=https%3A%2F%2Ffinance.yahoo.com%2Fquotes%2Flogin%2Fview%2Fv1%2F"
# Build a ChromeOptions object first: add_argument() belongs to the options
# object, not to a running driver.
option = webdriver.ChromeOptions()
option.add_argument("--incognito")
option.add_argument("--disable-notifications")
# Pass the options through the `options` keyword (previously misspelled).
browser = webdriver.Chrome("/path/to/chromedriver", options=option)
browser.get(path)
browser.find_element_by_name("username").send_keys("username")
# all three attempts below redirected me to the page mentioned above
browser.find_element_by_name("signin").click()
browser.find_element_by_class_name("button-container").click()
browser.find_element_by_id("login-username-form").click()
I wonder whether this is some kind of security they have, considering the redirected page.
I also tried sending password in the hidden-input-container
password_field = browser.find_element_by_name("passwd")
password_field.send_keys("password")
to get selenium.common.exceptions.ElementNotInteractableException: Message: element not interactable error. I guess I need to press the Next button before sending the password.
I'd much appreciate any help on this issue.
Induce WebDriverWait() and wait for element_to_be_clickable()
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
# Log in to Yahoo by waiting for each form control to become clickable
# before typing into it or clicking it.
path = "https://login.yahoo.com/config/login?.src=finance&.intl=us&.lang=en-US&.done=https%3A%2F%2Ffinance.yahoo.com%2Fquotes%2Flogin%2Fview%2Fv1%2F"
option = Options()
for flag in ("--incognito", "--disable-notifications"):
    option.add_argument(flag)
browser = webdriver.Chrome(executable_path="/path/to/chromedriver", options=option)
browser.get(path)
# One wait object is enough: every until() call gets its own 10-second budget.
wait = WebDriverWait(browser, 10)
username_input = wait.until(EC.element_to_be_clickable((By.NAME, "username")))
username_input.send_keys("validusername")
next_button = wait.until(EC.element_to_be_clickable((By.NAME, "signin")))
next_button.click()
password_input = wait.until(EC.element_to_be_clickable((By.NAME, "password")))
password_input.send_keys("password")
submit_button = wait.until(EC.element_to_be_clickable((By.ID, "login-signin")))
submit_button.click()
Browser snapshot:
I've written a script that successfully makes the login on Instagram.
When I go to my account's home page, the website displays a popup that asks whether I want notifications.
At this point, I tried a lot of solutions, but I got nothing.
I just want that, when the pop-up is displayed, the script should click on "Not now".
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Log in to Instagram, then look up the button of the notification popup.
ids = []
driver = webdriver.Chrome(executable_path = '/usr/local/bin/chromedriver')
driver.get("https://www.instagram.com/accounts/login/?source=auth_switcher")
usm = driver.find_element_by_name('username').send_keys("**")
pwd = driver.find_element_by_name('password').send_keys("**")
btnLog = driver.find_element_by_tag_name('form').submit()
# XPath attribute tests are written with "@" ("#class" is not valid XPath).
acpt = driver.find_element_by_xpath("//*[contains(@class, 'aOOlW HoLwm ')]")
In the image, there's the line of the button highlighted that I want to click:
Try the following code for this:
from selenium.webdriver.support import ui
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Wait up to 10 seconds for the popup button to become clickable, then click it.
popup_button_locator = (By.CSS_SELECTOR, ".aOOlW.HoLwm")
popup_button = ui.WebDriverWait(driver, 10).until(EC.element_to_be_clickable(popup_button_locator))
popup_button.click()
PS: I have used 10-second wait for the element to be clickable before click on it.
Hope it helps you!
To click() on the element with text as Not now on Instagram popup notification you can use the following solution:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Log in to Instagram and dismiss the notification popup by clicking the
# button whose text contains 'Non ora'.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get("https://www.instagram.com/accounts/login/?source=auth_switcher")
driver.find_element_by_name('username').send_keys("Giacomo")
driver.find_element_by_name('password').send_keys("Maraglino")
driver.find_element_by_tag_name('form').submit()
# Wait up to 20 seconds for the popup button to become clickable before clicking.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(.,'Non ora')]"))).click()
just add click to acpt, like this:
# Locate the popup button by its class attribute (XPath uses "@class"), then click it.
acpt = driver.find_element_by_xpath("//*[contains(@class, 'aOOlW HoLwm ')]")
acpt.click()
For some reason, the solution to this question didn't fully work for me. The script does click on "not now" then I'm redirected to the "Home" page only to find the 'activate notifications' pop-up waiting for me.
This is the solution that I came up with:
Go to https://instagram.com/
Wait for the page to load.
Find the "Not now" button on the page using the full XPath.
Clicking on it.
Here's the code (inserted after identifying to my Instagram account):
# Reload the home page, give it time to render, then click the popup button.
driver.get("https://instagram.com/")
time.sleep(7)
# Use the same `driver` session that loaded the page (the lookup previously
# referred to a different name, `browser`).
acpt = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div/div/div/div[3]/button[2]')
acpt.click()
I've written a script in Python in combination with Selenium to parse some company names from a webpage. The selectors I've defined are flawless. However, as soon as the webpage opens, an annoying ad pops up hiding the data, so I can't reach it. How can I get rid of it and parse the data I want? I've tried switching to several of the iframes available in that webpage but none of them worked. The one I currently use in my script throws an error showing Message: no such element: Unable to locate element.
This is what i tried so far:
from selenium import webdriver
# Open the Inc. 5000 list, switch into the ad player's frame, and print the
# company name found in every row of the data container.
driver = webdriver.Chrome()
driver.get("https://www.inc.com/inc5000/list/2017")
driver.switch_to_frame(driver.find_element_by_id("jw_player_iconic"))
for item in driver.find_elements_by_css_selector("#data-container .row"):
    company = item.find_elements_by_css_selector(".company a")[0].text
    print(company)
driver.quit()
The pop-up ad is something like below (the bright one):
You can try waiting for the ad and closing it by clicking the "SKIP" button:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Open the Inc. 5000 list, dismiss the ad if its "SKIP" control shows up, and
# print the company name found in every row of the data container.
driver = webdriver.Chrome()
driver.get("https://www.inc.com/inc5000/list/2017")
driver.maximize_window()
try:
    # Give the ad up to 3 seconds to offer a clickable "SKIP" element.
    ad_iframe_close = wait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "//span[.='SKIP']")))
    ad_iframe_close.click()
except TimeoutException:
    # No ad appeared in time; carry on with scraping.
    pass
for item in driver.find_elements_by_css_selector("#data-container .row"):
    company = item.find_elements_by_css_selector(".company a")[0].text
    print(company)
# Close the browser once the rows have been printed.
driver.quit()
This should let you close the ad frame, or do nothing in case it doesn't appear within 3 seconds.
I have a script that collects a screenshot of a web site using Selenium. My issue is that if a site requests basic authentication I would like the script to just error and quit.
At the moment it just sits there for about a minute and then takes a blank screen shot.
The code I am using is below.
#!/usr/bin/env python
from pyvirtualdisplay import Display
from selenium import webdriver
# Start a virtual display (visible=0), load one page in Firefox, save a
# screenshot of it, then shut the browser and the display down.
SCREEN_SIZE = (800, 600)
display = Display(visible=0, size=SCREEN_SIZE)
display.start()
browser = webdriver.Firefox()
browser.get('http://www.google.com')
browser.save_screenshot('screenshot.png')
browser.quit()
display.stop()
I am hoping that there is an easy way of making the script after the browser.get command to error if asked for authentication.
Thanks for your help.
Maybe the page has not finished loading when Selenium takes the screenshot. Could you use an explicit wait? For example:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
# Wait up to 10 seconds for the element with id "myDynamicElement" to be
# present in the DOM; the browser is always closed afterwards.
ff = webdriver.Firefox()
ff.get("http://somedomain/url_that_delays_loading")
try:
    element = WebDriverWait(ff, 10).until(EC.presence_of_element_located((By.ID, "myDynamicElement")))
finally:
    # Runs whether the wait succeeded or raised TimeoutException.
    ff.quit()
This example quoted from this page.