Why can't I retrieve the price from a webpage? - python

I am trying to retrieve a price from a product on a webshop but can't find the right code to get it.
Price of product I want to extract: https://www.berger-camping.nl/zoeken/?q=3138522088064
This is the line of code I have to retrieve the price:
Prijs_BergerCamping = driver.find_element(by=By.XPATH, value='//div[@class="prod_price__prod_price"]').text
print(Prijs_BergerCamping)
Any tips on what I seem to be missing?

Your code is correct.
I guess all you're missing is waiting for the element's visibility.
This code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)
The output is:
79,99 €
But you also need to close the cookie banner and the Select store dialog (at least that's what I see). So, my code has the following additions:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@style] //*[contains(@class,'uk-close')]"))).click()
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)
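Note that the extracted value is a localized string ("79,99 €" in the output above), not a number. If you need it as a number for comparisons, a minimal sketch, assuming the comma decimal separator and trailing euro sign shown above:
price_value = float(price.replace("€", "").strip().replace(",", "."))  # "79,99 €" -> 79.99
print(price_value)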

Related

Issues scraping a dynamic table displaying available and booked time slots

I want to scrape the following website: https://padelbox.de/koeln-weiden/padelplatz-buchen. I want to scrape the planning tool every day and see which slots are booked and which are not. However, the code below throws an error suggesting the values are not found. Can anyone help me with this?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import requests
import time
website = 'https://padelbox.de/koeln-weiden/padelplatz-buchen'
path = '/Users/joeplamers/Downloads/chromedriver_mac_arm64/chromedriver'
# Initialize the webdriver
driver = webdriver.Chrome(path)
# Open the website
driver.get(website)
#close the cookie pop-up and maximize window
all_matches_button = driver.find_element(By.XPATH, '//a[@class="_brlbs-btn _brlbs-btn-accept-all _brlbs-cursor"]')
all_matches_button.click()
driver.maximize_window()
wait = WebDriverWait(driver, 60)
wait.until(ec.presence_of_element_located((By.CSS_SELECTOR,'[data-state="booked"]')))
booked_elements = driver.find_elements(By.CSS_SELECTOR,'[data-state="booked"]')
print(booked_elements)
#Close the browser
driver.quit()
The desired elements are within an <iframe> so you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired element to be visible.
You can use the following locator strategies:
driver.get("https://padelbox.de/koeln-weiden/padelplatz-buchen")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a._brlbs-btn-accept-all"))).click()
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[src^='https://www.eversports.de/widget']")))
print(len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "td[data-state='booked'][data-date]")))))
Note: you have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Console Output:
318
Update
Here's the complete code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.get("https://padelbox.de/koeln-weiden/padelplatz-buchen")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a._brlbs-btn-accept-all"))).click()
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[src^='https://www.eversports.de/widget']")))
print(len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "td[data-state='booked'][data-date]")))))
driver.quit()
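If you want the actual slots rather than just the count, you can iterate the matched cells before the driver.quit() call and read the data-date attribute that the selector above already relies on. A minimal sketch (any further data-* attributes would be assumptions, so only data-date is read here):
booked_cells = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "td[data-state='booked'][data-date]")))
for cell in booked_cells:
    # data-date is guaranteed to exist by the selector itself
    print(cell.get_attribute("data-date"))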

not able to get the search button with selenium python

I am trying to scrape articles from this website. I managed to do the login part, but when I try to click on the search button and send the values I get a timeout error. I tried running Selenium with the start-maximized option and noticed the page doesn't seem to load.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search__input"]')))
input_text = elem.find_element(by=By.XPATH, value='//*[@id="search__input"]').click()
input_text.send_keys("Anthony Albanese")
print(input_text.get_attribute('value'))
I have tried to use get_attribute('innerHTML') and I got the search button's HTML, but I still have to send the keys, so it does not seem to be of any use. This is the error I got (screenshot).
What should I do to send in the search terms?
There are 2 elements on that page matching the //*[@id="search__input"] XPath locator, while you need the second one.
You have to use a unique locator.
This should work better:
text_input = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='header__search']//*[@id='search__input']")))
text_input.click()
text_input.send_keys("Anthony Albanese")
This can also be done with CSS Selectors. They are shorter in this case:
text_input = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".header__search #search__input")))
text_input.click()
text_input.send_keys("Anthony Albanese")
UPD
This is the code I used, exactly according to what I wrote before:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.theaustralian.com.au/"
driver.get(url)
text_input = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".header__search #search__input")))
text_input.click()
text_input.send_keys("Anthony Albanese")
After the code above runs, the search text appears in the input field.
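Without a screenshot, you can also confirm the text landed in the field by reading the value back, mirroring the get_attribute('value') call from the question:
print(text_input.get_attribute('value'))  # expected: Anthony Albanese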
And if you add an Enter keypress to the search input, as follows:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.theaustralian.com.au/"
driver.get(url)
text_input = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".header__search #search__input")))
text_input.click()
text_input.send_keys("Anthony Albanese" + Keys.ENTER)
the search results page will finally open.

Why doesn’t Selenium find an element with this XPath expression?

I’m trying to find an element with this XPath expression:
/html/body/div/div[1]/div/div/div[2]/div/div/div/div[2]/form/div[1]/div[3]/div/input
But Selenium can't find it.
The page I’m trying to access - https://account.aax.com/en-US/login/
I already tried to follow this path by myself, and it’s fine.
You are missing a delay; WebDriverWait with expected_conditions should be used for that.
You also have to improve your locators.
The following code works:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
url = "https://account.aax.com/en-US/login/"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@class='MuiInputBase-input MuiOutlinedInput-input']"))).click()
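If the next step is typing into that field, the same wait can hand you the element to send keys to. A minimal continuation; the address is a purely illustrative placeholder, and which of the matching inputs is returned first is an assumption about the page:
email_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@class='MuiInputBase-input MuiOutlinedInput-input']")))
email_input.send_keys("user@example.com")  # placeholder value, not real credentials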

Why isn't selenium successfully clicking my download link?

I am trying to use Selenium to download an Excel file from a website. I am not sure why the code isn't downloading it. I get an exit code 0, so everything ran successfully, but I am not seeing the file in my downloads.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def scrape_mclellan_website():
    url = 'https://www.mcoscillator.com/market_breadth_data/'
    s = Service(ChromeDriverManager().install())
    op = webdriver.ChromeOptions()
    op.add_argument('headless')
    driver = webdriver.Chrome(service=s)
    driver.get(url)
    download_link = driver.find_element(by=By.XPATH, value='//*[@id="data_table"]/a[1]/img')
    download_link.click()
scrape_mclellan_website()
How to fix?
Wait until the element you try to click is present, and click the <a>, not the <img>:
download_link = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="data_table"]/a[1]')))
download_link.click()
Set your preference for the download folder and make sure the window opens at the right size:
prefs = {'download.default_directory':'ENTER PATH TO DOWNLOAD FOLDER'}
options = webdriver.ChromeOptions()
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_argument("--headless")
options.add_experimental_option("prefs",prefs)
Example (selenium 4)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
service = Service(executable_path='ENTER YOUR PATH TO CHROMEDRIVER')
prefs = {'download.default_directory':'ENTER PATH TO DOWNLOAD FOLDER'}
options = webdriver.ChromeOptions()
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_argument("--headless")
options.add_experimental_option("prefs",prefs)
driver = webdriver.Chrome(service=service, options=options)
driver.get('https://www.mcoscillator.com/market_breadth_data/')
download_link = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="data_table"]/a[1]')))
download_link.click()
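One more caveat: in headless mode the script can exit before Chrome finishes writing the file, so it may help to poll the download folder before quitting. A minimal sketch, reusing the prefs path from above (Chrome stores in-progress downloads with a .crdownload suffix):
import glob
import time
download_dir = 'ENTER PATH TO DOWNLOAD FOLDER'  # same path as in prefs
deadline = time.time() + 30
while time.time() < deadline:
    # done when at least one file exists and none are still in progress
    if glob.glob(download_dir + '/*') and not glob.glob(download_dir + '/*.crdownload'):
        break
    time.sleep(1)
driver.quit()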

HTML table is missing the last value when extracted via Selenium

This code scrapes everything but the last value in the table (0 Measure). I've looked everywhere but can't understand why it gets missed. Any idea?
import pandas as pd
import csv
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
t1 = []
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(executable_path = 'mypath/chromedriver.exe', options = options)
driver.get("https://ai.fmcsa.dot.gov/SMS")
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@title='Close']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "(//input[@name='MCSearch'])[2]"))).send_keys('1605061')
wait.until(EC.element_to_be_clickable((By.XPATH, "(//input[@name='search'])[2]"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='BASICs']/p[2]/a"))).click()
tbl = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//tr[@class='valueRow sumData']")))
tab = tbl.text
print(tab)
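One thing worth checking (an educated guess: Selenium's .text returns only text rendered visible, so a cell that is hidden or off-screen can be dropped) is to scroll the row into view first, or read textContent instead:
driver.execute_script("arguments[0].scrollIntoView(true);", tbl)  # bring the whole row on screen
print(tbl.get_attribute("textContent"))  # includes text of hidden cells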
