Getting the HTML element using Selenium WebDriver - python

I'm trying to get price of a product on amazon using Selenium:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Product page whose price should be read.
url = \
"https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
# Start Chrome through a locally installed chromedriver binary.
services = Service(r"C:\Users\Deepak Shetter\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=services)
driver.get(url)
# Look up the first element with class "a-offscreen" immediately after
# navigation; no explicit wait is used here.
price = driver.find_element(By.CLASS_NAME, "a-offscreen")
# NOTE(review): WebElement.text reports only visible text, which is presumably
# why an off-screen element prints nothing here -- confirm.
print("price is "+price.text)
As you can see in this image, the HTML for the price has class="a-offscreen". But when I run my code in PyCharm it returns None. How can I get the price string? (By the way, I checked it using Beautiful Soup and it worked fine.)
Edit :
This time I used another url : https://www.amazon.in/Avvatar-Alpha-Choco-Latte-Shaker/dp/B08S3TNGYK/?_encoding=UTF8&pd_rd_w=ofFKu&content-id=amzn1.sym.1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_p=1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_r=PT3Y6GWJ7YHADW09VKNK&pd_rd_wg=lBWZa&pd_rd_r=0a44c278-bcfa-49c2-806b-cf8eb292038a&ref_=pd_gw_ci_mcx_mr_hp_atf_m
In this case it has 2 price elements: one with class="a-offscreen" and another one with class="a-price-whole".
my code :
# Same lookup as before, but targeting the "a-price-whole" class instead.
price = driver.find_element(By.CLASS_NAME, "a-price-whole")
This time the return value is 1,580.

The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are taken literally;
# "\w" and "\c" are invalid escape sequences in a normal string literal.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
# Explicit wait helper: polls for up to 10 seconds before raising a timeout.
wait = WebDriverWait(driver, 10)
url = "https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
driver.get(url)
# Wait until the price element is actually visible, then read its text.
price = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#corePrice_desktop .a-span12 .apexPriceToPay"))).text
print(price)
The output is:
₹566

Using Safari, the following code works:
from selenium import webdriver
from selenium.webdriver.common.by import By
# The context manager quits the Safari session when the block exits.
with webdriver.Safari() as driver:
    driver.get('https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5')
    # First element on the page carrying the "a-offscreen" class.
    price = driver.find_element(By.CLASS_NAME, "a-offscreen")
    print(price.text)
Which gives this output:
₹566.00
Therefore it appears that your use of the ChromeDriver may be flawed

Related

Full XPath does not match the correct field in Python Selenium

I have the following problem. In the picture below I would like to fill some text into the second (red) field.
My code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
def set_scraper():
    """Set up and return a ChromeDriver instance that starts maximized.

    Returns:
        The ``webdriver.Chrome`` object created from the chromedriver binary
        at ``/usr/lib/chromium-browser/chromedriver``.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", options=options)
    return driver
def main() -> None:
    """Main function that is called when the script is run.

    Opens the building-lookup page, locates an input by its absolute XPath and
    sends the text "2727" through an action chain.
    """
    driver = set_scraper()
    driver.get("https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO")
    pokus = driver.find_element(By.XPATH, '/html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1]')
    # The implicit wait is configured only after the lookup above has run.
    driver.implicitly_wait(10)
    # NOTE(review): ActionChains.send_keys() without a target types into the
    # currently focused element; move_to_element() only moves the mouse. That
    # is presumably why the text lands in the first field -- confirm.
    ActionChains(driver).move_to_element(pokus).send_keys("2727").perform()
The problem is that it sends "2727" into the first field, not into the red one. Although /html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1] is the full xpath of the second field. Do you know why, please?
You can use XPath to locate the parent element based on unique text "Obec" in the child element and then locate the proper input element.
Here I'm using fixed attribute values that do not seem to change.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are taken literally.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO"
driver.get(url)
# Find the fieldset wrapper that contains the label text "Obec", then the text
# input inside it. XPath attribute tests use "@", not "#".
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='fieldsetWrapper'][contains(.,'Obec')]//input[@type='text']"))).send_keys("2727")
The result is:
Try with below
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Wait up to 20 seconds for the input identified by its title attribute to be
# clickable. XPath attribute tests use "@", not "#".
element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@title='Zadejte název obce']")))
element.send_keys("2727")
You can enter the text in the second text field using the below XPATH:
# Type into the second text field, selected by its name attribute
# (XPath attribute tests use "@", not "#").
driver.find_element(By.XPATH, ".//input[@name='ctl00$bodyPlaceHolder$vyberObec$txtObec']").send_keys("2727")
# clicking on the button
driver.find_element(By.XPATH, ".//input[@title='Vyhledat obec']").click()

Why can't I retrieve the price from a webpage?

I am trying to retrieve a price from a product on a webshop but can't find the right code to get it.
Price of product I want to extract: https://www.berger-camping.nl/zoeken/?q=3138522088064
This is the line of code I have to retrieve the price:
# Read the text of the price container (XPath attribute tests use "@").
Prijs_BergerCamping = driver.find_element(by=By.XPATH, value='//div[@class="prod_price__prod_price"]').text
print(Prijs_BergerCamping)
Any tips on what I seem to be missing?
Your code is correct.
I guess all you are missing is waiting for the element to become visible.
This code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are taken literally.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
# Wait until the price container is visible before reading its text
# (XPath attribute tests use "@", not "#").
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)
The output is:
79,99 €
But you also need to close the cookies banner and the "Select store" dialog (at least I see it). So, my code has the following additions:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are taken literally.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
# Accept the cookie banner first.
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
# Close the store-selection dialog (XPath attribute tests use "@", not "#").
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@style] //*[contains(@class,'uk-close')]"))).click()
# With the overlays gone, wait for the price to be visible and read it.
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)

Find value from div where multiple divs with same name

I want to find the value of TTM EPS from link https://www.moneycontrol.com/india/stockpricequote/computers-software/infosys/IT
I wrote the following code:
import os
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Chrome configuration: ignore certificate errors, start maximized, and use
# the 'eager' page-load strategy.
options = Options()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--start-maximized')
options.page_load_strategy = 'eager'
driver = webdriver.Chrome(options=options)
company = "Infosys"
# Explicit wait helper with a 20-second timeout.
wait = WebDriverWait(driver, 20)
driver.get("https://www.moneycontrol.com")
# Type the company name into the top search box and submit with ENTER.
inputElement = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#form_topsearch>.txtsrchbox.FL')))
inputElement.send_keys(company, Keys.ENTER)
# Click the element with id "proceed-button" once it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#proceed-button'))).click()
driver.implicitly_wait(30)
# Returns only the first element with class "oview_table"; the result is not
# stored or used here.
driver.find_element_by_class_name("oview_table")
Now, there are multiple classes with the same name "oview_table" and its table. How can I get the value of TTM EPS?
Try the following XPath: //td[contains(text(),'TTM EPS')]/../td[contains(@class,'nseceps')]
This is the element you are looking for.
Now you can extract the text value from it.
In order to get value of any other asset just pass it as a parameter to this string
This xpath should also work. Pass in the text you are looking for.
//div[@class='oview_table']//td[contains(normalize-space(.), 'TTM EPS')]

Selenium is unable to locate elements by class name

I am trying to get a list of the prices from this page.
The class name of the elements I'm trying to get is called s-item__price.
This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
# Run Chrome without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
browser = webdriver.Chrome(options=chrome_options)
browser.get(url)
# Collect every element with class "s-item__price" right after navigation
# (no wait) and print the resulting list of elements.
print(browser.find_elements_by_class_name('s-item__price'))
browser.quit()
The output is just an empty list.
You can use WebDriverWait to wait until the javascript generated the element:
# Explicit wait bound to the existing browser session.
wait = WebDriverWait(browser, 15) # 15 sec timeout
# Block until an element with class "s-item__price" is visible.
wait.until(expected_conditions.visibility_of_element_located((By.CLASS_NAME, 's-item__price')))
You could also use presence_of_element_located, but when it comes to click interaction it won't work with hidden elements.
So prefer using: visibility_of_element_located
Example Code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
options = Options()
options.add_argument('--headless')
browser = webdriver.Chrome(options=options)
browser.get(url)
wait = WebDriverWait(browser, 15) # Throws a TimeoutException after 15 seconds
wait.until(expected_conditions.visibility_of_element_located((By.CLASS_NAME, 's-item__price')))
# You could also use the return value of the wait (the first visible match).
# The find_elements_by_* helpers were deprecated and later removed in
# Selenium 4, so use find_elements() with a By locator instead.
print(browser.find_elements(By.CLASS_NAME, 's-item__price'))
browser.quit()
You get an empty list, I think, because you need to wait.
Use WebDriverWait and utilize .presence_of_all_elements_located to collect the elements in a list.
Then iterate over them with a loop; you must read the .text attribute to grab the text.
browser.get('https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone%208%20&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1')
wait = WebDriverWait(browser, 20)
# Wait until at least one price element is present, then get all of them.
list_price = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 's-item__price')))
for price in list_price:
    print(price.text)
# The session object in this snippet is named "browser", not "driver".
browser.quit()
Following import:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

How can I scrape Tax information from zillow with selenium

zillow picture
I have the above image at https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/60.780619,-65.522461,4.521666,-125.551758_rect/3_zm/
I can't seem to find the selector for tax history.
I tried to use driver wait but the table that is output is the price history not tax history.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
# NOTE(review): useragent, ips, os, wait, EC and By are used below but are not
# defined or imported in the visible snippet -- presumably set up elsewhere.
options = webdriver.ChromeOptions()
# Use the first user agent and the first proxy from the respective lists.
options.add_argument(f"user-agent={useragent[0]}")
options.add_argument('--proxy-server=%s' % ips[0])
options.add_argument('--incognito')
# Expand "~" to get the full path of the chromedriver binary.
chromedriver = '~/Downloads/chromedriver'
chromedriver = os.path.expanduser(chromedriver)
driver = webdriver.Chrome(chromedriver, chrome_options=options)
driver.get('https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/60.673178,-74.663086,4.653079,-116.323243_rect/3_zm/2_p/')
# Click the first clickable element with class "hdp-collapse", then wait for
# the div with id "hdp-tax-history" to be present.
wait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "hdp-collapse"))).click()
table = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div#hdp-tax-history")))
Looks like you need some waits (and maybe some clicks to get that tab visible). You can write out the table. The code below just shows how you can access it.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
url = 'https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/53.566414,-73.081055,17.434511,-118.081055_rect/3_zm/'
d = webdriver.Chrome()
d.get(url)
# Expand the "price and tax history" section.
WebDriverWait(d,20).until(EC.presence_of_element_located((By.ID , 'price-and-tax-history'))).click()
# Collect the tab links and click the second one.
tabs = WebDriverWait(d,5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".zsg-tab-link")))
tabs[1].click()
# The find_element_by_* helpers were deprecated and later removed in
# Selenium 4, so use find_element() with a By locator instead.
print(d.find_element(By.CSS_SELECTOR, '#hdp-tax-history table').text) # just to show present

Categories