I'm trying to get price of a product on amazon using Selenium:
# Question snippet: open an Amazon product page in Chrome and print the text
# of the first element that carries the "a-offscreen" class.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
url = \
"https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
# Path to the local chromedriver binary (raw string, so the backslashes stay literal).
services = Service(r"C:\Users\Deepak Shetter\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=services)
driver.get(url)
# find_element returns a single element for the given class name.
price = driver.find_element(By.CLASS_NAME, "a-offscreen")
# NOTE(review): WebElement.text only returns text that is rendered on screen.
# The "a-offscreen" span is presumably hidden visually, which would explain the
# empty result; price.get_attribute("textContent") should still expose the
# value -- confirm against the live page.
print("price is "+price.text)
As you can see in this image, the HTML element for the price has class="a-offscreen". But when I run my code in PyCharm, it returns None. How can I get the price string? (By the way, I checked it using Beautiful Soup and it worked fine.)
Edit :
This time I used another url : https://www.amazon.in/Avvatar-Alpha-Choco-Latte-Shaker/dp/B08S3TNGYK/?_encoding=UTF8&pd_rd_w=ofFKu&content-id=amzn1.sym.1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_p=1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_r=PT3Y6GWJ7YHADW09VKNK&pd_rd_wg=lBWZa&pd_rd_r=0a44c278-bcfa-49c2-806b-cf8eb292038a&ref_=pd_gw_ci_mcx_mr_hp_atf_m
In this case the page has two price elements: one with class="a-offscreen" and another one with class="a-price-whole".
my code :
# Same kind of lookup as in the first snippet, but targeting the
# "a-price-whole" class instead of "a-offscreen" (reuses the existing driver).
price = driver.find_element(By.CLASS_NAME, "a-price-whole")
This time the return value is 1,580.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Answer snippet: start Chrome maximised, open the product page and print the
# price once the price element has become visible.
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" are not valid escape sequences, so a plain literal
# only works by accident and triggers a warning on recent Python versions.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
# Explicit wait with a 10 second timeout.
wait = WebDriverWait(driver, 10)
url = "https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
driver.get(url)
# Target the visible "price to pay" element rather than the off-screen span,
# so that .text has rendered text to return.
price = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#corePrice_desktop .a-span12 .apexPriceToPay"))).text
print(price)
The output is:
₹566
Using Safari, the following code works:
from selenium import webdriver
from selenium.webdriver.common.by import By
# The driver is used as a context manager, so the browser session is cleaned
# up when the block exits. The statements below must be indented to belong to
# the `with` block; without the indentation the snippet does not parse.
with webdriver.Safari() as driver:
    driver.get('https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5')
    # Same class-name lookup as in the question.
    price = driver.find_element(By.CLASS_NAME, "a-offscreen")
    print(price.text)
Which gives this output:
₹566.00
Therefore, it appears that your use of the ChromeDriver may be flawed.
This is the website link which I am trying to scrape for data https://tis.nhai.gov.in/tollplazasataglance.aspx?language=en#
There are links in the 4th column of the above site. When one of them is clicked, a popup window appears that shows certain information along with the href for the next page, which is reached by clicking the More Information tab. That leads to links such as https://tis.nhai.gov.in/TollInformation.aspx?TollPlazaID=236
# Question snippet: open the toll-plaza overview in Firefox and click the
# first plaza link so that its popup appears.
from selenium import webdriver  # `from` must be lower-case; `From` is a SyntaxError

driver = webdriver.Firefox()
driver.maximize_window()
driver.get("https://tis.nhai.gov.in/tollplazasataglance.aspx?language=en#")
# XPath selects attributes with "@" (not "#"): the link in row 2, column 4 of
# the table inside the element whose id is "tollList".
b = driver.find_element("xpath", '//*[@id="tollList"]/table/tbody/tr[2]/td[4]/a')
# Click through JavaScript instead of WebElement.click(); the script has no
# return statement, so `c` receives None.
c = driver.execute_script("arguments[0].click();", b)
At this point I am stuck, as I am unable to capture the href or URL of the popup window. Kindly help me get from the popup window to the other page.
Those pop-ups are the result of POST requests, where the payload is each location ID. Here is a way to get the location IDs:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time as t
# Answer snippet: list every toll plaza name together with the numeric ID
# embedded in its link's onclick handler.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  # path to where you saved the chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# Explicit wait with a 25 second timeout.
wait = WebDriverWait(driver, 25)
url = 'https://tis.nhai.gov.in/tollplazasataglance.aspx?language=en#'
driver.get(url)
# XPath attribute tests use "@": every 4th-column cell of the table inside
# the div whose id is "tollList".
places = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@id="tollList"]//tbody/tr/td[4]')))
for p in places:
    # The ID is whatever sits between the first "(" and the next ")" of the
    # link's onclick attribute.
    p_id = p.find_element(By.XPATH, './/a').get_attribute('onclick').split('(')[1].split(')')[0]
    print(p.text, p_id)
Result in terminal:
Aganampudi 236
Amakathadu 258
Badava 4486
Bandapalli 5697
Bandlapalli 5952
Basapuram 4542
Bathalapalli 5753
Bolapalli 252
[...]
Once you have the IDs, you can go to each place's page with https://tis.nhai.gov.in/TollInformation.aspx?TollPlazaID={place_id}.
The question is about Selenium WebDriver's explicit wait feature, which performs the same locating action but with a timeout: WebDriver waits for the element to be visible or present. In my situation, I locate elements both with an explicit wait and without one. Without the explicit wait, the element is found and assigned. But below, when I also try to locate the element with an explicit wait, the variable is left unassigned, is passed in that state to the for loop, and the loop raises UnboundLocalError.
I know that the error comes from Python rather than from Selenium. I can get rid of it by using `y_priceNewProds = x_priceNewProds` instead of `pass`, but I need to understand why Selenium WebDriver cannot locate the element with a wait when it already found it one line above. There must be cookies also.
code is below;
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
#reading from csv file url-s
def readCSV(path_csv):
    """Read the CSV file at ``path_csv`` and return it as a pandas DataFrame.

    Args:
        path_csv: Path (or any other source accepted by ``pandas.read_csv``)
            of the CSV file to load.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # The body must be indented under the def; without it the function does
    # not parse.
    return pd.read_csv(path_csv)
# Load the URL list once at import time; the scraping functions read these
# module-level names directly.
fileCSV = readCSV(r'C:\Users\Admin\Downloads\urls.csv')
# Index label of the last non-null entry in the "linkamazon" column; used as
# the inclusive upper bound when looping over the URLs.
length_of_column_urls = fileCSV['linkamazon'].last_valid_index()
def create_driver():
    """Build and return a headless Chrome WebDriver configured for scraping.

    Returns:
        A ``webdriver.Chrome`` instance started through the chromedriver
        binary at the hard-coded path below.
    """
    chrome_options = Options()
    # Pass the flag explicitly: the ``Options.headless`` property is deprecated
    # in newer Selenium 4 releases and later removed, while the plain argument
    # works across versions.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("start-maximized")
    # options.add_experimental_option("detach", True)
    chrome_options.add_argument("--no-sandbox")
    # add_experimental_option stores one value per option name, so two separate
    # "excludeSwitches" calls would leave only the last list in effect.
    # Both switches therefore go into a single call.
    chrome_options.add_experimental_option(
        "excludeSwitches", ["enable-automation", "enable-logging"]
    )
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    webdriver_service = Service(r'C:\pythonPro\w_crawl\AmznScrpBot\chromedriver.exe')
    driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
    return driver
#going to urls 1-by-1
def goToUrl_Se(driver):
    """Open every URL from the loaded CSV in turn and scrape each page.

    Reads the module-level ``fileCSV`` and ``length_of_column_urls``, and keeps
    the global ``counter`` equal to the number of pages already processed
    (``write_to_csv`` uses it to decide between creating and appending).

    Args:
        driver: The WebDriver used for navigation. It is quit when the function
            finishes, including when a page raises.
    """
    global counter
    counter = 0
    try:
        for i in range(length_of_column_urls + 1):
            # NOTE(review): the URL is read from column position 1, while the
            # loop bound comes from the "linkamazon" column -- presumably the
            # same column; verify against the CSV layout.
            xUrl = fileCSV.iloc[i, 1]
            print(xUrl, i)
            # Navigate to the page, then let parse_data() scrape it through the
            # module-level driver.
            driver.get(xUrl)
            parse_data()
            counter += 1
    finally:
        # Always release the browser, even if a page failed to load or parse.
        driver.quit()
#fetch-parse the data from url page
def parse_data():
    """Scrape titles, book formats and prices from the page loaded in ``driver``.

    Works on the module-level ``driver``. Each scraped product is published
    through the ``title`` / ``bookform`` / ``priceNewProd`` globals and handed
    to ``write_to_csv()``. Prices are looked up twice on purpose -- once with a
    plain ``find_elements`` call and once through an explicit visibility wait
    -- to compare the two strategies.
    """
    global asin, title, bookform, priceNewProd
    wait = WebDriverWait(driver, timeout=20)
    # XPath attribute tests are written with "@". The same price expression is
    # used for both the plain lookup and the explicit-wait lookup.
    price_xpath = (
        '//div[contains(@class,"a-row a-size-base a-color-base")]'
        '//span[contains(@data-a-color,"base")]'
        '//span[contains(@class,"a-offscreen")][1]'
    )
    try:
        # Elements are located by XPath both with and without an explicit wait,
        # to understand why some of them sometimes cannot be located.
        y_data_index = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@data-index='1']")))
        print(y_data_index.text, 'y_index')
        x_data_index = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@data-index='1']")))
        print(x_data_index.text, 'x_index')
        x_titles = driver.find_elements(By.XPATH, '//span[contains(@class,"a-size-medium")]')
        x_bookforms = driver.find_elements(By.XPATH, '//div[contains(@class,"a-section a-spacing-none a-spacing-top-micro s-price-instructions-style")]//div[contains(@class,"a-row a-size-base a-color-base")]//a[contains(@class,"a-size-base a-link-normal puis-medium-weight-text s-underline-text s-underline-link-text s-link-style s-link-centralized-style a-text-bold")]')
        x_priceNewProds = driver.find_elements(By.XPATH, price_xpath)
        y_priceNewProds = wait.until(EC.visibility_of_all_elements_located((By.XPATH, price_xpath)))
    except Exception:
        # NOTE: kept as in the question on purpose. If any wait above times
        # out, the exception is swallowed here and every name assigned after
        # the failing line stays unbound. That is what later surfaces as the
        # UnboundLocalError on y_priceNewProds. Real code should handle the
        # timeout explicitly, or initialise the names before the try.
        pass
    for i, title_element in enumerate(x_titles):
        x_title = title_element.text
        x_priceNewProd = x_priceNewProds[i].text
        print(x_priceNewProd)
        y_priceNewProd = y_priceNewProds[i].text
        print(y_priceNewProd)
        try:
            x_bookform = x_bookforms[i].text
            print(x_bookform)
        except Exception:
            # No book-format entry for this product: fall back to a placeholder.
            x_bookform = '-'
        # Publish the row through the globals that write_to_csv() reads.
        title = x_title
        bookform = x_bookform
        priceNewProd = x_priceNewProd
        write_to_csv()
def write_to_csv():
    """Write the current product row to ``results_new00.csv``.

    Reads the module-level ``title``, ``bookform``, ``priceNewProd`` and
    ``counter``. While ``counter`` is 0 the file is (re)created with a header
    row; afterwards the row is appended without a header.
    """
    # The one-row frame is identical in both branches, so it is built once.
    df = pd.DataFrame(
        {'Products': title, 'Bookform': bookform, 'Price': priceNewProd},
        index=[0],
    )
    if counter == 0:
        df.to_csv('results_new00.csv', index=False, mode='w')
    else:
        df.to_csv('results_new00.csv', index=False, mode='a', header=False)
# Script entry point: build the browser session, then walk through every URL.
# `driver` has to stay a module-level name because parse_data() reads it as a
# global.
driver = create_driver()
goToUrl_Se(driver)
The error is:
y_priceNewProd=y_priceNewProds[i].text
UnboundLocalError: local variable 'y_priceNewProds' referenced before assignment
Since you didn't share all the code, including the link, we can't know exactly what happens there, but I guess that this line
wait.until(EC.visibility_of_all_elements_located((By.XPATH,'//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
throws an exception, since the element(s) are not visible.
This is why y_priceNewProds is never assigned a value.
Whereas if you use the plain driver.find_elements method, it can find those elements, since they exist but, again, are not visible.
UPD
After you shared the link I tested that and now I can say: Yes, I was right!
Those elements are not visible.
The following code throws TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Demonstration: waiting for the off-screen price spans to become *visible*
# ends in a TimeoutException.
options = Options()
options.add_argument("start-maximized")
# Raw string keeps the backslashes of the Windows path literal ("\w" and "\c"
# are not valid escape sequences).
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
# XPath attribute tests use "@". The innermost span carries the "a-offscreen"
# class.
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
print(m_list)
UPD2
But if you take the direct parent of that element, it will work correctly, since the parent is visible.
For the following code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Demonstration: the same wait succeeds when it targets the direct parent of
# the off-screen span.
options = Options()
options.add_argument("start-maximized")
# Raw string keeps the backslashes of the Windows path literal ("\w" and "\c"
# are not valid escape sequences).
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
# XPath attribute tests use "@". This expression stops at the span selected
# by data-a-color, one level above the "a-offscreen" span.
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]')))
print(len(m_list))
the output is:
27
I'm trying to get a html from the url below.
https://reserve.tokyodisneyresort.jp/restaurant/search/?useDate=20220714&adultNum=4&childNum=0&childAgeInform=&restaurantType=4&restaurantType=5&restaurantType=3&restaurantType=1&restaurantType=2&restaurantType=7&nameCd=&wheelchairCount=0&stretcherCount=0&keyword=&reservationStatus=0
But after starting chromedriver, the page keeps loading forever and nothing appears.
How can I fix it?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
def get_html():
    """Open the Tokyo Disney Resort restaurant-search page in Chrome.

    Starts Chrome through the chromedriver binary in the current directory,
    sets a 60 second implicit wait and navigates to the search URL. Nothing is
    returned and the browser is left open.
    """
    # Local import: this snippet's import block does not bring Service into
    # scope.
    from selenium.webdriver.chrome.service import Service

    url = 'https://reserve.tokyodisneyresort.jp/restaurant/search/?useDate=20220714&adultNum=4&childNum=0&childAgeInform=&restaurantType=4&restaurantType=5&restaurantType=3&restaurantType=1&restaurantType=2&restaurantType=7&nameCd=&wheelchairCount=0&stretcherCount=0&keyword=&reservationStatus=0'
    options = Options()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    # Selenium 4 takes the driver path through a Service object and the options
    # through `options=`. The older `executable_path=` / `chrome_options=`
    # keywords are deprecated and rejected by newer releases.
    driver = webdriver.Chrome(service=Service('./chromedriver.exe'), options=options)
    driver.implicitly_wait(60)
    driver.get(url)


get_html()
I want to download files by clicking on Download icon on Chrome browser.
I tried several ways, such as XPath and CSS selectors, but none of them worked. Please let me know if there is a solution for this using Python 3.x and Selenium.
Below is code that I have tried,
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
class TEAutomation:
    """Question snippet: open a TE document URL in Chrome and click the download icon."""

    def automateTask(self):
        """Open the PDF URL and click the element whose id is ``download``.

        Chrome is told to save downloads into a fixed directory. The browser is
        quit when the method finishes, including when locating the element
        fails.
        """
        # Local import: this snippet's import block does not bring Service into
        # scope.
        from selenium.webdriver.chrome.service import Service

        chromeOptions = Options()
        chromeOptions.add_experimental_option("prefs", {"download.default_directory": "/home/vishal/Documents/PythonProgram/"})
        baseUrl = "https://www.te.com/commerce/DocumentDelivery/DDEController?Action=showdoc&DocId=Customer+Drawing%7F160743%7FM2%7Fpdf%7FEnglish%7FENG_CD_160743_M2.pdf%7F160743-1"
        # Selenium 4 takes the driver path through a Service object and the
        # options through `options=`. The older keywords are deprecated and
        # rejected by newer releases.
        driver = webdriver.Chrome(
            service=Service("/home/vishal/PycharmProjects/VSProgramming/drivers/chromedriver"),
            options=chromeOptions,
        )
        try:
            driver.maximize_window()
            driver.get(baseUrl)
            driver.implicitly_wait(10)
            # XPath attribute tests use "@" (not "#").
            driver.find_element(By.XPATH, '//*[@id="download"]').click()
            # Alternative locator tried in the question:
            # driver.find_element(By.CSS_SELECTOR, '#download').click()
            # Fixed pause after the click so the download has time to start.
            time.sleep(5)
        finally:
            # Close the browser even when the element cannot be located.
            driver.quit()


molexAuto = TEAutomation()
molexAuto.automateTask()
Thank you in advance.
Maybe the element is still not loaded when you try to click it. Try waiting for it with WebDriverWait. I don't have Chrome, so you will have to test this yourself:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
class TEAutomation:
    """Answer snippet: let Chrome download the PDF directly instead of clicking the viewer icon."""

    def automateTask(self):
        """Open the PDF URL with Chrome configured to download PDFs.

        The ``plugins.always_open_pdf_externally`` preference is enabled, so
        navigating to the URL is presumably enough to start the download into
        the configured directory -- confirm on a machine with Chrome. The
        browser is quit when the method finishes.
        """
        # Local import: this snippet's import block does not bring Service into
        # scope.
        from selenium.webdriver.chrome.service import Service

        chromeOptions = Options()
        prefs = {
            "download.default_directory": "/home/vishal/Documents/PythonProgram/",
            "plugins.always_open_pdf_externally": True,
        }
        chromeOptions.add_experimental_option("prefs", prefs)
        baseUrl = "https://www.te.com/commerce/DocumentDelivery/DDEController?Action=showdoc&DocId=Customer+Drawing%7F160743%7FM2%7Fpdf%7FEnglish%7FENG_CD_160743_M2.pdf%7F160743-1"
        # Selenium 4 takes the driver path through a Service object and the
        # options through `options=`. The older keywords are deprecated and
        # rejected by newer releases.
        driver = webdriver.Chrome(
            service=Service("/home/vishal/PycharmProjects/VSProgramming/drivers/chromedriver"),
            options=chromeOptions,
        )
        try:
            driver.implicitly_wait(10)
            driver.maximize_window()
            driver.get(baseUrl)
            # Fixed pause so the download can proceed before the browser closes.
            time.sleep(5)
        finally:
            # Close the browser even if navigation fails.
            driver.quit()


molexAuto = TEAutomation()
molexAuto.automateTask()