I am trying to scrape data from a website using Selenium. I am able to send the form values, but the results never load, so I cannot start scraping. The program also does not throw any errors. Here's the reproducible code:
# Setting up driver
driver = webdriver.Chrome(executable_path=r'D:\Program Files\ChromeDriver\chromedriver.exe')
# Opening up the webpage
driver.get("https://www1.nseindia.com/products/content/derivatives/equities/historical_fo.htm")
# Setting the value of Select Instrument
driver.find_element_by_id('instrumentType').send_keys('Index Options')
# Setting the value of Select Symbol
driver.find_element_by_id('symbol').send_keys('NIFTY 50')
# Setting the value of Select Year
driver.find_element_by_id('year').send_keys('2019')
# Setting the value of Select Expiry
select = Select(driver.find_element_by_id('expiryDate'))
noOfExpiries = 2
select.select_by_index(noOfExpiries)
# Setting the value of Select Option Type
cycle = 'PE'
driver.find_element_by_id('optionType').send_keys(cycle)
# Clicking the date range radio button
driver.find_element_by_xpath("//input[#id='rdDateToDate']").click()
# Setting the date range
fromDate = (datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y') - timedelta(days=45)).strftime("%d-%b-%Y")
driver.find_element_by_id('fromDate').send_keys(fromDate)
toDate = datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y').strftime("%d-%b-%Y")
driver.find_element_by_id('toDate').send_keys(toDate)
print(fromDate, toDate)
# Clicking the Get Data button
driver.find_element_by_id('getButton').click()
Any clue as to what I am missing here?
Okay, this should do it. I came up with a Selenium-based solution only because you expressed disinterest in the requests module:
from selenium import webdriver
from datetime import datetime, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = ChromeOptions()
options.add_argument('disable-blink-features=AutomationControlled')
with webdriver.Chrome(options=options) as driver:
    wait = WebDriverWait(driver, 10)
    driver.get("https://www1.nseindia.com/products/content/derivatives/equities/historical_fo.htm")
    wait.until(EC.presence_of_element_located((By.ID, "instrumentType"))).send_keys('Index Options')
    wait.until(EC.presence_of_element_located((By.ID, "symbol"))).send_keys('NIFTY 50')
    wait.until(EC.presence_of_element_located((By.ID, "year"))).send_keys('2019')
    select = Select(wait.until(EC.presence_of_element_located((By.ID, "expiryDate"))))
    noOfExpiries = 13
    select.select_by_index(noOfExpiries)
    cycle = 'PE'
    wait.until(EC.presence_of_element_located((By.ID, "optionType"))).send_keys(cycle)
    item = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@id='rdDateToDate']")))
    driver.execute_script("arguments[0].click();", item)
    fromDate = (datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y') - timedelta(days=45)).strftime("%d-%b-%Y")
    wait.until(EC.presence_of_element_located((By.ID, "fromDate"))).send_keys(fromDate)
    toDate = datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y').strftime("%d-%b-%Y")
    wait.until(EC.presence_of_element_located((By.ID, "toDate"))).send_keys(toDate, Keys.ENTER)
    print(fromDate, toDate)
    search_button = wait.until(EC.presence_of_element_located((By.ID, "getButton")))
    driver.execute_script("arguments[0].click();", search_button)
    tabular_content = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#historicalData > table"))).get_attribute("innerHTML")
    print(tabular_content)
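If you want the extracted table as a structured object rather than raw HTML, here is a minimal follow-up sketch, appended inside the with block and assuming pandas (with lxml) is installed; since innerHTML omits the outer <table> tag, it is wrapped before parsing:
    # sketch only: parse the captured innerHTML into a DataFrame
    import pandas as pd
    df = pd.read_html("<table>" + tabular_content + "</table>")[0]
    print(df.head())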
I cannot select the year.
Please run the code and you will see what happens at the end.
I've tried many ways but cannot find a solution.
# -*- coding: utf-8 -*-
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from datetime import datetime
from datetime import date
driver = webdriver.Chrome()
paginaHit = 'https://hit.com.do/solicitud-de-verificacion/'
driver.get(paginaHit)
driver.maximize_window()
embed = driver.find_element(By.CSS_SELECTOR, "embed")
driver.switch_to.frame(embed)
bl = 'SMLU7270944A'
clasificacion = 'Mudanzas'
wait = WebDriverWait(driver, 20)
wait.until(EC.visibility_of_element_located((By.ID, "billoflanding"))).send_keys(bl)
seleccion = Select(driver.find_element(By.ID, "cboClasificación"))
seleccion.select_by_visible_text(clasificacion)
btnBuscar = driver.find_element(By.XPATH, '/html/body/div/app-root/div/form/div/div[3]/div/button').click()
time.sleep(4)
consignatario = driver.find_element(By.ID, 'cosignatario').send_keys("LOGISTICA ADUANAL")
# seleccionMercancia = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[9]/div/select'))
# seleccionMercancia.select_by_visible_text("Articulos del hogar")
condicion = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[10]/div/select'))
condicion.select_by_visible_text("Verificación")
solicitante = driver.find_element(By.ID, "nombreVisitante").send_keys("JONATHAN MENDEZ GARCIA")
correo = driver.find_element(By.ID, "correo").send_keys("laduanal@gmail.com")
telefono = driver.find_element(By.ID, "telefono").send_keys("8098013610")
tipoDocumento = Select(driver.find_element(By.XPATH,'/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[16]/div/select'))
tipoDocumento.select_by_visible_text("Cédula")
cedula = driver.find_element(By.ID, "cedulaVisitante2").send_keys("00111452470")
nombreYapellido = driver.find_element(By.ID, "text01").send_keys("JONATHAN MENDEZ GARCIA")
tipoDocumento2 = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[3]/div/select'))
tipoDocumento2.select_by_visible_text("Cédula")
rolVisitante = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[4]/div/select'))
rolVisitante.select_by_visible_text("Representante")
cedulaVisitante = driver.find_element(By.ID, "cedulaVisitante").send_keys("00111452470")
btnAgregarPersonal = driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[7]/div/div[1]/button').click()
# Select the verification date
fechaDeseada = "0929 2022"
fechaVerificacion = driver.find_element(By.ID, "fechaVerificar")
fechaVerificacion.send_keys(fechaDeseada)
The problem is in the last 3 lines of the code, but you need to run it to see what happens. I will appreciate any help. I'm trying to fill out a form, because it's a task that needs to be done all day long.
For whatever reason, the date field on hit.com.do is buggy, so I had to enter the year first, then go back to entering the day & month. I'd replace your last three lines with:
fechaVerificacion = driver.find_element(By.ID, "fechaVerificar")
fechaVerificacion.send_keys(Keys.TAB, Keys.TAB, "2022", Keys.LEFT, Keys.LEFT, "2909")
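If the keyboard workaround ever stops working, an alternative sketch is to set the value from JavaScript; this assumes the field is a native date input that accepts an ISO-formatted value and that the form picks up dispatched input/change events:
# sketch only: set the date directly and notify the form of the change
fechaVerificacion = driver.find_element(By.ID, "fechaVerificar")
driver.execute_script(
    "arguments[0].value = arguments[1];"
    "arguments[0].dispatchEvent(new Event('input', {bubbles: true}));"
    "arguments[0].dispatchEvent(new Event('change', {bubbles: true}));",
    fechaVerificacion, "2022-09-29")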
My target website is https://bscscan.com/address/0xb66e947f49b6811a8bf438040d1582232d3232d7#tokentxns
The exact date only appears as a tooltip when you hover the mouse over the time info (the Age column).
I am trying to get the exact date of the last transaction shown there. Since the date only appears on hover, I need to read the 'title data-original-title' attribute that holds it, but when I try to get it with this code it prints None:
data = driver.find_element_by_xpath('//*[@id="body"]/div[3]/table/tbody/tr[1]/td[3]/span').get_attribute('title data-original-title')
print(data)
driver_path = "browser/chromedriver.exe"
partial_website_link = "b66e947f49b6811a8bf438040d1582232d3232d7"
final_website_link = f"https://bscscan.com/address/0x{partial_website_link}#tokentxns"
driver = webdriver.Chrome(driver_path)
driver.get(final_website_link)
time.sleep(3)
frame = driver.find_element_by_xpath('//*[@id="tokenpageiframe"]')
driver.switch_to.frame(frame)
data = driver.find_element_by_xpath('//*[@id="body"]/div[3]/table/tbody/tr[1]/td[3]/span').get_attribute('title data-original-title')
print(data)
Try this:
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.action_chains import ActionChains
driver_path = "browser/chromedriver.exe"
partial_website_link = "b66e947f49b6811a8bf438040d1582232d3232d7"
final_website_link = f"https://bscscan.com/address/0x{partial_website_link}#tokentxns"
driver = webdriver.Chrome(driver_path)
driver.get(final_website_link)
wait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it(driver.find_element_by_xpath('//*[@id="tokenpageiframe"]')))
time.sleep(1)
ages = driver.find_elements_by_xpath('//div[@class="table-responsive" and not(@style)]//td[@class="showAge "]//span')
latest_age = ages[0]
# hover over the element so that its tooltip is rendered
hover = ActionChains(driver).move_to_element(latest_age)
hover.perform()
tool_tip = driver.find_element_by_xpath('//div[@class="tooltip-inner"]')
age = tool_tip.text
print(age)
After a few tries and expanding your question, I came up with the following working example:
from selenium import webdriver
driver_path = "browser/chromedriver.exe"
driver = webdriver.Chrome(driver_path)
partial_website_link = "b66e947f49b6811a8bf438040d1582232d3232d7"
final_website_link = f"https://bscscan.com/address/0x{partial_website_link}#tokentxns"
driver.get(final_website_link)
frame = driver.find_element_by_xpath('//*[@id="tokenpageiframe"]')
driver.switch_to.frame(frame)
# For some reason the Info you want to print is in the attribute title
data = driver.find_element_by_xpath("""/html/body/div[3]/table/tbody/tr[1]/td[3]/span""").get_attribute('title')
print(data)
If you want to get all the dates from the first page, do this:
from selenium import webdriver
driver_path = "browser/chromedriver.exe"
driver = webdriver.Chrome(driver_path)
partial_website_link = "b66e947f49b6811a8bf438040d1582232d3232d7"
final_website_link = f"https://bscscan.com/address/0x{partial_website_link}#tokentxns"
driver.get(final_website_link)
frame = driver.find_element_by_xpath('//*[@id="tokenpageiframe"]')
driver.switch_to.frame(frame)
# The star in the XPath is a wildcard; note find_elements_by_xpath instead of find_element_by_xpath
dates = driver.find_elements_by_xpath('//*[@id="body"]/div[3]/table/tbody/tr[*]/td[3]/span')
# Iterate over every found element
for date in dates:
    print(date.get_attribute('title'))
See if this works:
tableRows = driver.find_elements_by_xpath(".//table[@class='table table-hover']/tbody/tr")
for tr in tableRows:
    print(tr.find_element_by_xpath(".//td[@class='showAge ']/span[@rel='tooltip']").get_attribute("data-original-title"))
I have written the following code:
import os
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--start-maximized')
options.page_load_strategy = 'eager'
driver = webdriver.Chrome(options=options)
url = "https://www.moneycontrol.com/india/stockpricequote/chemicals/tatachemicals/TC"
driver.get(url)
try:
    wait = WebDriverWait(driver, 10)
except Exception:
    driver.send_keys(Keys.CONTROL + 'Escape')
driver.find_element_by_link_text("Bonus").click()
try:
    wait = WebDriverWait(driver, 5)
except Exception:
    driver.send_keys(Keys.CONTROL + 'Escape')
for i in range(0, 50):
    bonus_month = driver.find_element_by_xpath("//*[@class='mctable1.thborder.frtab']/tbody/tr[%s]/td[1]" % (i))
    print(bonus_month.text)
    bonus = driver.find_element_by_xpath("//*[@class='mctable1.thborder.frtab']/tbody/tr[%s]/td[1]" % (i))
    print(bonus.text)
This gives me the error:
no such element: Unable to locate element: {"method":"xpath","selector":"//*[@class='mctable1.thborder.frtab']/tbody/tr[0]/td[1]"}
Where am I making a mistake in finding the Ex-Bonus and Ratio?
First use the element_to_be_clickable expected condition to check that the element becomes clickable within the given time, just to make sure it is operational.
Once the click is performed on the Bonus link, the table takes some time to finish loading. In the meantime Selenium tries to fetch the table content and fails to get it. So add another wait for the rows to load, then grab the table using XPath and iterate over its rows:
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//ul[@id='corporation_tab']//a[@href='#ca_bonus']")))
driver.find_element_by_link_text("Bonus").click()
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//tbody[@id='id_b']/tr")))
tableRows = driver.find_elements_by_xpath('//tbody[@id="id_b"]/tr')
print(tableRows)
for i in tableRows:
    AnnouncementDate = i.find_element_by_xpath('td[1]').text
    exbonus = i.find_element_by_xpath('td[2]').text
    ratio = i.find_element_by_xpath('td[3]').text
    print(AnnouncementDate + " \t\t " + exbonus + " \t\t " + ratio)
This returns the expected output.
You will need the following extra import:
from selenium.webdriver.support import expected_conditions as EC
This will partially solve your issue with the locators:
1 To find Ex-Bonus, use the CSS selector #id_b>tr>td:nth-of-type(2)
2 To find Ratio, also use a CSS selector, #id_b>tr>td:nth-of-type(3)
To iterate, use:
#id_b>tr:nth-of-type(x)>td:nth-of-type(3)
where x is the number of the row (see the sketch below).
For example, #id_b>tr:nth-of-type(1)>td:nth-of-type(3) will give you the text with the ratio 3:5.
If you leave out #id_b, this locator will not be unique.
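Put together, the indexed selector can drive a simple loop. A minimal sketch, keeping the old find_element_by_css_selector API used elsewhere in this thread; row_count is simply the number of rows currently under #id_b:
# sketch only: read Ex-Bonus and Ratio row by row using the indexed selectors
row_count = len(driver.find_elements_by_css_selector("#id_b>tr"))
for x in range(1, row_count + 1):
    exbonus = driver.find_element_by_css_selector("#id_b>tr:nth-of-type(%d)>td:nth-of-type(2)" % x).text
    ratio = driver.find_element_by_css_selector("#id_b>tr:nth-of-type(%d)>td:nth-of-type(3)" % x).text
    print(exbonus, ratio)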
Instead of the range function I'd use find_elements_by_css_selector. Try the following:
rows = driver.find_elements_by_css_selector("#id_b>tr")
for row in rows:
    bonus = row.find_element_by_css_selector("td:nth-of-type(2)").text
    ratio = row.find_element_by_css_selector("td:nth-of-type(3)").text
There are only 5 of these elements on the page initially; you'll have to click See More to load the rest.
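A hedged sketch of expanding the table before collecting the rows; the link-text locator for the See More control is an assumption and may need adjusting to the actual element on the page:
# sketch only: expand the table first, then re-collect the rows
try:
    driver.find_element_by_link_text("See More").click()  # locator is a guess
except Exception:
    pass  # control not found or table already fully expanded
rows = driver.find_elements_by_css_selector("#id_b>tr")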
I want to scrape product information from this page. The product comes in three different sizes, and the price changes when I select a different size from the drop-down section. Right now my scraper can only scrape the default price shown after the initial page load, which is 35 for 1 kg. How can I scrape the prices for 500 g and 250 g? Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# argument for incognito Chrome
option = Options()
option.add_argument("--incognito")
browser = webdriver.Chrome(options=option)
browser.get("https://boutique.cafebarista.ca/products/cremone?variant=18033418797121")
product_title = browser.find_element_by_xpath('//h1[@class="product-name"]')
long_description = browser.find_element_by_xpath('//div[@class="product-landing-container"]')
price = browser.find_element_by_xpath('//div[@class="product-btn-price ProductPrice"]')
print(product_title.text,long_description.text,price.text)
browser.quit()
With .find_elements_by_css_selector you can get the text of each option without clicking the weight drop-down first; this is the selector I mean:
nav[id="w-dropdown-list-16"] > a > div
And you can also click on each of these elements using .execute_script
Try the following code:
driver.get('https://boutique.cafebarista.ca/products/autentico?variant=18033459331137')
weight_list = driver.find_elements_by_css_selector('nav[id="w-dropdown-list-16"] > a > div')
for weight in weight_list:
    driver.execute_script('arguments[0].click();', weight)
    price = driver.find_element_by_id('ProductPrice').text
    print(weight.get_attribute('innerHTML') + ' ' + price)
Try the solution below:
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
browser = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
browser.maximize_window()
wait = WebDriverWait(browser, 20)
browser.get("https://boutique.cafebarista.ca/products/cremone?variant=18033418797121")
# locate the product title (using the locator from your own code) so it can be printed below
product_title = browser.find_element_by_xpath('//h1[@class="product-name"]')
kg_button = browser.find_element_by_xpath("//div[@id='w-dropdown-toggle-16']")
kg_button.click()
list = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//nav[@id='w-dropdown-list-16']//a")))
kg_button.click()
for element in list:
    kg_button.click()
    actionChains = ActionChains(browser)
    actionChains.move_to_element(element).click().perform()
    price = browser.find_element_by_xpath("//div[@id='ProductPrice']")
    print(product_title.text)
    print(element.text)
    print(price.text)
browser.quit()
I am not able to print the link of the final PDF, which opens after running the given code:
from selenium import webdriver
from selenium.webdriver.support import ui
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
def page_is_loaded(driver):
    return driver.find_element_by_tag_name("body") != None

def check_exists_by_text(text):
    try:
        driver.find_element_by_link_text(text)
    except NoSuchElementException:
        return False
    return True
driver = webdriver.Chrome("C:/Users/Roshan/Desktop/sbi/chromedriver")
driver.maximize_window()
driver.get("http://www.careratings.com/brief-rationale.aspx")
wait = ui.WebDriverWait(driver,10)
wait.until(page_is_loaded)
location_field = driver.find_element_by_name("txtfromdate")
location_field.send_keys("2019-05-06")
last_date = driver.find_element_by_name("txttodate")
last_date.send_keys("2019-05-21")
driver.find_element_by_xpath("//input[#name='btn_submit']").click()
if check_exists_by_text('Reliance Capital Limited'):
    elm = driver.find_element_by_link_text('Reliance Capital Limited')
    driver.implicitly_wait(5)
    elm.click()
    driver.implicitly_wait(50)
    #time.sleep(5)
    #driver.quit()
else:
    print("Company is not rated in the given Date range")
I am expecting the actual output to be the link to this PDF:
"http://www.careratings.com/upload/CompanyFiles/PR/Reliance%20Capital%20Ltd.-05-18-2019.pdf"
but I do not know how to print this link.
You need to find all the elements in the table, then extract the data from them.
from selenium import webdriver
import os
# setup path to chrome driver
chrome_driver = os.getcwd() + '/chromedriver'
# initialise chrome driver
browser = webdriver.Chrome(chrome_driver)
# load url
browser.get('http://www.careratings.com/brief-rationale.aspx')
# setup date range
location_field = browser.find_element_by_name("txtfromdate")
location_field.send_keys("2019-05-06")
last_date = browser.find_element_by_name("txttodate")
last_date.send_keys("2019-05-21")
browser.find_element_by_xpath("//input[#name='btn_submit']").click()
# get all data rows
content = browser.find_elements_by_xpath('//*[#id="divManagementSpeak"]/table/tbody/tr/td/a')
# get text and href link from each element
collected_data = []
for item in content:
    url = item.get_attribute("href")
    description = item.get_attribute("innerText")
    collected_data.append((url, description))
Output:
('http://www.careratings.com/upload/CompanyFiles/PR/Ashwini%20Frozen%20Foods-05-21-2019.pdf', 'Ashwini Frozen Foods')
('http://www.careratings.com/upload/CompanyFiles/PR/Vanita%20Cold%20Storage-05-21-2019.pdf', 'Vanita Cold Storage')
and so on
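If you need to keep these rows outside the browser session, here is a minimal follow-up sketch writing collected_data to a CSV file (the filename is just an example):
# sketch only: persist the (url, description) pairs collected above
import csv
with open("care_ratings.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["url", "description"])
    writer.writerows(collected_data)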
I would say you just need to put this line:
pdf_link = elm.get_attribute("href")
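For example, inside the existing if block of your script (a minimal sketch; it assumes, as suggested above, that the link's href attribute points directly at the PDF):
if check_exists_by_text('Reliance Capital Limited'):
    elm = driver.find_element_by_link_text('Reliance Capital Limited')
    pdf_link = elm.get_attribute("href")  # assumed to be the PDF URL
    print(pdf_link)
else:
    print("Company is not rated in the given Date range")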
You have missed one important part to click on: when you enter some text in that input box, a dropdown appears below it displaying the matching search results to choose from. Once you click on one of those suggestions, the rest follows as before.
Try the following script:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = "http://www.careratings.com/brief-rationale.aspx"
with webdriver.Chrome() as driver:
    driver.get(url)
    wait = WebDriverWait(driver, 10)
    location_field = wait.until(EC.presence_of_element_located((By.NAME, "txtfromdate")))
    location_field.send_keys("2019-05-06")
    last_date = wait.until(EC.presence_of_element_located((By.NAME, "txttodate")))
    last_date.send_keys("2019-05-21")
    input_search = wait.until(EC.presence_of_element_located((By.NAME, "txtSearchCompany_brief")))
    input_search.send_keys('Reliance Capital Limited')
    time.sleep(3)  # could not get rid of this hardcoded delay to make the script work
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "[onclick*='Reliance Capital Limited']"))).click()
    # time.sleep(2)  # activate this line in case the script behaves otherwise
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='btn_submit']"))).click()
    for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table tr td > a[href$='.pdf']"))):
        print(item.get_attribute("href"))