Python Selenium Datepicker select date

I am trying to select a date by its day number.
URL: http://www.szse.cn/disclosure/bond/notice/index.html
For the start month and day I am trying:
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@class='monthselect'][1]//li[text()='{}']".format("1")))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "(//table[@class='table-condensed'])[1]//tbody/div[@class='tdcontainer' and contains(text(), '15')]"))).click()
I can get there with something like tr[2]//td[3], but I want to use just the day number, like 1, 2, 3, ...
Many thanks!!

I did it by typing the date, not by using the datepicker. In my experience, this is the more reliable way.
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def run():
    driver = webdriver.Chrome(executable_path=r"C:\Windows\chromedriver.exe")
    driver.get('http://www.szse.cn/disclosure/bond/notice/index.html')
    wait = WebDriverWait(driver, 10, poll_frequency=1)
    # Wait until the start-date input is interactable before typing
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "input-left")))
    s1 = driver.find_element_by_class_name('input-left')
    s1.send_keys("2021-03-03")
    s2 = driver.find_element_by_class_name('input-right')
    s2.send_keys("2021-03-04")
    s3 = driver.find_element_by_id("query-btn")
    s3.click()
    time.sleep(5)

if __name__ == '__main__':
    run()
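If you do still want to drive the datepicker and click the day by its number, here is a minimal sketch. It assumes the calendar renders as the table-condensed markup from your own XPath and that day cells are plain td elements, so the selectors may need adjusting:
# Sketch only: open the start-date field so the calendar renders
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "input-left"))).click()
# Click the cell in the first calendar table whose visible text is exactly
# the day number; normalize-space() guards against stray whitespace
day = "15"
wait.until(EC.element_to_be_clickable((By.XPATH, "(//table[@class='table-condensed'])[1]//td[normalize-space()='{}']".format(day)))).click()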

Related

Python web scraping - variable sleep for slow websites

I have a web-scraping script, but the site it targets is slow on some days and fast on others. With a fixed SLEEP it errors out on the slow days. How can I fix this?
I use SLEEP in the intervals between the tasks because the site is sometimes slow and does not return the result in time, which gives me an error.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import Select
import pandas as pd
from time import sleep

options = Options()
options.headless = True
navegador = webdriver.Firefox(options=options)

link = '****************************'
navegador.get(url=link)
sleep(1)

# Log in
usuario = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_txtLogin')
usuario.send_keys('****************************')
sleep(1)
senha = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_txtSenha')
senha.send_keys('****************************')
sleep(2.5)
botaologin = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_btnEnviar')
botaologin.click()
sleep(40)

# Navigate to the scheduling page, pick the day and the terminal
agendamento = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_TreeView2t8')
agendamento.click()
sleep(2)
selecdia = navegador.find_element(By.CSS_SELECTOR, "a[title='06 de dezembro']")
selecdia.click()
sleep(2)
selecterminal = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_ddlVagasTerminalEmpresa')
selecterminal.click()
sleep(1)
select = Select(navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_ddlVagasTerminalEmpresa'))
select.select_by_index(1)
sleep(10)

# Collect the available slots and append them to a CSV
buscalink = navegador.find_elements(by=By.XPATH, value='//*[@id="divScroll"]')
for element in buscalink:
    teste3 = element.get_attribute('innerHTML')
    soup = BeautifulSoup(teste3, "html.parser")
    Vagas = soup.find_all(title="Vaga disponível.")
    print(Vagas)
    temp = []
    for i in Vagas:
        on_click = i.get('onclick')
        temp.append(on_click)
    df = pd.DataFrame(temp)
    df.to_csv('test.csv', mode='a', header=False, index=False)
It fails when the page does not load in time and the data cannot be retrieved, but that load time is variable.
Instead of all these hardcoded sleeps you need to use WebDriverWait with expected_conditions explicit waits.
With it you can set a timeout period, and Selenium will poll the page periodically until the expected condition is fulfilled.
For example, if you need to click a button, you wait for that element to become clickable. Once the condition is met, Selenium returns the element and you can click it.
This removes all the redundant delays on the one hand and keeps waiting until the condition is matched on the other (as long as it happens within the defined timeout).
So your code can be modified as follows:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
#-----
wait = WebDriverWait(navegador, 30)
navegador.get(link)
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_txtLogin"))).send_keys('****************************')
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_txtSenha"))).send_keys('****************************')
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_btnEnviar"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_TreeView2t8"))).click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[title='06 de dezembro']"))).click()
etc.

How to find the text after a node?

from time import sleep
from selenium import webdriver

driver = webdriver.Chrome()
URL = 'https://makemyhomevn.com/collections/ghe-an-cafe/products/ghe-go-tron'
driver.get(URL)
sleep(1)
des = driver.find_element_by_xpath('//div[@class="product-item-description"]//strong/following-sibling::text()[1]')
print(des)
I expect the result 'Gỗ tự nhiên', but I have tried many ways and couldn't get the text after 'Chất liệu:'.
You can take the entire span text using .get_attribute('innerText') and then use Python's split function, like below:
driver.maximize_window()
wait = WebDriverWait(driver, 20)
driver.get("https://makemyhomevn.com/collections/ghe-an-cafe/products/ghe-go-tron")
time.sleep(1)
entire_span = wait.until(EC.visibility_of_element_located((By.XPATH, "//strong[text()='Chất liệu:']/..")))
entire_span_splitted = entire_span.get_attribute('innerText').split(":")
#print(entire_span_splitted[0])
print(entire_span_splitted[1])
Imports:
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output:
Gỗ tự nhiên.
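As a small follow-up (my refinement, not part of the original answer): splitting only once and stripping whitespace guards against extra colons or padding in the span text:
# Split on the first ':' only, then trim surrounding whitespace
value = entire_span.get_attribute('innerText').split(":", 1)[1].strip()
print(value)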

How to scrape each product page (comments and customer country)

I am trying to scrape each product page from the AliExpress website in order to get the number of comments, the number of photos published by the customer, and also the customer's country, and put it all in a dataframe.
I have written code that scrapes the customer country, but I don't know how to get the number of customer comments and the number of images.
This is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data = []
while True:
    for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
        try:
            country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
        except:
            country = None
        data.append({
            'country': country,
        })
    try:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
    except:
        break
pd.DataFrame(data).to_csv('filename.csv', index=False)
I would appreciate any help from you! Thank you!
If you want the number of comments/reviews, you can just check the value in that section:
driver.find_element(By.XPATH, 'XPATH_OF_ELEMENT_TO_SCRAPE')
To do so in your example, let's do this outside your loop:
number_feedbacks = driver.find_element(By.XPATH, '//*[@id="transction-feedback"]/div[1]').text
number_images = driver.find_element(By.XPATH, '//*[@id="transction-feedback"]//label[1]/em').text
If you don't know this function, please feel free to ask and I will explain where I found these XPaths. We can also use the find-by-ID function.
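For instance, a minimal sketch of the find-by-ID route, assuming the feedback container carries the transction-feedback id as the XPaths above suggest:
# Sketch: locate the feedback section by its id and read its visible text
feedback_section = driver.find_element(By.ID, 'transction-feedback')
print(feedback_section.text)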
In your code it would be:
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data=[]
number_feedbacks = driver.find_element(By.XPATH, '//*[@id="transction-feedback"]/div[1]').text
number_images = driver.find_element(By.XPATH, '//*[@id="transction-feedback"]//label[1]/em').text
print(f'number_feedbacks = {number_feedbacks}\nnumber_images = {number_images}')
while True:
    for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
        try:
            country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
        except:
            country = None
        data.append({
            'country': country,
        })
    try:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
    except:
        break
pd.DataFrame(data).to_csv('filename.csv', index=False)

Python Selenium - Data not visible; no error

I am trying to scrape data from a website using Selenium. I am able to send values, but the result never appears, so scraping cannot start. The program also does not throw any errors. Here's the reproducible code:
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.webdriver.support.ui import Select

# Setting up driver
driver = webdriver.Chrome(executable_path=r'D:\Program Files\ChromeDriver\chromedriver.exe')
# Opening up the webpage
driver.get("https://www1.nseindia.com/products/content/derivatives/equities/historical_fo.htm")
# Setting the value of Select Instrument
driver.find_element_by_id('instrumentType').send_keys('Index Options')
# Setting the value of Select Symbol
driver.find_element_by_id('symbol').send_keys('NIFTY 50')
# Setting the value of Select Year
driver.find_element_by_id('year').send_keys('2019')
# Setting the value of Select Expiry
select = Select(driver.find_element_by_id('expiryDate'))
noOfExpiries = 2
select.select_by_index(noOfExpiries)
# Setting the value of Select Option Type
cycle = 'PE'
driver.find_element_by_id('optionType').send_keys(cycle)
# Clicking the date range radio button
driver.find_element_by_xpath("//input[@id='rdDateToDate']").click()
# Setting the date range
fromDate = (datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y') - timedelta(days=45)).strftime("%d-%b-%Y")
driver.find_element_by_id('fromDate').send_keys(fromDate)
toDate = datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y').strftime("%d-%b-%Y")
driver.find_element_by_id('toDate').send_keys(toDate)
print(fromDate, toDate)
# Clicking the Get Data button
driver.find_element_by_id('getButton').click()
Any clue as to what I am missing here?
Okay, this should do it. I came up with a Selenium-based solution only because you expressed disinterest in the requests module:
from selenium import webdriver
from datetime import datetime, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = ChromeOptions()
options.add_argument('disable-blink-features=AutomationControlled')
with webdriver.Chrome(options=options) as driver:
    wait = WebDriverWait(driver, 10)
    driver.get("https://www1.nseindia.com/products/content/derivatives/equities/historical_fo.htm")
    wait.until(EC.presence_of_element_located((By.ID, "instrumentType"))).send_keys('Index Options')
    wait.until(EC.presence_of_element_located((By.ID, "symbol"))).send_keys('NIFTY 50')
    wait.until(EC.presence_of_element_located((By.ID, "year"))).send_keys('2019')
    select = Select(wait.until(EC.presence_of_element_located((By.ID, "expiryDate"))))
    noOfExpiries = 13
    select.select_by_index(noOfExpiries)
    cycle = 'PE'
    wait.until(EC.presence_of_element_located((By.ID, "optionType"))).send_keys(cycle)
    item = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@id='rdDateToDate']")))
    driver.execute_script("arguments[0].click();", item)
    fromDate = (datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y') - timedelta(days=45)).strftime("%d-%b-%Y")
    wait.until(EC.presence_of_element_located((By.ID, "fromDate"))).send_keys(fromDate)
    toDate = datetime.strptime(select.options[noOfExpiries].text, '%d-%m-%Y').strftime("%d-%b-%Y")
    wait.until(EC.presence_of_element_located((By.ID, "toDate"))).send_keys(toDate, Keys.ENTER)
    print(fromDate, toDate)
    search_button = wait.until(EC.presence_of_element_located((By.ID, "getButton")))
    driver.execute_script("arguments[0].click();", search_button)
    tabular_content = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#historicalData > table"))).get_attribute("innerHTML")
    print(tabular_content)
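As a follow-up, the captured table HTML can be turned into a DataFrame. A minimal sketch, assuming pandas and its lxml dependency are installed and the table parses cleanly:
from io import StringIO
import pandas as pd

# innerHTML excludes the <table> wrapper itself, so re-wrap it before parsing
df = pd.read_html(StringIO("<table>" + tabular_content + "</table>"))[0]
print(df.head())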

How can I click a specific category element of a list in Selenium Python

from selenium import webdriver
import time
from selenium.common.exceptions import NoSuchElementException

driver = webdriver.Chrome("C:/Users/ysatish/PycharmProjects/all rules/driver/chromedriver.exe")
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://www.myntra.com/men-tshirts")
chali = driver.find_elements_by_xpath('//li//a[1]//div[2]//div[1]//span')
dak = driver.find_elements_by_xpath('//li//a[1]//div[2]//div[1]//span[1]')
sub = len(chali)
da = len(dak)
print(da)
print(sub)
Updated solution:
driver.get('https://www.myntra.com/men-tshirts')
wait = WebDriverWait(driver, 10)
chali = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//li[*]//a[1]//div[2]//div[1]//span[1]")))
print(len(chali))
for element in chali:
    print(element.text)
    element.click()
dak = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//li//a[1]//div[2]//div[1]//span")))
print(len(dak))
for element0 in dak:
    print(element0.text)
    element0.click()
Note: please add the below imports to your solution.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
Updated Solution:
//li[*]//a[1]//div[2]//div[1]//span[not(@class='product-strike')][not(@class='product-discountedPrice')][not(@class='product-discountPercentage')][last()]//span[1]
and
//li[*]//a[1]//div[2]//div[1]//span
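A hedged usage sketch plugging the first updated XPath into the wait from above (locator copied verbatim; it may need adjusting if Myntra's markup has changed):
# Collect the matching spans and print their text
price_spans = wait.until(EC.presence_of_all_elements_located((
    By.XPATH,
    "//li[*]//a[1]//div[2]//div[1]//span[not(@class='product-strike')]"
    "[not(@class='product-discountedPrice')][not(@class='product-discountPercentage')]"
    "[last()]//span[1]"
)))
for span in price_spans:
    print(span.text)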