I have this source HTML code:
<div class="timezone" id="js-timezone">
<span class="timezone__current">2:27 (GMT +2)</span>
<ul class="timezone__list">
<li><span>+3</span>Moscow, Riyadh</li>
<li><span>+4</span>Muscat </li>
</ul>
</div>
It's hidden; when I move the cursor over the span with class="timezone__current", the menu is shown.
Now I'm trying to click on "+3 GMT" with Python and Selenium:
print('Opening browser...')
driver = webdriver.Firefox(executable_path=r'C:\geckodriver\geckodriver.exe')
driver.get("https://www.betexplorer.com/next/soccer/")
search_timezone = driver.find_element_by_xpath("//ul[@class='timezone__list']")
driver.execute_script('arguments[0].setAttribute("onclick", "set_timezone(+3);")', search_timezone)
And nothing happens.
I've also tried:
driver = webdriver.Firefox(executable_path=r'C:\geckodriver\geckodriver.exe')
driver.get("https://www.betexplorer.com/next/soccer/")
search_timezone = driver.find_element_by_xpath("//ul[@class='timezone__list']")
wait = WebDriverWait(driver, 10)
action = ActionChains(driver)
action.move_to_element(search_timezone).perform()
select_timezone = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "+3 Moscow, Riyadh")))
select_timezone.click()
And I get a TimeoutException.
Please help me with it, I'm going crazy.
Found this:
search_timezone = driver.find_element_by_class_name("timezone")
action = ActionChains(driver)
action.move_to_element(search_timezone).perform()  # hover to reveal the hidden menu
wait = WebDriverWait(driver, 10)
select_timezone = search_timezone.find_elements_by_tag_name('a')[3]  # fourth link in the dropdown
select_timezone.click()
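A slightly more robust variant is also possible: match the menu entry by its visible text instead of the hard-coded index [3]. This is only a sketch; it assumes the live menu entries are <a> elements (as the working snippet above suggests) and that the "+3" entry contains the text "Moscow" from the question's HTML.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox(executable_path=r'C:\geckodriver\geckodriver.exe')
driver.get("https://www.betexplorer.com/next/soccer/")
wait = WebDriverWait(driver, 10)

# Hover over the widget so the hidden timezone list becomes visible.
widget = wait.until(EC.presence_of_element_located((By.ID, "js-timezone")))
ActionChains(driver).move_to_element(widget).perform()

# Match the entry by text rather than by position; 'Moscow' is taken from the
# question's HTML, and the <a> tag is an assumption based on the snippet above.
option = wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//div[@id='js-timezone']//a[contains(., 'Moscow')]")))
option.click()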
I have a part of the HTML, and I need a Python/Selenium script to click on the element "Odhlásit se" ("Log out").
<div class="no-top-bar-right">
<ul class="vertical medium-horizontal menu" data-responsive-menu="accordion medium-dropdown">
<li class="show-for-medium">
|
</li>
<li>
Můj profil
</li>
<li>
Odhlásit se
</li>
</ul>
</div>
But find_element_by_link_text and find_element(by="link text") do not work.
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(executable_path=r"C:\selenium browser drivers\chromedriver.exe")
driver.get("https://cs.laurie-project.com/login")  # load the page
confirmation = driver.find_element(by="xpath", value="//button[@aria-label='Close reveal']")
confirmation.click()  # confirm the popup window
login_field = driver.find_element(by="id", value="username-label")
login_field.send_keys("TestovaciUcet")  # fill in the username
password_field = driver.find_element(by="id", value="pw-label")
password_field.send_keys("Heslo123")  # fill in the password
login_button = driver.find_element(by="id", value="register-label")
login_button.click()  # submit the login
if driver.current_url == "https://cs.laurie-project.com/home":
    out_link = driver.find_element(by="link_text", value="logout")
    out_link.click()  # log out
else:
    print("Chyba přihlášení")  # login error
Any tips from somebody? Thanks.
Try:
out_link = driver.find_element_by_xpath("//a[text()='Odhlásit se']")
or:
out_link = driver.find_element_by_xpath('//a[contains(@href,"/logout.html")]')
I hope this works for you.
Thanks.
The link text is not logout but Odhlásit se. The href attribute contains the keyword logout. And it is better to use explicit waits.
# Imports
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver.get("https://cs.laurie-project.com/login")
wait = WebDriverWait(driver, 30)
confirm = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Close reveal']")))
confirm.click()
username = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='username-label']")))
username.send_keys("TestovaciUcet")
password = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='pw-label']")))
password.send_keys("Heslo123")
submit = wait.until(EC.element_to_be_clickable((By.ID, "register-label")))
submit.click()
if driver.current_url == "https://cs.laurie-project.com/home":
    # 1. Can use the link text - Odhlásit se - to log out:
    # logout = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Odhlásit se")))
    # 2. Or use the href attribute to click the logout button:
    logout = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@href,'logout')]")))
    logout.click()
else:
    print("Chyba přihlášení")  # login error
I would like to find the titles containing '募集说明书', but the following code just returns nothing.
No error, just empty results.
driver.get('http://www.szse.cn/disclosure/bond/notice/index.html')
wait = WebDriverWait(driver, 30)
datefield_st = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='input-group-wrap form-control dropdown-btn']/input[1]")))
datefield_st.click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='calendar-control'][1]//div[3]//a"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@class='monthselect'][1]//li[text()='{}']".format("1")))).click()
s1 = driver.find_element_by_class_name('input-left')
s1.send_keys("2022-1-1")
s2 = driver.find_element_by_class_name('input-right')
s2.send_keys("2022-1-18")
driver.find_element_by_id("query-btn").click()
while True:
    time.sleep(2)
    try:
        links = [link.get_attribute('href') for link in wait.until(EC.presence_of_all_elements_located((By.XPATH, "//a[@attachformat][.//span[contains(text(),'募集说明书')]]")))]
        titles = [title.text for title in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//span[@class='pull-left ellipsis title-text' and contains(text(), '募集说明书')]//parent::a")))]
        dates = [date.text for date in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//span[@class='pull-left ellipsis title-text' and contains(text(), '募集说明书')]//ancestor::td//following-sibling::td")))]
        for link, title, date in zip(links, titles, dates):
            print(link, title, date)
<div class="text-title-box">
<a attachformat="pdf" attachpath="/disc/disk03/finalpage/2022-01-21/bb9854c5-9d89-4914-a6ea-219b487b874a.PDF" target="_blank" href="/disclosure/listed/bulletinDetail/index.html?bd5fd845-e810-42d3-98b3-d2501daaabc3" class="annon-title-link">
<span class="pull-left title-text multiline" title="22新资01:新疆金投资产管理股份有限公司2022年面向专业投资者公开发行公司债券(第一期)募集说明书">22新资01:新疆金投资产管理股份有限公司2022年面向专业投资者公开发行公司债券(第一期)募集说明书</span>
<span class="pull-left ellipsis title-icon" title="点击下载公告文件"><img src="http://res.static.szse.cn/modules/disclosure/images/icon_pdf.png">(5822k)</span>
<span class="titledownload-icon" title="点击下载公告文件"></span>
</a>
</div>
Could someone please help with this issue? Many thanks
Elements matching the //a[@attachformat][.//span[contains(text(),'募集说明书')]] XPath are located at the bottom of the presented search results; they are outside the visible screen until you scroll them into view.
Also, you are using wrong locators for the titles. See my fixes below.
The same goes for the dates.
Also, there is no need to use wait.until(EC.visibility_of_all_elements_located(...)) three times there. Once the elements are found (and scrolled into view), you can simply get them with driver.find_elements.
I also see no need for the while True: loop here; it will keep your code from ever completing after it has collected the data, but I left it as-is since you mentioned you intend to click "next page" there.
from selenium.webdriver.common.action_chains import ActionChains

driver.get('http://www.szse.cn/disclosure/bond/notice/index.html')
wait = WebDriverWait(driver, 30)
actions = ActionChains(driver)
datefield_st = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='input-group-wrap form-control dropdown-btn']/input[1]")))
datefield_st.click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='calendar-control'][1]//div[3]//a"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@class='monthselect'][1]//li[text()='{}']".format("1")))).click()
s1 = driver.find_element_by_class_name('input-left')
s1.send_keys("2022-1-1")
s2 = driver.find_element_by_class_name('input-right')
s2.send_keys("2022-1-18")
driver.find_element_by_id("query-btn").click()
while True:
    time.sleep(2)
    try:
        # Scroll the last matching link into view so all results are rendered.
        lower_link = wait.until(EC.presence_of_element_located((By.XPATH, "(//a[@attachformat][.//span[contains(text(),'募集说明书')]])[last()]")))
        actions.move_to_element(lower_link).perform()
        time.sleep(0.5)
        links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, "//a[@attachformat][.//span[contains(text(),'募集说明书')]]")]
        titles = [title.text for title in driver.find_elements(By.XPATH, "//span[contains(@class,'pull-left') and contains(text(), '募集说明书')]//parent::a")]
        dates = [date.text for date in driver.find_elements(By.XPATH, "//span[contains(@class,'pull-left') and contains(text(), '募集说明书')]//ancestor::td//following-sibling::td")]
        for link, title, date in zip(links, titles, dates):
            print(link, title, date)
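Both pastes above end inside the try: block (the except clause was cut off), so for completeness, here is a minimal sketch of how the loop could terminate once pagination is added. The "next page" locator is a placeholder, not taken from the real page:
from selenium.common.exceptions import NoSuchElementException

while True:
    time.sleep(2)
    try:
        # ... scroll and collect links / titles / dates as above ...

        # Placeholder locator: inspect the real pager on szse.cn and
        # substitute its actual XPath before relying on this.
        driver.find_element(By.XPATH, "//a[contains(text(),'下一页')]").click()
    except NoSuchElementException:
        break  # no "next page" control left, so all pages were processed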
I'm learning Python and scraping, and I'm currently trying to scrape the page whose URL appears in the code below.
I managed to get some data into my CSV file with this code:
from typing import Text
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.select import Select
import time

with open('scraping_5_pagination.csv', 'w') as file:
    file.write("business_names, attestation, town_pc, region\n")

driver = webdriver.Chrome(ChromeDriverManager().install())  # initialise chrome driver
url = 'https://www.cci.fr/agent-immobilier?company_name=agences%20immobili%C3%A8res%20&brand_name=&siren=&numero_carte=&code_region=84&city=&code_postal=&person_name=&state_recherche=1&name_region=AUVERGNE-RHONE-ALPES'
driver.get(url)
driver.maximize_window()
time.sleep(1)
agences_recherche = driver.find_element_by_id('edit-company-name')
# agences_recherche.send_keys('agences immobilières')
time.sleep(1)
region = driver.find_element_by_id('edit-code-region')
# region.send_keys('AUVERGNE-RHONE-ALPES')
time.sleep(1)
search = driver.find_element_by_xpath('//input[@value="Rechercher"]')
time.sleep(1)
for i in range(200):  # loop for pagination
    business_names = driver.find_elements_by_xpath('//td[@class="titre_entreprise"]')
    # driver.execute_script("arguments[0].click();", business_names)
    attestation = driver.find_elements_by_xpath('//tr[@class="lien-fiche"]/td/a')
    # driver.execute_script("arguments[0].click();", attestation)
    town_pc = driver.find_elements_by_xpath('//*[@id="main-content"]/div/table/tbody/tr/td[2]')
    # driver.execute_script("arguments[0].click();", town_pc)
    region = driver.find_elements_by_xpath('//*[@id="main-content"]/div/table/tbody/tr/td[3]')
    # driver.execute_script("arguments[0].click();", region)
    with open('scraping_5_pagination.csv', 'a') as file:  # 'a' appends; 'w' would truncate the file on every page
        for j in range(len(business_names)):
            file.write(business_names[j].text + ";" + attestation[j].text + ";" + town_pc[j].text + ";" + region[j].text + "\n")
    driver.find_element_by_xpath('//span[contains(text(),"suivant")]').click()
    time.sleep(1)
driver.close()
But I don't get why it doesn't click on the next-page button; I don't know if it's a problem with the XPath expression or with the implementation.
Also, here's the HTML code of the "Next" button:
<a href="?company_name=agences%20immobili%C3%A8res%20&brand_name=&siren=&numero_carte=&code_region=84&city=&code_postal=&person_name=&state_recherche=1&name_region=AUVERGNE-RHONE-ALPES&page=2" title="Aller à la page suivante" rel="next" class="">
<span class="visually-hidden">Page suivante</span>
<span aria-hidden="true" class="">Suivant</span>
</a>
Thank you for reading.
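One detail worth noting in this HTML: XPath contains() is case-sensitive, so //span[contains(text(),"suivant")] matches the visually-hidden "Page suivante" span rather than the visible "Suivant" one, and clicking a hidden element typically raises an ElementNotInteractableException. A hedged sketch that clicks the anchor itself through its rel="next" attribute instead (assuming, as the markup suggests, that the attribute disappears on the last results page):
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
try:
    # Click the pager anchor itself rather than the hidden inner span.
    next_link = wait.until(EC.element_to_be_clickable((By.XPATH, '//a[@rel="next"]')))
    next_link.click()
except TimeoutException:
    # Assumption: no rel="next" anchor means we are on the last page.
    print("No next page found")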
So I came from the question here
Now I am able to interact with the page, scroll down the page, close the popup that appears and click at the bottom to expand the page.
The problem is when I count the items, the code only returns 20 and it should be 40.
I have checked the code again and again - I'm missing something but I don't know what.
See my code below:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
import datetime
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
#options.add_argument('--headless')
driver = webdriver.Chrome(executable_path=r"C:\chromedriver.exe", options=options)
url = 'https://www.coolmod.com/componentes-pc-procesadores?f=375::No'
driver.get(url)
iter = 1
while True:
    scrollHeight = driver.execute_script("return document.documentElement.scrollHeight")
    Height = 10 * iter
    driver.execute_script("window.scrollTo(0, " + str(Height) + ");")
    if Height > scrollHeight:
        print('End of page')
        break
    iter += 1

time.sleep(3)
popup = driver.find_element_by_class_name('confirm').click()
time.sleep(3)
ver_mas = driver.find_elements_by_class_name('button-load-more')
for x in range(len(ver_mas)):
    if ver_mas[x].is_displayed():
        driver.execute_script("arguments[0].click();", ver_mas[x])
        time.sleep(10)

page_source = driver.page_source
soup = BeautifulSoup(page_source, 'lxml')
# print(soup)
items = soup.find_all('div', class_='col-xs-12 col-sm-6 col-sm-6 col-md-6 col-lg-3 col-product col-custom-width')
print(len(items))
What is wrong? I'm a newbie in the scraping world.
Regards
Your while and for statements don't work as intended.
Using while True: is a bad practice.
You scroll to the bottom, but the button-load-more button isn't displayed there, so Selenium will not find it as displayed.
find_elements_by_class_name looks for multiple elements, while the page has only one element with that class.
if ver_mas[x].is_displayed(): will, if you are lucky, be executed only once, because the range is 1.
Below you can find the solution: the code looks for the button, moves to it instead of scrolling, and performs a click. If it fails to find the button, meaning all the items were loaded, it breaks the while and moves on.
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains

url = 'https://www.coolmod.com/componentes-pc-procesadores?f=375::No'
driver.get(url)
time.sleep(3)
popup = driver.find_element_by_class_name('confirm').click()

iter = 1
while iter > 0:
    time.sleep(3)
    try:
        ver_mas = driver.find_element_by_class_name('button-load-more')
        actions = ActionChains(driver)
        actions.move_to_element(ver_mas).perform()  # bring the button into view
        driver.execute_script("arguments[0].click();", ver_mas)
    except NoSuchElementException:
        break  # no more "load more" button: all items are loaded
    iter += 1

page_source = driver.page_source
soup = BeautifulSoup(page_source, 'lxml')
# print(soup)
items = soup.find_all('div', class_='col-xs-12 col-sm-6 col-sm-6 col-md-6 col-lg-3 col-product col-custom-width')
print(len(items))
Sorry, this is my first post, so forgive me for all that I don't know yet! Thanks.
I am trying to complete the following form and extract the associated premium. When I run my code, I would expect the annual premium of $156 to be extracted, but all I get is "Annual Premium: -".
Here is the code:
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import requests

driver = webdriver.Chrome(r"C:\Users\tomwp\Downloads\chromedriver_win32\chromedriver.exe")
page = driver.get("https://www.earthquakeauthority.com/")
xpath = '//*[@id="form"]/header/div[2]/a'
btn = driver.find_element_by_xpath(xpath)
btn.click()
time.sleep(5)
iframe = driver.find_element_by_xpath("//iframe[@id='premiumCalc-iframe']")
driver.switch_to.frame(iframe)
xpath = '//*[@id="cea-page-1"]/div/div/div[1]/div/button[1]'
btn = driver.find_element_by_xpath(xpath)
btn.click()
xpath = '//*[@id="startdate"]'
incept_date = driver.find_element_by_xpath(xpath)
incept_date.send_keys("03/24/2019")
xpath = '//*[@id="participatingInsurer"]'
insurance_company = driver.find_element_by_xpath(xpath)
insurance_company.send_keys("Other")
xpath = '//*[@id="street"]'
street_address = driver.find_element_by_xpath(xpath)
street_address.send_keys("26 Los Indios")
xpath = '//*[@id="zipcode"]'
zip_code = driver.find_element_by_xpath(xpath)
zip_code.send_keys("92618")
xpath = '//*[@id="form-views"]/div[18]/div/button'
btn = driver.find_element_by_xpath(xpath)
btn.click()
xpath = '//*[@id="yearbuilt"]'
year_built = driver.find_element_by_xpath(xpath)
year_built.send_keys("2011")
xpath = '//*[@id="insuredvalue"]'
insured_value = driver.find_element_by_xpath(xpath)
insured_value.send_keys("100000")
xpath = '//*[@id="numberOfStories"]'
number_stories = driver.find_element_by_xpath(xpath)
number_stories.send_keys("Greater than one")
xpath = '//*[@id="foundationtype"]'
foundation = driver.find_element_by_xpath(xpath)
foundation.send_keys("slab")
xpath = '//*[@id="form-views"]/div[14]/div/button'
btn = driver.find_element_by_xpath(xpath)
btn.click()
soup = BeautifulSoup(driver.page_source, 'lxml')
premium = soup.find('div', class_='gauge-subtitle ng-binding ng-scope')
print(premium.text)
This is the $156 I would like to extract:
<div ng-if="isQuoting == false" class="gauge-subtitle ng-binding ng-scope">Annual Premium: $156.00</div>
Note that iframe id is as follows (not sure if this is helpful):
<iframe id="premiumCalc-iframe" style="width: 100%; border: none; height: 1397px;" scrolling="no" src="//calc.earthquakeauthority.com/app/index.html" cd_frame_id_="d0b3a5bcdcfe60ced66a29d282ad86c6"></iframe>
enter image description here
I have tried to make it a little more robust by adding in wait conditions. Your final quote page refreshes at the last click, so you likely got a stale element exception. If you can find a decent indicator of that refresh completing, you should replace my current time.sleep.
Personally, I would use CSS selectors throughout, but I am sticking with XPath to align with your code.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome(r"C:\Users\tomwp\Downloads\chromedriver_win32\chromedriver.exe")
page = driver.get("https://www.earthquakeauthority.com/")
xpath = '//*[@id="form"]/header/div[2]/a'
btn = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()
iframe = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//iframe[@id='premiumCalc-iframe']")))
driver.switch_to.frame(iframe)
xpath = '//*[@id="cea-page-1"]/div/div/div[1]/div/button[1]'
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()
xpath = '//*[@id="startdate"]'
incept_date = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, xpath)))
incept_date.send_keys("03/24/2019")
xpath = '//*[@id="participatingInsurer"]'
insurance_company = driver.find_element_by_xpath(xpath)
insurance_company.send_keys("Other")
xpath = '//*[@id="street"]'
street_address = driver.find_element_by_xpath(xpath)
street_address.send_keys("26 Los Indios")
xpath = '//*[@id="zipcode"]'
zip_code = driver.find_element_by_xpath(xpath)
zip_code.send_keys("92618")
xpath = '//*[@id="form-views"]/div[18]/div/button'
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()
xpath = '//*[@id="yearbuilt"]'
year_built = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, xpath)))
year_built.send_keys("2011")
xpath = '//*[@id="insuredvalue"]'
insured_value = driver.find_element_by_xpath(xpath)
insured_value.send_keys("100000")
xpath = '//*[@id="numberOfStories"]'
number_stories = driver.find_element_by_xpath(xpath)
number_stories.send_keys("Greater than one")
xpath = '//*[@id="foundationtype"]'
foundation = driver.find_element_by_xpath(xpath)
foundation.send_keys("slab")
xpath = '//*[@id="form-views"]/div[14]/div/button'
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()
time.sleep(2)
quote = driver.find_element_by_css_selector(".gauge-subtitle").text
print(quote)
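Following up on the time.sleep(2) caveat above: one possible refresh indicator is the quote text itself, since the gauge reads "Annual Premium: -" until the calculation finishes. A sketch, assuming the dollar sign only appears once the quote is ready:
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait

def quote_ready(d):
    # Returning a falsy value keeps the wait polling.
    text = d.find_element_by_css_selector(".gauge-subtitle").text
    return text if "$" in text else False

# Ignore staleness while the quote page re-renders.
wait = WebDriverWait(driver, 15, ignored_exceptions=[StaleElementReferenceException])
print(wait.until(quote_ready))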
If I get you correctly, you are able to navigate up to the estimation page and see the estimated annual premium value.
If that's the case, then just try this code:
iframe = driver.find_element_by_xpath("//iframe[@id='premiumCalc-iframe']")
driver.switch_to.frame(iframe)  # the result lives inside the iframe
# find_element_by_class_name cannot take a compound class, so use a CSS selector instead
yourResult = driver.find_element_by_css_selector(".gauge-subtitle.ng-binding.ng-scope").text