Python selenium find element returns nothing - python

I would like to find titles that contain '募集说明书', but the following code just returns nothing.
No error — just nothing; empty results.
# Open the SZSE bond-notice page, set a 2022-01-01..2022-01-18 date filter,
# run the query, then collect links/titles/dates of announcements whose
# title contains '募集说明书' (prospectus).
driver.get('http://www.szse.cn/disclosure/bond/notice/index.html')
wait = WebDriverWait(driver, 30)
# Open the start-date picker and navigate the calendar to January.
datefield_st = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='input-group-wrap form-control dropdown-btn']/input[1]")))
datefield_st.click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='calendar-control'][1]//div[3]//a"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@class='monthselect'][1]//li[text()='{}']".format("1")))).click()
# Type the date range directly into the two inputs.
s1 = driver.find_element_by_class_name('input-left')
s1.send_keys("2022-1-1")
s2 = driver.find_element_by_class_name('input-right')
s2.send_keys("2022-1-18")
driver.find_element_by_id("query-btn").click()
while True:
    time.sleep(2)
    try:
        # Anchors that carry an attachformat attribute and whose inner span mentions '募集说明书'.
        links = [link.get_attribute('href') for link in wait.until(EC.presence_of_all_elements_located((By.XPATH, "//a[@attachformat][.//span[contains(text(),'募集说明书')]]")))]
        titles = [title.text for title in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//span[@class='pull-left ellipsis title-text' and contains(text(), '募集说明书')]//parent::a")))]
        dates = [date.text for date in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//span[@class='pull-left ellipsis title-text' and contains(text(), '募集说明书')]//ancestor::td//following-sibling::td")))]
        for link, title, date in zip(links, titles, dates):
            print(link, title, date)
    except Exception:  # NOTE(review): the except clause was lost in the extracted post — TODO confirm the original handling
        break
<div class="text-title-box">
<a attachformat="pdf" attachpath="/disc/disk03/finalpage/2022-01-21/bb9854c5-9d89-4914-a6ea-219b487b874a.PDF" target="_blank" href="/disclosure/listed/bulletinDetail/index.html?bd5fd845-e810-42d3-98b3-d2501daaabc3" class="annon-title-link">
<span class="pull-left title-text multiline" title="22新资01:新疆金投资产管理股份有限公司2022年面向专业投资者公开发行公司债券(第一期)募集说明书">22新资01:新疆金投资产管理股份有限公司2022年面向专业投资者公开发行公司债券(第一期)募集说明书</span>
<span class="pull-left ellipsis title-icon" title="点击下载公告文件"><img src="http://res.static.szse.cn/modules/disclosure/images/icon_pdf.png">(5822k)</span>
<span class="titledownload-icon" title="点击下载公告文件"></span>
</a>
</div>
Could someone please help with this issue? Many thanks

Elements matching the //a[@attachformat][.//span[contains(text(),'募集说明书')]] XPath are located at the bottom of the presented search results; they are out of the visible screen until you scroll them into view.
Also, you are using wrong locators for the titles. See my fixes there.
The same about dates.
Also, there is no need to use wait.until(EC.visibility_of_all_elements_located(...)) 3 times there. Since the elements are found (and scrolled into view), you can simply get them with driver.find_elements.
I also see no need for the while True: loop here — it will prevent your code from completing after getting those 2 elements' data — but I left it as is, since you mentioned you intend to click "next page" there.
from selenium.webdriver.common.action_chains import ActionChains

# Same SZSE flow as the question, but scroll the last matching link into
# view (via ActionChains) before reading the result rows, and use plain
# driver.find_elements once the elements are known to be present.
driver.get('http://www.szse.cn/disclosure/bond/notice/index.html')
wait = WebDriverWait(driver, 30)
actions = ActionChains(driver)
datefield_st = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='input-group-wrap form-control dropdown-btn']/input[1]")))
datefield_st.click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='calendar-control'][1]//div[3]//a"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@class='monthselect'][1]//li[text()='{}']".format("1")))).click()
s1 = driver.find_element_by_class_name('input-left')
s1.send_keys("2022-1-1")
s2 = driver.find_element_by_class_name('input-right')
s2.send_keys("2022-1-18")
driver.find_element_by_id("query-btn").click()
while True:
    time.sleep(2)
    try:
        # Scroll to the last matching link so all results are in the viewport.
        lower_link = wait.until(EC.presence_of_element_located((By.XPATH, "(//a[@attachformat][.//span[contains(text(),'募集说明书')]])[last()]")))
        actions.move_to_element(lower_link).perform()
        time.sleep(0.5)
        links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, "//a[@attachformat][.//span[contains(text(),'募集说明书')]]")]
        titles = [title.text for title in driver.find_elements(By.XPATH, "//span[contains(@class,'pull-left') and contains(text(), '募集说明书')]//parent::a")]
        dates = [date.text for date in driver.find_elements(By.XPATH, "//span[contains(@class,'pull-left') and contains(text(), '募集说明书')]//ancestor::td//following-sibling::td")]
        for link, title, date in zip(links, titles, dates):
            print(link, title, date)
    except Exception:  # NOTE(review): the except clause was lost in the extracted post — TODO confirm the original handling
        break

Related

Selenium element not clickable at point error [duplicate]

This question already has answers here:
Can not click on a Element: ElementClickInterceptedException in Splinter / Selenium
(7 answers)
Closed last month.
I'm trying to click multiple links from a page but it seems that I can only click the first link and then the program crashes at the second link. This is the program that I'm running:
# Collect the tag links from the Goodreads lists page with BeautifulSoup,
# then click each corresponding anchor with Selenium and navigate back.
driver = webdriver.Chrome("C://Users//user/chromedriver.exe")
profile_url = "https://www.goodreads.com/list?ref=nav_brws_lists"
driver.get(profile_url)
src = driver.page_source
soup = bs(src, "lxml")
ul_wrapper = soup.find_all("ul", {"class": "listTagsTwoColumn"})
for tag in ul_wrapper:
    list = tag.find_all("li")  # NOTE(review): shadows the builtin `list`
    for li in list:
        a = li.find("a")
        # Locate the live element whose href matches the one BeautifulSoup found.
        elem = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "//a[@href='" + a.get("href") + "']")))
        elem.click()
        driver.back()
This is the error that I get on the elem.click() function:
selenium.common.exceptions.ElementClickInterceptedException: Message: element click intercepted: Element <a class="actionLinkLite" href="/list/tag/...">fiction</a> is not clickable at point (704, 677). Other element would receive the click: <div class="modal modal--overlay modal--centered" tabindex="0" data-reactid=".1i6bv62jphg">...</div> (Session info: chrome=108.0.5359.125)
The program works as expected for the first link. It clicks through it and I'm able to get the contents. But it fails for the second loop. I'm expecting to be able to click through all the links. Any suggestions?
You could try to use javascript to click the element. See below.
# Same flow as the question, but click via JavaScript so an overlay that
# intercepts the native click (ElementClickInterceptedException) is bypassed.
driver = webdriver.Chrome("C://Users//user/chromedriver.exe")
profile_url = "https://www.goodreads.com/list?ref=nav_brws_lists"
driver.get(profile_url)
src = driver.page_source
soup = bs(src, "lxml")
ul_wrapper = soup.find_all("ul", {"class": "listTagsTwoColumn"})
for tag in ul_wrapper:
    list = tag.find_all("li")  # NOTE(review): shadows the builtin `list`
    for li in list:
        a = li.find("a")
        elem = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "//a[@href='" + a.get("href") + "']")))
        # elem.click()
        driver.execute_script("arguments[0].click();", elem)  # JS click ignores overlapping elements
        driver.back()

Selecting an element with conditions and multiple attributes using Python's Selenium

I'm having trouble accessing a specific span with selenium. I want to click on a specific search result if the spans texts match a specific string from the website https://www.superherodb.com/battle/create/#add_member
The html code:
<span>
<img src="/pictures2/portraits/10/025/791.jpg" class="avatar avatar-sm" alt="Superman"> Superman
<span class="suffix level-1">Kal-El</span>
<span class="suffix level-2">Prime Earth</span>
</span>
I want to check if the string from the first span = Superman, and second span = Kal-El, and third span= Prime Earth, match the strings on a list.
My code:
# Attempt to pick the one search result whose three spans match a given
# hero name / real name / universe on superherodb.com.
driver = webdriver.Chrome('C:\Webdriver\chromedriver.exe', options= options)
driver.get('https://www.superherodb.com/battle/create')
wait = WebDriverWait(driver, 5)
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="team1"]/div/a'))).click()
Superheroes = ["Superman", "Spiderman"]  # check in span
Names = ["Kal-El", "Peter Parker"]  # check in span class = "suffix level-1"
Universes = ["Prime Earth", "Prime, Earth"]  # check in span class = "suffix level-2"
wait.until(EC.visibility_of_element_located((By.NAME, 'quickselect'))).send_keys(Superheroes[0])  # Search for Superman
# NOTE(review): these XPaths are what the question is about — they are invalid
# (`span/[contains(...)]` has no node test, and Python variables cannot be
# referenced inside an XPath string), which produces the InvalidSelectorException below.
search = [my_elem for my_elem in WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH,
              ".//*[@id='quickselect_result']/li/span/[contains(text(), Superheroes[0] )]/..")))
          if my_elem in WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH,
              ".//*[@id='quickselect_result']/li/span/span[1][@class='suffix level-1' and contains(text(), Names[0] )]/..")))
          if my_elem in WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH,
              ".//*[@id='quickselect_result']/li/span/span[2][@class='suffix level-2' and contains(text(), Universes [0])]/..")))
          ]
search.click()
My Error:
InvalidSelectorException: Message: invalid selector: Unable to locate an element with the xpath expression .//*[@id='quickselect_result']/li/span/[contains(text(), Superheroes[0] )]/.. because of the following error:
SyntaxError: Failed to execute 'evaluate' on 'Document': The string './/*[@id='quickselect_result']/li/span/[contains(text(), Superheroes[0] )]/..' is not a valid XPath expression.
I want to ultimately turn this into a for loop to check a huge list of Superheroes but I want to select the right one from the list because there are many versions of the same Superhero (i.e. same Superhero, but different Universe).
What am I doing that is wrong?

Python Selenium can't click on next page button

I want to click the next page button until there are no more pages, but it does not click.
It returns the error: raise exception_class(message, screen, stacktrace)
StaleElementReferenceException: stale element reference: element is not attached to the page document
my codes:
Thanks in advance!
# Filter chinamoney.com.cn announcements to 2021-02-01..2021-02-01 via the
# jQuery-UI date pickers, run the query, then scrape link/title/date rows
# into Chinamoney.csv, paging with the "next page" button.
driver.get('http://www.chinamoney.com.cn/chinese/zjfxzx/?tbnm=%E6%9C%80%E6%96%B0&tc=null&isNewTab=1')
driver.implicitly_wait(10)
driver.refresh()
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 5)
# Start date.
datefield_st = wait.until(EC.element_to_be_clickable((By.ID, "pdbp-date-1")))
datefield_st.click()
select_st = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-year"))))
select_st.select_by_visible_text("2021")
select2 = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-month"))))
select2.select_by_value("1")
day = 1
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[@data-handler='selectDay']/a[text()='{}']".format(str(day))))).click()
# End date.
datefield_ed = wait.until(EC.element_to_be_clickable((By.ID, "pdbp-date-2")))
datefield_ed.click()
select_ed = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-year"))))
select_ed.select_by_visible_text("2021")
select2 = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-month"))))
select2.select_by_value("1")
day = 1
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[@data-handler='selectDay']/a[text()='{}']".format(str(day))))).click()
driver.find_element_by_link_text("查询").click()
while True:
    driver.implicitly_wait(10)
    links = [link.get_attribute('href') for link in driver.find_elements_by_xpath("//a[contains(@title,'同业存单') and not(contains(@title,'申购说明')) and not(contains(@title,'公告'))]")]
    titles = [title.text for title in driver.find_elements_by_xpath("//a[contains(@title,'中期票据') and not(contains(@title,'申购说明')) and not(contains(@title,'公告'))]")]
    dates = [date.text for date in driver.find_elements_by_xpath('//*[@class="san-grid-r text-date"]')]
    driver.implicitly_wait(10)
    for link, title, date in zip(links, titles, dates):
        dataframe = pd.DataFrame({'col1': date, 'col2': title, 'col3': link}, index=[0])
        dataframe.to_csv('Chinamoney.csv', mode='a+', header=False, index=False, encoding='utf-8-sig')
        print(link, title, date)
    try:
        driver.find_element_by_xpath('//*[contains(@class, "page-next")]').click()
    except:  # NOTE(review): bare except kept from the original post — it also hides the StaleElementReferenceException the question asks about
        print('No more pages')
You passed two class names into selector while it's not allowed for search by class name. Either try
(By.CLASS_NAME, 'page-next')
or
(By.CSS_SELECTOR, '.page-btn.page-next')
Also your element and icon select the same element. So you don't need to define icon. Simply use element.click()
You are using:
driver.find_element_by_xpath('//*[contains(#class, "page-next")]').click()
Try:
# Locate the "next page" control, then click it via JavaScript (bypasses overlays).
element = driver.find_element_by_xpath('//*[contains(@class, "page-next")]')
driver.execute_script("arguments[0].click();", element)
If this doesnt work, you can try to obtain the url/link value and store it, and later you can go to the url or do what you want without click in it.

Scraping with selenium and BeautifulSoup doesn´t return all the items in the page

So I came from the question here
Now I am able to interact with the page, scroll down the page, close the popup that appears and click at the bottom to expand the page.
The problem is when I count the items, the code only returns 20 and it should be 40.
I have checked the code again and again - I'm missing something but I don't know what.
See my code below:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
import datetime

# Scroll the coolmod.com product list to the bottom, dismiss the popup,
# click the "load more" button, then count product cards with BeautifulSoup.
# (The question: only 20 of the expected 40 items are found.)
# NOTE(review): indentation below is reconstructed — it was flattened by the page extraction.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
#options.add_argument('--headless')
driver = webdriver.Chrome(executable_path=r"C:\\chromedriver.exe", options=options)
url = 'https://www.coolmod.com/componentes-pc-procesadores?f=375::No'
driver.get(url)
iter = 1  # NOTE(review): shadows the builtin `iter`
while True:
    scrollHeight = driver.execute_script("return document.documentElement.scrollHeight")
    Height = 10 * iter
    driver.execute_script("window.scrollTo(0, " + str(Height) + ");")
    if Height > scrollHeight:
        print('End of page')
        break
    iter += 1
time.sleep(3)
popup = driver.find_element_by_class_name('confirm').click()  # dismiss the cookie/confirm popup
time.sleep(3)
ver_mas = driver.find_elements_by_class_name('button-load-more')
for x in range(len(ver_mas)):
    if ver_mas[x].is_displayed():
        driver.execute_script("arguments[0].click();", ver_mas[x])
        time.sleep(10)
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'lxml')
# print(soup)
items = soup.find_all('div', class_='col-xs-12 col-sm-6 col-sm-6 col-md-6 col-lg-3 col-product col-custom-width')
print(len(items))
What is wrong? I'm a newbie to the scraping world.
Regards
Your while and for statements don't work as intended.
Using while True: is a bad practice
You scroll until the bottom - but the button-load-more button isn't displayed there - and Selenium will not find it as displayed
find_elements_by_class_name - looks for multiple elements - the page has only one element with that class
if ver_mas[x].is_displayed(): if you are lucky this will be executed only once because the range is 1
Below you can find the solution — here the code looks for the button, moves to it instead of scrolling, and performs a click. If the code fails to find the button — meaning that all the items were loaded — it breaks the while and moves forward.
# Answer: instead of scrolling to the bottom, repeatedly find the single
# "load more" button, move to it and JS-click it until it disappears
# (NoSuchElementException => all items loaded), then count the cards.
# NOTE(review): indentation below is reconstructed — it was flattened by the page extraction.
url = 'https://www.coolmod.com/componentes-pc-procesadores?f=375::No'
driver.get(url)
time.sleep(3)
popup = driver.find_element_by_class_name('confirm').click()
iter = 1  # NOTE(review): shadows the builtin `iter`
while iter > 0:
    time.sleep(3)
    try:
        ver_mas = driver.find_element_by_class_name('button-load-more')
        actions = ActionChains(driver)
        actions.move_to_element(ver_mas).perform()
        driver.execute_script("arguments[0].click();", ver_mas)
    except NoSuchElementException:
        break  # button gone: every item has been loaded
    iter += 1
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'lxml')
# print(soup)
items = soup.find_all('div', class_='col-xs-12 col-sm-6 col-sm-6 col-md-6 col-lg-3 col-product col-custom-width')
print(len(items))

Select hidden <li></li> with python selenium

I have so source html code:
<div class="timezone" id="js-timezone">
<span class="timezone__current">2:27 (GMT +2)</span>
<ul class="timezone__list">
<li><span>+3</span>Moscow, Riyadh</li>
<li><span>+4</span>Muscat </li>
</ul>
</div>
It's hidden, when I move coursor to span class="timezone__current" show the menu.
Now i'm trying to click on "+3 GMT" with Python and Selenium
# First attempt: set an onclick attribute on the hidden timezone list via JS.
print ('Opening browser...')
driver = webdriver.Firefox(executable_path = 'C:\geckodriver\geckodriver.exe')
driver.get("https://www.betexplorer.com/next/soccer/")
search_timezone = driver.find_element_by_xpath("//ul[@class='timezone__list']")
driver.execute_script('arguments[0].setAttribute("onclick", "set_timezone(+3);")', search_timezone)
And nothing going on.
I've also tried:
# Second attempt: hover over the list to reveal it, then wait for the
# "+3 Moscow, Riyadh" entry by link text (this times out — the entries
# in the sample HTML are <li>, not links with that exact text).
driver = webdriver.Firefox(executable_path = 'C:\geckodriver\geckodriver.exe')
driver.get("https://www.betexplorer.com/next/soccer/")
search_timezone = driver.find_element_by_xpath("//ul[@class='timezone__list']")
wait = WebDriverWait(driver, 10)
action = ActionChains(driver)
action.move_to_element(search_timezone).perform()
select_timezone = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "+3 Moscow, Riyadh")))
select_timezone.click()
And there is TimeoutException
Please, help me with it, i'm going crazy
Found this:
# Working approach: hover over the timezone widget so its hidden menu shows,
# then click the fourth <a> inside it (the "+3" entry).
search_timezone = driver.find_element_by_class_name("timezone")
ActionChains(driver).move_to_element(search_timezone).perform()
wait = WebDriverWait(driver, 10)
select_timezone = search_timezone.find_elements_by_tag_name('a')[3]
select_timezone.click()

Categories