Selenium can't get the text from element - python

I can't get the text from the element. I think the text is added dynamically (by Angular), so it is not loaded directly into the element. The text inside the element is in a format like "3", with quotation marks around it.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import xlsxwriter
import re
pattern = r"\"\d{1,2}\""  # intended to match a 1-2 digit number wrapped in quotes
PATH = "C:\Program Files (x86)\chromedriver.exe"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
driver = webdriver.Chrome(PATH, chrome_options=chrome_options)
driver.get("some-url")
xpathPain = "/html/body/div[2]/div/div/div[1]/div/div/div[1]/div[3]/div/div/div[1]/div[3]/development-numbers/status-numbers/div/div[2]/div/h4"
try:
    element = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, xpathPain)))
    elementPain = driver.find_element_by_xpath(xpathPain)
    print(elementPain.text)
except TimeoutException:
    print("Failed to load elementPain")
I get a blank output, like an empty string. I have tried waiting until the text is loaded with EC.text_to_be_present_in_element(locator, text_) and tried using a regular expression for the text part.
The page source for the element is:
<h4 class="status-numbers__number">
"6"
<!---->
</h4>
So how do I get the number 6 from this element?
I have tried print(elementPain.get_attribute("innerHTML")), and that gets the "<!---->" part but not the '"6"' part. I have also tried .get_attribute("innerText") and .get_attribute("textContent").
I have tried using the Firefox geckodriver instead as well. No result.
I have managed to solve the issue using Firefox and this code:
try:
    element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, xpathPain)))
    elementPain = driver.find_element_by_xpath(xpathPain)
    print(elementPain.get_attribute("innerHTML"))
except TimeoutException:
    print("Failed to load elementPain")
Don't know if it had to do with the element being out of the viewport.
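If the viewport theory matters, a minimal sketch (reusing driver and xpathPain from the question; not a confirmed fix) would be to scroll the element into view before reading it:
# Sketch: scroll the element into view, then fall back to textContent if .text is still empty
elementPain = driver.find_element_by_xpath(xpathPain)
driver.execute_script("arguments[0].scrollIntoView(true);", elementPain)
print(elementPain.text or elementPain.get_attribute("textContent"))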

Use the following XPath to identify the element.
You can use element.text or element.get_attribute("textContent") to get the text.
try:
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//h4[@class='status-numbers__number']")))
    elementPain = driver.find_element_by_xpath("//h4[@class='status-numbers__number']")
    print(elementPain.text)  # To get the text using .text
    print(elementPain.get_attribute("textContent"))  # To get the text using get_attribute()
except TimeoutException:
    print("Failed to load elementPain")

Related

Xpath using random.randint(2,8) always identifies the first item using Python Selenium

I am working on a random question picker for a CodeChef webpage, but even when I use a random value of i, it always clicks the first question.
Code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
import random
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.codechef.com/practice?page=0&limit=20&sort_by=difficulty_rating&sort_order=asc&search=&start_rating=0&end_rating=999&topic=&tags=&group=all")
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
time.sleep(3)
# element = driver.find_element_by_link_text("Roller Coaster")
i = random.randint(2,8)
try:
    item = WebDriverWait(driver, 25).until(
        EC.presence_of_element_located((By.XPATH, "/html/body/div/div/div/div/div[3]/div/div[2]/div/div[3]/div/div/table/tbody/tr['+str(i)+']/td[2]/div/a"))
    )
    item.click()
except:
    driver.quit()
You need to make two simple tweaks as follows:
To click() on the element, you need to induce WebDriverWait for element_to_be_clickable() instead of presence_of_element_located().
You need to replace the single quotes i.e. '+str(i)+' with double quotes "+str(i)+"
Your effective code block will be:
driver.get("https://www.codechef.com/practice?page=0&limit=20&sort_by=difficulty_rating&sort_order=asc&search=&start_rating=0&end_rating=999&topic=&tags=&group=all")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#gdpr-i-love-cookies"))).click()
i = random.randint(2,8)
print(i)
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "/html/body/div/div/div/div/div[3]/div/div[2]/div/div[3]/div/div/table/tbody/tr["+str(i)+"]/td[2]/div/a"))).click()
Console Output:
7
Just change the Xpath to the following f-String and it will work perfectly:
Change this: "/html/body/div/div/div/div/div[3]/div/div[2]/div/div[3]/div/div/table/tbody/tr['+str(i)+']/td[2]/div/a"
To this: f"/html/body/div/div/div/div/div[3]/div/div[2]/div/div[3]/div/div/table/tbody/tr[{i}]/td[2]/div/a"

Selenium Python extract text between Span

I am trying to extract the text "Margareta Osborn" from the HTML below via Python with Selenium, but I keep getting blank values when I print. I have tried get_attribute as well and still get blank values when I print.
<div class="author-info hidden-md">
By (author)
<span itemprop="author" itemtype="http://schema.org/Person" itemscope="Margareta Osborn">
<a href="/author/Margareta-Osborn" itemprop="url">
<span itemprop="name">
Margareta Osborn</span>
</a>
</span>
</div>
Below is my code for Python
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time"
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.bookdepository.com/")
keyword = "9781925324402"
Search = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//*[#id="book-search-form"]/div[1]/input[1]'))
)
Search.clear()
Search.send_keys(keyword)
Search.send_keys(Keys.RETURN)
try:
authors = driver.find_element_by_xpath("//div[#class='author-info hidden-md']/span/a/span").text
print(authors)
driver.quit()
except:
authors = "Not Available"
print(authors)
driver.quit()
You need to call .text, which the Selenium Python bindings provide on every web element.
authors = driver.find_element_by_xpath("//div[#class='author-info hidden-md']/span/a/span").text
print(authors)
or
authors = driver.find_element_by_xpath("//a[contains(#href,'/author/Margareta-Osborn')]").get_attribute('innerHTML')
print(authors)
Update 1 :
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.bookdepository.com/Rose-River-Margareta-Osborn/9781925324402")
authors = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.author-info.hidden-md span[itemprop='author'] span"))).text
print(authors)
You are missing .text to get the value; without it you are probably just getting a reference to the element rather than its text.
Using .text -
# XPath for the author name span
element = "//span[@itemprop='name']"
# Fetch the element with the driver and read its text
author = driver.find_element_by_xpath(element).text
# Print the text
print(author)
Using JavaScriptExecutor -
driver.execute_script('return arguments[0].innerText;', driver.find_element_by_xpath(element))
Using Get Attribute -
driver.find_element_by_xpath(element).get_attribute('innerText')
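A consolidated sketch of the three approaches above, assuming the driver session from the question is already on the book page:
element = "//span[@itemprop='name']"
span = driver.find_element_by_xpath(element)
print(span.text)                                                      # 1. .text
print(driver.execute_script("return arguments[0].innerText;", span))  # 2. JavaScript executor
print(span.get_attribute("innerText"))                                # 3. get_attribute()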
To get the value from the span, use WebDriverWait() with visibility_of_element_located() and the following CSS selector, then use either .text or .get_attribute("textContent"):
driver.get('https://www.bookdepository.com/Rose-River-Margareta-Osborn/9781925324402')
print(WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.author-info.hidden-md [itemprop="author"]'))).text)
print(WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.author-info.hidden-md [itemprop="author"]'))).get_attribute("textContent"))
You need to import the following libraries:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

Selenium is returning empty text for elements that definitely have text

I'm practicing trying to scrape my university's course catalog. I have a few lines in Python that open the URL in Chrome and click the search button to bring up the course catalog. When I go to extract the text using find_elements_by_xpath(), it returns blank. When I use the dev tools in Chrome, there definitely is text there.
from selenium import webdriver
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
course = driver.find_elements_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]')
print(course)
I'm trying to extract the text from the element 'OSR_CAT_SRCH_OSR_CRSE_HEADER$0'. I don't understand why it's not returning the text values, especially when I can see that it contains text with the dev tools.
You are not using .text, which is why you are not getting the text.
course = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]').text
Try the above change on the second-to-last line.
Below is the full code after the changes
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
# wait 10 seconds
course = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]'))
).text
print(course)
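If you want every course header rather than just the first row, here is a hedged sketch: the id shown ends in $0, which suggests the rows are indexed $0, $1, ... (an assumption based only on that single id), so a starts-with XPath plus find_elements would collect them all:
# Assumes the header ids are indexed OSR_CAT_SRCH_OSR_CRSE_HEADER$0, $1, ...
courses = driver.find_elements_by_xpath('//*[starts-with(@id, "OSR_CAT_SRCH_OSR_CRSE_HEADER$")]')
for c in courses:
    print(c.text)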

How to find hidden class with Selenium

I am currently working on a demo Selenium project with Python. I have been able to navigate to a page, but when trying to collect text within a div class, Selenium fails to find the HTML:
[Image: the element whose text is to be collected]
I have made use of the wait functionality, but the code still does not find the HTML element.
Any suggestions on how to resolve this issue would be appreciated; please see my code below:
[Image: my Selenium output]
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import json

# establish Json dict
global data
data = {}
global date
date = '''&checkin=2021-02-22&checkout=2021-02-28&adults=1&source'''

def find_info(place):
    data[place] = []
    driver = webdriver.Chrome('chromedriver.exe')
    driver.get("https://www.airbnb.co.uk/")
    time.sleep(2)
    # first page search bar
    search_bar = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "_1xq16jy")))
    time.sleep(2)
    search_bar.clear()
    time.sleep(2)
    search_bar.send_keys(f"{place}")
    time.sleep(2)
    enter_button = driver.find_element_by_class_name("_1mzhry13")
    enter_button.click()
    # load page
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "_ljad0a")))
    page = driver.current_url
    new_url = page.replace("&source", date)
    # driver = webdriver.Chrome('chromedriver.exe')
    driver.get(new_url)
    time.sleep(3)
    click_button = driver.find_element_by_xpath('//*[@id="menuItemButton-price_range"]/button')
    click_button.click()
    time.sleep(5)
    price = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '/html/body/div[16]/section/div/div/div[2]/div/section/div[2]/div/div/div[1]')))
    print(price)

find_info("London, United Kingdom")
I've fixed the xpath at the end of your script:
price = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '(//div[@role="menu"]//div[@dir="ltr"])[1]/preceding-sibling::div')))
print(price.text)
Explanation: under the <div role="menu" ...> there are 3 <div dir="ltr"> elements, and the first one happens to be just after the div you are looking for. So we find that one and select its preceding sibling.
Another recommendation: if you replace EC.presence_of_element_located with EC.element_to_be_clickable when you are looking for the input fields at the start, you can get rid of a few time.sleep statements, as in the sketch below.
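A sketch of that recommendation, reusing the locators from the question's code:
wait = WebDriverWait(driver, 10)
search_bar = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "_1xq16jy")))
search_bar.clear()
search_bar.send_keys(place)
enter_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "_1mzhry13")))
enter_button.click()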

Selenium iframe Data issue - python

I have been trying to access the widget data from https://www.dukascopy.com/trading-tools/widgets/quotes/historical_data_feed so that my program can scrape the website, select the search bar, enter the data, and return the result from the iframe. However, even when I switch my driver to the iframe, it still can't find the iframe's data elements.
I tried adding a time delay so that everything loads, but still no luck. The error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"d-e-Xg"}
There are actually two nested iframes in play here. That, combined with some WebDriverWaits, allowed me to arrive at the following solution. Here I locate the "Instrument" field and input text into it:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.wait import WebDriverWait
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(
    options=options, executable_path=r"C:\chromedriver\chromedriver.exe"
)
driver.implicitly_wait(10)
try:
    driver.get(
        "https://www.dukascopy.com/trading-tools/widgets/quotes/historical_data_feed"
    )
    driver.switch_to.frame(driver.find_element(By.ID, "widget-container"))
    driver.switch_to.frame(driver.find_element(By.TAG_NAME, "iframe"))
    # Wait for "Loading" overlay to disappear
    WebDriverWait(driver, 10).until(
        ec.invisibility_of_element_located((By.CLASS_NAME, "d-e-r-k-n"))
    )
    # Click "Instrument"
    driver.find_element(By.CLASS_NAME, "d-oh-i-ph-qh-rh-m").click()
    # Enter text into now-visible instrument field
    driver.find_element(By.CLASS_NAME, "d-e-Xg").send_keys("metlife")
finally:
    driver.quit()
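One small follow-up note: this example quits the driver in the finally block, but if you instead keep the session open to scrape more of the page, switch back out of the two nested iframes first. A minimal sketch:
# Return to the top-level document before interacting with anything outside the widget
driver.switch_to.default_content()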
