How can I print an email address using Selenium in Python? - python

<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>
I have tried using
# Asker's attempt: look for <a> elements under a <div> with this id and read .text.
email = browser.find_elements_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddress"]//a').text
print(email)
But I'm not getting a result.

The email inside the a tag is the href of the a tag so just do this:
Using Selenium:
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Firefox()
try:
    driver.get("http://somedomain/url_that_delays_loading")
    # The id belongs to the <a> element itself, so look it up directly.
    a_tag = driver.find_element(By.ID, 'MainCopy_ctl13_presentJob_EmailAddress')
    # href has the form "mailto:<address>"; keep everything after the scheme.
    mail_link = a_tag.get_attribute("href")
    mail_addrs = mail_link.split(':', 1)[1]
    print(mail_addrs)
finally:
    # Release the browser even if the lookup above fails.
    driver.quit()
Using Beautifulsoup:
from bs4 import BeautifulSoup
content = """
<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>"""
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(content, "html.parser")
a_tag = soup.find(id='MainCopy_ctl13_presentJob_EmailAddress')
# href has the form "mailto:<address>"; keep everything after the scheme.
mail_link = a_tag.attrs['href']
mail_addrs = mail_link.split(':', 1)[1]
print(mail_addrs)

.text returns only the visible text; use the textContent attribute for text that is not in the viewport:
# Read textContent of the <a> inside the panel <div>.
email = browser.find_element_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddressPanel"]//a').get_attribute("textContent")
print(email)

Is the element already there? Or is the code perhaps executed before the element has been loaded by Selenium?
Consider using an explicit wait:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    # Wait up to 10 seconds for the element with id "myDynamicElement"
    # to be present before using it.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    # Close the browser whether or not the wait succeeded.
    driver.quit()

The id attribute which you have used i.e. MainCopy_ctl13_presentJob_EmailAddress belongs to the <a> tag instead of the <div>
To print the email address you can use either of the following Locator Strategies:
Using css_selector and get_attribute():
print(driver.find_element(By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress").get_attribute("innerHTML"))
Using xpath and text attribute:
# Select the <a> by its id attribute and print its visible text.
print(driver.find_element(By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']").text)
Ideally you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following Locator Strategies:
Using CSS_SELECTOR and text attribute:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress"))).text)
Using XPATH and get_attribute():
# Wait up to 20 seconds for the <a> to become visible, then print its innerHTML.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']"))).get_attribute("innerHTML"))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Related

Getting text from a <pre> object with Selenium

I'm trying to get the text inside of <pre> tag and I have tried with get_attribute('text'), get_attribute('value'), .text(), .value(), get_attribute("innerHTML") but I keep failing:
Snapshot:
This is the code that i'm using:
import unittest
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome("chromedriver.exe")
driver.get("###")
elem=driver.find_element(By.ID, "login_admin").click()
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[2]/form/div[1]/input").send_keys("###")
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[2]/form/div[2]/input").send_keys("###")
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[3]/div/button[1]").click()
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[1]/button/span[3]').click()
time.sleep(2)
elem = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[2]/ul/li/a')))
time.sleep(2)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[2]/ul/li/a').click()
time.sleep(2)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div[2]/div/span[1]/input[2]').send_keys('###')
time.sleep(1)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div[2]/div/span[2]/button').click()
# Wait up to 20 seconds for a non-empty <pre> under div.output, then print its innerHTML.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='output']/pre[text()]"))).get_attribute("innerHTML"))
And this is what it prints:
To print the text within the <pre> tag you can use either of the following locator strategies:
Using css_selector and get_attribute("innerHTML"):
print(driver.find_element(By.CSS_SELECTOR, "div.output > pre").get_attribute("innerHTML"))
Using xpath and text attribute:
# Select the <pre> under div.output that contains 'ContactUri' and print its text.
print(driver.find_element(By.XPATH, "//div[@class='output']/pre[contains(., 'ContactUri')]").text)
To extract the text ideally you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following locator strategies:
Using CSS_SELECTOR and text attribute:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.output > pre"))).text)
Using XPATH and get_attribute("innerHTML"):
# Wait up to 20 seconds for the matching <pre> to become visible, then print its innerHTML.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='output']/pre[contains(., 'ContactUri')]"))).get_attribute("innerHTML"))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
You can find a relevant discussion in How to retrieve the text of a WebElement using Selenium - Python

Python Selenium: How can I print the link?

How can I print the value of the href attribute?
How can I print the link aaaaa.pdf with python selenium?
HTML:
<div class="xxxx">
</div>
You can do like this:
print(driver.find_element_by_css_selector(".xxxx a").get_attribute('href'))
div.xxxx a
first, check if this CSS_SELECTOR is representing the desired element.
Steps to check:
Press F12 in Chrome -> go to element section -> do a CTRL + F -> then paste the css and see, if your desired element is getting highlighted with 1/1 matching node.
If yes, then use explicit waits:
wait = WebDriverWait(driver, 20)
print(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.xxxx a"))).get_attribute('href'))
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Try the below:
pName = driver.find_element_by_css_selector(".xxxx").text
print(pName)
or
pName = driver.find_element_by_css_selector(".xxxx").get_attribute("href")
print(pName)
The value of the href attribute i.e. aaaaa.pdf is within the <a> tag which is the only descendant of the <div> tag.
Solution
To print the value of the href attribute you can use either of the following locator strategies:
Using css_selector:
print(driver.find_element(By.CSS_SELECTOR, "div.xxxx > a").get_attribute("href"))
Using xpath:
# Select the <a> that is a direct child of div.xxxx and print its href.
print(driver.find_element(By.XPATH, "//div[@class='xxxx']/a").get_attribute("href"))
To extract the value ideally you have to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following Locator Strategies:
Using CSS_SELECTOR:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.xxxx > a"))).get_attribute("href"))
Using XPATH:
# Wait up to 20 seconds for the <a> under div.xxxx to become visible, then print its href.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='xxxx']/a"))).get_attribute("href"))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Why can't Selenium and the Firefox webdriver crawl website tags loaded by AJAX?

I want to get the text of some HTML tags from bonbast, where some elements are loaded by AJAX (for example, the tag with the id "ounce_top"). I have tried Selenium and geckodriver, but I still cannot crawl these tags; moreover, when the automated Firefox (geckodriver) opens, these elements are not shown on the web page! I have no idea why this happens. How can I crawl this website?
Code trials:
from selenium import webdriver
from bs4 import BeautifulSoup
url_news = 'https://bonbast.com/'
driver = webdriver.Firefox()
driver.get(url_news)
html = driver.page_source
soup = BeautifulSoup(html)
a = driver.find_element_by_id(id_="ounce_top")
The desired element is a dynamic element, so ideally to extract the desired text i.e. 1,817.43 you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following Locator Strategies:
Using CSS_SELECTOR:
driver.get("https://bonbast.com/")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn.btn-primary.btn-sm.acceptcookies"))).click()
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span#ounce_top"))).text)
Using XPATH:
driver.get("https://bonbast.com/")
# Click the cookie-consent button once it is clickable.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn.btn-primary.btn-sm.acceptcookies"))).click()
# Wait for the span with id "ounce_top" to become visible and print its text.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[@id='ounce_top']"))).text)
Console Output:
1,817.43
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
You can find a relevant discussion in How to retrieve the text of a WebElement using Selenium - Python
To do that with Selenium you will need to add a wait / delay. Preferably to use the expected conditions explicit wait.
I guess you are trying to get the text value inside that element?
This should work:
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url_news = 'https://bonbast.com/'
driver = webdriver.Firefox()
wait = WebDriverWait(driver, 20)
driver.get(url_news)
html = driver.page_source
soup = BeautifulSoup(html)
your_gold_value = wait.until(EC.visibility_of_element_located((By.ID, "ounce_top"))).text

Selenium Python extract text between Span

I am trying to extract the text "Margareta Osborn" from the HTML below via Python with Selenium, but I keep getting blank values when I print. I have tried get_attribute,
but I am still getting blank values when I print.
<div class="author-info hidden-md">
By (author)
<span itemprop="author" itemtype="http://schema.org/Person" itemscope="Margareta Osborn">
<a href="/author/Margareta-Osborn" itemprop="url">
<span itemprop="name">
Margareta Osborn</span>
</a>
</span>
</div>
Below is my code for Python
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Raw string so the backslashes in the Windows path are not treated as escapes.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.bookdepository.com/")
keyword = "9781925324402"
# Wait up to 10 seconds for the search input, then submit the keyword.
Search = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="book-search-form"]/div[1]/input[1]'))
)
Search.clear()
Search.send_keys(keyword)
Search.send_keys(Keys.RETURN)
try:
    authors = driver.find_element_by_xpath("//div[@class='author-info hidden-md']/span/a/span").text
    print(authors)
except Exception:
    # Fall back to a placeholder when the author element cannot be read.
    authors = "Not Available"
    print(authors)
finally:
    # The browser is closed on both the success and the failure path.
    driver.quit()
You need to use the .text property, which is provided by the Selenium Python bindings.
.text is available on every web element.
# Read the visible text of the innermost <span> under the author-info <div>.
authors = driver.find_element_by_xpath("//div[@class='author-info hidden-md']/span/a/span").text
print(authors)
or
# Match the author link by its href and read its innerHTML.
authors = driver.find_element_by_xpath("//a[contains(@href,'/author/Margareta-Osborn')]").get_attribute('innerHTML')
print(authors)
Update 1 :
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.bookdepository.com/Rose-River-Margareta-Osborn/9781925324402")
authors = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.author-info.hidden-md span[itemprop='author'] span"))).text
print(authors)
You are missing ".text" to get the value and maybe because of that you are getting some junk value. I am thinking that you are receiving just a reference ID for that.
Using .text -
# XPath of the <span> whose itemprop attribute is 'name' (must be a string)
element = "//span[@itemprop='name']"
# Locate the element with the driver and read its text
author = driver.find_element_by_xpath(element).text
# Print the text
print(author)
Using JavaScriptExecutor -
# arguments[0] must be a WebElement, not the XPath string, for innerText to resolve.
driver.execute_script('return arguments[0].innerText;', driver.find_element_by_xpath(element))
Using Get Attribute -
driver.find_element_by_xpath(element).get_attribute('innerText')
To get the value from the span, use WebDriverWait() to wait for visibility_of_element_located() with the following CSS selector,
and use either .text or .get_attribute("textContent"):
driver.get('https://www.bookdepository.com/Rose-River-Margareta-Osborn/9781925324402')
print(WebDriverWait(driver,5).until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.author-info.hidden-md [ itemprop="author"]'))).text)
print(WebDriverWait(driver,5).until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.author-info.hidden-md [ itemprop="author"]'))).get_attribute("textContent"))
you need to import below libraries.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

Retrieving dynamic value with selenium webdriver, python

I am aware that similar threads about this already exist. However, when trying the previously suggested methods to retrieve my specific dynamic table value, all I am getting is either a nbsp value or something cryptic like "1a207feb-8080-4ff0-..."
What I am trying to do:
Get the current table value for the euro/oz price of gold from here. I "inspected" the page and got the xpath (//*[@id="bullionPriceTable"]/div/table/tbody/tr[3]/td[3]/span)
My code:
driver = webdriver.Chrome("path/to/chromedriver")
driver.get("https://www.bullionvault.com/gold-price-chart.do")
# XPath copied from the browser inspector for the table cell's <span>.
xpath = '//*[@id="bullionPriceTable"]/div/table/tbody/tr[3]/td[3]/span'
select=driver.find_element_by_xpath(xpath)
# Prints the WebElement object itself, as shown in the output below.
print(select)
This prints:
<selenium.webdriver.remote.webelement.WebElement (session="3ade114e9f0907e4eb13deac6a264fc8", element="3a670af5-8594-4504-908a-a9bfcbac7342")>
which obviously is not the number I was looking for.
I've also experimented with using get_attribute('innerHtml') and .text on the webElement, but to no avail. What am I missing here? Am I just not encoding this value correctly, or am I extracting from the wrong source?
To extract the table value for euro/oz value for gold i.e. the text €1,452.47 you have to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following Locator Strategies:
Using XPATH and get_attribute():
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
driver.get('https://www.bullionvault.com/gold-price-chart.do#')
# Click the 'Accept' link inside the cookies-warning-buttons <div>.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='cookies-warning-buttons']//a[text()='Accept']"))).click()
# Scroll the 'Live Gold Price' heading into view.
driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//strong[text()='Live Gold Price']"))))
# Print the innerHTML of the EUR <span> in the 'Gold Price per Ounce' row.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//th[text()='Gold Price per Ounce']//following-sibling::td[3]/span[@data-currency='EUR']"))).get_attribute("innerHTML"))
Console Output:
€1,456.30
Using XPATH and text attribute:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
driver.get('https://www.bullionvault.com/gold-price-chart.do#')
# Click the 'Accept' link inside the cookies-warning-buttons <div>.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='cookies-warning-buttons']//a[text()='Accept']"))).click()
# Scroll the 'Live Gold Price' heading into view.
driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//strong[text()='Live Gold Price']"))))
# Print the visible text of the EUR <span> in the 'Gold Price per Ounce' row.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//th[text()='Gold Price per Ounce']//following-sibling::td[3]/span[@data-currency='EUR']"))).text)
Console Output:
€1,456.30
Wait for the page to load then try to get the innerHTML like the following example
import time
from selenium import webdriver
chrome_browser = webdriver.Chrome(
    executable_path=r"chromedriver.exe")
chrome_browser.get("https://www.bullionvault.com/gold-price-chart.do")
# Pause for 2 seconds before looking the element up.
time.sleep(2)
select = chrome_browser.find_element_by_xpath(
    "//*[@id='bullionPriceTable']/div/table/tbody/tr[3]/td[3]/span"
).get_attribute("innerHTML")
print(select)
€1,450.98

Categories