I would like to extract the text that appears on mouse hover over an element on the website https://idsc.cidadessustentaveis.org.br/rankings. A particular example of the text of interest is “Erradicacao da pobreza Pontuacao: 44,47”, which appears when hovering over the first bar in the column “Desempenho por ODS”. I have tried the code below, but it returns the blank text ‘’.
# Start a Firefox session and open the rankings page.
from selenium import webdriver
driver = webdriver.Firefox()
driver.get("https://idsc.cidadessustentaveis.org.br/rankings")
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Wait (timeout argument: 20) until the element matching the CSS selector is visible.
wait = WebDriverWait(driver, 20)
desired_elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.SdgPerformanceBar__Block-sc-1yl1q71-2.fBQLcJ')))
I printed an attribute of the element extracted which confirmed that I have successfully gotten to the targeted element.
print(desired_elem.get_attribute('outerHTML'))
Which returned:
<div style="width:2.62%" class="SdgPerformanceBar__Block-sc-1yl1q71-2 fBQLcJ"></div>
Note that by inspecting the element in Firefox I found that the element has no innerHTML.
I then tried to extract the text using desired_elem.text but I get a blank ‘’
I also tried the code below which returned a blank as well.
from selenium.webdriver.common.action_chains import ActionChains
# Locate the same bar element by its CSS selector.
elem = driver.find_element(By.CSS_SELECTOR, '.SdgPerformanceBar__Block-sc-1yl1q71-2.fBQLcJ');
actions = ActionChains(driver)
# NOTE(review): this line has no perform() of its own; presumably it only queues the move — confirm.
actions.move_to_element(elem)
# Move the mouse pointer onto the element and execute the chain.
actions.move_to_element(elem).perform()
Calling elem.text returned ''
You were close to the solution.
When you hover over those elements, tooltips appear.
These tooltips present different texts depending on the element you hover over.
Here I used your code and just added the tooltip selectors:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the rankings page in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
actions = ActionChains(driver)
# CSS selectors for the two text elements inside the tooltip container.
tooltip1 = "div[role='tooltip'] .MuiTypography-root.MuiTypography-body1"
tooltip2 = "div[role='tooltip'] .MuiTypography-root.MuiTypography-body2"
url = "https://idsc.cidadessustentaveis.org.br/rankings"
driver.get(url)
desired_elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.SdgPerformanceBar__Block-sc-1yl1q71-2.fBQLcJ')))
# Hover over the bar, then read the tooltip texts once they are visible.
actions.move_to_element(desired_elem).perform()
tt1_text = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, tooltip1))).text
tt2_text = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, tooltip2))).text
print(tt1_text)
print(tt2_text)
The output is:
Erradicação da pobreza
Pontuação: 44,47
Related
I have the following problem. In the picture below I would like to fill some text into the second (red) field.
My code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
def set_scraper() -> webdriver.Chrome:
    """Set up the ChromeDriver and return a Chrome session started maximized."""
    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    # NOTE(review): the driver path is passed positionally here; newer Selenium
    # releases presumably expect a Service object instead — confirm.
    driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", options=options)
    return driver
def main() -> None:
    """Main function that is called when the script is run."""
    driver = set_scraper()
    driver.get("https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO")
    pokus = driver.find_element(By.XPATH, '/html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1]')
    driver.implicitly_wait(10)
    # NOTE(review): ActionChains.send_keys presumably types into whichever element
    # currently has focus, not the element passed to move_to_element — confirm.
    ActionChains(driver).move_to_element(pokus).send_keys("2727").perform()
The problem is that it sends "2727" into the first field, not into the red one. Although /html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1] is the full xpath of the second field. Do you know why, please?
You can use XPath to locate the parent element based on unique text "Obec" in the child element and then locate the proper input element.
Here I'm using fixed attribute values that do not seem to change.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the form page in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO"
driver.get(url)
# Find the wrapper div that contains the text 'Obec', then type into its text input.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='fieldsetWrapper'][contains(.,'Obec')]//input[@type='text']"))).send_keys("2727")
The result is:
Try with below
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Wait until the input with the given title attribute is clickable, then type into it.
element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@title='Zadejte název obce']")))
element.send_keys("2727")
You can enter the text in the second text field using the below XPATH:
# Type into the text field selected by its name attribute.
driver.find_element(By.XPATH, ".//input[@name='ctl00$bodyPlaceHolder$vyberObec$txtObec']").send_keys("2727")
# clicking on the button
driver.find_element(By.XPATH, ".//input[@title='Vyhledat obec']").click()
I'm using selenium to try and scrape a listing of products in this website:
https://www.zonacriativa.com.br/harry-potter
However, I'm having trouble getting the full listing of products. the page list 116 products, yet only a few are shown at a time. If I want to see the other ones, I need to click on the "Carregar mais Produtos" (load more products) button at the bottom a few times to get the full listing.
I'm having trouble locating this button, as it doesn't have an id and its class is a huge string. I've tried several things, like the examples below, but they don't seem to work. Any suggestions?
# Attempt 1: locate the button by its exact text with XPath.
driver.find_element("xpath", "//button[text()='Carregar mais Produtos']").click()
# Attempt 2: locate it with a CSS selector built from a chain of classes.
driver.find_element("css selector", ".vtex-button__label.flex.items-center.justify-center.h-100.ph5").click()
# Attempt 3: pass a dot-separated chain of class names as By.CLASS_NAME.
driver.find_element(By.CLASS_NAME, "vtex-button.bw1.ba.fw5.v-mid.relative.pa0.lh-solid.br2.min-h-small.t-action--small.bg-action-primary.b--action-primary.c-on-action-primary.hover-bg-action-primary.hover-b--action-primary.hover-c-on-action-primary.pointer").click()
The element you are trying to click is initially outside the visible screen, so you can't click it. Also, this XPath, at least for me, doesn't locate that element.
What you need to do is scroll the page down until that button becomes visible and clickable, and then click it.
The following code clicks that button 1 time:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

# Open the product listing in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://www.zonacriativa.com.br/harry-potter"
driver.get(url)
# Keep scrolling to the bottom until the "show more" button can be clicked once.
while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(@class,'buttonShowMore')]//button"))).click()
        break
    except WebDriverException:
        # Only Selenium errors trigger a retry, so Ctrl+C can still stop the loop.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
The above code can easily be modified to scroll and click that button until we reach the last page, where this button is no longer present:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

# Open the product listing in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://www.zonacriativa.com.br/harry-potter"
driver.get(url)
# Locator of the "show more" button, shared by the loop condition and the click.
show_more = (By.XPATH, "//div[contains(@class,'buttonShowMore')]//button")
# Loop for as long as the button is present in the page.
while driver.find_elements(*show_more):
    try:
        wait.until(EC.element_to_be_clickable(show_more)).click()
    except WebDriverException:
        # Only Selenium errors trigger a scroll-and-retry, so Ctrl+C can still stop the loop.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
How can I make Selenium run scroll only in the left quadrant?
when I use the command below it is executed in the zoom of the map and that is not my intention, because I want to scrape the links of the companies that are in the left column
driver.execute_script("window.scrollBy(0, 200)")
You need to find the scrollable div element and then you can apply JavaScript as following:
# Find the results container inside the main region, then scroll that element itself.
element = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='main']//div[contains(@aria-label,'lanchonet')]")))
driver.execute_script("arguments[0].scroll(0, arguments[0].scrollHeight);", element)
The code above works for me.
The entire code is:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the map search in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://www.google.com.br/maps/search/lanchonete,/@-27.0027727,-48.6293259,15z"
driver.get(url)
# Find the results container inside the main region, then scroll that element itself.
element = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='main']//div[contains(@aria-label,'lanchonet')]")))
driver.execute_script("arguments[0].scroll(0, arguments[0].scrollHeight);", element)
You can, of course, scroll by other distances, not only by the entire height.
I am trying to scrape this URL, dealing with a Download button, and I am having a problem, as the last line raises an ElementClickInterceptedException.
My actual goal is to download the CSV file.
The code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from ipykernel import kernelapp as app
import time
# Open the compound page in Chrome.
options = webdriver.ChromeOptions()
driver_path = 'C:\\Users\\Idener\\Downloads\\chromedriver_win32\\chromedriver.exe'
driver = webdriver.Chrome(driver_path, options=options)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078"
driver.get(url)
# Click the link inside the element with id "NIOSH-Toxicity-Data".
wait = WebDriverWait(driver, 5)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="NIOSH-Toxicity-Data"]/div[1]/div/div/a'))).click()
# Click the element with id "Download".
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="Download"]'))).click()
enter image description here
The element you are trying to click is initially outside the visible viewport. So, you first need to scroll the page and only then click on that element.
Clicking the first element opens a new tab, and the second element you want to click is there, on the second tab. So, you need to switch to the new tab to access that element.
There is no need to define wait = WebDriverWait(driver, 10) a second time.
The following code is working:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the compound page in a maximized Chrome window.
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in the Windows path must not be read as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078#section=Toxicity"
driver.get(url)
element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#NIOSH-Toxicity-Data a[title*='Open']")))
# Reading this property scrolls the element into view before the click.
element.location_once_scrolled_into_view
time.sleep(1)
element.click()
# Wait for the second window handle to exist before indexing window_handles[1].
wait.until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1])
wait.until(EC.element_to_be_clickable((By.ID, "Download"))).click()
It does not download the file, only opens the downloading dialog
I am trying to get a list of the prices from this page.
The class name of the elements I'm trying to get is called s-item__price.
This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# eBay search-results URL to scrape.
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
chrome_options = Options()
# Pass the '--headless' argument to Chrome.
chrome_options.add_argument('--headless')
browser = webdriver.Chrome(options=chrome_options)
browser.get(url)
# Print the list of elements found by class name 's-item__price' (the elements, not their text).
print(browser.find_elements_by_class_name('s-item__price'))
browser.quit()
The output is just an empty list.
You can use WebDriverWait to wait until the javascript generated the element:
# Locator of the price elements, named once for readability.
price_locator = (By.CLASS_NAME, 's-item__price')
wait = WebDriverWait(browser, 15)  # 15 sec timeout
# Block until a price element is visible.
wait.until(expected_conditions.visibility_of_element_located(price_locator))
You could also use presence_of_all_elements_located, but when it comes to click interaction it won't work with hidden elements.
So prefer using visibility_of_element_located.
Example Code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
options = Options()
options.add_argument('--headless')
browser = webdriver.Chrome(options=options)
try:
    browser.get(url)
    wait = WebDriverWait(browser, 15)  # Throws a TimeoutException after 15 seconds
    wait.until(expected_conditions.visibility_of_element_located((By.CLASS_NAME, 's-item__price')))
    # You could also use the return value of the wait.
    # find_elements(By.CLASS_NAME, ...) replaces the old find_elements_by_class_name helper.
    print(browser.find_elements(By.CLASS_NAME, 's-item__price'))
finally:
    # Always close the headless browser, even when the wait times out.
    browser.quit()
You get an empty list, I think, because you need to wait.
Use WebDriverWait and utilize .presence_of_all_elements_located to collect the elements in a list.
Then iterate over them with a loop; you must use the .text attribute to grab the text.
browser.get('https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone%208%20&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1')
wait = WebDriverWait(browser, 20)
# Wait until the price elements are present and collect all of them.
list_price = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 's-item__price')))
for price in list_price:
    print(price.text)
# Quit the same session that was opened as `browser`.
browser.quit()
With the following imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC