click on element with class name - Selenium Python

I am trying to scrape the opening hours of bars from a website. There is a list of bars, and navigating to a bar's page makes its opening hours available. I am having an issue clicking on an element by its class name.
I have written code that gets the hours from one venue; however, I am unable to navigate to each venue from the first link.
This code works when I get the hours for one venue:
from selenium import webdriver
driver = webdriver.Firefox()
driver.get('https://www.designmynight.com/london/bars/soho/six-storeys')
hours = driver.find_element_by_xpath('//li[@id="hours"]')
hours.click()
hoursTable = driver.find_elements_by_css_selector("table.opening-times tr")
for row in hoursTable:
    print(row.text)
The issue is when I try to navigate from this listing page: I am unable to click into each link. Can anyone see what I am doing wrong?
from selenium import webdriver
driver = webdriver.Firefox()
driver.get('https://www.designmynight.com/london/search-results#!?type_of_venue=512b2019d5d190d2978c9ea9&type_of_venue=512b2019d5d190d2978c9ea8&type_of_venue=512b2019d5d190d2978c9ead&type_of_venue=512b2019d5d190d2978c9eaa&type_of_venue=512b2019d5d190d2978c9eab&type=&q=&radius=')
venue = driver.find_element_by_xpath('//a[@class="ng-binding"]')
venue.click()
# this should then lead me to the following link ->
driver.get('https://www.designmynight.com/london/bars/soho/six-storeys')
hours = driver.find_element_by_xpath('//li[@id="hours"]')
hours.click()
hoursTable = driver.find_elements_by_css_selector("table.opening-times tr")
for row in hoursTable:
    print(row.text)

All links with the ng-binding class name are generated dynamically, so you need to wait until a link appears in the DOM and is also clickable:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium import webdriver
driver = webdriver.Firefox()
driver.get('https://www.designmynight.com/london/search-results#!?type_of_venue=512b2019d5d190d2978c9ea9&type_of_venue=512b2019d5d190d2978c9ea8&type_of_venue=512b2019d5d190d2978c9ead&type_of_venue=512b2019d5d190d2978c9eaa&type_of_venue=512b2019d5d190d2978c9eab&type=&q=&radius=')
venue = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//a[@class="ng-binding"]')))
venue.click()
But if you want to follow each link, I'd suggest not clicking the links, but getting the list of references and then opening each one, as below:
xpath = '//a[@class="ng-binding"]'
wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, xpath)))
links = [venue.get_attribute('href') for venue in driver.find_elements_by_xpath(xpath)]
for link in links:
    driver.get(link)
    hours = driver.find_element_by_xpath('//li[@id="hours"]')
    hours.click()
    hoursTable = driver.find_elements_by_css_selector("table.opening-times tr")
    for row in hoursTable:
        print(row.text)

I feel the issue is that the driver is trying to locate the element before it is available in the DOM. Try waiting until the element is present, instead of directly trying to find it.
You could replace this line:
venue = driver.find_element_by_xpath('//a[@class="ng-binding"]')
with:
venue = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//a[@class="ng-binding"]')))
Note that you'll need a few imports for this:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Source: http://selenium-python.readthedocs.io/waits.html

Related

Scraping flex-element Selenium Python

I am trying to scrape some tennis statistics starting from 01-01-2019.
For this I try to scrape the following webpage with Selenium: https://www.sofascore.com/de/tennis/2019-01-01
When I click on the first match manually, the container on the right side changes and shows the statistics.
This is what I want to access automatically.
When I try to click on the element with Selenium, it redirects me to another page.
Can anyone tell me why it does not just show the same content as a manual click, and how I can solve this issue?
Here is my code:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
import time
options = Options()
options.binary_location = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
browser = webdriver.Chrome(chrome_options = options)
url = 'https://www.sofascore.com/de/tennis/2019-01-01'
browser.get(url)
browser.maximize_window()
xpath = '/html/body/div[1]/main/div/div[2]/div/div[3]/div[2]/div/div/div/div/div[2]/a/div'
browser.find_element_by_xpath(xpath).click()
time.sleep(2)
browser.close()
You can use the below XPath:
//div[contains(@class, 'Col-pm5mcz-')]//descendant::div[contains(@class, 'styles__StyledWidget-')]
and get its innerHTML using the get_attribute method.
Code:
from time import sleep

# assumes a driver instance, e.g. driver = webdriver.Chrome()
url = "https://www.sofascore.com/de/tennis/2019-01-01"
driver.get(url)
xpath = '/html/body/div[1]/main/div/div[2]/div/div[3]/div[2]/div/div/div/div/div[2]/a/div'
driver.find_element_by_xpath(xpath).click()
sleep(2)
details = driver.find_element_by_xpath("//div[contains(@class, 'Col-pm5mcz-')]//descendant::div[contains(@class, 'styles__StyledWidget-')]").get_attribute('innerHTML')
print(details)
The XPath that you are using is an absolute XPath: /html/body/div[1]/main/div/div[2]/div/div[3]/div[2]/div/div/div/div/div[2]/a/div
Try to replace it with a relative XPath.
See if this works:
tableRows = driver.find_elements_by_xpath(".//div[@class='ReactVirtualized__Grid ReactVirtualized__List']//following::div/a[contains(@class,'EventCellstyles__Link')]")
for e in tableRows:
    e.click()
    # you can add an explicit wait here for the statistics section to load (see the sketch below)
    driver.find_element_by_xpath(".//a[text()='Statistiken']").click()

how to click on an item in a UL using Python Selenium

I want to click the first item. I tried locating it by XPath and by tag name, but I'm getting an error.
First, you should realize that the selector you're trying to use to interact with the page is not unique; the class name alone is not enough to get the particular element.
Second, I would again suggest using explicit waits:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
timeout = 30
driver = webdriver.Chrome()
driver.get('https://weed.com/product-category/cbd/')
element = WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.XPATH, "(//a[#class='woocommerce-LoopProduct-link woocommerce-loop-product__link'])[1]")))
element.click()
driver.quit()
If you want to click on one particular element, I would suggest locating it by its text and using ActionChains (move to the element, then click and perform):
from selenium.webdriver.common.action_chains import ActionChains

timeout = 30
driver = webdriver.Chrome()
driver.get('https://weed.com/product-category/cbd/')
element = WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.XPATH, "//h2[text()='Binoid Good Night CBD Oil – Sleep Blend']")))
action = ActionChains(driver)
action.move_to_element(element).click().perform()
driver.quit()

Selenium is returning empty text for elements that definitely have text

I'm practicing by trying to scrape my university's course catalog. I have a few lines in Python that open the URL in Chrome and click the search button to bring up the course catalog. When I go to extract the text using find_elements_by_xpath(), it returns blank. When I use the dev tools in Chrome, there definitely is text there.
from selenium import webdriver
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
course = driver.find_elements_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]')
print(course)
I'm trying to extract the text from the element 'OSR_CAT_SRCH_OSR_CRSE_HEADER'. I don't understand why it's not returning the text values, especially when I can see with dev tools that it contains text.
You are not reading the text property; that is the reason you are not getting the text:
course = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]').text
Try the above change in the second-to-last line.
Below is the full code after the changes
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
# wait 10 seconds
course = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]'))
).text
print(course)

BeautifulSoup scraping from a web page already opened by Selenium

I would like to scrape a web page that Selenium opened from a different webpage.
I entered a search term into a website using Selenium, and this landed me on a new page. My aim is to create soup out of this new page, but the soup is getting created out of the previous page, where I entered my search term. Help, please!
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries')
inputElement.send_keys(Keys.ENTER)
driver.wait.until(staleness_of('txtStock'))
source = driver.page_source
soup = BeautifulSoup(source)
You need to know the exact company names for your search. After using send_keys, you tried to check for the staleness of an element; I did not understand how that statement was supposed to work, so I added a WebDriverWait for an element of the new page.
The following works for me regarding the Selenium part, up to getting the page source:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries Ltd.')
inputElement.send_keys(Keys.ENTER)
company = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'lblCompany')))
source = driver.page_source
You should add exception handling.
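For example, a minimal sketch of such handling, catching the TimeoutException that WebDriverWait raises if the element never shows up (reusing the imports from the snippet above; quitting the driver on failure is my own choice here):
from selenium.common.exceptions import TimeoutException

try:
    company = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'lblCompany')))
except TimeoutException:
    # the results page never loaded; clean up instead of leaving the browser open
    driver.quit()
    raise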
@Jens Dibbern has given a working solution, but it is not necessary to give the exact name of the company in the search. What happens is that when you type a non-exact name, a drop-down pops up.
I have observed that until this drop-down is present, the Enter key does not work. You can check this by going to the site, pasting the name and, without waiting, pressing the Enter key as fast as possible: nothing happens.
You could instead wait for this drop-down to be visible and then send the Enter key. This also works perfectly. Note that this will end up selecting the first item in the drop-down if more than one is present.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries')
drop_down=driver.find_element_by_css_selector("#listPlacementStock")
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '#listPlacementStock:not([style*="display: none"])')))
inputElement.send_keys(Keys.ENTER)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="CompanyLink"]')))
source = driver.page_source
soup = BeautifulSoup(source,'html.parser')
print(soup)

Python 2.7 Selenium No Such Element on Website

I'm trying to do some web scraping from a betting website.
As part of the process, I have to click on the different buttons under the "Favourites" section on the left side to select different competitions.
Let's take the ENG Premier League button as an example. I identified the button in the dev tools (screenshot omitted).
The XPath is //*[@id="SportMenuF"]/div[3] and the ID is 91.
My code for clicking on the button is as follows:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_path = "C:\Python27\Scripts\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("URL Removed")
content = driver.find_element_by_xpath('//*[@id="SportMenuF"]/div[3]')
content.click()
Unfortunately, I always get this error message when I run the script:
"no such element: Unable to locate element:
{"method":"xpath","selector":"//*[#id="SportMenuF"]/div[3]"}"
I have tried different identifiers such as CCS Selector, ID and, as shown in the example above, the Xpath. I tried using waits and explicit conditions, too. None of this has worked.
I also attempted scraping some values from the website without any success:
from selenium import webdriver
from selenium.webdriver.common.by import By
chrome_path = "C:\Python27\Scripts\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("URL removed")
content = driver.find_elements_by_class_name('price-val')
for entry in content:
    print entry.text
Same problem, nothing shows up.
The website embeds an iframe from a different website. Could this be the cause of my problems? I tried scraping directly from the iframe URL, too, which didn't work either.
I would appreciate any suggestions.
Sometimes elements are either hiding behind an iframe, or they haven't loaded yet
For the iframe check, try:
driver.switch_to.frame(0)
For the wait check, try:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '-put the x-path here-')))
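Putting the two together: a minimal sketch that assumes the button sits inside the first iframe on the page (the frame index, and the XPath taken from the question, may need adjusting for the real page):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(chrome_path)
driver.get("URL removed")
# switch into the embedded iframe before looking for the button
driver.switch_to.frame(0)
content = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="SportMenuF"]/div[3]')))
content.click()
# switch back to the top-level document when done
driver.switch_to.default_content()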
