I'm trying to click on all the links on a web page after clicking on the dates (https://www.eduqas.co.uk/qualifications/computer-science-as-a-level/#tab_pastpapers) but the links don't have unique class names and only have a tag name "a" but multiple other elements have the same tag name. How can I click on the links
Here is the current code, it clicks on the dates but as I said I can't click on the links:
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
# Location of the ChromeDriver executable. Raw string so the backslashes in
# the Windows path are kept literally instead of being parsed as escapes.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)  # start Chrome through that driver
driver.get("https://www.eduqas.co.uk/qualifications/computer-science-as-a-level/#tab_pastpapers")  # website
driver.maximize_window()
driver.implicitly_wait(3)
driver.execute_script("window.scrollTo(0, 540)")
sleep(3)  # Giving time to fully load the content

# Collect the clickable date entries before dismissing the cookie banner.
elements = driver.find_elements(By.CSS_SELECTOR, ".css-13punl2")
driver.find_element(By.ID, 'accept-cookies').click()  # Closes the cookies prompt

for x in elements:
    # Skip the entry labelled 'GCSE'; every other entry is clicked.
    if x.text == 'GCSE':
        continue
    x.click()
    # Scroll the page to the bottom after each click.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(1)  # This sleep is necessary to give time to finish scrolling

print(len(elements))
Image of links
I think you are trying this:
# Needed libs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome()
driver.get('https://www.eduqas.co.uk/qualifications/computer-science-as-a-level/#tab_pastpapers')
driver.maximize_window()

# We get how many dates (year) fields we have.
# XPath attribute tests use "@" (e.g. @id, @class).
dates_count = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//div[@id='pastpapers_content']//button[@class='css-13punl2']/..")
    )
)
driver.find_element(By.ID, 'accept-cookies').click()

# We do scroll to the year element and we click every year
for i in range(1, len(dates_count) + 1):
    # NOTE(review): the index here is fixed at 1, not the loop counter.
    # Presumably a clicked button no longer matches the class, so the first
    # match is always the next unopened year -- confirm against the page.
    date = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, "(//div[@id='pastpapers_content']//button[@class='css-13punl2']/..)[1]")
        )
    )
    driver.execute_script("arguments[0].scrollIntoView();", date)
    time.sleep(0.3)
    date.click()
    time.sleep(0.3)

    # For every year we get all the links inside the i-th content panel
    links = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, f"(//div[@class='css-1301qtx'])[{i}]//a")
        )
    )

    # We do scroll to the link element and we click every link
    for link in links:
        driver.execute_script("arguments[0].scrollIntoView();", link)
        time.sleep(0.3)
        link.click()
        # Return focus to the first window handle before handling the next link.
        driver.switch_to.window(driver.window_handles[0])
This opens every single link on the page.
I hope the comments in the code help explain what it does.
The main reason for using XPath is when you don’t have a suitable id or name attribute for the element you wish to locate. You can use XPath to either locate the element in absolute terms (not advised) or relative to an element that does have an id or name attribute. XPath locators can also be used to specify elements via attributes other than id and name.
Find the elements:
# Locate the first <form> under <body> using an absolute XPath.
elem = driver.find_element(By.XPATH, "/html/body/form[1]")
Get XPath:
Go to the inspect window
Select the desired element and right-click on it
In the dropdown click on copy tab, then on copy XPath
Hope this helps.Happy Coding:)
Related
I am trying to input the name of the city that I want to depart from into Google Flights using python. After locating the element (the input box) in the html code I noticed that once you interact with the element it changes its class from class="II2One j0Ppje zmMKJ LbIaRd" to class="II2One j0Ppje zmMKJ LbIaRd VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe". Therefore driver.find_element(By.CSS_SELECTOR, '.LbIaRd').send_keys(city_name) only manages to send the first character of the string before the input box changes class. However I am unable to send any text to this element after it changes its class.
I have tried interacting with the element in order to change its class and then applying the same method to the new class of the input box:
# Click the first element carrying the .LbIaRd class.
x = driver.find_elements(By.CSS_SELECTOR, '.LbIaRd')[0]
x.click()
time.sleep(2)
# Look the input up again by the longer class name and type the city into it.
driver.find_element(By.CSS_SELECTOR, '.VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe').send_keys(city_name)
The code ends with no errors, but the text is not sent to the input box. There is only one element matching the .VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe locator.
I have also tried to click the element after the class change and then send the keys:
# Click the first element carrying the .LbIaRd class.
x = driver.find_elements(By.CSS_SELECTOR, '.LbIaRd')[0]
x.click()
time.sleep(2)
# Second attempt: click the element found by the longer class name, then send the keys.
driver.find_element(By.CSS_SELECTOR, '.VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe').click()
driver.find_element(By.CSS_SELECTOR, '.VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe').send_keys(city_name)
But I get the following exception:
selenium.common.exceptions.ElementClickInterceptedException:
Message: element click intercepted:
Element <input class="II2One j0Ppje zmMKJ LbIaRd VfPpkd-ksKsZd-mWPk3d-OWXEXe-AHe6Kc-XpnDCe, redacted html code">
is not clickable at point (612, 472).
Other element would receive the click: <input class="II2One j0Ppje zmMKJ LbIaRd", redacted html code>
Any help would be greatly appreciated.
Something like this? Try sending the keys with ActionChains instead of the element's send_keys function.
# Needed libs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
# We create the driver
driver = webdriver.Chrome()
# We instantiate the ActionChains, which lets us send keys to whatever
# element currently has focus rather than to a specific element
action = ActionChains(driver)
# We maximize the window
driver.maximize_window()
# We navigate to the url
url = 'https://www.google.com/travel/flights'
driver.get(url)
departure = "Berlin, Germany"
destiny = "Austin, Texas, USA"

# We click on Reject cookies button from pop up
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//button)[1]"))).click()

# We clear the departure input in case there is something
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//input)[1]"))).clear()
# We click on it
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//input)[1]"))).click()
# We send the keys with the actionChains previously initialized
action.send_keys(departure).perform()
# We click on the first result (XPath attribute tests use "@", e.g. @role)
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f"//ul[@role='listbox']//div[text()='{departure}']"))).click()

# Same for destiny input
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//input)[3]"))).click()
action.send_keys(destiny).perform()
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f"//ul[@role='listbox']//div[text()='{destiny}']"))).click()

# Click on search button
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//button/span[text()='Search']"))).click()
I'm practicing by trying to scrape my university's course catalog. I have a few lines of Python that open the URL in Chrome and click the search button to bring up the course catalog. When I try to extract the text using find_elements_by_xpath(), it returns blank. When I use the dev tools in Chrome, there definitely is text there.
from selenium import webdriver
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)

# Switch into the iframe before looking up the elements below.
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)

# Click the search button (XPath attribute tests use "@", e.g. @id).
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()

# find_elements_* returns a list of WebElement objects, so this prints the
# element objects themselves rather than their text.
course = driver.find_elements_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]')
print(course)
I'm trying to extract the text from the element 'OSU_CAT_SRCH_OSR_CRSE_HEADER'. I don't understand why it's not returning the text values especially when I can see that it contains text with dev tools.
You are not reading the element's .text attribute; that is why you are not getting the text.
# Use the singular find_element_*: it returns one WebElement, which has .text.
# The plural find_elements_* returns a list, and a list has no .text attribute.
course = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]').text
Try the above change on the second-to-last line.
Below is the full code after the changes
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)

# Switch into the iframe before looking up the elements below.
iframe = driver.find_element(By.ID, 'ptifrmtgtframe')
driver.switch_to.frame(iframe)

# Click the search button (XPath attribute tests use "@", e.g. @id).
element = driver.find_element(By.XPATH, '//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()

# Wait up to 10 seconds for the header element to be present, then read its text.
course = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]'))
).text
print(course)
Context: I'm trying to iterate over a list of links (single webpage, hundreds of links listed) using Selenium to click into the links then return to the page with the list. The script correctly identifies the list by the xpath, but for some reason the loop always picks this option in the list:
PowerForm - Adult Ambulatory Intake History Ortho - Form Design - Excel
Here's the beginning of the list I'm trying to iterate over:
Webpage links list
After the script goes to that page, and returns to the list, it still selects that specific one to go back into and I have no idea why that one in particular. Please help!
Here's my code:
from selenium import webdriver
#below imports the ability to access keys/buttons on web pages
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from time import sleep
from selenium.common.exceptions import TimeoutException
# Raw string so the backslashes in the Windows path are kept literally.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)

# directing to desired wiki page
driver.get("https://businesslogin.cerner.com/?ReturnUrl=https%3A%2F%2Fwiki.cerner.com%2Flogin.action%3Fos_destination%3D%252Fpages%252Fviewpage.action%253FspaceKey%253Dreference%2526title%253DClinical%25252520Content%25252520PowerForms%25252520Pages")

# clicking associate login button
login = driver.find_element_by_id("react-548405278")
login.click()

try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "react-548405278"))
    )
    element.click()
except Exception:
    # Best effort: if the second click fails for any reason, pause and carry on.
    time.sleep(2)

acceptcookies = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "onetrust-accept-btn-handler"))
)
acceptcookies.click()

# at this point you should be at the desired wiki page with the list of links
# below accesses each link, opens the tinylink, and copies it
# (XPath attribute tests use "@", e.g. @id)
powerforms = driver.find_element_by_xpath("//*[@id='main-content']/ul")
links = powerforms.find_elements_by_tag_name("li")
linkslist = [li.text for li in links]

# WRITE LIST OF FORMS TO EXCEL FILE HERE
#workbook = xlsxwriter.Workbook('powerforms.tinylinks.xslx')

for link in links:
    try:
        # NOTE(review): this overwrites the loop variable with the element
        # whose id is "main-content", so the same element is clicked on every
        # iteration instead of the current <li>. This is the behaviour the
        # question is asking about.
        link = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "main-content")))
        link.click()
        # clicks 'Share' button to open popout
        share = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'shareContentLink')))
        share.click()
        # clicks 'Copy' button to copy the tinylink to the clipboard
        copy = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'share-link-copy-button')))
        copy.click()
        # go back to the page that lists the links
        driver.find_element_by_link_text("Clinical Content PowerForms Pages").click()
    except TimeoutException:
        print("Couldn't load element")
        continue
The following Selenium automated script correctly opens the given URL and opens the View Invoice tab that opens up a detailed Invoice.
I need to fetch some values like Number, Date and table values from the detailed invoice. The values are very nested to get to them correctly. The URL that opens up when the View Invoice is clicked, I don't know how to scrape it or use selenium to proceed with.
Is the element in the code like an instance to get the values of the opened detailed invoice page or is there some different approach?
Here is the code:
from selenium import webdriver
driver = webdriver.Chrome(executable_path=r'D:/Chrome driver/chromedriver.exe')  # Get local session(use webdriver.Chrome() for chrome)
driver.implicitly_wait(3)
driver.get("URL")  # load page from some url

# Fill in the PNR field and submit (XPath attribute tests use "@", e.g. @id).
driver.find_element_by_xpath("//input[@id='PNRId']").send_keys("MHUISZ")
driver.find_element_by_xpath("//input[@id='GstRetrievePageInteraction']").click()

# Click the element named 'ViewInvoice'.
element = driver.find_element_by_name('ViewInvoice')
element.click()
Can anyone please guide me on how to fetch the values from the invoice page?
Try waiting for the elements to be visible or clickable. Clicking on the invoice actually creates a new child window handle, so you have to switch to it. All that is left is to work out how to walk through the table; try locating its cells by XPath.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path=r'D:/Chrome driver/chromedriver.exe')  # Get local session(use webdriver.Chrome() for chrome)
driver.get("URL")

# Wait for each control before using it (XPath attribute tests use "@", e.g. @id).
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//input[@id='PNRId']"))).send_keys("MHUISZ")
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//input[@id='GstRetrievePageInteraction']"))).click()
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'ViewInvoice'))).click()

# Handle of the window we started in.
p = driver.current_window_handle

# get first child window
chwnd = driver.window_handles
for w in chwnd:
    # switch focus to the first handle that is not the starting window
    if w != p:
        driver.switch_to.window(w)
        break

invoiceTable = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "TableHeader")))
# Text of the first cell in the first body row, relative to the located element.
print(invoiceTable.find_element(By.XPATH, "tbody/tr[1]/td").text)
driver.quit()
Link to the page I am trying to scrape:
https://www.nytimes.com/reviews/dining
Because this page has a "show more" button, I needed Selenium to automatically click the "show more" button iteratively, and then somehow use Beautiful soup to harvest the links to each individual restaurant review on the page. In the photo below, the link I want to harvest is within the https://...onigiri.html">.
Code so far:
url = "https://www.nytimes.com/reviews/dining"
driver = webdriver.Chrome('chromedriver', chrome_options=chrome_options)
driver.get(url)

# Click the first <button> on the page; the loop currently runs once.
for i in range(1):
    button = driver.find_element_by_tag_name("button")
    button.click()
How do I use WebDriverWait and BeautifulSoup [BeautifulSoup(driver.page_source, 'html.parser')] to complete this task?
Go to https://www.nytimes.com/reviews/dining press F12 and then press Ctrl+Shift+C to get element Show More, then as I showed in picture get your xpath of element:
In order to find xpath please look at:
https://www.techbeamers.com/locate-elements-selenium-python/#locate-element-by-xpath
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def executeTest():
    """Open the NYT dining reviews page and click one element.

    Uses the module-level ``driver``. 'Your_Xpath' is a placeholder that must
    be replaced with the real XPath of the element to click.
    """
    global driver
    driver.get('https://www.nytimes.com/reviews/dining')
    time.sleep(7)  # fixed pause after loading the page
    element = driver.find_element_by_xpath('Your_Xpath')
    element.click()
    time.sleep(3)  # fixed pause after the click
def startWebDriver():
    """Create a Chrome driver and store it in the module-level ``driver``."""
    global driver
    options = Options()
    options.add_argument("--disable-infobars")
    driver = webdriver.Chrome(chrome_options=options)
# Script entry point: start the browser, run the steps, then close the browser.
if __name__ == "__main__":
    startWebDriver()
    executeTest()
    driver.quit()
This is a lazy-loading page. To click the Show More button, use an infinite loop: scroll to the bottom of the page, collect the links into a list, click the button, and wait a little for the new content to load. Compare the length of the list before and after each pass; if it has not changed, break out of the loop.
Code:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import time
driver = webdriver.Chrome()
driver.get("https://www.nytimes.com/reviews/dining")

# Accept the cookie prompt by clicking its button
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Accept']"))).click()

listhref = []  # collected hrefs, in the order first seen
seen = set()   # same hrefs, for fast membership tests

while True:
    # Scroll to the bottom of the page.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    elements = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "a.css-gg4vpm"))
    )

    lenlistbefore = len(listhref)
    for ele in elements:
        href = ele.get_attribute("href")
        if href not in seen:
            seen.add(href)
            listhref.append(href)

    # No new links since the previous pass: stop.
    if len(listhref) == lenlistbefore:
        break

    button = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//button[text()='Show More']"))
    )
    # Click the button through JavaScript rather than WebElement.click().
    driver.execute_script("arguments[0].click();", button)
    time.sleep(2)

print(len(listhref))
print(listhref)
Note:- I am getting list count 499