Scrape #shadow-root (open) - python

I'm trying to scrape the href values that are hidden under a #shadow-root (open) on this website: https://iltacon2022.expofp.com/. I'm new to coding and was wondering if someone could help me out.
Here is the code I've been using to try to access the #shadow-root (open), but I'm at a loss as to what to do next.
Code:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
url = "https://iltacon2022.expofp.com/"
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
driver.get(url)
time.sleep(6)
root1 = driver.find_element(By.XPATH,"/html/body/div[1]/div").shadow_root
root2 = driver.find_element(By.XPATH,"/html/body/div[1]/div//div")
print(root2)
driver.quit()
Error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div[1]/div//div"}
Desired output:
?access-corp
?accruent-inc
?aceds-association-of-certified-e-discovery-specialists
?actionstep
?aderant
etc.
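For reference, a minimal sketch of one way to reach inside the shadow root with Selenium 4's shadow_root property. The host selector below is only a guess derived from the XPath in the question, and only CSS selectors work inside a shadow root with this API, which is why the XPath lookup above fails:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://iltacon2022.expofp.com/")

# Assumed shadow host, mirroring the question's XPath /html/body/div[1]/div.
host = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "body > div:nth-of-type(1) > div"))
)
shadow = host.shadow_root  # Selenium 4+ with a Chromium driver: returns a ShadowRoot

# ShadowRoot objects accept only CSS selectors, not XPath.
for a in shadow.find_elements(By.CSS_SELECTOR, "a[href]"):
    print(a.get_attribute("href"))

driver.quit()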

Related

No Selenium selectors are working for me python

I understand that this question has been asked a lot in one way or another. However, I have tried finding elements in Selenium with every locator type I have at my disposal, and it keeps giving me the error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element:
Am I just grossly misusing selenium or is it the website?
I honestly just want to select the element so that I can start working with it for some practice code that I am doing.
Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
import time
email = 'example@email.com'
options = Options()
options.binary_location = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
service = Service("/Users/NAME/Desktop/job_stuff/chromedriver")
driver = webdriver.Chrome(options = options, service=service)
driver.get('https://www.hgtv.com/sweepstakes/hgtv-urban-oasis/sweepstakes')
is_open = True
time.sleep(5)
# inputField = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH,'//*[@id="xReturningUserEmail"]')))
inputField = driver.find_element(By.XPATH, '/html/body/div[1]/div/main/section/div/div/div/div/div/div[1]/div/form/div[1]/fieldset/div/div[2]/div[1]/input')
It is inside an iframe. You need to switch to the frame before locating the input:
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it(driver.find_element(By.XPATH, ".//*[starts-with(@id,'ngxFrame')]")))
driver.find_element(By.XPATH, ".//input[@type='email' and @id='xReturningUserEmail']").send_keys("test@gmail.com")
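A slightly fuller, hedged version of the same idea, passing locator tuples to the waits and switching back afterwards (it assumes driver is already on the page from the question; the iframe id prefix and the input id are the ones used above):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)

# Let the wait handle the iframe lookup as well by passing a locator tuple.
wait.until(EC.frame_to_be_available_and_switch_to_it(
    (By.XPATH, "//*[starts-with(@id, 'ngxFrame')]")
))

# Inside the iframe, the email input can be located normally.
email_input = wait.until(EC.element_to_be_clickable((By.ID, "xReturningUserEmail")))
email_input.send_keys("test@example.com")

# Switch back to the top-level document when finished inside the iframe.
driver.switch_to.default_content()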

Web Scraping Identify and Extract Hyperlink

Hi, I have the following script that extracts the name and address of each site, but I want to also extract the href for each site so that I can link to the individual sites. Any suggestions?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://order.marstons.co.uk/")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div/div[2]/div'))
    ).find_elements_by_tag_name('a')
    for el in element:
        print("heading", el.find_element_by_tag_name('h3').text)
        print("address", el.find_element_by_tag_name('p').text)
finally:
    driver.quit()
You mean like this?
print(el.get_attribute("href"))
You can read any attribute of an element this way.
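For context, a sketch of how that line might slot into the loop from the question, rewritten in the Selenium 4 find_element(By..., ...) style; the container XPath is taken unchanged from the question:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://order.marstons.co.uk/")
try:
    container = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div/div[2]/div'))
    )
    for el in container.find_elements(By.TAG_NAME, 'a'):
        print("heading", el.find_element(By.TAG_NAME, 'h3').text)
        print("address", el.find_element(By.TAG_NAME, 'p').text)
        print("link", el.get_attribute("href"))  # the href suggested in the answer
finally:
    driver.quit()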

Unable to scrape table using selenium

I'm trying to scrape the following webpage using selenium, https://www.stakingrewards.com/cryptoassets/, but I keep getting the same error. Does anyone know what I'm doing wrong? It seems like it can't find the class name for some reason. The error I am getting is the following:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".table-wrap"}
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get(f'https://www.stakingrewards.com/cryptoassets/')
driver.implicitly_wait(10)
element = driver.find_element_by_class_name("table-wrap")
html = BeautifulSoup(driver.execute_script("return arguments[0].innerHTML;", element), 'html.parser')
for item in html.findAll('div', {"class": "rt-tr-group"}):
    print(item.text)
I always find elements using the full XPath. For me it has been a lot more reliable, and since there can be multiple occurrences of a class name, I recommend not relying on one. What you can do is right-click the webpage and choose Inspect, find the element you're looking for, right-click the element, and choose Copy full XPath.
Replace:
element = driver.find_element_by_class_name("table-wrap")
with:
element = driver.find_element_by_xpath('/html/body/div[1]/section/section/main/div/div/section/section[2]')
Furthermore, you are trying to act on the driver variable with BeautifulSoup, which does not work. You have to pass BeautifulSoup the URL and keep going. I am unfamiliar with BeautifulSoup and can't help you much there :/
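The suggestion above uses a full XPath; an alternative sketch that keeps the class names from the question but swaps the implicit wait for an explicit one, staying entirely in Selenium (it assumes those class names are present once the page has rendered):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('window-size=1920x1080')
driver = webdriver.Chrome(options=options)
driver.get('https://www.stakingrewards.com/cryptoassets/')
try:
    # Wait explicitly for the table wrapper; headless pages often render more slowly.
    table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'table-wrap'))
    )
    for row in table.find_elements(By.CLASS_NAME, 'rt-tr-group'):
        print(row.text)
finally:
    driver.quit()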

Selenium unable to click on button

I'm trying to click on the "next page" button using selenium, but I'm having no success. Am I using the right CSS selector or should I change it to something else?
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_argument("window-size=1200x600")
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get(f'https://www.stakingrewards.com/cryptoassets')
driver.implicitly_wait(10)
button = driver.find_element(By.CSS_SELECTOR,"button[data-testid='next-page-button']")
button.click()
driver.quit()
Change:
button = driver.find_element(By.CSS_SELECTOR,"button[data-testid='next-page-button']")
to:
button = driver.find_element(By.XPATH, "//button[@data-testid='next-page-button']")
You were previously selecting with a CSS selector; the XPath above targets the same data-testid attribute with a relative path. Let me know if there's anything else I can help with or missed.
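Whichever locator is used, a clickability wait (and, in headless runs, scrolling the button into view first) is often what actually makes the click go through. A sketch assuming driver is already on the page:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait until the button is actually clickable rather than merely present.
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid='next-page-button']"))
)
# Bring the button into the viewport before clicking; helps in small headless windows.
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
button.click()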

Extracting text from a website using selenium

I'm trying to find a way to extract the book's summary from the Goodreads page. I have tried BeautifulSoup and Selenium, unfortunately to no avail.
link:https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1
code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
link='https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1'
driver.get(link)
Description=driver.find_element_by_xpath("//div[contains(text(),'TextContainer')]")
# the first TextContainer contains the summary of the book
book_page = requests.get(link)
soup = BeautifulSoup(book_page.text, "html.parser")
print(soup)
Container = soup.find('class', class_='leftContainer')
print(Container)
Error:
The container is empty, plus:
NoSuchElementException: no such element: Unable to locate element:
{"method":"xpath","selector":"//div[contains(text(),'TextContainer')]"}
(Session info: chrome=83.0.4103.116)
You can get the description like so:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
...
driver.get("https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1")
description = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'div#description span[style="display:none"]'))
)
print(description.get_attribute('textContent'))
I have utilised a CSS Selector to get the specific hidden span that contains the full description. I have also used an explicit wait to give the element time to load.
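A side note on why get_attribute('textContent') is used rather than .text: Selenium's .text only returns text that is currently rendered, so it comes back empty for the hidden span.
print(description.text)                          # '' - the span is display:none
print(description.get_attribute('textContent'))  # full summary text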
