Scrape #shadow-root (open) - python

I'm trying to scrape the href values that are hidden under a #shadow-root (open) on this website: https://iltacon2022.expofp.com/. I'm new to coding and was wondering if someone could help me out.
Here is the code I've been using to try to access the #shadow-root (open), but I'm at a loss as to what to do next.
Code:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
url = "https://iltacon2022.expofp.com/"
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
driver.get(url)
time.sleep(6)
root1 = driver.find_element(By.XPATH,"/html/body/div[1]/div").shadow_root
root2 = driver.find_element(By.XPATH,"/html/body/div[1]/div//div")
print(root2)
driver.quit()
Error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div[1]/div//div"}
Desired output:
?access-corp
?accruent-inc
?aceds-association-of-certified-e-discovery-specialists
?actionstep
?aderant
etc.
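For reference, a minimal sketch of one way to reach inside the shadow root with Selenium 4's shadow_root property. The host selector below is only a guess derived from the XPath in the question, and only CSS selectors work inside a shadow root with this API, which is why the XPath lookup above fails:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://iltacon2022.expofp.com/")

# Assumed shadow host, mirroring the question's XPath /html/body/div[1]/div.
host = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "body > div:nth-of-type(1) > div"))
)
shadow = host.shadow_root  # Selenium 4+ with a Chromium driver: returns a ShadowRoot

# ShadowRoot objects accept only CSS selectors, not XPath.
for a in shadow.find_elements(By.CSS_SELECTOR, "a[href]"):
    print(a.get_attribute("href"))

driver.quit()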

Related

No Selenium selectors are working for me python

I understand that this question has been asked a lot in one way or another. However, I have tried finding elements in Selenium with every locator type I have at my disposal, and it keeps giving me the error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element:
Am I just grossly misusing selenium or is it the website?
I honestly just want to select the element so that I can start working with it for some practice code that I am doing.
Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
import time
email = 'example@email.com'
options = Options()
options.binary_location = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
service = Service("/Users/NAME/Desktop/job_stuff/chromedriver")
driver = webdriver.Chrome(options = options, service=service)
driver.get('https://www.hgtv.com/sweepstakes/hgtv-urban-oasis/sweepstakes')
is_open = True
time.sleep(5)
# inputField = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH,'//*[@id="xReturningUserEmail"]')))
inputField = driver.find_element(By.XPATH, '/html/body/div[1]/div/main/section/div/div/div/div/div/div[1]/div/form/div[1]/fieldset/div/div[2]/div[1]/input')
It is inside an iframe. You need to switch to the frame before locating the input:
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it(driver.find_element(By.XPATH, ".//*[starts-with(@id,'ngxFrame')]")))
driver.find_element(By.XPATH, ".//input[@type='email' and @id='xReturningUserEmail']").send_keys("test@gmail.com")
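A slightly fuller, hedged version of the same idea, passing locator tuples to the waits and switching back afterwards (it assumes driver is already on the page from the question; the iframe id prefix and the input id are the ones used above):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)

# Let the wait handle the iframe lookup as well by passing a locator tuple.
wait.until(EC.frame_to_be_available_and_switch_to_it(
    (By.XPATH, "//*[starts-with(@id, 'ngxFrame')]")
))

# Inside the iframe, the email input can be located normally.
email_input = wait.until(EC.element_to_be_clickable((By.ID, "xReturningUserEmail")))
email_input.send_keys("test@example.com")

# Switch back to the top-level document when finished inside the iframe.
driver.switch_to.default_content()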

Web Scraping Identify and Extract Hyperlink

Hi, I have the following script that extracts the name and address of each site, but I want to also extract the href for each site so that I can link to the individual sites. Any suggestions?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://order.marstons.co.uk/")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div/div[2]/div'))
    ).find_elements_by_tag_name('a')
    for el in element:
        print("heading", el.find_element_by_tag_name('h3').text)
        print("address", el.find_element_by_tag_name('p').text)
finally:
    driver.quit()
You mean like this?
print(el.get_attribute("href"))
You can read any attribute of an element this way.
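For context, a sketch of how that line might slot into the loop from the question, rewritten in the Selenium 4 find_element(By..., ...) style; the container XPath is taken unchanged from the question:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://order.marstons.co.uk/")
try:
    container = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div/div[2]/div'))
    )
    for el in container.find_elements(By.TAG_NAME, 'a'):
        print("heading", el.find_element(By.TAG_NAME, 'h3').text)
        print("address", el.find_element(By.TAG_NAME, 'p').text)
        print("link", el.get_attribute("href"))  # the href suggested in the answer
finally:
    driver.quit()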

Unable to scrape table using selenium

I'm trying to scrape the following webpage using selenium, https://www.stakingrewards.com/cryptoassets/, but I keep getting the same error. Does anyone know what I'm doing wrong? It seems like it can't find the class name for some reason. The error I am getting is the following:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".table-wrap"}
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get(f'https://www.stakingrewards.com/cryptoassets/')
driver.implicitly_wait(10)
element = driver.find_element_by_class_name("table-wrap")
html = BeautifulSoup(driver.execute_script("return arguments[0].innerHTML;", element), 'html.parser')
for item in html.findAll('div', {"class": "rt-tr-group"}):
    print(item.text)
I always find elements using the full XPath. For me it has been a lot more reliable, and since there can be multiple occurrences of a class name, I recommend not relying on one. What you can do is right-click the webpage and choose Inspect, find the element you're looking for, right-click the element, and choose Copy full XPath.
Replace:
element = driver.find_element_by_class_name("table-wrap")
with:
element = driver.find_element_by_xpath('/html/body/div[1]/section/section/main/div/div/section/section[2]')
Furthermore, you are trying to act on the driver variable with BeautifulSoup, which does not work. You have to pass BeautifulSoup the URL and keep going. I am unfamiliar with BeautifulSoup and can't help you much there :/
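The suggestion above uses a full XPath; an alternative sketch that keeps the class names from the question but swaps the implicit wait for an explicit one, staying entirely in Selenium (it assumes those class names are present once the page has rendered):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('window-size=1920x1080')
driver = webdriver.Chrome(options=options)
driver.get('https://www.stakingrewards.com/cryptoassets/')
try:
    # Wait explicitly for the table wrapper; headless pages often render more slowly.
    table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'table-wrap'))
    )
    for row in table.find_elements(By.CLASS_NAME, 'rt-tr-group'):
        print(row.text)
finally:
    driver.quit()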

Selenium unable to click on button

I'm trying to click on the "next page" button using selenium, but I'm having no success. Am I using the right CSS selector or should I change it to something else?
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_argument("window-size=1200x600")
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get(f'https://www.stakingrewards.com/cryptoassets')
driver.implicitly_wait(10)
button = driver.find_element(By.CSS_SELECTOR,"button[data-testid='next-page-button']")
button.click()
driver.quit()
Change:
button = driver.find_element(By.CSS_SELECTOR,"button[data-testid='next-page-button']")
to:
button = driver.find_element(By.XPATH, "//button[@data-testid='next-page-button']")
You were previously selecting with a CSS selector; the XPath above targets the same data-testid attribute with a relative path. Let me know if there's anything else I can help with or missed.
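Whichever locator is used, a clickability wait (and, in headless runs, scrolling the button into view first) is often what actually makes the click go through. A sketch assuming driver is already on the page:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait until the button is actually clickable rather than merely present.
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid='next-page-button']"))
)
# Bring the button into the viewport before clicking; helps in small headless windows.
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
button.click()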

Extracting text from a website using selenium

I'm trying to find a way to extract the book's summary from the Goodreads page. I have tried BeautifulSoup and Selenium, unfortunately to no avail.
link:https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1
code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
link='https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1'
driver.get(link)
Description=driver.find_element_by_xpath("//div[contains(text(),'TextContainer')]")
# the first TextContainer contains the summary of the book
book_page = requests.get(link)
soup = BeautifulSoup(book_page.text, "html.parser")
print(soup)
Container = soup.find('class', class_='leftContainer')
print(Container)
Error:
The container is empty, plus:
NoSuchElementException: no such element: Unable to locate element:
{"method":"xpath","selector":"//div[contains(text(),'TextContainer')]"}
(Session info: chrome=83.0.4103.116)
You can get the description like so:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
...
driver.get("https://www.goodreads.com/book/show/67896.Tao_Te_Ching?from_search=true&from_srp=true&qid=D19iQu7KWI&rank=1")
description = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'div#description span[style="display:none"]'))
)
print(description.get_attribute('textContent'))
I have utilised a CSS Selector to get the specific hidden span that contains the full description. I have also used an explicit wait to give the element time to load.
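A side note on why get_attribute('textContent') is used rather than .text: Selenium's .text only returns text that is currently rendered, so it comes back empty for the hidden span.
print(description.text)                          # '' - the span is display:none
print(description.get_attribute('textContent'))  # full summary text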
