How to get iframe source from page_source - python

Hello, I'm trying to extract the link from page_source. My code is:
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import html5lib

driver_path = r"C:\Users\666\Desktop\New folder (8)\chromedriver.exe"
driver = webdriver.Chrome(driver_path)
driver.implicitly_wait(10)
driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
try:
    time.sleep(4)
    iframes = driver.find_elements_by_tag_name('iframe')
    for i in range(0, len(iframes)):
        driver.switch_to.frame(i)
        # your work to extract link
        text = driver.find_element_by_tag_name('body').text
        print(text)
        driver.switch_to.default_content()
    output = driver.page_source
    print(output)
finally:
    driver.quit()
Now I want to scrape just this one link.

Try the script below to get the link you want to parse. You don't need to switch to the iframe to get the link. A hardcoded delay is always the worst choice for parsing dynamic content: what if the link only appears after 5 seconds? I used an explicit wait in the script below to make it robust.
from selenium import webdriver
from selenium.webdriver.support import ui
driver = webdriver.Chrome()
wait = ui.WebDriverWait(driver, 10)
driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
elem = wait.until(lambda driver: driver.find_element_by_id("iframevideo"))
print(elem.get_attribute("src"))
driver.quit()
Output:
https://openload.co/embed/8wVwFQEP1Sw

Try with
element = driver.find_element_by_id('iframevideo')
link = element.get_attribute('src')
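
If you would rather pull the link straight out of page_source, as the title asks, here is a minimal BeautifulSoup sketch; it assumes a driver that has already loaded the page, and that the iframe carries the id iframevideo as in the answers above:
from bs4 import BeautifulSoup

# parse the rendered page source and look up the iframe by its id
soup = BeautifulSoup(driver.page_source, "html.parser")
iframe = soup.find("iframe", id="iframevideo")  # id assumed from the answers above
if iframe is not None:
    print(iframe.get("src"))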

Related

Python - Using Selenium to Login to Web

I cannot log in to this site with Selenium.
This is the URL:
https://www.burn-cycle.com/my-account/pearl-district
What I tried:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import yaml
import time
conf = yaml.full_load(open("login_details.yml"))
my_burn_email = conf["user"]["email"]
my_burn_password = conf["user"]["password"]
driver = webdriver.Chrome()
driver.get("https://www.burn-cycle.com/my-account/pearl-district")
time.sleep(1)
username = driver.find_element(By.XPATH, "//*[@id='USERNAME']")
username.send_keys(my_burn_email)
pw = driver.find_element(By.XPATH, "//*[@id='PASSWORD']")
pw.send_keys(my_burn_password)
login_button = driver.find_element(By.XPATH, "//*[@id='liFormWrap']/form[1]/button").click()
The website loads (slowly) but nothing populates. This is the output:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id='USERNAME']"}
(Session info: chrome=103.0.5060.134)
What am I doing wrong?
You need to wait for the website to load completely so that you can fetch those elements from the webpage. You can achieve this by calling implicitly_wait(seconds) right after initializing the web driver.
driver = webdriver.Chrome()
driver.implicitly_wait(15) # gives an implicit wait for 15 seconds
The element is inside an iframe, so you need to switch into the iframe first:
driver.get("https://www.burn-cycle.com/my-account/pearl-district")
time.sleep(5)
iframe = driver.find_element(By.XPATH, "//*[@id='sf-frame']")
# switch to selected iframe
driver.switch_to.frame(iframe)
username = driver.find_element(By.XPATH, "//input[@data-val-required='Username is required']")
username.send_keys("test")
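
Putting both suggestions together, a sketch of the full login flow might look like this; the iframe id sf-frame, the field ids USERNAME and PASSWORD, and the button XPath are all taken from the question and should be verified against the live page:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 15)
driver.get("https://www.burn-cycle.com/my-account/pearl-district")
# wait for the iframe to be present, then switch into it in one step
wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, "sf-frame")))
# wait for the form fields inside the frame before typing
wait.until(EC.visibility_of_element_located((By.ID, "USERNAME"))).send_keys(my_burn_email)
driver.find_element(By.ID, "PASSWORD").send_keys(my_burn_password)
driver.find_element(By.XPATH, "//*[@id='liFormWrap']/form[1]/button").click()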

Selenium is returning empty text for elements that definitely have text

I'm practicing by trying to scrape my university's course catalog. I have a few lines in Python that open the URL in Chrome and click the search button to bring up the course catalog. When I go to extract the text using find_elements_by_xpath(), it returns blank. When I use the dev tools in Chrome, there definitely is text there.
from selenium import webdriver
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
course = driver.find_elements_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]')
print(course)
I'm trying to extract the text from the element 'OSR_CAT_SRCH_OSR_CRSE_HEADER$0'. I don't understand why it's not returning the text values, especially when I can see with dev tools that it contains text.
You are not reading the .text attribute; that is why you are not getting the text. Also note that find_elements_by_xpath returns a list, so use the singular find_element_by_xpath to call .text on one element:
course = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]').text
Try the above change in the second-to-last line.
Below is the full code after the changes
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
url = 'https://courses.osu.edu/psp/csosuct/EMPLOYEE/PUB/c/COMMUNITY_ACCESS.OSR_CAT_SRCH.GBL?'
driver.get(url)
time.sleep(3)
iframe = driver.find_element_by_id('ptifrmtgtframe')
driver.switch_to.frame(iframe)
element = driver.find_element_by_xpath('//*[@id="OSR_CAT_SRCH_WK_BUTTON1"]')
element.click()
# wait up to 10 seconds for the header to be present
course = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="OSR_CAT_SRCH_OSR_CRSE_HEADER$0"]'))
).text
print(course)
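
If you want every course header on the page rather than just the first row, and assuming the ids are numbered $0, $1, ... as the $0 suffix suggests, you could match them all with starts-with:
# collect every header whose id starts with the common prefix (assumed pattern)
courses = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//*[starts-with(@id, 'OSR_CAT_SRCH_OSR_CRSE_HEADER$')]"))
)
for course in courses:
    print(course.text)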

Pulling all text (multiple p tags) with BeautifulSoup and Selenium returns []

I am trying to pull the p-tag comments within each review card, eventually looping through a vivino.com search via the link below, using BeautifulSoup and Selenium. I was able to open the first link, but pulling the p text in the review boxes returns [].
from bs4 import BeautifulSoup
from selenium import webdriver

url = "https://www.vivino.com/explore?e=eJwNyTEOgCAQBdHbbA2F5e-8gbE2uKyERBYCaOT20swrJlVYSlFhjaHkPixTHtg34pmVyvzhwutqlO5uyid8bJwf7UeRyqKdMrw0pgYdPwIzGwQ="
driver = webdriver.Chrome('/Users/myname/Downloads/chromedriver')
driver.implicitly_wait(30)
driver.get(url)
python_button = driver.find_element_by_class_name('anchor__anchor--2QZvA')
python_button.click()
soup = BeautifulSoup(driver.page_source, 'lxml')
print(soup.find_all('p'))
table = soup.findAll('div',attrs={"class":"reviewCard__reviewContainer--1kMJM"})
print(table)
driver.quit()
Could anybody advise on the correct way to pull the comments? Since there is more than one comment per page, would I need to loop?
I also tried this with 'html.parser' instead of 'lxml'. Which is the correct one to use?
Thank you so much for your help.
Here is what you need to do:
import atexit
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import visibility_of_all_elements_located
from selenium.webdriver.support.wait import WebDriverWait

def start_driver():
    driver = webdriver.Chrome()
    atexit.register(driver.quit)
    driver.maximize_window()
    return driver

def find_elements(driver, locator):
    return WebDriverWait(driver, 10, 2).until(visibility_of_all_elements_located(locator))

URL = "https://www.vivino.com/explore?e=eJwNyTEOgCAQBdHbbA2F5e-8gbE2uKyERBYCaOT20swrJlVYSlFhjaHkPixTHtg34pmVyvzhwutqlO5uyid8bJwf7UeRyqKdMrw0pgYdPwIzGwQ="
RESULTS = By.CSS_SELECTOR, "div[class*='vintageTitle'] > a"

def main():
    driver = start_driver()
    driver.get(URL)
    # note the results
    wines = []
    for element in find_elements(driver, RESULTS):
        link = element.get_attribute("href")
        name = element.find_element_by_css_selector("span[class*='vintageTitle__wine']").text
        wines.append((name, link))
    pprint(wines)
    # go extract details from each result's page
    for name, link in wines:
        print("getting comments for wine: ", name)
        driver.get(link)
        # you can do the rest ;)

if __name__ == '__main__':
    main()
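
To "do the rest", one option is to reuse the find_elements helper above on each wine page; the container class is taken from the question (Vivino's hashed suffix --1kMJM may change, hence the partial match):
# inside the second loop, after driver.get(link):
REVIEW_PARAGRAPHS = By.CSS_SELECTOR, "div[class*='reviewCard__reviewContainer'] p"
for paragraph in find_elements(driver, REVIEW_PARAGRAPHS):
    print(paragraph.text)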

Select element from a list inside an iframe using python selenium

I would like to write a Python program which automatically downloads historical stock data from a web page. The corresponding HTML code of the element I would like to select is shown in the following picture:
There are two iframes. One is inside the other. I switch to the second iframe, but the element I would like to click can't be found. I get the following error: "Message: no such element: Unable to locate element: {"method":"css selector","selector":"[id=":cu"]"} (Session info: chrome=75.0.3770.100)"
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import ctypes  # standard-library module, used below for the Windows message box
import time
user = ""
pwd = ""
driver = webdriver.Chrome()
driver.get("https://www.dukascopy.com/trading-tools/widgets/quotes/historical_data_feed")
driver.maximize_window()
## Give time for iframe to load ##
time.sleep(1)
# get the list of iframes present on the web page using tag "iframe"
seq = driver.find_elements_by_tag_name('iframe')
print("No of frames present in the web page are: ", len(seq))
#switch to correct iFrame
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[1]
driver.switch_to.frame(iframe)
driver.implicitly_wait(5)
elem = driver.find_element_by_id(':cu')
elem.click()
ctypes.windll.user32.MessageBoxW(0, "Test", "Test MsgBox", 1)
driver.close()
If my code were correct, the element "EUR/TRY" in the list would be selected.
There are 4 iframes in total.
The table you want to interact with is inside iframe[src^='https://freeserv'], and its parent iframe is widget-container. You have to switch into them one by one, like this:
Code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.maximize_window()
driver.get("https://www.dukascopy.com/trading-tools/widgets/quotes/historical_data_feed")
wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, "widget-container")))
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[src^='https://freeserv']")))
check_Box = wait.until(EC.visibility_of_element_located((By.XPATH, "//strong[text()='EUR/TRY']/../preceding-sibling::span/span")))
ActionChains(driver).move_to_element(check_Box).perform()
check_Box.click()
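
Once you are done inside the nested frames, step back out to the top-level document before interacting with anything outside them:
# return from the nested iframes to the top-level page
driver.switch_to.default_content()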

Getting value after button click with BeautifulSoup Python

I'm trying to get a value that is given by the website after a click on a button.
Here is the website: https://www.4devs.com.br/gerador_de_cpf
You can see that there is a button called "Gerar CPF"; this button generates a number that appears after the click.
My current script opens the browser and gets the value, but I'm getting the value from the page before the click, so the value is empty. I would like to know if it is possible to get the value after the click on the button.
from selenium import webdriver
from bs4 import BeautifulSoup
from requests import get

url = "https://www.4devs.com.br/gerador_de_cpf"

def open_browser():
    driver = webdriver.Chrome("/home/felipe/Downloads/chromedriver")
    driver.get(url)
    driver.find_element_by_id('bt_gerar_cpf').click()

def get_cpf():
    response = get(url)
    page_with_cpf = BeautifulSoup(response.text, 'html.parser')
    cpf = page_with_cpf.find("div", {"id": "texto_cpf"}).text
    print("The value is: " + cpf)

open_browser()
get_cpf()
open_browser and get_cpf are absolutely not related to each other...
Actually you don't need get_cpf at all. Just wait for text after clicking the button:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait as wait

url = "https://www.4devs.com.br/gerador_de_cpf"

def open_browser():
    driver = webdriver.Chrome("/home/felipe/Downloads/chromedriver")
    driver.get(url)
    driver.find_element_by_id('bt_gerar_cpf').click()
    text_field = driver.find_element_by_id('texto_cpf')
    text = wait(driver, 10).until(lambda driver: not text_field.text == 'Gerando...' and text_field.text)
    return text

print(open_browser())
Update
The same with requests:
import requests
url = 'https://www.4devs.com.br/ferramentas_online.php'
data = {'acao': 'gerar_cpf', 'pontuacao': 'S'}
response = requests.post(url, data=data)
print(response.text)
You don't need to use requests and BeautifulSoup.
from selenium import webdriver
from time import sleep

url = "https://www.4devs.com.br/gerador_de_cpf"

def get_cpf():
    driver = webdriver.Chrome("/home/felipe/Downloads/chromedriver")
    driver.get(url)
    driver.find_element_by_id('bt_gerar_cpf').click()
    sleep(10)
    text = driver.find_element_by_id('texto_cpf').text
    print(text)

get_cpf()
You could use a while loop until the text changes:
from selenium import webdriver

url = "https://www.4devs.com.br/gerador_de_cpf"

def get_value():
    driver = webdriver.Chrome()
    driver.get(url)
    driver.find_element_by_id('bt_gerar_cpf').click()
    while driver.find_element_by_id('texto_cpf').text == 'Gerando...':
        continue
    val = driver.find_element_by_id('texto_cpf').text
    driver.quit()
    return val

print(get_value())
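
A short pause inside the loop would keep it from hammering the driver with back-to-back calls; a minimal variation using the standard library:
import time

# poll every 200 ms instead of spinning as fast as possible
while driver.find_element_by_id('texto_cpf').text == 'Gerando...':
    time.sleep(0.2)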
I recommend this website that does exactly the same thing.
https://4devs.net.br/gerador-cpf
To find the "gerar cpf" element with Selenium, you can inspect the HTML source in your browser, right-click the element, and choose "Copy XPath".
That is much simpler than manually searching for the element in the page.
