Sorry, this is my first post, so forgive me for all that I don't know yet! Thanks.
I am trying to complete the following form and extract the associated premium. When I run my code, I would expect the annual premium of $156 to be extracted, but all I get is "Annual Premium: -".
Here is the code:
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import requests  # NOTE(review): unused in this script

# Fill out the CEA premium-calculator form and scrape the annual premium.
driver = webdriver.Chrome(r"C:\Users\tomwp\Downloads\chromedriver_win32\chromedriver.exe")
page = driver.get("https://www.earthquakeauthority.com/")

# Open the calculator. XPath attribute tests use '@id' -- '#id' is invalid
# XPath and raises InvalidSelectorException.
xpath = '//*[@id="form"]/header/div[2]/a'
btn = driver.find_element_by_xpath(xpath)
btn.click()
time.sleep(5)

# The calculator is rendered inside an iframe; switch into it before
# locating any of the form fields.
iframe = driver.find_element_by_xpath("//iframe[@id='premiumCalc-iframe']")
driver.switch_to.frame(iframe)

# Start the quote flow.
xpath = '//*[@id="cea-page-1"]/div/div/div[1]/div/button[1]'
btn = driver.find_element_by_xpath(xpath)
btn.click()

# Page 1: policy start date, participating insurer, address.
incept_date = driver.find_element_by_xpath('//*[@id="startdate"]')
incept_date.send_keys("03/24/2019")
insurance_company = driver.find_element_by_xpath('//*[@id="participatingInsurer"]')
insurance_company.send_keys("Other")
street_address = driver.find_element_by_xpath('//*[@id="street"]')
street_address.send_keys("26 Los Indios")
zip_code = driver.find_element_by_xpath('//*[@id="zipcode"]')
zip_code.send_keys("92618")
btn = driver.find_element_by_xpath('//*[@id="form-views"]/div[18]/div/button')
btn.click()

# Page 2: dwelling details. Use a distinct variable for the insured value --
# the original re-used `year_built` for two different fields.
year_built = driver.find_element_by_xpath('//*[@id="yearbuilt"]')
year_built.send_keys("2011")
insured_value = driver.find_element_by_xpath('//*[@id="insuredvalue"]')
insured_value.send_keys("100000")
number_stories = driver.find_element_by_xpath('//*[@id="numberOfStories"]')
number_stories.send_keys("Greater than one")
foundation = driver.find_element_by_xpath('//*[@id="foundationtype"]')
foundation.send_keys("slab")
btn = driver.find_element_by_xpath('//*[@id="form-views"]/div[14]/div/button')
btn.click()

# Give the Angular app time to compute the quote before reading the page;
# scraping immediately returns the placeholder "Annual Premium: -".
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'lxml')
premium = soup.find('div', class_='gauge-subtitle ng-binding ng-scope')
print(premium.text)
This is the $156 I would like to extract:
<div ng-if="isQuoting == false" class="gauge-subtitle ng-binding ng-scope">Annual Premium: $156.00</div>
Note that iframe id is as follows (not sure if this is helpful):
<iframe id="premiumCalc-iframe" style="width: 100%; border: none; height: 1397px;" scrolling="no" src="//calc.earthquakeauthority.com/app/index.html" cd_frame_id_="d0b3a5bcdcfe60ced66a29d282ad86c6"></iframe>
[screenshot of the premium-calculator result page omitted]
I have tried to make it a little more robust by adding in wait conditions. Your final quote page refreshes at the last click so you likely got a stale element exception. If you can find a decent indicator of that refresh completing you should replace my current time.sleep.
Personally, I would use CSS selectors throughout but I am sticking with xpath to align with your code.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time  # required for time.sleep() below; missing in the original

# Same flow as the question's script, hardened with explicit waits.
driver = webdriver.Chrome(r"C:\Users\tomwp\Downloads\chromedriver_win32\chromedriver.exe")
page = driver.get("https://www.earthquakeauthority.com/")

# '@id', not '#id' -- '#' is invalid XPath syntax.
xpath = '//*[@id="form"]/header/div[2]/a'
btn = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()

# The calculator lives in an iframe; switch into it first.
iframe = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//iframe[@id='premiumCalc-iframe']")))
driver.switch_to.frame(iframe)

xpath = '//*[@id="cea-page-1"]/div/div/div[1]/div/button[1]'
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xpath)))
btn.click()

# Page 1: start date, insurer, address.
incept_date = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, '//*[@id="startdate"]')))
incept_date.send_keys("03/24/2019")
insurance_company = driver.find_element_by_xpath('//*[@id="participatingInsurer"]')
insurance_company.send_keys("Other")
street_address = driver.find_element_by_xpath('//*[@id="street"]')
street_address.send_keys("26 Los Indios")
zip_code = driver.find_element_by_xpath('//*[@id="zipcode"]')
zip_code.send_keys("92618")
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="form-views"]/div[18]/div/button')))
btn.click()

# Page 2: dwelling details (distinct variable for insured value -- the
# original re-used `year_built`).
year_built = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, '//*[@id="yearbuilt"]')))
year_built.send_keys("2011")
insured_value = driver.find_element_by_xpath('//*[@id="insuredvalue"]')
insured_value.send_keys("100000")
number_stories = driver.find_element_by_xpath('//*[@id="numberOfStories"]')
number_stories.send_keys("Greater than one")
foundation = driver.find_element_by_xpath('//*[@id="foundationtype"]')
foundation.send_keys("slab")
btn = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="form-views"]/div[14]/div/button')))
btn.click()

# TODO(review): replace this fixed sleep with an explicit wait for the
# quote element -- the results view refreshes after the final click.
time.sleep(2)
quote = driver.find_element_by_css_selector(".gauge-subtitle").text
print(quote)
If I understand you correctly, you are able to navigate to the estimation page and can see the estimated annual premium value.
If thats the case then just try this code:
# Switch into the calculator iframe before locating the result element --
# finding the iframe alone does not change the search context.
iframe = driver.find_element_by_xpath("//iframe[@id='premiumCalc-iframe']")
driver.switch_to.frame(iframe)
# find_element_by_class_name takes a SINGLE class name; for a compound
# class string use a CSS selector instead.
yourResult = driver.find_element_by_css_selector(".gauge-subtitle.ng-binding.ng-scope").text
Related
For example this is the main page link
https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html
Go to that page, then click on the first title (e.g. "10X Innovations - Swift ULV") and get the title.
This is code
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep

# Raw string avoids invalid-escape warnings in the Windows path.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html')
data = []
# Dismiss the cookie-consent banner first.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
# Collect hrefs of the first five exhibitor links ('@class', not '#class').
hrefs = [my_elem.get_attribute("href") for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//h3[@class='text-center-mobile wrap-word']//ancestor::a[1]")))[:5]]
windows_before = driver.current_window_handle
for href in hrefs:
    # Open each link in a new tab, scrape the <h1>, close, switch back.
    driver.execute_script("window.open('" + href + "');")
    WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
    windows_after = driver.window_handles
    new_window = [x for x in windows_after if x != windows_before][0]
    driver.switch_to.window(new_window)
    data.append(WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//h1[@class='wrap-word']"))).text)
    driver.close()
    driver.switch_to.window(windows_before)
print(data)
Your current problem is the invalid XPath //div[contains(@class,'company-info']//h3)] — wrong parentheses placement. You need to use //div[contains(@class,'company-info')]//h3 instead.
However, if you want to scrape data from each company entry on page then your clicking links approach is not good.
Try to implement the following:
get href attribute of every link. Since not all links initially displayed on page you need to trigger all possible XHRs, so create count variable to get current links count and do in while loop:
execute send END hardkey to scroll page down
try to wait until current links count > count. If True - re-define count with new value. If Exception - break the loop (there are no more links remain to load)
get href of all link nodes //div[#class="company-info"]//a
in for loop navigate to each link with driver.get(<URL>)
scrape data
Within the 2022 EXHIBITOR LIST webpage, instead of click()-ing each link to scrape, you can collect the href attributes and open them in an adjacent tab as follows:
Code Block (sample for first 5 entries):
# Assumes `driver`, WebDriverWait, EC and By are already set up as above.
driver.get('https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html')
data = []
# Accept the cookie banner so it does not intercept later clicks.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
# Grab the first five exhibitor hrefs ('@class', not '#class').
hrefs = [my_elem.get_attribute("href") for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//h3[@class='text-center-mobile wrap-word']//ancestor::a[1]")))[:5]]
windows_before = driver.current_window_handle
for href in hrefs:
    # Open in an adjacent tab, scrape the title, close, return.
    driver.execute_script("window.open('" + href + "');")
    WebDriverWait(driver, 20).until(EC.number_of_windows_to_be(2))
    windows_after = driver.window_handles
    new_window = [x for x in windows_after if x != windows_before][0]
    driver.switch_to.window(new_window)
    data.append(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h1[@class='wrap-word']"))).text)
    driver.close()
    driver.switch_to.window(windows_before)
print(data)
Console Output:
['10X Innovations - Swift ULV', '21st Century Inc', '3V Snap Ring LLC.', 'A-ipower Corp', 'A.A.C. Forearm Forklift Inc']
I want to click the next-page button until there are no more pages, but it does not click.
returns the error:raise exception_class(message, screen, stacktrace)
StaleElementReferenceException: stale element reference: element is not attached to the page document
my codes:
Thanks in advance!
# Assumes driver, WebDriverWait, EC, By, Select and pandas (pd) are in scope.
driver.get('http://www.chinamoney.com.cn/chinese/zjfxzx/?tbnm=%E6%9C%80%E6%96%B0&tc=null&isNewTab=1')
driver.implicitly_wait(10)
driver.refresh()
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 5)

# --- start date from the jQuery-UI datepicker ---
datefield_st = wait.until(EC.element_to_be_clickable((By.ID, "pdbp-date-1")))
datefield_st.click()
select_st = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-year"))))
select_st.select_by_visible_text("2021")
select2 = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-month"))))
select2.select_by_value("1")
day = 1
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[@data-handler='selectDay']/a[text()='{}']".format(str(day))))).click()

# --- end date, same widget ---
datefield_ed = wait.until(EC.element_to_be_clickable((By.ID, "pdbp-date-2")))
datefield_ed.click()
select_ed = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-year"))))
select_ed.select_by_visible_text("2021")
select2 = Select(wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ui-datepicker-month"))))
select2.select_by_value("1")
day = 1
wait.until(EC.element_to_be_clickable((By.XPATH, "//td[@data-handler='selectDay']/a[text()='{}']".format(str(day))))).click()

# Submit the query.
driver.find_element_by_link_text("查询").click()

while True:
    driver.implicitly_wait(10)
    # '@title' / '@class', not '#...': '#' is invalid XPath and raises
    # InvalidSelectorException instead of matching anything.
    links = [link.get_attribute('href') for link in driver.find_elements_by_xpath("//a[contains(@title,'同业存单') and not(contains(@title,'申购说明')) and not(contains(@title,'公告'))]")]
    titles = [title.text for title in driver.find_elements_by_xpath("//a[contains(@title,'中期票据') and not(contains(@title,'申购说明')) and not(contains(@title,'公告'))]")]
    dates = [date.text for date in driver.find_elements_by_xpath('//*[@class="san-grid-r text-date"]')]
    driver.implicitly_wait(10)
    # Append each row to the CSV as it is scraped.
    for link, title, date in zip(links, titles, dates):
        dataframe = pd.DataFrame({'col1': date, 'col2': title, 'col3': link}, index=[0])
        dataframe.to_csv('Chinamoney.csv', mode='a+', header=False, index=False, encoding='utf-8-sig')
        print(link, title, date)
    try:
        driver.find_element_by_xpath('//*[contains(@class, "page-next")]').click()
    except Exception:
        print('No more pages')
        break  # the original printed but never exited, looping forever
You passed two class names into the selector, which is not allowed when searching by class name. Either try
(By.CLASS_NAME, 'page-next')
or
(By.CSS_SELECTOR, '.page-btn.page-next')
Also your element and icon select the same element. So you don't need to define icon. Simply use element.click()
You are using:
driver.find_element_by_xpath('//*[contains(#class, "page-next")]').click()
Try:
# A JavaScript click bypasses the overlay/visibility checks that can make a
# native .click() fail. Note '@class' -- '#class' is invalid XPath syntax.
element = driver.find_element_by_xpath('//*[contains(@class, "page-next")]')
driver.execute_script("arguments[0].click();", element)
If this doesnt work, you can try to obtain the url/link value and store it, and later you can go to the url or do what you want without click in it.
I'm scraping an e-commerce website, Lazada, using Selenium and bs4. I managed to scrape the first page, but I am unable to iterate to the next page. What I'm trying to achieve is to scrape all the pages of the categories I've selected.
Here what I've tried :
# Run the browser in incognito mode. The original passed ' — incognito'
# (em dash), which Chrome does not recognise as a switch.
option = webdriver.ChromeOptions()
option.add_argument('--incognito')
driver = webdriver.Chrome(executable_path='chromedriver', chrome_options=option)
driver.get('https://www.lazada.com.my/')
driver.maximize_window()

# Select the first category tile.
element = driver.find_elements_by_class_name('card-categories-li-content')[0]
webdriver.ActionChains(driver).move_to_element(element).click(element).perform()

t = 10
try:
    WebDriverWait(driver, t).until(EC.visibility_of_element_located((By.ID, "a2o4k.searchlistcategory.0.i0.460b6883jV3Y0q")))
except TimeoutException:
    # Listing did not load -- refresh and retry the category click once.
    print('Page Refresh!')
    driver.refresh()
    element = driver.find_elements_by_class_name('card-categories-li-content')[0]
    webdriver.ActionChains(driver).move_to_element(element).click(element).perform()
print('Page Load!')


def getData(np):
    """Scrape every product card on the current listing page and save rows.

    NOTE(review): the next-page click sits inside the per-product loop in the
    original paste, so the page advances after every product -- likely the
    pagination bug being asked about. Structure preserved as posted.
    """
    soup = bs(driver.page_source, "lxml")
    product_containers = soup.findAll("div", class_='c2prKC')
    for p in product_containers:
        title = (p.find(class_='c16H9d').text)  # title
        selling_price = (p.find(class_='c13VH6').text)  # selling price
        try:
            original_price = (p.find("del", class_='c13VH6').text)  # original price
        except:
            original_price = "-1"
        if p.find("i", class_='ic-dynamic-badge ic-dynamic-badge-freeShipping ic-dynamic-group-2'):
            freeShipping = 1
        else:
            freeShipping = 0
        try:
            discount = (p.find("span", class_='c1hkC1').text)
        except:
            discount = "-1"
        if p.find(("div", {'class': ['c16H9d']})):
            url = "https:" + (p.find("a").get("href"))
        else:
            url = "-1"
        nextpage_elements = driver.find_elements_by_class_name('ant-pagination-next')[0]
        np = webdriver.ActionChains(driver).move_to_element(nextpage_elements).click(nextpage_elements).perform()
        print("- -" * 30)
        toSave = [title, selling_price, original_price, freeShipping, discount, url]
        print(toSave)
        writerows(toSave, filename)


# The original called getData(np) with `np` undefined (NameError); the
# parameter is overwritten before use, so None is a safe placeholder.
getData(None)
The problem might be that the driver is trying to click the button before the element is even loaded correctly.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(PATH, chrome_options=option)
# Make the driver wait up to 5 seconds for elements to appear.
driver.implicitly_wait(5)
url = "https://www.lazada.com.ph/catalog/?q=phone&_keyori=ss&from=input&spm=a2o4l.home.search.go.239e359dTYxZXo"
driver.get(url)

# '@class', not '#class' -- '#' is invalid XPath syntax.
next_page_path = "//ul[@class='ant-pagination ']//li[@class=' ant-pagination-next']"
# Wait up to 5 seconds for the next-page button to become clickable,
# then click it.
try:
    next_page = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, next_page_path)))
    next_page.click()
except Exception as e:
    print(e)
EDIT 1
Changed the code to make the driver wait for the element to become clickable. You can add this code inside a while loop for iterating multiple times and break the loop if the button is not found and is not clickable.
I am trying to scrape basic information from Google. The code I am using is the following. Unfortunately it does not move to the next page and I cannot figure out why. I am using Selenium with Google Chrome as the browser (not Firefox). Could you please tell me what is wrong in my code?
# Asker's code, re-indented from a flattened paste; logic preserved.
driver.get('https://www.google.com/advanced_search?q=google&tbs=cdr:1,cd_min:3/4/2020,cd_max:3/4/2020&hl=en')
search = driver.find_element_by_name('q')
search.send_keys('tea')
search.submit()

# NOTE(review): the page source is parsed once, before the loop, so
# result_div is never refreshed after clicking to the next page.
soup = BeautifulSoup(driver.page_source, 'lxml')
result_div = soup.find_all('div', attrs={'class': 'g'})
titles = []
while True:
    # '@id', not '#id' -- '#' is invalid XPath syntax.
    next_page_btn = driver.find_elements_by_xpath("//a[@id='pnnext']")
    for r in result_div:
        if len(next_page_btn) < 1:
            print("no more pages left")
            break  # NOTE(review): breaks the inner for, not the while
        else:
            try:
                title = None
                title = r.find('h3')
                if isinstance(title, Tag):
                    title = title.get_text()
                    print(title)
                    if title != '':
                        titles.append(title)
            except:
                continue
    element = WebDriverWait(driver, 5).until(expected_conditions.element_to_be_clickable((By.ID, 'pnnext')))
    driver.execute_script("return arguments[0].scrollIntoView();", element)
    element.click()
I set q in the query string to be an empty string. Used as_q not q for the search box name. And reordered your code a bit. I put a page limit in to stop it going on forever.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions

driver = webdriver.Chrome()
# q left empty; the advanced-search box is named 'as_q', filled below.
driver.get('https://www.google.com/advanced_search?q=&tbs=cdr:1,cd_min:3/4/2020,cd_max:3/4/2020&hl=en')
search = driver.find_element_by_name('as_q')
search.send_keys('tea')
search.submit()

titles = []
page_limit = 5  # safety cap so the loop cannot run forever
page = 0
while True:
    # Re-parse the page source on every iteration so each new results
    # page is actually scraped.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    result_div = soup.find_all('div', attrs={'class': 'g'})
    for r in result_div:
        for title in r.find_all('h3'):
            title = title.get_text()
            print(title)
            titles.append(title)
    next_page_btn = driver.find_elements_by_id('pnnext')
    if len(next_page_btn) == 0 or page > page_limit:
        break
    element = WebDriverWait(driver, 5).until(expected_conditions.element_to_be_clickable((By.ID, 'pnnext')))
    driver.execute_script("return arguments[0].scrollIntoView();", element)
    element.click()
    page = page + 1
driver.quit()
I'm trying to scrape data inside a hidden frame; the frame is shown as follows
<!-- Content of the details tabs here -->
<div id="tabDetail_0" class="tab_content tab_detail" style="display:
block;"><iframe id="iframe_0" src="https://www.tmdn.org/tmview/get-
detail?st13=GB500000003342197" width="100%" height="600px;"
frameborder="0"></iframe></div></div></div> <!-- resultTabs -->
As you can see, there is a link in the HTML. I tried opening a new webdriver instance, navigating to the link and getting the data. It worked; however, the website then stopped responding, because navigating to these links directly is not allowed or is rate-limited.
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
import traceback
import time

# Disable image loading to speed up the crawl.
option = webdriver.ChromeOptions()
chrome_prefs = {}
option.experimental_options["prefs"] = chrome_prefs
chrome_prefs["profile.default_content_settings"] = {"images": 2}
chrome_prefs["profile.managed_default_content_settings"] = {"images": 2}

url = "https://www.tmdn.org/tmview/welcome#"
xlsName = 'D:\\test.xlsx'
records = []
start_time = time.time()

driver = webdriver.Chrome(executable_path="D:\Python\chromedriver.exe", chrome_options=option)
driver.get(url)
time.sleep(10)
# '@id', not '#id' -- '#' is invalid XPath syntax.
driver.find_element_by_xpath('//*[@id="buttonBox"]/a').click()
time.sleep(10)
x = -1  # index of the current row's name cell within the page
try:
    # click advanced search
    driver.find_element_by_name("lnkAdvancedSearch").click()
    time.sleep(5)
    # select Designated territories: United Kingdom
    driver.find_element_by_id('DesignatedTerritories').click()
    time.sleep(5)
    TerritoryLabelElements = driver.find_elements_by_css_selector('div.optEUGroupContainer label')
    for elem in TerritoryLabelElements:
        if elem.text == 'United Kingdom':
            elem.click()
    time.sleep(5)
    driver.find_element_by_id('DesignatedTerritories').click()
    time.sleep(5)
    # select Trade mark office: UKIPO
    driver.find_element_by_id('SelectedOffices').click()
    time.sleep(5)
    TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
    for elem in TerritoryLabelElements:
        if elem.text == 'GB United Kingdom ( UKIPO )':
            elem.click()
    time.sleep(5)
    driver.find_element_by_id('SelectedOffices').click()
    # Trade mark status: Filed and Registered
    driver.find_element_by_id('TradeMarkStatus').click()
    time.sleep(5)
    TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
    for elem in TerritoryLabelElements:
        if elem.text == 'Filed':
            elem.click()
        if elem.text == 'Registered':
            elem.click()
    time.sleep(5)
    driver.find_element_by_id('TradeMarkStatus').click()
    # application date range
    startdate = driver.find_element_by_id("ApplicationDateFrom")
    startdate.clear()
    startdate.send_keys('01-10-2018')
    enddate = driver.find_element_by_id("ApplicationDateTo")
    enddate.clear()
    enddate.send_keys('31-10-2018')
    # run the search, show 100 rows per page
    time.sleep(5)
    driver.find_element_by_id("SearchCopy").click()
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    tbl = soup.find("table", id="grid")
    driver.find_element_by_link_text('100').click()
    time.sleep(5)
    # page loop (72 result pages)
    for i in range(1, 73):
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        tbl = soup.find("table", id="grid")
        # extract data from the grid rows (header row skipped)
        tr_rows = tbl.find_all('tr')
        for tr_row in tr_rows[1:]:
            td_cells = tr_row.find_all('td')
            Trade_mark_name = td_cells[4].text
            Trade_mark_office = td_cells[5].text
            Designated_territory = td_cells[6].text
            Application_number = td_cells[7].text
            Registration_number = td_cells[8].text
            Trade_mark_status = td_cells[9].text
            Trade_mark_type = td_cells[13].text
            Applicant_name = td_cells[11].text
            Nice_class = td_cells[10].text
            Application_date = td_cells[12].text
            Registration_date = td_cells[14].text
            x = x + 1
            # click the row's name cell to open the details tab
            el = driver.find_elements_by_class_name('cell_tmName_column')[x]
            action = webdriver.common.action_chains.ActionChains(driver)
            action.move_to_element_with_offset(el, 0, 0)
            action.click()
            action.perform()
            time.sleep(3)
            # switch into the details-tab iframe to read owner/representative
            iframe = driver.find_elements_by_tag_name('iframe')[0]
            driver.switch_to.frame(iframe)
            html2 = driver.page_source
            soup2 = BeautifulSoup(html2, 'html.parser')
            tblOwner = soup2.find("div", id="anchorOwner").find_next('table')
            tblRep = soup2.find("div", id="anchorRepresentative").find_next('table')
            # then switch back to the main document
            driver.switch_to.default_content()
            try:
                Owner_Address = tblOwner.find("td", text="Address").find_next('td')
            except:
                Owner_Address = 'No Entry'
            try:
                Representative_Name = tblRep.find("td", text="Name").find_next('td').text.strip()
            except:
                Representative_Name = 'No Entry'
            records.append((Designated_territory, Applicant_name, Trade_mark_name, Application_date, Application_number, Trade_mark_type, Nice_class, Owner_Address, Trade_mark_office, Registration_number, Trade_mark_status, Registration_date, Representative_Name))
            time.sleep(1)
            driver.find_elements_by_css_selector('a.close_tab')[0].click()
        # navigate to the next result page and reset the row index
        driver.find_element_by_id('next_t_grid_toppager').click()
        time.sleep(2)
        x = -1
    df = pd.DataFrame(records, columns=['Designated_territory', 'Applicant_name', 'Trade_mark_name', 'Application_date', 'Application_number', 'Trade_mark_type', 'Nice_class', 'Owner_Address', 'Trade_mark_office', 'Registration_number', 'Trade_mark_status', 'Registration_date', 'Representative_Name'])
    df.to_excel(xlsName, sheet_name='sheet1', index=False, encoding='utf-8')
except Exception:
    # On any failure, still persist whatever was scraped so far.
    df = pd.DataFrame(records, columns=['Designated_territory', 'Applicant_name', 'Trade_mark_name', 'Application_date', 'Application_number', 'Trade_mark_type', 'Nice_class', 'Owner_Address', 'Trade_mark_office', 'Registration_number', 'Trade_mark_status', 'Registration_date', 'Representative_Name'])
    df.to_excel(xlsName, sheet_name='sheet1', index=False, encoding='utf-8')
    traceback.print_exc()
time.sleep(5)
driver.quit()
What you need to do is switch into the frame with switch_to.frame:
# Switch the driver's context into the detail iframe before locating
# elements inside it ('@id', not '#id' -- '#' is invalid XPath).
iframe = driver.find_element_by_xpath('//iframe[@id="iframe_0"]')
driver.switch_to.frame(iframe)
# then switch back:
driver.switch_to.default_content()
EDIT:
You asked what to do if the id changes; here is an idea — you can use contains() in your XPath like this:
# This will find any iframe whose id contains "iframe_".
# You should check there is only one; you can do so with:
#   iframes = driver.find_elements_by_xpath('//iframe[contains(@id, "iframe_")]')
#   print(len(iframes))
iframe = driver.find_element_by_xpath('//iframe[contains(@id, "iframe_")]')
driver.switch_to.frame(iframe)
# then switch back:
driver.switch_to.default_content()
In your code use:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

url = "https://www.tmdn.org/tmview/welcome#"
driver = webdriver.Chrome(executable_path=r"D:\New Proj\chromedriver.exe")
driver.get(url)
time.sleep(3)
# '@id', not '#id' -- '#' is invalid XPath syntax.
driver.find_element_by_xpath('//*[@id="buttonBox"]/a').click()
time.sleep(3)

# Click advanced search
driver.find_element_by_name("lnkAdvancedSearch").click()
time.sleep(5)

# Select designated territory: United Kingdom
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.optEUGroupContainer label')
for elem in TerritoryLabelElements:
    if elem.text == 'United Kingdom':
        elem.click()
time.sleep(5)
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)

# Select trade mark office: UKIPO
driver.find_element_by_id('SelectedOffices').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text == 'GB United Kingdom ( UKIPO )':
        elem.click()
time.sleep(5)
driver.find_element_by_id('SelectedOffices').click()

# Trade mark status: Filed and Registered
driver.find_element_by_id('TradeMarkStatus').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text == 'Filed':
        elem.click()
    if elem.text == 'Registered':
        elem.click()
time.sleep(5)
driver.find_element_by_id('TradeMarkStatus').click()

# Application date range
startdate = driver.find_element_by_id("ApplicationDateFrom")
startdate.clear()
startdate.send_keys('10-01-2018')
enddate = driver.find_element_by_id("ApplicationDateTo")
enddate.clear()
enddate.send_keys('10-01-2018')

# Run the search and wait for the result grid to load
time.sleep(5)
driver.find_element_by_id("SearchCopy").click()
time.sleep(30)

# Click the first result to open its details tab
el = driver.find_elements_by_class_name('cell_tmName_column')[0]
action = ActionChains(driver)
action.move_to_element_with_offset(el, 0, 0)
action.click()
action.perform()
time.sleep(10)

# Switch into the detail iframe, scrape (here: print the HTML), switch back
iframe = driver.find_element_by_xpath('//iframe[@id="iframe_0"]')
driver.switch_to.frame(iframe)
print(iframe.get_attribute('innerHTML'))
driver.switch_to.default_content()