I'm writing a Selenium scraper that waits until a page is loaded before trying to locate an element. When I run the script, it looks to me like the element has loaded in the browser window, but Selenium thinks otherwise.
Here's scraper.py:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("--ignore-certificate-errors")
options.add_argument("--test-type")
options.binary_location = "/usr/bin/chromium"
driver = webdriver.Chrome(chrome_options=options)
startingURL = "https://pbcvssp.co.palm-beach.fl.us/webapp/vssp/AltSelfService;jsessionid=0000lH_keoPs-fzs5sSkYGLah1X:-1"
driver.get(startingURL)
driver.find_element_by_name("guest_login").click()
driver.switch_to_window(driver.window_handles[1]) # Go to window with bids
try:
secondsToWait = 20
wait = WebDriverWait(driver,secondsToWait)
openBidsLinkName = "AMSBrowseOpenSolicit"
openBidsLink = wait.until(
EC.element_to_be_clickable(By.NAME,openBidsLinkName)
)
finally:
print driver.page_source
driver.find_element_by_name(openBidsLinkName)
But when I run python scraper.py, I get this error.
Traceback (most recent call last):
File "scraper.py", line 30, in <module>
driver.find_element_by_name(openBidsLinkName)
File "/home/me/ENV/pbc_vss/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 495, in find_element_by_name
return self.find_element(by=By.NAME, value=name)
File "/home/me/ENV/pbc_vss/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 966, in find_element
'value': value})['value']
File "/home/me/ENV/pbc_vss/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 320, in execute
self.error_handler.check_response(response)
File "/home/me/ENV/pbc_vss/local/lib/python2.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"name","selector":"AMSBrowseOpenSolicit"}
Also driver.page_source looks like this:
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><!-- BEGIN GENERATED HTML --><html xmlns="http://www.w3.org/1999/xhtml" lang="en" oncontextmenu="MNU_ShowPopup('Default', event);return false"><head>
<title>Self Service Application
</title>
<base href="https://pbcvssp.co.palm-beach.fl.us:443/webapp/vssp/advantage/AltSelfService/" />
<script language="JavaScript" type="text/javascript" src="../AMSJS/ALTSS/ALTSSUtil.js">
<!---->
</script>
<script language="JavaScript" type="text/javascript" src="../AMSJS/AMSMenu.js">
<!---->
</script>
<script language="JavaScript" type="text/javascript" src="../AMSJS/AMSDHTMLLib.js">
<!---->
</script>
<script language="JavaScript" type="text/javascript" src="../AMSJS/AMSUtils.js">
<!---->
</script>
<script type="text/javascript" language="JavaScript">
<!--
UTILS_InitPage();
-->
</script>
</head>
<frameset border="0" rows="33, *, 25">
<frame name="AdditionalLinks" src="/LoginExternal/Pages/LoginAdditionalLinks.htm" marginwidth="0" title="Additional Links" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#AdditionalLinks" scrolling="no" />
<frameset cols="150, *" border="0">
<frameset border="0" rows="150, *">
<frame name="pPrimaryNavPanel" src="pPrimaryNavPanel.htm" marginwidth="0" title="Navigation" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#Nav" scrolling="no" />
<frame name="Secondary" src="../AMSImages/ALTSS/portal.htm" marginwidth="0" title="Secondary Navigation" target="Display" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#SecondaryNavigator" scrolling="no" />
</frameset>
<frameset id="AltSSLinkFrame" border="0" rows="100, *">
<frame name="Startup" src="https://pbcvssp.co.palm-beach.fl.us/webapp/vssp/AltSelfService;jsessionid=0000CFpQkQ1YDSjZgm-4yMM0lHd:-1?session_id=CFpQkQ1YDSjZgm-4yMM0lHd&page_id=pid_2712&vsaction=pagetransition&vsnavigation=StartPageNav&frame_name=Startup" marginwidth="0" title="Welcome Area" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#PrimaryNav" scrolling="no" vsaction="true" />
<frame name="Display" src="AltSSHomePage.htm" marginwidth="0" title="Display Frame" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#Display" scrolling="auto" />
</frameset>
</frameset>
<frame name="CopyrightInfo" src="/LoginExternal/Pages/LoginCopyrightInfo.html" marginwidth="0" title="Copyright Info" frameborder="0" marginheight="0" longdesc="../AMSImages/ALTSS/SelfServiceFrameDesc.htm#CopyrightInfo" scrolling="no" />
</frameset>
<noframes>
<body>
<p>This page uses frames, but your browser does not support them. FramesetPage requires a Frames-capable browser</p>
</body>
</noframes>
How can I make Selenium locate the element with the name attribute "AMSBrowseOpenSolicit"?
Your table is in frame, so you have to switch to it before you can interact with this table. This code snippet will help you to do it:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Find the frame titled "Display Frame" and switch to it. XPath attribute
# tests are written with "@", and every expected condition takes its locator
# as a single (by, value) tuple.
WebDriverWait(driver, 20).until(
    EC.frame_to_be_available_and_switch_to_it(
        (By.XPATH, "//frame[@title = 'Display Frame']")
    )
)

# Do your stuff: with the frame as the current context, wait until the link
# can be clicked before looking it up.
secondsToWait = 20
wait = WebDriverWait(driver, secondsToWait)
openBidsLinkName = "AMSBrowseOpenSolicit"
openBidsLink = wait.until(
    EC.element_to_be_clickable((By.NAME, openBidsLinkName))
)
driver.find_element_by_name(openBidsLinkName)
driver.switch_to.default_content()  # switch back to default content
Related
Please Note this question remains opened, as the suggested "answer" still gives same output since it doesn't explain why JS isn't running on that page or why selenium can't extract it
I'm trying to read page source of: http://147.235.97.36/ (Hp printer) which is rendered by JS.
So I wrote:
driver.get(url)
wait_for_page(driver)
source = driver.page_source
print(source)
but in the printed source I see:
<p>JavaScript is required to access this website.</p>
<p>Please enable JavaScript or use a browser that supports JavaScript.</p>
and some of the content isn't there, so I changed my code to:
driver.get(url)
wait_for_page(driver)
source = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
print(source)
Still same output, can you help me understand what's the problem here?
Here is my init_driver function:
def init_driver():
    """Create and configure a headless Chrome WebDriver.

    Returns:
        The configured driver. If start-up raises, the error is passed to
        ``log_warning`` and whatever ``driver`` holds at that point is
        returned, which is ``None`` when the browser could not be created.
    """
    # --Initialize Driver--#
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in Background
    if os.name == 'nt':
        chrome_options.add_argument('--disable-gpu')  # Windows workaround
    prefs = {"profile.default_content_settings.images": 2,
             "profile.managed_default_content_settings.images": 2}  # Disable Loading of Images
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument('--ignore-ssl-errors=yes')
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument("--window-size=1920,1080")  # Standard Window Size
    # pageLoadStrategy is a WebDriver capability rather than a Chrome
    # command-line switch, so it is set on the options object.
    chrome_options.page_load_strategy = "normal"
    driver = None
    try:
        driver = webdriver.Chrome(options=chrome_options, service=Service('./chromedriver'))
        driver.set_page_load_timeout(REQUEST_TIMEOUT)
    except Exception as e:
        # Best effort: report the failure and let the caller deal with the result.
        log_warning(str(e))
    return driver
You can add a few arguments to avoid getting detected and print the page source as follows:
Code Block:
options = Options()
options.add_argument("start-maximized")
# add_experimental_option() keeps one value per option name, so both switches
# have to be passed in a single list; a second call with the same name would
# replace the first one.
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.get("http://147.235.97.36/")
# Read the live DOM through JavaScript and print it.
print(driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML"))
Console Output:
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/framework/Unified.css" rel="stylesheet" type="text/css">
<script type="text/javascript">
frameWorkObj = {};
frameWorkObj.pkg = "ews";
</script>
<script src="/framework/Unified.js" type="text/javascript"></script>
</head>
<body class="theme-gray">
<iframe src="/framework/cookie/client/cookie.html" style="display: none;"></iframe>
<div id="pgm-overall-container">
<div id="pgm-left-pane-bkground"></div>
<div id="pgm-banner"></div>
<div id="pgm-search-div" class="gui-hidden"></div>
<div id="pgm-top-pane"></div>
<div id="pgm-container-div">
<div id="pgm-left-pane"></div>
<div id="pgm-container" class="clear-fix">
<div id="pgm-title-div" class="gui-hidden"></div>
<div id="contentPane" class="contentPane"></div>
</div>
</div>
<div id="pgm-footer"></div>
</div> <!-- #pgm-overall-container -->
<div id="pgm-theatre-staging-div"></div>
<script type="text/javascript">
// frame buster
if(top != self)
top.location.replace(self.location.href);
</script>
<noscript>
<div id="pgm-no-js-text">
<p>JavaScript is required to access this website.</p>
<p>Please enable JavaScript or use a browser that supports JavaScript.</p>
</div>
</noscript>
<div id="ui-datepicker-div" style="display: none;" tabindex="0"></div></body>
I am working on an Intranet with nested frames, and am unable to access a child frame.
The HTML source:
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title>VIS</title>
<link rel="shortcut icon" href="https://bbbbb/ma1/imagenes/iconos/favicon.ico">
</head>
<frameset rows="51,*" frameborder="no" scrolling="no" border="0">
<frame id="cabecera" name="cabecera" src="./blablabla.html" scrolling="no" border="3">
<frameset id="frame2" name="frame2" cols="180,*,0" frameborder="no" border="1">
<frame id="menu" name="menu" src="./blablabla_files/Menu.html" marginwidth="5" scrolling="auto" frameborder="3">
Buscar
<frame id="contenido" name="contenido" src="./blablabla_files/saved_resource.html" marginwidth="5" marginheight="5">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title>BUSCAr</title>
</head>
<frameset name="principal" rows="220,*" frameborder="NO">
<frame name="Formulario" src="./BusquedaSimple.html" scrolling="AUTO" noresize="noresize">
<input id="year" name="year" size="4" maxlength="4" value="" onchange="javascript:Orden();" onfocus="this.value='2018';this.select();" type="text">
<frame name="Busqueda" src="./saved_resource(2).html" scrolling="AUTO">
</frameset>
<noframes>
<body>
<p>soporte a tramas.</p>
</body>
</noframes>
</html>
<frame name="frameblank" marginwidth="0" scrolling="no" src="./blablabla_files/saved_resource(1).html">
</frameset>
<noframes>
<P>Para ver esta página.</P>
</noframes>
</frameset>
</html>
I locate the button "Buscar" inside of frame "menu" with:
driver.switch_to_default_content()
driver.switch_to_frame(driver.find_element_by_css_selector("html frameset frameset#frame2 frame#menu"))
btn_buscar = driver.find_element_by_css_selector("#div_menu > table:nth-child(10) > tbody > tr > td:nth-child(2) > span > a")
btn_buscar.click()
I've tried this code to locate the input id="year" inside frame="Formulario":
driver.switch_to_default_content()
try: driver.switch_to_frame(driver.switch_to_frame(driver.find_element_by_css_selector("html frameset frameset#frame2 frame#contenido frameset#principal frame#Formulario")))
print("Ok cabecera -> contenido")
except:
print("cabecera not found")
or
driver.switch_to_frame(driver.switch_to_xpath("//*[#id='year"]"))
but they don't work.
Can you help me?
Thanks!
To handle the required frame, you need to switch successively into each of
its ancestor frames:
driver.switch_to.frame("cabecera")
driver.switch_to.frame("menu")
btn_buscar = driver.find_element_by_link_text("Buscar")
btn_buscar.click()
Also note that a WebDriver instance has no switch_to_xpath() method, and that the switch_to_frame() and switch_to_default_content() methods are deprecated, so you should use switch_to.frame() and switch_to.default_content() instead.
Assuming your program has the focus on the top-level browsing context, to locate and click the button with the text Buscar you need to switch through all the parent frames, using WebDriverWait together with the proper expected_conditions. You can use the following code block:
# Each expected condition takes its locator as one (by, value) tuple.
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.ID, "cabecera")))
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.ID, "menu")))
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.LINK_TEXT, "Buscar"))).click()
I am using Python to scrape data from a website. While I have been able to use Selenium to log in, I cannot identify the search field once logged in. It appears the web page loads with frames (not iframes), but I cannot access the frame with the search field.
I have tried changing the frame to the relevant frame (which seems to work - no error is thrown up) but then if I try searching for the search element by CSS / Xpath / Name / id I get a NoSuchElementException. I am using the Chrome webdriver.
Any suggestions? The page source is as follows:
<html>
<head>
<title> XYZ </title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Script-Type" content="text/javascript" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta http-equiv="content-language" content="en" />
<script type="text/javascript">
if (navigator && navigator.appVersion && navigator.appVersion.match("Safari") && !navigator.appVersion.match("Chrome")) {
// hack to force a window redraw
window.onload = function() {
document.getElementsByTagName('html')[0].style.backgroundColor = '#000000';
}
}
</script>
</head>
<frameset id="wc-frameset" rows="82,*" frameborder="no" border="0" framespacing="0">
<frame frameborder="0" src="/frontend/header/" name="top" marginwidth="0" marginheight="0" scrolling="no" noresize="noresize" />
<frameset cols="*,156,850,*" frameborder="NO" border="0" framespacing="0">
<frame frameborder="0" src="/frontend/fillbar/" name="fillbar" marginwidth="0" marginheight="0" scrolling="no" noresize="noresize" />
<frame frameborder="0" src="/frontend/navigation/" name="navigation" marginwidth="0" marginheight="0" scrolling="no" noresize="noresize" />
<frame frameborder="0" src="/frontend/frames/" name="content_area" marginwidth="0" marginheight="0" scrolling="no" noresize>
<frame frameborder="0" src="/frontend/fillbar/" name="fillbar" marginwidth="0" marginheight="0" scrolling="no" noresize="noresize" />
</frameset>
</frameset>
</html>
The code that I have so far is:
username = driver.find_element_by_id("username")
password = driver.find_element_by_id("password")
username.send_keys("****")
password.send_keys("****")
driver.find_element_by_class_name("bg-left").click()
#this bit works
driver.switch_to_frame("content_area")
#this seems to work too, got the frame name from the page source
search = driver.find_element_by_id("field-name")
search.send_keys("TEST")
#this fails, no element found
The target frame source code is:
<div id="field-name" class="field field-StringField">
<label for="name">Name</label> <div class="input-con"><input id="name" name="name" type="text" value=""></div>
</div>
It is possible that there are duplicate elements in the page.
Try the following in chrome:
Open url in chrome
Open developer tools F12
Press ESC to open the chrome console
Select your frame
Search for similar elements using xpath in console
$x("//input[@id='name']")
This should list the number of elements.
Maybe you need to wait for the page to load up completely before continuing searching the element. You can try something like:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

driver.switch_to_frame("content_area")
try:
    # Wait up to 10 seconds for the element to be visible. The locator is
    # passed as a single (by, value) tuple.
    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, 'name')))
except TimeoutException:
    # display page timed out error
    print("Timed out waiting for the page to load")
search = driver.find_element_by_id("name")
search.send_keys("TEST")
I am attempting to use selenium to navigate a website that is using frames.
Here is my working python script for part 1:
from selenium import webdriver
import time
from urllib import request
driver = webdriver.Firefox()
driver.get('http://www.lgs-hosted.com/rmtelldck.html')
driver.switch_to.frame('menu')
driver.execute_script('xSubmit()')
time.sleep(.5)
link = driver.find_element_by_id('ml1T2')
link.click()
Here is the page element:
<html webdriver="true">
<head></head>
<frameset id="menuframe" name="menuframe" border="0" frameborder="0" cols="170,*">
<frameset border="0" frameborder="0" rows="0,*">
<frame scrolling="AUTO" noresize="" frameborder="NO" src="heart.html" name="heart"></frame>
<frame scrolling="AUTO" noresize="" frameborder="NO" src="rmtelldcklogin.html" name="menu"></frame>
</frameset>
<frame scrolling="AUTO" noresize="" frameborder="NO" src="rmtelldcklogo.html" name="update"></frame>
</frameset>
</html>
My issue is switching the frames...its in 'menu' I need to get into 'update':
driver.switch_to.frame('update')
^ does not work....error tells me its not there, even though we can clearly see it is...any ideas?
How do I switch from menu to update?
You need to switch back to the default content before switching to a different frame:
driver.switch_to.default_content()
driver.switch_to.frame("update")
# to prove it is working
title = driver.find_element_by_id("L_DOCTITLE").text
print(title)
Prints:
Civil Case Inquiry
I want to get answers from a online chatbot.
http://talkingbox.dyndns.org:49495/braintalk? (the ? belongs to the link)
To send a question you just have to send a simple request:
http://talkingbox.dyndns.org:49495/in?id=3B9054BC032E53EF691A9A1803040F1C&msg=[Here the question]
Source looks like this:
<frameset cols="*,185" frameborder="no" border="0" framespacing="0">
<frameset rows="100,*,82" frameborder="no" border="0" framespacing="0">
<frame src="http://thebot.de/bt_banner.html" marginwidth="0" name="frtop" scrolling="no" marginheight="0" frameborder="no">
<frame src="out?id=3B9054BC032E53EF691A9A1803040F1C" name="frout" marginwidth="0" marginheight="0">
<frameset rows="100%,*" border="0" framespacing="0" frameborder="no">
<frame src="bt_in?id=3B9054BC032E53EF691A9A1803040F1C" name="frin" scrolling="no" marginwidth="0" marginheight="0" noresize>
<frame src="" name="frempty" marginwidth="0" marginheight="0" scrolling="auto" frameborder="no" >
</frameset>
</frameset>
<frameset frameborder="no" border="0" framespacing="0" rows="82,*">
<frame src="stats?" name="fr1" scrolling="no" marginwidth="0" marginheight="0" frameborder="no">
<frame src="http://thebot.de/bt_rechts.html" name="fr2" scrolling="auto" marginwidth="0" marginheight="0" frameborder="no" >
</frameset>
</frameset>
I was using "mechanize" and beautifulsoup for web scraping but I suppose mechanize does not support dynamic webpages.
How can I get the answers in this case?
I am also looking for a solution which work good on Windows and Linux.
Whether you use BeautifulSoup, mechanize, Requests or even Scrapy, loading the dynamic part of that page has to be done in a separate step that you write yourself.
for example, using scrapy this may look something like:
class TheBotSpider(BaseSpider):
    """Send one question to the chatbot and follow the frame holding the reply.

    The question is read from ``self.question``, which is expected to be
    supplied as a spider argument (``-a question=...``).
    """

    name = 'thebot'
    allowed_domains = ['thebot.de', 'talkingbox.dyndns.org']

    def __init__(self, *a, **kw):
        super(TheBotSpider, self).__init__(*a, **kw)
        self.domain = 'http://talkingbox.dyndns.org:49495/'
        self.start_urls = [self.domain +
                           'in?id=3B9054BC032E53EF691A9A1803040F1C&msg=' +
                           self.question]

    def parse(self, response):
        """Find the ``frout`` frame in the frameset and request its document."""
        sel = Selector(response)
        # XPath selects attributes with "@".
        sources = sel.xpath('//frame[@name="frout"]/@src').extract()
        if not sources:
            self.log('No "frout" frame found in %s' % response.url)
            return
        # The frame src is relative ("out?id=..."), while Request needs an
        # absolute URL, so resolve it against the page that contained it.
        url = response.urljoin(sources[0])
        # The callback is a method of this spider, so reference it via self.
        yield Request(url=url, callback=self.dynamic_page)

    def dynamic_page(self, response):
        """Parse the frame document that contains the bot's answer."""
        # .... xpath to scrape answer
run it with a question as argument:
scrapy crawl thebot -a question=[Here the question]
for more details on how to use scrapy see scrapy tutorial
I would use Requests for task like this.
import requests

# Pass the query through params= so Requests URL-encodes the question
# (spaces, "&", "?" and so on) instead of pasting it into the URL verbatim.
r = requests.get(
    "http://talkingbox.dyndns.org:49495/in",
    params={"id": "3B9054BC032E53EF691A9A1803040F1C", "msg": your_question},
)
For webpages that do not contain dynamic content, r.text is what you want.
Since you didn't provide more information about dynamic webpage, there is not much more to say.