I am having trouble sending keys using Selenium.
What I want to do is call send_keys('samsung') on the search input.
However, the modal's inline style only changes from 'display: none' to 'display: block' after I click the search button, and I cannot send keys to the input.
<div class="modal-search modal-search-analysis" id="analysisSearchModal" style="display: none; transform: translate(0px, 0px); opacity: 1;">
after click
<div class="modal-search modal-search-analysis" id="analysisSearchModal" style="display: block; transform: translate(0px, 0px); opacity: 1;">
Here are the elements.
The modal on this page is not in an iframe; it is in a div.
<div class="modal-search modal-search-analysis" id="analysisSearchModal" style="display: block; transform: translate(0px, 0px); opacity: 1;">
<div class="modal-search-analysis-keyword" data-tooltip-container="">
<input type="text" name="searchKeywordf714s6nggsw" id="searchKeywordf714s6nggsw" data-index="f714s6nggsw" class="input-style type1 x-small modal-search-analysis-keyword-input search-input input-text" placeholder="Type search keyword" maxlength="15" tabindex="1">
Here are codes that I've been trying
# By.CLASS_NAME accepts a single class only; two classes need a CSS selector.
browser.find_element(By.CSS_SELECTOR, ".vbtn.btn-submit").click()  # click search button
time.sleep(3)
# NOTE(review): the id suffix looks generated per page load (the HTML sample
# shows "searchKeywordf714s6nggsw"), so this exact id may not exist - confirm.
search_input = browser.find_element(by=By.XPATH, value='//*[@id="searchKeywordn7ssjvuykn"]')
search_input.send_keys('samsung')
I've also tried this code
# By.CLASS_NAME accepts a single class only; two classes need a CSS selector.
browser.find_element(By.CSS_SELECTOR, ".vbtn.btn-submit").click()
time.sleep(3)
container = browser.find_element(by=By.XPATH, value='//*[@id="analysisSearchModal"]')
# Force the modal visible; the script's argument array is named `arguments`.
browser.execute_script("arguments[0].style.display = 'block';", container)
# NOTE(review): the id suffix looks generated per page load (the HTML sample
# shows "searchKeywordf714s6nggsw"), so this exact id may not exist - confirm.
search_input = browser.find_element(by=By.XPATH, value='//*[@id="searchKeywordn7ssjvuykn"]')
search_input.send_keys('samsung')
Thank you for those who can help me solve this problem:)
In your HTML the input's id is "searchKeywordf714s6nggsw",
but your code looks up id="searchKeywordn7ssjvuykn".
Related
I'm looking for the element at XPath '/html/body/div/div[2]' so that I can automatically press and hold a button using Selenium with Python; however, I'm having difficulty because the element is inside an iframe.
I tried using "expected_conditions as EC", but I'm not knowledgeable enough to make it work. Please tell me what to change, or suggest a more sensible approach.
HTML:
<div id="px-captcha" role="main">
<iframe style="display: none; width: 310px; height: 100px; border: 0px; user-select: none;" token="951d7e81fd6fb5e2af2cb2c701dbb6c391ab81d4b983da5f2f2de85667241a43a3a814a87cae2e98c70b730f7eaaac0a04bbf77bbfc63735e436d1d07675cb68"></iframe>
<iframe style="display: block; width: 310px; height: 100px; border: 0; -moz-user-select: none; -khtml-user-select: none; -webkit-user-select: none; -ms-user-select: none; user-select: none;" token="951d7e81fd6fb5e2af2cb2c701dbb6c391ab81d4b983da5f2f2de85667241a43a3a814a87cae2e98c70b730f7eaaac0a04bbf77bbfc63735e436d1d07675cb68">
#document
<html lang="en-US"
<head>...</head>
<body>
<div id="kkBSsePnKDMVkwa" class="eIlUWbNLSMdFkEz">
<div id="#LrJbZYBfdAzlAkl"></div>
<div id="BlXIkuwFPcwvDCY" role="main" aria-label="Please press and hold the button until verified">...</div>
</div>
</body>
</html>
</iframe>
<iframe style="display: none; width: 310px; height: 100px; border: 0px; user-select: none;" token="951d7e81fd6fb5e2af2cb2c701dbb6c391ab81d4b983da5f2f2de85667241a43a3a814a87cae2e98c70b730f7eaaac0a04bbf77bbfc63735e436d1d07675cb68"></iframe>
<p style="color: red; margin-top: 4;">Please try again</p>
</div>
Code (Updated)#Error:
def captcha(url):
    """Open *url* and, if a ``px-captcha`` block is present, press and hold its button.

    Uses the module-level ``driver``. Each iframe under ``div#px-captcha`` is
    tried in turn until one contains the element whose ``aria-label`` asks the
    user to press and hold. Returns ``None`` in every case; progress is
    reported with ``print``.
    """
    # Imported locally so the function does not rely on a file-level import.
    from selenium.common.exceptions import NoSuchElementException, WebDriverException

    driver.get(str(url))
    time.sleep(10)
    try:
        captcha_element = driver.find_element(By.ID, 'px-captcha')
    except NoSuchElementException:
        # 2.2: Skip captcha
        print('Website does NOT require captcha verification')
        sleep(randint(2, 3))
        return

    print(len(captcha_element.text), 'Captcha verification request page')
    print('Run pass captcha programing')
    # 2.1: Verify captcha
    # XPath positions are 1-based, so start at 1 rather than 0.
    for i in range(1, 11):
        try:
            wait = WebDriverWait(driver, 10)
            frame_xpath = f"(//div[@id='px-captcha']/iframe)[{i}]"
            wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, frame_xpath)))
            print("- Found iframe")
            element = driver.find_element(By.XPATH, "//div[contains(@aria-label, 'Please press and hold the button until verified')]")
            print("- Found element")
            print("Button verify: ", len(element.text))
            print("x: ", element.location['x'])
            print("y: ", element.location['y'])
            print("size box: ", element.size)
            # move_to_element targets the element's centre; offsets passed to
            # move_to_element_with_offset are element-relative, not page coordinates.
            ActionChains(driver).move_to_element(element).click_and_hold().perform()
            time.sleep(10)
            # Use a fresh chain so the press-and-hold is not replayed on release.
            ActionChains(driver).release().perform()
            print('Verify successful')
            break
        except WebDriverException:
            # Covers the wait timing out and the element being absent in this frame.
            print(f'- NOT Found xpath Num.: {i}')
            sleep(randint(5, 10))
        finally:
            # The frame XPath is relative to the top document, so always leave the iframe.
            driver.switch_to.default_content()
I wanted find element:
<div id="BlXIkuwFPcwvDCY" role="main" aria-label="Please press and hold the button until verified">...</div>
In order to interact with this web element :
<div id="BlXIkuwFPcwvDCY" role="main" aria-label="Please press and hold the button until verified">...</div>
you need to switch to this iframe first :
<iframe style="display: block; width: 310px; height: 100px; border: 0; -moz-user-select: none; -khtml-user-select: none; -webkit-user-select: none; -ms-user-select: none; user-select: none;" token="951d7e81fd6fb5e2af2cb2c701dbb6c391ab81d4b983da5f2f2de85667241a43a3a814a87cae2e98c70b730f7eaaac0a04bbf77bbfc63735e436d1d07675cb68">
Now, since you have mentioned that we are not able to find any unique identifier for this iframe, I would probably locate it through its parent div <div id="px-captcha" role="main">
Something like this :-
# Wait up to 10 s for the second iframe under div#px-captcha, then switch into it.
wait = WebDriverWait(driver, 10)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "(//div[@id='px-captcha']/iframe)[2]")))
and then can interact with the desired web element.
driver.find_element(By.XPATH, "//div[contains(@aria-label, 'Please press and hold the button until verified')]").click()
You are gonna need the below imports as well :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I am extracting text from an html file which contains a lot of div tags. However, at some places there are say 4 nested div tags and when I print text, it prints it 4 times.
<div>
<div id="PGBRK" style="TEXT-INDENT: 0pt; WIDTH: 100%; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt">
<div id="PN" style="PAGE-BREAK-AFTER: always; WIDTH: 100%">
<div style="TEXT-ALIGN: center; WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 10pt">27</font></div>
</div>
</div>
</div>
For example, here if I do:
# item.text includes the text of all descendants, so every enclosing <div> matches too.
for item in page_soup.find_all('div'):
    if "27" in item.text:
        print(item)
It prints the number 27 four times and therefore messes up whole text.
How can I get my code to only print the nested text once?
EDIT 1:
This works well for this part of the code. But like I said, this is only true at some places. For example, when I do:
# recursive=False restricts the search to direct children of page_soup.
for item in page_soup.find_all('div', recursive=False):
    print(item)
It does not print anything. For reference, this is the document I am trying to scrape.
EDIT 2:
From the given html, I am trying to extract the section "ITEM 1A. RISK FACTORS".
# Print every <div> from the first one mentioning "ITEM 1A." up to (not
# including) the first one mentioning "ITEM 1B.".
should_print = False
for item in page_soup.find_all('div'):
    if "ITEM 1A." in item.text:
        should_print = True
    elif "ITEM 1B." in item.text:
        break
    if should_print:
        print(item)
So I am printing everything starting from ITEM 1A. until it finds ITEM 1B.
Here at some places there are nested div tags, which gets printed multiple times with this piece of code.
If I do, recursive = False, it does not print anything.
Here is one option
import bs4, re
html = '''<div>
<div id="PGBRK" style="TEXT-INDENT: 0pt; WIDTH: 100%; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt">
<div id="PN" style="PAGE-BREAK-AFTER: always; WIDTH: 100%">
<div style="TEXT-ALIGN: center; WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 10pt">27</font></div>
</div>
</div>
</div>
</div>'''
soup = bs4.BeautifulSoup(html, 'html.parser')
# Match the text nodes themselves, so the value is reported once however
# deeply it is nested. `string=` is the current name of the old `text=` argument.
elements = soup.find_all(string=re.compile(r'27'))
print(elements)
output
[u'27']
printing everything starting from ITEM 1A. until it finds ITEM 1B
Through the .string attribute (https://www.crummy.com/software/BeautifulSoup/bs4/doc/#string)
import requests
from bs4 import BeautifulSoup
url = 'https://www.sec.gov/Archives/edgar/data/4904/000000490412000013/ye11aep10k.htm'
html_doc = requests.get(url).content
page_soup = BeautifulSoup(html_doc, 'html.parser')
do_print = False
for el in page_soup.find_all('div'):
    # Only divs with a .string are checked for the start/end markers.
    if el.string:
        if "ITEM 1A" in el.string:
            do_print = True
        elif "ITEM 1B" in el.string:
            break
    # Printing is not limited to divs with a .string: the sample output
    # includes divs that only contain <br/>.
    if do_print:
        print(el)
The output (I'll show the representative start and end blocks without middle part, to make a short dump):
<div align="justify" style="TEXT-INDENT: 0pt; DISPLAY: block; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 12pt; FONT-WEIGHT: bold"><font style="DISPLAY: inline; TEXT-DECORATION: underline">ITEM 1A. RISK FACTORS</font></font></div>
<div style="TEXT-INDENT: 0pt; DISPLAY: block"><br/>
</div>
<div align="justify" style="TEXT-INDENT: 0pt; DISPLAY: block; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 12pt; FONT-WEIGHT: bold">GENERAL RISKS OF OUR REGULATED OPERATIONS</font></div>
<div style="TEXT-INDENT: 0pt; DISPLAY: block">
<div align="justify" style="TEXT-INDENT: 0pt; DISPLAY: block; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt"><font style="FONT-STYLE: italic; DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 12pt; FONT-WEIGHT: bold"> </font></div>
<div align="justify" style="TEXT-INDENT: 0pt; DISPLAY: block; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt"><font style="FONT-STYLE: italic; DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 12pt; FONT-WEIGHT: bold">The regulatory environment in Ohio has recently become unpredictable and increasingly uncertain. – Affecting AEP and OPCo</font></div>
<div style="TEXT-INDENT: 0pt; DISPLAY: block"><br/>
.....
<div style="TEXT-ALIGN: center; WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 10pt">37</font></div>
<div style="TEXT-ALIGN: center; WIDTH: 100%">
<hr noshade="" size="2" style="COLOR: black"/>
</div>
<div id="HDR">
<div align="right" id="GLHDR" style="WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 8pt"> </font></div>
</div>
<div align="right" id="GLHDR" style="WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 8pt"> </font></div>
<div style="TEXT-INDENT: 0pt; DISPLAY: block"> </div>
You can provide the option text = "27" to search the divs by text and identify only that exact div. The below code should work fine. If you want to get all the divs then just remove the text = "27" or replace it with what text that you want to find. You can also use recursive = False to get only the top level divs.
Edit 1:
from bs4 import BeautifulSoup
t = '''
<div>
27
</div>
<div>
<div id="PGBRK" style="TEXT-INDENT: 0pt; WIDTH: 100%; MARGIN-LEFT: 0pt; MARGIN-RIGHT: 0pt">
<div id="PN" style="PAGE-BREAK-AFTER: always; WIDTH: 100%">
<div style="TEXT-ALIGN: center; WIDTH: 100%"><font style="DISPLAY: inline; FONT-FAMILY: Times New Roman; FONT-SIZE: 10pt">27</font></div>
</div>
</div>
</div>
</div>
'''
page_soup = BeautifulSoup(t, 'html.parser')
# Restrict the search to <div> tags whose text is "27".
for item in page_soup.find_all('div', text="27"):
    print(item.text)
Edit 2:
I have added a specific code that works for your problem specifically. Try the below code. The div range that you are expecting is from 567 - 715 with page numbers removed.
import requests
from bs4 import BeautifulSoup
resp = requests.get(
    r'https://www.sec.gov/Archives/edgar/data/4904/000000490412000013/ye11aep10k.htm')
t = resp.text
page_soup = BeautifulSoup(t, 'html.parser')
# Direct <div> children of <body>, excluding the ones with id="PGBRK".
s = 'body > div:not(#PGBRK)'
# The slice bounds are specific to this document.
for i in page_soup.select(s)[567:715]:
    print(i.get_text(strip=True))
Well I think that is a cool question, and I don't see a simple answer if you want to generalize it to find out what text there is at each level without resorting to searching for a specific number like 27. Beautiful Soup doesn't seem to have a function for showing only the text at the top level of a tag, and recursive=False simply prevents the search from descending below the first level but still includes everything below that level in each match's contents, so a tag matched at the top level is captured together with everything nested inside it.
So I think you'd actually have to recurse down the tree of divs and compare the text at each level. I figure this out. It prints in reverse order as it bubbles up from the recursion but that could be stored in a list and output in forward order.
from bs4 import BeautifulSoup
soup = BeautifulSoup('<div>1A<div>2A</div>1B<div>2B<div>3A</div><div>3A</div>2C</div>1C</div>', 'html.parser')


def mangle(node):
    """Print the text defined at each <div> nesting level below *node*.

    Returns the first <div> found under *node* together with its following
    sibling tags, or an empty list when *node* contains no <div>. Deeper
    levels are printed first because printing happens after the recursion.
    """
    divs = node.find_all('div')
    if not divs:
        return []
    result = [divs[0]] + [n for n in divs[0].next_siblings if n.__class__.__name__ == 'Tag']
    txt = []
    for r in result:
        txt.append(repr(r))
        # Remove each child's markup so only text owned by this level remains.
        for c in mangle(r):
            txt[-1] = txt[-1].replace(repr(c), '')
    print(''.join(BeautifulSoup(fragment, 'html.parser').text for fragment in txt))
    return result


if __name__ == '__main__':
    mangle(soup)
Basically it walks down the branches of divs and builds lists at each fork of the tree, including the tags, then the caller removes anything found below it leaving just the text that is defined at that level. I keep the tags in place so that text patterns appearing at multiple levels don't get removed by mistake.
Output from the html 1A2A1B2B3A3A2C1C was
3A3A
2A2B2C
1A1B1C
which is the 3rd, 2nd and 1st nesting levels respectively. Hope this helps.
I will answer my own question since I finally got it to work.
The solution was easy; I was just overthinking it.
I just added the condition that the parent of the item must not be a "div". Now the program does not print the text multiple times.
should_print = False
for item in page_soup.find_all('div'):
    # Only consider divs that are not nested inside another div, so nested
    # content is printed once (as part of its outermost div).
    if item.name == "div" and item.parent.name != "div":
        if "ITEM 1A." in item.text:
            should_print = True
        elif "ITEM 1B." in item.text:
            break
        if should_print:
            print(item)
Thank you everyone for your contributions. Appreciated...
I'm not sure why, but today this code is not working as expected (I've used the same code with other HTML before and it worked like a charm).
I am trying to find out whether the text "Anular" is in the HTML code, but the result always comes out as "No", even though the text is visible there.
Code is:
from bs4 import BeautifulSoup
import re
html = browser.page_source
soup = BeautifulSoup(html, "html.parser")
# Searches text nodes only; attribute values are not examined.
if soup.findAll(text=re.compile('Anular')):
    registo2 = "Yes"
else:
    registo2 = "No"
Html is (part of it):
</td>
</tr></tbody></table>
<span class="filter" style="display: block; width: 100%; position: absolute; top:17px;" id="Almoço">
<form action="" method="post" name="itens" id="item_frm">
<input type="hidden" name="item" value="30" /><span style="top: 70px;" class="item_check2">Marcada</span><br /><div style="display:none;z-index:1000;position:fixed;" class="divBlockClass" id="divBlock7172"></div><br /><div style="z-index:10001;" id="msgbox_ref_7172" class="msgbox_ref"><span id="msg_ref_7172"></span><br /><br /><input type="submit" onclick="
document.getElementById('msgbox_ref_7172').style.visibility='hidden'" onmouseout="this.className='input_off'" onmouseover="this.className='input_on'" name="SetData" class="input_off" id="btn_marcar" value=" OK " />
<input type="button" onclick="document.getElementById('msgbox_ref_7172').style.visibility='hidden';document.getElementById('divBlock7172').style.display='none'" onmouseout="this.className='input_off'" onmouseover="this.className='input_on'" class="input_off" value="Cancelar" />
</div><br /><span class="itemref_btn">
<input type="button" "="" onclick="document.getElementById('marcada').value='true';document.getElementById('divBlock7172').style.display='block'; document.getElementById('msgbox_ref_7172').style.visibility='visible';
document.getElementById('msgbox_ref_7172').style.top=event.pageY+'px';
document.getElementById('msg_ref_7172').innerHTML='Tem a certeza que deseja anular?';" onmouseout="this.className='input_off'" onmouseover="this.className='input_on'" style="cursor: pointer; width: 112px;" value="Anular Refeição" class="input_off" id="btn_anular_7172" /></span><table cellspacing="0" cellpadding="0" border="0" width="95%;" id="conteudoTabela">
Any idea why is not working? Something that could be improved in the regexp?
Thanks.
In the html you provided the string you're looking for is contained in the 'value' attribute of the tag.
So your code would be:
# Match tags whose `value` attribute contains "Anular".
if soup.find_all(value=re.compile('Anular')):
    registo2 = "Yes"
else:
    registo2 = "No"
Or if you want to check for a string in text or attributes you can use a function in find_all.
def f(tag):
    """Return True if 'Anular' occurs in the tag's ``value`` attribute or in its string."""
    # A missing attribute or a None string falls back to '' so `in` is always valid.
    return 'Anular' in tag.get('value', '') or 'Anular' in (tag.string or '')
# Any tag accepted by f means the label is present.
if soup.find_all(f):
    registo2 = "Yes"
else:
    registo2 = "No"
I am trying to print out the value of a textfield to the console.
The webpage has the value 1,000.000 in the textfield. 1,000.000
should be printed but my method is printing blank.
I am using Python Webdriver. I am using the .text which should get the text value of the textfield.
My method is:
from selenium.webdriver.common.by import By
# max records textfield has the value 1,000.000 as default
def print_maxrecords_textfield(self):
    """Print the ``.text`` of the element that follows the "Max records" label."""
    # find_element takes the strategy and the expression as two arguments.
    max_records_textfield = self.driver.find_element(By.XPATH, '//span[@class="gwt-InlineLabel myinlineblock marginbelow" and contains(text(), "Max records")]/following-sibling::*')
    print("max_records_textfield = ")
    print(max_records_textfield.text)
I call the method from my test case class as dp.print_maxrecords_textfield()
The output to the console is as follows:
max_records_textfield =
It should say max_records_textfield = 1,000.00
The HTML snippet is:
<div class="padding gwt-TabLayoutPanelContent" style="position: absolute; left: 0px; top: 0px; right: 0px; bottom: 0px;" aria-hidden="false">
<div class="clear">
<span class="gwt-InlineLabel marginbelow myinlineblock" style="width: 8em;">Location</span>
<input class="gwt-TextBox marginbelow" type="text" style="width: 30em;"/>
</div>
<div class="clear">
<span class="gwt-InlineLabel myinlineblock marginbelow" style="width: 8em;">Max records</span>
<input class="gwt-IntegerBox marginbelow" type="text"/>
</div>
Actually try getting the value instead of text.
from selenium.webdriver.common.by import By
# max records textfield has the value 1,000.000 as default
def print_maxrecords_textfield(self):
    """Print the ``value`` attribute of the element that follows the "Max records" label."""
    # find_element takes the strategy and the expression as two arguments.
    max_records_textfield = self.driver.find_element(By.XPATH, '//span[@class="gwt-InlineLabel myinlineblock marginbelow" and contains(text(), "Max records")]/following-sibling::*')
    print("max_records_textfield = ")
    # An <input>'s current content is exposed through its `value`, not its text.
    print(max_records_textfield.get_attribute('value'))
I am using Selenium2Library (Python) for our automation. This is the method I use:
def get_appointment_from_manage(self, date, appt_id):
    """Page the calendar forward until *date* is shown, then click appointment *appt_id*."""
    ref_date = "//*[@data-date=\"%s\"]" % date
    time.sleep(2)
    logging.info(date)
    logging.info(appt_id)
    # Keep clicking the calendar's second header button until the date cell exists.
    while not self.is_element_present_by_xpath(ref_date):
        self._current_browser().find_element_by_xpath("//*[@id=\"calendar1\"]/div[1]/div[3]/div/button[2]").click()
        time.sleep(2)
    element = self._current_browser().find_element_by_xpath("//*[@data-aid=\"%s\"]" % appt_id)
    logging.info(element)
    ActionChains(self._current_browser()).move_to_element(element).click().perform()
The logging states that the element was found but it doesn't click.
this is the part that isn't clicking.
element = self._current_browser().find_element_by_xpath("//*[@data-aid=\"%s\"]" % appt_id)
logging.info(element)
ActionChains(self._current_browser()).move_to_element(element).click().perform()
When you inspect the element, the whole element is covered in blue. So I don't know what am i missing. Firefox version is 28. Thanks in advance!
EDIT
This is the html
<div class="fc-event-container">
<div class="fc-event-box" style="position:relative;z-index:1"></div>
<div data-aid="31" class="fc-event-data-container fc-status-2" style="position:absolute;top:0px;right:0;bottom:-62px;left:0;z-index:1">
<div class="fc-event-data-box">
<a class="fc-time-grid-event fc-event fc-start fc-end evnt-1419408000000" style="top: 0px; bottom: -62px; z-index: 1; left: 0%; right: 0%;">
<div class="fc-content">
<div class="fc-time" data-start="8:00" data-full="8:00 AM - 8:30 AM" style="display:none;">
<span>8:00 - 8:30</span>
</div>
<div class="fc-title">Robot-FN</div>
<span class="fc-product">Home Loans</span>
</div>
<div class="fc-bg"></div>
</a>
</div>
</div>
</div>
I'm not sure this is what you are trying to do, but if you want to click on the <a> tag (which is clickable), then you need to get hold of that element, not the <div> that contains it.
Try something like this (I didn't test this XPath, so take it as a general idea):
element = self._current_browser().find_element_by_xpath("//*[@data-aid=\"%s\"]//a" % appt_id)