XPath doesn't go on to the next page - Python

When I use the code below, the website doesn't go on to the next page.
import unittest
from selenium import webdriver
import time

class ProductPurchase(unittest.TestCase):
    """
    Purchase the product on the website http://automationpractice.com/index.php
    """
    # Preconditions
    def setUp(self):
        self.driver = webdriver.Firefox()
        self.driver.get("http://automationpractice.com/index.php")
        self.driver.maximize_window()

    def tearDown(self):
        self.driver.quit()

    # Buying a product on the website
    def test_wrong_agreement(self):
        driver = self.driver
        time.sleep(2)
        # Click on "Quick view"
        quickview_btn = driver.find_element_by_xpath("/html/body/div/div[2]/div/div[2]/div/div[1]/ul[1]/li[1]/div/div[1]/div/a[2]").click()

if __name__ == '__main__':
    unittest.main(verbosity=2)
It should go on to the next page, but the XPath doesn't work.

Hello, and good luck learning test automation.
The first thing I do when an XPath does not work is to check it, usually with an XPath-checker browser extension, to make sure it is correct. Additionally, it is better to use a shorter XPath so there is less room for mistakes, e.g. //img[@title='Printed Dress']
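For example, a minimal sketch of the same click with a short XPath plus an explicit wait (the XPath assumes a product titled 'Printed Dress' is on the page):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for the element to become clickable, then click it
wait = WebDriverWait(driver, 10)
quick_view = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//img[@title='Printed Dress']"))
)
quick_view.click()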

Try the following XPath and use the JavaScript executor to click on Quick view.
This code will click the first matching element on the page. If you wish to click all Quick view buttons sequentially, you need to write further logic, as in the sketch after the code.
driver.get("http://automationpractice.com/index.php")
driver.maximize_window()
ele_quickview = driver.find_element_by_xpath('(//a[@class="quick-view"])[1]')
driver.execute_script("arguments[0].click();", ele_quickview)
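A rough follow-up sketch (untested against the live site) that clicks each Quick view link in turn, re-finding the links on every pass because element references go stale once the page changes:
quick_views = driver.find_elements_by_xpath('//a[@class="quick-view"]')
for index in range(len(quick_views)):
    # Re-locate the links each time; stale references can't be reused
    link = driver.find_elements_by_xpath('//a[@class="quick-view"]')[index]
    driver.execute_script("arguments[0].click();", link)
    # ... interact with the quick view here ...
    driver.back()  # return to the product list before the next click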


Web Scraping: how to extract this kind of div tag?

I am looking at a tag like this:
<div _ngcontent-vex-c62 data-e2e-text-message-content class="text-msg-container">...</div>
When I write this code,
message = soup.find("div", {"class": "text-msg-container"})
it gives me None. What are the _ngcontent-vex-c62 and data-e2e-text-message-content attributes? Do I need to include them too? How should I write them to get the div tag?
You can't, because the div isn't there when you send a GET request for the page code.
That page is built with the Angular framework, which produces an SPA (Single-Page Application). That means you can't scrape the data with a plain GET request, because the data isn't in the initial HTML.
The data is generated by JavaScript code, which needs to run first to add it to the webpage.
You need another approach that lets the JavaScript run first; then you can extract the data you want.
If you want to find the class text-msg-container, try Selenium: it drives a real browser, so the JavaScript-generated elements are there to locate.
import unittest
from selenium import webdriver

class PythonSearch(unittest.TestCase):
    def setUp(self):
        self.driver = webdriver.Firefox()

    def test_search(self):
        driver = self.driver
        driver.get("http://www.yoursite.com")
        elem = driver.find_element_by_css_selector(".text-msg-container")

    def tearDown(self):
        self.driver.close()

if __name__ == "__main__":
    unittest.main()
Use driver = webdriver.Chrome('/path/to/chromedriver') if you are testing with Chrome. See https://chromedriver.chromium.org/getting-started for more info.
Getting started with Selenium: https://selenium-python.readthedocs.io/getting-started.html#simple-usage
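One caveat: since the element is created by JavaScript, it may not exist yet at the moment find_element runs. An explicit wait avoids that race (a sketch):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for Angular to render the message container
elem = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, ".text-msg-container"))
)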
Try this, please:
message = soup.find("div", class_="text-msg-container")
I hope that works.
from selenium import webdriver

# Path to the chromedriver you downloaded; change this to its location on your PC
path = "C:/chromedriver.exe"
driver = webdriver.Chrome(path)  # change this if you are not using Chrome
driver.get("website link")
out = driver.find_element_by_class_name("text-msg-container")
print(out.text)

Selenium: difference between Chrome and PhantomJS? - Python

I want to scrape Bing's search results. Basically, I am using Selenium; the idea is to use Selenium to click 'Next' automatically and scrape the URLs of the search results on each page. I made it run with the Chrome browser on my Ubuntu machine:
from selenium import webdriver
import os
import time

class bingURL(object):
    def __init__(self):
        self.driver = webdriver.Chrome(os.path.expanduser('./chromedriver'))

    def get_urls(self, url):
        driver = self.driver
        driver.get(url)
        elems = driver.find_elements_by_xpath("//a[@href]")
        href = []
        for elem in elems:
            link = elem.get_attribute("href")
            try:
                if 'bing.com' not in link and 'http' in link and 'microsoft.com' not in link and 'smashboards.com' not in link:
                    href.append(link)
            except:
                pass
        return list(set(href))

    def search_urls(self, keyword, pagenum):
        driver = self.driver
        searchurl = self.lookup(keyword)  # url of the first page of Bing search results
        driver.get(searchurl)
        results = self.get_urls(searchurl)
        for i in range(pagenum):
            driver.find_elements_by_class_name("sb_pagN")[0].click()  # click 'Next' on the Bing search results
            time.sleep(5)  # wait for the page to load
            current_url = driver.current_url
            # print(current_url)
            # print(self.get_urls(current_url))
            results[0:0] = self.get_urls(current_url)
        driver.quit()
        return results

    def lookup(self, query):
        return "https://www.bing.com/search?q=" + query

if __name__ == "__main__":
    g = bingURL()
    result = g.search_urls('Stackoverflow is good', 10)
It works perfectly: when I run the code, it launches a Chrome browser, and I can see it go to the next page automatically and collect URLs for 10 pages of search results.
However, my goal is to run this code on AWS. There, the original code failed with the error 'Chrome failed to start'. After googling, it seems I need a headless browser such as PhantomJS on AWS. So I installed PhantomJS and changed __init__ to:
def __init__(self):
    self.driver = webdriver.PhantomJS()
However, it cannot click 'Next' anymore, and the old code cannot scrape URLs. The error message is:
File ".../SEARCH_BING_MODULE.py", line 70, in search_urls
driver.find_elements_by_class_name("sb_pagN")[0].click()
IndexError: list index out of range
It looks like changing the browser completely changes the rules. How should I modify the original code to make it work again? Or, how can I scrape Bing search result URLs using Selenium + PhantomJS?
Thanks for your help!
Yes, you can perform all of those operations with a headless browser. Don't use HtmlUnit, as it has many configuration issues.
PhantomJS was another approach to a headless browser, but PhantomJS is buggy these days because it is poorly maintained.
You can use chromedriver itself for headless jobs.
You just need to pass one option to chromedriver, as below:
chromeOptions.addArguments("--headless");
The full code (here in Java) will look like this:
System.setProperty("webdriver.chrome.driver","D:\\Workspace\\JmeterWebdriverProject\\src\\lib\\chromedriver.exe");
ChromeOptions chromeOptions = new ChromeOptions();
chromeOptions.addArguments("--headless");
chromeOptions.addArguments("--start-maximized");
WebDriver driver = new ChromeDriver(chromeOptions);
driver.get("https://www.google.co.in/");
Hope it will help you :)

Python Selenium unable to find element by class name or XPath

I'm a newbie in Selenium. I started learning Selenium from a book, and I'm struggling with unclear behavior of Selenium. For educational purposes I use this site:
http://magento-demo.lexiconn.com/ - I'm trying to find the search button by its class name (which is: class='button search button') or by its XPath
search_button = self.driver.find_element_by_xpath('/html/body/div/div[2]/header/div/div[4]/form/div[1]/button')
or
search_button = self.driver.find_element_by_class_name('button')
but each time Selenium is unable to find it. Please help me understand the reason for this behavior. Thank you.
I used Selenium IDE and it shows me the XPath //button[@type='submit'],
but when I tried to find the element by that XPath I got the same error, which is strange. Please advise.
My code is:
import unittest
from selenium import webdriver

class HomePageTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # create a new Firefox session
        cls.driver = webdriver.Firefox()
        cls.driver.implicitly_wait(30)
        cls.driver.maximize_window()
        # navigate to the application home page
        cls.driver.get('http://magento-demo.lexiconn.com/')

    def test_search__text_field_max_length(self):
        # get the search text box
        search_field = self.driver.find_element_by_id("search")
        # check the maxlength attribute is set to 128
        self.assertEqual("128", search_field.get_attribute("maxlength"))

    def test_search_button_enabled(self):
        # get the Search button
        search_button = self.driver.find_element_by_class_name('button')
        # check the Search button is enabled
        self.assertTrue(search_button.is_enabled())

    @classmethod
    def tearDownClass(cls):
        # close the browser window
        cls.driver.quit()

if __name__ == '__main__':
    unittest.main(verbosity=2)
Try this:
search_button = self.driver.find_element_by_xpath('//button[@class="button search-button"]')
Try downloading the Selenium IDE plugin, install it, and start recording. Click on the button you want and view how its target is recorded in the IDE. Programmatically, Selenium will accept the same XPaths and other selectors as the IDE. After the click has been recorded in the IDE, a pull-down on the target field lets you see all the different ways you can select that element, i.e. XPath vs. class, etc.
http://www.seleniumhq.org/projects/ide/
you might try:
css=button.button.search-button
//button[@type='submit']
//form[@id='search_mini_form']/div/button
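In Python those locators would be used like this (a sketch; the css= prefix is Selenium IDE syntax and is dropped):
self.driver.find_element_by_css_selector("button.button.search-button")
self.driver.find_element_by_xpath("//button[@type='submit']")
self.driver.find_element_by_xpath("//form[@id='search_mini_form']/div/button")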
I think the issue is that your locator isn't specific enough: there is more than one button on the page, and more than one element with class button. This CSS selector is working for me:
self.driver.find_element_by_css_selector("button[title='Search']")
Try this approach with an XPath locator.
Explanation: use the title attribute of the <button> tag.
self.driver.find_element_by_xpath("//button[@title='Search']")
OR
Explanation: use the title and type attributes of the <button> tag.
self.driver.find_element_by_xpath("//button[@title='Search'][@type='submit']")

Webdriver Timeout Exception

I'm trying to understand where the problem in this code is:
import unittest
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class WebTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        binary = FirefoxBinary('/home/andrew/Downloads/firefox 45/firefox')
        cls.browser = webdriver.Firefox(firefox_binary=binary)
        cls.wait = WebDriverWait(cls.browser, 10)
        cls.browser.maximize_window()
        cls.browser.get('http://www.test.com/')

    def test_login_menu_elements(self):
        self.wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='menu_min']"))).click()
        check_icons(self)
        self.wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='menu_min']"))).click()
        check_fields(self)

    def test_add_news(self):
        self.wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(.,'News')]"))).click()
        self.wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@href='/manager/news']"))).click()

    @classmethod
    def tearDownClass(cls):
        cls.browser.quit()

if __name__ == '__main__':
    unittest.main()
Every time I receive a TimeoutException, and I really don't understand why, or where the problem in the code is.
A TimeoutException can be raised without there being any logical or syntactic errors in your code.
TimeoutExceptions are raised when the wait.until expected conditions aren't met within the timeout.
Some things I have found to help:
Isolate the XPath by using the Chrome/Firefox dev tools: right-click on the element and copy its XPath.
Using the XPath from the step above, make sure the expected condition you chose is correct.
In my experience, coming from a front-end background, CSS selectors are usually more intuitive and more understandable than relative XPaths.
Check the selector you are using by opening the dev tools console and running $x({{ XPATH_HERE }}) to make sure it is valid.
For dynamic HTML, use the Python debugger and make sure the HTML is in the expected state between each expected condition; a sketch of that kind of debugging follows below.
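As a sketch of that last point, you can catch the timeout and dump the page state for inspection (the file names are arbitrary):
from selenium.common.exceptions import TimeoutException

try:
    self.wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='menu_min']"))).click()
except TimeoutException:
    self.browser.save_screenshot("timeout.png")  # what the page looked like
    with open("timeout.html", "w") as f:
        f.write(self.browser.page_source)  # what the DOM actually contained
    raise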

Python - Selenium - Print Webpage

How do I print a webpage using Selenium, please?
import time
from selenium import webdriver
# Initialise the webdriver
chromeOps = webdriver.ChromeOptions()
chromeOps._binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
chromeOps._arguments = ["--enable-internal-flash"]
browser = webdriver.Chrome("C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe", port=4445, chrome_options=chromeOps)
time.sleep(3)

# Load the webpage
browser.get('http://www.webpage.com')
Note: I am using the current version of Google Chrome at the time of writing: 32.0.1700.107 m.
While it's not directly printing the webpage, it is easy to take a screenshot of the entire current page:
browser.save_screenshot("screenshot.png")
Then the image can be printed using any image-printing library. I haven't personally used any such library, so I can't vouch for one, but a quick search turned up win32print, which looks promising.
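For example, rather than a full win32print setup, on Windows you can hand the saved screenshot to the default printer via the shell's "print" verb (a sketch; os.startfile is Windows-only):
import os
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("http://www.example.com")
browser.save_screenshot("screenshot.png")

# Ask the Windows shell to print the file with its default image handler
os.startfile("screenshot.png", "print")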
The key "trick" is that we can execute JavaScript in the selenium browser window using the "execute_script" method of the selenium webdriver, and if you execute the JavaScript command "window.print();" it will activate the browsers print function.
Now, getting it to work elegantly requires setting a few preferences to print silently, remove print progress reporting, etc. Here is a small but functional example that loads up and prints whatever website you put in the last line (where 'http://www.cnn.com/' is now):
import time
from selenium import webdriver

class printing_browser(object):
    def __init__(self):
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_preference("services.sync.prefs.sync.browser.download.manager.showWhenStarting", False)
        self.profile.set_preference("pdfjs.disabled", True)
        self.profile.set_preference("print.always_print_silent", True)
        self.profile.set_preference("print.show_print_progress", False)
        self.profile.set_preference("browser.download.show_plugins_in_list", False)
        self.driver = webdriver.Firefox(self.profile)
        time.sleep(5)

    def get_page_and_print(self, page):
        self.driver.get(page)
        time.sleep(5)
        self.driver.execute_script("window.print();")

if __name__ == "__main__":
    browser_that_prints = printing_browser()
    browser_that_prints.get_page_and_print('http://www.cnn.com/')
The key command you were probably missing was self.driver.execute_script("window.print();"), but you need some of the setup in __init__ to make it run smoothly, so I thought I'd give a fuller example. The trick alone was mentioned in a comment above, so some credit should go there too.
