How can I add response time to a Selenium Python report? - python

I have a Selenium-Python automation test and I am generating HTML/JSON reports using pytest. I want to add the response time to the HTML/JSON report. Is this even possible?
Following is my code:
test_screenshot.py
from datetime import datetime

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pytest_html
from selenium.common.exceptions import InvalidSessionIdException

def test_Openurl(setup):
    driver = setup["driver"]
    url = setup["url"]
    try:
        before_time = datetime.now()  # timestamp taken just before the page load
        driver.get(url)
        now_time = datetime.now()  # timestamp taken right after the page load
        response_time = (now_time - before_time).total_seconds()
    except Exception as e:
        print(e)  # Python 3 exceptions have no .message attribute
    assert driver.current_url == url
    driver.save_screenshot("ss.png")
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    driver.save_screenshot("ss1.png")
    driver.close()
conftest.py
import pytest
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service

def pytest_addoption(parser):
    parser.addoption("--url", action="store", default="https://google.com/")

@pytest.fixture()
def setup(pytestconfig):
    s = Service("C:/Users/Yash/Downloads/chromedriver_win32/chromedriver.exe")
    driver = webdriver.Chrome(service=s)
    driver.maximize_window()
    yield {"driver": driver, "url": pytestconfig.getoption("url")}
    driver.quit()  # teardown: close the browser once the test is done
Following is the command I am using to generate the reports:
pytest -v -s --json-report --json-report-indent=4 --json-report-file=report/report.json --html=report/report.html test_screenshot.py

You can use one of the pytest-json-report hooks to do this, something along these lines in your conftest.py file:
from datetime import datetime

from pytest import hookimpl
from pytz import timezone

@hookimpl(optionalhook=True)
def pytest_json_runtest_metadata(call):
    """
    Hook from the pytest-json-report plugin that will add your info
    to the report's metadata.
    """
    if call.when != 'call':
        return {}
    # collect the start and stop times in ISO format for the US/Eastern timezone
    start_iso_dt = timezone('US/Eastern').localize(datetime.fromtimestamp(call.start)).isoformat()
    stop_iso_dt = timezone('US/Eastern').localize(datetime.fromtimestamp(call.stop)).isoformat()
    return {'start': start_iso_dt, 'stop': stop_iso_dt}
That will end up in your json_report metadata. (My code needed that US/Eastern timezone; you can obviously adjust accordingly, or just do the difference calculation in this function and return {'response_time': mydiff}.)
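For instance, a minimal sketch of that difference-only variant (it would replace the hook above, and relies only on call.start and call.stop, which are epoch timestamps in seconds):
from pytest import hookimpl

@hookimpl(optionalhook=True)
def pytest_json_runtest_metadata(call):
    if call.when != 'call':
        return {}
    # the difference of the epoch timestamps is the duration of the
    # test's call phase in seconds, i.e. roughly your response time
    return {'response_time': call.stop - call.start}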

Related

Facing this error in Pytest on selenium python

import time
import self as self
from pytest import mark
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from setuptools import setup

@mark.execute
class First_Tests:
    def test_first(self, setup):
        driver = setup['driver']
        browser = setup['browser']
        driver.get("https://shuftipro.com/")
        driver.maximize_window()

    def header_test(self, setup):
        driver = setup['driver']
        # Click on solution in header
        solution = driver.find_element(By.ID, "menu-item-72751")
        solution.click()
        if driver.current_url == "https://shuftipro.com/solutions/":
            print("land on solution page.")
        else:
            print("land on wrong page.")

obj = First_Tests()
obj.test_first(self, setup)
obj.header_test(self, setup)
If I remove the "self" from the parameters and run the program, it shows me the error: test_first() takes 1 positional argument but 3 were given.
One of the easier ways would be to use the webdriver from selenium directly:
driver = webdriver.Chrome()
and remove
driver = setup['driver']
browser = setup['browser']
and then get rid of setup as a parameter.
The final code will look something like this:
from pytest import mark
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()

@mark.execute
class First_Tests:
    def test_first(self):
        driver.get("https://shuftipro.com/")
        driver.maximize_window()

    def header_test(self):
        # Click on solution in header
        solution = driver.find_element(By.ID, "menu-item-72751")
        solution.click()
        if driver.current_url == "https://shuftipro.com/solutions/":
            print("land on solution page.")
        else:
            print("land on wrong page.")

obj = First_Tests()
obj.test_first()
obj.header_test()
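Alternatively, if you want to keep using the setup fixture, you can let pytest collect and run the tests instead of instantiating the class yourself; pytest injects fixtures by parameter name, so you never pass setup (or self) manually. A minimal sketch, assuming a setup fixture in conftest.py that yields {'driver': ...}:
from selenium.webdriver.common.by import By

# pytest only collects classes whose names start with "Test"
# and methods whose names start with "test_"
class TestShuftipro:
    def test_first(self, setup):
        driver = setup['driver']
        driver.get("https://shuftipro.com/")
        driver.maximize_window()

    def test_header(self, setup):
        driver = setup['driver']
        # click on Solutions in the header
        solution = driver.find_element(By.ID, "menu-item-72751")
        solution.click()
        assert driver.current_url == "https://shuftipro.com/solutions/"
Running pytest then calls both tests with the fixture filled in automatically; no obj = ... calls at module level are needed.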

Web Scraping | Python Selenium webdriver find dynamic elements using XPath

Apologies in advance if this long question seems quite basic!
Given:
search query link in a library website:
url = 'https://digi.kansalliskirjasto.fi/search?query=economic%20crisis&orderBy=RELEVANCE'
I'd like to extract all the useful information for each individual search result (20 in total on one page) of this specific query, as depicted by the red rectangles in a screenshot (not reproduced here).
Currently, I have the following code:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

def run_selenium(URL):
    options = Options()
    options.add_argument("--remote-debugging-port=9222")
    options.headless = True
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(URL)
    pt = "//app-digiweb/ng-component/section/div/div/app-binding-search-results/div/div"
    medias = driver.find_elements(By.XPATH, pt)  # expect to obtain a list with 20 elements!!
    print(medias)  # >>>>>> result: []
    print("#" * 100)
    for i, v in enumerate(medias):
        print(i, v.get_attribute("innerHTML"))

if __name__ == '__main__':
    url = 'https://digi.kansalliskirjasto.fi/search?query=economic%20crisis&orderBy=RELEVANCE'
    run_selenium(URL=url)
Problem:
Having a look at the relevant part of the markup in Chrome's inspector (screenshot not reproduced here), I have tried several XPaths generated by the Chrome extensions XPath Helper and SelectorsHub, using each as the pt variable in my Python code for this library search engine, but the result is [] or simply nothing.
Using SelectorsHub and hovering the mouse over Rel XPath, I get this warning: id & class both look dynamic. Uncheck id & class checkbox to generate rel xpath without them if it is generated with them.
Question:
Assuming Selenium (rather than BeautifulSoup, as recommended here and here) as the tool for scraping a page containing dynamic attributes, shouldn't driver.find_elements() return a list of 20 elements, each containing all the info to be extracted?
>>>>> UPDATE <<<<< Working Solution (although time inefficient)
As recommended by @JaSON in the solution, I now use WebDriverWait in a try/except block as follows:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common import exceptions

def get_all_search_details(URL):
    st_t = time.time()
    SEARCH_RESULTS = {}
    options = Options()
    options.headless = True
    options.add_argument("--remote-debugging-port=9222")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(URL)
    print(f"Scraping {driver.current_url}")
    try:
        medias = WebDriverWait(driver, timeout=10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'result-row'))
        )
        for media_idx, media_elem in enumerate(medias):
            outer_html = media_elem.get_attribute('outerHTML')
            result = scrap_newspaper(outer_html)  # some function to retrieve results
            SEARCH_RESULTS[f"result_{media_idx}"] = result
    except (exceptions.StaleElementReferenceException,
            exceptions.NoSuchElementException,
            exceptions.TimeoutException,
            exceptions.SessionNotCreatedException,
            exceptions.WebDriverException) as e:
        # the original chain of identical except blocks, collapsed into one
        print(f"Selenium: {type(e).__name__}: {e.args}")
        return
    except Exception as e:
        print(f"Selenium: {type(e).__name__} line {e.__traceback__.tb_lineno} of {__file__}: {e.args}")
        return
    print(f"\t\tFound {len(medias)} media(s) => {len(SEARCH_RESULTS)} search result(s)\tElapsed_t: {time.time()-st_t:.2f} s")
    return SEARCH_RESULTS

if __name__ == '__main__':
    url = 'https://digi.kansalliskirjasto.fi/search?query=economic%20crisis&orderBy=RELEVANCE'
    get_all_search_details(URL=url)
This approach works but seems to be very time-consuming and inefficient:
Found 20 media(s) => 20 search result(s) Elapsed_t: 15.22 s
This is an answer for Question #2 only, since #1 and #3 (as @Prophet has already said in a comment) are not valid for SO.
Since you're dealing with dynamic content, find_elements is not what you need. Try to wait for the required data to appear:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

medias = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'media')))
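From there you can pull each hit's markup just as the update above does, for example (a small sketch, assuming the wait succeeded):
for idx, media in enumerate(medias):
    # outerHTML of each result element, ready for further parsing
    print(idx, media.get_attribute('outerHTML')[:120])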
On top of the search results there is an option to download the search results as Excel; that includes the newspaper/journal metadata and the text surrounding the search hit. Could it be easier to use that than to scrape individual elements? (The Excel contains only the first 10,000 hits, though...)

Using selenium to scrape paginated table data (Python)

I have this table: https://www.londonstockexchange.com/indices/ftse-aim-all-share/constituents/table?page=1. It's paginated, and I want to scrape all the content from the table, starting from page 1 to the very end. I am trying to use XPath but can't seem to get it to work.
Here is my code, any help welcome!
from selenium import webdriver
from selenium.webdriver.common.by import By
import os

co = webdriver.ChromeOptions()
co.add_argument('--headless')
# co.add_argument('--ignore-certificate-errors')
# co.add_argument('--no-proxy-server')
# co.add_argument("--proxy-server='direct://'")
# co.add_argument("--proxy-bypass-list=*")
driver = webdriver.Chrome(executable_path="C:/Users/user/Desktop/IG Trading/chromedriver.exe", chrome_options=co)
driver.get('https://www.londonstockexchange.com/indices/ftse-aim-all-share/constituents/table?page=1')
stock_names = driver.find_elements(By.XPATH, '/html/body/app-root/app-handshake/div/app-page-content/app-filter-toggle/app-ftse-index-table/section/table')
print(stock_names)
# for stock_name in stock_names:
#     print(stock_name)
#     text = stock_name.text
#     print(text)
This is one way you can obtain that information:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as Firefox_Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
import time as t
import pandas as pd
from tqdm import tqdm
firefox_options = Firefox_Options()
# firefox_options.add_argument("--width=1500")
# firefox_options.add_argument("--height=500")
# firefox_options.headless = True
driverService = Service('chromedriver/geckodriver')
browser = webdriver.Firefox(service=driverService, options=firefox_options)
big_df = pd.DataFrame()
browser.get('https://www.londonstockexchange.com/indices/ftse-aim-all-share/constituents/table')
try:
    # accept the cookie banner if it shows up
    WebDriverWait(browser, 3).until(EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))).click()
    print('accepted cookies')
except Exception as e:
    print('no cookie button!')
t.sleep(2)
for i in tqdm(range(1, 40)):
    browser.get(f'https://www.londonstockexchange.com/indices/ftse-aim-all-share/constituents/table?page={i}')
    t.sleep(1)
    # read the rendered table's HTML straight into a dataframe
    df = pd.read_html(WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "table[class='full-width ftse-index-table-table']"))).get_attribute('outerHTML'))[0]
    big_df = pd.concat([big_df, df], axis=0, ignore_index=True)
print(big_df)
big_df.to_csv('lse_companies.csv')
print('all done')
browser.quit()
This will display the big dataframe in the terminal once all pages are scraped, and also save it as a CSV file on disk (in the same folder you are running your script from). The setup is Firefox/geckodriver on Linux; however, you can adapt it to your own setup, just observe the imports and the logic after defining the browser/driver.
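For example, adapting the driver definition to Chrome might look like this (a sketch; the chromedriver path is a placeholder for your own):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
# chrome_options.add_argument('--headless')
driverService = Service('path/to/chromedriver')  # placeholder path
browser = webdriver.Chrome(service=driverService, options=chrome_options)
# the rest of the script (cookie banner, pagination loop) stays the same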
Selenium docs: https://www.selenium.dev/documentation/
TQDM: https://pypi.org/project/tqdm/

Selenium + Flask/Falcon in Python - 502 Bad Gateway Error

I'm using Selenium to do headless scraping of a website within an endpoint of an API using Flask for Python. I made several tests and my Selenium scraping code works perfectly within a script, and also while running as an API on localhost. However, when I deploy the code on a remote server, the requests always return a 502 Bad Gateway error. It is weird because from the logging I can see that the scraping is working correctly, but the server responds with 502 before the scraping finishes processing, as if it were trying to set up a proxy and failing. I also noticed that removing the time.sleep in my code makes it return a 200, although the result could be wrong because it doesn't give Selenium the proper time to load all of the page to scrape.
I also tried to set up to use falcon instead of flask and I get a similar error. This is a sample of my recent code using Falcon:
class GetUrl(object):
    def on_get(self, req, resp):
        """
        Get Request
        :param req:
        :param resp:
        :return:
        """
        # read parameter
        req_body = req.bounded_stream.read()
        json_data = json.loads(req_body.decode('utf8'))
        url = json_data.get("url")
        # get the url
        options = Options()
        options.add_argument("--headless")
        driver = webdriver.Firefox(firefox_options=options)
        driver.get(url)
        time.sleep(5)
        result = False
        # check for outbound links
        content = driver.find_elements_by_xpath("//a[@class='_52c6']")
        if len(content) > 0:
            href = content[0].get_attribute("href")
            result = True
        driver.quit()
        # make the return
        return_doc = {"result": result}
        resp.body = json.dumps(return_doc, sort_keys=True, indent=2)
        resp.content_type = 'text/string'
        resp.append_header('Access-Control-Allow-Origin', "*")
        resp.status = falcon.HTTP_200
I saw some other similar issues like this, but even though I can see that there is a gunicorn running on my server, I don't have nginx, or at least it is not running where it should be running. And I don't think Falcon uses it. So, what exactly am I doing wrong? Some light on this issue is highly appreciated, thank you!
This might work:
from IPython.display import clear_output
import time as time
import json
!apt-get update
!apt install chromium-chromedriver
!which chromedriver
!pip install selenium
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.expected_conditions import presence_of_element_located
!pip install page_objects
import page_objects
from page_objects import PageObject, PageElement
time.sleep(1)
clear_output()
class GetUrl(object):
    def on_get(self, req, resp):
        """
        Get Request
        :param req:
        :param resp:
        :return:
        """
        # read parameter
        req_body = req.bounded_stream.read()
        json_data = json.loads(req_body.decode('utf8'))
        url = json_data.get("https://stackoverflow.com/questions/69038958/selenium-flask-falcon-in-python-502-bad-gateway-error/69546175#69546175")
        # get the url
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome('chromedriver', options=options)
        driver.implicitly_wait(3)
        driver.get("https://stackoverflow.com/questions/69038958/selenium-flask-falcon-in-python-502-bad-gateway-error/69546175#69546175")
        result = False
        # check for outbound links
        contentStorage = []
        content = driver.find_elements_by_tag_name('a')
        for i in content:
            contentStorage.append(i.get_attribute('text'))
        result = True
        # driver.quit()
        # make the return
        return_doc = {"result": result}
        resp.body = json.dumps(return_doc, sort_keys=True, indent=2)
        resp.content_type = 'text/string'
        resp.append_header('Access-Control-Allow-Origin', "*")
        resp.status = falcon.HTTP_200
However, I was testing it without using a class object, and it's using Chrome instead of Firefox:
from IPython.display import clear_output
import time as time
!apt-get update
!apt install chromium-chromedriver
!which chromedriver
!pip install selenium
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.expected_conditions import presence_of_element_located
!pip install page_objects
import page_objects
from page_objects import PageObject, PageElement
time.sleep(1)
clear_output()
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('chromedriver', options=options)
driver.implicitly_wait(3)
driver.get('https://stackoverflow.com/questions/69038958/selenium-flask-falcon-in-python-502-bad-gateway-error/69546175#69546175')
content = driver.find_elements_by_tag_name('a')
contentStorage = []
for i in content:
    contentStorage.append(i.get_attribute('text'))
#driver.quit()
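One caveat: driver.quit() is commented out in both snippets. Inside an API endpoint each request would otherwise leak a browser process, so you would typically guard the scrape with try/finally, along these lines (a sketch, assuming url holds the page to scrape):
driver = webdriver.Chrome('chromedriver', options=options)
try:
    driver.get(url)
    links = [a.get_attribute('text') for a in driver.find_elements_by_tag_name('a')]
finally:
    driver.quit()  # always release the browser, even if the scrape fails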

Fixture is not visible when fixture is defined in a different file in pytest

I have 2 files:
conftest.py
import pytest
from selenium import webdriver

driver = None

@pytest.fixture
def browserSetAndClose():
    global driver
    EXE_PATH = r'C:\Users\1602746\Softwares\chromedriver.exe'
    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_experimental_option('useAutomationExtension', False)
    driver = webdriver.Chrome(executable_path=EXE_PATH, options=chromeOptions,
                              desired_capabilities=chromeOptions.to_capabilities())
    driver.implicitly_wait(10)
    driver.maximize_window()
    driver.get('http://the-internet.herokuapp.com/')
    yield driver
    driver.quit()
xzbc.py
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from browser_factory import conftest

def test_abTest(browserSetAndClose):
    wait = WebDriverWait(conftest.driver, 15)
    wait.until(expected_conditions.element_to_be_clickable((By.XPATH, "//a[starts-with(@href, '/abtest')]"))).click()
    a = wait.until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, "h3"))).text
    assert 'A/B Test' in a
The problem is that I keep getting the error
fixture 'browserSetAndClose' not found
when I run the test case function test_abTest.
I tried everything but all in vain.
Also, I attached an image of the project structure (not reproduced here) to show that the aforementioned files are in different directories.
