How do I use driver.get to open several URLs in Chrome?
My code:
import requests
import json
import pandas as pd
from selenium import webdriver
# Configure Chrome and start it with an explicit chromedriver binary.
chromeOptions = webdriver.ChromeOptions()
chromedriver = r"C:\Users\Harrison Pollock\Downloads\Python\chromedriver_win32\chromedriver.exe"
# Reuse the path defined above instead of repeating the literal; `options=` is
# the current keyword for what used to be passed as `chrome_options=`.
driver = webdriver.Chrome(executable_path=chromedriver, options=chromeOptions)

# Collect the "self" link of every upcoming race held at one of the wanted locations.
wanted_locations = ('VIC', 'NSW', 'QLD', 'SA', 'WA', 'TAS', 'IRL')
request1 = requests.get('https://api.beta.tab.com.au/v1/recommendation-service/featured-events?jurisdiction=NSW')
# Stop early with a clear HTTP error instead of failing later on a missing key.
request1.raise_for_status()
json1 = request1.json()
links = [
    n['_links']['self']
    for n in json1['nextToGoRaces']
    if n['meeting']['location'] in wanted_locations
]

# driver.get() takes a single URL string, so load the collected links one by
# one. Every link after the first gets its own tab so that all of them stay open.
for index, link in enumerate(links):
    if index:
        driver.execute_script("window.open('about:blank', '_blank');")
        driver.switch_to.window(driver.window_handles[-1])
    driver.get(link)
Based on the comments - you'll want a class to manage your browsers, a class for your tests, then a runner to run in parallel.
Try this:
import unittest
import time
import testtools
from selenium import webdriver
class BrowserManager:
    """Keep track of several Selenium browser sessions.

    Browsers are created with ``createBrowser``, looked up again by a
    fragment of their current URL with ``getBrowserByPartialURL`` and shut
    down together with ``CloseItAllDown``.
    """

    def __init__(self):
        # Per-instance list: a class-level list would be shared by every
        # BrowserManager that is ever created.
        self.browsers = []

    def createBrowser(self, url):
        """Start a Chrome session, load ``url`` in it and remember the session.

        Returns the new browser so callers may use it directly.
        """
        browser = webdriver.Chrome()
        browser.get(url)
        self.browsers.append(browser)
        return browser

    def getBrowserByPartialURL(self, url):
        """Return the first managed browser whose current URL contains ``url``.

        Returns ``None`` when no managed browser matches.
        """
        return next(
            (browser for browser in self.browsers if url in browser.current_url),
            None,
        )

    def CloseItAllDown(self):
        """Quit every managed browser and forget about it."""
        # quit() ends the whole session (all windows plus the driver process);
        # close() would only close the current window. Popping first means a
        # browser is never quit twice, even if a later quit() raises.
        while self.browsers:
            self.browsers.pop().quit()
class UnitTest1(unittest.TestCase):
    """Demo tests that each poll one of the browsers held by the global manager ``b``."""

    def _print_current_url(self, partial_url, repeats=10):
        """Print the matching browser's URL once per second, ``repeats`` times."""
        browser = b.getBrowserByPartialURL(partial_url)
        # Fail with a readable message instead of an AttributeError on None
        # when no open browser matches the keyword.
        self.assertIsNotNone(browser, "no open browser matches %r" % partial_url)
        for _ in range(repeats):
            print(browser.current_url)
            time.sleep(1)

    def test_DoStuffOnGoogle(self):
        # Point of this is to watch the output! You'll see this and the other
        # test intermingled (proves the parallel run).
        self._print_current_url("google")

    def test_DoStuffOnYahoo(self):
        # Point of this is to watch the output! You'll see this and the other
        # test intermingled (proves the parallel run).
        self._print_current_url("yahoo")
# Create a global variable for the browsers
b = BrowserManager()

# To run the tests
if __name__ == "__main__":
    try:
        # Move this to an init/setup step to create your browsers
        b.createBrowser("https://www.google.com")
        b.createBrowser("https://www.yahoo.com")
        time.sleep(5)  # This is so you can see both open at the same time
        suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest1)
        concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((case, None) for case in suite))
        concurrent_suite.run(testtools.StreamResult())
    finally:
        # Always shut the browsers down, even when opening a page or the test
        # run itself raises; otherwise the Chrome windows are left behind.
        b.CloseItAllDown()
This code doesn't do anything exciting - it's an example of how to manage multiple browsers and run tests in parallel. It goes to the specified urls (which you should move to an init/setup), then prints out the URL it's on 10 times.
This is how you add a browser to the manager: b.createBrowser("https://www.google.com")
This is how you retrieve your browser: browser = b.getBrowserByPartialURL("google") - note it's a partial URL so you can use the domain as a keyword.
This is the output (just the first few lines- not all of it...) - It's a print URL for google then yahoo, then google then yahoo - showing that they're running at the same time:
PS C:\Git\PythonSelenium\BrowserManager> cd 'c:\Git\PythonSelenium'; & 'C:\Python38\python.exe' 'c:\Users\User\.vscode\extensions\ms-python.python-2020.7.96456\pythonFiles\lib\python\debugpy\launcher' '62426' '--' 'c:\Git\PythonSelenium\BrowserManager\BrowserManager.py'
DevTools listening on ws://127.0.0.1:62436/devtools/browser/7260dee3-368c-4f21-bd59-2932f3122b2e
DevTools listening on ws://127.0.0.1:62463/devtools/browser/9a7ce919-23bd-4fee-b302-8d7481c4afcd
https://www.google.com/
https://consent.yahoo.com/collectConsent?sessionId=3_cc-session_d548b656-8315-4eef-bb1d-82fd4c6469f8&lang=en-GB&inline=false
https://www.google.com/
https://consent.yahoo.com/collectConsent?sessionId=3_cc-session_d548b656-8315-4eef-bb1d-82fd4c6469f8&lang=en-GB&inline=false
https://www.google.com/
Related
I am trying to scrape data from this url with Python-Selenium.
» https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445
If you watch the network stream you will see that it returns an api on the back end like this https://shopee.co.id/api/v4/item/get?itemid=19137193916&shopid=323047288. How can I get the response returned by this api with selenium?
Solved!
import json
import time
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Set up Selenium webdriver with Chrome's performance (network) log enabled.
options = webdriver.ChromeOptions()
options.binary_location = "/usr/bin/brave"
options.add_argument("--ignore-certificate-errors")
# Put the logging preference on the options object: this leaves the shared
# DesiredCapabilities.CHROME dict untouched and does not depend on the
# `desired_capabilities=` keyword of webdriver.Chrome.
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
driver = webdriver.Chrome(options=options)

# Navigate to URL and monitor network flow
url = "https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445"
api_url = "https://shopee.co.id/api/v4/item/get?itemid=19137193916&shopid=323047288"
try:
    driver.get(url)
    time.sleep(3)  # Wait for the page to load

    # Find the API request in the performance log and save its response body.
    for entry in driver.get_log("performance"):
        raw_message = entry.get("message")
        if not raw_message:
            continue
        message_dict = json.loads(raw_message).get("message", {})
        if message_dict.get("method") != "Network.requestWillBeSent":
            continue
        params = message_dict.get("params", {})
        # Separate name so the page URL above is not overwritten; fall back to
        # "" because an event without a URL would make the `in` test fail.
        request_url = params.get("request", {}).get("url") or ""
        if api_url not in request_url:
            continue
        # Ask Chrome (via the DevTools protocol) for the body of that request.
        response = driver.execute_cdp_cmd(
            "Network.getResponseBody", {"requestId": params.get("requestId")}
        )
        with open("response.json", "w", encoding="utf-8") as f:
            f.write(response.get("body", ""))
finally:
    # Release the browser and the driver process even if something above fails.
    driver.quit()
I use selenium wire for this. You can do pip install selenium-wire to get it and then import it into your project and use it like so:
from seleniumwire import webdriver
# Set the option that disables response encoding, so bodies arrive decoded
sw_options = {
    'disable_encoding': True
}
# Create the driver with the selected options
driver = webdriver.Chrome(seleniumwire_options=sw_options)
# Nothing is assigned to driver.request_interceptor: selenium-wire records the
# traffic on its own, and an interceptor is only needed to modify requests.
# Navigate to page
driver.get('https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445')
# Iterate through the captured requests and find the one with the endpoint you need in the url
for a in driver.requests:
    # a.response is None while a request has not received a response.
    if "/api/v4/item/get?itemid=19137193916&shopid=323047288" in a.url and a.response:
        body = a.response.body
        print(body)
We add disable_encoding to the options because otherwise the body would come back encoded and you'd have to decode it manually, which can be done like so (decode is imported from seleniumwire.utils):
body = decode(response.body, response.headers.get('Content-Encoding', 'identity'))
Or done in the browser options as I did.
You can find more information here:
https://pypi.org/project/selenium-wire/#response-objects
I want to fetch status code from network FETCH/XHR
I want to get "Status Code: 200" from the response. Can I get this using Selenium with Python?
I tried with:
from selenium import webdriver
import os
# Path of a local chromedriver binary, built from the current working directory.
dpath = os.getcwd()+"/"+'chromedriver'
# create webdriver object
# NOTE(review): this line is not valid Python - no callable precedes the
# parenthesised keyword arguments (presumably webdriver.Chrome was intended),
# and `options` is not defined anywhere in this snippet.
driver = (executable_path=dpath,options=options)
# NOTE(review): `url` is assigned but never used; the driver.get call below loads a different site.
url = "https://pizzaonline.dominos.co.in/cart"
# NOTE(review): DesiredCapabilities is not imported in this snippet, and
# `capabilities` is built after the driver line and never handed to it, so the
# 'performance' logging preference has no visible effect here.
capabilities = DesiredCapabilities.CHROME.copy()
capabilities['goog:loggingPrefs'] = {'performance': 'ALL'}
# get geeksforgeeks.org
driver.get("https://www.geeksforgeeks.org/")
# get browser log
# NOTE(review): this reads the "browser" log, while the preference configured
# above is for the "performance" log - confirm which one is wanted.
logs = driver.get_log("browser")
but it does not work.
You can use requests package within python:
import requests
url = 'your url'
# A timeout keeps the call from blocking forever when the server never answers.
status_code = requests.get(url, timeout=10).status_code
print(status_code) #This will just print the status code of the url
#do your stuff here
I would like to download data from the http://ec.europa.eu/taxation_customs/vies/ site. The problem is that when I enter data on it through my program the URL doesn't change, so the file saved on disk shows the same page that was opened at the beginning, without the data. Maybe I don't know how to access this site after adding the data? I'm new to Python and tried to look for a solution but with no result, so if this issue has come up before, please link me to it. Here's my code. I appreciate all responses :)
import requests
import selenium
import select as something
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import pdfkit
# NOTE(review): this URL (with "?locale=pl") is only used by requests.get
# below; the browser loads the address without the locale suffix.
url = "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
# Start Chrome with an explicit chromedriver path and open the VIES form.
driver = webdriver.Chrome(executable_path ="C:\\Users\\Python\\Chromedriver.exe")
driver.get("http://ec.europa.eu/taxation_customs/vies/")
#wait = WebDriverWait(driver, 10)
# Wrap the country drop-down and pick its entry at index 1.
obj = Select(driver.find_element_by_id("countryCombobox"))
# NOTE(review): obj is rebound to the return value of select_by_index
# (presumably None); it is not used again below.
obj = obj.select_by_index(1)
# NOTE(review): vies_r is never read afterwards, and this request is made
# outside the browser session.
vies_r = requests.get(url)
# Type the VAT number into the form and submit it.
vies_vat = driver.find_element_by_id("number")
vies_vat.send_keys('U54799909')
vies_verify = driver.find_element_by_id("submit")
vies_verify.click()
# Location of the wkhtmltopdf executable handed to pdfkit.
path_wkhtmltopdf = r'C:\Users\Python\wkhtmltox\wkhtmltox\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_wkhtmltopdf)
print(driver.current_url)
# NOTE(review): only the URL string is passed to pdfkit, not the page content
# held by the browser. Presumably pdfkit loads that URL afresh, which would
# explain a PDF without the submitted data; rendering driver.page_source
# (e.g. with pdfkit.from_string) may be what is wanted - confirm.
pdfkit.from_url(driver.current_url, "out.pdf", configuration=config)
Ukalo
I am building a Python script and want to split up certain functions into separate files to make maintenance easier.
I have two files currently called main.py and function1.py
main.py
#Setup Imports
import os
import os.path
import sys
# Import Functions
from function1 import myfunction
#Setup Selenium
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
#Launch Firefox
# NOTE(review): indentation was lost in this listing; presumably the two lines
# after `def` form the body of init_driver, which creates and returns a
# Firefox WebDriver.
def init_driver():
driver = webdriver.Firefox()
return driver
# Pages to visit.
url_list = ['http://www.example.com/page1', 'http://www.example.com/contact', 'http://www.example.com/about', 'http://www.example.com/test'];
driver = init_driver()
# Init Blank List
checked_urls = []
# NOTE(review): the loop variable `url` is not passed to myfunction, which
# receives only the driver.
for url in url_list:
myfunction(driver)
print(checked_urls)
function1.py
# Loads a page, reads the text of the "#phrase" element and records a marker
# in checked_urls.
# NOTE(review): `url` is neither a parameter nor defined in this file, and the
# same holds for `checked_urls`.
# NOTE(review): both branches append the same 'PHRASE_FOUND' literal.
# NOTE(review): indentation was lost in this listing.
def myfunction(driver):
driver.get(url)
htmlText = driver.find_element_by_css_selector("#phrase").text
if "This Is My Phrase" in htmlText:
checked_urls.extend(['PHRASE_FOUND'])
else:
checked_urls.extend(['PHRASE_FOUND'])
I am trying to get it to visit each URL in the list and check for This Is My Phrase on the page. If it finds it then it should add to the list.
I am seeing the following error when running the script...
NameError: name 'url' is not defined
I am pretty sure it's related to the way I am importing the separate function but can't work out whats wrong, can anyone help?
You have to also pass url variable to myfunction:
def myfunction(driver, url, checked_urls=None):
    """Load ``url`` and report whether the "#phrase" element contains the phrase.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to check.
        checked_urls: Optional list that the result is appended to. A list
            created in another module is not visible from here, so it has to
            be handed in (or the return value used instead).

    Returns:
        ``'PHRASE_FOUND'`` when "This Is My Phrase" occurs in the element's
        text, ``'PHRASE_NOT_FOUND'`` otherwise.
    """
    driver.get(url)
    # find_element(by, value) is available in both Selenium 3 and Selenium 4,
    # unlike the find_element_by_* helpers.
    html_text = driver.find_element("css selector", "#phrase").text
    # The two outcomes need different markers, otherwise the result says nothing.
    status = 'PHRASE_FOUND' if "This Is My Phrase" in html_text else 'PHRASE_NOT_FOUND'
    if checked_urls is not None:
        checked_urls.append(status)
    return status
Then in main file:
for url in url_list:
    # Hand the current URL over explicitly; the function cannot see this loop
    # variable on its own.
    myfunction(driver, url)
I think some code should be corrected:
First, delete the blank space before url_list:
#url_list = ['http://www.example.com/page1', 'http://www.example.com/contact', 'http://www.example.com/about', 'http://www.example.com/test'];
url_list = ['http://www.example.com/page1', 'http://www.example.com/contact', 'http://www.example.com/about', 'http://www.example.com/test'];
Then, the url is a local variable, it's not directly accessible in the function myfunction. But it can be accessed as a function parameter:
def myfunction(driver, url):
...
Now I am working on tests on python. I am using BrowserMob Proxy and Selenium to capture HTTP
requests.
robot_globals = {'proxy': None, 'selenium': None}
def robot_setup():
    """Start BrowserMob Proxy and a Selenium browser configured with it.

    The browser is chosen by ``settings.BROWSER_TO_TEST``.

    Returns:
        tuple: ``(selenium, proxy)`` - the WebDriver instance and the proxy
        object created by the BrowserMob server.

    Raises:
        ValueError: if ``settings.BROWSER_TO_TEST`` is neither ``'FIREFOX'``
            nor ``'CHROME'``.
    """
    server = Server(settings.BROWSERMOB_PROXY_PATH, options={'port': settings.BROWSERMOB_PROXY_PORT})
    server.start()
    try:
        proxy = server.create_proxy()
        # NOTE(review): the return value is discarded - confirm whether it was
        # meant to be passed to the Firefox driver below.
        proxy.selenium_proxy()
        if settings.BROWSER_TO_TEST == 'FIREFOX':
            from selenium.webdriver.firefox.webdriver import WebDriver
            selenium = WebDriver(proxy=proxy, timeout=10)
        elif settings.BROWSER_TO_TEST == 'CHROME':
            from selenium.webdriver.chrome.webdriver import WebDriver
            from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
            # Work on a copy so the proxy settings are not written into the
            # class-level dict shared by every other user of DesiredCapabilities.
            capabilities = DesiredCapabilities.CHROME.copy()
            proxy.add_to_capabilities(capabilities)
            selenium = WebDriver(executable_path=settings.CHROME_DRIVER_PATH,
                                 desired_capabilities=capabilities,
                                 service_log_path=settings.DRIVER_LOG_PATH)
        else:
            # Without this branch `selenium` would be unbound further down.
            raise ValueError('Unsupported BROWSER_TO_TEST: %r' % (settings.BROWSER_TO_TEST,))
        selenium.maximize_window()
    except Exception:
        # Do not leave the proxy server process running when set-up fails.
        server.stop()
        raise
    return (selenium, proxy)
class BaseGATestCase(unittest.TestCase):
def setUp(self):
self.proxy = robot_globals['proxy']
self.selenium = robot_globals['selenium']
....
# Home-page test: calls new_har('home_page') on the proxy (presumably starting
# a HAR capture under that name), then loads the root path of
# settings.SERVER_URL_TO_TEST in the browser.
# NOTE(review): indentation was lost in this listing.
class TestHomePage(BaseGATestCase):
def test_01_homepage_utme_vars(self):
self.proxy.new_har('home_page')
self.selenium.get('%s%s' % (settings.SERVER_URL_TO_TEST, '/'))
This code usually works correctly. But once or twice a month the system launches the browser
but does not load the URL in it. The browser just waits and the page never gets loaded.
However, the browser can load the page without self.proxy.new_har('..'). Code like this works:
# Same home-page test without any proxy call: it only loads the root path of
# settings.SERVER_URL_TO_TEST in the browser.
# NOTE(review): indentation was lost in this listing.
class TestHomePage(BaseGATestCase):
def test_01_homepage_utme_vars(self):
self.selenium.get('%s%s' % (settings.SERVER_URL_TO_TEST, '/'))
server.log:
INFO 10/09 03:09:18 n.l.b.p.j.h.HttpSer~ - Version Jetty/5.1.x
INFO 10/09 03:09:18 n.l.b.p.j.u.Contain~ - Started HttpContext[/,/]
INFO 10/09 03:09:18 n.l.b.p.j.h.SocketL~ - Started SocketListener on 0.0.0.0:9159
INFO 10/09 03:09:18 n.l.b.p.j.u.Contain~ - Started net.lightbody.bmp.proxy.jetty.jetty.Server#6a1192e9
INFO 10/09 03:10:25 n.l.b.p.j.u.Threade~ - Stopping Acceptor ServerSocket[addr=0.0.0.0/0.0.0.0,localport=9154]
It is really weird for me because last time I couldn't fix this problem, but it was fixed itself the next day. I do not understand why. And now I have the same problem. It would be great if anyone knows how I can fix this problem. Thanks!