How to download embedded PDF from webpage using selenium?

How to download embedded PDF from webpage using selenium? - python

I want to download embedded PDF from a webpage using selenium just like in this image.
Embedded PDF image
For example, page like this:
https://www.sebi.gov.in/enforcement/orders/jun-2019/adjudication-order-in-respect-of-three-entities-in-the-matter-of-prism-medico-and-pharmacy-ltd-_43323.html
I tried the code mentioned below but it did not work out.
def download_pdf(lnk):
from selenium import webdriver
from time import sleep
options = webdriver.ChromeOptions()
download_folder = "/*My folder*/"
profile = {"plugins.plugins_list": [{"enabled": False,
"name": "Chrome PDF Viewer"}],
"download.default_directory": download_folder,
"download.extensions_to_open": ""}
options.add_experimental_option("prefs", profile)
print("Downloading file from link: {}".format(lnk))
driver = webdriver.Chrome('/*Path of chromedriver*/',chrome_options = options)
driver.get(lnk)
imp_by1 = driver.find_element_by_id("secondaryToolbarToggle")
imp_by1.click()
imp_by = driver.find_element_by_id("secondaryDownload")
imp_by.click()
print("Status: Download Complete.")
driver.close()
download_pdf('https://www.sebi.gov.in/enforcement/orders/jun-2019/adjudication-order-in-respect-of-three-entities-in-the-matter-of-prism-medico-and-pharmacy-ltd-_43323.html')
Any help is appreciated.
Thanks in advance!!

Here You go, description in code:
=^..^=
from selenium import webdriver
import os
# initialise browser
browser = webdriver.Chrome(os.getcwd()+'/chromedriver')
# load page with iframe
browser.get('https://www.sebi.gov.in/enforcement/orders/jun-2019/adjudication-order-in-respect-of-three-entities-in-the-matter-of-prism-medico-and-pharmacy-ltd-_43323.html')
# find pdf url
pdf_url = browser.find_element_by_tag_name('iframe').get_attribute("src")
# load page with pdf
browser.get(pdf_url)
# download file
download = browser.find_element_by_xpath('//*[#id="download"]')
download.click()

Here is another way to grab the file without clicking/downloading. This method also helps you to download the file to your local machine if your tests are executed in Selenium Grid (remote nodes).
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
public class FileDownloader extends MyPage(){
public void downloadFile(){
//grab the file download url from your download icon/button/element
String src = iframe.getAttribute("src");
driver.get(src); //driver object from 'MyPage.java'
// Grab cookies from current driver session (authenticated cookie information
// is vital to download the file from 'src'
StringBuilder cookies = new StringBuilder();
for (Cookie cookie : driver.manage().getCookies()){
String value = cookie.getName() + "=" + cookie.getValue();
if (cookies.length() == 0 )
cookies.append(value);
else
cookies.append(";").append(value);
}
try{
HttpURLConnection con = (HttpURLConnection) new URL(src).openConnection();
con.setRequestMethod("GET");
con.addRequestProperty("Cookie",cookies.toString());
//set your own download path, probably a dynamic file name with timestamp
String downloadPath = System.getProperty("user.dir") + File.separator + "file.pdf";
OutputStream outputStream = new FileOutputStream(new File(downloadPath));
InputStream inputStream = con.getInputStream();
int BUFFER_SIZE = 4096;
byte[] buffer = new byte[BUFFER_SIZE];
int bytesRead = -1;
while((bytesRead = inputStream.read(buffer)) != -1)
outputStream.write(buffer, 0, bytesRead);
outputStream.close();
}catch(Exception e){
// file download failed.
}
}
}
Here is how my dom looks like
<iframe src="/files/downloads/pdfgenerator.aspx" id="frame01">
#document
<html>
<body>
<embed width="100%" height ="100%" src="about:blank" type="application/pdf" internalid="1234567890">
</body>
</html>
</iframe>

Related

Is there any way to download the audio from a certain page

I am working on a selenium script with python, and want to download the audio coming from a certain page.
the page looks like this :
the HTML code of the page :
<html>
<head>
<meta name="viewport" content="width=device-width">
</head>
<body>
<video controls="" autoplay="" name="media">
<source src="https://website//id=47c484fc7f8f" type="audio/mp3">
</video>
</body>
</html>
my code so far:
from seleniumwire import webdriver
import sys
from webdriver_manager.chrome import ChromeDriverManager
import time
import pyaudio
import wave
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--headless")
# for linux/Ubuntu only
#chrome_options.add_argument("--no-sandbox")
browser = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=chrome_options)
browser.get("website")
search = browser.find_element_by_id("text-area")
search.clear()
text = input("text here : ")
search.send_keys(text)
#print(data)
time.sleep(2)
browser.find_element_by_id("btn").click()
# Access and print requests via the `requests` attribute
for request in browser.requests:
if request.response and request.url.__contains__('website//id'):
browser.get(request.url)
I am open to work with any language to achieve the goal

You don't need Selenium for this, requests library is enough. You must provide a unique identifier to your post request as sessionID, so you can pick up the generated file in the next get request.
Use the following snippet as an example, it saves the generated file under provided sessionID name.
import requests
sessionID = '78aa8dd0-9529-11eb-a8b3-0242ac130003'
payload = {'ssmlText': '<prosody pitch=\"default\" rate=\"-0%\">Roses are red, violets are blue</prosody>', 'sessionID': sessionID}
r1 = requests.post("https://www.ibm.com/demos/live/tts-demo/api/tts/store", data = payload)
r1.raise_for_status()
print(r1.status_code, r1.reason)
tts_url = 'https://www.ibm.com/demos/live/tts-demo/api/tts/newSynthesize?voice=en-US_OliviaV3Voice&id=' + sessionID
try:
r2 = requests.get(tts_url, timeout = 10, cookies = r1.cookies)
print(r2.status_code, r2.reason)
try:
with open(sessionID + '.mp3', "w+b") as f:
f.write(r2.content)
except IOError:
print("IOError: could not write a file")
except requests.exceptions.Timeout as err:
print("Timeout: could not get response from the server")

Login to website with Python and Selenium and subprocess problem [duplicate]

Does anybody know if Selenium (WebDriver preferably) is able to communicate with and act through a browser that is already running before launching a Selenium Client?
I mean if Selenium is able to comunicate with a browser without using the Selenium Server (with could be an Internet Explorer launched manually for example).

This is a duplicate answer
**Reconnect to a driver in python selenium ** This is applicable on all drivers and for java api.
open a driver
driver = webdriver.Firefox() #python
extract to session_id and _url from driver object.
url = driver.command_executor._url #"http://127.0.0.1:60622/hub"
session_id = driver.session_id #'4e167f26-dc1d-4f51-a207-f761eaf73c31'
Use these two parameter to connect to your driver.
driver = webdriver.Remote(command_executor=url,desired_capabilities={})
driver.close() # this prevents the dummy browser
driver.session_id = session_id
And you are connected to your driver again.
driver.get("http://www.mrsmart.in")

This is a pretty old feature request: Allow webdriver to attach to a running browser . So it's officially not supported.
However, there is some working code which claims to support this: https://web.archive.org/web/20171214043703/http://tarunlalwani.com/post/reusing-existing-browser-session-selenium-java/.

This snippet successfully allows to reuse existing browser instance yet avoiding raising the duplicate browser. Found at Tarun Lalwani's blog.
from selenium import webdriver
from selenium.webdriver.remote.webdriver import WebDriver
# executor_url = driver.command_executor._url
# session_id = driver.session_id
def attach_to_session(executor_url, session_id):
original_execute = WebDriver.execute
def new_command_execute(self, command, params=None):
if command == "newSession":
# Mock the response
return {'success': 0, 'value': None, 'sessionId': session_id}
else:
return original_execute(self, command, params)
# Patch the function before creating the driver object
WebDriver.execute = new_command_execute
driver = webdriver.Remote(command_executor=executor_url, desired_capabilities={})
driver.session_id = session_id
# Replace the patched function with original function
WebDriver.execute = original_execute
return driver
bro = attach_to_session('http://127.0.0.1:64092', '8de24f3bfbec01ba0d82a7946df1d1c3')
bro.get('http://ya.ru/')

It is possible. But you have to hack it a little, there is a code
What you have to do is to run stand alone server and "patch" RemoteWebDriver
public class CustomRemoteWebDriver : RemoteWebDriver
{
public static bool newSession;
public static string capPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "TestFiles", "tmp", "sessionCap");
public static string sessiodIdPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "TestFiles", "tmp", "sessionid");
public CustomRemoteWebDriver(Uri remoteAddress)
: base(remoteAddress, new DesiredCapabilities())
{
}
protected override Response Execute(DriverCommand driverCommandToExecute, Dictionary<string, object> parameters)
{
if (driverCommandToExecute == DriverCommand.NewSession)
{
if (!newSession)
{
var capText = File.ReadAllText(capPath);
var sidText = File.ReadAllText(sessiodIdPath);
var cap = JsonConvert.DeserializeObject<Dictionary<string, object>>(capText);
return new Response
{
SessionId = sidText,
Value = cap
};
}
else
{
var response = base.Execute(driverCommandToExecute, parameters);
var dictionary = (Dictionary<string, object>) response.Value;
File.WriteAllText(capPath, JsonConvert.SerializeObject(dictionary));
File.WriteAllText(sessiodIdPath, response.SessionId);
return response;
}
}
else
{
var response = base.Execute(driverCommandToExecute, parameters);
return response;
}
}
}

From here, if the browser was manually opened, then remote debugging can be used:
Start chrome with
chrome --remote-debugging-port=9222
Or with optional profile
chrome.exe --remote-debugging-port=9222 --user-data-dir="C:\selenium\ChromeProfile"
Then:
Java:
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
//Change chrome driver path accordingly
System.setProperty("webdriver.chrome.driver", "C:\\selenium\\chromedriver.exe");
ChromeOptions options = new ChromeOptions();
options.setExperimentalOption("debuggerAddress", "127.0.0.1:9222");
WebDriver driver = new ChromeDriver(options);
System.out.println(driver.getTitle());
Python:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
#Change chrome driver path accordingly
chrome_driver = "C:\chromedriver.exe"
driver = webdriver.Chrome(chrome_driver, chrome_options=chrome_options)
print driver.title

Inspired by Eric's answer, here is my solution to this problem for selenium 3.7.0. Compared with the solution at http://tarunlalwani.com/post/reusing-existing-browser-session-selenium/, the advantage is that there won't be a blank browser window each time I connect to the existing session.
import warnings
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.remote.errorhandler import ErrorHandler
from selenium.webdriver.remote.file_detector import LocalFileDetector
from selenium.webdriver.remote.mobile import Mobile
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.switch_to import SwitchTo
from selenium.webdriver.remote.webdriver import WebDriver
# This webdriver can directly attach to an existing session.
class AttachableWebDriver(WebDriver):
def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
desired_capabilities=None, browser_profile=None, proxy=None,
keep_alive=False, file_detector=None, session_id=None):
"""
Create a new driver that will issue commands using the wire protocol.
:Args:
- command_executor - Either a string representing URL of the remote server or a custom
remote_connection.RemoteConnection object. Defaults to 'http://127.0.0.1:4444/wd/hub'.
- desired_capabilities - A dictionary of capabilities to request when
starting the browser session. Required parameter.
- browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
Only used if Firefox is requested. Optional.
- proxy - A selenium.webdriver.common.proxy.Proxy object. The browser session will
be started with given proxy settings, if possible. Optional.
- keep_alive - Whether to configure remote_connection.RemoteConnection to use
HTTP keep-alive. Defaults to False.
- file_detector - Pass custom file detector object during instantiation. If None,
then default LocalFileDetector() will be used.
"""
if desired_capabilities is None:
raise WebDriverException("Desired Capabilities can't be None")
if not isinstance(desired_capabilities, dict):
raise WebDriverException("Desired Capabilities must be a dictionary")
if proxy is not None:
warnings.warn("Please use FirefoxOptions to set proxy",
DeprecationWarning)
proxy.add_to_capabilities(desired_capabilities)
self.command_executor = command_executor
if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
self.command_executor._commands['GET_SESSION'] = ('GET', '/session/$sessionId') # added
self._is_remote = True
self.session_id = session_id # added
self.capabilities = {}
self.error_handler = ErrorHandler()
self.start_client()
if browser_profile is not None:
warnings.warn("Please use FirefoxOptions to set browser profile",
DeprecationWarning)
if session_id:
self.connect_to_session(desired_capabilities) # added
else:
self.start_session(desired_capabilities, browser_profile)
self._switch_to = SwitchTo(self)
self._mobile = Mobile(self)
self.file_detector = file_detector or LocalFileDetector()
self.w3c = True # added hardcoded
def connect_to_session(self, desired_capabilities):
response = self.execute('GET_SESSION', {
'desiredCapabilities': desired_capabilities,
'sessionId': self.session_id,
})
# self.session_id = response['sessionId']
self.capabilities = response['value']
To use it:
if use_existing_session:
browser = AttachableWebDriver(command_executor=('http://%s:4444/wd/hub' % ip),
desired_capabilities=(DesiredCapabilities.INTERNETEXPLORER),
session_id=session_id)
self.logger.info("Using existing browser with session id {}".format(session_id))
else:
browser = AttachableWebDriver(command_executor=('http://%s:4444/wd/hub' % ip),
desired_capabilities=(DesiredCapabilities.INTERNETEXPLORER))
self.logger.info('New session_id : {}'.format(browser.session_id))

It appears that this feature is not officially supported by selenium. But, Tarun Lalwani has created working Java code to provide the feature. Refer - http://tarunlalwani.com/post/reusing-existing-browser-session-selenium-java/
Here is the working sample code, copied from the above link:
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.remote.*;
import org.openqa.selenium.remote.http.W3CHttpCommandCodec;
import org.openqa.selenium.remote.http.W3CHttpResponseCodec;
import java.io.IOException;
import java.lang.reflect.Field;
import java.net.URL;
import java.util.Collections;
public class TestClass {
public static RemoteWebDriver createDriverFromSession(final SessionId sessionId, URL command_executor){
CommandExecutor executor = new HttpCommandExecutor(command_executor) {
#Override
public Response execute(Command command) throws IOException {
Response response = null;
if (command.getName() == "newSession") {
response = new Response();
response.setSessionId(sessionId.toString());
response.setStatus(0);
response.setValue(Collections.<String, String>emptyMap());
try {
Field commandCodec = null;
commandCodec = this.getClass().getSuperclass().getDeclaredField("commandCodec");
commandCodec.setAccessible(true);
commandCodec.set(this, new W3CHttpCommandCodec());
Field responseCodec = null;
responseCodec = this.getClass().getSuperclass().getDeclaredField("responseCodec");
responseCodec.setAccessible(true);
responseCodec.set(this, new W3CHttpResponseCodec());
} catch (NoSuchFieldException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
}
} else {
response = super.execute(command);
}
return response;
}
};
return new RemoteWebDriver(executor, new DesiredCapabilities());
}
public static void main(String [] args) {
ChromeDriver driver = new ChromeDriver();
HttpCommandExecutor executor = (HttpCommandExecutor) driver.getCommandExecutor();
URL url = executor.getAddressOfRemoteServer();
SessionId session_id = driver.getSessionId();
RemoteWebDriver driver2 = createDriverFromSession(session_id, url);
driver2.get("http://tarunlalwani.com");
}
}
Your test needs to have a RemoteWebDriver created from an existing browser session. To create that Driver, you only need to know the "session info", i.e. address of the server (local in our case) where the browser is running and the browser session id. To get these details, we can create one browser session with selenium, open the desired page, and then finally run the actual test script.
I don't know if there is a way to get session info for a session which was not created by selenium.
Here is an example of session info:
Address of remote server : http://localhost:24266. The port number is different for each session.
Session Id : 534c7b561aacdd6dc319f60fed27d9d6.

All the solutions so far were lacking of certain functionality.
Here is my solution:
public class AttachedWebDriver extends RemoteWebDriver {
public AttachedWebDriver(URL url, String sessionId) {
super();
setSessionId(sessionId);
setCommandExecutor(new HttpCommandExecutor(url) {
#Override
public Response execute(Command command) throws IOException {
if (command.getName() != "newSession") {
return super.execute(command);
}
return super.execute(new Command(getSessionId(), "getCapabilities"));
}
});
startSession(new DesiredCapabilities());
}
}

Javascript solution:
I have successfully attached to existing browser session using this function
webdriver.WebDriver.attachToSession(executor, session_id);
Documentation can be found here.

I got a solution in python, I modified the webdriver class bassed on PersistenBrowser class that I found.
https://github.com/axelPalmerin/personal/commit/fabddb38a39f378aa113b0cb8d33391d5f91dca5
replace the webdriver module /usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py
Ej. to use:
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
runDriver = sys.argv[1]
sessionId = sys.argv[2]
def setBrowser():
if eval(runDriver):
webdriver = w.Remote(command_executor='http://localhost:4444/wd/hub',
desired_capabilities=DesiredCapabilities.CHROME,
)
else:
webdriver = w.Remote(command_executor='http://localhost:4444/wd/hub',
desired_capabilities=DesiredCapabilities.CHROME,
session_id=sessionId)
url = webdriver.command_executor._url
session_id = webdriver.session_id
print url
print session_id
return webdriver

Use Chrome's built in remote debugging. Launch Chrome with remote debugging port open. I did this on OS X:
sudo nohup /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 &
Tell Selenium to use the remote debugging port:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--remote-debugging-port=9222')
driver = webdriver.Chrome("./chromedriver", chrome_options=options)

I'm using Rails + Cucumber + Selenium Webdriver + PhantomJS, and I've been using a monkey-patched version of Selenium Webdriver, which keeps PhantomJS browser open between test runs. See this blog post: http://blog.sharetribe.com/2014/04/07/faster-cucumber-startup-keep-phantomjs-browser-open-between-tests/
See also my answer to this post: How do I execute a command on already opened browser from a ruby file

Solution using Python programming language.
from selenium import webdriver
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
executor_url = "http://localhost:4444/wd/hub"
# Create a desired capabilities object as a starting point.
capabilities = DesiredCapabilities.FIREFOX.copy()
capabilities['platform'] = "WINDOWS"
capabilities['version'] = "10"
# ------------------------ STEP 1 --------------------------------------------------
# driver1 = webdriver.Firefox()
driver1 = webdriver.Remote(command_executor=executor_url, desired_capabilities=capabilities)
driver1.get('http://google.com/')
url = driver1.command_executor._url
print(driver1.command_executor._url)
print(driver1.session_id)
print(driver1.title)
# Serialize the session id in a file
session_id = driver1.session_id
# ------------------ END OF STEP 1 --------------------------------------------------
# Pass the session id from step 1 to step 2
# ------------------------ STEP 2 --------------------------------------------------
def attach_to_session(executor_url, session_id):
original_execute = WebDriver.execute
def new_command_execute(self, command, params=None):
if command == "newSession":
# Mock the response
return {'success': 0, 'value': None, 'sessionId': session_id}
else:
return original_execute(self, command, params)
# Patch the function before creating the driver object
WebDriver.execute = new_command_execute
temp_driver = webdriver.Remote(command_executor=executor_url)
# Replace the patched function with original function
WebDriver.execute = original_execute
return temp_driver
# read the session id from the file
driver2 = attach_to_session(executor_url, existing_session_id)
driver2.get('http://msn.com/')
print(driver2.command_executor._url)
print(driver2.session_id)
print(driver2.title)
driver2.close()
# ------------------ END OF STEP 2 --------------------------------------------------

After trying most of these solutions, this solution has worked for me the best. Thanks to #Ahmed_Ashour.
For those who are struggling with this problem, here are a few tips to make your life a bit easier:
1- use a driver manager instead of a manually installed driver (to avoid compatibility issues)
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install(),options=chrome_options)
2- Make sure to close the running chrome instance before starting the new one with the debugging port
chrome.exe --remote-debugging-port=9222 --user-data-dir="C:\selenum\ChromeProfile"

This is pretty easy using the JavaScript selenium-webdriver client:
First, make sure you have a WebDriver server running. For example, download ChromeDriver, then run chromedriver --port=9515.
Second, create the driver like this:
var driver = new webdriver.Builder()
.withCapabilities(webdriver.Capabilities.chrome())
.usingServer('http://localhost:9515') // <- this
.build();
Here's a complete example:
var webdriver = require('selenium-webdriver');
var driver = new webdriver.Builder()
.withCapabilities(webdriver.Capabilities.chrome())
.usingServer('http://localhost:9515')
.build();
driver.get('http://www.google.com');
driver.findElement(webdriver.By.name('q')).sendKeys('webdriver');
driver.findElement(webdriver.By.name('btnG')).click();
driver.getTitle().then(function(title) {
console.log(title);
});
driver.quit();

How to assert image present in selenium python web driver?

I want to assert that an image is present on a page.
I'm using selenium web driver with python.
I also want a message printed when the image has not been loaded.
Can anyone help me with a script?
Here is a piece of html code that displays an image on a website:
[img src='photo.jpg' class='image class']

I can't help with python, but can give a general suggestion and Java code for those who are looking to do something similar in Java. My approach would be
Get the src attribute of the image and make a quick HTTP GET
call and make sure you get 200 OK response.
Once thats done, you
could check the naturalWidthattribute of the WebElement using
Javascript. If its more than 0, its rendered fine. However this
works differently in IE. IE has .complete attribute. It will be
true if the image is rendered (Reference here)
Here is sample Java code,
#Test
public void test()
{
WebDriver driver = new FirefoxDriver();
String url = "http://www.espncricinfo.com/";
driver.get(url);
WebElement img = driver.findElement(By.cssSelector("a[title='ESPN Cricinfo']>img"));
String src = img.getAttribute("src");
Client client = Client.create();
WebResource resource = client.resource(src);
assertThat("Response code is not 200 OK", resource.get(ClientResponse.class).getStatus(), equalTo(200));
assertThat("Image is not rendered correctly", isImageVisible(driver, img),equalTo(true));
driver.quit();
}
public boolean isImageVisible(WebDriver driver, WebElement image)
{
Boolean result = null;
if (driver instanceof InternetExplorerDriver || ((RemoteWebDriver) driver).getCapabilities().getBrowserName().equals("internet explorer"))
{
result = (Boolean) ((JavascriptExecutor) driver).executeScript("return arguments[0].complete;", image);
}
else
{ //other browser types use diff method to check
result = (Boolean) ((JavascriptExecutor) driver).executeScript("return (typeof arguments[0].naturalWidth!=\"undefined\" && arguments[0].naturalWidth>0);", image);
}
return result.booleanValue();
}
}

There's probably a better way to do this but I decided to retrieve all the image tags.
all_images = self.driver.execute_script("return document.getElementsByTagName('img')")
for image in range(0, len(all_images)):
print self.driver.execute_script("return document.getElementsByTagName('img')[{}].src".format(image))

Left out driver setup but this should work once you add yours in. Partial credit to Nilesh for pointing me in right direction with his Java example.
import requests
import unittest
from selenium import webdriver
from selenium.webdriver.common.by import By
def setUp(self):
self.verificationErrors = []
def test_images_for_200_response(self):
driver.get('http:example.com')
example_images = driver.find_elements(By.TAG_NAME, 'img')
for image in example_images:
current_link = image.get_attribute("src")
r = requests.get(current_link)
try: self.assertEqual(r.status_code, 200)
except AssertionError, e: self.verificationErrors.append(current_link + ' delivered response code of ' + r.status_code)
def tearDown(self):
driver.quit()
self.assertEqual([], self.verificationErrors)

How can I download a file on a click event using selenium?

I am working on python and selenium. I want to download file from clicking event using selenium. I wrote following code.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
browser = webdriver.Firefox()
browser.get("http://www.drugcite.com/?q=ACTIMMUNE")
browser.close()
I want to download both files from links with name "Export Data" from given url. How can I achieve it as it works with click event only?

Find the link using find_element(s)_by_*, then call click method.
from selenium import webdriver
# To prevent download dialog
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2) # custom location
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', '/tmp')
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/csv')
browser = webdriver.Firefox(profile)
browser.get("http://www.drugcite.com/?q=ACTIMMUNE")
browser.find_element_by_id('exportpt').click()
browser.find_element_by_id('exporthlgt').click()
Added profile manipulation code to prevent download dialog.

I'll admit this solution is a little more "hacky" than the Firefox Profile saveToDisk alternative, but it works across both Chrome and Firefox, and doesn't rely on a browser-specific feature which could change at any time. And if nothing else, maybe this will give someone a little different perspective on how to solve future challenges.
Prerequisites: Ensure you have selenium and pyvirtualdisplay installed...
Python 2: sudo pip install selenium pyvirtualdisplay
Python 3: sudo pip3 install selenium pyvirtualdisplay
The Magic
import pyvirtualdisplay
import selenium
import selenium.webdriver
import time
import base64
import json
root_url = 'https://www.google.com'
download_url = 'https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png'
print('Opening virtual display')
display = pyvirtualdisplay.Display(visible=0, size=(1280, 1024,))
display.start()
print('\tDone')
print('Opening web browser')
driver = selenium.webdriver.Firefox()
#driver = selenium.webdriver.Chrome() # Alternately, give Chrome a try
print('\tDone')
print('Retrieving initial web page')
driver.get(root_url)
print('\tDone')
print('Injecting retrieval code into web page')
driver.execute_script("""
window.file_contents = null;
var xhr = new XMLHttpRequest();
xhr.responseType = 'blob';
xhr.onload = function() {
var reader = new FileReader();
reader.onloadend = function() {
window.file_contents = reader.result;
};
reader.readAsDataURL(xhr.response);
};
xhr.open('GET', %(download_url)s);
xhr.send();
""".replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ') % {
'download_url': json.dumps(download_url),
})
print('Looping until file is retrieved')
downloaded_file = None
while downloaded_file is None:
# Returns the file retrieved base64 encoded (perfect for downloading binary)
downloaded_file = driver.execute_script('return (window.file_contents !== null ? window.file_contents.split(\',\')[1] : null);')
print(downloaded_file)
if not downloaded_file:
print('\tNot downloaded, waiting...')
time.sleep(0.5)
print('\tDone')
print('Writing file to disk')
fp = open('google-logo.png', 'wb')
fp.write(base64.b64decode(downloaded_file))
fp.close()
print('\tDone')
driver.close() # close web browser, or it'll persist after python exits.
display.popen.kill() # close virtual display, or it'll persist after python exits.
Explaination
We first load a URL on the domain we're targeting a file download from. This allows us to perform an AJAX request on that domain, without running into cross site scripting issues.
Next, we're injecting some javascript into the DOM which fires off an AJAX request. Once the AJAX request returns a response, we take the response and load it into a FileReader object. From there we can extract the base64 encoded content of the file by calling readAsDataUrl(). We're then taking the base64 encoded content and appending it to window, a gobally accessible variable.
Finally, because the AJAX request is asynchronous, we enter a Python while loop waiting for the content to be appended to the window. Once it's appended, we decode the base64 content retrieved from the window and save it to a file.
This solution should work across all modern browsers supported by Selenium, and works whether text or binary, and across all mime types.
Alternate Approach
While I haven't tested this, Selenium does afford you the ability to wait until an element is present in the DOM. Rather than looping until a globally accessible variable is populated, you could create an element with a particular ID in the DOM and use the binding of that element as the trigger to retrieve the downloaded file.

In chrome what I do is downloading the files by clicking on the links, then I open chrome://downloads page and then retrieve the downloaded files list from shadow DOM like this:
docs = document
.querySelector('downloads-manager')
.shadowRoot.querySelector('#downloads-list')
.getElementsByTagName('downloads-item')
This solution is restrained to chrome, the data also contains information like file path and download date. (note this code is from JS, may not be the correct python syntax)

Here is the full working code. You can use web scraping to enter the username password and other field. For getting the field names appearing on the webpage, use inspect element. Element name(Username,Password or Click Button) can be entered through class or name.
from selenium import webdriver
# Using Chrome to access web
options = webdriver.ChromeOptions()
options.add_argument("download.default_directory=C:/Test") # Set the download Path
driver = webdriver.Chrome(options=options)
# Open the website
try:
driver.get('xxxx') # Your Website Address
password_box = driver.find_element_by_name('password')
password_box.send_keys('xxxx') #Password
download_button = driver.find_element_by_class_name('link_w_pass')
download_button.click()
driver.quit()
except:
driver.quit()
print("Faulty URL")

Selenium and new tab

How i can open link on new tab in selenium webdriver ( firefox ) ?
<a href='/test' id='test'>Link</a>
driver.find_element_by_id('test').click()

It looks like your best bet at the moment is to inject an anchor tag into the page. You'll need to adapt this to python, but it should be relatively straight forward: https://stackoverflow.com/a/9122450/39843

package com.crm.qa.BaseTest;
import java.awt.AWTException;
import java.awt.Robot;
import java.awt.event.KeyEvent;
import java.util.ArrayList;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
public class NewTabChrome {
public static void main(String[] args) throws AWTException {
System.setProperty("webdriver.chrome.driver",
"C:/Users/sunil/Downloads/chromedriver_win32 (2)/chromedriver.exe");
WebDriver driver = new ChromeDriver();//open browser
driver.manage().window().maximize();//browser maximize
driver.get("http://www.google.com");//open google
//open new tab
for(int i = 0; i<=1;i++){
Robot rob = new Robot();
rob.keyPress(KeyEvent.VK_CONTROL);
rob.keyPress(KeyEvent.VK_T);
rob.keyRelease(KeyEvent.VK_CONTROL);
rob.keyRelease(KeyEvent.VK_T);
ArrayList<String> tabs1 = new ArrayList<String> (driver.getWindowHandles());
//Switch to new tab
driver.switchTo().window((String) tabs1.get(i));
}
//open facebook
driver.get("http://facebook.com");
driver.quit();
}
}

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to download embedded PDF from webpage using selenium? - python

Related

Is there any way to download the audio from a certain page

Login to website with Python and Selenium and subprocess problem [duplicate]

How to assert image present in selenium python web driver?

How can I download a file on a click event using selenium?

Selenium and new tab

Categories

Resources