I want to download a webpage using Selenium with Python, using the following code:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
# Build Chrome options carrying the --save-page-as-mhtml switch.
# NOTE(review): chromeOptions is never passed to webdriver.Chrome() below.
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument('--save-page-as-mhtml')
# NOTE(review): DesiredCapabilities is not imported in this snippet, and d is
# not used by any later line.
d = DesiredCapabilities.CHROME
driver = webdriver.Chrome()
driver.get("http://www.yahoo.com")
# Queue a Ctrl+S chord: press CONTROL, press 's', release CONTROL, release 's'.
saveas = ActionChains(driver).key_down(Keys.CONTROL)\
.key_down('s').key_up(Keys.CONTROL).key_up('s')
# Send the queued key events to the browser.
saveas.perform()
print("done")
However, the above code isn't working. I am using Windows 7.
Is there any way I can bring up the 'Save As' dialog box?
Thanks
Karan
You can use below code to download page HTML:
from selenium import webdriver

# Load the page in Chrome, then dump the HTML the driver reports for it.
driver = webdriver.Chrome()
driver.get("http://www.yahoo.com")
# Write as UTF-8 so non-ASCII characters in the page are preserved.
with open("/path/to/page_source.html", "w", encoding='utf-8') as f:
    f.write(driver.page_source)
Just replace "/path/to/page_source.html" with the desired file path and file name.
Update
If you need to get the complete page source (including CSS, JS, ...), you can use the following solution:
pip install pyahk # from command line
Python code:
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import ahk

# Point Selenium at the installed Firefox executable.
firefox = FirefoxBinary("C:\\Program Files (x86)\\Mozilla Firefox\\firefox.exe")
# The module is imported as `webdriver`; `web` was an undefined name.
driver = webdriver.Firefox(firefox_binary=firefox)
driver.get("http://www.yahoo.com")

# Start the AutoHotkey interpreter and wait until it accepts commands.
ahk.start()
ahk.ready()
# Send Ctrl+S, wait up to 2 seconds for the "Save As" window, then focus it.
ahk.execute("Send,^s")
ahk.execute("WinWaitActive, Save As,,2")
ahk.execute("WinActivate, Save As")
# Type the target path into the dialog and confirm with Enter.
ahk.execute("Send, C:\\path\\to\\file.htm")
ahk.execute("Send, {Enter}")
Related
I'm trying to download a PDF file from this address: https://aisweb.decea.mil.br/inc/notam/gerar-boletim/reports/report-notam.cfm
I wrote some code that first fills out some information in this page (correctly) https://aisweb.decea.mil.br/?i=notam and then clicks a button that opens a new tab to the generated PDF file. The problem is that when it tries to save the PDF file at the end, it downloads directly from the .cfm address, resulting in an empty PDF template (you can see this by clicking the first link).
How can I download the PDF that is currently being shown to me on the page, instead of accessing the first URL directly?
This is my code
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.print_page_options import PrintOptions
from urllib import request
from bs4 import BeautifulSoup
import re
import os
import urllib
import time
import requests
from urllib.parse import urljoin
aerodromos = "SBNT,SBJP,SBFZ,SBRF" #TEST
# NOTE(review): this first driver is rebound two lines below without being quit.
# Raw string so the backslashes in the Windows path are taken literally.
driver = webdriver.Chrome(r'C:\Windows\chromedriver.exe')
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
driver.get("https://aisweb.decea.mil.br/?i=notam")
driver.maximize_window()
# Type the aerodrome codes into the search box (XPath attribute tests use @id).
caixaTexto = driver.find_element(By.XPATH, '//*[@id="icaocode"]')
caixaTexto.send_keys(aerodromos)
# Submit the search form.
botao = driver.find_element(By.XPATH, '//*[@id="a"]/form/div/div[3]/div/input[2]')
botao.click()
# Tick the "select all" control.
botao = driver.find_element(By.XPATH, '//*[@id="select-all"]')
botao.click()
# Click the button that opens the generated report.
botao = driver.find_element(By.XPATH, '/html/body/div/div/div/div/div[2]/div/div/form/input[3]')
botao.click()
# Fetch the report URL with urllib, i.e. in a request made outside the browser.
response = urllib.request.urlretrieve('https://aisweb.decea.mil.br/inc/notam/gerar-boletim/reports/report-notam.cfm', filename='relatorio1.pdf')
I did it! When I tried to change the settings in Chrome to download PDFs instead of opening them, it made no difference, but I ended up finding a solution while searching for another way to do it.
Unable to access the modal elements to download pdf with selenium
I changed Chrome experimental options profile in my code and it worked! Now it opens the tab, immediately downloads the file and closes the tab!
While trying to download a PDF file into a specific directory using Selenium with Python in the Firefox browser, the PDF file is not downloaded into that directory; it opens in the Firefox browser itself. I need help.
Cannot use Firefox_Profile, it is deprecated.
from selenium import webdriver
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
# Configure Firefox download preferences before the browser starts.
options = Options()
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.manager.showWhenStarting", False)
# Target directory, written here with forward slashes.
options.set_preference("browser.download.dir", 'C:/Temp/PdfDownload')
options.set_preference("browser.download.useDownloadDir", True)
options.set_preference("browser.helperApps.neverAsk.saveToDisk", 'application/pdf')
# NOTE(review): the key is spelled "pdfjs.disable" here; verify the exact
# preference name against Firefox's about:config.
options.set_preference("pdfjs.disable", True)
# Start Firefox with a geckodriver obtained through webdriver_manager.
driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options = options)
driver.maximize_window()
driver.get("https://file-examples.com/index.php/sample-documents-download/sample-pdf-download/")
# Click the element whose text is exactly 'Download sample pdf file'.
driver.find_element(By.XPATH,".//*[text()='Download sample pdf file']").click()
Fix these lines:
# Download directory given as a Windows path with escaped backslashes.
options.set_preference("browser.download.dir", 'C:\\Temp\\PdfDownload')
And
# Preference key is "pdfjs.disabled"; presumably this turns off the built-in
# PDF viewer so the file is downloaded instead of rendered — confirm.
options.set_preference("pdfjs.disabled", True)
Problem solved <3
I have an automation project that requires opening the URL to a Microsoft SharePoint file and performing some action on the website instead of downloading it.
In my Excel file, if I click the hyperlink, the file opens directly in the browser, using the "always open file" option in Chrome.
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
# URLs generated from my Excel file containing hyperlinks
df = pd.DataFrame(['https://company.sharepoint.com/sites/Test.xlsx', 'https://company.sharepoint.com/sites/Test2.xlsx'])
df.columns = ['url']
options = Options()
driver = webdriver.Chrome(options = options)
# Navigate to each URL in turn; the call must sit inside the loop body.
for url in df.url:
    driver.get(url)
However, if I use Selenium to open these URLs, it just downloads the file instead of opening it in the browser.
At least in Chrome you should be able to prevent the save file dialog from opening and to specify a download directory.
from selenium import webdriver

options = webdriver.ChromeOptions()
# Chrome profile preferences: where downloads go, and no save prompt.
# Keys must match Chrome's preference names exactly (no embedded spaces).
prefs = {
    'download.default_directory': '/tmp',
    'download.prompt_for_download': False,
}
options.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome(options=options)
I am trying to use CTRL + S in Selenium to save the contents of a page, but nothing happens. If I do it using my keyboard, the save window opens.
from selenium.webdriver import ActionChains
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# First Chrome instance; action_chains below is bound to this one.
driver = webdriver.Chrome()
driver.maximize_window()
action_chains = ActionChains(driver)
options = webdriver.ChromeOptions()
# Passes both settings as one command-line argument string.
# NOTE(review): these look like profile preferences rather than switches — confirm.
options.add_argument("download.default_directory=C:/Downloads, download.prompt_for_download=False")
# driver is rebound to a second Chrome instance; action_chains still refers to
# the first instance created above.
driver = webdriver.Chrome(options=options)
driver.get("https://imagecyborg.com/")
# Sends CONTROL and "s" through two separate send_keys calls.
action_chains.send_keys(Keys.CONTROL).send_keys("s").perform()
The only thing that worked for me was pyautogui:
import pyautogui
# Issue a Ctrl+S hotkey through pyautogui rather than through the WebDriver.
pyautogui.hotkey('ctrl','s')
# Press Enter afterwards; presumably this confirms the save dialog — confirm.
pyautogui.press('enter')
I will try again.
I copied the code below from another site, where the user says it works (and shows a screenshot). Original code
I tested the code: No error, but no file save.
All questions use this answer to save a file: A question!
Why is the page not saved or, if it is, where is the file?
Thanks
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
# Start Chrome using the chromedriver binary at the given path.
driver = webdriver.Chrome(executable_path=r"C:\Program Files (x86)\Selenium\chromedriver.exe")
driver.get("http://www.example.com")
# Queue CONTROL down, the key 'S', then CONTROL up.
saveas = ActionChains(driver).key_down(Keys.CONTROL).send_keys('S').key_up(Keys.CONTROL)
# Send the queued key events to the browser.
saveas.perform()
from selenium import webdriver

driver = webdriver.Chrome(executable_path=r"C:\Program Files (x86)\Selenium\chromedriver.exe")
driver.get("http://www.example.com")
# Save the HTML reported by the driver; an explicit UTF-8 encoding avoids
# encode errors when the page contains characters outside the locale's codec.
with open('page.html', 'w+', encoding='utf-8') as f:
    f.write(driver.page_source)
Must work
If you do the key combination in the browser, you will see that this only brings up the 'save page' dialog box. You need to additionally send ALT+S to save the page; on Windows it will be saved in your Downloads folder by default.
# Ctrl+S, then type the document name, then Alt+S.
saveas = (
    ActionChains(driver)
    .key_down(Keys.CONTROL)
    .send_keys('S')
    .key_up(Keys.CONTROL)
    .send_keys('MyDocumentName')
    .key_down(Keys.ALT)
    .send_keys('S')
    .key_up(Keys.ALT)
)
EDIT:
ActionChains are unreliable. It would be easier not to interact with the browser GUI.
from selenium import webdriver

driver = webdriver.Chrome(executable_path=r"C:\Program Files (x86)\Selenium\chromedriver.exe")
driver.get("http://www.example.com")
# Write the page HTML straight to disk instead of driving the browser GUI.
# UTF-8 is set explicitly so non-ASCII page content does not fail to encode.
with open('page.html', 'w', encoding='utf-8') as f:
    f.write(driver.page_source)