Selenium: simulate mouse-wheel scroll-down and a WebDriverWait alternative to time.sleep - python

I am using the Quip office system and I want to use Selenium to export all of the documentation, but I have run into several problems:
1) Waiting: I only use time.sleep, which often causes problems.
2) Loading a document: some folders contain so many documents that I need to scroll down before all the hrefs are present.
3) Because the folders are created by people, a folder may contain another folder, which may in turn contain yet another folder.
I am a newbie, so please explain as much as possible.
A test account and password are provided in the code.
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
import time

# Configuration information
email = "187069474@qq.com"
password = "Huangbo1019#"

def work_on():
    driver = webdriver.Chrome('drivers/chromedriver72.exe')
    index_url = "https://quip.com/"
    driver.get(url=index_url)

    def get_docs(docs):
        for doc in docs:
            driver.get(doc)
            time.sleep(2)
            driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div[3]/div[1]/div[1]/div[1]/div[2]/button[1]').click()  # select document
            time.sleep(2)
            ele = driver.find_element_by_xpath('//div[@class="parts-menu-label" and text()="Export"]')  # determine the position of the Export entry
            actions = ActionChains(driver)
            actions.move_to_element(ele).perform()
            time.sleep(2)
            html = driver.find_element_by_xpath('//div[@class="parts-menu-label" and text()="HTML"]')
            actions.move_to_element(html).click(html).perform()
            time.sleep(5)

    time.sleep(1)
    driver.find_element_by_xpath('//*[@id="header-nav-collapse"]/ul/li[9]/a').click()  # click login
    time.sleep(1)
    driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[1]/form/div/input').send_keys(email)  # input email
    driver.find_element_by_xpath('//*[@id="email-submit"]').click()
    time.sleep(1)
    driver.find_element_by_xpath('/html/body/div/div/form/div/input[2]').send_keys(password)  # input password
    driver.find_element_by_xpath('/html/body/div/div/form/button').click()
    time.sleep(2)
    driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div[1]/div/div/div[3]/div[1]/a[2]/div/div').click()  # click file
    time.sleep(5)
    driver.find_element_by_xpath('//*[@id="app"]/div/div/div/div[3]/div[2]/div/div/div/div[1]/div[2]/div[1]/a').click()  # select test
    time.sleep(2)
    docs = driver.find_elements_by_class_name('folder-document-thumbnail')
    docs = [x.get_attribute('href') for x in docs]
    folders = driver.find_elements_by_class_name('folder-thumbnail')
    folders = [x.get_attribute('href') for x in folders]
    get_docs(docs)
    for folder in folders:
        driver.get(folder)
        time.sleep(2)
        docs = driver.find_elements_by_class_name('folder-document-thumbnail')
        docs = [x.get_attribute('href') for x in docs]
        get_docs(docs)
    time.sleep(5)
    driver.close()

if __name__ == '__main__':
    work_on()
The current code only reaches the second-level folders.
It cannot capture all of the document links because the page cannot be scrolled down.
The fixed waits are very painful: when the network is slow, the script errors out.
The Quip folders provided here are just a test, but in production there will be thousands of documents.
I hope someone can improve this code; it would be very helpful to me.
I am truly thankful.

You can use Selenium's scroll feature. The following scrolls to the bottom of the page:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
More information in this post: How can I scroll a web page using selenium webdriver in python?
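To also address the time.sleep pain in the question, here is a minimal sketch that combines that scroll with explicit waits and a recursive folder walk. It assumes the folder-document-thumbnail / folder-thumbnail class names from the question's code and that the lazy-loaded list grows the document body; if Quip scrolls an inner container instead, you would scroll that element rather than the window:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait

def scroll_to_end(driver, step_timeout=5):
    # Scroll down until the page height stops growing, waiting for new content
    # to appear instead of sleeping a fixed number of seconds.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            WebDriverWait(driver, step_timeout).until(
                lambda d: d.execute_script("return document.body.scrollHeight") > last_height)
        except TimeoutException:
            break  # nothing new was loaded, so we are at the bottom
        last_height = driver.execute_script("return document.body.scrollHeight")

def collect_folder(driver, folder_url, wait_time=20):
    # Recursively collect document links from a folder and all of its sub-folders.
    driver.get(folder_url)
    try:
        WebDriverWait(driver, wait_time).until(
            lambda d: d.find_elements_by_class_name('folder-document-thumbnail')
            or d.find_elements_by_class_name('folder-thumbnail'))
    except TimeoutException:
        return []  # empty folder, or the class names differ on this page
    scroll_to_end(driver)
    docs = [e.get_attribute('href')
            for e in driver.find_elements_by_class_name('folder-document-thumbnail')]
    for sub in [e.get_attribute('href')
                for e in driver.find_elements_by_class_name('folder-thumbnail')]:
        docs += collect_folder(driver, sub, wait_time)
    return docs
Each document URL returned this way can then be passed to the existing get_docs routine, replacing its fixed sleeps with WebDriverWait in the same manner.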

Related

How to make a non-clickable button clickable in Python using Selenium?

Hi, before starting, thanks for the help in advance.
I am trying to scrape the Google Flights website: https://www.google.com/travel/flights
I have done the part that sends keys to the text field, but I am stuck at clicking the search button: it always raises an error saying the element is not clickable at a point, or that other elements would receive the click.
The code is:
from time import sleep
from selenium import webdriver

chromedriver_path = 'E:/chromedriver.exe'

def search(urrl):
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    driver.get(urrl)
    asd = "//div[@aria-label='Enter your destination']//div//input[@aria-label='Where else?']"
    driver.find_element_by_xpath("/html/body/c-wiz[2]/div/div[2]/c-wiz/div/c-wiz/c-wiz/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[4]/div/div/div[1]/div/div/input").click()
    sleep(2)
    TextBox = driver.find_element_by_xpath(asd)
    sleep(2)
    TextBox.click()
    sleep(2)
    print(str(TextBox))
    TextBox.send_keys('Karachi')
    sleep(2)
    search_button = driver.find_element_by_xpath('//*[@id="yDmH0d"]/c-wiz[2]/div/div[2]/c-wiz/div/c-wiz/c-wiz/div[2]/div[1]/div[1]/div[2]/div/button/div[1]')
    sleep(2)
    search_button.click()
    print(str(search_button))
    sleep(15)
    print("DONE")
    driver.close()

def main():
    url = "https://www.google.com/travel/flights"
    print(" Getting Data ")
    search(url)

if __name__ == "__main__":
    main()
I obtained the XPaths by copying them from the dev tools.
Thanks again
The problem you are facing is that after entering the city Karachi in the text box, there is a suggestion dropdown that is displayed over the Search Button. That is the cause of the exception as the dropdown would receive the click instead of the Search button. The intended usage of the website is to select the city from the dropdown and then continue.
A quick fix would be to first look for all of the dropdowns in the source (there are a few) and find the one that is currently active using is_displayed(). Then select the first element in the suggested dropdown:
.....
TextBox.send_keys('Karachi')
sleep(2)
# the dropdown elements have the attribute role="listbox". Warning: this could change in the future
all_dropdowns = driver.find_elements_by_css_selector('ul[role="listbox"]')
# the currently active dropdown
active_dropdown = [i for i in all_dropdowns if i.is_displayed()][0]
# click the first suggestion in the dropdown (Note: this is very specific to your search. It may not be
# desirable in other circumstances, and the logic can be modified accordingly)
active_dropdown.find_element_by_tag_name('li').click()
# I recommend following the advice @cruisepandey has offered above about using a relative xpath
# instead of an absolute one
search_button = driver.find_element_by_xpath("//button[contains(.,'Search')]")
sleep(2)
search_button.click()
.....
I also suggest heeding the advice provided by @cruisepandey, including researching Explicit Waits in Selenium, to write better-performing Selenium programs. All the best
Instead of this absolute xpath
//*[@id="yDmH0d"]/c-wiz[2]/div/div[2]/c-wiz/div/c-wiz/c-wiz/div[2]/div[1]/div[1]/div[2]/div/button/div[1]
I would recommend using a relative xpath:
//button[contains(.,'Search')]
Also, I would recommend using an explicit wait when you attempt a click operation.
Code:
search_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(.,'Search')]")))
search_button.click()
You'd need below imports as well:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Pro tip:
Launch the browser in full screen mode.
driver.maximize_window()
You should place the above line just before the driver.get(urrl) call.

Why is this not opening a new tab, especially in the Chrome WebDriver browser (selenium-python)? [duplicate]

So I am trying to open websites on new tabs inside my WebDriver. I want to do this, because opening a new WebDriver for each website takes about 3.5secs using PhantomJS, I want more speed...
I'm using a multiprocess python script, and I want to get some elements from each page, so the workflow is like this:
Open Browser
Loop throught my array
For element in array -> Open website in new tab -> do my business -> close it
But I can't find any way to achieve this.
Here's the code I'm using. It takes forever between websites, and I need it to be fast... Other tools are allowed, but I don't know many tools for scraping website content that loads with JavaScript (divs created when some event is triggered on load, etc.). That's why I need Selenium... BeautifulSoup can't be used for some of my pages.
#!/usr/bin/env python
import multiprocessing, time, pika, json, traceback, logging, sys, os, itertools, urllib, urllib2, cStringIO, mysql.connector, shutil, hashlib, socket, re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from PIL import Image
from os import listdir
from os.path import isfile, join
from bs4 import BeautifulSoup
from pprint import pprint

def getPhantomData(parameters):
    try:
        # We create the WebDriver
        browser = webdriver.Firefox()
        # Navigate to URL
        browser.get(parameters['target_url'])
        # Find all links by Selector
        links = browser.find_elements_by_css_selector(parameters['selector'])
        result = []
        for link in links:
            # Extract link attribute and append to our list
            result.append(link.get_attribute(parameters['attribute']))
        browser.close()
        browser.quit()
        return json.dumps({'data': result})
    except Exception, err:
        browser.close()
        browser.quit()
        print err

def callback(ch, method, properties, body):
    parameters = json.loads(body)
    message = getPhantomData(parameters)
    if message['data']:
        ch.basic_ack(delivery_tag=method.delivery_tag)
    else:
        ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)

def consume():
    credentials = pika.PlainCredentials('invitado', 'invitado')
    rabbit = pika.ConnectionParameters('localhost', 5672, '/', credentials)
    connection = pika.BlockingConnection(rabbit)
    channel = connection.channel()
    # Connect to the channel
    channel.queue_declare(queue='com.stuff.images', durable=True)
    channel.basic_consume(callback, queue='com.stuff.images')
    print ' [*] Waiting for messages. To exit press CTRL^C'
    try:
        channel.start_consuming()
    except KeyboardInterrupt:
        pass

workers = 5
pool = multiprocessing.Pool(processes=workers)
for i in xrange(0, workers):
    pool.apply_async(consume)

try:
    while True:
        continue
except KeyboardInterrupt:
    print ' [*] Exiting...'
    pool.terminate()
    pool.join()
Editor's note: This answer no longer works for newer Selenium versions.
You can achieve the opening/closing of a tab by the combination of keys COMMAND + T or COMMAND + W (OSX). On other OSs you can use CONTROL + T / CONTROL + W.
In selenium you can emulate such behavior.
You will need to create one webdriver and as many tabs as the tests you need.
Here it is the code.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Firefox()
driver.get("http://www.google.com/")
#open tab
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')
# You can use (Keys.CONTROL + 't') on other OSs
# Load a page
driver.get('http://stackoverflow.com/')
# Make the tests...
# close the tab
# (Keys.CONTROL + 'w') on other OSs.
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 'w')
driver.close()
browser.execute_script('''window.open("http://bings.com","_blank");''')
Where browser is the webDriver
This is common code adapted from other examples:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Firefox()
driver.get("http://www.google.com/")
#open tab
# ... take the code from the options below
# Load a page
driver.get('http://bings.com')
# Make the tests...
# close the tab
driver.quit()
The possible ways are:
Sending <CTRL> + <T> to one element
#open tab
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 't')
Sending <CTRL> + <T> via Action chains
ActionChains(driver).key_down(Keys.CONTROL).send_keys('t').key_up(Keys.CONTROL).perform()
Execute a javascript snippet
driver.execute_script('''window.open("http://bings.com","_blank");''')
In order to achieve this you need to ensure that the preferences browser.link.open_newwindow and browser.link.open_newwindow.restriction are properly set. The default values in recent versions are OK; otherwise you supposedly need:
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.link.open_newwindow", 3)
fp.set_preference("browser.link.open_newwindow.restriction", 2)
driver = webdriver.Firefox(browser_profile=fp)
The problem is that those preferences are preset to other values and are frozen, at least in Selenium 3.4.0. When you use a profile to set them with the Java binding an exception is thrown, and with the Python binding the new values are simply ignored.
In Java there is a way to set those preferences without specifying a profile object when talking to geckodriver, but it does not seem to be implemented yet in the Python binding:
FirefoxOptions options = new FirefoxOptions().setProfile(fp);
options.addPreference("browser.link.open_newwindow", 3);
options.addPreference("browser.link.open_newwindow.restriction", 2);
FirefoxDriver driver = new FirefoxDriver(options);
The third option stopped working for Python in Selenium 3.4.0.
The first two options also seem to have stopped working in Selenium 3.4.0. They depend on sending a CTRL key event to an element. At first glance it looks like a problem with the CTRL key, but it is actually failing because of the new multiprocess feature of Firefox. It might be that this new architecture imposes new ways of doing this, or maybe it is a temporary implementation problem. Anyway, we can disable it via:
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.tabs.remote.autostart", False)
fp.set_preference("browser.tabs.remote.autostart.1", False)
fp.set_preference("browser.tabs.remote.autostart.2", False)
driver = webdriver.Firefox(browser_profile=fp)
... and then you can successfully use the first way.
OS: Win 10,
Python 3.8.1
selenium==3.141.0
from selenium import webdriver
import time
driver = webdriver.Firefox(executable_path=r'TO\Your\Path\geckodriver.exe')
driver.get('https://www.google.com/')
# Open a new window
driver.execute_script("window.open('');")
# Switch to the new window
driver.switch_to.window(driver.window_handles[1])
driver.get("http://stackoverflow.com")
time.sleep(3)
# Open a new window
driver.execute_script("window.open('');")
# Switch to the new window
driver.switch_to.window(driver.window_handles[2])
driver.get("https://www.reddit.com/")
time.sleep(3)
# close the active tab
driver.close()
time.sleep(3)
# Switch back to the first tab
driver.switch_to.window(driver.window_handles[0])
driver.get("https://bing.com")
time.sleep(3)
# Close the only tab, will also close the browser.
driver.close()
Reference: Need Help Opening A New Tab in Selenium
The other solutions do not work for ChromeDriver v83.
Instead, it works as follows, assuming there is only one open tab:
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[1])
driver.get("https://www.example.com")
If more than one tab is already open, you should first get the handle of the newly created last tab and switch to it before calling the URL (credit to tylerl):
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[-1])
driver.get("https://www.example.com")
In a discussion, Simon clearly mentioned that:
While the datatype used for storing the list of handles may be ordered by insertion, the order in which the WebDriver implementation iterates over the window handles to insert them has no requirement to be stable. The ordering is arbitrary.
Using Selenium v3.x, opening a website in a new tab through Python is much easier now. We have to induce a WebDriverWait for number_of_windows_to_be(2), collect the window handles every time we open a new tab/window, and finally iterate through the window handles and switch_to.window(newly_opened) as required. Here is a solution where you open http://www.google.co.in in the initial tab and https://www.yahoo.com in the adjacent tab:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("http://www.google.co.in")
print("Initial Page Title is : %s" %driver.title)
windows_before = driver.current_window_handle
print("First Window Handle is : %s" %windows_before)
driver.execute_script("window.open('https://www.yahoo.com')")
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
windows_after = driver.window_handles
new_window = [x for x in windows_after if x != windows_before][0]
driver.switch_to.window(new_window)
print("Page Title after Tab Switching is : %s" %driver.title)
print("Second Window Handle is : %s" %new_window)
Console Output:
Initial Page Title is : Google
First Window Handle is : CDwindow-B2B3DE3A222B3DA5237840FA574AF780
Page Title after Tab Switching is : Yahoo
Second Window Handle is : CDwindow-D7DA7666A0008ED91991C623105A2EC4
Outro
You can find the Java-based discussion in Best way to keep track and iterate through tabs and windows using WindowHandles using Selenium
Try this; it will work:
# Open a new Tab
driver.execute_script("window.open('');")
# Switch to the new window and open URL B
driver.switch_to.window(driver.window_handles[1])
driver.get(tab_url)
After struggling for so long, the method below worked for me:
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 't')
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
windows = driver.window_handles
time.sleep(3)
driver.switch_to.window(windows[1])
from selenium import webdriver
import time
driver = webdriver.Firefox()
driver.get('https://www.google.com')
driver.execute_script("window.open('');")
time.sleep(5)
driver.switch_to.window(driver.window_handles[1])
driver.get("https://facebook.com")
time.sleep(5)
driver.close()
time.sleep(5)
driver.switch_to.window(driver.window_handles[0])
driver.get("https://www.yahoo.com")
time.sleep(5)
#driver.close()
https://www.edureka.co/community/52772/close-active-current-without-closing-browser-selenium-python
Just for future reference, the simple way can be done like this:
driver.switch_to.new_window()
t = driver.window_handles[-1]  # Get the handle of the new tab
driver.switch_to.window(t)
driver.get(target_url)  # Now the target url is opened in the new tab
The 4.0.0 version of Selenium supports the following operations:
To open a new tab, try:
driver.switch_to.new_window()
To switch to a specific tab (note that tabID starts from 0):
driver.switch_to.window(driver.window_handles[tabID])
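For completeness, a minimal sketch of that Selenium 4 flow (the URLs are placeholders; passing 'tab' to new_window is optional):
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://www.example.com")

# new_window() opens a new tab and switches the driver to it
driver.switch_to.new_window('tab')
driver.get("https://www.example.org")

# Switch back to the first tab using its handle
driver.switch_to.window(driver.window_handles[0])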
Strangely, there are so many answers, and all of them use surrogates like JS and keyboard shortcuts instead of just using a Selenium feature:
from selenium.webdriver.remote.command import Command

def newTab(driver, url="about:blank"):
    # Ask the driver to open a new window/tab via the W3C "New Window" command
    wnd = driver.execute(Command.NEW_WINDOW)
    handle = wnd["value"]["handle"]
    driver.switch_to.window(handle)
    driver.get(url)  # changes the handle
    return driver.current_window_handle
I'd stick to ActionChains for this.
Here's a function which opens a new tab and switches to that tab:
import time

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

def open_in_new_tab(driver, element, switch_to_new_tab=True):
    base_handle = driver.current_window_handle

    # COMMAND-click the element to open it in a new tab
    # (use Keys.CONTROL instead of Keys.COMMAND on Windows/Linux)
    ActionChains(driver) \
        .move_to_element(element) \
        .key_down(Keys.COMMAND) \
        .click() \
        .key_up(Keys.COMMAND) \
        .perform()

    # Should you switch to the new tab?
    if switch_to_new_tab:
        new_handle = [x for x in driver.window_handles if x != base_handle]
        assert len(new_handle) == 1  # assume you are only opening one tab at a time

        # Switch to the new window
        driver.switch_to.window(new_handle[0])

        # I like to wait after switching to a new tab for the content to load.
        # Do that either with time.sleep() or with WebDriverWait until a basic
        # element of the page appears (such as "body") -- reference for this is
        # provided below
        time.sleep(0.5)

        # NOTE: if you choose to switch to the window/tab, be sure to close
        # the newly opened window/tab after using it and that you switch back
        # to the original "base_handle" --> otherwise, you'll experience many
        # errors and a painful debugging experience...
Here's how you would apply that function:
# Remember your starting handle
base_handle = driver.current_window_handle

# Say we have a list of elements and each is a link:
links = driver.find_elements_by_css_selector('a[href]')

# Loop through the links and open each one in a new tab
for link in links:
    open_in_new_tab(driver, link, True)

    # Do something on this new page
    print(driver.current_url)

    # Once you're finished, close this tab and switch back to the original one
    driver.close()
    driver.switch_to.window(base_handle)

    # You're ready to continue to the next item in your loop
Here's how you could wait until the page is loaded.
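A rough sketch of such a wait, assuming that the presence of the body element is a good enough signal that the new tab has loaded:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_page(driver, timeout=10):
    # Block until the <body> element of the newly opened tab is present
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.TAG_NAME, "body")))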
You can use this to open a new tab:
driver.execute_script("window.open('http://google.com', 'new_window')")
This worked for me:
link = "https://www.google.com/"
driver.execute_script('''window.open("about:blank");''')  # Opening a blank new tab
driver.switch_to.window(driver.window_handles[1])  # Switching to the newly opened tab
driver.get(link)
It is enough to use this to open a new window (for example):
driver.find_element_by_link_text("Images").send_keys(Keys.CONTROL + Keys.RETURN)
I tried for a very long time to duplicate tabs in Chrome using action_keys and send_keys on the body. The only thing that worked for me was an answer here. This is what my duplicate-tabs def ended up looking like; probably not the best, but it works fine for me.
def duplicate_tabs(number, chromewebdriver):
    # Once on the page we want to open a bunch of tabs
    url = chromewebdriver.current_url
    for i in range(number):
        print('opened tab: ' + str(i))
        chromewebdriver.execute_script("window.open('" + url + "', 'new_window" + str(i) + "')")
It basically runs some JavaScript from inside Python, which is incredibly useful. Hope this helps somebody.
Note: I am using Ubuntu, it shouldn't make a difference but if it doesn't work for you this could be the reason.
To my knowledge it is not possible to open a new empty tab within the same Chrome browser window, but you can open a new tab from a web link.
So far I have surfed the net and found good working content on this question.
Please follow the steps without missing any.
import selenium.webdriver as webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome()
driver.get('https://www.google.com?q=python#q=python')
first_link = driver.find_element_by_class_name('l')
# Use: Keys.CONTROL + Keys.SHIFT + Keys.RETURN to open tab on top of the stack
first_link.send_keys(Keys.CONTROL + Keys.RETURN)
# Switch tab to the new tab, which we will assume is the next one on the right
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
driver.quit()
I think this is the better solution so far.
Credits: https://gist.github.com/lrhache/7686903
tabs = {}

def new_tab():
    global browser
    hpos = browser.window_handles.index(browser.current_window_handle)
    browser.execute_script("window.open('');")
    browser.switch_to.window(browser.window_handles[hpos + 1])
    return browser.current_window_handle

def switch_tab(name):
    global tabs
    global browser
    if not name in tabs.keys():
        tabs[name] = {'window_handle': new_tab(), 'url': url + name}
        browser.get(tabs[name]['url'])
    else:
        browser.switch_to.window(tabs[name]['window_handle'])
As already mentioned several times, the following approaches are NOT working anymore:
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 't')
ActionChains(driver).key_down(Keys.CONTROL).send_keys('t').key_up(Keys.CONTROL).perform()
Moreover, driver.execute_script("window.open('');") works but is limited by the popup blocker. I process hundreds of tabs in parallel (web scraping using Scrapy). However, the popup blocker became active after opening 20 new tabs with JavaScript's window.open('') and thus broke my crawler.
As a workaround I declared one tab as the "master", which has opened the following helper.html:
<!DOCTYPE html>
<html><body>
<a id="open_new_window" href="about:blank" target="_blank">open a new window</a>
</body></html>
Now, my (simplified) crawler can open as many tabs as necessary by purposely clicking the link, which is not considered by the popup blocker at all:
# master
master_handle = driver.current_window_handle
helper = os.path.join(os.path.dirname(os.path.abspath(__file__)), "helper.html")
driver.get(helper)

# open new tabs
for _ in range(100):
    window_handle = driver.window_handles  # current state
    driver.switch_to_window(master_handle)
    driver.find_element_by_id("open_new_window").click()
    window_handle = set(driver.window_handles).difference(window_handle).pop()
    print("new window handle:", window_handle)
Closing these windows via JavaScript's window.close() is no problem.
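For illustration, a minimal sketch of that cleanup, assuming you still have master_handle from above and that the browser allows window.close() on these link-opened tabs:
# Switch to the tab we want to dispose of, close it from JavaScript,
# then return to the master tab
driver.switch_to.window(window_handle)
driver.execute_script("window.close();")
driver.switch_to.window(master_handle)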
#Change the method of finding the element if needed
self.find_element_by_xpath(element).send_keys(Keys.CONTROL + Keys.ENTER)
This will find the element and open it in a new tab. self is just the name used for the webdriver object.

Using Python to Scrape a JS Form

I'm currently working on a research project in which we are trying to collect saved image files from Brazil's Hemeroteca database. I've done web scraping on PHP pages before using C/C++ with HTML forms, but as this is a shared script, I need to switch to Python so that everyone in the group can use this tool.
The page which I'm trying to scrape is: http://bndigital.bn.gov.br/hemeroteca-digital/
There are three forms which populate, the first being the newspaper/journal. Upon selecting this, the available times populate, and the final field is the search term. I've inspected the HTML page here and the three IDs of these are respectively: 'PeriodicoCmb1_Input', 'PeriodoCmb1_Input', and 'PesquisaTxt1'.
Some google searches on this topic led me to the Selenium package, and I've put together this sample code to attempt to read the page:
import webbrowser
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
print("Begin...")
browser = webdriver.Chrome()
url = "http://bndigital.bn.gov.br/hemeroteca-digital/"
browser.get(url)
print("Waiting to load page... (Delay 3 seconds)")
time.sleep(3)
print("Searching for elements")
journal = browser.find_element_by_id("PeriodicoCmb1_Input")
timeRange = browser.find_element_by_id("PeriodoCmb1_Input")
searchTerm = browser.find_element_by_id("PesquisaTxt1")
print(journal)
print("Set fields, delay 3 seconds between input")
search_journal = "Relatorios dos Presidentes dos Estados Brasileiros (BA)"
search_timeRange = "1890 - 1899"
search_text = "Milho"
journal.send_keys(search_journal)
time.sleep(3)
timeRange.send_keys(search_timeRange)
time.sleep(3)
searchTerm.send_keys(search_text)
print("Perform search")
submitButton = browser.find_element_by_id("PesquisarBtn1_input")
submitButton.click()
The script runs to the print(journal) statement, where an error is thrown saying the element cannot be found.
Can anyone take a quick sweep of the page in question and make sure I've got the general premise of this script in line correctly, or point me towards some examples to get me running on this problem?
Thanks!
The DOM elements you are trying to find are located inside an iframe, so before using the find_element_by_id API you should switch to the iframe context.
Here is code showing how to switch to the iframe context:
# add your code
frame_ref = browser.find_elements_by_tag_name("iframe")[0]
iframe = browser.switch_to.frame(frame_ref)
journal = browser.find_element_by_id("PeriodicoCmb1_Input")
timeRange = browser.find_element_by_id("PeriodoCmb1_Input")
searchTerm = browser.find_element_by_id("PesquisaTxt1")
# add your code
Here is a link describing switching to iframe context.
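One thing worth adding: once you are done with the fields inside the iframe, you can switch back to the top-level document before locating anything outside it. A minimal sketch:
# ... interact with journal / timeRange / searchTerm inside the iframe ...

# Return to the main document before locating elements outside the iframe
browser.switch_to.default_content()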

Facebook account download automation with python?

As a regular task within my job role I often have to download full Facebook accounts from within a user's account. I am trying to improve my workflow and automate this if I can.
I have tried searching for this topic on the site, and although many questions cover the login part, I have yet to find one that deals with Facebook's popup windows. If I am wrong, I apologise; please amend the post accordingly.
As a starting point I have decided to start learning Python and am using it to script the process, with a little help from Selenium and ChromeDriver. I have managed to write the code to log in, navigate to the correct page and click the initial 'Download a copy' link. I am struggling, however, to get the script to locate and click the 'Start My Archive' button within the popup window.
Here is the code that I have used so far, including the several alternative code blocks that I have tried, commented out at the bottom:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Global Variables
target = "john.doe.7"
username = "john@doe.com"
password = "Password"
sleep = 5

# Finds and locates the ChromeDriver
driver = webdriver.Chrome("C:\Python35-32\ChromeDriver\chromedriver.exe")

# Set Chrome Options
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(chrome_options=chrome_options)

# Directs browser to webpage
driver.get('http://www.facebook.com')

# code block to log in user
def logmein():
    search_box = driver.find_element_by_name('email')
    search_box.send_keys(username)
    search_box = driver.find_element_by_name('pass')
    search_box.send_keys(password)
    search_box.submit()

# code to go to specific URL
def GoToURL(URL):
    time.sleep(sleep)  # Let the user actually see something!
    driver.get(URL)

logmein()
GoToURL('https://www.facebook.com/' + 'settings')
link = driver.find_element_by_link_text('Download a copy')
link.click()

#driver.find_element_by.xpath("//button[contains(., 'Start My Archive')]").click()
#driver.find_element_by_css_selector('button._42ft._42fu.selected._42gz._42gy').click()
#driver.find_element_by_xpath("//*[contains(text(), 'Start My Archive')]").click()
#driver.find_element_by_css_selector('button._42ft._42fu.layerConfirm.uiOverlayButton.selected._42g-._42gy').click()
#from selenium.webdriver.common.action_chains.ActionChains import driver
#buttons = driver.find_elements_by_xpath("//title[contains(text(),'Start My Archive')]")
#actions = ActionChains(self.driver)
#time.sleep(2)
#actions.click(button)
#actions.perform()
Just add this to your code and run it to see whether it works or not. Always use CSS selectors.
I ran the code below in Eclipse with Java, and I'm not familiar with Python, so if something is wrong in the syntax, I apologise. Just try this and see.
driver.implicitly_wait(10)
Startmyarchive = driver.find_element_by_css_selector("._42ft._42fu.selected._42gz._42gy")
Startmyarchive.click()
driver.implicitly_wait(10)
Acknowledge = driver.find_element_by_css_selector("._42ft._42fu.layerConfirm.uiOverlayButton.selected._42g-._42gy")
Acknowledge.click()
driver.implicitly_wait(10)
ClickOkay = driver.find_element_by_css_selector("._42ft._42fu.layerCancel.uiOverlayButton.selected._42g-._42gy")
ClickOkay.click()
Happy Learning :-) Do reply back for any query.
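Since class names like _42ft and _42fu are auto-generated and may change, an alternative sketch (assuming the button's visible text still contains 'Start My Archive') is to wait explicitly for it to become clickable:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for the popup button to become clickable, then click it.
# The XPath assumes the button's visible text contains 'Start My Archive'.
start_archive = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Start My Archive')]")))
start_archive.click()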

Selenium-python downloading but file is saved as .part

My script works, but it saves the file as a .part, although checking it against a manually downloaded file it is the same size and thankfully complete. I can't understand why it is being saved as a partial file, though. That is sort of inconvenient for my next idea. Does anybody have an idea of why this might be? Here's my code... which works...
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
import mechanize
import urllib
from urllib import urlretrieve
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList",1)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir",'Users/matthewyoung/Downloads')
fp.set_preference("browser.helperApps.neverAsk.saveToDisk","Plain text")
fp.set_preference("browser.download.manager.scanWhenDone",False)
fp.set_preference("browser.download.manager.showAlertOnComplete",True)
fp.set_preference("browser.download.manager.useWindow",False)
fp.set_preference("browser.helperApps.alwaysAsk.force",False)
browser = webdriver.Firefox(firefox_profile=fp)
#browser = webdriver.Firefox() # Get local session of firefox
browser.get("http://vizier.u-strasbg.fr/vizier/surveys.htx") # Load page
assert "VizieR" in browser.title
#p = raw_input('Star name? ')
elem = browser.find_element_by_name('-c') # Find the query box
elem.send_keys('mwc 560' + Keys.RETURN)
time.sleep(0.2) # Let the page load, will be added to the API
elem=browser.find_element_by_name('-out.max')
elem.send_keys('unlimited'+Keys.TAB)
elem2=browser.find_element_by_name('-out.form')
time.sleep(0.5)
elem2.send_keys('; -Separated-Values')
time.sleep(0.5)
elem2.send_keys(Keys.TAB)
elem2.send_keys(Keys.TAB)
time.sleep(0.2)
browser.find_element_by_class_name('data').submit()
time.sleep(3.0)
#df=elem2.send_keys(Keys.SPACE)
#print df
browser.close()
It is downloading as .part because the "save as" popup dialog window appears, and Python cannot deal with that popup window. I have found that when you try to set preferences for a custom profile in WebDriver it doesn't necessarily work (for instance, I was able to set a custom profile in Selenium to download a CSV but not a PDF). However, I was able to solve my PDF problem by creating a custom profile in Firefox. I am not very experienced with TSV files, so I am not sure what the relevant setting would be. If you can create a new Firefox profile (following the instructions here: https://support.mozilla.org/en-US/kb/profile-manager-create-and-remove-firefox-profiles)
you can try to set that profile to save TSV files by default. If you don't know the exact setting to change in "about:config", you can try just clicking the checkbox on the popup to always save those kinds of files.
From there you point WebDriver at the custom profile you created like this:
profile = webdriver.firefox.firefox_profile.FirefoxProfile("/Users/matthewyoung/Library/Application Support/Firefox/Profiles/YOUR_PROFILE_NAME")
Keep in mind that YOUR_PROFILE_NAME will be prefixed with a bunch of random letters, so follow that path to find the actual profile name.
I think the only thing you are missing from your Firefox profile settings is the following:
fp.set_preference("browser.helperApps.neverAsk.openFile", 'Plain Text')
So the entire code should be
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList",2)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir",'Users/matthewyoung/Downloads')
fp.set_preference("browser.helperApps.neverAsk.openFile", 'Plain Text')
fp.set_preference("browser.helperApps.neverAsk.saveToDisk","Plain text")
fp.set_preference("browser.download.manager.scanWhenDone",False)
fp.set_preference("browser.download.manager.showAlertOnComplete",True)
fp.set_preference("browser.download.manager.useWindow",False)
fp.set_preference("browser.helperApps.alwaysAsk.force",False)
browser = webdriver.Firefox(firefox_profile=fp)
browser.get("http://vizier.u-strasbg.fr/vizier/surveys.htx") # Load page
assert "VizieR" in browser.title
elem = browser.find_element_by_name('-c') # Find the query box
elem.send_keys('mwc 560' + Keys.RETURN)
time.sleep(0.2) # Let the page load, will be added to the API
elem=browser.find_element_by_name('-out.max')
elem.send_keys('unlimited'+Keys.TAB)
elem2=browser.find_element_by_name('-out.form')
time.sleep(0.5)
elem2.send_keys('; -Separated-Values')
time.sleep(0.5)
elem2.send_keys(Keys.TAB)
elem2.send_keys(Keys.TAB)
time.sleep(0.2)
browser.find_element_by_class_name('data').submit()
time.sleep(3.0)
browser.close()
The following value should be used for plain text:
fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/plain")
