I'm doing web scraping with Selenium, using a multithreading library. My script opens three Firefox browsers at the same time and scrapes with them. After scraping finishes, I want to close all the browsers. I have tried many approaches, but browser.quit() and browser.close() close only one browser; the other two do not close.
def get_links():
some code here...
def get_driver():
    """Return the Firefox driver bound to the current thread, creating a
    headless one on first use.

    Each worker thread gets its own driver via ``threadLocal``; the module
    level ``driver`` global only ever holds the driver most recently
    created/fetched by *some* thread.
    """
    global driver
    driver = getattr(threadLocal, 'driver', None)
    if driver is None:
        # BUG FIX: the original built ChromeOptions with --headless but
        # never passed them anywhere, then launched Firefox — so the
        # browser was never headless. Build Firefox options and use them.
        firefox_options = webdriver.FirefoxOptions()
        firefox_options.add_argument("--headless")
        # BUG FIX: ``executable_path`` was passed positionally, but the
        # first positional parameter of webdriver.Firefox is the profile
        # (selenium 3) — pass it by keyword.  TODO confirm selenium version.
        driver = webdriver.Firefox(executable_path=executable_path,
                                   options=firefox_options)
        setattr(threadLocal, 'driver', driver)
    return driver
def get_title(thisdict):
    """Load the page at ``thisdict["url"]`` and scroll to the bottom.

    Runs inside a worker thread; the driver comes from the thread-local
    cache in get_driver().  (Removed an unused ``import datetime``.)
    """
    driver = get_driver()
    driver.get(thisdict["url"])
    # Fixed 5 s sleep is a best-effort wait for the page to render; an
    # explicit WebDriverWait would be more reliable.
    time.sleep(5)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
if __name__ == '__main__':
    # Three worker threads, each of which lazily creates its own driver
    # through the threadLocal cache in get_driver().
    ThreadPool(3).map(get_title, get_links())
    # This closes only the single driver currently bound to the global
    # ``driver`` name (whichever thread touched it last) — the other two
    # thread-local drivers are never closed, which is exactly the
    # "only 1 browser closes" problem described above.
    driver.close() #or driver.quit()
You could use the self.selenium.stop() function. quit() basically calls the driver's dispose method, which in turn closes all the browser windows, while close() closes only the browser window that currently has focus.
I solved the problem with the code below. After the thread pool finishes all the scraping, I call the closeBrowsers function, which kills all open Firefox browsers.
import os
def closeBrowsers():
    # Windows-only workaround: taskkill force-kills EVERY firefox.exe on
    # the machine — including browsers the user opened manually — rather
    # than quitting the specific WebDriver sessions. Blunt but effective.
    os.system("taskkill /im firefox.exe /f")
if __name__ == '__main__':
    # map() blocks until every scraping thread has finished, then the
    # leftover per-thread Firefox processes are killed in one sweep.
    ThreadPool(2).map(get_title, get_links())
    closeBrowsers()
Related
I am having the world's hardest time getting my Python code to run headless. Here's what I've done.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
That's included at the top of my script, then I've included this.
def login(self, username, password):
    """Log in to the site as *username*.

    Returns True on success, False when the site bounces back to the
    login page (bad credentials), and 17 when any step raises — the
    original error-code convention, kept for caller compatibility.
    """
    self.destroyDriver()
    try:
        print(("Logging into => ", username))
        chrome_options = webdriver.ChromeOptions()
        # chrome_options.add_argument('--proxy-server=%s' % PROXY)
        chrome_options.add_argument('--window-size=1420,1080')
        chrome_options.add_argument('--headless')
        # BUG FIX: the options object was built but never handed to the
        # driver, so Chrome silently ignored --headless and a visible
        # window still popped up — the exact symptom described below.
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.implicitly_wait(10)
        self.driver.get("https://example.com")
        time.sleep(5)
        self.driver.find_element_by_class_name("sign-in").click()
        time.sleep(6)
        self.driver.find_element_by_name("avatarname").send_keys(username)
        self.driver.find_element_by_name("password").send_keys(password)
        self.driver.find_element_by_class_name("btn-primary").click()
        time.sleep(4)
        if "welcome/login" in self.driver.current_url:
            # Still on the login page => credentials rejected.
            self.destroyDriver()
            return False
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt /
        # SystemExit are not swallowed; still best-effort cleanup.
        self.destroyDriver()
        return 17
    return True
However, this doesn't work at all — the Chrome tab still pops up. I am trying to run headless because when the tab is minimized on my desktop the JavaScript does not load properly, which breaks the entire script and forces me to keep the tab constantly open.
You have to attach your options to your driver:
self.driver = webdriver.Chrome(options=chrome_options)
When I run the code Chrome opens the URL but after about 2 seconds it crashes. It also says on the top of the chrome window "Chrome is being controlled by automated test software"
I am running the compatible version of the chrome driver for my version of chrome.
This is my code. How can I fix the crashing?
#from config import keys
from selenium import webdriver
def order():
    """Launch Chrome via the local chromedriver and open YouTube."""
    browser = webdriver.Chrome('./chromedriver')
    browser.get('https://www.youtube.com/')


if __name__ == '__main__':
    order()
Because you created the driver object in the scope of order(),
all of its local variables are discarded once order() finishes executing.
You have to declare driver as a global variable instead:
from selenium import webdriver
# Declare the driver at module level so it outlives order() and the
# browser session is not torn down when the function returns.
driver = None


def order():
    """Open YouTube, binding the driver to the module-level name."""
    # BUG FIX: without this ``global`` statement the assignment below
    # creates a *local* variable, the module-level ``driver`` stays None,
    # and the snippet does not actually keep the driver alive at all.
    global driver
    driver = webdriver.Chrome('./chromedriver')
    driver.get('https://www.youtube.com/')


if __name__ == '__main__':
    order()
Otherwise, you can add time.sleep() to wait for a while
import time
from selenium import webdriver
def order():
    """Open YouTube and keep the browser up briefly before exiting."""
    chrome = webdriver.Chrome('./chromedriver')
    chrome.get('https://www.youtube.com/')
    # Hold the session open for five seconds so the page stays visible
    # before the script — and with it the driver — terminates.
    time.sleep(5)


if __name__ == '__main__':
    order()
I'm trying to run this code to perform some action in Chrome and Firefox, but when I run the test runner Chrome starts and the test cases are failing in Chrome, then Firefox opens and test cases work just fine in Firefox.
I've tried for loop and a couple of things that didn't work.
Here's my code:
from selenium import webdriver as wd
import pytest
import time
Chrome=wd.Chrome(executable_path=r"C:\Chrome\chromedriver.exe")
Firefox=wd.Firefox(executable_path=r"C:\geckodriver\geckodriver.exe")
class TestLogin():
    # BUG FIX: '#pytest.fixture()' was a *comment*, so the fixture was
    # never registered — it must be the '@' decorator.  Parametrizing the
    # fixture runs the whole test once per browser; the original for-loop
    # around ``yield`` would make pytest error out after the first pass,
    # because a fixture function may only yield once.
    @pytest.fixture(params=[Chrome, Firefox])
    def setup1(self, request):
        self.driver = request.param
        self.driver.get("https://www.python.org")
        time.sleep(3)
        yield
        # Teardown: runs after the test for each parametrized browser.
        time.sleep(3)
        self.driver.close()

    def test_Python_website(self, setup1):
        self.driver.find_element_by_id("downloads").click()
        time.sleep(3)
Instead of explicit sleep's, you should wait for the element:
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import pytest
import time
Chrome=wd.Chrome(executable_path=r"C:\Chrome\chromedriver.exe")
Firefox=wd.Firefox(executable_path=r"C:\geckodriver\geckodriver.exe")
class TestLogin():
    # BUG FIX: '#pytest.fixture()' was a comment, not a decorator, so the
    # fixture was never registered.  Parametrizing it runs the test once
    # per browser; a for-loop with ``yield`` inside would make pytest
    # fail after the first pass (a fixture may only yield once).
    @pytest.fixture(params=[Chrome, Firefox])
    def setup1(self, request):
        self.driver = request.param
        self.driver.get("https://www.python.org")
        yield
        # quit() tears down the whole browser process, not just a window.
        self.driver.quit()

    def test_Python_website(self, setup1):
        wait = WebDriverWait(self.driver, 10)
        # BUG FIX: element_to_be_clickable takes a single locator *tuple*
        # — EC.element_to_be_clickable(By.ID, "downloads") raises a
        # TypeError.  (WebDriverWait and By must also be imported.)
        downloads = wait.until(EC.element_to_be_clickable((By.ID, "downloads")))
        downloads.click()
Note: You probably want self.driver.quit(), as this will close the window and cause the browser process to shut down as well. The call to self.driver.close() will only close the window, leaving the firefox.exe or chrome.exe process running in memory after the test finishes.
I am trying to create an app which monitors a webpage using phantomjs and selenium but I have found an issue with a certain url as seen in the code.
from selenium import webdriver
SITE = "http://www.adidas.com/"
def main():
    """Load SITE in PhantomJS, capture a screenshot, and print the final URL."""
    print("Building Driver")
    driver = webdriver.PhantomJS()
    try:
        driver.set_window_size(1024, 768)
        print("Driver Created")
        print("Navigating to: "+SITE)
        driver.get(SITE)
        print("Site loaded")
        print("Saving Screenshot")
        driver.save_screenshot("screen.png")
        print("Fetching Current URL")
        print(driver.current_url)
    finally:
        # BUG FIX (resource leak): if driver.get() hangs and then raises
        # — the reported symptom on this URL — quit() was never reached
        # and a zombie phantomjs process lingered. finally guarantees the
        # teardown; on the success path the output order is unchanged.
        print("Exiting")
        driver.quit()


if __name__ == '__main__':
    main()
The program never gets past the line driver.get(SITE). How can I make it so that the website will load?
It appears that this is an error in PhantomJS. I would try using either the firefox or the chrome driver instead.
from selenium import webdriver
# Target URL; note the answer switched to the .de domain.
SITE = "http://www.adidas.de"


def main():
    print("Building Driver")
    # NOTE(review): '*path to chrome driver*' is a placeholder left by the
    # answer author and is not valid Python — substitute the real path to
    # your chromedriver binary before running.
    browser = webdriver.Chrome(*path to chrome driver*)
    print("Driver Created")
    print("Navigating to: "+SITE)
    browser.get(SITE)
    print("Site loaded")
    browser.quit()


if __name__ == '__main__':
    main()
Creating a headless application would also be possible if that's what you wanted.
My ChromeDriver version is 2.22
In my code, there is no quit() or close(), but Chrome browser closes after execution every time.
But if I change webdriver to Firefox, it works well.
My code is
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
def scrapy_list_from_youtube_list(url):
    """Collect the video links of a YouTube playlist and feed each of the
    first two through savefrom.net's download form.

    Consistency fix: the original used Python 2 ``print x`` statements
    while every other snippet in this file is Python 3; converted to
    print() calls and replaced the hand-rolled counter with enumerate().
    """
    browser = webdriver.Chrome()
    browser.get(url)
    links = browser.find_elements_by_class_name('pl-video-title-link')
    download_list = [link.get_attribute('href') for link in links]
    print(download_list)
    for i, download_link in enumerate(download_list[0:2]):
        try:
            browser.get('http://www.en.savefrom.net/')
            inout = browser.find_element_by_id('sf_url')
            inout.send_keys(download_link)
            inout.send_keys(Keys.ENTER)
            # Fixed 20 s wait for savefrom to resolve the link; an
            # explicit wait on the Download element would be more robust.
            time.sleep(20)
            c = browser.find_element_by_link_text('Download')
            print(i)
            # print(c.get_attribute('href'))
            c.click()
        except Exception as e:
            print(e)


scrapy_list_from_youtube_list('https://www.youtube.com/playlist?list=PLqjtD4kfVG7OFk0vLP1BxUJTmN3-Uj9qM')
I had a similar issue. But my code had the line driver.close() in it. I removed that line and my chrome window didn't close after completion of execution. Try to get a similar workaround.