I'm currently using Python and Selenium to loop my server for specific tasks to complete, I have tried to do 2 things, to speed up the process they are:
To use options.add_argument(f"user-data-dir={script_directory}\\profile") in the Chrome driver initiasation to avoid having to log in all the time.
To try and reuse the same browser window instead of closing and then re-opening the browser all the time.
Code:
#!/usr/bin/env python
import pathlib
import time
import urllib.parse
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
USER = "..."
PASS = "..."
def upload_to_server(link, redirect, unique_hash):
try:
requests.get(
"https://www.example.com/crons.php?cronUploadToServer=1&link={0}&redirect={1}&hash={2}".format(link,
redirect,
unique_hash))
except Exception as e:
print(e)
def download_from_server():
try:
server = requests.get("https://www.example.com/crons.php?cronDownloadFromServer=1")
return server.text.strip()
except Exception as e:
print(e)
# tear down chrome.
def tear_down(_driver):
_driver.quit()
_driver.close()
def check_for_tasks():
if download_from_server() == "NO_TASKS":
print("--> NO TASKS")
else:
# init the chrome driver.
def init_driver(using_linux, proxy):
script_directory = pathlib.Path().absolute()
try:
options = Options()
options.headless = False
options.add_argument('start-maximized')
options.add_argument('--disable-popup-blocking')
options.add_argument('--disable-notifications')
options.add_argument('--log-level=3')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument(f"user-data-dir={script_directory}\\profile")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("detach", True)
prefs = {'profile.default_content_setting_values.notifications': 2}
options.add_experimental_option('prefs', prefs)
if proxy == "0.0.0.0:0":
print("--> PROXY DISABLED ...")
else:
print("--> PROXY: " + str(proxy) + " ...")
options.add_argument('--proxy-server=%s' % proxy)
if using_linux:
return webdriver.Chrome(options=options)
else:
return webdriver.Chrome(options=options)
except Exception as e:
print(e)
# create session.
driver = init_driver(False, "0.0.0.0:00")
# starting URL.
driver.get('https://www.example.com/logon')
# click recorded.
def topcashback_click(_driver):
try:
_driver.get('https://www.example.com/Earn.aspx?mpurl=shein&mpID=17233')
if "redirect.aspx?mpurl=shein" in _driver.current_url:
return _driver.current_url
else:
return False
except Exception as e:
print(e)
# already logged in check.
if ">Account</span>" in driver.page_source:
print("--> LOGGED IN (ALREADY) ...")
driver.get('https://www.SITE.CO.UK/Earn.aspx?mpurl=shein&mpID=17233')
try:
server = download_from_server()
data_from_server = server.split('|')
link = topcashback_click(driver)
print("--> LINK --> " + link)
time.sleep(4)
if link != driver.current_url:
print("--> LINK (REDIRECT) --> " + driver.current_url)
upload_to_server(urllib.parse.quote_plus(link),
urllib.parse.quote_plus(
driver.current_url.replace('https://www.example.com', data_from_server[0])),
data_from_server[1])
# print(driver.current_url.replace('https://www.example.com', data_from_server[0]))
print("--> LINK UPLOADED TO THE DB ...")
# tear_down(driver)
except Exception as e:
print(e)
else:
# TopCashBack login for the first time.
def topcashback_login(_driver):
_driver.get('https://www.example.com/logon')
# small sleep to let the page load.
time.sleep(1)
_driver.find_element(By.XPATH, '//*[#id="txtEmail"]').send_keys(USER)
time.sleep(1)
_driver.find_element(By.XPATH, '//*[#id="loginPasswordInput"]').send_keys(PASS)
time.sleep(1)
_driver.find_element(By.XPATH, '//*[#id="Loginbtn"]').click()
time.sleep(5)
if ">Account</span>" in _driver.page_source:
return True
else:
return False
def topcashback_click(_driver):
try:
_driver.get('https://www.SITE.CO.UK/Earn.aspx?mpurl=shein&mpID=17233')
if "redirect.aspx?mpurl=shein" in _driver.current_url:
return _driver.current_url
else:
return False
except Exception as e:
print(e)
if topcashback_login(driver):
try:
print("--> LOGGED IN ...")
server = download_from_server()
data_from_server = server.split('|')
link = topcashback_click(driver)
print("--> LINK --> " + link)
time.sleep(4)
if link != driver.current_url:
print("--> LINK (REDIRECT) --> " + driver.current_url)
upload_to_server(urllib.parse.quote_plus(link),
urllib.parse.quote_plus(
driver.current_url.replace('https://www.example.com',
data_from_server[0])),
data_from_server[1])
# print(driver.current_url.replace('https://www.example.com', data_from_server[0]))
print("--> LINK UPLOADED TO THE DB ...")
# tear_down(driver)
except Exception as e:
print(e)
else:
print("--> ERROR --> DEBUG TIME ...")
tear_down(driver)
if __name__ == "__main__":
while True:
check_for_tasks()
time.sleep(2)
It's the 2nd one I'm having trouble with, currently, with my code, I'm getting the error:
driver.get('https://www.example.com/logon')
AttributeError: 'NoneType' object has no attribute 'get'
I think this is because I'm not connecting the first browser window, instead it's opening a new one which fails with the error above straight away.
Is there possibly a way to keep the first browser open and reuse it? any help would be appreciated.
Related
I'm trying solve re-captcha in a site using 2captcha service, but always returns to me the error:
selenium.common.exceptions.JavascriptException: Message: javascript error: document.getElementById(...).submit is not a function
my code:
try:
time.sleep(0.3)
driver.find_element(by=By.XPATH, value='//*[#id="email"]').send_keys(mail)
except:
def Solver():
driver.get(page_url)
u1 = f"https://2captcha.com/in.php?key={API_KEY}&method=userrecaptcha&googlekey={data_sitekey}&pageurl={page_url}&json=1&invisible=1"
r1 = requests.get(u1)
print(r1.json())
rid = r1.json().get("request")
u2 = f"https://2captcha.com/res.php?key={API_KEY}&action=get&id={int(rid)}&json=1"
time.sleep(5)
while True:
r2 = requests.get(u2)
print(r2.json())
if r2.json().get("status") == 1:
form_tokon = r2.json().get("request")
break
time.sleep(5)
wirte_tokon_js = f'document.getElementById("g-recaptcha-response").innerHTML="{form_tokon}";'
submit_js = 'document.getElementById("g-recaptcha-response").submit();'
driver.execute_script(wirte_tokon_js)
time.sleep(3)
driver.execute_script(submit_js)
time.sleep(3)
pic of captcha
I'm trying to generate a screenshot for the allure report using the pytest_runtest_makereport hook but get stuck because 'if' statement=false in this block of code:
if 'setup' in item.fixturenames:
web_driver = item.funcargs['setup']
else:
print('Fail to take screenshot.No setup fixture found')
Here is conftest.py:
#pytest.fixture(scope='function')
def get_webdriver(get_edge_options):
options = get_edge_options
print("Current working dir : %s" % os.getcwd())
s = Service('D:\mmanager\msedgedriver.exe')
driver = webdriver.Edge(service=s, options=options)
# driver.delete_all_cookies()
return driver
#pytest.fixture(scope='function') # function means run each test in new browser session
def setup(request, get_webdriver):
driver = get_webdriver
if request.cls is not None:
request.cls.driver = driver
driver.get(FCC_HOME)
yield driver
driver.quit()
# Shared Given Steps
#given('the MM3-0 login page is displayed', target_fixture='MM_Login_page')
def MM_Login_page(setup):
pass
#pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
rep = outcome.get_result()
if (rep.when == 'call' or rep.when == 'setup') and (rep.failed or rep.skipped):
try:
if 'setup' in item.fixturenames:
web_driver = item.funcargs['setup']
else:
print('Fail to take screenshot.No setup fixture found')
return
allure.attach(
web_driver.get_screenshot_as_png(),
name='!! Screenshot Captured !!',
attachment_type=allure.attachment_type.PNG)
except Exception as e:
print('Fail to take screen-shot: {}'.format(e))
I am trying to write changes to a file but I get the error :
'charmap' codec can't encode character '\u0159' in position 17: character maps to <undefined>
Other people said that you need to set the encoding to UTF-8 and so I set :
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write
After this is done the text is no longer being written to the ScrapedContent.csv file and the whole program becomes pretty much useless afterwards. Here is my code :
(I am providing the entire code since I don't know where the issue happens)
Desired solution :
There are "special" characters written to the file such as "č, ř, š". These are not actually special but rather normal in the 21st century but unfortunately it seems like that computers are still having a hard time understanding.
So in any case I need to write those characters to the file so they don't get broken. I don't care about what has to be done as long as the final file provides the result. I have spend about 6 hours trying to fix this now and I got nowhere.
This is the complete error output :
Traceback (most recent call last):
File "E:\Projects\Reality Scrapers\SRealityContentScraper\main.py", line 113, in <module>
writer.writerow([title.text, offers.text, address.text, phone_number, email])
File "C:\Users\workstationone\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u011b' in position 57: character maps to <undefined>
This is the code :
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException, TimeoutException
from platform import system
from os import getcwd, getlogin
import csv
cwd = getcwd()
os = system()
user = getlogin()
browser = input("Browser name ex.: Chromium | Chrome | Firefox: ")
if os == "Linux":
if user == "root":
print(
"You are executing the script as root. Make sure that the profile folder is also located in the root directory.")
del user
if browser == "Firefox" or browser == "Firefox ESR" or browser == "Firefox Browser":
try:
if os == "Windows":
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe")
else:
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver")
except WebDriverException:
print("Warning 10: Firefox is not installed in the default location")
bin_location = input("Firefox executable location: ")
binary = FirefoxBinary(bin_location)
if os == "Windows":
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe", firefox_binary=bin_location)
else:
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver", firefox_binary=bin_location)
del bin_location
elif browser == "Chrome" or browser == "Chrome Browser" or browser == "Google Chrome" or browser == "Chromium" or browser == "Chromium Browser":
try:
if os == "Windows":
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe")
else:
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver")
except WebDriverException:
print("Warning 11: Chrome/Chromium is not installed in the default location")
bin_location = input("Chrome/Chromium executable location: ")
options = Options()
options.binary_location = bin_location
if os == "Windows":
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe")
else:
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver")
del bin_location
else:
print("Error 10: Invalid browser selected")
input("Press ENTER to exit: ")
exit()
wait = WebDriverWait(driver, 10)
with open('links.csv', 'w+', newline='', encoding="utf-8") as write:
driver.get("https://www.sreality.cz/adresar")
writer = csv.writer(write)
page_spawn = 0
while page_spawn == 0:
try:
links = wait.until(ec.presence_of_all_elements_located((By.CSS_SELECTOR, "h2.title > a")))
# print(len(links))
for link in links:
print(link.get_attribute("href"))
writer.writerow([link.get_attribute("href")])
wait.until(ec.element_to_be_clickable(
(By.CSS_SELECTOR, "a.btn-paging-pn.icof.icon-arr-right.paging-next:not(.disabled"))).click()
except TimeoutException:
page_spawn = 1
break
with open('links.csv') as read:
reader = csv.reader(read)
link_list = list(reader)
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write:
writer = csv.writer(write)
for link in link_list:
driver.get(', '.join(link))
title = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "h1.page-title span.text.ng-binding")))
offers = wait.until(ec.presence_of_element_located(
(By.CSS_SELECTOR, "a.switcher.ng-binding.ng-scope span.ng-binding.ng-scope")))
address = wait.until(
ec.presence_of_element_located((By.CSS_SELECTOR, "tr.c-aginfo__table__row td.ng-binding")))
try:
wait.until(
ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding.ng-scope"))).click()
phone_number = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "span.phone.ng-binding")))
except TimeoutException:
pass
try:
wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding"))).click()
email = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "a.value.link.ng-binding")))
except TimeoutException:
pass
try:
phone_number = phone_number.text
except AttributeError:
phone_number = " "
pass
try:
email = email.text
except AttributeError:
email = " "
pass
print(title.text, " ", offers.text, " ", address.text, " ", phone_number, " ", email)
try:
writer.writerow([title.text, offers.text, address.text, phone_number, email])
except Exception as e:
print (e)
driver.quit()
This is heavily based on this answer.
Basically, you can't directly write unicode characters using csv.
You need a helper function:
def utf8ify(l):
return [str(s).encode('utf-8') for s d]
Then when you write the row add:
writer.writerow(utf8ify([title.text, offers.text, address.text, phone_number, email]))
The answer I linked to is better than mine in every way. If you want to learn why this works, read that answer.
I built a selenium web scraper (see below for code). It works fine and normally takes 4-6 seconds per loop. However, if I use a different web browser to do something else, say check my email, the web scraper slows down (sometimes taking up to a couple minutes per loop) and it also takes a long time to load my email (or whatever else I am trying to do with the internet.
Is there something wrong with my scraper? Or is it not possible to run a web scraper while also using the internet to do other things? Or...
Thanks!
counter = 36386
options = Options()
options.set_headless(True)
driver = webdriver.Firefox(options=options, executable_path = r'C:\Users\jajacobs\Downloads\geckodriver.exe')
while counter <= 50000:
start_time = time.time()
try:
driver.get("url goes here")
timeout = 20
inputElement = driver.find_element_by_name("naics_lookup[companyName]")
inputElement.send_keys(naics.iloc[counter, 1])
inputElement = driver.find_element_by_name("naics_lookup[city]")
inputElement.send_keys(naics.iloc[counter, 3])
inputElement = driver.find_element_by_name("naics_lookup[state]")
inputElement.send_keys(naics.iloc[counter, 2])
inputElement.submit()
print('Looking for NAICS code of company number ', counter)
try:
element_present = EC.presence_of_element_located((By.CLASS_NAME, 'results'))
WebDriverWait(driver, timeout).until(element_present)
print("element is ready")
try:
data = driver.find_element_by_class_name('results').text
naics.at[counter, 'naics'] = re.findall(r"\D(\d{6})\D", data)[0]
print(re.findall(r"\D(\d{6})\D", data)[0])
except:
print("No NAICS code")
pass
except:
print("element did not load")
pass
list = [1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,11000,12000,13000,
14000,15000,16000,17000,18000,19000,20000,21000,22000,23000,24000,25000,
25000,26000,27000,28000,29000,30000,31000,32000,33000,34000,35000,36000,
37000,38000,39000,40000,41000,42000,43000,44000,45000,46000,47000,48000,
49000,50000,]
if counter in list:
data_folder = Path('C:/Users/jajacobs/Documents/ipynb/')
file_to_save = data_folder / ('naics' + str(counter) + '.csv')
naics.to_csv(file_to_save)
counter += 1
except Exception as e:
print(e)
pass
print("total time taken this loop: ", time.time() - start_time)
driver.close()
I have a table with multiple pages. I want to select say 5 elements from the table and click on the checkbox corresponding to those at a time. How is that possible through selenium python automation
def __init__(self, env):
self.driver = webdriver.Firefox()
self.driver.maximize_window()
if env == 'Staging':
self.driver.get("https://serviceconsole-stg.tivo.com/serviceconsole/login.action")
elif env == 'QE':
self.driver.get("http://serviceconsolese01.tivo.com:9090/serviceconsole")
else:
print "Environment is not available", env
print "\n Supported Environments are Staging and QE"
self.driver.quit()
raise SystemExit("Program Exited")
with open('config.json','r') as user_credentials:
config = json.load(user_credentials)
self.driver.find_element_by_id('username').send_keys(config['user']['name'])
self.driver.find_element_by_id('password').send_keys(config['user']['password'])
self.driver.find_element_by_id("signIn").click()
try:
self.driver.find_element_by_xpath('// *[ # id = "loginValidationError"]')
print "Login Not successful"
self.driver.quit()
raise SystemExit("Program Exited")
except NoSuchElementException:
print "Login Successful"
def addnewlinearpackage(self, title, enddate_days_from_today):
try:
# Select Manage
self.driver.find_element_by_xpath("//*[#id='configuration-tab']").click()
# Creating new Linear Package
self.driver.find_element_by_id("linearpublishing").click()
self.driver.find_element_by_id("linpub").click()
self.driver.find_element_by_id("addLinearPackage").click()
self.driver.find_element_by_id("linearpackageTitle").send_keys(title)
self.driver.find_element_by_id('tempPackageId').send_keys(
datetime.strftime(datetime.now(), '%Y%m%d%H%M'))
self.driver.find_element_by_id("inlineLinearPackageCheckbox").click()
start_time = self.driver.find_element_by_id('startDate')
execute = start_time.find_element_by_xpath("*//span[#class='fa fa-calendar']")
self.driver.execute_script("arguments[0].click();", execute)
time.sleep(7)
end_time = self.driver.find_element_by_id('endDate')
end_time.find_element_by_xpath("*//span[#class='fa fa-calendar']").click()
end_date = (datetime.now() + timedelta(days=enddate_days_from_today)).strftime('%m/%d/%Y')
self.driver.find_element_by_xpath("*//td[#data-day='" + end_date + "']").click()
time.sleep(7)
except NoSuchElementException as exp:
print exp
self.driver.quit()
raise SystemExit("Program Exited")
def addlinearservice(self, serviceId):
try:
self.driver.find_element_by_id("linearServiceSection").click()
time.sleep(10)
self.driver.find_element_by_id("publishLinearPackageBtn").click()
time.sleep(30)
self.driver.find_element_by_class_name("sorting_1")
linear_service_found = False
# Searching existing linear service
if linear_service_found == False:
try:# Search in first page
self.driver.find_element_by_xpath(
"/html/body/div[4]/div/div/div[2]/div/div/div/div[2]/div[2]/div/ul/li[9]/a").click()
if self.driver.find_element_by_link_text(serviceId).is_displayed():
self.driver.find_element_by_xpath(
"//a[contains(text(),'" + serviceId + "')]/following::td/input[#type='checkbox']").click()
linear_service_found = True
print "Linear service found"
except NoSuchElementException:
print"No such Element found in page 1"
try:
while linear_service_found == False: # loop to navigate to next page till finding the service ID
try: # Search till last page is reached and next button is disabled
self.driver.find_element_by_xpath(
"// *[#id = 'associatedLinearServicesTable1_next']/../li[#class ='paginate_button next disabled']")
print 'No further Page available to search'
break
except NoSuchElementException:
try:
self.driver.find_element_by_xpath(
'/html/body/div[4]/div/div/div[2]/div/div/div/div[2]/div[2]/div/ul/li[9]/a').click()
if self.driver.find_element_by_link_text(serviceId).is_displayed():
# click the checkbox of Service ID
self.driver.find_element_by_xpath(
"//a[contains(text(),'" + serviceId + "')]/following::td/input[#type='checkbox']").click()
linear_service_found = True
print "Linear Service found"
break
except NoSuchElementException:
print "No such Element found in current page"
except NoSuchElementException:
print"No such Element found"
if linear_service_found == True:
time.sleep(10)
#Click on Save button
self.driver.find_element_by_xpath('/ html / body / div[4] / div / div / div[3] / button[1]').click()
time.sleep(10)
except NoSuchElementException as exp:
print exp
self.driver.quit()
raise SystemExit("Program Exited")
def publish(self):
try:
self.driver.find_element_by_xpath('//button[contains(text(), "Publish")]').click()
time.sleep(5)
self.driver.find_element_by_xpath('//*[#id="confirmDialogOk"]').click()
time.sleep(10)
try:
self.driver.find_element_by_xpath('//*[#id="appSuccessMsg"]')
print("Linear Package Published Successfully")
except NoSuchElementException:
print ("Linear Package NOT PUBLISHED.. check the Error Message in Service console webpage")
time.sleep(60)
self.driver.quit()
raise SystemExit("Program Exited")
except NoSuchElementException as exp:
print exp
self.driver.quit()
raise SystemExit("Program Exited")
def exit(self):
print("Exiting.....")
time.sleep(5)
self.driver.quit()
Please find the full code. This code works only for selecting one element. I have to select multiple elements.
image
If the checkboxes have a common locator, you can use find_elements_by_xpath instead of find_element_by_xpath. This will return a list of WebElements, which you can then iterate over to click the boxes.