How to write a proper function file in Python - python

I want to write a Python file that contains functions which we want to use in our project. We are working on a Selenium web scraping bot for Instagram. Right now we write all the functions in the scripts, but we want to make a "function" file which we will import and use in our scripts. The problem is that VS Code does not offer autocompletion when I want to use a webdriver method such as driver.find_element_by_xpath(cookies_button_xpath).click().
The function file (not finished yet) looks like this:
import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
# set constants for functions to run
WEBSITE_PRE_FIX = 'https://www.instagram.com/'
FORBIDDEN_CAPTION_WORDS = ['link in bio','buy now','limited time']
def open_ig(driver: webdriver.Remote):
    """Open the Instagram start page and accept the cookie banner.

    ``driver`` is annotated with ``webdriver.Remote`` (the WebDriver class
    exported by the ``selenium.webdriver`` package) instead of the
    ``webdriver`` *module* itself. Annotating with the module gives the
    editor nothing to resolve ``driver.get`` / ``driver.find_element_*``
    against, which is why autocompletion did not work.
    """
    # Open the website and wait until it is loaded.
    driver.get(WEBSITE_PRE_FIX)
    time.sleep(2)

    # Accept cookies.
    cookies_button_xpath = "/html/body/div[4]/div/div/button[1]"
    driver.find_element_by_xpath(cookies_button_xpath).click()
def login(driver: webdriver.Remote, username, password):
    """Fill in the login form, submit it, and click the follow-up prompt button.

    ``driver`` is annotated with ``webdriver.Remote`` (the WebDriver class)
    rather than the ``webdriver`` module so that editors can offer
    autocompletion for its methods.

    Args:
        driver: browser session that already shows the login page.
        username: text typed into the user-name box.
        password: text typed into the password box; ENTER is sent afterwards
            to submit the form.
    """
    time.sleep(2)

    # Fill in user name and password and log in.
    username_box_xpath = '/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input'
    username_element = driver.find_element_by_xpath(username_box_xpath)
    username_element.send_keys(username)

    password_box_xpath = '/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[2]/div/label/input'
    password_element = driver.find_element_by_xpath(password_box_xpath)
    password_element.send_keys(password)
    password_element.send_keys(Keys.ENTER)

    # Click "do not save username and password" once the next page is up.
    # NOTE(review): the original comment also mentions declining
    # notifications, but only this single button is clicked here.
    time.sleep(3)
    dont_save_username_button_password_xpath = '/html/body/div[1]/section/main/div/div/div/div/button'
    dont_save_username_button_element = driver.find_element_by_xpath(dont_save_username_button_password_xpath)
    dont_save_username_button_element.click()
So the code does work (as in, it runs and does what I want), but I would like to know if we can write the function file another way so that things like autocompletion and syntax coloring work. I'm not completely sure if it is possible. If there is any other way to write the functions file, all recommendations are welcome.

Have you tried writing the functions file as a simple class?
class FunctionsFile():
    """Bundle of Instagram helper routines that operate on a Selenium driver.

    The settings that were module-level constants in the question are kept
    as instance attributes (``website_pre_fix``, ``forbidden_capture_words``).
    """

    def __init__(self):
        self.website_pre_fix = 'https://www.instagram.com/'
        self.forbidden_capture_words = ['link in bio', 'buy now', 'limited time']

    def open_ig(self, driver: webdriver.Remote):
        """Open the Instagram start page and accept the cookie banner."""
        # Use the instance attribute: the module-level WEBSITE_PRE_FIX
        # constant is not defined in this class-based version.
        driver.get(self.website_pre_fix)
        time.sleep(2)

        # Accept cookies.
        cookies_button_xpath = "/html/body/div[4]/div/div/button[1]"
        driver.find_element_by_xpath(cookies_button_xpath).click()

    def login(self, driver: webdriver.Remote, username, password):
        """Fill in the login form, submit it, and click the follow-up prompt button.

        Args:
            driver: browser session that already shows the login page.
                Annotated with ``webdriver.Remote`` (the WebDriver class, not
                the ``webdriver`` module) so editors can autocomplete it.
            username: text typed into the user-name box.
            password: text typed into the password box; ENTER is sent
                afterwards to submit the form.
        """
        time.sleep(2)

        # Fill in user name and password and log in.
        username_box_xpath = '/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input'
        username_element = driver.find_element_by_xpath(username_box_xpath)
        username_element.send_keys(username)

        password_box_xpath = '/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[2]/div/label/input'
        password_element = driver.find_element_by_xpath(password_box_xpath)
        password_element.send_keys(password)
        password_element.send_keys(Keys.ENTER)

        # Click "do not save username and password" once the next page is up.
        time.sleep(3)
        dont_save_username_button_password_xpath = '/html/body/div[1]/section/main/div/div/div/div/button'
        dont_save_username_button_element = driver.find_element_by_xpath(dont_save_username_button_password_xpath)
        dont_save_username_button_element.click()
You can then instantiate the class in any file. If in same directory:
# Import the class from FunctionsFile.py.
from FunctionsFile import FunctionsFile
# Create one instance and call the helpers through it.
funcs = FunctionsFile()
# NOTE(review): `driver` is not created in this snippet; presumably it is a
# Selenium WebDriver set up earlier by the calling script.
funcs.open_ig(driver)
That should use the standard VS Code color schemes and autocompletion. (I think anyway).

Related

Python - Downloading PDF and saving to disk using Selenium

I'm creating an application that downloads PDF's from a website and saves them to disk. I understand the Requests module is capable of this but is not capable of handling the logic behind the download (File size, progress, time remaining etc.).
I've created the program using selenium thus far and would like to eventually incorporate this into a GUI Tkinter app eventually.
What would be the best way to handle the downloading, tracking and eventually creating a progress bar?
This is my code so far:
from selenium import webdriver
from time import sleep
import requests
import secrets
class manual_grabber():
    """ A class creating a manual downloader for the Roger Technology website """

    def __init__(self):
        """ Initialize attributes of manual grabber """
        self.driver = webdriver.Chrome('\\Users\\Joel\\Desktop\\Python\\manual_grabber\\chromedriver.exe')

    def login(self):
        """ Function controlling the login logic """
        self.driver.get('https://rogertechnology.it/en/b2b')
        sleep(1)

        # Locate elements and enter login details.
        # NOTE(review): `secrets` is expected to expose `username` and
        # `password`, so it is presumably a project-local module rather than
        # the standard-library `secrets` -- confirm.
        user_in = self.driver.find_element_by_xpath('/html/body/div[2]/form/input[6]')
        user_in.send_keys(secrets.username)
        pass_in = self.driver.find_element_by_xpath('/html/body/div[2]/form/input[7]')
        pass_in.send_keys(secrets.password)
        enter_button = self.driver.find_element_by_xpath('/html/body/div[2]/form/div/input')
        enter_button.click()

        # Click Self Service Area button.
        # XPath attribute tests are written [@id="..."]; "[#id=...]" is not
        # valid XPath.
        self_service_button = self.driver.find_element_by_xpath('//*[@id="bs-example-navbar-collapse-1"]/ul/li[1]/a')
        self_service_button.click()

    def download_file(self):
        """Access file tree and navigate to PDF's and download"""
        # Wait for all elements to load.
        sleep(3)

        # Find and switch to iFrame.
        frame = self.driver.find_element_by_xpath('//*[@id="siteOutFrame"]/iframe')
        self.driver.switch_to.frame(frame)

        # Find and click tech manuals button.
        tech_manuals_button = self.driver.find_element_by_xpath('//*[@id="fileTree_1"]/ul/li/ul/li[6]/a')
        tech_manuals_button.click()
# Script entry: create the grabber (its __init__ starts Chrome), log in,
# then open the file tree and click the tech-manuals entry.
bot = manual_grabber()
bot.login()
bot.download_file()
So in summary, I'd like to make this code download PDFs from a website, store them in a specific directory (named after its parent folder in the jQuery File Tree) and keep track of the progress (file size, time remaining, etc.).
Here is the DOM:
I hope this is enough information. Any more required please let me know.
I would recommend using tqdm and the request module for this.
Here is a sample code that effectively achieves that hard job of downloading and updating progress bar.
from tqdm import tqdm
import requests

url = "http://www.ovh.net/files/10Mb.dat"  # big file test
block_size = 1024  # bytes requested per chunk (1 kibibyte)

# Streaming, so we can iterate over the response instead of loading the
# whole body into memory. The context manager releases the connection.
with requests.get(url, stream=True) as response:
    # Fail early on 4xx/5xx instead of saving an error page as test.dat.
    response.raise_for_status()
    # 0 means the server did not announce a size.
    total_size_in_bytes = int(response.headers.get('content-length', 0))

    # unit_divisor=1024 makes the scaled prefixes match the binary 'iB' unit
    # (the default divisor is 1000). Both context managers close their
    # resource even if the download raises part-way through.
    with open('test.dat', 'wb') as file, tqdm(
        total=total_size_in_bytes, unit='iB', unit_scale=True, unit_divisor=1024,
    ) as progress_bar:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))  # change this to your widget in tkinter
            file.write(data)

# progress_bar.n holds the number of bytes counted by update().
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
    print("ERROR, something went wrong")
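Note that block_size is the size of each downloaded chunk, not the file size; the total file size comes from the Content-Length header. The time remaining can be estimated by dividing the number of bytes still to download by the measured download rate (chunks received per second multiplied by block_size). Here is an alternative - How to measure download speed and progress using requests?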

open multiple webdrivers without login everytime

I am trying to run Selenium using ThreadPoolExecutor. The website requires a login, and I am trying to speed up a step in what I am doing on the website. But every time a thread opens Chrome, I need to log in again, and it sometimes just hangs. I log in once first, without using threads, to do some processing. From there on, I would like to open a few Chrome webdrivers without the need to log in again. Is there a way around this? PS: the website has no id and password strings in the URL.
def startup(dirPath):
    """Launch Chrome configured to download into ``dirPath`` and return the driver.

    The browser gets two command-line switches, a CDP download-behavior
    command, a 30 second page-load timeout and a 10 second implicit wait.
    """
    chrome_options = webdriver.ChromeOptions()
    for switch in ("--disable-infobars", "--enable-file-cookies"):
        chrome_options.add_argument(switch)

    browser = webdriver.Chrome(
        options=chrome_options,
        executable_path=r"C:\Chrome\chromedriver.exe",
    )
    # Allow downloads and direct them to the requested directory.
    browser.execute_cdp_cmd(
        'Page.setDownloadBehavior',
        {'behavior': 'allow', 'downloadPath': dirPath},
    )
    # (The original left a cookie wipe, delete_all_cookies(), disabled here.)
    browser.set_page_load_timeout(30)
    browser.implicitly_wait(10)
    return browser
def webLogin(dID, pw, wd):
    """Open the login page, submit the credentials, and return the driver.

    Args:
        dID: value typed into the element with id ``username``.
        pw: value typed into the element with id ``password``.
        wd: driver to operate on.

    Returns:
        The same ``wd`` object, so callers may write ``wd = webLogin(...)``.
        Callers that ignore the return value are unaffected.
    """
    wd.get('some url')

    # Login, clear any outstanding login id. Each field is looked up once
    # and reused for both clear() and send_keys().
    username_box = wd.find_element_by_id('username')
    username_box.clear()
    username_box.send_keys(dID)

    password_box = wd.find_element_by_id('password')
    password_box.clear()
    password_box.send_keys(pw)

    wd.find_element_by_css_selector('.button').click()
    return wd
# Script entry point. NOTE(review): dID and dPw are not defined in this
# snippet; presumably they are set elsewhere in the full script.
if __name__ == '__main__':
dirPath, styleList = firstProcessing()
loginAndClearLB(dID, dPw, dirPath) # calls startup & webLogin, this is also my 1st login
# many webdrivers spawned here
# One addArtsToLB task is submitted per style; `results` maps each future
# back to the style it was submitted for.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
results = {executor.submit(addArtsToLB, dID, dPw, dirPath, style): style for style in styleList}
#Do other stuff
# A further browser is started and logged in again for the download step.
wd2 = startup(dirPath)
webLogin(dID, dPw, wd2)
startDL(wd2)
logOut(wd2, dirPath)
Any help would be greatly appreciated. Thanks!!
As mentioned above, you could obtain the authentication token from the first login and then include it in all the subsequent requests.
However, another option (if you're using basic auth) is to just add the username and password into the URL, like:
https://username:password@your.domain.com
ok it looks like there is no solution yet for more complicated websites that do not use basic authentication. My modified Solution:
def webOpenWithCookie(wd, cookies):
    """Copy ``cookies`` into ``wd`` and open the home page; return ``wd``.

    A page on the target site (the 404 URL) is opened before the cookies are
    added, and the home page is loaded afterwards with the cookies in place.
    """
    landing_url = 'https://some website url/404'
    home_url = 'https://some website url/home'

    wd.get(landing_url)
    for saved_cookie in cookies:
        wd.add_cookie(saved_cookie)
    wd.get(home_url)
    return wd
def myThreadedFunc(dirPath, style, cookies):  # this is the function that gets threaded
    """Run the per-style work in its own browser, reusing an existing login.

    Args:
        dirPath: passed to ``startup`` when launching Chrome.
        style: the work item handed to ``doSomethingHere``.
        cookies: cookies captured from an already logged-in driver; they are
            loaded into the new browser so no login is required.
    """
    wd = startup(dirPath)  # just starts chrome
    try:
        # Opens a page in the site, adds the cookies, then opens the real
        # target page. No login required now.
        wd = webOpenWithCookie(wd, cookies)
        doSomethingHere(wd, style)
    finally:
        # Always close this thread's browser, even when the work above
        # raises; otherwise failed tasks leave Chrome processes running.
        wd.quit()
if __name__ == '__main__':
    dirPath, styleList = firstProcessing()
    wd1 = startup(dirPath)
    # Log in once. webLogin operates on the driver passed in, so keep using
    # wd1 itself rather than rebinding it to webLogin's return value (which
    # is None unless webLogin explicitly returns the driver).
    webLogin(dID, dPw, wd1)
    cookies = wd1.get_cookies()  # get the cookies from the logged-in session

    # This spawns threads, but each thread will not need a login; the
    # compromise is that it needs to go to the 404 page first.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        results = {executor.submit(myThreadedFunc, dirPath, style, cookies): style for style in styleList}

How to define a button with selenium without executing "find_element"? (Python)

I really like to pre-define locators and assign them to variables in my test automation project so I could just refer to the variable name later on like this:
login_button = <browser>.find_element_by_id("login")
login_button.click()
The problem is that if these locators are saved in 'File A' and I import this file into 'File B' (at the very beginning of the program), then those "find_element" methods are executed during the import process, while the pages that contain those buttons are not loaded yet, which of course leaves me with an exception.
How can I save buttons into variables and import the containing file in the very beginning?
You can store such variables as strings, eg:
login_button_click = "driver.find_element_by_id('login').click()"
And then when required use:
# NOTE(review): exec() runs whatever code the string contains and hides the
# call from linters and autocompletion; only use it with strings you wrote
# yourself. `driver` must already exist in scope when this line runs.
exec(login_button_click)
In my opinion, you should divide your files like this:
file_a:
def click_login(driver):
    """Find the element with id ``login`` on the current page and click it."""
    driver.find_element_by_id('login').click()
def send_username(driver, username: str):
    """Type ``username`` into the element with id ``username``."""
    driver.find_element_by_id('username').send_keys(username)
def send_password(driver, password: str):
    """Type ``password`` into the element with id ``password``."""
    driver.find_element_by_id('password').send_keys(password)
file_b:
from file_a import *
from selenium import webdriver
# Module-level browser session and credentials used by preform_login().
driver = webdriver.Chrome()
username = "my_username"
password = "my_password"
# Runs the login steps in order: type the username, type the password, then
# click the login button. The three helpers come from file_a via the
# star-import at the top of this file.
def preform_login():
send_username(driver, username)
send_password(driver, password)
click_login(driver)
For testing, you should use a config file config.ini for all your var's.
Again this is just my opinion...
Hope you find this helpful!

Use login credentials that depends on the URL

I have a function created that takes several arguments including:
url
username
password
downloaded_filename
new_filename_name
example code:
# NOTE(review): example code from the question; it is not valid Python as
# written -- see the notes on the individual lines.
# NOTE(review): `pass` is a reserved keyword and cannot be a parameter name.
def reports(url, usr, pass, downloaded_filename, new_filename_name):
driver = webdriver.Chrome('location of webdriver')
# NOTE(review): the `.` is missing; presumably driver.get(url) was meant.
driver get(url)
usernamebox = driver.find_element_by_name('username')
# NOTE(review): `userbox` and `username` are not defined in this function;
# presumably `usernamebox` and the `usr` parameter were intended.
userbox.send_keys(username)
....
I use this to log in to several sites and download reports.
But I defined all the above variables to specific names. Is there a way that I can assign this variables to a function so that it is only needed the function name for the variables to be updated? Is there a better way of doing this?
You can declare them outside of your method body on top of your script if you're sure that you wouldn't need to change them at any instance of you calling your method.
Your sample script would look like:
from yourImports import *
# just below your import statements
url = r'https://your_url.com'
usr = 'the_username'
password = 'your password'
downloaded_file_name = r'myFile.txt'
new_file_name = 'my_new_file'
def reports():
    """Open the report site and type the user name into the login form.

    Takes no arguments: it reads the module-level ``url`` and ``usr``
    settings defined below the imports.
    """
    driver = webdriver.Chrome('location of webdriver')
    driver.get(url)
    usernamebox = driver.find_element_by_name('username')
    usernamebox.send_keys(usr)
    # .... (the remaining steps were elided in the original snippet)
I strongly recommend not using pass as a variable name in place of password: pass is a reserved keyword in Python, so it cannot be used as a variable or parameter name at all - the interpreter raises a SyntaxError.
Other approaches:
using a dictionary to hold values and using the keys to fetch values.
using a config text file to store these things and putting them at a directory path so that if you package your code (like an .exe) and you want to change these credentials, you can just change the config file.

How to loop entire Selenium script X times

I have a Selenium script that logs into Salesforce, creates a contact with standard data and saves.
I would like to ask the running user how many contacts to create using raw input from the console and then loop the script x times until they have been created.
Would it be best to add this into the code below? Or enter a loop in the console when running this python file in Terminal?
Thanks!
NOTE: I have used a random int between 0-5000 to create a unique (near enough) last name and email so that duplicates are unlikely.
NOTE 2: I will only create max of 10 contacts before deleting and repeating the experiment.
Here's the code:
from selenium import webdriver
import unittest
import time
from random import randint
class SalesforceLogin(unittest.TestCase):
    """Log into the Salesforce test site and create one contact with canned data."""

    def setUp(self):
        """Start Chrome and open the Salesforce login page."""
        self.driver = webdriver.Chrome()
        self.driver.get("https://test.salesforce.com")

    def _fill(self, element_id, text):
        """Clear the input with id ``element_id`` and type ``text`` into it."""
        field = self.driver.find_element_by_id(element_id)
        field.clear()
        field.send_keys(text)

    def test_salesforce_login(self):
        """Log in, open the new-contact form, fill every field, and save."""
        driver = self.driver
        driver.find_element_by_id("username").send_keys("xxxx")
        driver.find_element_by_id("password").send_keys("xxxx")
        driver.find_element_by_id("Login").click()
        time.sleep(1)

        # Random number used to make the last name and email (near enough)
        # unique between runs.
        last_random = randint(2, 5000)
        # Form an email address from strings and ints, e.g. test@1234.com.
        email = "test" + "@" + str(last_random) + ".com"

        driver.find_element_by_link_text("Contacts").click()
        driver.find_element_by_name("new").click()

        # (element id, text) pairs, filled in this order.
        contact_fields = [
            ("name_firstcon2", "Test"),
            ("name_lastcon2", str(last_random)),
            ("con4", "xxxx"),
            ("con15", email),
            ("con10", "012345678"),
            ("con12", "0123456789"),
            ("con5", "Mr"),
            ("con19street", "Made Up Mailing Street"),
            ("con19city", "Mailing City"),
            ("con19state", "Mailing State"),
            ("con19zip", "Mailing Zip"),
            ("con19country", "Mailing Country"),
            ("con18street", "Other Street"),
            ("con18city", "Other City"),
            ("con18state", "Other State"),
            ("con18zip", "Other Zip"),
            ("con18country", "Other Country"),
            ("con11", "Fax"),
            ("con13", "Home Phone"),
            ("con14", "Other Phone"),
            ("con16", "Assistant"),
            ("con17", "Asst. Phone"),
        ]
        for element_id, text in contact_fields:
            self._fill(element_id, text)

        # The description box is clicked before it is cleared and filled.
        driver.find_element_by_id("con20").click()
        self._fill("con20", "Description")

        driver.find_element_by_name("save").click()

    def tearDown(self):
        """Close the browser started in setUp."""
        self.driver.quit()
if __name__ == "__main__":
unittest.main()
Loop it or use concurrency. Sadly, I can't suggest any code, as I write my scripts in Java.
Also, your code is missing explicit waits, so there's a chance it might fail during the process.

Categories