from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
Path = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(Path)
driver.get("https://www.emag.ro/")
search_bar = driver.find_element_by_id("searchboxTrigger")
search_bar.send_keys("laptopuri")
search_bar.send_keys(Keys.RETURN)
main = None
try:
main = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "main-container"))
)
print("Page loaded,main retrived succesfully")
except:
driver.quit()
items = main.find_element_by_id("card_grid")
products = items.find_elements_by_css_selector("div.card-item.js-product-data")
count = 0
for product in products:
raw_name = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
raw_price = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "product-new-price"))
)
#Parsing the product name
raw_name = raw_name.replace("Laptop", "")
raw_name = raw_name.strip()
if raw_name.startswith("Apple"):
sEnd = raw_name.find(",")
else:
sEnd = raw_name.find("cu") - 1
product_name = raw_name[:sEnd]
#Parsing the product price
raw_price = raw_price.text[:raw_price.text.find(" ")]
print(raw_price)
count += 1
print(f"{count} results returned")
driver.quit()
Code works perfectly fine sometimes,but sometimes i get the error:
Please note i am new at this,so an explanation would be very appreciated.I just learned how to use selenium and the reason i transitioned from beautifulsoup because of the lack of wait possibility,and now when trying to use that,i get this error SOMETIMES
See this :
driver = webdriver.Chrome(Path)
and how have you used it here :
try:
main = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "main-container"))
)
print("Page loaded,main retrived succesfully")
If you pay attention you would see that, you are using WebDriverWait(driver, 10) and passing driver reference.
But here
raw_name = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
you are passing product in WebDriverWait, which is wrong, you should pass driver reference here. like
raw_name = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
This should help you past this issue for sure.
also, make changes here
raw_price = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "product-new-price"))
)
This is what we have internally :
class WebDriverWait(object):
def __init__(self, driver, timeout, poll_frequency=POLL_FREQUENCY, ignored_exceptions=None):
"""Constructor, takes a WebDriver instance and timeout in seconds.
Update 1 :
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(50)
driver.get("https://www.emag.ro/")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//i[contains(#class,'close')]/parent::button[#class='close']"))).click()
ActionChains(driver).move_to_element(wait.until(EC.visibility_of_element_located((By.XPATH, "//button[contains(#class,'js-accept')]")))).click().perform()
search_bar = driver.find_element_by_id("searchboxTrigger")
search_bar.send_keys("laptopuri")
search_bar.send_keys(Keys.RETURN)
main = None
try:
main = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "main-container")))
print("Page loaded,main retrived succesfully")
except:
driver.quit()
items = main.find_element_by_id("card_grid")
products = driver.find_elements_by_css_selector("div.card-item.js-product-data")
count = 0
for product in products:
raw_name = product.find_element_by_css_selector("h2.card-body.product-title-zone a").text
print(raw_name)
raw_price = product.find_element_by_css_selector("p.product-new-price").text
print(raw_price)
#Parsing the product name
# raw_name = raw_name.replace("Laptop", "").strip()
# if raw_name.startswith("Apple"):
# sEnd = raw_name.find(",")
# else:
# sEnd = raw_name.find("cu") - 1
# product_name = raw_name[:sEnd]
#Parsing the product price
# raw_price = raw_price[raw_price.find(" ")]
# print(raw_price)
# count += 1
#print(f"{count} results returned")
driver.quit()
Related
Selenium python- skip an iteration if a web element is not present
Please I'm trying to fetch data from https://b2b.baidu.com/ after inputting a keyword in a search field on the website. I want to skip an iteration if an element is not present on the first page.
I know this can work seamlessly but I'm still a novice and can't figure out what I'm doing wrongly at the moment. Your help will be greatly appreciated.
Here is what I've done:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
import pandas as pd
website = 'https://b2b.baidu.com/'
path = "C:/Users/ICT/chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get(website)
driver.implicitly_wait(4)
wait = WebDriverWait(driver, 10)
driver.maximize_window()
# the search terms which contains location and keyword are from a dataframe in another file
from baidu_locations import location_key_row
from baidu_locations import location_data_col
from baidu_locations import key_data_col
for i in range(1, 6):
website = []
rep_name = []
contact = []
location = []
keyword = []
business_name = []
# Input location and keyword
enter_query = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[placeholder='我要采购…']")))
enter_query.clear()
enter_query.send_keys(location_key_row[i-1])
location_query = location_data_col[i-1]
location.append(location_query)
keyword_query = location_data_col[i-1]
keyword.append(keyword_query)
search_type = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "li[class='search-type']")))
search_type.click()
# If *```company_url```* element is not available, I want to go back to the next *```enter_query```* and continue the iteration.
try:
company_url = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div > div:nth-child(1) > div > div > div > div.title-container > span > span.name > a')))
website.append(company_url.get_property('href'))
first_location = wait.until(EC.element_to_be_clickable((By.XPATH, '(//span[#class="title link"])[1]')))
first_location.click()
driver.switch_to.window(driver.window_handles[1])
name = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='shop-contact-warp shop-contact-vertical-warp'] div[class='top'] div span[class='show-name']")))
business_name.append(name.text)
#print(reps)
representative = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.shop-index-new-right> div > div.top > div:nth-child(1) > div > div.text > p.sub-text")))
rep_name.append(representative.text)
phone_option = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'查看电话')]")))
phone_option.click()
popup_contact = driver.window_handles[1]
driver.switch_to.window(popup_contact)
phone_number = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'p[class="phone"]')))
contact.append(phone_number.text)
#print(contact_no)
time.sleep(2)
return_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'返回')]")))
return_button.click()
driver.close()
driver.switch_to.window(driver.window_handles[0])
except:
continue
df = pd.DataFrame({'Location': location, 'Keyword': keyword, 'Name': business_name, 'Representative': rep_name, 'Phone': contact, 'Link': website})
So if the company_url variable element is present on the first page, I want to click on it, go to the new tab and copy the data on that page and return to the first tab and repeat the process.
If the element variable company_url is not present, I want to skip that iteration and input the next search term enter_query from the specified range.
I want to fetch the data of enter_query where company_url element is present and save in a dataframe.
This code block seems to only fetch one row of data no matter the range I set.
Thank you for your help. Kindly let me know if my question is not clear or any questions you might have.
enter image description here
Well I guess you only want to loop in specific conditions. In that case, why not increment iterator only when satisfied your conditions?
Hope below could be a help
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
website = "https://b2b.baidu.com/"
path = "C:/Users/ICT/chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get(website)
driver.implicitly_wait(4)
wait = WebDriverWait(driver, 10)
driver.maximize_window()
# the search terms which contains location and keyword are from a dataframe in another file
from baidu_locations import key_data_col, location_data_col, location_key_row
# ------------- added -------------
i = index_from = 1
index_to = 6
# ---------------------------------
# ------------------ modified ------------------
while i < index_to:
# ----------------------------------------------
website = []
rep_name = []
contact = []
location = []
keyword = []
business_name = []
# Input location and keyword
enter_query = wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, "input[placeholder='我要采购…']"))
)
enter_query.clear()
enter_query.send_keys(location_key_row[i - 1])
location_query = location_data_col[i - 1]
location.append(location_query)
keyword_query = location_data_col[i - 1]
keyword.append(keyword_query)
search_type = wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, "li[class='search-type']"))
)
search_type.click()
# ------------------ modified ------------------
try:
company_url = wait.until(
EC.element_to_be_clickable(
(
By.CSS_SELECTOR,
"div > div:nth-child(1) > div > div > div > div.title-container > span > span.name > a",
)
)
)
except:
continue
try:
# ----------------------------------------------
website.append(company_url.get_property("href"))
first_location = wait.until(
EC.element_to_be_clickable((By.XPATH, '(//span[#class="title link"])[1]'))
)
first_location.click()
driver.switch_to.window(driver.window_handles[1])
name = wait.until(
EC.element_to_be_clickable(
(
By.CSS_SELECTOR,
"div[class='shop-contact-warp shop-contact-vertical-warp'] div[class='top'] div span[class='show-name']",
)
)
)
business_name.append(name.text)
# print(reps)
representative = wait.until(
EC.element_to_be_clickable(
(
By.CSS_SELECTOR,
"div.shop-index-new-right> div > div.top > div:nth-child(1) > div > div.text > p.sub-text",
)
)
)
rep_name.append(representative.text)
phone_option = wait.until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'查看电话')]"))
)
phone_option.click()
popup_contact = driver.window_handles[1]
driver.switch_to.window(popup_contact)
phone_number = wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, 'p[class="phone"]'))
)
contact.append(phone_number.text)
# print(contact_no)
time.sleep(2)
return_button = wait.until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'返回')]"))
)
return_button.click()
driver.close()
driver.switch_to.window(driver.window_handles[0])
# ------------- added -------------
# No problem here
i += 1
# ---------------------------------
except:
continue
df = pd.DataFrame(
{
"Location": location,
"Keyword": keyword,
"Name": business_name,
"Representative": rep_name,
"Phone": contact,
"Link": website,
}
)
I have this code, and the error is very silly (I don't think the selenium have anything to do with it), but i've tried a lot of things and nothing works. What can I do?
THE ERROR IS IN THE VARIABLES NAME "sHoras"
class Clima():
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
website = "https://weather.com/?Goto=Redirected"
path = "C:/Users/Administrador/Downloads/chromedriver_win32/chromedriver.exe"
driver = webdriver.Chrome(options=options, service=Service(path))
driver.get(website)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "truste-button3"))).click()
time.sleep(3)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'LanguageSelector--LanguageSelectorStatus--mXkYQ'))).click()
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[3]/div[1]/header/div/div[2]/div[2]/nav/div/div/section/div/ul/li[2]'))).click()
time.sleep(3)
button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'LocationSearch_input')))
time.sleep(2)
button.click()
time.sleep(1)
button.send_keys("Medellin, Antioquia")
button.send_keys(Keys.RETURN)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[3]/div[3]/div/nav/div/div[1]/a[2]/span'))).click()
time.sleep(2)
temperatura = driver.find_elements(By.XPATH, "//summary/div/div/div/span[#class='DetailsSummary--tempValue--1K4ka']")
temperatura = [temperatura.text for temperatura in temperatura]
horasT = driver.find_elements(By.XPATH, "//div/h3")
horasT = [horasT.text for horasT in horasT]
driver.quit()
indx = datetime.datetime.now()
indx = int(indx.strftime('%H'))
indx = (24 - indx)
sHoras = []
[sHoras.append(hora) for hora in horasT if hora not in sHoras]
horas = []
for i in range(0, indx):
horas.append(sHoras[i])
THE ERROR IS IN THE VARIABLES NAME "sHoras"
I am trying to scrape odds from https://en.stoiximan.gr/live. While my code is working, I get an error for having uneven lists in my final dataframe. Unfortunately, stoiximan seems to place 3-way odds together with over/under odds and suspended/locked matches (as in the picture).
What I am trying to do is to delete both home and away teams from their respective lists if their odds are over/under or locked. Any suggestions?
Here 's my code so far:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
import openpyxl
import os
#launch chrome and keep window open
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
#visit en.stoiximan.gr and maximize window
driver.get("https://en.stoiximan.gr/live/")
driver.maximize_window()
#close modal window
try:
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.XPATH, "//button[#class='sb-modal__close__btn uk-modal-close-default uk-icon uk-
close']"
))).click()
except:
pass
#accept cookies
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.ID, "onetrust-accept-btn-handler"
))).click()
#Initialize storage for stoiximan
stoiximan_home_teams_list = []
stoiximan_away_teams_list = []
stoiximan_home_odds_list = []
stoiximan_draw_odds_list = []
stoiximan_away_odds_list = []
#grab all home/away teams and explicit odds
try:
stoiximan_home_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-
row__container--row']/div[1]/a/div[1]/div[1]/span"))
)
stoiximan_away_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[1]/a/div[1]/div[2]/span"))
)
stoiximan_home_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[1]/span[2]"))
)
stoiximan_draw_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[2]/span[2]"))
)
stoiximan_away_odds = WebDriverWait(driver, 1).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[3]/span[2]"))
)
except:
driver.quit()
#loop each home team and append the lists
for stoiximan_home_team in stoiximan_home_teams:
stoiximan_home_teams_list.append(stoiximan_home_team.get_attribute('innerText'))
for stoiximan_away_team in stoiximan_away_teams:
stoiximan_away_teams_list.append(stoiximan_away_team.get_attribute('innerText'))
for stoiximan_home_odd in stoiximan_home_odds:
stoiximan_home_odds_list.append(stoiximan_home_odd.text)
for stoiximan_draw_odd in stoiximan_draw_odds:
stoiximan_draw_odds_list.append(stoiximan_draw_odd.text)
for stoiximan_away_odd in stoiximan_away_odds:
stoiximan_away_odds_list.append(stoiximan_away_odd.text)
print(stoiximan_home_teams_list)
print(len(stoiximan_home_teams_list))
print(stoiximan_away_teams_list)
print(len(stoiximan_away_teams_list))
print(stoiximan_home_odds_list)
print(len(stoiximan_home_odds_list))
print(stoiximan_draw_odds_list)
print(len(stoiximan_draw_odds_list))
print(stoiximan_away_odds_list)
print(len(stoiximan_away_odds_list))
#make str to float in odds lists
stoiximan_home_odds_list_float = [float(i) for i in stoiximan_home_odds_list]
stoiximan_draw_odds_list_float = [float(j) for j in stoiximan_draw_odds_list]
stoiximan_away_odds_list_float = [float(k) for k in stoiximan_away_odds_list]
#create dictionary for data
stoiximan_dict = {'Stoiximan Home Team': stoiximan_home_teams_list,
'Stoiximan Away Team': stoiximan_away_teams_list,
'Stoiximan Home Odd': stoiximan_home_odds_list_float,
'Stoiximan Draw Odd': stoiximan_draw_odds_list_float,
'Stoiximan Away Odd': stoiximan_away_odds_list_float
}
#create dataframe for data
df4 = pd.DataFrame(stoiximan_dict)
print(df4)
#write to excel file and open it
df4.to_excel(r'C:\Users\sweet_000\Desktop\data.xlsx', sheet_name="stoiximan", index=False)
os.system('start EXCEL.EXE "C:\\Users\\sweet_000\\Desktop\\data.xlsx"')
driver.quit()
I apologize ahead of time for the length of this question but I want to give enough context.
I've been running in circles trying to figure out why this is happening. I'm indexing all the dropdown values of a bookstore and have done so successfully for 'departments' and 'course_nums' but when I try to do the same thing for 'sections' relative to the 'course_nums' it returns some of the lists of sections and fails to return others. I've seen alternative methods of getting options from Selector on Stack and Documentation but I've had no success with these methods.
When a dropdown is selected the attributes of the HTML get an additional id called
<option value="001" data-select2-id="703">001</option>
So I've tried to just use Selector without first clicking on the dropdown (by commenting out the DriverWait before the Selector in fill_sections()) but this does not work although the element is present in the DOM.
When run, it will sometimes return the corresponding course sections and other times an empty list of sections, but each course number should have at least 1 section. As I reviewed the automated input it's as if it goes too fast on some course numbers which might cause it to miss fetching all options, but I'm not sure. I'm stumped because this works for every other fetch for the departments and course numbers relative to the department.
Fair warning, let it run until it prints the arrays otherwise it starts this infinite loop for some reason when you hit
control-c and I honestly don't know why.
Upon request from the comments, here is the entire script relevant to indexing the departments, course_nums, and sections...
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from pprint import pprint
import sys
import time
import os
# path = '/usr/local/bin/chromedriver'
# sys.path.append(path)
URL = "https://gmu.bncollege.com/course-material/course-finder"
# page = requests.get(URL)
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument("--no-sandbox")
# options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
departments = []
courses = []
sections = []
def fill_departments():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'))
)
element.click()
# print('selected department drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/span[2]/span/span[1]/input'))
)
selector = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/select"))
options = selector.options
for index in range(1, len(options)):
# (index - 1) to ensure proper indexing of departments
dep_dict = {"index": index-1, "department": options[index].text}
departments.append(dep_dict)
except:
return 1
return 0
def fill_course_nums(department, index):
try:
# /html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'))
)
element.click()
# print('clicked on course drop down')
# /html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/select
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/span[2]/span/span[1]/input'))
)
selector = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/select"))
options = selector.options
courses = []
for ind in range(0, len(options)):
if (options[ind].text == "Select"):
continue
course_obj = {"index": ind - 1, "course_num": options[ind].text}
courses.append(course_obj)
departments[index]['courses'] = courses
except:
return 1
return 0
def fill_sections(dep_index, index):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div'))
)
element.click()
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/span[2]/span/span[1]/input'))
)
# selector_s = Select(WebDriverWait(driver, 5).until(EC.find_element((By.XPATH, "/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/select"))))
selector_s = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/select"))
options_s = selector_s.options
# #
sections = []
for op in options_s:
if (op.text == "Select"):
continue
sections.append(op.text)
departments[dep_index]['courses'][index]['sections'] = sections
except:
return 1
return 0
def select_term(term):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[1]/div/div'))
)
element.click()
# print('selected term drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[1]/div/div/span[2]/span/span[2]/ul/li[2]'))
)
element.click()
# print('selected spring 2022')
except:
return 1
return 0
def clear_form():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[5]/div/a'))
)
element.click()
except:
return 1
return 0
def select_department(department):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'))
)
element.click()
# print('selected department drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/span[2]/span/span[1]/input'))
)
element.send_keys(department)
# print('typed department cs')
element.send_keys(Keys.ENTER)
# print('selected department cs')
except:
return 1
return 0
def select_course(course):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'))
)
element.click()
# print('clicked on course drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/span[2]/span/span[1]/input'))
)
element.send_keys(course)
element.send_keys(Keys.ENTER)
# print('selected course 321')
except:
return 1
return 0
def select_campus_info():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located(
(By.XPATH, '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[1]/span'))
)
element.click()
# print('found campus button')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[1]/span[2]/span/span[2]/ul/li[3]'))
)
element.click()
# print('selected fairfax campus')
except:
return 1
return 0
How these methods are run in main()
def main():
start = time.time()
driver.get("https://gmu.bncollege.com/course-material/course-finder")
while(select_campus_info()):
print('selecting campus')
while(select_term("spring")):
print('selecting term')
while(fill_departments()):
print("filling departments")
for dep in range(0, len(departments)):
if (dep != 0):
while(select_term("spring")):
print('selecting term')
while(select_department(departments[dep]['department'])):
print("trying (dep)")
while(fill_course_nums(departments[dep]['department'], departments[dep]['index'])):
print("filling courses")
while(clear_form()):
print("clearing")
# break here after 3 to limit filling all courses for debugging purposes
if (dep >= 3):
break
while(select_term("spring")):
print('selecting term')
for dep in range(0, 3):
if (dep != 0):
while(select_term("spring")):
print('selecting term')
while(select_department(departments[dep]['department'])):
print("trying (dep)")
for course in range(0, len(departments[dep]['courses'])):
while(select_course(departments[dep]['courses'][course]['course_num'])):
print("trying (cnum)")
while(fill_sections(dep, course)):
print('filling (sections)')
while(clear_form()):
print('clearing form.')
pprint(departments[0])
pprint(departments[1])
pprint(departments[2])
# fill_textbook_info('spring', 'CS', 310, '002')
# curUrl = driver.current_url
# print(curUrl)
time.sleep(100)
end = time.time()
print(end - start)
driver.close()
if __name__ == "__main__":
main()
I run your code and usually problem was to fast running code - and it needed time.sleep() in some places - especially after sending ENTER (it used 1 second because 0.5 second was too small)
I put full code because I organized code in different way (I use nested for-loops`) and I used different XPATH (I tried to make them shorter and similar with different functions).
I also put all function at the beginning and put in order of use in main()
I also don't use global list departaments but I return list from function and assign to local variable. And later I do the same with courses and sections.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from pprint import pprint
import time
# --- functions ---
def select_campus(driver, word="Tech"):
print('[select_campus] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '//div[#class="bned-campus-select"]//span[#class="selection"]/span'))
).click()
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, f'//li[contains(text(), "{word}")]'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_campus] Exception:', ex)
return False
return True
def select_term(driver, term):
print('[select_term] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '//div[contains(#class, "term")]//span[#class="selection"]/span'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, f'//div[contains(#class, "term")]//li[contains(text(), "{term}")]'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_term] Exception:', ex)
return False
return True
def get_all_departments(driver):
print('[get_all_departments] start')
departments = []
try:
#select [2]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[2]//option')
for index, option in enumerate(all_options[1:], 1):
item = {"index": index, "department": option.text}
departments.append(item)
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[get_all_departments] Exception:', ex)
return departments
def select_department(driver, department):
print('[select_department] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "department")]//span[#class="selection"]/span'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "department")]//input'))
)
element.send_keys(department)
element.send_keys(Keys.ENTER)
time.sleep(1) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_department] Exception:', ex)
return False
return True
def get_all_courses(driver):
print('[get_all_courses] start')
courses = []
try:
#select [3]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[3]//option')
for index, option in enumerate(all_options[1:], 1):
item = {"index": index, "section": option.text}
courses.append(item)
except Exception as ex:
print('[get_all_courses] Exception:', ex)
return courses
def select_course(driver, course):
print('[select_course] start')
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "course")]//span[#class="selection"]/span'))
)
element.click()
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "course")]//input'))
)
element.send_keys(course)
element.send_keys(Keys.ENTER)
time.sleep(1) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_course] Exception:', ex)
return False
return True
def get_all_sections(driver):
print('[get_all_sections] start')
sections = []
try:
#select [4]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[4]//option')
for index, option in enumerate(all_options[1:], 1):
#item = {"index": index, "course": option.text}
sections.append(option.text)
except Exception as ex:
print('[get_all_sections] Exception:', ex)
return sections
def clear_form(driver):
print('[clear_form] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,'//a[#class="js-clear-row"]'))
).click()
time.sleep(1) # time for JavaScript to clear elements
except Exception as ex:
print('[clear_form] Exception:', ex)
return False
return True
def main():
URL = "https://gmu.bncollege.com/course-material/course-finder"
options = webdriver.ChromeOptions()
#options.headless = True
#options.add_argument("--headless")
options.add_argument("--no-sandbox")
# options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
start = time.time()
driver.get(URL)
select_campus(driver)
select_term(driver, "Spring")
departments = get_all_departments(driver)
print('departments:')
pprint(departments[:4])
for dep in departments[:4]: # 3 to limit filling all courses for debugging purposes
print(dep)
select_department(driver, dep['department'])
print(dep)
dep['courses'] = get_all_courses(driver)
print('departments:')
pprint(departments[:4])
for course in dep['courses']:
select_course(driver, course['course'])
course['sections'] = get_all_sections(driver)
print('departments:')
pprint(departments[:4])
#clear_form(driver) # DON'T use it
# --- display ---
for dep in departments[:4]:
pprint(dep)
end = time.time()
print('time:', end - start)
input('Press ENTER to close') # to keep open browser and check elements in DevTools
driver.close()
if __name__ == "__main__":
main()
is there a way to get this bot to automaticly restart if the site crashes and is there a way for the bot to refresh the page if the site doesnt load properly. becsause i cant get an xpath on the refresh and i have no clue how to make the bot restart if it didnt accomplish its goals
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
start_time = time.time()
# my code here
path = "C:\Program Files (x86)\Common Files\Chromedriver.exe"
driver = webdriver.Chrome(path)
# open page
driver.get("https://www.usmint.gov/")
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="navigation"]/div[3]/ul/li[2]/a')) #PRODUCT S
)
finally:
# product schedule
driver.find_element_by_xpath('//*[#id="navigation"]/div[3]/ul/li[2]/a').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="navigation"]/div[3]/ul/li[2]/div/div/ul/li[3]/a')) #2020
)
finally:
# 2020 product schedule
driver.find_element_by_xpath('//*[#id="navigation"]/div[3]/ul/li[2]/div/div/ul/li[3]/a').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="4cb2318c15eb72316187ca9691"]/div/div/div[2]/div/div[1]/a')) #birth set
)
finally:
# birth set 2020
driver.find_element_by_xpath('//*[#id="4cb2318c15eb72316187ca9691"]/div/div/div[2]/div/div[1]/a').click()
'''
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="emailmodalclose"]')) #clear
)
finally:
# clear email list stupidity
driver.find_element_by_xpath('//*[#id="emailmodalclose"]').click()
'''
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[3]/div[2]/div[2]/form/div/div[5]/button[1]')) #add to
)
finally:
# add to bag
driver.find_element_by_xpath("/html/body/div[1]/div[3]/div[2]/div[2]/form/div/div[5]/button[1]").click()# double qoutes?
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="mini-cart"]/div[3]/div[2]/div[3]/a')) #checkout
)
finally:
# checkout
driver.find_element_by_xpath('//*[#id="mini-cart"]/div[3]/div[2]/div[3]/a').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_login_username"]')) #login
)
finally:
# login
driver.find_element_by_xpath('//*[#id="dwfrm_login_username"]').send_keys("email")
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_login_password"]')) #Password
)
finally:
# password
driver.find_element_by_xpath('//*[#id="dwfrm_login_password"]').send_keys("password")
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="checkoutMethodLoginSubmit"]/span')) #checkout as
)
finally:
# checkout as registered user
driver.find_element_by_xpath('//*[#id="checkoutMethodLoginSubmit"]/span').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCardList"]/option[2]')) #credit card scroll
)
finally:
# credit card scroll
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCardList"]/option[2]').click() # .format?
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_month"]/option[2]')) #cc exp m
)
finally:
# cc exp month
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_month"]/option[2]').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_year"]/option[11]')) #cc exp y
)
finally:
# cc exp year
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_year"]/option[11]').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_cvn"]')) #cvv
)
finally:
# cvv
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_cvn"]').send_keys("999")
time.sleep(2)
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="checkoutContinuePaymentDelegator"]')) #continue to final
)
finally:
# continue to final review
driver.find_element_by_xpath('//*[#id="checkoutContinuePaymentDelegator"]').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="formAgreementLabel"]/span')) #terms of use
)
finally:
# terms of use button
driver.find_element_by_xpath('//*[#id="formAgreementLabel"]/span').click()
try:
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="submitOrderButton"]')) #Place order
)
finally:
# place order
driver.find_element_by_xpath('//*[#id="submitOrderButton"]').click()
print ("time elapsed: {:.2f}s".format(time.time() - start_time))
driver.quit()
#if __name__ == '__main__':
# order(keys)
and if you see anything in the code that can be fixed it would be appreciated
As requested in the comment, this is the simplest but not the ideal way. Since your try except clause does not include an except clause, they serve no purpose in your code except it just suppress all your TimeoutException, which you actually want them as you need to know what error it encounters. To improve this solution, you can consider to split the try except clause and re-execute those wait element lines only instead of the whole script.
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
start_time = time.time()
# my code here
path = "C:\Program Files (x86)\Common Files\Chromedriver.exe"
driver = webdriver.Chrome(path)
while True:
try:
# open page
driver.get("https://www.usmint.gov/")
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="navigation"]/div[3]/ul/li[2]/a')) #PRODUCT S
)
# product schedule
driver.find_element_by_xpath('//*[#id="navigation"]/div[3]/ul/li[2]/a').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="navigation"]/div[3]/ul/li[2]/div/div/ul/li[3]/a')) #2020
)
# 2020 product schedule
driver.find_element_by_xpath('//*[#id="navigation"]/div[3]/ul/li[2]/div/div/ul/li[3]/a').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="4cb2318c15eb72316187ca9691"]/div/div/div[2]/div/div[1]/a')) #birth set
)
# birth set 2020
driver.find_element_by_xpath('//*[#id="4cb2318c15eb72316187ca9691"]/div/div/div[2]/div/div[1]/a').click()
'''
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="emailmodalclose"]')) #clear
)
# clear email list stupidity
driver.find_element_by_xpath('//*[#id="emailmodalclose"]').click()
'''
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[3]/div[2]/div[2]/form/div/div[5]/button[1]')) #add to
)
# add to bag
driver.find_element_by_xpath("/html/body/div[1]/div[3]/div[2]/div[2]/form/div/div[5]/button[1]").click()# double qoutes?
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="mini-cart"]/div[3]/div[2]/div[3]/a')) #checkout
)
# checkout
driver.find_element_by_xpath('//*[#id="mini-cart"]/div[3]/div[2]/div[3]/a').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_login_username"]')) #login
)
# login
driver.find_element_by_xpath('//*[#id="dwfrm_login_username"]').send_keys("email")
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_login_password"]')) #Password
)
# password
driver.find_element_by_xpath('//*[#id="dwfrm_login_password"]').send_keys("password")
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="checkoutMethodLoginSubmit"]/span')) #checkout as
)
# checkout as registered user
driver.find_element_by_xpath('//*[#id="checkoutMethodLoginSubmit"]/span').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCardList"]/option[2]')) #credit card scroll
)
# credit card scroll
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCardList"]/option[2]').click() # .format?
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_month"]/option[2]')) #cc exp m
)
# cc exp month
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_month"]/option[2]').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_year"]/option[11]')) #cc exp y
)
# cc exp year
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_year"]/option[11]').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="dwfrm_billing_paymentMethods_creditCard_cvn"]')) #cvv
)
# cvv
driver.find_element_by_xpath('//*[#id="dwfrm_billing_paymentMethods_creditCard_cvn"]').send_keys("999")
time.sleep(2)
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="checkoutContinuePaymentDelegator"]')) #continue to final
)
# continue to final review
driver.find_element_by_xpath('//*[#id="checkoutContinuePaymentDelegator"]').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="formAgreementLabel"]/span')) #terms of use
)
# terms of use button
driver.find_element_by_xpath('//*[#id="formAgreementLabel"]/span').click()
element = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="submitOrderButton"]')) #Place order
)
# place order
driver.find_element_by_xpath('//*[#id="submitOrderButton"]').click()
print ("time elapsed: {:.2f}s".format(time.time() - start_time))
driver.quit()
break
except Exception as err:
print(f"Error: {str(err)}, trying again")
pass
#if __name__ == '__main__':
# order(keys)