Related
I am trying to scrape some links from https://www.mckinsey.com/capabilities/operations/our-insights using selenium with python.
from selenium.webdriver.common.by import By
from selenium import webdriver
from bs4 import BeautifulSoup
import time
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--disable-notifications")
# browser = webdriver.Chrome('C:\\chromedriver.exe', options=chrome_options)
browser = webdriver.Firefox()
url = "https://www.mckinsey.com/capabilities/operations/our-insights"
browser.get(url)
time.sleep(5)
try:
accept = browser.find_element(By.XPATH, '//*[#id="onetrust-accept-btn-handler"]')
accept.click()
time.sleep(2)
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
except:
pass
n = 1
while n < 3:
try:
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
button = browser.find_element(By.XPATH, '//*[#id="skipToMain"]/div[2]/section[11]/div[2]/a')
button.click()
time.sleep(2)
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
print('page', n)
n = n + 1
except:
print('page ended at', n)
break
source = browser.execute_script("return document.body.innerHTML")
time.sleep(5)
soup = BeautifulSoup(source, 'lxml')
Running above code gave the following error.
Exception ignored in: <function Service.__del__ at 0x000002AE1979DAF0>
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\selenium\webdriver\common\service.py", line 177, in __del__
AttributeError: 'NoneType' object has no attribute 'suppress'
I tried both Chrome and Firefox. Both of them are giving the same error.
Python version - 3.9
You can try the next example selenium with bs4
url='https://www.mckinsey.com/capabilities/operations/our-insights'
driver.get(url)
driver.maximize_window()
time.sleep(3)
accept = driver.find_element(By.XPATH, '//*[#id="onetrust-accept-btn-handler"]')
accept.click()
time.sleep(2)
data = []
for x in range(3):
try:
soup = BeautifulSoup(driver.page_source, 'lxml')
links = soup.select('[class="block-list text-s"]>div')
print(len(links))
for x in links:
link = x.a
link = 'https://www.mckinsey.com' + link.get('href') if link else None
data.append(link)
loadMoreButton = driver.find_element(By.XPATH, "//a[contains(text(),'View more')]")
if loadMoreButton:
driver.execute_script("arguments[0].click();" ,loadMoreButton)
#loadMoreButton.click()
time.sleep(3)
except Exception as e:
print(e)
break
print(set(data))
# df = pd.DataFrame(set(data))
# print(df)
Output:
{'https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/v
oices-introduction-october-2022', 'https://www.mckinsey.com/capabilities/people-and-organizational-perfor
mance/our-insights/leading-operating-model-modernization-what-do-transformation-leaders-say', 'https://ww
w.mckinsey.com/capabilities/operations/our-insights/sustainable-spaces-countering-climate-risk-in-capital
-projects', 'https://www.mckinsey.com/capabilities/operations/our-insights/harnessing-volatility-technolo
gy-transformation-in-oil-and-gas', 'https://www.mckinsey.com/capabilities/operations/our-insights/buildin
g-supply-chain-resilience', 'https://www.mckinsey.com/capabilities/operations/our-insights/industrial-res
ource-productivity-and-the-road-to-sustainability', 'https://www.mckinsey.com/capabilities/operations/our
-insights/outsprinting-the-energy-crisis', 'https://www.mckinsey.com/capabilities/operations/our-insights
/emerging-from-disruption-the-future-of-pharma-operations-strategy', 'https://www.mckinsey.com/industries
/public-and-social-sector/our-insights/using-advanced-analytics-to-improve-performance-in-customs-agencie
s', 'https://www.mckinsey.com/industries/life-sciences/our-insights/against-the-odds-how-life-sciences-co
mpanies-excel-in-large-transformations', 'https://www.mckinsey.com/capabilities/operations/our-insights/t
he-hidden-value-of-voice-conversations-part-1-trends-and-technologies', 'https://www.mckinsey.com/feature
d-insights/mckinsey-on-books/the-titanium-economy', 'https://www.mckinsey.com/capabilities/operations/our
-insights/smart-scheduling-how-to-solve-workforce-planning-challenges-with-ai', 'https://www.mckinsey.com
/capabilities/operations/our-insights/generative-scheduling-saving-time-and-money-in-capital-projects', '
https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/on-
the-path-to-net-zero-steel-in-building-and-construction', 'https://www.mckinsey.com/capabilities/operatio
ns/our-insights/global-infrastructure-initiative/voices/disrupting-transport-an-interview-with-robert-fal
ck-of-einride', 'https://www.mckinsey.com/capabilities/operations/our-insights/the-industrial-revolution-
in-services', 'https://www.mckinsey.com/capabilities/operations/our-insights/how-mining-companies-reach-t
he-operational-excellence-gold-standard', 'https://www.mckinsey.com/capabilities/operations/our-insights/
a-more-resilient-supply-chain-from-optimized-operations-planning', 'https://www.mckinsey.com/capabilities
/operations/our-insights/full-potential-procurement-lessons-amid-inflation-and-volatility', 'https://www.
mckinsey.com/capabilities/operations/our-insights/taking-the-pulse-of-shifting-supply-chains', 'https://w
ww.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/news-from-th
e-global-infrastructure-initiative-august-2022', 'https://www.mckinsey.com/capabilities/operations/our-in
sights/delivering-the-us-manufacturing-renaissance', 'https://www.mckinsey.com/capabilities/operations/ou
r-insights/building-sustainability-into-operations', 'https://www.mckinsey.com/capabilities/operations/ou
r-insights/global-infrastructure-initiative/voices/news-from-the-global-infrastructure-initiative-october
-2022', 'https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/v
oices/dhl-on-sustainable-customer-centric-delivery-in-the-last-mile', 'https://www.mckinsey.com/industrie
s/advanced-electronics/our-insights/sustainability-in-packaging-five-key-levers-for-significant-impact',
'https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/do
ing-good-demands-doing-better-delivering-net-zero-capital-projects', 'https://www.mckinsey.com/industries
/life-sciences/our-insights/reimagining-the-future-of-biopharma-manufacturing', 'https://www.mckinsey.com
/capabilities/operations/our-insights/coca-cola-the-people-first-story-of-a-digital-transformation', 'htt
ps://www.mckinsey.com/capabilities/operations/our-insights/is-your-manufacturing-network-an-anchor-or-a-s
ail', 'https://www.mckinsey.com/industries/semiconductors/our-insights/rapid-throughput-improvement-at-ma
ture-semiconductor-fabs', 'https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastru
cture-initiative/voices/reducing-embodied-carbon-in-new-construction', 'https://www.mckinsey.com/industri
es/healthcare-systems-and-services/our-insights/optimizing-health-system-supply-chain-performance', 'http
s://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/managin
g-capital-risk-in-the-race-to-net-zero', 'https://www.mckinsey.com/capabilities/operations/our-insights/a
ccelerating-green-growth-in-the-built-environment', 'https://www.mckinsey.com/capabilities/transformation
/our-insights/you-cant-move-too-fast-a-conversation-with-andy-penn', 'https://www.mckinsey.com/capabiliti
es/operations/our-insights/inflation-fighter-and-value-creator-procurements-best-kept-secret', 'https://w
ww.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/preparing-fo
r-tomorrow-an-interview-with-tariq-taherbhai', 'https://www.mckinsey.com/capabilities/operations/our-insi
ghts/value-speed-and-scale-a-new-era-for-operations-in-asia', 'https://www.mckinsey.com/capabilities/oper
ations/our-insights/stepping-up-what-coos-will-need-to-succeed-in-2023-and-beyond', None, 'https://www.mc
kinsey.com/capabilities/operations/our-insights/digital-twins-what-could-they-do-for-your-business', 'htt
ps://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/voices
-introduction-august-2022', 'https://www.mckinsey.com/capabilities/operations/our-insights/how-good-are-y
our-internal-operations-really', 'https://www.mckinsey.com/capabilities/operations/our-insights/114-down-
10-million-to-go-the-global-lighthouse-networks-mission', 'https://www.mckinsey.com/capabilities/operatio
ns/our-insights/people-and-places-how-and-where-to-work-next', 'https://www.mckinsey.com/capabilities/ope
rations/our-insights/accelerating-capital-projects-to-secure-advantages-in-the-net-zero-transition', 'htt
ps://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiative/voices/unlock
ing-hydrogens-power-for-long-haul-freight-transport', 'https://www.mckinsey.com/industries/engineering-co
nstruction-and-building-materials/our-insights/how-much-is-a-brick-that-depends', 'https://www.mckinsey.c
om/capabilities/operations/our-insights/global-infrastructure-initiative/voices/mapping-the-way-decarboni
zing-roads', 'https://www.mckinsey.com/capabilities/operations/our-insights/global-infrastructure-initiat
ive/voices/investing-in-pathways-to-decarbonize-infrastructure', 'https://www.mckinsey.com/capabilities/o
perations/our-insights/the-hidden-value-of-voice-conversations-part-2-reaping-the-rewards', 'https://www.
mckinsey.com/capabilities/operations/our-insights/power-spike-how-battery-makers-can-respond-to-surging-d
emand-from-evs', 'https://www.mckinsey.com/capabilities/operations/our-insights/the-care-of-one-hyperpers
onalization-of-customer-care', 'https://www.mckinsey.com/capabilities/operations/our-insights/the-scaling
-imperative-for-industry-4-point-0', 'https://www.mckinsey.com/capabilities/operations/our-insights/utili
ty-procurement-ready-to-meet-new-market-challenges', 'https://www.mckinsey.com/capabilities/operations/ou
r-insights/global-infrastructure-initiative/voices/the-art-of-the-possible-an-interview-with-leaders-from
-scottish-water', 'https://www.mckinsey.com/capabilities/operations/our-insights/converge-it-and-ot-to-tu
rbocharge-business-operations-scaling-power'}
I have this code, and the error is very silly (I don't think the selenium have anything to do with it), but i've tried a lot of things and nothing works. What can I do?
THE ERROR IS IN THE VARIABLES NAME "sHoras"
class Clima():
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
website = "https://weather.com/?Goto=Redirected"
path = "C:/Users/Administrador/Downloads/chromedriver_win32/chromedriver.exe"
driver = webdriver.Chrome(options=options, service=Service(path))
driver.get(website)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "truste-button3"))).click()
time.sleep(3)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'LanguageSelector--LanguageSelectorStatus--mXkYQ'))).click()
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[3]/div[1]/header/div/div[2]/div[2]/nav/div/div/section/div/ul/li[2]'))).click()
time.sleep(3)
button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'LocationSearch_input')))
time.sleep(2)
button.click()
time.sleep(1)
button.send_keys("Medellin, Antioquia")
button.send_keys(Keys.RETURN)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[3]/div[3]/div/nav/div/div[1]/a[2]/span'))).click()
time.sleep(2)
temperatura = driver.find_elements(By.XPATH, "//summary/div/div/div/span[#class='DetailsSummary--tempValue--1K4ka']")
temperatura = [temperatura.text for temperatura in temperatura]
horasT = driver.find_elements(By.XPATH, "//div/h3")
horasT = [horasT.text for horasT in horasT]
driver.quit()
indx = datetime.datetime.now()
indx = int(indx.strftime('%H'))
indx = (24 - indx)
sHoras = []
[sHoras.append(hora) for hora in horasT if hora not in sHoras]
horas = []
for i in range(0, indx):
horas.append(sHoras[i])
THE ERROR IS IN THE VARIABLES NAME "sHoras"
I've been trying to flag/report a list of spam comments in a particular YouTube video.
For that I've been using this code on Python, which loads my previous profile so I log in with my account:
URL = "https://www.youtube.com/watch?
v=dvecqwfU6xw&lc=Ugxw_nsUNUor9AUEBGp4AaABAg.9fDfvkgiqtW9fDkE2r6Blm"
soup = BeautifulSoup(requests.get(URL).content, "html.parser")
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
driver= webdriver.Chrome('chromedriver.exe',chrome_options=options)
driver.get(URL)
wait=WebDriverWait(driver, 100)
comment_box = '//*[#id="comment"]'
reply_box ='//*[#id="replies"]'
while(True):
driver.execute_script("window.scrollBy(0, 200);")
try:
reply_box = driver.find_element(By.XPATH, reply_box)
print(reply_box.text)
break
except:
pass
# resp = driver.request('POST', 'https://www.youtube.com/youtubei/v1/flag/get_form?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false')
# print(resp.text)
button = wait.until(EC.presence_of_element_located((By.XPATH,'//*[#id="button"]')))
driver.execute_script("arguments[0].click();", button)
The problem comes with opening the menu, I believe since you have to hover over the 3 dots menu it would then appear as the clickable menu so I never get to open the actual menu to report/flag the comment.
My mistake was not to take full Xpath path.... It works perfectly like this, THANKS
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
options.add_argument('--headless')
driver= webdriver.Chrome('chromedriver.exe',chrome_options=options)
driver.get(URL)
wait=WebDriverWait(driver, 100)
comment_box = '//*[#id="comment"]'
reply_box ='//*[#id="replies"]'
while(True):
driver.execute_script("window.scrollBy(0, 200);")
try:
reply_box = driver.find_element(By.XPATH, reply_box)
print(reply_box.text)
break
except:
pass
option_button = '/html/body/ytd-app/div[1]/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[2]/ytd-comments/ytd-item-section-renderer/div[3]/ytd-comment-thread-renderer[1]/div/ytd-comment-replies-renderer/div[2]/ytd-comment-renderer/div[3]/div[3]/ytd-menu-renderer/yt-icon-button/button'
option_button = wait.until(EC.presence_of_element_located((By.XPATH, option_button)))
driver.execute_script("arguments[0].click();", option_button)
report_button = '/html/body/ytd-app/ytd-popup-container/tp-yt-iron-dropdown/div/ytd-menu-popup-renderer/tp-yt-paper-listbox/ytd-menu-service-item-renderer/tp-yt-paper-item/yt-formatted-string'
report_button = wait.until(EC.presence_of_element_located((By.XPATH,report_button)))
driver.execute_script("arguments[0].click();", report_button)
report_button_spam = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/tp-yt-paper-dialog-scrollable/div/div/yt-options-renderer/div/tp-yt-paper-radio-group/tp-yt-paper-radio-button[1]/div[1]'
report_button_spam = wait.until(EC.presence_of_element_located((By.XPATH, report_button_spam)))
driver.execute_script("arguments[0].click();", report_button_spam)
report_button_send = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/div/yt-button-renderer[2]/a/tp-yt-paper-button'
report_button_send = wait.until(EC.presence_of_element_located((By.XPATH, report_button_send)))
driver.execute_script("arguments[0].click();", report_button_send)
popup_button_done = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog[2]/yt-confirm-dialog-renderer/div[2]/div[2]/yt-button-renderer[3]/a/tp-yt-paper-button'
popup_button_done = wait.until(EC.presence_of_element_located((By.XPATH, popup_button_done)))
print(popup_button_done.text)
I apologize ahead of time for the length of this question but I want to give enough context.
I've been running in circles trying to figure out why this is happening. I'm indexing all the dropdown values of a bookstore and have done so successfully for 'departments' and 'course_nums' but when I try to do the same thing for 'sections' relative to the 'course_nums' it returns some of the lists of sections and fails to return others. I've seen alternative methods of getting options from Selector on Stack and Documentation but I've had no success with these methods.
When a dropdown is selected the attributes of the HTML get an additional id called
<option value="001" data-select2-id="703">001</option>
So I've tried to just use Selector without first clicking on the dropdown (by commenting out the DriverWait before the Selector in fill_sections()) but this does not work although the element is present in the DOM.
When run, it will sometimes return the corresponding course sections and other times an empty list of sections, but each course number should have at least 1 section. As I reviewed the automated input it's as if it goes too fast on some course numbers which might cause it to miss fetching all options, but I'm not sure. I'm stumped because this works for every other fetch for the departments and course numbers relative to the department.
Fair warning, let it run until it prints the arrays otherwise it starts this infinite loop for some reason when you hit
control-c and I honestly don't know why.
Upon request from the comments, here is the entire script relevant to indexing the departments, course_nums, and sections...
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from pprint import pprint
import sys
import time
import os
# path = '/usr/local/bin/chromedriver'
# sys.path.append(path)
URL = "https://gmu.bncollege.com/course-material/course-finder"
# page = requests.get(URL)
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument("--no-sandbox")
# options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
departments = []
courses = []
sections = []
def fill_departments():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'))
)
element.click()
# print('selected department drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/span[2]/span/span[1]/input'))
)
selector = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/select"))
options = selector.options
for index in range(1, len(options)):
# (index - 1) to ensure proper indexing of departments
dep_dict = {"index": index-1, "department": options[index].text}
departments.append(dep_dict)
except:
return 1
return 0
def fill_course_nums(department, index):
try:
# /html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'))
)
element.click()
# print('clicked on course drop down')
# /html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/select
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/span[2]/span/span[1]/input'))
)
selector = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/select"))
options = selector.options
courses = []
for ind in range(0, len(options)):
if (options[ind].text == "Select"):
continue
course_obj = {"index": ind - 1, "course_num": options[ind].text}
courses.append(course_obj)
departments[index]['courses'] = courses
except:
return 1
return 0
def fill_sections(dep_index, index):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div'))
)
element.click()
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/span[2]/span/span[1]/input'))
)
# selector_s = Select(WebDriverWait(driver, 5).until(EC.find_element((By.XPATH, "/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/select"))))
selector_s = Select(driver.find_element(by=By.XPATH, value="/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div/select"))
options_s = selector_s.options
# #
sections = []
for op in options_s:
if (op.text == "Select"):
continue
sections.append(op.text)
departments[dep_index]['courses'][index]['sections'] = sections
except:
return 1
return 0
def select_term(term):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[1]/div/div'))
)
element.click()
# print('selected term drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[1]/div/div/span[2]/span/span[2]/ul/li[2]'))
)
element.click()
# print('selected spring 2022')
except:
return 1
return 0
def clear_form():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[5]/div/a'))
)
element.click()
except:
return 1
return 0
def select_department(department):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'))
)
element.click()
# print('selected department drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div/span[2]/span/span[1]/input'))
)
element.send_keys(department)
# print('typed department cs')
element.send_keys(Keys.ENTER)
# print('selected department cs')
except:
return 1
return 0
def select_course(course):
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'))
)
element.click()
# print('clicked on course drop down')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div/span[2]/span/span[1]/input'))
)
element.send_keys(course)
element.send_keys(Keys.ENTER)
# print('selected course 321')
except:
return 1
return 0
def select_campus_info():
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located(
(By.XPATH, '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[1]/span'))
)
element.click()
# print('found campus button')
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,
'/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[1]/span[2]/span/span[2]/ul/li[3]'))
)
element.click()
# print('selected fairfax campus')
except:
return 1
return 0
How these methods are run in main()
def main():
start = time.time()
driver.get("https://gmu.bncollege.com/course-material/course-finder")
while(select_campus_info()):
print('selecting campus')
while(select_term("spring")):
print('selecting term')
while(fill_departments()):
print("filling departments")
for dep in range(0, len(departments)):
if (dep != 0):
while(select_term("spring")):
print('selecting term')
while(select_department(departments[dep]['department'])):
print("trying (dep)")
while(fill_course_nums(departments[dep]['department'], departments[dep]['index'])):
print("filling courses")
while(clear_form()):
print("clearing")
# break here after 3 to limit filling all courses for debugging purposes
if (dep >= 3):
break
while(select_term("spring")):
print('selecting term')
for dep in range(0, 3):
if (dep != 0):
while(select_term("spring")):
print('selecting term')
while(select_department(departments[dep]['department'])):
print("trying (dep)")
for course in range(0, len(departments[dep]['courses'])):
while(select_course(departments[dep]['courses'][course]['course_num'])):
print("trying (cnum)")
while(fill_sections(dep, course)):
print('filling (sections)')
while(clear_form()):
print('clearing form.')
pprint(departments[0])
pprint(departments[1])
pprint(departments[2])
# fill_textbook_info('spring', 'CS', 310, '002')
# curUrl = driver.current_url
# print(curUrl)
time.sleep(100)
end = time.time()
print(end - start)
driver.close()
if __name__ == "__main__":
main()
I run your code and usually problem was to fast running code - and it needed time.sleep() in some places - especially after sending ENTER (it used 1 second because 0.5 second was too small)
I put full code because I organized code in different way (I use nested for-loops`) and I used different XPATH (I tried to make them shorter and similar with different functions).
I also put all function at the beginning and put in order of use in main()
I also don't use global list departaments but I return list from function and assign to local variable. And later I do the same with courses and sections.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from pprint import pprint
import time
# --- functions ---
def select_campus(driver, word="Tech"):
print('[select_campus] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '//div[#class="bned-campus-select"]//span[#class="selection"]/span'))
).click()
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, f'//li[contains(text(), "{word}")]'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_campus] Exception:', ex)
return False
return True
def select_term(driver, term):
print('[select_term] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '//div[contains(#class, "term")]//span[#class="selection"]/span'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, f'//div[contains(#class, "term")]//li[contains(text(), "{term}")]'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_term] Exception:', ex)
return False
return True
def get_all_departments(driver):
print('[get_all_departments] start')
departments = []
try:
#select [2]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[2]//option')
for index, option in enumerate(all_options[1:], 1):
item = {"index": index, "department": option.text}
departments.append(item)
time.sleep(0.5) # time for JavaScript to create `<select>`
except Exception as ex:
print('[get_all_departments] Exception:', ex)
return departments
def select_department(driver, department):
print('[select_department] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "department")]//span[#class="selection"]/span'))
).click()
time.sleep(0.5) # time for JavaScript to create `<select>`
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "department")]//input'))
)
element.send_keys(department)
element.send_keys(Keys.ENTER)
time.sleep(1) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_department] Exception:', ex)
return False
return True
def get_all_courses(driver):
print('[get_all_courses] start')
courses = []
try:
#select [3]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[3]//option')
for index, option in enumerate(all_options[1:], 1):
item = {"index": index, "section": option.text}
courses.append(item)
except Exception as ex:
print('[get_all_courses] Exception:', ex)
return courses
def select_course(driver, course):
print('[select_course] start')
try:
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "course")]//span[#class="selection"]/span'))
)
element.click()
element = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '(//div[#role="table"]//div[#role="row"])[2]//div[contains(#class, "course")]//input'))
)
element.send_keys(course)
element.send_keys(Keys.ENTER)
time.sleep(1) # time for JavaScript to create `<select>`
except Exception as ex:
print('[select_course] Exception:', ex)
return False
return True
def get_all_sections(driver):
print('[get_all_sections] start')
sections = []
try:
#select [4]
all_options = driver.find_elements(By.XPATH, '((//div[#role="table"]//div[#role="row"])[2]//select)[4]//option')
for index, option in enumerate(all_options[1:], 1):
#item = {"index": index, "course": option.text}
sections.append(option.text)
except Exception as ex:
print('[get_all_sections] Exception:', ex)
return sections
def clear_form(driver):
print('[clear_form] start')
try:
WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH,'//a[#class="js-clear-row"]'))
).click()
time.sleep(1) # time for JavaScript to clear elements
except Exception as ex:
print('[clear_form] Exception:', ex)
return False
return True
def main():
URL = "https://gmu.bncollege.com/course-material/course-finder"
options = webdriver.ChromeOptions()
#options.headless = True
#options.add_argument("--headless")
options.add_argument("--no-sandbox")
# options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
start = time.time()
driver.get(URL)
select_campus(driver)
select_term(driver, "Spring")
departments = get_all_departments(driver)
print('departments:')
pprint(departments[:4])
for dep in departments[:4]: # 3 to limit filling all courses for debugging purposes
print(dep)
select_department(driver, dep['department'])
print(dep)
dep['courses'] = get_all_courses(driver)
print('departments:')
pprint(departments[:4])
for course in dep['courses']:
select_course(driver, course['course'])
course['sections'] = get_all_sections(driver)
print('departments:')
pprint(departments[:4])
#clear_form(driver) # DON'T use it
# --- display ---
for dep in departments[:4]:
pprint(dep)
end = time.time()
print('time:', end - start)
input('Press ENTER to close') # to keep open browser and check elements in DevTools
driver.close()
if __name__ == "__main__":
main()
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
Path = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(Path)
driver.get("https://www.emag.ro/")
search_bar = driver.find_element_by_id("searchboxTrigger")
search_bar.send_keys("laptopuri")
search_bar.send_keys(Keys.RETURN)
main = None
try:
main = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "main-container"))
)
print("Page loaded,main retrived succesfully")
except:
driver.quit()
items = main.find_element_by_id("card_grid")
products = items.find_elements_by_css_selector("div.card-item.js-product-data")
count = 0
for product in products:
raw_name = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
raw_price = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "product-new-price"))
)
#Parsing the product name
raw_name = raw_name.replace("Laptop", "")
raw_name = raw_name.strip()
if raw_name.startswith("Apple"):
sEnd = raw_name.find(",")
else:
sEnd = raw_name.find("cu") - 1
product_name = raw_name[:sEnd]
#Parsing the product price
raw_price = raw_price.text[:raw_price.text.find(" ")]
print(raw_price)
count += 1
print(f"{count} results returned")
driver.quit()
Code works perfectly fine sometimes,but sometimes i get the error:
Please note i am new at this,so an explanation would be very appreciated.I just learned how to use selenium and the reason i transitioned from beautifulsoup because of the lack of wait possibility,and now when trying to use that,i get this error SOMETIMES
See this :
driver = webdriver.Chrome(Path)
and how have you used it here :
try:
main = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "main-container"))
)
print("Page loaded,main retrived succesfully")
If you pay attention you would see that, you are using WebDriverWait(driver, 10) and passing driver reference.
But here
raw_name = WebDriverWait(product, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
you are passing product in WebDriverWait, which is wrong, you should pass driver reference here. like
raw_name = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "h2.card-body.product-title-zone"))
).text
This should help you past this issue for sure.
also, make changes here
raw_price = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "product-new-price"))
)
This is what we have internally :
class WebDriverWait(object):
def __init__(self, driver, timeout, poll_frequency=POLL_FREQUENCY, ignored_exceptions=None):
"""Constructor, takes a WebDriver instance and timeout in seconds.
Update 1 :
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(50)
driver.get("https://www.emag.ro/")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//i[contains(#class,'close')]/parent::button[#class='close']"))).click()
ActionChains(driver).move_to_element(wait.until(EC.visibility_of_element_located((By.XPATH, "//button[contains(#class,'js-accept')]")))).click().perform()
search_bar = driver.find_element_by_id("searchboxTrigger")
search_bar.send_keys("laptopuri")
search_bar.send_keys(Keys.RETURN)
main = None
try:
main = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "main-container")))
print("Page loaded,main retrived succesfully")
except:
driver.quit()
items = main.find_element_by_id("card_grid")
products = driver.find_elements_by_css_selector("div.card-item.js-product-data")
count = 0
for product in products:
raw_name = product.find_element_by_css_selector("h2.card-body.product-title-zone a").text
print(raw_name)
raw_price = product.find_element_by_css_selector("p.product-new-price").text
print(raw_price)
#Parsing the product name
# raw_name = raw_name.replace("Laptop", "").strip()
# if raw_name.startswith("Apple"):
# sEnd = raw_name.find(",")
# else:
# sEnd = raw_name.find("cu") - 1
# product_name = raw_name[:sEnd]
#Parsing the product price
# raw_price = raw_price[raw_price.find(" ")]
# print(raw_price)
# count += 1
#print(f"{count} results returned")
driver.quit()