I'm fairly new to Selenium and I've been running a couple of very small web scraping projects.
When I try to click on this element through the .click() function I keep getting "Element not interactable".
The HTML section I'm trying to interact with is this:
<a class="hawk-iconBefore hawk-styleCheckbox hawk-styleList" data-options='{"name":"finish","value":"Foil"}' href="https://starcitygames.com/search/?card_name=Glimmervoid&finish=Foil" rel="nofollow"><span class="hawk-selectionInner">Foil <span class="hawk-facetCount">(5)</span></span></a>
And my Python code looks like this:
from selenium import webdriver
from selenium.webdriver.common.by import By

url = 'https://starcitygames.com/'
card_name = 'Fatal Push'
expansion_name = 'Double Masters'
foil = True
card_price = 0

browser_options = webdriver.ChromeOptions()
browser_options.add_argument("headless")
browser = webdriver.Chrome(options=browser_options)
browser.get(url)
browser.implicitly_wait(0.2)
browser.maximize_window()

print(card_name)

def get_card_price():
    global card_price
    print("Finding card...")
    browser.find_element(By.CSS_SELECTOR, "[name='search_query']").send_keys(card_name)
    search_button = browser.find_element(By.CLASS_NAME, "search-submit")
    search_button.click()
    if foil:
        print("Checking if Foil...")
        foil_select = browser.find_element(By.XPATH, "/html/body/div/div[1]/main/aside/div[2]/div[2]/div/div[5]/div/ul/li[1]/a")
        try:
            foil_select.click()
            print("It's Foil")
        except:
            print("Element not interactable")
    cards = browser.find_elements(By.CLASS_NAME, "hawk-results-item")
    for card in cards:
        c = card.text
        price = card.find_element(By.CSS_SELECTOR, "div[class='hawk-results-item__options-table-cell hawk-results-item__options-table-cell--price childAttributes']")
        if expansion_name in c:
            card_price = price.text
            return card_price

get_card_price()
print("Fetching card price...")
print(card_price)
browser.quit()
Every other part fetches the info I need, but when the condition foil is true it jumps to the exception because the element is not interactable.
I have tried accessing it with css_selector and with the regular XPath. I saw another answer suggesting that using the full XPath fixed the issue, but it didn't work for me.
What could I do?
So I figured out how to fetch the href for the element I wanted, and the fix was as simple as grabbing that attribute and telling my code to navigate to that page before executing the rest of the code.
This is how it looks now:
if foil:
    print("Checking if Foil...")
    try:
        foil_select = browser.find_element(By.XPATH, '//*[@id="hawkfacet_finish"]/li[1]/a')
        link = foil_select.get_attribute("href")
        print("It's Foil")
        browser.get(link)
    except:
        print("Element not interactable")
else:
    foil_select = browser.find_element(By.XPATH, '//*[@id="hawkfacet_finish"]/li[2]/a')
    link = foil_select.get_attribute("href")
    print("It's not foil")
    browser.get(link)
Now to move on with the next step. Thanks everyone!
This
browser_options.add_argument("headless")
should be
browser_options.add_argument("--headless")
You need to scroll to each card first before grabbing the price.
Below is the sample code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.maximize_window()
wait = WebDriverWait(driver, 20)

url = 'https://starcitygames.com/'
card_name = 'Fatal Push'
expansion_name = 'Double Masters'
foil = True
card_price = 0

# browser_options = webdriver.ChromeOptions()
# browser_options.add_argument("headless")
# browser = webdriver.Chrome(options=browser_options)

driver.get(url)
driver.implicitly_wait(0.2)
print(card_name)

def get_card_price():
    global card_price
    print("Finding card...")
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='search_query']"))).send_keys(card_name)
    search_button = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "search-submit")))
    search_button.click()
    if foil:
        print("Checking if Foil...")
        # the Finish: Foil facet link shown in the question's HTML
        foil_select = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "ul#hawkfacet_finish li a[data-options*='Foil']")))
        try:
            foil_select.click()
            print("It's Foil")
        except:
            print("Element not interactable")
    time.sleep(5)
    cards = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='hawk-results-item']")))
    for card in cards:
        # scroll each result into view before reading its text
        driver.execute_script("arguments[0].scrollIntoView(true);", card)
        c = card.get_attribute('innerText')
        print(c)
        price = card.find_element(By.XPATH, ".//descendant::div[contains(@class, 'price childAttributes')]")
        print(price.text)
        if expansion_name in c:
            card_price = price.text
            return card_price

get_card_price()
print("Fetching card price...")
print(card_price)
Output:
Fatal Push
Finding card...
Checking if Foil...
It's Foil
Fatal Push (Borderless)
Double Masters - Variants
Near Mint -
English
$14.99
QTY: 0
NOTIFY ME
$14.99
Fatal Push (Borderless)
Double Masters - Variants (Foil)
Near Mint -
English
$14.99
QTY: 3
Add to cart
$14.99
Fetching card price...
$14.99
Process finished with exit code 0
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import chromedriver_autoinstaller

chromedriver_autoinstaller.install()

TYPES = ['user', 'verified_audience', 'top_critics']
TYPE = TYPES[2]
URL = 'https://www.rottentomatoes.com/m/dunkirk_2017/reviews'
PAGES = 2

driver = Chrome()
driver.get(URL)

data_reviews = []
while PAGES != 0:
    wait = WebDriverWait(driver, 30)
    reviews = wait.until(lambda _driver: _driver.find_elements(
        By.CSS_SELECTOR, '.review_table_row'))
    # Extracting review data
    for review in reviews:
        if TYPE == 'top_critics':
            critic_name_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-critic-link]')
            critic_review_text_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-text]')
            data_reviews.append(critic_name_el.text)
    try:
        next_button_el = driver.find_element(
            By.CSS_SELECTOR, '[data-qa=next-btn]:not([disabled=disabled])'
        )
        if not next_button_el:
            PAGES = 0
        next_button_el.click()  # refresh new reviews
        PAGES -= 1
    except Exception as e:
        driver.quit()
Here, a Rotten Tomatoes review page is opened and the reviews are scraped, but when the next button is clicked and the new reviews are about to be scraped, this error pops up. I am guessing that the new reviews have not been loaded yet and trying to access them is causing the problem; I tried driver.implicitly_wait but that doesn't work either.
The error originates from line 33, data_reviews.append(critic_name_el.text).
By clicking the next page button next_button_el, the new page starts loading, but that takes some time while your Selenium code continues immediately after the click. So the line reviews = wait.until(lambda _driver: _driver.find_elements(By.CSS_SELECTOR, '.review_table_row')) most likely collects the elements on the old page; the page then refreshes, and some of the critic_name_el elements collected after that (still from the old page) are no longer there, because the old page has been replaced.
To make your code work, you need to introduce a short delay after clicking the next page button, as follows:
import time  # needed for the added sleep below

data_reviews = []
while PAGES != 0:
    wait = WebDriverWait(driver, 30)
    reviews = wait.until(lambda _driver: _driver.find_elements(
        By.CSS_SELECTOR, '.review_table_row'))
    # Extracting review data
    for review in reviews:
        if TYPE == 'top_critics':
            critic_name_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-critic-link]')
            critic_review_text_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-text]')
            data_reviews.append(critic_name_el.text)
    try:
        next_button_el = driver.find_element(
            By.CSS_SELECTOR, '[data-qa=next-btn]:not([disabled=disabled])'
        )
        if not next_button_el:
            PAGES = 0
        next_button_el.click()  # refresh new reviews
        PAGES -= 1
        time.sleep(2)  # give the next page time to load before re-collecting
    except Exception as e:
        driver.quit()
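A fixed sleep works, but if you don't want to guess the delay, an alternative (my suggestion, not from the original code) is to wait until a row from the old page goes stale after the click:

old_first_row = reviews[0]
next_button_el.click()
# blocks until the old element is detached from the DOM, i.e. the page was replaced
WebDriverWait(driver, 30).until(EC.staleness_of(old_first_row))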
Also, I'd suggest waiting for the elements' visibility, not just their presence, here:
reviews = wait.until(lambda _driver: _driver.find_elements(By.CSS_SELECTOR, '.review_table_row'))
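For example, with the built-in expected condition (same locator as above):

reviews = wait.until(EC.visibility_of_all_elements_located(
    (By.CSS_SELECTOR, '.review_table_row')))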
Also, you need to understand that driver.implicitly_wait does not introduce any actual pause. It just sets the timeout for the find_element and find_elements methods.
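To illustrate the difference (using the same locator again):

# Implicit wait: a global timeout applied to every find_element/find_elements lookup
driver.implicitly_wait(5)

# Explicit wait: actively polls one specific condition until it holds or times out
WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.review_table_row')))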
I am trying to scrape YouTube comments so that each row contains the title of the video, the author of the comment, and the comment itself. As seen in the code below, I open the driver successfully, get rid of some authentication and cookie messages, and scroll enough to get the first comments loaded. After this happens I am still not able to get the comment text by XPath, as seen below.
import csv
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

csv_file = open('funda_youtube_comments.csv', 'w', encoding="UTF-8", newline="")
writer = csv.writer(csv_file)
writer.writerow(['title', 'comment', 'author'])

PATH = r"C:\Users\veiza\OneDrive\Desktop\AUAS\University\Quarter 2\Online Data Mining\Project1test\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.implicitly_wait(10)
driver.get("https://www.youtube.com/watch?v=VWQaP9txG6M&t=76s")
driver.maximize_window()
time.sleep(2)

driver.execute_script('window.scrollTo(0,700);')
wait = WebDriverWait(driver, 20)
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='dismiss-button']"))).click()
time.sleep(2)
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[src^='https://consent.google.com']")))
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[@id='introAgreeButton']"))).click()
time.sleep(2)

title = driver.title
print(title)
time.sleep(5)

totalcomments = len(driver.find_elements_by_xpath("""//*[@id="content-text"]"""))
if totalcomments < 50:
    index = totalcomments
else:
    index = 50

youtube_dict = {}
ccount = 0
while ccount < index:
    try:
        comment = driver.find_elements_by_xpath('//*[@id="content-text"]')[ccount].text
    except:
        comment = ""
    try:
        authors = driver.find_elements_by_xpath('//a[@id="author-text"]/span')[ccount].text
    except:
        authors = ""
    try:
        title = title
    except:
        title = ""
    youtube_dict['comment'] = comment
    youtube_dict['author'] = authors
    youtube_dict['video title'] = title
    writer.writerow(youtube_dict.values())
    ccount = ccount + 1
    print(youtube_dict)
driver.close()
What am I doing wrong?
If you want to keep it simple, you can use tube_dl:
pip install tube_dl
This module has a Comments class that can help you with processing comments.
Here's a simple usage of it:
from tube_dl.comments import Comments
comments = Comments('yt url').process_comments()
# If you want limited comments, you can specify a count, e.g. process_comments(count=45)
Feel free to raise issues at github.com/shekharchander/tube_dl. I'll be happy to resolve issues.
I was able to scrape the YouTube comments; below you can see the solution.
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver.chrome.options import Options
import time

options = Options()
options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
PATH = r"C:\Users\veiza\OneDrive\Desktop\AUAS\University\Quarter 2\Online Data " \
       r"Mining\Project1test\chromedriver.exe "
driver = webdriver.Chrome(executable_path=PATH, options=options)
driver.get(response.url)  # `response` comes from the surrounding Scrapy callback this snippet lives in
time.sleep(5)

try:
    title = driver.find_element_by_xpath('//*[@id="container"]/h1/yt-formatted-string').text
    comment_section = driver.find_element_by_xpath('//*[@id="comments"]')
except exceptions.NoSuchElementException:
    error = "Error: Double check selector OR "
    error += "element may not yet be on the screen at the time of the find operation"
    print(error)

driver.execute_script("arguments[0].scrollIntoView();", comment_section)
time.sleep(7)

# Scroll to the bottom repeatedly until the page height stops growing,
# so all comments get loaded.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    time.sleep(2)
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")

try:
    accounts_elems = driver.find_elements_by_xpath('//*[@id="author-text"]')
    comment_elems = driver.find_elements_by_xpath('//*[@id="content-text"]')
except exceptions.NoSuchElementException:
    error = "Error: Double check selector OR "
    error += "element may not yet be on the screen at the time of the find operation"
    print(error)

accounts = [elem.text for elem in accounts_elems]
comments = [elem.text for elem in comment_elems]

# Note: yield is only legal inside a function (see the sketch below).
for comment_index in range(len(comment_elems)):
    yield {
        'title': title,
        'url': driver.current_url,
        'account': accounts[comment_index],
        'comment': comments[comment_index]
    }
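Note that yield is only legal inside a function; a minimal sketch of how to tie this back to the question's CSV output (my assumption, not part of the original answer):

import csv

def scrape_comments():
    # ... the scraping code above goes here, ending in the `yield {...}` loop ...
    yield from ()  # placeholder so the sketch stays a generator

with open('funda_youtube_comments.csv', 'w', encoding='UTF-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'url', 'account', 'comment'])
    for row in scrape_comments():
        writer.writerow([row['title'], row['url'], row['account'], row['comment']])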
I'm scraping an e-commerce website, Lazada, using Selenium and bs4. I managed to scrape the 1st page but I am unable to iterate to the next page. What I'm trying to achieve is to scrape all pages of the categories I've selected.
Here's what I've tried:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

# Run the argument with incognito
option = webdriver.ChromeOptions()
option.add_argument('--incognito')
driver = webdriver.Chrome(executable_path='chromedriver', chrome_options=option)
driver.get('https://www.lazada.com.my/')
driver.maximize_window()

# Select category item #
element = driver.find_elements_by_class_name('card-categories-li-content')[0]
webdriver.ActionChains(driver).move_to_element(element).click(element).perform()

t = 10
try:
    WebDriverWait(driver, t).until(EC.visibility_of_element_located((By.ID, "a2o4k.searchlistcategory.0.i0.460b6883jV3Y0q")))
except TimeoutException:
    print('Page Refresh!')
    driver.refresh()
    element = driver.find_elements_by_class_name('card-categories-li-content')[0]
    webdriver.ActionChains(driver).move_to_element(element).click(element).perform()
    print('Page Load!')

# Soup and select element
def getData(np):
    soup = bs(driver.page_source, "lxml")
    product_containers = soup.findAll("div", class_='c2prKC')
    for p in product_containers:
        title = (p.find(class_='c16H9d').text)  # title
        selling_price = (p.find(class_='c13VH6').text)  # selling price
        try:
            original_price = (p.find("del", class_='c13VH6').text)  # original price
        except:
            original_price = "-1"
        if p.find("i", class_='ic-dynamic-badge ic-dynamic-badge-freeShipping ic-dynamic-group-2'):
            freeShipping = 1
        else:
            freeShipping = 0
        try:
            discount = (p.find("span", class_='c1hkC1').text)
        except:
            discount = "-1"
        if p.find(("div", {'class': ['c16H9d']})):
            url = "https:" + (p.find("a").get("href"))
        else:
            url = "-1"
        nextpage_elements = driver.find_elements_by_class_name('ant-pagination-next')[0]
        np = webdriver.ActionChains(driver).move_to_element(nextpage_elements).click(nextpage_elements).perform()
        print("- -" * 30)
        toSave = [title, selling_price, original_price, freeShipping, discount, url]
        print(toSave)
        writerows(toSave, filename)  # helper defined elsewhere in my script

getData(np)
The problem might be that the driver is trying to click the button before the element has even loaded properly.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(PATH, chrome_options=option)

# Use this code after driver initialization.
# This makes the driver wait up to 5 seconds whenever it looks up an element.
driver.implicitly_wait(5)

url = "https://www.lazada.com.ph/catalog/?q=phone&_keyori=ss&from=input&spm=a2o4l.home.search.go.239e359dTYxZXo"
driver.get(url)

next_page_path = "//ul[@class='ant-pagination ']//li[@class=' ant-pagination-next']"

# The following code waits up to 5 seconds for the
# element to become clickable and then tries clicking it.
try:
    next_page = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, next_page_path)))
    next_page.click()
except Exception as e:
    print(e)
EDIT 1
Changed the code to make the driver wait for the element to become clickable. You can put this code inside a while loop to iterate over multiple pages and break the loop when the button is no longer found or clickable, as in the sketch below.
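A minimal sketch of that loop (my illustration; scrape_current_page is a hypothetical stand-in for your own getData logic):

from selenium.common.exceptions import TimeoutException

while True:
    scrape_current_page()  # hypothetical: your per-page scraping goes here
    try:
        next_page = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, next_page_path)))
        next_page.click()
    except TimeoutException:
        break  # no clickable next button, so this was the last page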
The web URL is the CCB bank login page. I'm trying
driver.find_element_by_xpath("//span[@class='data_money']")
but I can't find any element.
I've tried switching to the frame before finding the element with
driver.switch_to_frame
but it still can't find any element. Can anyone help me resolve this issue?
My code is as below:
driver = webdriver.Ie()
driver.maximize_window()
driver.implicitly_wait(8)
driver.get(CCB)
time.sleep(2)
driver.switch_to_frame
driver.switch_to_frame(0)
driver.find_element_by_name("USERID").send_keys(user_id)
driver.find_element_by_name("LOGPASS").send_keys(password)
driver.find_element_by_id("loginButton").click()
time.sleep(5)
dm_ret = dm.FindPic(0, 0, 2000, 2000, "d:\Test_Code\Talk_later.bmp", "303030", 0.9, 0, intX, intY)
if dm_ret[1] > 0 and dm_ret[2] > 0:
    print("PIC found")
    dm.moveto(dm_ret[1] + 24, dm_ret[2] + 12)
    dm.leftclick()
else:
    print("PIC not found")
time.sleep(1)
driver.find_element_by_xpath("//span[@class='data_money']")
The problem is that it doesn't like the posts.
I have tried different methods, like finding the buttons by tag name.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

def like_photo(self):
    driver = self.driver
    driver.get("https://www.instagram.com")
    time.sleep(1)
    for i in range(1, 4):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
    # find all the heart links
    hrefs = driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
    pic_hrefs = [elem.get_attribute('href') for elem in hrefs]
    pic_hrefs = [href for href in pic_hrefs]
    print(' Photos ' + str(len(pic_hrefs)))
    for _ in pic_hrefs:
        driver.get("https://www.instagram.com")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            like_button = lambda: driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
            like_button.click()
            time.sleep(18)
        except Exception as e:
            time.sleep(1)

nameIG = InstagramBot(username, password)
nameIG.login()
nameIG.like_photo()
It doesn't like any post; the output is just: Photos 4
Process finished with exit code 0
Exit code 0 means your code ran with no error, but there's still a problem.
To see the actual errors in your code, change the exception handling:
except Exception as e:
    print(e)  # shows the actual error
In your code, like_button is assigned a lambda, so like_button.click() always raises and the bare except silently swallows it; besides, find_elements returns a list, which has no .click(). Run the find, then click each element. Try this:
like_buttons = driver.find_elements_by_xpath(some_xpath_to_buttons)  # list of WebElements
for button in like_buttons:
    button.click()
    time.sleep(18)
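If the clicks are still flaky, a variant that waits for each specific button to become visible and enabled before clicking (a sketch; some_xpath_to_buttons remains your placeholder):

import time
from selenium.webdriver.support.ui import WebDriverWait

wait = WebDriverWait(driver, 10)
like_buttons = driver.find_elements_by_xpath(some_xpath_to_buttons)
for button in like_buttons:
    # poll until this particular button is ready to receive a click
    wait.until(lambda d: button.is_displayed() and button.is_enabled())
    button.click()
    time.sleep(18)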