I am scraping Banggood. The problem is that the driver opens just the first link and then doesn't go to the next link in the links list (the next product), and I get this error at line 24:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
However, when I print the links outside the loop, I get all of them:
print(links[0].get_attribute('href'))
print(links[2].get_attribute('href'))
Main code:
import time
from selenium import webdriver  # THIS IS THE MAIN SCRIPT

driver = webdriver.Chrome(executable_path='C:\\Users\\Compu City\\Desktop\\chromedriver.exe')  # driver location
driver.get('https://usa.banggood.com/Deals_Electronics.html#dealscategories2')  # deals page URL
driver.implicitly_wait(30)

links = driver.find_elements_by_css_selector('body > div.flashdeals-container.fixed > div.main > div.product-list.cf > ul > li > a.products_name.exclick')
# links has 25 links
product = 0
while product <= len(links):
    driver.get(links[product].get_attribute('href'))
    try:  # TITLE
        title = driver.find_element_by_css_selector('#centerCtrl > div.title_hd > h2 > strong')
        print(title.text)
    except:
        print('no title')
    try:  # NEW PRICE
        new_price = driver.find_element_by_css_selector('#centerCtrl > div.itemBox > div.item_price_box > div.item_now_price')
        print(new_price.text)
    except:
        print('no new price')
    try:  # OLD PRICE
        old_price = driver.find_element_by_css_selector('#centerCtrl > div.itemBox > div.item_price_box > div.item_old_price')
        print(old_price.text)
    except:
        print('no old price')
    try:  # IMAGE
        image = driver.find_element_by_css_selector('#landingImage').get_attribute('src')
        print(image)
    except:
        print('no image')
    product += 1
I also tried this:
v = []
for x in links:
    # driver.get(links[1].get_attribute('href'))
    print(v.append(x.get_attribute('href')))  # note: list.append returns None
print(len(v))
driver.get(v[1])
time.sleep(10)
driver.get(v[2])
product = 0
while product <= len(v):
    driver.get(v[product])
    product += 1
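For what it's worth, the second attempt is the right idea: once the hrefs are stored as plain strings, nothing can go stale when the driver navigates away. A minimal sketch of that pattern (note < rather than <=, which would raise an IndexError on the last pass):

links = driver.find_elements_by_css_selector('body > div.flashdeals-container.fixed > div.main > div.product-list.cf > ul > li > a.products_name.exclick')
hrefs = [link.get_attribute('href') for link in links]  # plain strings cannot go stale

for href in hrefs:
    driver.get(href)
    # ... scrape title, prices, and image as above ...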
Related
Using Selenium to interact with authors on Medium.com
I am trying to target a popup element with Selenium that appears when text is double-clicked. Once this is detected and clicked, it opens a message box to the right. My end goal is to insert (send_keys) text into this text box, yet it is proving quite difficult.
[screenshot: the dynamic element; the element on the far right is the button to open the chat box]
[screenshot: the text box]
WHAT I HAVE TRIED:
import time
import random
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from icecream import ic  # ic() is used for debug logging below

# driver is assumed to be created earlier in the script

def interact_with_author(message):
    # variables
    target_class = "meteredContent"
    body_css = "body"
    header_xpath = "/html/head"
    second_article_css = "#root > div > div.s.n.t > div.ah.ay > div > div.n.p > div > div:nth-child(2)"  # the second article on the page
    first_par = "#\39 993"
    second_par = "#\35 f50"
    first_par_css = "#root > div > div.s > article > div > section > div > div > p"
    first_par_class = "ht hu dj hv b ei hw hx hy el hz ia ib ic id ie if ig ih ii ij ik il im in io db eg"
    wait_time = 5  # seconds to wait when sleep is called with the wait_time variable

    # code
    text_box = driver.find_element_by_css_selector('body > div:nth-child(47) > div > div > div > div > div')
    action = ActionChains(driver)
    listing = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "a")))
    articles = driver.find_elements_by_tag_name("a")
    an_article = driver.find_element_by_css_selector(second_article_css)
    an_article.click()
    time.sleep(wait_time)  # todo: change to sleep four seconds after the article is fully loaded
    listing = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "p")))
    try:
        paragraphs = driver.find_elements_by_tag_name('p')
        driver.execute_script("document.body.style.zoom='250%'")
        try:
            first_par = ''
            for i in range(1, len(paragraphs)):
                first_par_commentable = None
                try:
                    first_par_commentable = driver.find_element_by_xpath(f"/html/body/div[1]/div/div[3]/article/div/section/div/div/p[{i}]")
                    driver.execute_script("document.body.style.zoom='200%'")
                except Exception as e:
                    ic(e)
                if first_par_commentable is not None:
                    break
        except Exception as f:
            ic(f)
        try:
            first_par_commentable.click()
            action.double_click(first_par_commentable).perform()
            time.sleep(random.randint(1, 3))
        except Exception as e:
            ic(e)
    except Exception as e:
        ic(e)
If anyone knows how to access this element quickly and in a scalable way, it would be appreciated.
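One hedged suggestion: after the double-click, wait explicitly for the popup to attach to the DOM before clicking it, and then wait for the text box the same way. The two selectors below are placeholders (Medium's obfuscated class names are not known here), so treat this as a sketch rather than a working recipe:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

action.double_click(first_par_commentable).perform()

POPUP_BUTTON_CSS = "div.highlightMenu button"  # placeholder selector, not verified
popup_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, POPUP_BUTTON_CSS)))
popup_button.click()  # opens the message box on the right

TEXT_BOX_CSS = "div[contenteditable='true']"  # placeholder selector, not verified
text_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, TEXT_BOX_CSS)))
text_box.send_keys(message)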
I've looked all through Stack Overflow to try to find the answer to this, but couldn't. What's wrong with my code is that it clicks the first element and gets the 'href' I want, but stops right after that and throws errors like
box[x].click()
&
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Here's the code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

box = driver.find_elements_by_class_name("info-section.info-primary")
x = 0
# for x in range(0, len(box)):
while True:
    while x <= len(box):
        # if box[x].is_displayed():
        driver.implicitly_wait(2)
        # error is happening here
        box[x].click()
        x += 1
        try:
            website = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "primary-btn.website-link"))
            )
            print(website.get_attribute('href'))
            driver.back()
        except:
            driver.back()
    if not driver.find_element_by_class_name('ajax-page'):
        break
    else:
        driver.find_element_by_class_name('ajax-page').click()
You are getting the StaleElementReferenceException because you define box, navigate to another page, and then try to use the box variable again. The quickest way to resolve this is to re-locate the element on each pass through the loop instead of reusing the stored reference:
box = driver.find_elements_by_class_name("info-section.info-primary")
x = 0
# for x in range(0, len(box)):
while True:
    while x < len(box):  # < rather than <= avoids an IndexError on the last card
        # if box[x].is_displayed():
        driver.implicitly_wait(2)
        # re-locate the element each pass so the reference is never stale
        driver.find_elements_by_class_name("info-section.info-primary")[x].click()
        x += 1
        try:
            website = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "primary-btn.website-link"))
            )
            print(website.get_attribute('href'))
            driver.back()
        except:
            driver.back()
    if not driver.find_element_by_class_name('ajax-page'):
        break
    else:
        driver.find_element_by_class_name('ajax-page').click()
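A caveat worth adding, as a hedged refinement: after driver.back() the cards may not be rendered yet when the next lookup runs, so it can be safer to wait for the elements rather than indexing into them immediately. A sketch:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fresh_boxes(driver):
    # Re-locate the cards on every call, so no reference outlives a navigation.
    return WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "info-section.info-primary")))

x = 0
while x < len(fresh_boxes(driver)):
    fresh_boxes(driver)[x].click()
    x += 1
    # ... grab the website link, then driver.back() as above ...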
I keep getting this message:
"Message: no such element: Unable to locate element: {"method":"css selector","selector":".MGdpg > button:nth-child(1)"}"
while scraping a post with 500 comments on Instagram, and I end up getting only 10 comments. Is there a problem with the code?
from selenium import webdriver
import time
import sys

driver = webdriver.Chrome()
driver.get(sys.argv[1])
time.sleep(3)

# if the user is not logged in, dismiss the login dialog
try:
    close_button = driver.find_element_by_class_name('xqRnw')
    close_button.click()
except:
    pass

try:
    load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
    print("Found {}".format(str(load_more_comment)))
    i = 0
    while load_more_comment.is_displayed() and i < int(sys.argv[2]):
        load_more_comment.click()
        time.sleep(2.5)
        load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
        print("Found {}".format(str(load_more_comment)))
        i += 1
except Exception as e:
    print(e)

user_names = []
user_comments = []
comment = driver.find_elements_by_class_name('gElp9')  # trailing space removed from the class name
for c in comment:
    container = c.find_element_by_class_name('C4VMK')
    name = container.find_element_by_class_name('_6lAjh').text
    content = container.find_element_by_tag_name('span').text
    content = content.replace('\n', ' ').strip()
    user_names.append(name)
    user_comments.append(content)

user_names.pop(0)
user_comments.pop(0)

import excel_exporter  # helper module from the linked repo
excel_exporter.export(user_names, user_comments)

driver.close()
By the way, the code belongs to Agi Maulana; you can check his GitHub repo for it:
https://github.com/AgiMaulana/Instagram-Comments-Scraper
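For what it's worth, the likely cause is that the "load more" button is re-rendered (or briefly missing) after each click, so the immediate find_element fails before the button reappears. A hedged sketch that waits for the button each time, assuming the .MGdpg selector is still valid for the page:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

i = 0
while i < int(sys.argv[2]):
    try:
        # Wait for the button to reappear instead of failing instantly.
        load_more_comment = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.MGdpg > button:nth-child(1)')))
    except TimeoutException:
        break  # no more comments left to load
    load_more_comment.click()
    time.sleep(2.5)
    i += 1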
I'm having an issue with my Selenium web scraping script.
Normally, the script runs smoothly.
However, I usually get this error within the for loop below
(I believe the script runs too fast, before the elements become visible):
NoSuchElementException                    Traceback (most recent call last)
<ipython-input-6-470748a6674f> in <module>
     66 item_brand.append(driver.find_element_by_xpath('.//*[@id="brand"]/a/span/bdi').get_attribute('textContent'))
     67 item_prices.append(driver.find_element_by_css_selector('[id="price"]').text)
---> 68 item_names.append(driver1.find_element_by_css_selector('[class="nav-product-link-text"] span').text)
     69 total_rate.append(driver1.find_element_by_class_name('css-i36p8g').text)
     70 review_contents.append(containers.find_element_by_class_name('review-text').text)
......
"NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"[class="nav-product-link-text"] span"}"
I had to add driver.implicitly_wait(3) within the for loop so it would wait until the elements were visible, but it didn't work.
Please check my script below:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(chrome_path)   # chrome_path is defined elsewhere
driver1 = webdriver.Chrome(chrome_path)

# Create lists for the dataframe:
item_names = list()
item_description = list()
item_brand = list()
review_titles = list()
review_contents = list()
product_helpful = list()
product_not_helpful = list()
member_rating = list()
total_rate = list()
item_prices = list()
item_images = list()

URL = "https://ca.iherb.com/c/Vitamins?sr=2&noi=48&p="
for n in range(1, 2):
    driver.get(f"{URL}{n}")  # modify the page numbers to scrape the products information
    # driver.get(f"https://ca.iherb.com/c/Vitamins?sr=2&noi=48&p={n}".format(n+1))
    wait = WebDriverWait(driver, 10)
    # Store all the links in a list
    item_links = [item.get_attribute("href") for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".absolute-link-wrapper > a.product-link")))]
    # Iterate over the links
    for item_link in item_links:
        driver.get(item_link)
        # Locate the `View All Reviews` link
        all_reviews_link = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.all-reviews-link > a")))
        time.sleep(2)
        x = all_reviews_link.get_attribute("href")
        MAX_PAGE_NUM = 60  # Scrape a maximum of 60 pages in the review section
        for i in range(1, MAX_PAGE_NUM + 1):
            page_num = str(i)
            url = x + '?&p=' + page_num
            print(url)
            driver1.get(url)
            review_containers = driver1.find_elements_by_class_name('review-row')
            for containers in review_containers:
                driver.implicitly_wait(3)  # waiting for the browser to see the website elements
                elements = ', '.join([item.text for item in driver.find_elements_by_css_selector("[itemprop='description'] > ul:nth-of-type(1) > li")])
                item_description.append(elements)
                item_images.append(driver.find_element_by_xpath('//*[@id="product-image"]/div[1]/a').get_attribute('href'))
                item_brand.append(driver.find_element_by_xpath('.//*[@id="brand"]/a/span/bdi').get_attribute('textContent'))
                item_prices.append(driver.find_element_by_css_selector('[id="price"]').text)
                item_names.append(driver1.find_element_by_css_selector('[class="nav-product-link-text"] span').text)
                total_rate.append(driver1.find_element_by_class_name('css-i36p8g').text)
                review_contents.append(containers.find_element_by_class_name('review-text').text)
                product_helpful.append(containers.find_element_by_css_selector('[title="Helpful"] span').text)
                product_not_helpful.append(containers.find_element_by_css_selector('[title="Unhelpful"] span').text)
                stars = containers.find_elements_by_class_name("css-172co2l")
                rating = 0
                for star in stars:
                    star_color = star.find_element_by_tag_name("path").get_attribute("fill")
                    if star_color != "transparent":
                        rating += 1
                member_rating.append(rating)
            time.sleep(2)  # Slow the script down
driver.quit()
Please help me check this issue. I really appreciate it.
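Since implicitly_wait(3) didn't help, one hedged suggestion: use explicit waits on driver1 for the elements that fail in the traceback, so the script blocks until they exist instead of failing immediately. A sketch for the two driver1 lookups:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait1 = WebDriverWait(driver1, 10)

# Block until each element is present instead of raising NoSuchElementException.
item_names.append(wait1.until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, '[class="nav-product-link-text"] span'))).text)
total_rate.append(wait1.until(EC.presence_of_element_located(
    (By.CLASS_NAME, 'css-i36p8g'))).text)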
I'm trying to scrape all the corner betting odds for a given game on skybet, but it looks like scrolling is messing things up in my loop. When I print section.text, it looks like it's doing what I want, but then it clicks the wrong thing.
And when I don't scroll, it will only click on the first few odds sections before the code just freezes.
Any help would be really appreciated, thanks!
Also, I made odds_sections refresh itself at each iteration because I thought that might be the problem.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Safari()
driver.get("https://m.skybet.com/football/competitions")
driver.maximize_window()

# click accept cookie
try:
    button_cookie = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//body/div[2]/div[1]/a[2]"))
    )
    button_cookie.click()
except:
    print("no cookie")

# find location of Premier League
pl = driver.find_elements_by_class_name("split__title")
locate_pl = 0
link_name = pl[locate_pl].text
while link_name != "Premier League":
    locate_pl += 1
    link_name = pl[locate_pl].text
pl[locate_pl].click()
N = locate_pl + 1

# use N now to find PL matches
time.sleep(2)

# click on first match
button_match = driver.find_element_by_xpath("//div[@id='competitions']/ul[1]/li[{}]/div[1]/table[2]/tbody[1]/tr[2]/td[1]/a[1]".format(N))
teams = driver.find_element_by_xpath("//div[@id='competitions']/ul[1]/li[{}]/div[1]/table[2]/tbody[1]/tr[2]/td[1]/a[1]/b/span".format(N))
button_match.send_keys(Keys.ENTER)
time.sleep(2)

# find and click corners button
try:
    button_corners = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "_1ouz2ki")))
    # button_corners = driver.find_elements_by_class_name("_1ouz2ki")
except:
    print("no corners")

n = 0
link_name = button_corners[n].text
while link_name != "Corners":
    n += 1
    link_name = button_corners[n].text
button_corners[n].click()

# Now we will scrape all corner odds for this game.
odds_sections = driver.find_elements_by_class_name('_t0tx82')
N_sections = len(odds_sections)
c = 0
scroll_to = 35

# the issue is within this loop
while c <= N_sections:
    odds_sections = driver.find_elements_by_class_name('_t0tx82')
    section = odds_sections[c]
    print(section.text)
    section.click()
    time.sleep(2)
    section.click()
    c += 1
    driver.execute_script("window.scrollTo(0,{})".format(scroll_to))
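One hedged idea for the misclicks: rather than scrolling the window to a fixed offset, scroll each section itself into view before interacting with it, so the click lands on the element wherever the page happens to be. A sketch of the loop with that change (and < instead of <=, which would overrun the list):

while c < N_sections:
    odds_sections = driver.find_elements_by_class_name('_t0tx82')
    section = odds_sections[c]
    # Center the section in the viewport before clicking it.
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", section)
    print(section.text)
    section.click()
    time.sleep(2)
    section.click()
    c += 1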