Stale exception web scraping selenium python

I am trying to grab information from tripadvisor. I sometimes get
Message: stale element reference: element is not attached to the page document
(Session info: chrome=47.0.2526.73)
(Driver info: chromedriver=2.20.353124 (035346203162d32c80f1dce587c8154a1efa0c3b),platform=Mac OS X 10.10.4 x86_64)
and after that the element is just whatever I last assigned to it. How can I change my code to handle this error and recover from it, instead of having to re-run the whole script?
def getElements(driver):
    elements = []
    for dd in driver.find_elements_by_xpath("//*[contains(@class, 'ui_button original')]"):
        try:
            if dd.text == "Book Now":
                elements.append(dd)
        except Exception as ee:
            print ee
    return elements
def getBookingPartner(driver, ibInfo):
    data = []
    i = 0
    elements = []
    time.sleep(2)
    elements = getElements(driver)
    elementCounter = 0
    while(elements == [] or elementCounter > 5):
        elements = getElements(driver)
        elementCounter += 1
    print "Length of elements should be > 0 : " + str(len(elements))
    for ii in ibInfo:
        if ii[0] == "Yes":
            driver.implicitly_wait(3)
            bookingPartner = "Error"
            print ii
            driver.implicitly_wait(3)
            try:
                elements[i].click()
                driver.implicitly_wait(3)
                driver.switch_to_window(driver.window_handles[-1])
            except Exception as ee:
                try:
                    driver.refresh()
                    getElements(driver)[i].click()
                    time.sleep(1)
                    driver.switch_to_window(driver.window_handles[-1])
                except Exception as ee:
                    print "Stale Exception...."
                    print ee
            try:
                driver.implicitly_wait(3)
                driver.find_elements_by_xpath("//*[contains(@class, 'book_now')]")[1].click()
                driver.implicitly_wait(1)
                page = etree.HTML(driver.page_source)
                bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[0].split("will")[0].strip()
            except:
                try:
                    time.sleep(3)
                    driver.find_elements_by_xpath("//*[contains(@class, 'book_now')]")[1].click()
                    time.sleep(2)
                    page = etree.HTML(driver.page_source)
                    bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[0].split("will")[0].strip()
                except:
                    try:
                        bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[1].split("will")[0].strip()
                    except Exception as ee:
                        bookingPartner = "Error"
                        print "error"
            i += 1
            if bookingPartner == "The remainder":
                bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[1].split("will")[0].strip()
            if len(driver.window_handles) > 1:
                driver.close()
                driver.switch_to_window(driver.window_handles[0])
            print bookingPartner
            data.append([ii[0], ii[1], bookingPartner])
        else:
            data.append([ii[0], ii[1], "N/A"])
            ii.extend(["N/A"])
    print data
    return data

A StaleElementReferenceException occurs when an element:
- has been deleted,
- is no longer attached to the DOM (as in your case), or
- has changed.
From the docs:
You should discard the current reference you hold and replace it, possibly by locating the element again once it is attached to the DOM.
i.e.: "Find" the element again.
You'll need to modify the code to catch this error for the appropriate step.
from selenium.common.exceptions import StaleElementReferenceException

elem = driver.find_element_by_xpath('something leaves dom')
# ... do other actions which change the page and then later...
try:
    elem.click()
except StaleElementReferenceException:
    elem = driver.find_element_by_xpath('something leaves dom')
    elem.click()
Make a reusable version of this if you need it extensively for several elements.
Btw, you should not be catching Exception in your code. Be specific about which ones you want to handle.
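For example, such a reusable helper might look roughly like this (a minimal sketch; the locate argument is any zero-argument callable that re-finds the element, and the retry count is arbitrary):
from selenium.common.exceptions import StaleElementReferenceException

def click_with_retry(locate, attempts=3):
    # locate is a callable that re-finds the element, e.g.
    # lambda: driver.find_element_by_xpath("//*[contains(@class, 'book_now')]")
    for attempt in range(attempts):
        try:
            locate().click()
            return True
        except StaleElementReferenceException:
            # the DOM changed under us; re-find on the next attempt
            continue
    return False
With the code in the question, something like click_with_retry(lambda: getElements(driver)[i]) would re-run your own lookup on every attempt.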

Related

Starting from the last index when inserting data to MySQL while Scraping

I am scraping a website and inserting data into a MySQL DB at the same time, something like this. I had to delete the scraping portions, or the code would be too big.
def get_page(links):
    parent_window = driver.current_window_handle
    for link in links:
        driver.execute_script('window.open(arguments[0]);', link)
        all_windows = driver.window_handles
        child_window = [window for window in all_windows if window != parent_window][0]
        driver.switch_to.window(child_window)
        #scraping
        try:
            cursor.execute("INSERT INTO Investors(name, tags, website, introduction) VALUES(%s,%s,%s,%s)", (name,tag,website,introduction,))
        except Exception as e:
            raise e
        parent_window1 = driver.current_window_handle
        for lin in team_div:
            driver.execute_script('window.open(arguments[0]);', lin)
            all_windows = driver.window_handles
            child_window1 = [window for window in all_windows if window != parent_window1][1]
            driver.switch_to.window(child_window1)
            time.sleep(2)
            #scraping
            driver.close()
            driver.switch_to.window(parent_window1)
            sql = cursor.execute(f"SELECT inv_id FROM Investors WHERE name =\'{name}\'")
            pid = cursor.fetchone()
            try:
                cursor.execute("INSERT INTO team_members(inv_id,mem_name, picture, experience) VALUES(%s,%s,%s,%s)", (pid,port_name,headshot, work_ex,))
            except:
                pass
        driver.refresh()
        time.sleep(3)
        driver.execute_script("window.scrollBy(0,2825)", "")
        time.sleep(2)
        #scraping
        try:
            cursor.execute("INSERT INTO portfolio(inv_id,port_name, port_icon, port_desc) VALUES(%s,%s,%s,%s)", (pid1,p_name, p_icon, p_short_des,))
        except:
            pass
        driver.close()
        driver.switch_to.window(parent_window)

def get_links(page):
    if page == 1:
        url = 'https://www.cypherhunter.com/en/search/?q=investments'
        driver.get(url)
        time.sleep(2)
        links = driver.find_elements_by_xpath('//div[@class="app-item-container"]//a')
        return links
    else:
        url = f'https://www.cypherhunter.com/en/search/page/{page}/?q=investments'
        driver.get(url)
        time.sleep(2)
        links = driver.find_elements_by_xpath('//div[@class="app-item-container"]//a')
        return links

for p in range(1, 48):
    z = get_links(p)
    get_page(z)
I have a sense that this may be an inefficient way of sending data, but then it would become two questions. My question is: how can I make it so that if the script fails for some reason, it starts from the same place on the next run? The last index can be read from MySQL, but how do I use it in the code, or only manually?
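One way to resume automatically (a rough sketch, assuming exactly one Investors row is inserted per link and reusing the cursor, get_links and get_page from your script) is to count the rows already stored at start-up and skip that many links before scraping again:
def get_resume_index(cursor):
    # number of investors already stored by previous runs
    cursor.execute("SELECT COUNT(*) FROM Investors")
    return cursor.fetchone()[0]

done = get_resume_index(cursor)
for p in range(1, 48):
    links = get_links(p)
    if done >= len(links):
        # this whole page was finished on an earlier run
        done -= len(links)
        continue
    get_page(links[done:])  # resume part-way through this page
    done = 0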

having trouble looping through elements with selenium python

I've looked all through Stack Overflow to try to find the answer to this but couldn't. The problem with my code is that it clicks the first element and then gets the 'href' I want, but stops right after that and throws errors like
box[x].click()
&
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Here's the code
box = driver.find_elements_by_class_name("info-section.info-primary")
x = 0
#for x in range(0, len(box)):
while True:
    while x <= len(box):
        #if box[x].is_displayed():
        driver.implicitly_wait(2)
        # error is happening here
        box[x].click()
        x += 1
        try:
            website = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "primary-btn.website-link"))
            )
            print(website.get_attribute('href'))
            driver.back()
        except:
            driver.back()
    if not driver.find_element_by_class_name('ajax-page'):
        break
    else:
        driver.find_element_by_class_name('ajax-page').click()
You are getting the StaleElementReference error because you define box, navigate to another page, and then try to use the box variable again. The quickest way to resolve this is to locate the elements again on each loop iteration instead of reusing the variable:
box = driver.find_elements_by_class_name("info-section.info-primary")
x = 0
#for x in range(0, len(box)):
while True:
    while x <= len(box):
        #if box[x].is_displayed():
        driver.implicitly_wait(2)
        # error is happening here
        driver.find_elements_by_class_name("info-section.info-primary")[x].click()
        x += 1
        try:
            website = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "primary-btn.website-link"))
            )
            print(website.get_attribute('href'))
            driver.back()
        except:
            driver.back()
    if not driver.find_element_by_class_name('ajax-page'):
        break
    else:
        driver.find_element_by_class_name('ajax-page').click()
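If the number of result boxes stays the same across the driver.back() calls (an assumption about this particular listing page), the same idea can also be written as a for loop over an index, re-finding the list on every pass; a sketch:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

count = len(driver.find_elements_by_class_name("info-section.info-primary"))
for x in range(count):
    # re-find the list after every driver.back() so the reference is never stale
    boxes = driver.find_elements_by_class_name("info-section.info-primary")
    boxes[x].click()
    try:
        website = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "primary-btn.website-link"))
        )
        print(website.get_attribute('href'))
    except TimeoutException:
        pass  # no website link on this listing
    driver.back()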

Instagram Scraper with Selenium

I keep getting this message
"Message: no such element: Unable to locate element: {"method":"css selector","selector":".MGdpg > button:nth-child(1)"}"
while I was scraping a post with 500 comments on Instagram, and I ended up getting only 10 comments. Is there any problem with the code?
from selenium import webdriver
import time
import sys

driver = webdriver.Chrome()
driver.get(sys.argv[1])
time.sleep(3)

#if user not logined
try:
    close_button = driver.find_element_by_class_name('xqRnw')
    close_button.click()
except:
    pass

try:
    load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
    print("Found {}".format(str(load_more_comment)))
    i = 0
    while load_more_comment.is_displayed() and i < int(sys.argv[2]):
        load_more_comment.click()
        time.sleep(2.5)
        load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
        print("Found {}".format(str(load_more_comment)))
        i += 1
except Exception as e:
    print(e)
    pass

user_names = []
user_comments = []
comment = driver.find_elements_by_class_name('gElp9 ')
for c in comment:
    container = c.find_element_by_class_name('C4VMK')
    name = container.find_element_by_class_name('_6lAjh').text
    content = container.find_element_by_tag_name('span').text
    content = content.replace('\n', ' ').strip().rstrip()
    user_names.append(name)
    user_comments.append(content)

user_names.pop(0)
user_comments.pop(0)

import excel_exporter
excel_exporter.export(user_names, user_comments)

driver.close()
By the way, the code belongs to Agi Maulana; you can check his GitHub repo for it:
https://github.com/AgiMaulana/Instagram-Comments-Scraper
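For what it's worth, the "no such element" message appears once the "load more comments" button is no longer on the page; a sketch (reusing the driver, time and sys setup from the script above) that looks the button up with find_elements and stops cleanly when it disappears:
i = 0
while i < int(sys.argv[2]):
    buttons = driver.find_elements_by_css_selector('.MGdpg > button:nth-child(1)')
    if not buttons or not buttons[0].is_displayed():
        break  # no "load more" button left, so all comments are loaded
    buttons[0].click()
    time.sleep(2.5)
    i += 1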

Skip extracting value if text node not found

I am scraping all opening odds from this page, but I want to skip all canceled events (like on this page) and print a blank result instead, as below:
try:
    xpath = '//table[starts-with(@id,"aodds")]//tr[th="Opening odds"]/following-sibling::tr/td[@class="bold"]'
except:
    print('')
homeodd = driver.find_element_by_xpath(xpath).text
Try to update your code as follows:
from selenium.common.exceptions import NoSuchElementException

xpath = '//table[starts-with(@id,"aodds")]//tr[th="Opening odds"]/following-sibling::tr/td[@class="bold"]'
try:
    homeodd = driver.find_element_by_xpath(xpath).text
except NoSuchElementException:
    print('No bets found')
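An alternative that avoids the exception entirely is find_elements (plural), which returns an empty list instead of raising when nothing matches; a small sketch with the same XPath:
xpath = '//table[starts-with(@id,"aodds")]//tr[th="Opening odds"]/following-sibling::tr/td[@class="bold"]'
cells = driver.find_elements_by_xpath(xpath)
if cells:
    homeodd = cells[0].text
else:
    homeodd = ''  # canceled event, nothing to extract
    print('No bets found')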

I'm making a bot that likes every post that isn't liked yet

The problem is that it doesn't like the posts.
I have tried different methods, like locating by tag name.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

def like_photo(self):
    driver = self.driver
    driver.get("https://www.instagram.com")
    time.sleep(1)
    for i in range(1, 4):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
    # find all the heart links
    hrefs = driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
    pic_hrefs = [elem.get_attribute('href') for elem in hrefs]
    pic_hrefs = [href for href in pic_hrefs]
    print(' Photos ' + str(len(pic_hrefs)))
    for _ in pic_hrefs:
        driver.get("https://www.instagram.com")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            like_button = lambda: driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
            like_button.click()
            time.sleep(18)
        except Exception as e:
            time.sleep(1)

nameIG = InstagramBot(username, password)
nameIG.login()
nameIG.like_photo()
It doesn't like any post; the output is just:
Photos 4
Process finished with exit code 0
Exit code 0 means your code ran without raising an error; however, there's still a problem.
To see whether there are actual errors in your code, change what the except block does:
except Exception as e:
    print(e)  # shows actual error
Try this:
like_buttons = driver.find_elements_by_xpath(some_xpath_to_buttons)  # list of WebElements
for button in like_buttons:
    button.click()
    time.sleep(18)
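Note that the XPath in the question matches a span, and clicking the enclosing button is usually what registers the like. A sketch that does that and also guards against stale references (the aria-label text and the ancestor::button structure are assumptions about Instagram's markup, and it reuses the driver and time imports from the question):
from selenium.common.exceptions import StaleElementReferenceException

like_buttons = driver.find_elements_by_xpath(
    "//span[@aria-label='Synes godt om']/ancestor::button")
for button in like_buttons:
    try:
        button.click()
        time.sleep(18)
    except StaleElementReferenceException:
        continue  # the feed re-rendered under us; skip this one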
