Skip extracting value if text node not found

Skip extracting value if text node not found - python

I am scraping all opening odds from this page
But I want to skip all canceled events (like on this page) and print blank result as below:
# Asker's attempt, with indentation and the XPath '@' attribute markers restored.
# NOTE(review): the try/except guards only a string assignment, which cannot
# raise; the lookup that can fail (find_element_by_xpath) sits outside it.
try:
    xpath = '//table[starts-with(@id,"aodds")]//tr[th="Opening odds"]/following-sibling::tr/td[@class="bold"]'
except:
    print('')
homeodd = driver.find_element_by_xpath(xpath).text

Try to update your code as
from selenium.common.exceptions import NoSuchElementException
# XPath of the "Opening odds" cell; attribute tests use the '@' prefix.
xpath = '//table[starts-with(@id,"aodds")]//tr[th="Opening odds"]/following-sibling::tr/td[@class="bold"]'
try:
    homeodd = driver.find_element_by_xpath(xpath).text
except NoSuchElementException:
    # No matching cell on this page: fall back to a blank value so that any
    # later use of `homeodd` does not raise NameError.
    homeodd = ''
    print('No bets found')

Related

Selenium Loop through table pages

I've been trying to scrape a table of contents with Selenium and Beautiful Soup, but I can't seem to find a good way to loop through the table's pages given how the HTML is written as there is no next button and the currently selected page button has the active class.
This is the code I have so far:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as bs
import pandas as pd
# Asker's attempt, with indentation and the XPath '@id' marker restored.
path_driver = "C:/Users/CS330584/Documents/Documentos de Defesa da Concorrência/Automatização de Processos/chromedriver.exe"
website = "https://sat.sef.sc.gov.br/tax.NET/Sat.Dva.Web/ConsultaPublicaDevedores.aspx"
value_search = "300"
driver = webdriver.Chrome(path_driver)
driver.get(website)
search_max = driver.find_element_by_id("Body_Main_Main_ctl00_txtTotalDevedores")
search_max.send_keys(value_search)
btn_consult = driver.find_element_by_id("Body_Main_Main_ctl00_btnBuscar")
btn_consult.click()
driver.implicitly_wait(10)
i = 1
while True:
    try:
        #some wait
        # NOTE(review): the predicate after "li" is not well-formed XPath and
        # the string has no "{}" placeholder for .format(); `i` is also never
        # incremented. Left as posted because this is the code being asked about.
        driver.find_element_by_xpath("//*[@id='Body_Main_Main_grpDevedores_gridView']/tbody/tr[51]/td/ul/li' and .='[]']".format(str(i + 1))).click()
    except:
        break
How can I effectively (or even not so effectively) loop through these table pages in order to scrape the data ?

Buttons to next pages run JavaScript code
javascript:GridView_ScrollToTop('Body_Main_Main_grpDevedores_gridView');__doPostBack('ctl00$ctl00$ctl00$Body$Main$Main$grpDevedores$gridView','Page$1')
and you can also use it to change pages.
You only have to update the number in Page$1 - e.g. using an f-string
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import math
path_driver = "C:/Users/CS330584/Documents/Documentos de Defesa da Concorrência/Automatização de Processos/chromedriver.exe"
website = "https://sat.sef.sc.gov.br/tax.NET/Sat.Dva.Web/ConsultaPublicaDevedores.aspx"
value_search = 300
rows_per_page = 50  # page size used to derive the number of result pages

#driver = webdriver.Chrome(path_driver)
driver = webdriver.Firefox()
driver.get(website)

search_max = driver.find_element_by_id("Body_Main_Main_ctl00_txtTotalDevedores")
# value_search is kept as an int for the page arithmetic below; the text box
# is given its string form explicitly.
search_max.send_keys(str(value_search))
btn_consult = driver.find_element_by_id("Body_Main_Main_ctl00_btnBuscar")
btn_consult.click()
driver.implicitly_wait(10)

pages = math.ceil(value_search / rows_per_page)
print('pages:', pages)

# Page 1 is already displayed after the search, so start from page 2.
for i in range(2, pages+1):
    try:
        time.sleep(2)
        # Run the same JavaScript the pagination links use, with the page
        # number substituted into 'Page$<n>'.
        driver.execute_script(f"javascript:GridView_ScrollToTop('Body_Main_Main_grpDevedores_gridView');__doPostBack('ctl00$ctl00$ctl00$Body$Main$Main$grpDevedores$gridView','Page${i}')")
    except Exception as ex:
        # Best-effort: report the problem and stop paging.
        print(ex)
        break
You can also get all the page links and use i as an index - you have to add 1 to skip the << link
# Variant: collect every pagination link and click one by list index.
for i in range(2, pages+1):
    try:
        time.sleep(2)
        # Re-query on every iteration: the links found before a page change
        # belong to the previous page.
        all_links = driver.find_elements_by_xpath('//tr[@class="sat-gv-pagination-row"]//li//a')
        all_links[i+1].click()
    except Exception as ex:
        print(ex)
        break
Or you can use f-string to create xpath with li[{i+1}]
# Variant: build the XPath for the wanted <li> directly with an f-string.
for i in range(2, pages+1):
    try:
        time.sleep(2)
        next_link = driver.find_element_by_xpath(f'//tr[@class="sat-gv-pagination-row"]//li[{i+1}]//a')
        next_link.click()
    except Exception as ex:
        print(ex)
        break

i verify that autocomplete works well but there are no results appear

I am verifying whether autocomplete works. I send the keys, but the required element is not selected. Finally, I want to print the URL of the page that appears after finding the required element and clicking on it. I receive only this result:
Ran 1 test in 33.110s
OK
Process finished with exit code 0
Message:
def test_autocomplet(self):
    # Asker's test, with indentation restored; logic left as posted.
    try:
        # NOTE(review): two separate browsers are started here. The page is
        # loaded in self.driver, but the WebDriverWait below polls `driver`,
        # which never loads any page.
        driver = webdriver.Chrome()
        self.driver=webdriver.Chrome()
        url = self.driver.get("http://automationpractice.com/index.php")
        self.driver.maximize_window()
        Serach_text_box=self.driver.find_element_by_id("search_query_top")
        Serach_text_box.send_keys("Printed")
        Serach_text_box.send_keys(Keys.ARROW_DOWN)
        five_option= WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH,"//*[contains(text(),'Dress')]")))
        five_option.send_keys(Keys.ENTER)
        print(self.driver.current_url)
        self.assertEqual("http://automationpractice.com/index.php?id_product=3&controller=product",self.driver.current_url, "This Test case is fallied")
    # NOTE(review): every failure is only printed, so the test runner still
    # reports "OK".
    except NoSuchElementException as e:
        print (e)
    except AssertionError as e:
        print (e)
    except TimeoutException as e:
        print (e)
I want to know whether anything in the code is wrong, why it does not select and click on the required element, and why it does not print the URL of the page that appears after clicking on that element.
I would be thankful for any help.

Here is the code I used to test this page.
To select an item in the menu I can use ARROW_DOWN, but that gives no information about the selected item.
A second method is to search for
//div[@class='ac_results']//li[contains(text(),'Dress')]
or at least
//li[contains(text(),'Dress')]
or, failing that,
//div[@class='ac_results']//li
to access an item in the menu. Then I can get its full text with .text, or the highlighted part with .//strong
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
# URL the test expects to land on after clicking the suggestion.
expected_url = "http://automationpractice.com/index.php?id_product=3&controller=product"

try:
    #driver = webdriver.Chrome()
    driver = webdriver.Firefox()
    driver.get("http://automationpractice.com/index.php")
    #driver.maximize_window()

    search_text_box = driver.find_element_by_id("search_query_top")
    search_text_box.send_keys("Printed")
    time.sleep(1)  # page display (and update) autocompletion when you make little longer delay

    # --- select using arrow key ---
    # move selection on list and accept it
    #search_text_box.send_keys(Keys.ARROW_DOWN)
    #search_text_box.send_keys(Keys.ARROW_DOWN)
    #search_text_box.send_keys(Keys.ARROW_DOWN)
    #search_text_box.send_keys(Keys.ENTER)

    # OR

    # --- select using tag `<li>` and `text()` in autocompletion ---
    # click on first matching item on list
    #one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//li[contains(text(),'Dress')]")))
    one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='ac_results']//li[contains(text(),'Dress')]")))
    print(' tag:', one_option.tag_name)
    print('text:', one_option.text)
    print('bold:', one_option.find_element_by_xpath('.//strong').text)
    one_option.click()

    # OR

    # --- get all elements in autocompletion using `<li>` tag ---
    # get many matching items and use [index] to click on some item on list
    #one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//li[contains(text(),'Dress')]")))
    #all_options = driver.find_elements_by_xpath("//li[contains(text(),'Dress')]")
    #for option in all_options:
    #    print(option.tag_name, ':', option.text)
    #all_options[1].click()

    print(' current:', driver.current_url)
    print('expected:', expected_url)
    print('the same:', driver.current_url == expected_url)
    assert expected_url == driver.current_url, "This Test case is fallied"
    #assertEqual("http://automationpractice.com/index.php?id_product=3&controller=product", self.driver.current_url, "This Test case is fallied")
except NoSuchElementException as e:
    print('NoSuchElementException:', e)
except TimeoutException as e:
    print('TimeoutException:', e)
except AssertionError as e:
    print('AssertionError:', e)

I'm making a bot that likes every post that aren't liked yet

The problem is that it doesn't like the posts.
I have tried different methods, like tag name
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
def like_photo(self):
    # Asker's method, with indentation and the XPath '@aria-label' marker
    # restored; logic left as posted.
    driver = self.driver
    driver.get("https://www.instagram.com")
    time.sleep(1)
    for i in range(1, 4):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
    # find all the heart links
    hrefs = driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
    pic_hrefs = [elem.get_attribute('href') for elem in hrefs]
    pic_hrefs = [href for href in pic_hrefs]
    print(' Photos ' + str(len(pic_hrefs)))
    for _ in pic_hrefs:
        driver.get("https://www.instagram.com")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            # NOTE(review): like_button is a lambda (and would return a list if
            # called), so .click() raises AttributeError; the except below
            # swallows it, which is why nothing is ever liked.
            like_button = lambda: driver.find_elements_by_xpath("//span[@aria-label='Synes godt om']")
            like_button.click()
            time.sleep(18)
        except Exception as e:
            time.sleep(1)


nameIG = InstagramBot(username, password)
nameIG.login()
nameIG.like_photo()
It doesn't like any post; the output is just: Photos 4
Process finished with exit code 0

exit code 0 means your code is running with no error. However, there's still a problem.
To see if there are actual errors in your code, change the exception actions.
except Exception as e:
print(e) # shows actual error
Try this:
like_buttons = driver.find_elements_by_xpath(some_xpath_to_buttons)  # list of WebElements
for button in like_buttons:
    # Click each element itself; the list returned above has no .click().
    button.click()
    time.sleep(18)

selenium fails to iterate on elements

I'm trying to translate user comments from TripAdvisor.
My code:
1. Selects only Portuguese comments (from the language dropdown),
2. Then expands each of the comments,
3. Then saves all these expanded comments in a list,
4. Then translates them into English and prints them on screen.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
com_=[] # To save translated comments
expanded_comments=[] # To save expanded comments
# Raw string: the path contains backslash sequences such as "\U", which a
# normal string literal rejects on Python 3. The resulting path is unchanged.
driver = webdriver.Chrome(r"C:\Users\shalini\Downloads\chromedriver_win32\chromedriver.exe")
driver.maximize_window()
def expand_reviews(driver):
    """Click a "moreLink" element, then a "ui_close_x" element, then "moreLink" again.

    Each of the three steps is best-effort: if the lookup or the click fails,
    a short marker is printed and the next step still runs.
    """
    # TRYING TO EXPAND REVIEWS (& CLOSE A POPUP)
    # NOTE(review): find_element_* returns only the first match, so a single
    # "moreLink" is clicked per call.
    try:
        driver.find_element_by_class_name("moreLink").click()
    except Exception:
        print("err")
    try:
        driver.find_element_by_class_name("ui_close_x").click()
    except Exception:
        print("err")
    try:
        driver.find_element_by_class_name("moreLink").click()
    except Exception:
        print("err3")
def save_comments(driver):
    """Collect review texts and their translations from the current page.

    Appends the text of every "entry" element to the module-level
    `expanded_comments` list, then clicks each translation link and appends
    the overlay text to the module-level `com_` list.
    """
    expand_reviews(driver)
    # SELECTING ALL EXPANDED COMMENTS
    expanded_com_elements=driver.find_elements_by_class_name("entry")
    time.sleep(3)
    for i in expanded_com_elements:
        expanded_comments.append(i.text)
    # SELECTING ALL GOOGLE-TRANSLATOR links
    gt= driver.find_elements(By.CSS_SELECTOR,".googleTranslation>.link")
    # NOW PRINTING TRANSLATED COMMENTS
    for i in gt:
        try:
            # JavaScript click, as in the original post.
            driver.execute_script("arguments[0].click()",i)
            #i.click().perform()
            com=driver.find_element_by_class_name("ui_overlay").text
            com_.append(com)
            time.sleep(5)
            # click() returns None, so the former ".click().perform()" always
            # raised AttributeError and skipped the pause below.
            driver.find_element_by_class_name("ui_close_x").click()
            time.sleep(5)
        except Exception as e:
            # Best-effort: skip a translation that cannot be opened or closed.
            pass
            #print e
# Visit the review pages in steps of 10 (or0, or10, or20, ...) and save the
# comments of each page.
for i in range(282):
    page=i*10
    url="https://www.tripadvisor.com/Airline_Review-d8729164-Reviews-Cheap-Flights-or"+str(page)+"-TAP-Portugal#REVIEWS"
    driver.get(url)
    wait = WebDriverWait(driver, 10)
    if i==0:
        # SELECTING PORTUGUESE COMMENTS ONLY # Run for one time then iterate over pages
        try:
            langselction = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.sprite-date_picker-triangle")))
            langselction.click()
            driver.find_element_by_xpath("//div[@class='languageList']//li[normalize-space(.)='Portuguese first']").click()
            time.sleep(5)
        except Exception as e:
            print(e)
    save_comments(driver)
================ERROR=================
expanded_comments returns an empty list. Some comments get saved, some get skipped.
The first page is saved properly (all comments expanded), but after that only the first comment gets saved, without being expanded. Translated comments from all pages, however, get saved properly in com_.

I have changed your code and now it's working.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome("./chromedriver.exe")
driver.maximize_window()
url="https://www.tripadvisor.com/Airline_Review-d8729164-Reviews-Cheap-Flights-TAP-Portugal#REVIEWS"
driver.get(url)
wait = WebDriverWait(driver, 10)

# SELECTING PORTUGUESE COMMENTS ONLY
#show_lan = driver.find_element_by_xpath("//div[@class='languageList']/ul/li[contains(text(),'Portuguese first')]")
try:
    langselction = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.sprite-date_picker-triangle")))
    langselction.click()
    driver.find_element_by_xpath("//div[@class='languageList']//li[normalize-space(.)='Portuguese first']").click()
    time.sleep(5)
except Exception as e:
    print(e)

# TRYING TO EXPAND REVIEWS (& CLOSE A POPUP)
# Each step is best-effort: a failure prints a marker and the next step runs.
try:
    driver.find_element_by_class_name("moreLink").click()
except Exception:
    print("err")
try:
    driver.find_element_by_class_name("ui_close_x").click()
except Exception:
    print("err")
try:
    driver.find_element_by_class_name("moreLink").click()
except Exception:
    print("err3")

# SELECTING ALL EXPANDED COMMENTS
expanded_com_elements=driver.find_elements_by_class_name("entry")
expanded_comments=[]
time.sleep(3)
for i in expanded_com_elements:
    expanded_comments.append(i.text)

# SELECTING ALL GOOGLE-TRANSLATOR links
gt= driver.find_elements(By.CSS_SELECTOR,".googleTranslation>.link")

# NOW PRINTING TRANSLATED COMMENTS
for i in gt:
    try:
        driver.execute_script("arguments[0].click()",i)
        #i.click().perform()
        print(driver.find_element_by_class_name("ui_overlay").text)
        time.sleep(5)
        # click() returns None, so the former ".click().perform()" always
        # raised AttributeError and skipped the pause below.
        driver.find_element_by_class_name("ui_close_x").click()
        time.sleep(5)
    except Exception as e:
        # Best-effort: skip a translation that cannot be opened or closed.
        pass
        #print e

Stale exception web scraping selenium python

I am trying to grab information from tripadvisor. I sometimes get
Message: stale element reference: element is not attached to the page document
(Session info: chrome=47.0.2526.73)
(Driver info: chromedriver=2.20.353124 (035346203162d32c80f1dce587c8154a1efa0c3b),platform=Mac OS X 10.10.4 x86_64)
and then the element is just whatever I assign it to. How can I fix my code to handle the issue and then figure out a solution to it instead of re running the code?
def getElements(driver):
    """Return the 'ui_button original' elements whose text is "Book Now".

    Elements whose text cannot be read (the access raises) are reported with
    print and left out of the result.
    """
    elements = []
    for dd in driver.find_elements_by_xpath("//*[contains(@class, 'ui_button original')]"):
        try:
            if dd.text == "Book Now":
                elements.append(dd)
        except Exception as ee:
            print(ee)
    return elements
def getBookingPartner(driver, ibInfo):
    """Click the "Book Now" button for each bookable row and read the partner name.

    For every item of `ibInfo` whose first field is "Yes", the i-th "Book Now"
    element is clicked, the newest window is selected, and the partner name is
    taken from the text of a 'custServiceMsg' div (the part before "will").
    Other items get "N/A" and are extended in place with "N/A".

    Returns a list of [item[0], item[1], bookingPartner] rows; bookingPartner
    is "Error" when no name could be extracted.
    """
    data = []
    i = 0
    time.sleep(2)
    elements = getElements(driver)
    elementCounter = 0
    # Retry the lookup a bounded number of times. The original condition
    # (`elements == [] or elementCounter > 5`) never stopped on the counter
    # and looped forever when no button appeared.
    while not elements and elementCounter < 5:
        elements = getElements(driver)
        elementCounter+=1
    print("Length of elements should be > 0 : " + str(len(elements)))
    for ii in ibInfo:
        if ii[0] == "Yes":
            driver.implicitly_wait(3)
            bookingPartner = "Error"
            print(ii)
            driver.implicitly_wait(3)
            try:
                elements[i].click()
                driver.implicitly_wait(3)
                driver.switch_to_window(driver.window_handles[-1])
            except Exception as ee:
                # The stored reference may no longer be usable (e.g. stale):
                # reload the page and locate the button again.
                try:
                    driver.refresh()
                    getElements(driver)[i].click()
                    time.sleep(1)
                    driver.switch_to_window(driver.window_handles[-1])
                except Exception as ee:
                    print("Stale Exception....")
                    print(ee)
            try:
                driver.implicitly_wait(3)
                driver.find_elements_by_xpath("//*[contains(@class, 'book_now')]")[1].click()
                driver.implicitly_wait(1)
                page = etree.HTML(driver.page_source)
                bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[0].split("will")[0].strip()
            except:
                # Second attempt with longer fixed pauses.
                try:
                    time.sleep(3)
                    driver.find_elements_by_xpath("//*[contains(@class, 'book_now')]")[1].click()
                    time.sleep(2)
                    page = etree.HTML(driver.page_source)
                    bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[0].split("will")[0].strip()
                except:
                    # Last resort: try the second text node of the parsed page.
                    # `page` may be unbound here; the except below covers that.
                    try:
                        bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[1].split("will")[0].strip()
                    except Exception as ee:
                        bookingPartner = "Error"
                        print("error")
            i+=1
            if bookingPartner == "The remainder":
                bookingPartner = page.xpath("//div[contains(@class, 'custServiceMsg')]//text()")[1].split("will")[0].strip()
            if len(driver.window_handles) > 1:
                # Close the extra window and return to the first one.
                driver.close()
                driver.switch_to_window(driver.window_handles[0])
            print(bookingPartner)
            data.append([ii[0], ii[1], bookingPartner])
        else:
            data.append([ii[0], ii[1], "N/A"])
            ii.extend(["N/A"])
    print(data)
    return data

A Stale Element Reference Exception occurs when an element:
Has been deleted
Is no longer attached to the DOM (as in your case)
Has changed
From the docs:
You should discard the current reference you hold and replace it, possibly by locating the element again once it is attached to the DOM.
i.e.: "Find" the element again.
You'll need to modify the code to catch this error for the appropriate step.
from selenium.common.exceptions import StaleElementReferenceException
elem = driver.find_element_by_xpath('something leaves dom')
# ... do other actions which change the page and then later...
try:
    elem.click()
except StaleElementReferenceException:
    # The old reference is no longer valid: locate the element again and retry.
    elem = driver.find_element_by_xpath('something leaves dom')
    elem.click()
Make a reusable version if you need it extensively for several elements.
Btw, you should not be catching the generic Exception in your code. Be specific about which exceptions you want to handle.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Skip extracting value if text node not found - python

Related

Selenium Loop through table pages

i verify that autocomplete works well but there are no results appear

I'm making a bot that likes every post that aren't liked yet

selenium fails to iterate on elements

Stale exception web scraping selenium python

Categories

Resources