Can't locate HTML element on Rotten Tomatoes

Can't locate HTML element on Rotten Tomatoes - python

I would like to find a movie and get ratings of it on Rotten Tomatoes, but I'm stuck because I don't know how to click on it in search results section. I tried almost every XPATH or CLASS NAME but every time I got error message that it couldn't find the element. I'm using Python Selenium.
My code:
import sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def element(driver, by_x, html_element):
    """Wait up to 5 seconds for an element to be present and return it.

    ``by_x`` and ``html_element`` together form the locator handed to
    ``presence_of_element_located``. On any failure a message is printed and
    the function falls through, so the caller receives ``None``.
    """
    locator = (by_x, html_element)
    try:
        return WebDriverWait(driver, 5).until(EC.presence_of_element_located(locator))
    except:
        print("Can not locate this element")
class rottenTomatoes:
    """Drive Chrome through a Rotten Tomatoes search for a single film."""

    def __init__(self, film):
        """Start Chrome and open the Rotten Tomatoes home page.

        film: text that ``search`` types into the site's search box.
        """
        options = webdriver.ChromeOptions()
        # Exclude the "enable-logging" switch from the Chrome command line.
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        self.driver = webdriver.Chrome(executable_path="./drivers/chromedriver", options=options)
        self.film = film
        self.driver.get("https://www.rottentomatoes.com/")

    def search(self):
        """Search for ``self.film``, keep movies only, then try to open a result."""
        # search for a film
        search_bar = self.driver.find_element_by_class_name("search-text")
        search_bar.click()
        search_bar.send_keys(self.film, Keys.RETURN)
        # filter movies only
        element(
            self.driver,
            By.XPATH,
            "//*[@id='main-page-content']/div/section[1]/search-result-container/nav/ul/li[3]/span",
        ).click()
        # accept cookies
        self.driver.find_element_by_id("truste-consent-button").click()
        # click on film (THE PROBLEM)
        # NOTE: this is the call from the traceback below: element() prints
        # "Can not locate this element" and returns None, so .click() raises
        # AttributeError. It is kept as-is because it is what the question asks about.
        element(self.driver, By.CLASS_NAME, "media-col thumbnail-group").click()


rottentomatoes = rottenTomatoes("Shawshank")
rottentomatoes.search()
EDIT
Error message:
Can not locate this element
Traceback (most recent call last):
File "d:\Programovanie\selenium\movie_info\rotten_tomat.py", line 38, in <module>
rottentomatoes.search()
File "d:\Programovanie\selenium\movie_info\rotten_tomat.py", line 35, in search
element(self.driver, By.CLASS_NAME, "media-col thumbnail-group").click()
AttributeError: 'NoneType' object has no attribute 'click'

import sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
def element(driver, by_x, html_element):
    """Wait up to 5 seconds for an element to be present and return it.

    Args:
        driver: WebDriver instance that is polled.
        by_x: locator strategy (a ``By`` constant).
        html_element: locator value that goes with ``by_x``.

    Returns:
        The located element, or ``None`` (after printing a message) when the
        wait times out. Callers that chain ``.click()`` must be prepared for
        the ``None`` case.
    """
    # Imported locally: the file's import block does not bring this name in.
    from selenium.common.exceptions import TimeoutException

    try:
        return WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((by_x, html_element))
        )
    except TimeoutException:
        # Only a timed-out wait means "not found"; anything else (bad selector,
        # dead session, Ctrl-C) is a different problem and must not be hidden.
        print("Can not locate this element")
        return None
class rottenTomatoes:
    """Drive Chrome through a Rotten Tomatoes search and open the first movie hit."""

    def __init__(self, film):
        """Start Chrome and open the Rotten Tomatoes home page.

        film: text that ``search`` types into the site's search box.
        """
        options = webdriver.ChromeOptions()
        # Exclude the "enable-logging" switch from the Chrome command line.
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        self.driver = webdriver.Chrome(options=options)
        self.film = film
        self.driver.get("https://www.rottentomatoes.com/")

    def search(self):
        """Search for ``self.film``, keep movies only, and click the first result.

        The result row sits inside nested shadow roots, so it is fetched with
        JavaScript rather than with a normal locator.
        """
        # Imported locally: the file's import block does not bring this name in.
        from selenium.common.exceptions import WebDriverException

        # search for a film
        search_bar = self.driver.find_element(By.CLASS_NAME, "search-text")
        search_bar.click()
        search_bar.send_keys(self.film, Keys.RETURN)
        # filter movies only
        element(
            self.driver,
            By.XPATH,
            "//*[@id='main-page-content']/div/section[1]/search-result-container/nav/ul/li[3]/span",
        ).click()
        # accept cookies
        time.sleep(5)
        try:
            self.driver.find_element(By.ID, "truste-consent-button").click()
        except WebDriverException:
            # Best effort: the consent banner may be absent or not clickable.
            pass
        ele = self.driver.execute_script(
            "return document.querySelector('search-result-container').shadowRoot.querySelector('[type=\"movie\"]').shadowRoot.querySelector('media-row').shadowRoot.querySelector('[class=\"media-row center\"]')")
        # click on film (THE PROBLEM)
        ele.click()
        time.sleep(10)


rottentomatoes = rottenTomatoes("Shawshank")
rottentomatoes.search()
I have added the full code. The element was inside a shadowRoot, so you have to use JavaScript to reach it.

You can use requests:
import requests
# Query the site's JSON search endpoint directly instead of driving a browser.
search_query = "Shawshank"
choose_type = "movie"
url = "https://www.rottentomatoes.com/napi/search/all"
# Passing the query through ``params`` lets requests percent-encode titles that
# contain spaces, "&" or other reserved characters.
r = requests.get(
    url,
    params={"type": choose_type, "searchQuery": search_query},
    timeout=10,  # do not hang forever if the server never answers
)
# Fail on an HTTP error instead of trying to decode an error page as JSON.
r.raise_for_status()
r_json = r.json()
print(r_json)

Related

Scroll down through a list of the same element and fetch data selenium python

I want to fetch data by iterating through a list of elements. I need to scroll to each item in the list before I can fetch its data. Each scroll position contains the same kind of element, and I want to iterate through them. Apart from the first element, I need to scroll the next element into view before fetching its data. My code below scrolls only once and then throws an error.
What I've tried:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import Select
import undetected_chromedriver as uc
import time
import pandas as pd
from selenium.webdriver import ActionChains
import tkinter as tk
options = uc.ChromeOptions()
driver = uc.Chrome(options=options)
website = 'https://douyin.com'
driver.get(website)
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 20)
driver.maximize_window()
LOGIN TO SITE
INPUT LOCATION & KEYWORD INTO SEARCH FIELD
# Collect one record per result card.
# NOTE(review): `actions`, `location_query` and `keyword_query` are not defined
# in this snippet; presumably they come from the elided login/search steps.
web_list = []
# Visit cards 1..5. The original `e + 1` discarded its result, so the counter
# never advanced and the loop kept re-reading card 1 forever.
for e in range(1, 6):
    time.sleep(2)
    name_spans = driver.find_elements(By.XPATH, f'(//p[@class="_K3SLhVc"]//span/span/span/span/span/span)[{e}]')
    # Store the text, not the WebElement, so the record stays usable after the
    # page changes; an empty string marks a card whose name was not found.
    name = name_spans[0].text if name_spans else ""
    time.sleep(3)
    share_arrow = driver.find_element(By.XPATH, "(//div[@class='BwiwR0v7'])")
    actions.move_to_element(share_arrow)
    actions.perform()
    time.sleep(2)
    copy_link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div > button._FNKGf5B.W1wbuR_h > svg')))
    time.sleep(2)
    actions.move_to_element(copy_link).click().perform()
    time.sleep(2)
    # Read the copied link back from the clipboard through a hidden Tk window.
    root = tk.Tk()
    root.withdraw()
    copied_text = root.clipboard_get()
    # Dispose of the window; otherwise every iteration leaves a Tk root behind.
    root.destroy()
    driver.execute_script("window.scrollBy(0,800)","")
    web_item = {
        'Location': location_query,
        'Key': keyword_query,
        'Name': name,
        'Copied_Links': copied_text
    }
    web_list.append(web_item)
Error Code
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Temp\2\ipykernel_2692\2751035614.py", line 22, in <module>
copy_link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div > button._FNKGf5B.W1wbuR_h > svg')))
File "C:\Users\Administrator\anaconda3\lib\site-packages\selenium\webdriver\support\wait.py", line 95, in until
raise TimeoutException(message, screen, stacktrace)
TimeoutException'''
How do I scroll continuously and, at the same time, iterate through the `name` and `copied_text` variables?

Reading weblink from dataframe throws "stale element reference: element is not attached to the page document" error

I got a dataframe that contains links to google reviews of two restaurants. I wanted to load all reviews of two restaurants (one by one) into the browser and then save them into a new data frame. I wrote a script that reads and load all reviews into the browser as follow:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
link_df = Link
0 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
1 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
# glob is used by the CSV-exists check below but is missing from the imports above.
import glob

driver = webdriver.Chrome()
# One pass per restaurant link: open the review popup, load every review by
# scrolling, then append the parsed reviews to today's CSV file.
for index, row in link_df.iterrows():
    base_url = row['Link']
    driver.get(base_url)
    WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
    print('Restaurant number is ',index)
    title = driver.find_element_by_xpath("//div[@class='P5Bobd']").text
    address = driver.find_element_by_xpath("//div[@class='T6pBCe']").text
    overall_rating = driver.find_element_by_xpath("//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
    time.sleep(2)
    total_reviews = len(all_reviews)
    while total_reviews < num_reviews:
        # NOTE: this is the line that raises StaleElementReferenceException in
        # the report below; the loop is left as posted because it is the
        # subject of the question.
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        time.sleep(5)
        all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
        print(total_reviews)
        total_reviews +=1
    reviews_info = driver.find_elements_by_xpath("//div[@class='jxjCjc']")
    review_information = pd.DataFrame(columns=["Restaurant title","Restaurant rating","Total reviews","Reviewer Name","Rating", "Review"])
    # The inner loop no longer reuses `index`, so the restaurant number above
    # is not clobbered.
    for review_info in reviews_info:
        name = review_info.find_element_by_xpath("./div/div/a").text
        rating = review_info.find_element_by_xpath(".//div[@class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
        text = review_info.find_element_by_xpath(".//div[@class='Jtu6Td']//span").text
        # .loc appends a whole row; .at addresses a single cell and needs a
        # (row, column) pair.
        review_information.loc[len(review_information)] = [title,overall_rating,num_reviews,name,rating,text]
    filename = 'Google_reviews' + ' ' +pd.to_datetime("now").strftime("%Y_%m_%d")+'.csv'
    files_present = glob.glob(filename)
    if files_present:
        # File for today already exists: append rows without repeating the header.
        review_information.to_csv(filename,index=False,mode='a',header=False)
    else:
        review_information.to_csv(filename,index=False)
    # Leave the review popup before loading the next link.
    driver.get('https://www.google.com')
    time.sleep(3)
The problem is that script throws an error when it reaches the following line.
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
It throws following error:
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=95.0.4638.69)
When I tried the same program without storing google links in dataframe (i.e. no for loop and instead of base_url = i['Link'], I wrote base_url = google review link) it works fine.
I am not sure where I am making the mistake. Any suggestion or help to fix the issue would be highly appreciated?

EDIT
You put the creation of the driver outside the for loop.
You can't load the next URL with GPS data while the first popup is still in front: if you do, the new page stays in the background. The easiest workaround is to load a URL without GPS data (https://www.google.com) and wait 3 seconds before continuing your loop.
Your count is not right either: I have changed your selector, changed how the total is computed and commented out some lines.
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.options import Options
import time
link_df = ["https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
binary = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
options = Options()
options.binary = binary
driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
# i have to launch one time to accept the cookies manually
#by setting a breakpoint after, but you dont have that i think
#driver.get(link_df[0])
print ("Headless Firefox Initialized")
print(link_df)
# One pass per restaurant link: open the review popup, sort by newest, then
# keep scrolling the last loaded review into view until all reviews are present.
for url in link_df:
    base_url = url
    print(base_url)
    driver.get(base_url)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Avis les plus récents']]"))).click()
    title = driver.find_element(By.XPATH, "//div[@class='P5Bobd']").text
    address = driver.find_element(By.XPATH, "//div[@class='T6pBCe']").text
    overall_rating = driver.find_element(By.XPATH, "//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element(
        By.XPATH,
        "//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
    total_reviews = 0
    while total_reviews < num_reviews:
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        # Wait for the loading indicator to go away before re-reading the list.
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        all_reviews = WebDriverWait(driver, 5).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
        total_reviews = len(all_reviews)
        print(total_reviews, len(all_reviews))
    # Leave the review popup before loading the next link; the original
    # 'https:ww.google.com' was missing "//" and a "w".
    driver.get('https://www.google.com')
    time.sleep(3)
# quit() already closes every window and ends the session, so the extra
# close() beforehand was redundant.
driver.quit()
it seems the solution for chrome needs some fixes:
org.openqa.selenium.StaleElementReferenceException: stale element reference: element is not attached to the page document
The literal meaning is that the referenced element is out of date and no longer attached to the current page. Usually this is because the page has been refreshed or navigated away from. The solution is to call findElement or findElements again to re-locate the element.
So it seems that Chrome has a refresh problem here. I suggest re-reading the list of records before scrolling, to get a fresh copy of the DOM items, and I also had to add a 1-second wait at the end of the while loop.
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
#from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
import time
link_df = [
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
binaryfirefox = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
binarychrome = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
options = Options()
#cap = DesiredCapabilities().CHROME
#cap["marionette"] = True
#cap = DesiredCapabilities().FIREFOX
#options.binary = binaryfirefox
#driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
options.binary_location = binarychrome
driver = webdriver.Chrome(options=options, executable_path="E:\\Téléchargement\\chromedriver.exe" )
# same reason as Firefox: i have to load an url one time
# to accept the cookies manually
#driver.get(link_df[0])
print(link_df)
# One pass per restaurant link: open the review popup, sort by newest, then
# keep scrolling the last loaded review into view until all reviews are present.
for url in link_df:
    base_url = url
    print(base_url)
    driver.get(base_url)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Newest']]"))).click()
    title = driver.find_element(By.XPATH, "//div[@class='P5Bobd']").text
    address = driver.find_element(By.XPATH, "//div[@class='T6pBCe']").text
    overall_rating = driver.find_element(By.XPATH, "//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element(
        By.XPATH,
        "//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
    total_reviews = 0
    while total_reviews < num_reviews:
        # Re-locate the reviews on every pass so the element that gets scrolled
        # is never a stale reference (cheaper than trapping the exception).
        all_reviews = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        total_reviews = len(all_reviews)
        print(total_reviews, len(all_reviews))
        # Give the page a moment to append the next batch of reviews.
        time.sleep(1)
    # Leave the review popup before loading the next link; the original
    # 'https:ww.google.com' was missing "//" and a "w".
    driver.get('https://www.google.com')
    time.sleep(3)
# quit() already closes every window and ends the session, so the extra
# close() beforehand was redundant.
driver.quit()

Extract data from table by field name. Xpath, python

I want to extract data from this page https://mbasic.facebook.com/kristina.layus
There is a table "Places lived" with two rows
Current city --- Moscow, Russia
Home town --- Saint Petersburg, Russia
I can extract data with help of full xpath (extracted data "Moscow, Russia"):
/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a
But I want extract data with help of names in table. I tried this
//div[@id='living']//div[@title='Current City']//a/text()
But I received this error:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//div[@id='living']//div[@title='Current City']//a/text()"}
(Session info: chrome=84.0.4147.89)
My code
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class FacebookParser:
    """Log in to Facebook with Chrome and read fields from a profile page."""

    LOGIN_URL = 'https://www.facebook.com/login.php'

    def __init__(self, login, password):
        """Start Chrome, create a 10-second explicit wait, and log in."""
        chrome_options = webdriver.ChromeOptions()
        # Presumably disables the browser notification prompt; confirm the
        # meaning of the value 2 against the Chrome preference docs.
        prefs = {"profile.default_content_setting_values.notifications": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.driver, 10)
        self.login(login, password)

    def login(self, login, password):
        """Open the login page, fill in the credentials and submit the form."""
        self.driver.get(self.LOGIN_URL)
        # wait for the login page to load
        self.wait.until(EC.visibility_of_element_located((By.ID, "email")))
        self.driver.find_element_by_id('email').send_keys(login)
        self.driver.find_element_by_id('pass').send_keys(password)
        self.driver.find_element_by_id('loginbutton').click()

    def get_user_by_id(self, id):
        """Open the profile page for a numeric user id (uses module-level BASIC_URL)."""
        self.driver.get(BASIC_URL + 'profile.php?id=' + str(id))

    def get_user_by_url(self, url):
        """Open the given profile URL."""
        self.driver.get(url)

    def find_element_by_xpath_safe(self, path):
        """Return the element matching ``path``, or ``None`` if the lookup fails."""
        try:
            # Use this instance's driver. The original reached for the global
            # ``parser`` object, which only worked when exactly one instance
            # existed under that name.
            return self.driver.find_element_by_xpath(path)
        except Exception:
            # Lookup failures are expected here; unlike a bare ``except`` this
            # still lets KeyboardInterrupt and SystemExit through.
            return None

    def _name_part(self, index):
        """Return word number ``index`` of the profile name, or "" if unavailable."""
        res = self.find_element_by_xpath_safe('//span/div/span/strong')
        if res is None:
            return ""
        vec = res.text.split()
        if len(vec) > index:
            return vec[index]
        print("Can't split {}".format(res.text))
        return ""

    def get_first_name(self):
        """Return the first word of the profile name, or "" if it is missing."""
        return self._name_part(0)

    def get_second_name(self):
        """Return the second word of the profile name, or "" if it is missing."""
        return self._name_part(1)

    def get_curr_city(self):
        """Return the text of the first "Places lived" row link, or ""."""
        res = self.find_element_by_xpath_safe('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a')
        if res is None:
            return ""
        return res.text

    def get_home_town(self):
        """Return the text of the second "Places lived" row link, or ""."""
        res = self.find_element_by_xpath_safe('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[2]/div/table/tbody/tr/td[2]/div/a')
        if res is None:
            return ""
        return res.text
#####################################
# Placeholder credentials and the base URL of the mobile-basic site.
LOGIN = '----.com'
PASSWORD = '----'
BASIC_URL = 'https://mbasic.facebook.com/'
#####################################
# Constructing the parser starts Chrome and logs in (see FacebookParser.__init__).
parser = FacebookParser(login=LOGIN, password=PASSWORD)
# NOTE(review): the profile is requested twice in a row; it is unclear from
# this snippet whether the repeat is intentional — confirm before removing.
parser.driver.get("https://mbasic.facebook.com/kristina.layus")
parser.driver.get("https://mbasic.facebook.com/kristina.layus")
print(parser.get_curr_city())

To print the text Moscow, Russia you need to induce WebDriverWait for the visibility_of_element_located() and you can use the following xpath based Locator Strategy:
Printing Moscow, Russia:
# Open the profile, wait up to 20 seconds until the link in the table cell that
# follows the "Current City" label is visible, then print its text.
driver.get('https://mbasic.facebook.com/kristina.layus')
current_city_locator = (By.XPATH, "//span[text()='Current City']//following::td//a")
current_city_link = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(current_city_locator)
)
print(current_city_link.text)
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
References
You can find a couple of relevant discussions on NoSuchElementException in:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element while trying to click Next button with selenium
selenium in python : NoSuchElementException: Message: no such element: Unable to locate element

Try to add following code between login (loginbutton.click()) and opening target page:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Block until the element with id "mount_0_0" is present in the DOM.
post_login_locator = (By.ID, "mount_0_0")
WebDriverWait(wd, DELAY).until(EC.presence_of_element_located(post_login_locator))
This code will wait till login process will be finished,only after that target page should be opened.
Also check your xpath expression: when investigating page source div element with id="living" can be found, but div with attribute title="Current City" is absent.

How to get First or third word from description in selenium python

import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import time
import autoit
# Alias the class so the wait below reads as wait(driver, timeout).
wait = WebDriverWait
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications" : 2}
chrome_options.add_experimental_option("prefs",prefs)
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=r'C:\\chromedriver\\chromedriver.exe')
driver.maximize_window()
driver.get("https://www.arttoframe.com/search")
# NOTE: print() returns None, so Search_Price is None and the .split() below
# raises the AttributeError shown in the traceback. The line is kept as posted
# because it is the subject of the question; the XPath attribute test is
# written with "@", as XPath requires.
Search_Price = print(wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="index"]/div[10]/div/div[4]/div/div[5]/div[2]/div[1]/div[2]/div/p[2]'))).text)
splitted = Search_Price.split()
first = splitted[0]
Traceback (most recent call last):
File "C:\Users\Dell\PycharmProjects\untitled3\ATF_TestOrder.py", line 23, in
splitted = Search_Price.split()
AttributeError: 'NoneType' object has no attribute 'split'

@MikeScotty is right: the print() function returns None, so don't wrap the expression in print() here:
# wrong: print() returns None, so the assignment stores None
# Search_Price = print(wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="index"]/div[10]/div/div[4]/div/div[5]/div[2]/div[1]/div[2]/div/p[2]'))).text)
# right: keep the element text itself, print it separately afterwards
Search_Price = wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="index"]/div[10]/div/div[4]/div/div[5]/div[2]/div[1]/div[2]/div/p[2]'))).text
splitted = Search_Price.split()
# An element with no text yields an empty list; fall back to "" instead of
# raising IndexError. Use splitted[2] the same way for the third word.
first = splitted[0] if splitted else ""
print(first)
Output
$900

Selenium about how to get the new_driver after click the more_button?

Q:
I'm using Selenium to get a page with contents, and after I click the more button,the page outputs more content,and how I get the new page through webdriver?
some codes like this:
def parase_questions(self):
    """Log in, click the "more" button, then print the page and the link count.

    Prints the page source followed by the number of elements with the
    ``question_link`` class that are present right after the click.
    """
    driver = self.login()
    driver.implicitly_wait(2)
    more_btn = driver.find_element_by_css_selector(".zg-btn-white.zg-r3px.zu-button-more")
    more_btn.click()
    # should I do something to get the new driver ?
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(driver.page_source)
    question_links = driver.find_elements_by_css_selector('.question_link')
    print(len(question_links))

If I understand you correctly, after you click the More button, there are more elements with question_link class loaded. You would need a way to wait for the question links to be loaded.
Here is one idea - a custom Expected Condition that would help you to wait until there are more than N number of elements:
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
class wait_for_more_than_n_elements(object):
    """Expected condition: more than ``count`` elements match ``locator``.

    Intended to be passed to ``WebDriverWait.until``; calling the instance
    with a driver returns ``True`` once the number of matches exceeds
    ``count`` and ``False`` otherwise.
    """

    def __init__(self, locator, count):
        """Store the ``(by, value)`` locator and the count that must be exceeded."""
        self.locator = locator
        self.count = count

    def __call__(self, driver):
        """Return whether ``driver`` currently finds more than ``count`` matches."""
        try:
            # Ask the driver directly instead of going through the private
            # EC._find_elements helper, which is not part of the public API.
            return len(driver.find_elements(*self.locator)) > self.count
        except StaleElementReferenceException:
            # The DOM changed mid-lookup; report "not yet" so the wait retries.
            return False
Usage:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
driver = self.login()
driver.implicitly_wait(2)
# Count the links before clicking so the wait knows what "more" means.
question_links = driver.find_elements_by_css_selector('.question_link')
more_btn = driver.find_element_by_css_selector(".zg-btn-white.zg-r3px.zu-button-more")
more_btn.click()
# wait (up to 10 seconds) until more links than before are present
wait = WebDriverWait(driver, 10)
wait.until(wait_for_more_than_n_elements((By.CSS_SELECTOR, ".question_link"), len(question_links)))
# now more question links were loaded, get the page source
print(driver.page_source)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Can't locate HTML element on Rotten Tomatoes - python

You can use requests: import requests shearch = "Shawshank" choose_type = "movie" url = f"https://www.rottentomatoes.com/napi/search/all?type={choose_type}&searchQuery={shearch}" r = requests.get(url) r_json = r.json() print(r_json)

Related

Scroll down through a list of the same element and fetch data selenium python

Reading weblink from dataframe throws "stale element reference: element is not attached to the page document" error

Extract data from table by field name. Xpath, python

How to get First or third word from description in selenium python

Selenium about how to get the new_driver after click the more_button?

Categories

Resources